linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Salil Mehta <salil.mehta@huawei.com>
To: <davem@davemloft.net>
Cc: <salil.mehta@huawei.com>, <yisen.zhuang@huawei.com>,
	<lipeng321@huawei.com>, <mehta.salil@opnsrc.net>,
	<netdev@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	<linuxarm@huawei.com>, Shiju Jose <shiju.jose@huawei.com>
Subject: [PATCH net-next 07/14] net: hns3: add handling of hw ras errors using new set of commands
Date: Fri, 7 Dec 2018 21:08:04 +0000	[thread overview]
Message-ID: <20181207210811.23844-8-salil.mehta@huawei.com> (raw)
In-Reply-To: <20181207210811.23844-1-salil.mehta@huawei.com>

From: Shiju Jose <shiju.jose@huawei.com>

1. This patch adds handling of hw ras errors using new set of
   common commands.
2. Updated the error message tables to match the register's name and
   error status returned by the commands.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h |   3 +
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 489 ++++++++++++++-------
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h |   9 +
 3 files changed, 331 insertions(+), 170 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index d2fb210..0a0eb6c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -217,6 +217,9 @@ enum hclge_opcode_type {
 	/* Error INT commands */
 	HCLGE_TM_SCH_ECC_INT_EN		= 0x0829,
 	HCLGE_COMMON_ECC_INT_CFG	= 0x1505,
+	HCLGE_QUERY_RAS_INT_STS_BD_NUM	= 0x1510,
+	HCLGE_QUERY_CLEAR_MPF_RAS_INT	= 0x1511,
+	HCLGE_QUERY_CLEAR_PF_RAS_INT	= 0x1512,
 	HCLGE_IGU_EGU_TNL_INT_EN	= 0x1803,
 	HCLGE_IGU_COMMON_INT_EN		= 0x1806,
 	HCLGE_TM_QCN_MEM_INT_CFG	= 0x1A14,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index d1c9f7a..22e7c5b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -20,12 +20,7 @@ static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = {
 	{ .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err" },
 	{ .int_msk = BIT(14), .msg = "imp_dtcm1_mem1_ecc_1bit_err" },
 	{ .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err" },
-	{ /* sentinel */ }
-};
-
-static const struct hclge_hw_error hclge_imp_itcm4_ecc_int[] = {
-	{ .int_msk = BIT(0), .msg = "imp_itcm4_ecc_1bit_err" },
-	{ .int_msk = BIT(1), .msg = "imp_itcm4_ecc_mbit_err" },
+	{ .int_msk = BIT(17), .msg = "imp_itcm4_ecc_mbit_err" },
 	{ /* sentinel */ }
 };
 
@@ -46,26 +41,14 @@ static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = {
 	{ .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err" },
 	{ .int_msk = BIT(14), .msg = "cmdq_nic_tx_addr_ecc_1bit_err" },
 	{ .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err" },
-	{ /* sentinel */ }
-};
-
-static const struct hclge_hw_error hclge_cmdq_rocee_mem_ecc_int[] = {
-	{ .int_msk = BIT(0), .msg = "cmdq_rocee_rx_depth_ecc_1bit_err" },
-	{ .int_msk = BIT(1), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err" },
-	{ .int_msk = BIT(2), .msg = "cmdq_rocee_tx_depth_ecc_1bit_err" },
-	{ .int_msk = BIT(3), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err" },
-	{ .int_msk = BIT(4), .msg = "cmdq_rocee_rx_tail_ecc_1bit_err" },
-	{ .int_msk = BIT(5), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err" },
-	{ .int_msk = BIT(6), .msg = "cmdq_rocee_tx_tail_ecc_1bit_err" },
-	{ .int_msk = BIT(7), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err" },
-	{ .int_msk = BIT(8), .msg = "cmdq_rocee_rx_head_ecc_1bit_err" },
-	{ .int_msk = BIT(9), .msg = "cmdq_rocee_rx_head_ecc_mbit_err" },
-	{ .int_msk = BIT(10), .msg = "cmdq_rocee_tx_head_ecc_1bit_err" },
-	{ .int_msk = BIT(11), .msg = "cmdq_rocee_tx_head_ecc_mbit_err" },
-	{ .int_msk = BIT(12), .msg = "cmdq_rocee_rx_addr_ecc_1bit_err" },
-	{ .int_msk = BIT(13), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err" },
-	{ .int_msk = BIT(14), .msg = "cmdq_rocee_tx_addr_ecc_1bit_err" },
-	{ .int_msk = BIT(15), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err" },
+	{ .int_msk = BIT(17), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err" },
+	{ .int_msk = BIT(19), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err" },
+	{ .int_msk = BIT(21), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err" },
+	{ .int_msk = BIT(23), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err" },
+	{ .int_msk = BIT(25), .msg = "cmdq_rocee_rx_head_ecc_mbit_err" },
+	{ .int_msk = BIT(27), .msg = "cmdq_rocee_tx_head_ecc_mbit_err" },
+	{ .int_msk = BIT(29), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err" },
+	{ .int_msk = BIT(31), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err" },
 	{ /* sentinel */ }
 };
 
@@ -85,7 +68,13 @@ static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = {
 	{ /* sentinel */ }
 };
 
-static const struct hclge_hw_error hclge_igu_com_err_int[] = {
+static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = {
+	{ .int_msk = BIT(1), .msg = "msix_nic_ecc_mbit_err" },
+	{ .int_msk = BIT(3), .msg = "msix_rocee_ecc_mbit_err" },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_igu_int[] = {
 	{ .int_msk = BIT(0), .msg = "igu_rx_buf0_ecc_mbit_err" },
 	{ .int_msk = BIT(1), .msg = "igu_rx_buf0_ecc_1bit_err" },
 	{ .int_msk = BIT(2), .msg = "igu_rx_buf1_ecc_mbit_err" },
@@ -93,7 +82,7 @@ static const struct hclge_hw_error hclge_igu_com_err_int[] = {
 	{ /* sentinel */ }
 };
 
-static const struct hclge_hw_error hclge_igu_egu_tnl_err_int[] = {
+static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = {
 	{ .int_msk = BIT(0), .msg = "rx_buf_overflow" },
 	{ .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow" },
 	{ .int_msk = BIT(2), .msg = "rx_stp_fifo_undeflow" },
@@ -109,46 +98,7 @@ static const struct hclge_hw_error hclge_ncsi_err_int[] = {
 	{ /* sentinel */ }
 };
 
-static const struct hclge_hw_error hclge_ppp_mpf_int0[] = {
-	{ .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_1bit_err" },
-	{ .int_msk = BIT(3), .msg = "umv_key_mem1_ecc_1bit_err" },
-	{ .int_msk = BIT(4), .msg = "umv_key_mem2_ecc_1bit_err" },
-	{ .int_msk = BIT(5), .msg = "umv_key_mem3_ecc_1bit_err" },
-	{ .int_msk = BIT(6), .msg = "umv_ad_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(7), .msg = "rss_tc_mode_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(8), .msg = "rss_idt_mem0_ecc_1bit_err" },
-	{ .int_msk = BIT(9), .msg = "rss_idt_mem1_ecc_1bit_err" },
-	{ .int_msk = BIT(10), .msg = "rss_idt_mem2_ecc_1bit_err" },
-	{ .int_msk = BIT(11), .msg = "rss_idt_mem3_ecc_1bit_err" },
-	{ .int_msk = BIT(12), .msg = "rss_idt_mem4_ecc_1bit_err" },
-	{ .int_msk = BIT(13), .msg = "rss_idt_mem5_ecc_1bit_err" },
-	{ .int_msk = BIT(14), .msg = "rss_idt_mem6_ecc_1bit_err" },
-	{ .int_msk = BIT(15), .msg = "rss_idt_mem7_ecc_1bit_err" },
-	{ .int_msk = BIT(16), .msg = "rss_idt_mem8_ecc_1bit_err" },
-	{ .int_msk = BIT(17), .msg = "rss_idt_mem9_ecc_1bit_err" },
-	{ .int_msk = BIT(18), .msg = "rss_idt_mem10_ecc_1bit_err" },
-	{ .int_msk = BIT(19), .msg = "rss_idt_mem11_ecc_1bit_err" },
-	{ .int_msk = BIT(20), .msg = "rss_idt_mem12_ecc_1bit_err" },
-	{ .int_msk = BIT(21), .msg = "rss_idt_mem13_ecc_1bit_err" },
-	{ .int_msk = BIT(22), .msg = "rss_idt_mem14_ecc_1bit_err" },
-	{ .int_msk = BIT(23), .msg = "rss_idt_mem15_ecc_1bit_err" },
-	{ .int_msk = BIT(24), .msg = "port_vlan_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(25), .msg = "mcast_linear_table_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(26), .msg = "mcast_result_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(27),
-		.msg = "flow_director_ad_mem0_ecc_1bit_err" },
-	{ .int_msk = BIT(28),
-		.msg = "flow_director_ad_mem1_ecc_1bit_err" },
-	{ .int_msk = BIT(29),
-		.msg = "rx_vlan_tag_memory_ecc_1bit_err" },
-	{ .int_msk = BIT(30),
-		.msg = "Tx_UP_mapping_config_mem_ecc_1bit_err" },
-	{ /* sentinel */ }
-};
-
-static const struct hclge_hw_error hclge_ppp_mpf_int1[] = {
+static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = {
 	{ .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_mbit_err" },
 	{ .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_mbit_err" },
 	{ .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_mbit_err" },
@@ -193,17 +143,7 @@ static const struct hclge_hw_error hclge_ppp_pf_int[] = {
 	{ /* sentinel */ }
 };
 
-static const struct hclge_hw_error hclge_ppp_mpf_int2[] = {
-	{ .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(3), .msg = "FD_CN0_memory_ecc_1bit_err" },
-	{ .int_msk = BIT(4), .msg = "FD_CN1_memory_ecc_1bit_err" },
-	{ .int_msk = BIT(5), .msg = "GRO_AD_memory_ecc_1bit_err" },
-	{ /* sentinel */ }
-};
-
-static const struct hclge_hw_error hclge_ppp_mpf_int3[] = {
+static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st3[] = {
 	{ .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_mbit_err" },
 	{ .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_mbit_err" },
 	{ .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_mbit_err" },
@@ -213,104 +153,75 @@ static const struct hclge_hw_error hclge_ppp_mpf_int3[] = {
 	{ /* sentinel */ }
 };
 
-struct hclge_tm_sch_ecc_info {
-	const char *name;
-};
-
-static const struct hclge_tm_sch_ecc_info hclge_tm_sch_ecc_err[7][15] = {
-	{
-		{ .name = "QSET_QUEUE_CTRL:PRI_LEN TAB" },
-		{ .name = "QSET_QUEUE_CTRL:SPA_LEN TAB" },
-		{ .name = "QSET_QUEUE_CTRL:SPB_LEN TAB" },
-		{ .name = "QSET_QUEUE_CTRL:WRRA_LEN TAB" },
-		{ .name = "QSET_QUEUE_CTRL:WRRB_LEN TAB" },
-		{ .name = "QSET_QUEUE_CTRL:SPA_HPTR TAB" },
-		{ .name = "QSET_QUEUE_CTRL:SPB_HPTR TAB" },
-		{ .name = "QSET_QUEUE_CTRL:WRRA_HPTR TAB" },
-		{ .name = "QSET_QUEUE_CTRL:WRRB_HPTR TAB" },
-		{ .name = "QSET_QUEUE_CTRL:QS_LINKLIST TAB" },
-		{ .name = "QSET_QUEUE_CTRL:SPA_TPTR TAB" },
-		{ .name = "QSET_QUEUE_CTRL:SPB_TPTR TAB" },
-		{ .name = "QSET_QUEUE_CTRL:WRRA_TPTR TAB" },
-		{ .name = "QSET_QUEUE_CTRL:WRRB_TPTR TAB" },
-		{ .name = "QSET_QUEUE_CTRL:QS_DEFICITCNT TAB" },
-	},
-	{
-		{ .name = "ROCE_QUEUE_CTRL:QS_LEN TAB" },
-		{ .name = "ROCE_QUEUE_CTRL:QS_TPTR TAB" },
-		{ .name = "ROCE_QUEUE_CTRL:QS_HPTR TAB" },
-		{ .name = "ROCE_QUEUE_CTRL:QLINKLIST TAB" },
-		{ .name = "ROCE_QUEUE_CTRL:QCLEN TAB" },
-	},
-	{
-		{ .name = "NIC_QUEUE_CTRL:QS_LEN TAB" },
-		{ .name = "NIC_QUEUE_CTRL:QS_TPTR TAB" },
-		{ .name = "NIC_QUEUE_CTRL:QS_HPTR TAB" },
-		{ .name = "NIC_QUEUE_CTRL:QLINKLIST TAB" },
-		{ .name = "NIC_QUEUE_CTRL:QCLEN TAB" },
-	},
-	{
-		{ .name = "RAM_CFG_CTRL:CSHAP TAB" },
-		{ .name = "RAM_CFG_CTRL:PSHAP TAB" },
-	},
-	{
-		{ .name = "SHAPER_CTRL:PSHAP TAB" },
-	},
-	{
-		{ .name = "MSCH_CTRL" },
-	},
-	{
-		{ .name = "TOP_CTRL" },
-	},
-};
-
-static const struct hclge_hw_error hclge_tm_sch_err_int[] = {
+static const struct hclge_hw_error hclge_tm_sch_rint[] = {
 	{ .int_msk = BIT(0), .msg = "tm_sch_ecc_1bit_err" },
 	{ .int_msk = BIT(1), .msg = "tm_sch_ecc_mbit_err" },
-	{ .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_full_err" },
-	{ .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_empty_err" },
-	{ .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_full_err" },
-	{ .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_empty_err" },
-	{ .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_full_err" },
-	{ .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_empty_err" },
-	{ .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_full_err" },
-	{ .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_empty_err" },
-	{ .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_full_err" },
-	{ .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_empty_err" },
+	{ .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_err" },
+	{ .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_err" },
+	{ .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_err" },
+	{ .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_err" },
+	{ .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_err" },
+	{ .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_err" },
+	{ .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_err" },
+	{ .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_err" },
+	{ .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_err" },
+	{ .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_err" },
 	{ .int_msk = BIT(12),
-	  .msg = "tm_sch_port_shap_offset_fifo_wr_full_err" },
+	  .msg = "tm_sch_port_shap_offset_fifo_wr_err" },
 	{ .int_msk = BIT(13),
-	  .msg = "tm_sch_port_shap_offset_fifo_rd_empty_err" },
+	  .msg = "tm_sch_port_shap_offset_fifo_rd_err" },
 	{ .int_msk = BIT(14),
-	  .msg = "tm_sch_pg_pshap_offset_fifo_wr_full_err" },
+	  .msg = "tm_sch_pg_pshap_offset_fifo_wr_err" },
 	{ .int_msk = BIT(15),
-	  .msg = "tm_sch_pg_pshap_offset_fifo_rd_empty_err" },
+	  .msg = "tm_sch_pg_pshap_offset_fifo_rd_err" },
 	{ .int_msk = BIT(16),
-	  .msg = "tm_sch_pg_cshap_offset_fifo_wr_full_err" },
+	  .msg = "tm_sch_pg_cshap_offset_fifo_wr_err" },
 	{ .int_msk = BIT(17),
-	  .msg = "tm_sch_pg_cshap_offset_fifo_rd_empty_err" },
+	  .msg = "tm_sch_pg_cshap_offset_fifo_rd_err" },
 	{ .int_msk = BIT(18),
-	  .msg = "tm_sch_pri_pshap_offset_fifo_wr_full_err" },
+	  .msg = "tm_sch_pri_pshap_offset_fifo_wr_err" },
 	{ .int_msk = BIT(19),
-	  .msg = "tm_sch_pri_pshap_offset_fifo_rd_empty_err" },
+	  .msg = "tm_sch_pri_pshap_offset_fifo_rd_err" },
 	{ .int_msk = BIT(20),
-	  .msg = "tm_sch_pri_cshap_offset_fifo_wr_full_err" },
+	  .msg = "tm_sch_pri_cshap_offset_fifo_wr_err" },
 	{ .int_msk = BIT(21),
-	  .msg = "tm_sch_pri_cshap_offset_fifo_rd_empty_err" },
-	{ .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_full_err" },
-	{ .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_empty_err" },
-	{ .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_full_err" },
-	{ .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_empty_err" },
-	{ .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_full_err" },
-	{ .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_empty_err" },
-	{ .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_full_err" },
-	{ .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_empty_err" },
-	{ .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_full_err" },
-	{ .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_empty_err" },
+	  .msg = "tm_sch_pri_cshap_offset_fifo_rd_err" },
+	{ .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_err" },
+	{ .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_err" },
+	{ .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_err" },
+	{ .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_err" },
+	{ .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_err" },
+	{ .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_err" },
+	{ .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_err" },
+	{ .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_err" },
+	{ .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_err" },
+	{ .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_err" },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_qcn_fifo_rint[] = {
+	{ .int_msk = BIT(0), .msg = "qcn_shap_gp0_sch_fifo_rd_err" },
+	{ .int_msk = BIT(1), .msg = "qcn_shap_gp0_sch_fifo_wr_err" },
+	{ .int_msk = BIT(2), .msg = "qcn_shap_gp1_sch_fifo_rd_err" },
+	{ .int_msk = BIT(3), .msg = "qcn_shap_gp1_sch_fifo_wr_err" },
+	{ .int_msk = BIT(4), .msg = "qcn_shap_gp2_sch_fifo_rd_err" },
+	{ .int_msk = BIT(5), .msg = "qcn_shap_gp2_sch_fifo_wr_err" },
+	{ .int_msk = BIT(6), .msg = "qcn_shap_gp3_sch_fifo_rd_err" },
+	{ .int_msk = BIT(7), .msg = "qcn_shap_gp3_sch_fifo_wr_err" },
+	{ .int_msk = BIT(8), .msg = "qcn_shap_gp0_offset_fifo_rd_err" },
+	{ .int_msk = BIT(9), .msg = "qcn_shap_gp0_offser_fifo_wr_err" },
+	{ .int_msk = BIT(10), .msg = "qcn_shap_gp1_offset_fifo_rd_err" },
+	{ .int_msk = BIT(11), .msg = "qcn_shap_gp1_offset_fifo_wr_err" },
+	{ .int_msk = BIT(12), .msg = "qcn_shap_gp2_offset_fifo_rd_err" },
+	{ .int_msk = BIT(13), .msg = "qcn_shap_gp2_offset_fifo_wr_err" },
+	{ .int_msk = BIT(14), .msg = "qcn_shap_gp3_offset_fifo_rd_err" },
+	{ .int_msk = BIT(15), .msg = "qcn_shap_gp3_offset_fifo_wr_err" },
+	{ .int_msk = BIT(16), .msg = "qcn_byte_info_fifo_rd_err" },
+	{ .int_msk = BIT(17), .msg = "qcn_byte_info_fifo_wr_err" },
 	{ /* sentinel */ }
 };
 
-static const struct hclge_hw_error hclge_qcn_ecc_err_int[] = {
+static const struct hclge_hw_error hclge_qcn_ecc_rint[] = {
 	{ .int_msk = BIT(0), .msg = "qcn_byte_mem_ecc_1bit_err" },
 	{ .int_msk = BIT(1), .msg = "qcn_byte_mem_ecc_mbit_err" },
 	{ .int_msk = BIT(2), .msg = "qcn_time_mem_ecc_1bit_err" },
@@ -336,6 +247,18 @@ static const struct hclge_hw_error hclge_qcn_ecc_err_int[] = {
 	{ /* sentinel */ }
 };
 
+static void hclge_log_error(struct device *dev, char *reg,
+			    const struct hclge_hw_error *err,
+			    u32 err_sts)
+{
+	while (err->msg) {
+		if (err->int_msk & err_sts)
+			dev_warn(dev, "%s %s found [error status=0x%x]\n",
+				 reg, err->msg, err_sts);
+		err++;
+	}
+}
+
 /* hclge_cmd_query_error: read the error information
  * @hdev: pointer to struct hclge_dev
  * @desc: descriptor for describing the command
@@ -388,7 +311,8 @@ static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en)
 		desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN |
 					HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN);
 		desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN);
-		desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN);
+		desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN |
+					      HCLGE_MSIX_SRAM_ECC_ERR_INT_EN);
 		desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN);
 	}
 
@@ -396,7 +320,8 @@ static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en)
 	desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK |
 				HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK);
 	desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK);
-	desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK);
+	desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK |
+				      HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK);
 	desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK);
 
 	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
@@ -564,6 +489,225 @@ static int hclge_config_tm_hw_err_int(struct hclge_dev *hdev, bool en)
 	return ret;
 }
 
+#define HCLGE_SET_DEFAULT_RESET_REQUEST(reset_type) \
+	do { \
+		if (ae_dev->ops->set_default_reset_request) \
+			ae_dev->ops->set_default_reset_request(ae_dev, \
+							       reset_type); \
+	} while (0)
+
+/* hclge_handle_mpf_ras_error: handle all main PF RAS errors
+ * @hdev: pointer to struct hclge_dev
+ * @desc: descriptor for describing the command
+ * @num:  number of extended command structures
+ *
+ * This function handles all the main PF RAS errors in the
+ * hw register/s using command.
+ */
+static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
+				      struct hclge_desc *desc,
+				      int num)
+{
+	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
+	struct device *dev = &hdev->pdev->dev;
+	__le32 *desc_data;
+	u32 status;
+	int ret;
+
+	/* query all main PF RAS errors */
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_MPF_RAS_INT,
+				   true);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
+	if (ret) {
+		dev_err(dev, "query all mpf ras int cmd failed (%d)\n", ret);
+		return ret;
+	}
+
+	/* log HNS common errors */
+	status = le32_to_cpu(desc[0].data[0]);
+	if (status) {
+		hclge_log_error(dev, "IMP_TCM_ECC_INT_STS",
+				&hclge_imp_tcm_ecc_int[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+	}
+
+	status = le32_to_cpu(desc[0].data[1]);
+	if (status) {
+		hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS",
+				&hclge_cmdq_nic_mem_ecc_int[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+	}
+
+	if ((le32_to_cpu(desc[0].data[2])) & BIT(0)) {
+		dev_warn(dev, "imp_rd_data_poison_err found\n");
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+	}
+
+	status = le32_to_cpu(desc[0].data[3]);
+	if (status) {
+		hclge_log_error(dev, "TQP_INT_ECC_INT_STS",
+				&hclge_tqp_int_ecc_int[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+	}
+
+	status = le32_to_cpu(desc[0].data[4]);
+	if (status) {
+		hclge_log_error(dev, "MSIX_ECC_INT_STS",
+				&hclge_msix_sram_ecc_int[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+	}
+
+	/* log IGU(Ingress Unit) errors */
+	desc_data = (__le32 *)&desc[3];
+	status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK;
+	if (status)
+		hclge_log_error(dev, "IGU_INT_STS",
+				&hclge_igu_int[0], status);
+
+	/* log PPP(Programmable Packet Process) errors */
+	desc_data = (__le32 *)&desc[4];
+	status = le32_to_cpu(*(desc_data + 1));
+	if (status)
+		hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1",
+				&hclge_ppp_mpf_abnormal_int_st1[0], status);
+
+	status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK;
+	if (status)
+		hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3",
+				&hclge_ppp_mpf_abnormal_int_st3[0], status);
+
+	/* log TM(Traffic Manager) errors */
+	desc_data = (__le32 *)&desc[6];
+	status = le32_to_cpu(*desc_data);
+	if (status) {
+		hclge_log_error(dev, "TM_SCH_RINT",
+				&hclge_tm_sch_rint[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+	}
+
+	/* log QCN(Quantized Congestion Control) errors */
+	desc_data = (__le32 *)&desc[7];
+	status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK;
+	if (status) {
+		hclge_log_error(dev, "QCN_FIFO_RINT",
+				&hclge_qcn_fifo_rint[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+	}
+
+	status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK;
+	if (status) {
+		hclge_log_error(dev, "QCN_ECC_RINT",
+				&hclge_qcn_ecc_rint[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+	}
+
+	/* log NCSI errors */
+	desc_data = (__le32 *)&desc[9];
+	status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK;
+	if (status) {
+		hclge_log_error(dev, "NCSI_ECC_INT_RPT",
+				&hclge_ncsi_err_int[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+	}
+
+	/* clear all main PF RAS errors */
+	hclge_cmd_reuse_desc(&desc[0], false);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
+	if (ret)
+		dev_err(dev, "clear all mpf ras int cmd failed (%d)\n", ret);
+
+	return ret;
+}
+
+/* hclge_handle_pf_ras_error: handle all PF RAS errors
+ * @hdev: pointer to struct hclge_dev
+ * @desc: descriptor for describing the command
+ * @num:  number of extended command structures
+ *
+ * This function handles all the PF RAS errors in the
+ * hw register/s using command.
+ */
+static int hclge_handle_pf_ras_error(struct hclge_dev *hdev,
+				     struct hclge_desc *desc,
+				     int num)
+{
+	struct device *dev = &hdev->pdev->dev;
+	__le32 *desc_data;
+	u32 status;
+	int ret;
+
+	/* query all PF RAS errors */
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_PF_RAS_INT,
+				   true);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
+	if (ret) {
+		dev_err(dev, "query all pf ras int cmd failed (%d)\n", ret);
+		return ret;
+	}
+
+	/* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */
+	desc_data = (__le32 *)&desc[1];
+	status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK;
+	if (status)
+		hclge_log_error(dev, "IGU_EGU_TNL_INT_STS",
+				&hclge_igu_egu_tnl_int[0], status);
+
+	/* clear all PF RAS errors */
+	hclge_cmd_reuse_desc(&desc[0], false);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
+	if (ret)
+		dev_err(dev, "clear all pf ras int cmd failed (%d)\n", ret);
+
+	return ret;
+}
+
+static int hclge_handle_all_ras_errors(struct hclge_dev *hdev)
+{
+	struct device *dev = &hdev->pdev->dev;
+	u32 mpf_bd_num, pf_bd_num, bd_num;
+	struct hclge_desc desc_bd;
+	struct hclge_desc *desc;
+	int ret;
+
+	/* query the number of registers in the RAS int status */
+	hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_RAS_INT_STS_BD_NUM,
+				   true);
+	ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1);
+	if (ret) {
+		dev_err(dev, "fail(%d) to query ras int status bd num\n", ret);
+		return ret;
+	}
+	mpf_bd_num = le32_to_cpu(desc_bd.data[0]);
+	pf_bd_num = le32_to_cpu(desc_bd.data[1]);
+	bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
+
+	desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+
+	/* handle all main PF RAS errors */
+	ret = hclge_handle_mpf_ras_error(hdev, desc, mpf_bd_num);
+	if (ret) {
+		kfree(desc);
+		return ret;
+	}
+	memset(desc, 0, bd_num * sizeof(struct hclge_desc));
+
+	/* handle all PF RAS errors */
+	ret = hclge_handle_pf_ras_error(hdev, desc, pf_bd_num);
+	kfree(desc);
+
+	return ret;
+}
+
 static const struct hclge_hw_blk hw_blk[] = {
 	{
 	  .msk = BIT(0), .name = "IGU_EGU",
@@ -605,13 +749,18 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev)
 {
 	struct hclge_dev *hdev = ae_dev->priv;
 	struct device *dev = &hdev->pdev->dev;
-	u32 sts;
+	u32 status;
 
-	sts = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);
+	status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);
 
-	/* Handling Non-fatal RAS errors */
-	if (sts & HCLGE_RAS_REG_NFE_MASK)
-		dev_warn(dev, "HNS Non-Fatal RAS error identified\n");
+	/* Handling Non-fatal HNS RAS errors */
+	if (status & HCLGE_RAS_REG_NFE_MASK) {
+		dev_warn(dev,
+			 "HNS Non-Fatal RAS error(status=0x%x) identified\n",
+			 status);
+		hclge_handle_all_ras_errors(hdev);
+		return PCI_ERS_RESULT_NEED_RESET;
+	}
 
-	return PCI_ERS_RESULT_NEED_RESET;
+	return PCI_ERS_RESULT_RECOVERED;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
index 9fe1c96..809e698 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
@@ -21,6 +21,8 @@
 #define HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK	0x0100
 #define HCLGE_TQP_ECC_ERR_INT_EN	0x0FFF
 #define HCLGE_TQP_ECC_ERR_INT_EN_MASK	0x0FFF
+#define HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK	0x0F000000
+#define HCLGE_MSIX_SRAM_ECC_ERR_INT_EN	0x0F000000
 #define HCLGE_IGU_ERR_INT_EN	0x0000066F
 #define HCLGE_IGU_ERR_INT_EN_MASK	0x000F
 #define HCLGE_IGU_TNL_ERR_INT_EN    0x0002AABF
@@ -40,6 +42,13 @@
 #define HCLGE_NCSI_ERR_INT_EN	0x3
 #define HCLGE_NCSI_ERR_INT_TYPE	0x9
 
+#define HCLGE_IGU_INT_MASK		GENMASK(3, 0)
+#define HCLGE_IGU_EGU_TNL_INT_MASK	GENMASK(5, 0)
+#define HCLGE_PPP_MPF_INT_ST3_MASK	GENMASK(5, 0)
+#define HCLGE_QCN_FIFO_INT_MASK		GENMASK(17, 0)
+#define HCLGE_QCN_ECC_INT_MASK		GENMASK(21, 0)
+#define HCLGE_NCSI_ECC_INT_MASK		GENMASK(1, 0)
+
 enum hclge_err_int_type {
 	HCLGE_ERR_INT_MSIX = 0,
 	HCLGE_ERR_INT_RAS_CE = 1,
-- 
2.7.4



  parent reply	other threads:[~2018-12-07 21:09 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-07 21:07 [PATCH net-next 00/14] net: hns3: Additions/optimizations related to HNS3 H/W err handling Salil Mehta
2018-12-07 21:07 ` [PATCH net-next 01/14] net: hns3: remove existing process error functions and reorder hw_blk table Salil Mehta
2018-12-07 21:07 ` [PATCH net-next 02/14] net: hns3: rename enable error interrupt functions Salil Mehta
2018-12-07 21:08 ` [PATCH net-next 03/14] net: hns3: re-enable error interrupts on hw reset Salil Mehta
2018-12-07 21:08 ` [PATCH net-next 04/14] net: hns3: deletes unnecessary settings of the descriptor data Salil Mehta
2018-12-07 21:08 ` [PATCH net-next 05/14] net: hns3: rename process_hw_error function Salil Mehta
2018-12-07 21:08 ` [PATCH net-next 06/14] net: hns3: add optimization in the hclge_hw_error_set_state Salil Mehta
2018-12-07 21:08 ` Salil Mehta [this message]
2018-12-07 21:08 ` [PATCH net-next 08/14] net: hns3: deleted logging 1 bit errors Salil Mehta
2018-12-07 21:08 ` [PATCH net-next 09/14] net: hns3: add handling of hw errors reported through MSIX Salil Mehta
2018-12-07 21:08 ` [PATCH net-next 10/14] net: hns3: add handling of hw errors of MAC Salil Mehta
2018-12-07 21:08 ` [PATCH net-next 11/14] net: hns3: handle hw errors of PPP PF Salil Mehta
2018-12-07 21:08 ` [PATCH net-next 12/14] net: hns3: handle hw errors of PPU(RCB) Salil Mehta
2018-12-07 21:08 ` [PATCH net-next 13/14] net: hns3: handle hw errors of SSU Salil Mehta
2018-12-07 21:08 ` [PATCH net-next 14/14] net: hns3: add handling of RDMA RAS errors Salil Mehta
2018-12-08  0:03 ` [PATCH net-next 00/14] net: hns3: Additions/optimizations related to HNS3 H/W err handling David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181207210811.23844-8-salil.mehta@huawei.com \
    --to=salil.mehta@huawei.com \
    --cc=davem@davemloft.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linuxarm@huawei.com \
    --cc=lipeng321@huawei.com \
    --cc=mehta.salil@opnsrc.net \
    --cc=netdev@vger.kernel.org \
    --cc=shiju.jose@huawei.com \
    --cc=yisen.zhuang@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).