linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V2 net-next 0/2] add vf fault detect support for HNS3 ethernet driver
@ 2023-10-07  3:12 Jijie Shao
  2023-10-07  3:12 ` [PATCH V2 net-next 1/2] net: hns3: add hns3 vf fault detect cap bit support Jijie Shao
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Jijie Shao @ 2023-10-07  3:12 UTC (permalink / raw)
  To: yisen.zhuang, salil.mehta, davem, edumazet, kuba, pabeni
  Cc: shenjian15, wangjie125, liuyonglong, shaojijie, netdev, linux-kernel

add vf fault detect support for HNS3 ethernet driver

Jie Wang (2):
  net: hns3: add hns3 vf fault detect cap bit support
  net: hns3: add vf fault detect support

 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |   5 +
 .../hns3/hns3_common/hclge_comm_cmd.c         |   1 +
 .../hns3/hns3_common/hclge_comm_cmd.h         |   2 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |   3 +
 .../hisilicon/hns3/hns3pf/hclge_err.c         | 116 +++++++++++++++++-
 .../hisilicon/hns3/hns3pf/hclge_err.h         |   2 +
 .../hisilicon/hns3/hns3pf/hclge_main.c        |   3 +-
 .../hisilicon/hns3/hns3pf/hclge_main.h        |   2 +
 .../hisilicon/hns3/hns3pf/hclge_mbx.c         |   2 +-
 9 files changed, 129 insertions(+), 7 deletions(-)

-- 
2.30.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH V2 net-next 1/2] net: hns3: add hns3 vf fault detect cap bit support
  2023-10-07  3:12 [PATCH V2 net-next 0/2] add vf fault detect support for HNS3 ethernet driver Jijie Shao
@ 2023-10-07  3:12 ` Jijie Shao
  2023-10-07  3:12 ` [PATCH V2 net-next 2/2] net: hns3: add vf fault detect support Jijie Shao
  2023-10-11 20:40 ` [PATCH V2 net-next 0/2] add vf fault detect support for HNS3 ethernet driver patchwork-bot+netdevbpf
  2 siblings, 0 replies; 5+ messages in thread
From: Jijie Shao @ 2023-10-07  3:12 UTC (permalink / raw)
  To: yisen.zhuang, salil.mehta, davem, edumazet, kuba, pabeni
  Cc: shenjian15, wangjie125, liuyonglong, shaojijie, netdev, linux-kernel

From: Jie Wang <wangjie125@huawei.com>

The hns3 driver is designed to support the VF fault detect feature on
new hardware. For code compatibility, a VF fault detect capability bit is
added to the driver.

Signed-off-by: Jie Wang <wangjie125@huawei.com>
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h                   | 4 ++++
 .../net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c  | 1 +
 .../net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h  | 1 +
 drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c            | 3 +++
 4 files changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index aaf1f42624a7..46062106fc6a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -103,6 +103,7 @@ enum HNAE3_DEV_CAP_BITS {
 	HNAE3_DEV_SUPPORT_LANE_NUM_B,
 	HNAE3_DEV_SUPPORT_WOL_B,
 	HNAE3_DEV_SUPPORT_TM_FLUSH_B,
+	HNAE3_DEV_SUPPORT_VF_FAULT_B,
 };
 
 #define hnae3_ae_dev_fd_supported(ae_dev) \
@@ -177,6 +178,9 @@ enum HNAE3_DEV_CAP_BITS {
 #define hnae3_ae_dev_tm_flush_supported(hdev) \
 	test_bit(HNAE3_DEV_SUPPORT_TM_FLUSH_B, (hdev)->ae_dev->caps)
 
+#define hnae3_ae_dev_vf_fault_supported(ae_dev) \
+	test_bit(HNAE3_DEV_SUPPORT_VF_FAULT_B, (ae_dev)->caps)
+
 enum HNAE3_PF_CAP_BITS {
 	HNAE3_PF_SUPPORT_VLAN_FLTR_MDF_B = 0,
 };
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c
index dcecb23daac6..d92ad6082d8e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c
@@ -157,6 +157,7 @@ static const struct hclge_comm_caps_bit_map hclge_pf_cmd_caps[] = {
 	{HCLGE_COMM_CAP_LANE_NUM_B, HNAE3_DEV_SUPPORT_LANE_NUM_B},
 	{HCLGE_COMM_CAP_WOL_B, HNAE3_DEV_SUPPORT_WOL_B},
 	{HCLGE_COMM_CAP_TM_FLUSH_B, HNAE3_DEV_SUPPORT_TM_FLUSH_B},
+	{HCLGE_COMM_CAP_VF_FAULT_B, HNAE3_DEV_SUPPORT_VF_FAULT_B},
 };
 
 static const struct hclge_comm_caps_bit_map hclge_vf_cmd_caps[] = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
index 2b7197ce0ae8..92e73d44f0e5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
@@ -348,6 +348,7 @@ enum HCLGE_COMM_CAP_BITS {
 	HCLGE_COMM_CAP_GRO_B = 20,
 	HCLGE_COMM_CAP_FD_B = 21,
 	HCLGE_COMM_CAP_FEC_STATS_B = 25,
+	HCLGE_COMM_CAP_VF_FAULT_B = 26,
 	HCLGE_COMM_CAP_LANE_NUM_B = 27,
 	HCLGE_COMM_CAP_WOL_B = 28,
 	HCLGE_COMM_CAP_TM_FLUSH_B = 31,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index b8508533878b..0b138635bafa 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -414,6 +414,9 @@ static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
 	}, {
 		.name = "support tm flush",
 		.cap_bit = HNAE3_DEV_SUPPORT_TM_FLUSH_B,
+	}, {
+		.name = "support vf fault detect",
+		.cap_bit = HNAE3_DEV_SUPPORT_VF_FAULT_B,
 	}
 };
 
-- 
2.30.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH V2 net-next 2/2] net: hns3: add vf fault detect support
  2023-10-07  3:12 [PATCH V2 net-next 0/2] add vf fault detect support for HNS3 ethernet driver Jijie Shao
  2023-10-07  3:12 ` [PATCH V2 net-next 1/2] net: hns3: add hns3 vf fault detect cap bit support Jijie Shao
@ 2023-10-07  3:12 ` Jijie Shao
  2023-10-09 13:42   ` Simon Horman
  2023-10-11 20:40 ` [PATCH V2 net-next 0/2] add vf fault detect support for HNS3 ethernet driver patchwork-bot+netdevbpf
  2 siblings, 1 reply; 5+ messages in thread
From: Jijie Shao @ 2023-10-07  3:12 UTC (permalink / raw)
  To: yisen.zhuang, salil.mehta, davem, edumazet, kuba, pabeni
  Cc: shenjian15, wangjie125, liuyonglong, shaojijie, netdev, linux-kernel

From: Jie Wang <wangjie125@huawei.com>

Currently the hns3 driver supports the VF fault detect feature. Several RAS
errors caused by VF resources don't need a PF function reset for recovery;
the driver only needs to reset the specified VF.

So this patch adds a new process to the RAS module. The new process gets
detailed information about the RAS error and takes the most appropriate
measures based on this information.

Signed-off-by: Jie Wang <wangjie125@huawei.com>
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
---
changeLog:
v1 -> v2:
  - fix the wrong use of vf recovery notify interface
  - add BUILD_BUG_ON to guarantee macros
  - optimise hclge_handle_vf_queue_err_ras for unsupported firmware
  v1: https://lore.kernel.org/netdev/20230113020829.48451-1-lanhao@huawei.com/ 
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |   1 +
 .../hns3/hns3_common/hclge_comm_cmd.h         |   1 +
 .../hisilicon/hns3/hns3pf/hclge_err.c         | 116 +++++++++++++++++-
 .../hisilicon/hns3/hns3pf/hclge_err.h         |   2 +
 .../hisilicon/hns3/hns3pf/hclge_main.c        |   3 +-
 .../hisilicon/hns3/hns3pf/hclge_main.h        |   2 +
 .../hisilicon/hns3/hns3pf/hclge_mbx.c         |   2 +-
 7 files changed, 120 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 46062106fc6a..d7e175a9cb49 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -275,6 +275,7 @@ enum hnae3_reset_type {
 	HNAE3_GLOBAL_RESET,
 	HNAE3_IMP_RESET,
 	HNAE3_NONE_RESET,
+	HNAE3_VF_EXP_RESET,
 	HNAE3_MAX_RESET,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
index 92e73d44f0e5..533c19d25e4f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
@@ -93,6 +93,7 @@ enum hclge_opcode_type {
 	HCLGE_OPC_DFX_SSU_REG_2		= 0x004F,
 
 	HCLGE_OPC_QUERY_DEV_SPECS	= 0x0050,
+	HCLGE_OPC_GET_QUEUE_ERR_VF      = 0x0067,
 
 	/* MAC command */
 	HCLGE_OPC_CONFIG_MAC_MODE	= 0x0301,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index 3f35227ef1fa..d63e114f93d0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -1301,10 +1301,12 @@ static const struct hclge_hw_type_id hclge_hw_type_id_st[] = {
 		.msg = "tqp_int_ecc_error"
 	}, {
 		.type_id = PF_ABNORMAL_INT_ERROR,
-		.msg = "pf_abnormal_int_error"
+		.msg = "pf_abnormal_int_error",
+		.cause_by_vf = true
 	}, {
 		.type_id = MPF_ABNORMAL_INT_ERROR,
-		.msg = "mpf_abnormal_int_error"
+		.msg = "mpf_abnormal_int_error",
+		.cause_by_vf = true
 	}, {
 		.type_id = COMMON_ERROR,
 		.msg = "common_error"
@@ -2759,7 +2761,7 @@ void hclge_handle_occurred_error(struct hclge_dev *hdev)
 		hclge_handle_error_info_log(ae_dev);
 }
 
-static void
+static bool
 hclge_handle_error_type_reg_log(struct device *dev,
 				struct hclge_mod_err_info *mod_info,
 				struct hclge_type_reg_err_info *type_reg_info)
@@ -2770,6 +2772,7 @@ hclge_handle_error_type_reg_log(struct device *dev,
 	u8 mod_id, total_module, type_id, total_type, i, is_ras;
 	u8 index_module = MODULE_NONE;
 	u8 index_type = NONE_ERROR;
+	bool cause_by_vf = false;
 
 	mod_id = mod_info->mod_id;
 	type_id = type_reg_info->type_id & HCLGE_ERR_TYPE_MASK;
@@ -2788,6 +2791,7 @@ hclge_handle_error_type_reg_log(struct device *dev,
 	for (i = 0; i < total_type; i++) {
 		if (type_id == hclge_hw_type_id_st[i].type_id) {
 			index_type = i;
+			cause_by_vf = hclge_hw_type_id_st[i].cause_by_vf;
 			break;
 		}
 	}
@@ -2805,6 +2809,8 @@ hclge_handle_error_type_reg_log(struct device *dev,
 	dev_err(dev, "reg_value:\n");
 	for (i = 0; i < type_reg_info->reg_num; i++)
 		dev_err(dev, "0x%08x\n", type_reg_info->hclge_reg[i]);
+
+	return cause_by_vf;
 }
 
 static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev,
@@ -2815,6 +2821,7 @@ static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev,
 	struct device *dev = &hdev->pdev->dev;
 	struct hclge_mod_err_info *mod_info;
 	struct hclge_sum_err_info *sum_info;
+	bool cause_by_vf = false;
 	u8 mod_num, err_num, i;
 	u32 offset = 0;
 
@@ -2843,12 +2850,16 @@ static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev,
 
 			type_reg_info = (struct hclge_type_reg_err_info *)
 					    &buf[offset++];
-			hclge_handle_error_type_reg_log(dev, mod_info,
-							type_reg_info);
+			if (hclge_handle_error_type_reg_log(dev, mod_info,
+							    type_reg_info))
+				cause_by_vf = true;
 
 			offset += type_reg_info->reg_num;
 		}
 	}
+
+	if (hnae3_ae_dev_vf_fault_supported(hdev->ae_dev) && cause_by_vf)
+		set_bit(HNAE3_VF_EXP_RESET, &ae_dev->hw_err_reset_req);
 }
 
 static int hclge_query_all_err_bd_num(struct hclge_dev *hdev, u32 *bd_num)
@@ -2940,3 +2951,98 @@ int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev)
 out:
 	return ret;
 }
+
+static bool hclge_reset_vf_in_bitmap(struct hclge_dev *hdev,
+				     unsigned long *bitmap)
+{
+	struct hclge_vport *vport;
+	bool exist_set = false;
+	int func_id;
+	int ret;
+
+	func_id = find_first_bit(bitmap, HCLGE_VPORT_NUM);
+	if (func_id == PF_VPORT_ID)
+		return false;
+
+	while (func_id != HCLGE_VPORT_NUM) {
+		vport = hclge_get_vf_vport(hdev,
+					   func_id - HCLGE_VF_VPORT_START_NUM);
+		if (!vport) {
+			dev_err(&hdev->pdev->dev, "invalid func id(%d)\n",
+				func_id);
+			return false;
+		}
+
+		dev_info(&hdev->pdev->dev, "do function %d recovery.", func_id);
+
+		ret = hclge_reset_tqp(&vport->nic);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"failed to reset tqp, ret = %d.", ret);
+			return false;
+		}
+
+		ret = hclge_inform_vf_reset(vport, HNAE3_VF_FUNC_RESET);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"failed to reset func %d, ret = %d.",
+				func_id, ret);
+			return false;
+		}
+
+		exist_set = true;
+		clear_bit(func_id, bitmap);
+		func_id = find_first_bit(bitmap, HCLGE_VPORT_NUM);
+	}
+
+	return exist_set;
+}
+
+static void hclge_get_vf_fault_bitmap(struct hclge_desc *desc,
+				      unsigned long *bitmap)
+{
+#define HCLGE_FIR_FAULT_BYTES	24
+#define HCLGE_SEC_FAULT_BYTES	8
+
+	u8 *buff;
+
+	BUILD_BUG_ON(HCLGE_FIR_FAULT_BYTES + HCLGE_SEC_FAULT_BYTES !=
+		     BITS_TO_BYTES(HCLGE_VPORT_NUM));
+
+	memcpy(bitmap, desc[0].data, HCLGE_FIR_FAULT_BYTES);
+	buff = (u8 *)bitmap + HCLGE_FIR_FAULT_BYTES;
+	memcpy(buff, desc[1].data, HCLGE_SEC_FAULT_BYTES);
+}
+
+int hclge_handle_vf_queue_err_ras(struct hclge_dev *hdev)
+{
+	unsigned long vf_fault_bitmap[BITS_TO_LONGS(HCLGE_VPORT_NUM)];
+	struct hclge_desc desc[2];
+	bool cause_by_vf = false;
+	int ret;
+
+	if (!test_and_clear_bit(HNAE3_VF_EXP_RESET,
+				&hdev->ae_dev->hw_err_reset_req) ||
+	    !hnae3_ae_dev_vf_fault_supported(hdev->ae_dev))
+		return 0;
+
+	hclge_comm_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_GET_QUEUE_ERR_VF,
+					true);
+	desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
+	hclge_comm_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_GET_QUEUE_ERR_VF,
+					true);
+
+	ret = hclge_comm_cmd_send(&hdev->hw.hw, desc, 2);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to get vf bitmap, ret = %d.\n", ret);
+		return ret;
+	}
+	hclge_get_vf_fault_bitmap(desc, vf_fault_bitmap);
+
+	cause_by_vf = hclge_reset_vf_in_bitmap(hdev, vf_fault_bitmap);
+	if (cause_by_vf)
+		hdev->ae_dev->hw_err_reset_req = 0;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
index 86be6fb32990..68b738affa66 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
@@ -196,6 +196,7 @@ struct hclge_hw_module_id {
 struct hclge_hw_type_id {
 	enum hclge_err_type_list type_id;
 	const char *msg;
+	bool cause_by_vf; /* indicate the error may from vf exception */
 };
 
 struct hclge_sum_err_info {
@@ -228,4 +229,5 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
 			       unsigned long *reset_requests);
 int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev);
 int hclge_handle_mac_tnl(struct hclge_dev *hdev);
+int hclge_handle_vf_queue_err_ras(struct hclge_dev *hdev);
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index c42574e29747..99c0576e6383 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3424,7 +3424,7 @@ static int hclge_get_status(struct hnae3_handle *handle)
 	return hdev->hw.mac.link;
 }
 
-static struct hclge_vport *hclge_get_vf_vport(struct hclge_dev *hdev, int vf)
+struct hclge_vport *hclge_get_vf_vport(struct hclge_dev *hdev, int vf)
 {
 	if (!pci_num_vf(hdev->pdev)) {
 		dev_err(&hdev->pdev->dev,
@@ -4468,6 +4468,7 @@ static void hclge_handle_err_recovery(struct hclge_dev *hdev)
 	if (hclge_find_error_source(hdev)) {
 		hclge_handle_error_info_log(ae_dev);
 		hclge_handle_mac_tnl(hdev);
+		hclge_handle_vf_queue_err_ras(hdev);
 	}
 
 	hclge_handle_err_reset_request(hdev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 7bc2049b723d..02c7aab3546e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -1146,4 +1146,6 @@ int hclge_dbg_dump_rst_info(struct hclge_dev *hdev, char *buf, int len);
 int hclge_push_vf_link_status(struct hclge_vport *vport);
 int hclge_enable_vport_vlan_filter(struct hclge_vport *vport, bool request_en);
 int hclge_mac_update_stats(struct hclge_dev *hdev);
+struct hclge_vport *hclge_get_vf_vport(struct hclge_dev *hdev, int vf);
+int hclge_inform_vf_reset(struct hclge_vport *vport, u16 reset_type);
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 04ff9bf12185..4b0d07ca2505 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -124,7 +124,7 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len,
 	return status;
 }
 
-static int hclge_inform_vf_reset(struct hclge_vport *vport, u16 reset_type)
+int hclge_inform_vf_reset(struct hclge_vport *vport, u16 reset_type)
 {
 	__le16 msg_data;
 	u8 dest_vfid;
-- 
2.30.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH V2 net-next 2/2] net: hns3: add vf fault detect support
  2023-10-07  3:12 ` [PATCH V2 net-next 2/2] net: hns3: add vf fault detect support Jijie Shao
@ 2023-10-09 13:42   ` Simon Horman
  0 siblings, 0 replies; 5+ messages in thread
From: Simon Horman @ 2023-10-09 13:42 UTC (permalink / raw)
  To: Jijie Shao
  Cc: yisen.zhuang, salil.mehta, davem, edumazet, kuba, pabeni,
	shenjian15, wangjie125, liuyonglong, netdev, linux-kernel,
	Leon Romanovsky

+ Leon

On Sat, Oct 07, 2023 at 11:12:15AM +0800, Jijie Shao wrote:
> From: Jie Wang <wangjie125@huawei.com>
> 
> Currently hns3 driver supports vf fault detect feature. Several ras caused
> by VF resources don't need to do PF function reset for recovery. The driver
> only needs to reset the specified VF.
> 
> So this patch adds process in ras module. New process will get detailed
> information about ras and do the most correct measures based on these
> accurate information.
> 
> Signed-off-by: Jie Wang <wangjie125@huawei.com>
> Signed-off-by: Jijie Shao <shaojijie@huawei.com>
> ---
> changeLog:
> v1 -> v2:
>   - fix the wrong use of vf recovery notify interface
>   - add BUILD_BUG_ON to gurantee macros
>   - optimise hclge_handle_vf_queue_err_ras for unsupported firmware
>   v1: https://lore.kernel.org/netdev/20230113020829.48451-1-lanhao@huawei.com/ 

Hi Leon,

I believe you reviewed v1 of this back in January and February.
Could you find some time to look at v2?

> ---
>  drivers/net/ethernet/hisilicon/hns3/hnae3.h   |   1 +
>  .../hns3/hns3_common/hclge_comm_cmd.h         |   1 +
>  .../hisilicon/hns3/hns3pf/hclge_err.c         | 116 +++++++++++++++++-
>  .../hisilicon/hns3/hns3pf/hclge_err.h         |   2 +
>  .../hisilicon/hns3/hns3pf/hclge_main.c        |   3 +-
>  .../hisilicon/hns3/hns3pf/hclge_main.h        |   2 +
>  .../hisilicon/hns3/hns3pf/hclge_mbx.c         |   2 +-
>  7 files changed, 120 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
> index 46062106fc6a..d7e175a9cb49 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
> +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
> @@ -275,6 +275,7 @@ enum hnae3_reset_type {
>  	HNAE3_GLOBAL_RESET,
>  	HNAE3_IMP_RESET,
>  	HNAE3_NONE_RESET,
> +	HNAE3_VF_EXP_RESET,
>  	HNAE3_MAX_RESET,
>  };
>  
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
> index 92e73d44f0e5..533c19d25e4f 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
> @@ -93,6 +93,7 @@ enum hclge_opcode_type {
>  	HCLGE_OPC_DFX_SSU_REG_2		= 0x004F,
>  
>  	HCLGE_OPC_QUERY_DEV_SPECS	= 0x0050,
> +	HCLGE_OPC_GET_QUEUE_ERR_VF      = 0x0067,
>  
>  	/* MAC command */
>  	HCLGE_OPC_CONFIG_MAC_MODE	= 0x0301,
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
> index 3f35227ef1fa..d63e114f93d0 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
> @@ -1301,10 +1301,12 @@ static const struct hclge_hw_type_id hclge_hw_type_id_st[] = {
>  		.msg = "tqp_int_ecc_error"
>  	}, {
>  		.type_id = PF_ABNORMAL_INT_ERROR,
> -		.msg = "pf_abnormal_int_error"
> +		.msg = "pf_abnormal_int_error",
> +		.cause_by_vf = true
>  	}, {
>  		.type_id = MPF_ABNORMAL_INT_ERROR,
> -		.msg = "mpf_abnormal_int_error"
> +		.msg = "mpf_abnormal_int_error",
> +		.cause_by_vf = true
>  	}, {
>  		.type_id = COMMON_ERROR,
>  		.msg = "common_error"
> @@ -2759,7 +2761,7 @@ void hclge_handle_occurred_error(struct hclge_dev *hdev)
>  		hclge_handle_error_info_log(ae_dev);
>  }
>  
> -static void
> +static bool
>  hclge_handle_error_type_reg_log(struct device *dev,
>  				struct hclge_mod_err_info *mod_info,
>  				struct hclge_type_reg_err_info *type_reg_info)
> @@ -2770,6 +2772,7 @@ hclge_handle_error_type_reg_log(struct device *dev,
>  	u8 mod_id, total_module, type_id, total_type, i, is_ras;
>  	u8 index_module = MODULE_NONE;
>  	u8 index_type = NONE_ERROR;
> +	bool cause_by_vf = false;
>  
>  	mod_id = mod_info->mod_id;
>  	type_id = type_reg_info->type_id & HCLGE_ERR_TYPE_MASK;
> @@ -2788,6 +2791,7 @@ hclge_handle_error_type_reg_log(struct device *dev,
>  	for (i = 0; i < total_type; i++) {
>  		if (type_id == hclge_hw_type_id_st[i].type_id) {
>  			index_type = i;
> +			cause_by_vf = hclge_hw_type_id_st[i].cause_by_vf;
>  			break;
>  		}
>  	}
> @@ -2805,6 +2809,8 @@ hclge_handle_error_type_reg_log(struct device *dev,
>  	dev_err(dev, "reg_value:\n");
>  	for (i = 0; i < type_reg_info->reg_num; i++)
>  		dev_err(dev, "0x%08x\n", type_reg_info->hclge_reg[i]);
> +
> +	return cause_by_vf;
>  }
>  
>  static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev,
> @@ -2815,6 +2821,7 @@ static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev,
>  	struct device *dev = &hdev->pdev->dev;
>  	struct hclge_mod_err_info *mod_info;
>  	struct hclge_sum_err_info *sum_info;
> +	bool cause_by_vf = false;
>  	u8 mod_num, err_num, i;
>  	u32 offset = 0;
>  
> @@ -2843,12 +2850,16 @@ static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev,
>  
>  			type_reg_info = (struct hclge_type_reg_err_info *)
>  					    &buf[offset++];
> -			hclge_handle_error_type_reg_log(dev, mod_info,
> -							type_reg_info);
> +			if (hclge_handle_error_type_reg_log(dev, mod_info,
> +							    type_reg_info))
> +				cause_by_vf = true;
>  
>  			offset += type_reg_info->reg_num;
>  		}
>  	}
> +
> +	if (hnae3_ae_dev_vf_fault_supported(hdev->ae_dev) && cause_by_vf)
> +		set_bit(HNAE3_VF_EXP_RESET, &ae_dev->hw_err_reset_req);
>  }
>  
>  static int hclge_query_all_err_bd_num(struct hclge_dev *hdev, u32 *bd_num)
> @@ -2940,3 +2951,98 @@ int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev)
>  out:
>  	return ret;
>  }
> +
> +static bool hclge_reset_vf_in_bitmap(struct hclge_dev *hdev,
> +				     unsigned long *bitmap)
> +{
> +	struct hclge_vport *vport;
> +	bool exist_set = false;
> +	int func_id;
> +	int ret;
> +
> +	func_id = find_first_bit(bitmap, HCLGE_VPORT_NUM);
> +	if (func_id == PF_VPORT_ID)
> +		return false;
> +
> +	while (func_id != HCLGE_VPORT_NUM) {
> +		vport = hclge_get_vf_vport(hdev,
> +					   func_id - HCLGE_VF_VPORT_START_NUM);
> +		if (!vport) {
> +			dev_err(&hdev->pdev->dev, "invalid func id(%d)\n",
> +				func_id);
> +			return false;
> +		}
> +
> +		dev_info(&hdev->pdev->dev, "do function %d recovery.", func_id);
> +
> +		ret = hclge_reset_tqp(&vport->nic);
> +		if (ret) {
> +			dev_err(&hdev->pdev->dev,
> +				"failed to reset tqp, ret = %d.", ret);
> +			return false;
> +		}
> +
> +		ret = hclge_inform_vf_reset(vport, HNAE3_VF_FUNC_RESET);
> +		if (ret) {
> +			dev_err(&hdev->pdev->dev,
> +				"failed to reset func %d, ret = %d.",
> +				func_id, ret);
> +			return false;
> +		}
> +
> +		exist_set = true;
> +		clear_bit(func_id, bitmap);
> +		func_id = find_first_bit(bitmap, HCLGE_VPORT_NUM);
> +	}
> +
> +	return exist_set;
> +}
> +
> +static void hclge_get_vf_fault_bitmap(struct hclge_desc *desc,
> +				      unsigned long *bitmap)
> +{
> +#define HCLGE_FIR_FAULT_BYTES	24
> +#define HCLGE_SEC_FAULT_BYTES	8
> +
> +	u8 *buff;
> +
> +	BUILD_BUG_ON(HCLGE_FIR_FAULT_BYTES + HCLGE_SEC_FAULT_BYTES !=
> +		     BITS_TO_BYTES(HCLGE_VPORT_NUM));
> +
> +	memcpy(bitmap, desc[0].data, HCLGE_FIR_FAULT_BYTES);
> +	buff = (u8 *)bitmap + HCLGE_FIR_FAULT_BYTES;
> +	memcpy(buff, desc[1].data, HCLGE_SEC_FAULT_BYTES);
> +}
> +
> +int hclge_handle_vf_queue_err_ras(struct hclge_dev *hdev)
> +{
> +	unsigned long vf_fault_bitmap[BITS_TO_LONGS(HCLGE_VPORT_NUM)];
> +	struct hclge_desc desc[2];
> +	bool cause_by_vf = false;
> +	int ret;
> +
> +	if (!test_and_clear_bit(HNAE3_VF_EXP_RESET,
> +				&hdev->ae_dev->hw_err_reset_req) ||
> +	    !hnae3_ae_dev_vf_fault_supported(hdev->ae_dev))
> +		return 0;
> +
> +	hclge_comm_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_GET_QUEUE_ERR_VF,
> +					true);
> +	desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
> +	hclge_comm_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_GET_QUEUE_ERR_VF,
> +					true);
> +
> +	ret = hclge_comm_cmd_send(&hdev->hw.hw, desc, 2);
> +	if (ret) {
> +		dev_err(&hdev->pdev->dev,
> +			"failed to get vf bitmap, ret = %d.\n", ret);
> +		return ret;
> +	}
> +	hclge_get_vf_fault_bitmap(desc, vf_fault_bitmap);
> +
> +	cause_by_vf = hclge_reset_vf_in_bitmap(hdev, vf_fault_bitmap);
> +	if (cause_by_vf)
> +		hdev->ae_dev->hw_err_reset_req = 0;
> +
> +	return 0;
> +}
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
> index 86be6fb32990..68b738affa66 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
> @@ -196,6 +196,7 @@ struct hclge_hw_module_id {
>  struct hclge_hw_type_id {
>  	enum hclge_err_type_list type_id;
>  	const char *msg;
> +	bool cause_by_vf; /* indicate the error may from vf exception */
>  };
>  
>  struct hclge_sum_err_info {
> @@ -228,4 +229,5 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
>  			       unsigned long *reset_requests);
>  int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev);
>  int hclge_handle_mac_tnl(struct hclge_dev *hdev);
> +int hclge_handle_vf_queue_err_ras(struct hclge_dev *hdev);
>  #endif
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
> index c42574e29747..99c0576e6383 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
> @@ -3424,7 +3424,7 @@ static int hclge_get_status(struct hnae3_handle *handle)
>  	return hdev->hw.mac.link;
>  }
>  
> -static struct hclge_vport *hclge_get_vf_vport(struct hclge_dev *hdev, int vf)
> +struct hclge_vport *hclge_get_vf_vport(struct hclge_dev *hdev, int vf)
>  {
>  	if (!pci_num_vf(hdev->pdev)) {
>  		dev_err(&hdev->pdev->dev,
> @@ -4468,6 +4468,7 @@ static void hclge_handle_err_recovery(struct hclge_dev *hdev)
>  	if (hclge_find_error_source(hdev)) {
>  		hclge_handle_error_info_log(ae_dev);
>  		hclge_handle_mac_tnl(hdev);
> +		hclge_handle_vf_queue_err_ras(hdev);
>  	}
>  
>  	hclge_handle_err_reset_request(hdev);
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
> index 7bc2049b723d..02c7aab3546e 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
> @@ -1146,4 +1146,6 @@ int hclge_dbg_dump_rst_info(struct hclge_dev *hdev, char *buf, int len);
>  int hclge_push_vf_link_status(struct hclge_vport *vport);
>  int hclge_enable_vport_vlan_filter(struct hclge_vport *vport, bool request_en);
>  int hclge_mac_update_stats(struct hclge_dev *hdev);
> +struct hclge_vport *hclge_get_vf_vport(struct hclge_dev *hdev, int vf);
> +int hclge_inform_vf_reset(struct hclge_vport *vport, u16 reset_type);
>  #endif
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
> index 04ff9bf12185..4b0d07ca2505 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
> @@ -124,7 +124,7 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len,
>  	return status;
>  }
>  
> -static int hclge_inform_vf_reset(struct hclge_vport *vport, u16 reset_type)
> +int hclge_inform_vf_reset(struct hclge_vport *vport, u16 reset_type)
>  {
>  	__le16 msg_data;
>  	u8 dest_vfid;
> -- 
> 2.30.0
> 
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH V2 net-next 0/2] add vf fault detect support for HNS3 ethernet driver
  2023-10-07  3:12 [PATCH V2 net-next 0/2] add vf fault detect support for HNS3 ethernet driver Jijie Shao
  2023-10-07  3:12 ` [PATCH V2 net-next 1/2] net: hns3: add hns3 vf fault detect cap bit support Jijie Shao
  2023-10-07  3:12 ` [PATCH V2 net-next 2/2] net: hns3: add vf fault detect support Jijie Shao
@ 2023-10-11 20:40 ` patchwork-bot+netdevbpf
  2 siblings, 0 replies; 5+ messages in thread
From: patchwork-bot+netdevbpf @ 2023-10-11 20:40 UTC (permalink / raw)
  To: Jijie Shao
  Cc: yisen.zhuang, salil.mehta, davem, edumazet, kuba, pabeni,
	shenjian15, wangjie125, liuyonglong, netdev, linux-kernel

Hello:

This series was applied to netdev/net-next.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Sat, 7 Oct 2023 11:12:13 +0800 you wrote:
> add vf fault detect support for HNS3 ethernet driver
> 
> Jie Wang (2):
>   net: hns3: add hns3 vf fault detect cap bit support
>   net: hns3: add vf fault detect support
> 
>  drivers/net/ethernet/hisilicon/hns3/hnae3.h   |   5 +
>  .../hns3/hns3_common/hclge_comm_cmd.c         |   1 +
>  .../hns3/hns3_common/hclge_comm_cmd.h         |   2 +
>  .../ethernet/hisilicon/hns3/hns3_debugfs.c    |   3 +
>  .../hisilicon/hns3/hns3pf/hclge_err.c         | 116 +++++++++++++++++-
>  .../hisilicon/hns3/hns3pf/hclge_err.h         |   2 +
>  .../hisilicon/hns3/hns3pf/hclge_main.c        |   3 +-
>  .../hisilicon/hns3/hns3pf/hclge_main.h        |   2 +
>  .../hisilicon/hns3/hns3pf/hclge_mbx.c         |   2 +-
>  9 files changed, 129 insertions(+), 7 deletions(-)

Here is the summary with links:
  - [V2,net-next,1/2] net: hns3: add hns3 vf fault detect cap bit support
    https://git.kernel.org/netdev/net-next/c/f1bc63aa6e11
  - [V2,net-next,2/2] net: hns3: add vf fault detect support
    https://git.kernel.org/netdev/net-next/c/8a45c4f9e159

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-10-11 20:40 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-07  3:12 [PATCH V2 net-next 0/2] add vf fault detect support for HNS3 ethernet driver Jijie Shao
2023-10-07  3:12 ` [PATCH V2 net-next 1/2] net: hns3: add hns3 vf fault detect cap bit support Jijie Shao
2023-10-07  3:12 ` [PATCH V2 net-next 2/2] net: hns3: add vf fault detect support Jijie Shao
2023-10-09 13:42   ` Simon Horman
2023-10-11 20:40 ` [PATCH V2 net-next 0/2] add vf fault detect support for HNS3 ethernet driver patchwork-bot+netdevbpf

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).