All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/9] drm/amdgpu:Define the unified ras function pointers of each IP block
@ 2021-11-25 10:56 yipechai
  2021-11-25 10:56 ` [PATCH 2/9] drm/amdgpu: Modify gfx block to fit for the unified ras function pointers yipechai
                   ` (8 more replies)
  0 siblings, 9 replies; 11+ messages in thread
From: yipechai @ 2021-11-25 10:56 UTC (permalink / raw)
  To: amd-gfx; +Cc: yipechai, yipechai

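Define a unified set of RAS function pointers (struct amdgpu_ras_block_ops)
for each IP block to adopt.

Also turn amdgpu_ras_is_supported() and amdgpu_ras_reset_gpu() from static
inline helpers in amdgpu_ras.h into out-of-line functions in amdgpu_ras.c.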

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 20 ++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 36 ++++++++++++-------------
 2 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 90f0db3b4f65..dc6c8130e2d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2739,3 +2739,23 @@ static void amdgpu_register_bad_pages_mca_notifier(void)
         }
 }
 #endif
+
+/* check if ras is supported on block, say, sdma, gfx */
+int amdgpu_ras_is_supported(struct amdgpu_device *adev,
+		unsigned int block)
+{
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+	if (block >= AMDGPU_RAS_BLOCK_COUNT)
+		return 0;
+	return ras && (adev->ras_enabled & (1 << block));
+}
+
+int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+	if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
+		schedule_work(&ras->recovery_work);
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index cdd0010a5389..4b7da40dd837 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -469,6 +469,19 @@ struct ras_debug_if {
 	};
 	int op;
 };
+
+struct amdgpu_ras_block_ops {
+	int (*ras_late_init)(struct amdgpu_device *adev);
+	void (*ras_fini)(struct amdgpu_device *adev);
+	int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
+	void  (*query_ras_error_count)(struct amdgpu_device *adev,void *ras_error_status);
+	void (*query_ras_error_status)(struct amdgpu_device *adev);
+	bool  (*query_ras_poison_mode)(struct amdgpu_device *adev);
+	void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status);
+	void (*reset_ras_error_count)(struct amdgpu_device *adev);
+	void (*reset_ras_error_status)(struct amdgpu_device *adev);
+};
+
 /* work flow
  * vbios
  * 1: ras feature enable (enabled by default)
@@ -486,16 +499,6 @@ struct ras_debug_if {
 #define amdgpu_ras_get_context(adev)		((adev)->psp.ras_context.ras)
 #define amdgpu_ras_set_context(adev, ras_con)	((adev)->psp.ras_context.ras = (ras_con))
 
-/* check if ras is supported on block, say, sdma, gfx */
-static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
-		unsigned int block)
-{
-	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
-
-	if (block >= AMDGPU_RAS_BLOCK_COUNT)
-		return 0;
-	return ras && (adev->ras_enabled & (1 << block));
-}
 
 int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
 
@@ -512,15 +515,6 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
 
 int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev);
 
-static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
-{
-	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
-
-	if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
-		schedule_work(&ras->recovery_work);
-	return 0;
-}
-
 static inline enum ta_ras_block
 amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
 	switch (block) {
@@ -652,4 +646,8 @@ const char *get_ras_block_str(struct ras_common_if *ras_block);
 
 bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev);
 
+int amdgpu_ras_is_supported(struct amdgpu_device *adev,	unsigned int block);
+
+int amdgpu_ras_reset_gpu(struct amdgpu_device *adev);
+
 #endif
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/9] drm/amdgpu: Modify gfx block to fit for the unified ras function pointers.
  2021-11-25 10:56 [PATCH 1/9] drm/amdgpu:Define the unified ras function pointers of each IP block yipechai
@ 2021-11-25 10:56 ` yipechai
  2021-11-25 10:56 ` [PATCH 3/9] drm/amdgpu: Modify gmc " yipechai
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: yipechai @ 2021-11-25 10:56 UTC (permalink / raw)
  To: amd-gfx; +Cc: yipechai, yipechai

Modify the gfx block RAS functions to fit the unified RAS function pointers.
The gfx query_ras_error_count callbacks now return void instead of int.

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 11 ++--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 24 ++++++++++-----------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 28 ++++++++++++-------------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c   | 19 +++++++++--------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 21 ++++++++++---------
 6 files changed, 51 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 1795d448c700..90ac0e9a32cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -697,8 +697,8 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
 		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
 		if (adev->gfx.ras_funcs &&
-		    adev->gfx.ras_funcs->query_ras_error_count)
-			adev->gfx.ras_funcs->query_ras_error_count(adev, err_data);
+		    adev->gfx.ras_funcs->ops.query_ras_error_count)
+			adev->gfx.ras_funcs->ops.query_ras_error_count(adev, err_data);
 		amdgpu_ras_reset_gpu(adev);
 	}
 	return AMDGPU_RAS_SUCCESS;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 6b78b4a0e182..2a7f78f17c3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -31,6 +31,7 @@
 #include "amdgpu_ring.h"
 #include "amdgpu_rlc.h"
 #include "soc15.h"
+#include "amdgpu_ras.h"
 
 /* GFX current status */
 #define AMDGPU_GFX_NORMAL_MODE			0x00000000L
@@ -214,15 +215,7 @@ struct amdgpu_cu_info {
 };
 
 struct amdgpu_gfx_ras_funcs {
-	int (*ras_late_init)(struct amdgpu_device *adev);
-	void (*ras_fini)(struct amdgpu_device *adev);
-	int (*ras_error_inject)(struct amdgpu_device *adev,
-				void *inject_if);
-	int (*query_ras_error_count)(struct amdgpu_device *adev,
-				     void *ras_error_status);
-	void (*reset_ras_error_count)(struct amdgpu_device *adev);
-	void (*query_ras_error_status)(struct amdgpu_device *adev);
-	void (*reset_ras_error_status)(struct amdgpu_device *adev);
+	struct amdgpu_ras_block_ops ops;
 	void (*enable_watchdog_timer)(struct amdgpu_device *adev);
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index dc6c8130e2d7..790aaba065ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -920,12 +920,12 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 		break;
 	case AMDGPU_RAS_BLOCK__GFX:
 		if (adev->gfx.ras_funcs &&
-		    adev->gfx.ras_funcs->query_ras_error_count)
-			adev->gfx.ras_funcs->query_ras_error_count(adev, &err_data);
+		    adev->gfx.ras_funcs->ops.query_ras_error_count)
+			adev->gfx.ras_funcs->ops.query_ras_error_count(adev, &err_data);
 
 		if (adev->gfx.ras_funcs &&
-		    adev->gfx.ras_funcs->query_ras_error_status)
-			adev->gfx.ras_funcs->query_ras_error_status(adev);
+		    adev->gfx.ras_funcs->ops.query_ras_error_status)
+			adev->gfx.ras_funcs->ops.query_ras_error_status(adev);
 		break;
 	case AMDGPU_RAS_BLOCK__MMHUB:
 		if (adev->mmhub.ras_funcs &&
@@ -1018,12 +1018,12 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
 	switch (block) {
 	case AMDGPU_RAS_BLOCK__GFX:
 		if (adev->gfx.ras_funcs &&
-		    adev->gfx.ras_funcs->reset_ras_error_count)
-			adev->gfx.ras_funcs->reset_ras_error_count(adev);
+		    adev->gfx.ras_funcs->ops.reset_ras_error_count)
+			adev->gfx.ras_funcs->ops.reset_ras_error_count(adev);
 
 		if (adev->gfx.ras_funcs &&
-		    adev->gfx.ras_funcs->reset_ras_error_status)
-			adev->gfx.ras_funcs->reset_ras_error_status(adev);
+		    adev->gfx.ras_funcs->ops.reset_ras_error_status)
+			adev->gfx.ras_funcs->ops.reset_ras_error_status(adev);
 		break;
 	case AMDGPU_RAS_BLOCK__MMHUB:
 		if (adev->mmhub.ras_funcs &&
@@ -1103,8 +1103,8 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
 	switch (info->head.block) {
 	case AMDGPU_RAS_BLOCK__GFX:
 		if (adev->gfx.ras_funcs &&
-		    adev->gfx.ras_funcs->ras_error_inject)
-			ret = adev->gfx.ras_funcs->ras_error_inject(adev, info);
+		    adev->gfx.ras_funcs->ops.ras_error_inject)
+			ret = adev->gfx.ras_funcs->ops.ras_error_inject(adev, info);
 		else
 			ret = -EINVAL;
 		break;
@@ -1734,8 +1734,8 @@ static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
 	switch (info->head.block) {
 	case AMDGPU_RAS_BLOCK__GFX:
 		if (adev->gfx.ras_funcs &&
-		    adev->gfx.ras_funcs->query_ras_error_status)
-			adev->gfx.ras_funcs->query_ras_error_status(adev);
+		    adev->gfx.ras_funcs->ops.query_ras_error_status)
+			adev->gfx.ras_funcs->ops.query_ras_error_status(adev);
 		break;
 	case AMDGPU_RAS_BLOCK__MMHUB:
 		if (adev->mmhub.ras_funcs &&
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 08e91e7245df..ba00dbbb5e4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -817,7 +817,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
-static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
+static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 					  void *ras_error_status);
 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
 				     void *inject_if);
@@ -2128,11 +2128,13 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
 };
 
 static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = {
-	.ras_late_init = amdgpu_gfx_ras_late_init,
-	.ras_fini = amdgpu_gfx_ras_fini,
-	.ras_error_inject = &gfx_v9_0_ras_error_inject,
-	.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
-	.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
+	.ops = {
+		.ras_late_init = amdgpu_gfx_ras_late_init,
+		.ras_fini = amdgpu_gfx_ras_fini,
+		.ras_error_inject = &gfx_v9_0_ras_error_inject,
+		.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
+		.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
+	}
 };
 
 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
@@ -2449,8 +2451,8 @@ static int gfx_v9_0_sw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	if (adev->gfx.ras_funcs &&
-	    adev->gfx.ras_funcs->ras_fini)
-		adev->gfx.ras_funcs->ras_fini(adev);
+	    adev->gfx.ras_funcs->ops.ras_fini)
+		adev->gfx.ras_funcs->ops.ras_fini(adev);
 
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -4889,8 +4891,8 @@ static int gfx_v9_0_ecc_late_init(void *handle)
 		return r;
 
 	if (adev->gfx.ras_funcs &&
-	    adev->gfx.ras_funcs->ras_late_init) {
-		r = adev->gfx.ras_funcs->ras_late_init(adev);
+	    adev->gfx.ras_funcs->ops.ras_late_init) {
+		r = adev->gfx.ras_funcs->ops.ras_late_init(adev);
 		if (r)
 			return r;
 	}
@@ -6841,7 +6843,7 @@ static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
 }
 
-static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
+static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 					  void *ras_error_status)
 {
 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
@@ -6850,7 +6852,7 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 	uint32_t reg_value;
 
 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
-		return -EINVAL;
+		return;
 
 	err_data->ue_count = 0;
 	err_data->ce_count = 0;
@@ -6879,8 +6881,6 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	gfx_v9_0_query_utc_edc_status(adev, err_data);
-
-	return 0;
 }
 
 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
index b4789dfc2bb9..758c51a076f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
@@ -863,7 +863,7 @@ static int gfx_v9_4_ras_error_count(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
+static void gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
 					  void *ras_error_status)
 {
 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
@@ -872,7 +872,7 @@ static int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
 	uint32_t reg_value;
 
 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
-		return -EINVAL;
+		return;
 
 	err_data->ue_count = 0;
 	err_data->ce_count = 0;
@@ -903,7 +903,6 @@ static int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
 
 	gfx_v9_4_query_utc_edc_status(adev, err_data);
 
-	return 0;
 }
 
 static void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
@@ -1030,10 +1029,12 @@ static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev)
 }
 
 const struct amdgpu_gfx_ras_funcs gfx_v9_4_ras_funcs = {
-        .ras_late_init = amdgpu_gfx_ras_late_init,
-        .ras_fini = amdgpu_gfx_ras_fini,
-        .ras_error_inject = &gfx_v9_4_ras_error_inject,
-        .query_ras_error_count = &gfx_v9_4_query_ras_error_count,
-        .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
-        .query_ras_error_status = &gfx_v9_4_query_ras_error_status,
+	.ops = {
+		.ras_late_init = amdgpu_gfx_ras_late_init,
+		.ras_fini = amdgpu_gfx_ras_fini,
+		.ras_error_inject = &gfx_v9_4_ras_error_inject,
+		.query_ras_error_count = &gfx_v9_4_query_ras_error_count,
+		.reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
+		.query_ras_error_status = &gfx_v9_4_query_ras_error_status,
+	},
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index 54306fd45ff1..00f9bfa68af7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -1644,14 +1644,14 @@ static int gfx_v9_4_2_query_utc_edc_count(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int gfx_v9_4_2_query_ras_error_count(struct amdgpu_device *adev,
+static void gfx_v9_4_2_query_ras_error_count(struct amdgpu_device *adev,
 					    void *ras_error_status)
 {
 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
 	uint32_t sec_count = 0, ded_count = 0;
 
 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
-		return -EINVAL;
+		return;
 
 	err_data->ue_count = 0;
 	err_data->ce_count = 0;
@@ -1664,7 +1664,6 @@ static int gfx_v9_4_2_query_ras_error_count(struct amdgpu_device *adev,
 	err_data->ce_count += sec_count;
 	err_data->ue_count += ded_count;
 
-	return 0;
 }
 
 static void gfx_v9_4_2_reset_utc_err_status(struct amdgpu_device *adev)
@@ -1935,12 +1934,14 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev)
 }
 
 const struct amdgpu_gfx_ras_funcs gfx_v9_4_2_ras_funcs = {
-	.ras_late_init = amdgpu_gfx_ras_late_init,
-	.ras_fini = amdgpu_gfx_ras_fini,
-	.ras_error_inject = &gfx_v9_4_2_ras_error_inject,
-	.query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
-	.reset_ras_error_count = &gfx_v9_4_2_reset_ras_error_count,
-	.query_ras_error_status = &gfx_v9_4_2_query_ras_error_status,
-	.reset_ras_error_status = &gfx_v9_4_2_reset_ras_error_status,
+	.ops = {
+		.ras_late_init = amdgpu_gfx_ras_late_init,
+		.ras_fini = amdgpu_gfx_ras_fini,
+		.ras_error_inject = &gfx_v9_4_2_ras_error_inject,
+		.query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
+		.reset_ras_error_count = &gfx_v9_4_2_reset_ras_error_count,
+		.query_ras_error_status = &gfx_v9_4_2_query_ras_error_status,
+		.reset_ras_error_status = &gfx_v9_4_2_reset_ras_error_status,
+	},
 	.enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer,
 };
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/9] drm/amdgpu: Modify gmc block to fit for the unified ras function pointers.
  2021-11-25 10:56 [PATCH 1/9] drm/amdgpu:Define the unified ras function pointers of each IP block yipechai
  2021-11-25 10:56 ` [PATCH 2/9] drm/amdgpu: Modify gfx block to fit for the unified ras function pointers yipechai
@ 2021-11-25 10:56 ` yipechai
  2021-11-25 10:56 ` [PATCH 4/9] drm/amdgpu: Modify hdp " yipechai
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: yipechai @ 2021-11-25 10:56 UTC (permalink / raw)
  To: amd-gfx; +Cc: yipechai, yipechai

Modify the gmc block RAS functions (the xgmi RAS function table) to fit the
unified RAS function pointers. amdgpu_xgmi_query_ras_error_count() now returns
void instead of int.

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  |  8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |  7 ++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 20 ++++++++++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h |  2 +-
 5 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 83f26bca7dac..b7c462749d37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -452,8 +452,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 		adev->gmc.xgmi.ras_funcs = &xgmi_ras_funcs;
 
 	if (adev->gmc.xgmi.ras_funcs &&
-	    adev->gmc.xgmi.ras_funcs->ras_late_init) {
-		r = adev->gmc.xgmi.ras_funcs->ras_late_init(adev);
+	    adev->gmc.xgmi.ras_funcs->ops.ras_late_init) {
+		r = adev->gmc.xgmi.ras_funcs->ops.ras_late_init(adev);
 		if (r)
 			return r;
 	}
@@ -500,8 +500,8 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 		adev->mmhub.ras_funcs->ras_fini(adev);
 
 	if (adev->gmc.xgmi.ras_funcs &&
-	    adev->gmc.xgmi.ras_funcs->ras_fini)
-		adev->gmc.xgmi.ras_funcs->ras_fini(adev);
+	    adev->gmc.xgmi.ras_funcs->ops.ras_fini)
+		adev->gmc.xgmi.ras_funcs->ops.ras_fini(adev);
 
 	if (adev->hdp.ras_funcs &&
 	    adev->hdp.ras_funcs->ras_fini)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index e55201134a01..f6f7d996ff98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -29,6 +29,7 @@
 #include <linux/types.h>
 
 #include "amdgpu_irq.h"
+#include "amdgpu_ras.h"
 
 /* VA hole for 48bit addresses on Vega10 */
 #define AMDGPU_GMC_HOLE_START	0x0000800000000000ULL
@@ -136,11 +137,7 @@ struct amdgpu_gmc_funcs {
 };
 
 struct amdgpu_xgmi_ras_funcs {
-	int (*ras_late_init)(struct amdgpu_device *adev);
-	void (*ras_fini)(struct amdgpu_device *adev);
-	int (*query_ras_error_count)(struct amdgpu_device *adev,
-				     void *ras_error_status);
-	void (*reset_ras_error_count)(struct amdgpu_device *adev);
+	struct amdgpu_ras_block_ops ops;
 };
 
 struct amdgpu_xgmi {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 790aaba065ab..7f830bf8f8df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -943,8 +943,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 		break;
 	case AMDGPU_RAS_BLOCK__XGMI_WAFL:
 		if (adev->gmc.xgmi.ras_funcs &&
-		    adev->gmc.xgmi.ras_funcs->query_ras_error_count)
-			adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data);
+		    adev->gmc.xgmi.ras_funcs->ops.query_ras_error_count)
+			adev->gmc.xgmi.ras_funcs->ops.query_ras_error_count(adev, &err_data);
 		break;
 	case AMDGPU_RAS_BLOCK__HDP:
 		if (adev->hdp.ras_funcs &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 0d149f5f000e..306962c95d52 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -739,7 +739,7 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
 	    adev->gmc.xgmi.num_physical_nodes == 0)
 		return 0;
 
-	adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);
+	adev->gmc.xgmi.ras_funcs->ops.reset_ras_error_count(adev);
 
 	if (!adev->gmc.xgmi.ras_if) {
 		adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
@@ -859,7 +859,7 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
+static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
 					     void *ras_error_status)
 {
 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
@@ -868,7 +868,7 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
 	uint32_t ue_cnt = 0, ce_cnt = 0;
 
 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL))
-		return -EINVAL;
+		return ;
 
 	err_data->ue_count = 0;
 	err_data->ce_count = 0;
@@ -934,17 +934,17 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
 		break;
 	}
 
-	adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);
+	adev->gmc.xgmi.ras_funcs->ops.reset_ras_error_count(adev);
 
 	err_data->ue_count += ue_cnt;
 	err_data->ce_count += ce_cnt;
-
-	return 0;
 }
 
 const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs = {
-	.ras_late_init = amdgpu_xgmi_ras_late_init,
-	.ras_fini = amdgpu_xgmi_ras_fini,
-	.query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
-	.reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
+	.ops = {
+		.ras_late_init = amdgpu_xgmi_ras_late_init,
+		.ras_fini = amdgpu_xgmi_ras_fini,
+		.query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
+		.reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
+	},
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index d2189bf7d428..0f3f09d58793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -24,7 +24,7 @@
 
 #include <drm/task_barrier.h>
 #include "amdgpu_psp.h"
-
+#include "amdgpu_ras.h"
 
 struct amdgpu_hive_info {
 	struct kobject kobj;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 4/9] drm/amdgpu: Modify hdp block to fit for the unified ras function pointers.
  2021-11-25 10:56 [PATCH 1/9] drm/amdgpu:Define the unified ras function pointers of each IP block yipechai
  2021-11-25 10:56 ` [PATCH 2/9] drm/amdgpu: Modify gfx block to fit for the unified ras function pointers yipechai
  2021-11-25 10:56 ` [PATCH 3/9] drm/amdgpu: Modify gmc " yipechai
@ 2021-11-25 10:56 ` yipechai
  2021-11-25 10:56 ` [PATCH 5/9] drm/amdgpu: Modify mca " yipechai
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: yipechai @ 2021-11-25 10:56 UTC (permalink / raw)
  To: amd-gfx; +Cc: yipechai, yipechai

Modify the hdp block RAS functions to fit the unified RAS function pointers.

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c |  8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h |  7 ++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c |  8 ++++----
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |  4 ++--
 drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c   | 10 ++++++----
 5 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index b7c462749d37..0aab31fce997 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -459,8 +459,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 	}
 
 	if (adev->hdp.ras_funcs &&
-	    adev->hdp.ras_funcs->ras_late_init) {
-		r = adev->hdp.ras_funcs->ras_late_init(adev);
+	    adev->hdp.ras_funcs->ops.ras_late_init) {
+		r = adev->hdp.ras_funcs->ops.ras_late_init(adev);
 		if (r)
 			return r;
 	}
@@ -504,8 +504,8 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 		adev->gmc.xgmi.ras_funcs->ops.ras_fini(adev);
 
 	if (adev->hdp.ras_funcs &&
-	    adev->hdp.ras_funcs->ras_fini)
-		adev->hdp.ras_funcs->ras_fini(adev);
+	    adev->hdp.ras_funcs->ops.ras_fini)
+		adev->hdp.ras_funcs->ops.ras_fini(adev);
 }
 
 	/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index 7ec99d591584..49121eb7d599 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -22,13 +22,10 @@
  */
 #ifndef __AMDGPU_HDP_H__
 #define __AMDGPU_HDP_H__
+#include "amdgpu_ras.h"
 
 struct amdgpu_hdp_ras_funcs {
-	int (*ras_late_init)(struct amdgpu_device *adev);
-	void (*ras_fini)(struct amdgpu_device *adev);
-	void (*query_ras_error_count)(struct amdgpu_device *adev,
-				      void *ras_error_status);
-	void (*reset_ras_error_count)(struct amdgpu_device *adev);
+	struct amdgpu_ras_block_ops ops;
 };
 
 struct amdgpu_hdp_funcs {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 7f830bf8f8df..a3b606c84d45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -948,8 +948,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 		break;
 	case AMDGPU_RAS_BLOCK__HDP:
 		if (adev->hdp.ras_funcs &&
-		    adev->hdp.ras_funcs->query_ras_error_count)
-			adev->hdp.ras_funcs->query_ras_error_count(adev, &err_data);
+		    adev->hdp.ras_funcs->ops.query_ras_error_count)
+			adev->hdp.ras_funcs->ops.query_ras_error_count(adev, &err_data);
 		break;
 	case AMDGPU_RAS_BLOCK__MCA:
 		amdgpu_ras_mca_query_error_status(adev, &info->head, &err_data);
@@ -1040,8 +1040,8 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
 		break;
 	case AMDGPU_RAS_BLOCK__HDP:
 		if (adev->hdp.ras_funcs &&
-		    adev->hdp.ras_funcs->reset_ras_error_count)
-			adev->hdp.ras_funcs->reset_ras_error_count(adev);
+		    adev->hdp.ras_funcs->ops.reset_ras_error_count)
+			adev->hdp.ras_funcs->ops.reset_ras_error_count(adev);
 		break;
 	default:
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3606d2cbff5e..c40c669d49c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1301,8 +1301,8 @@ static int gmc_v9_0_late_init(void *handle)
 			adev->mmhub.ras_funcs->reset_ras_error_count(adev);
 
 		if (adev->hdp.ras_funcs &&
-		    adev->hdp.ras_funcs->reset_ras_error_count)
-			adev->hdp.ras_funcs->reset_ras_error_count(adev);
+		    adev->hdp.ras_funcs->ops.reset_ras_error_count)
+			adev->hdp.ras_funcs->ops.reset_ras_error_count(adev);
 	}
 
 	r = amdgpu_gmc_ras_late_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index 74b90cc2bf48..9021ea08ee0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -150,10 +150,12 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev)
 }
 
 const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs = {
-	.ras_late_init = amdgpu_hdp_ras_late_init,
-	.ras_fini = amdgpu_hdp_ras_fini,
-	.query_ras_error_count = hdp_v4_0_query_ras_error_count,
-	.reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
+	.ops = {
+		.ras_late_init = amdgpu_hdp_ras_late_init,
+		.ras_fini = amdgpu_hdp_ras_fini,
+		.query_ras_error_count = hdp_v4_0_query_ras_error_count,
+		.reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
+	},
 };
 
 const struct amdgpu_hdp_funcs hdp_v4_0_funcs = {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 5/9] drm/amdgpu: Modify mca block to fit for the unified ras function pointers.
  2021-11-25 10:56 [PATCH 1/9] drm/amdgpu:Define the unified ras function pointers of each IP block yipechai
                   ` (2 preceding siblings ...)
  2021-11-25 10:56 ` [PATCH 4/9] drm/amdgpu: Modify hdp " yipechai
@ 2021-11-25 10:56 ` yipechai
  2021-11-25 10:56 ` [PATCH 6/9] drm/amdgpu: Modify mmhub " yipechai
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: yipechai @ 2021-11-25 10:56 UTC (permalink / raw)
  To: amd-gfx; +Cc: yipechai, yipechai

Modify the mca block RAS functions to fit the unified RAS function pointers.

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 12 +++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h |  8 ++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 12 +++++-----
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c   | 30 +++++++++++++++----------
 4 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 0aab31fce997..024342969267 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -466,22 +466,22 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 	}
 
 	if (adev->mca.mp0.ras_funcs &&
-	    adev->mca.mp0.ras_funcs->ras_late_init) {
-		r = adev->mca.mp0.ras_funcs->ras_late_init(adev);
+	    adev->mca.mp0.ras_funcs->ops.ras_late_init) {
+		r = adev->mca.mp0.ras_funcs->ops.ras_late_init(adev);
 		if (r)
 			return r;
 	}
 
 	if (adev->mca.mp1.ras_funcs &&
-	    adev->mca.mp1.ras_funcs->ras_late_init) {
-		r = adev->mca.mp1.ras_funcs->ras_late_init(adev);
+	    adev->mca.mp1.ras_funcs->ops.ras_late_init) {
+		r = adev->mca.mp1.ras_funcs->ops.ras_late_init(adev);
 		if (r)
 			return r;
 	}
 
 	if (adev->mca.mpio.ras_funcs &&
-	    adev->mca.mpio.ras_funcs->ras_late_init) {
-		r = adev->mca.mpio.ras_funcs->ras_late_init(adev);
+	    adev->mca.mpio.ras_funcs->ops.ras_late_init) {
+		r = adev->mca.mpio.ras_funcs->ops.ras_late_init(adev);
 		if (r)
 			return r;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index c74bc7177066..fbc3ebc81b99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -20,14 +20,10 @@
  */
 #ifndef __AMDGPU_MCA_H__
 #define __AMDGPU_MCA_H__
+#include "amdgpu_ras.h"
 
 struct amdgpu_mca_ras_funcs {
-	int (*ras_late_init)(struct amdgpu_device *adev);
-	void (*ras_fini)(struct amdgpu_device *adev);
-	void (*query_ras_error_count)(struct amdgpu_device *adev,
-				      void *ras_error_status);
-	void (*query_ras_error_address)(struct amdgpu_device *adev,
-					void *ras_error_status);
+	struct amdgpu_ras_block_ops ops;
 	uint32_t ras_block;
 	uint32_t ras_sub_block;
 	const char* sysfs_name;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index a3b606c84d45..e7cd2de07665 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -870,18 +870,18 @@ void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev,
 	switch (ras_block->sub_block_index) {
 	case AMDGPU_RAS_MCA_BLOCK__MP0:
 		if (adev->mca.mp0.ras_funcs &&
-		    adev->mca.mp0.ras_funcs->query_ras_error_count)
-			adev->mca.mp0.ras_funcs->query_ras_error_count(adev, &err_data);
+		    adev->mca.mp0.ras_funcs->ops.query_ras_error_count)
+			adev->mca.mp0.ras_funcs->ops.query_ras_error_count(adev, &err_data);
 		break;
 	case AMDGPU_RAS_MCA_BLOCK__MP1:
 		if (adev->mca.mp1.ras_funcs &&
-		    adev->mca.mp1.ras_funcs->query_ras_error_count)
-			adev->mca.mp1.ras_funcs->query_ras_error_count(adev, &err_data);
+		    adev->mca.mp1.ras_funcs->ops.query_ras_error_count)
+			adev->mca.mp1.ras_funcs->ops.query_ras_error_count(adev, &err_data);
 		break;
 	case AMDGPU_RAS_MCA_BLOCK__MPIO:
 		if (adev->mca.mpio.ras_funcs &&
-		    adev->mca.mpio.ras_funcs->query_ras_error_count)
-			adev->mca.mpio.ras_funcs->query_ras_error_count(adev, &err_data);
+		    adev->mca.mpio.ras_funcs->ops.query_ras_error_count)
+			adev->mca.mpio.ras_funcs->ops.query_ras_error_count(adev, &err_data);
 		break;
 	default:
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
index 8f7107d392af..dc2424587f12 100644
--- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
@@ -48,10 +48,12 @@ static void mca_v3_0_mp0_ras_fini(struct amdgpu_device *adev)
 }
 
 const struct amdgpu_mca_ras_funcs mca_v3_0_mp0_ras_funcs = {
-	.ras_late_init = mca_v3_0_mp0_ras_late_init,
-	.ras_fini = mca_v3_0_mp0_ras_fini,
-	.query_ras_error_count = mca_v3_0_mp0_query_ras_error_count,
-	.query_ras_error_address = NULL,
+	.ops = {
+		.ras_late_init = mca_v3_0_mp0_ras_late_init,
+		.ras_fini = mca_v3_0_mp0_ras_fini,
+		.query_ras_error_count = mca_v3_0_mp0_query_ras_error_count,
+		.query_ras_error_address = NULL,
+	},
 	.ras_block = AMDGPU_RAS_BLOCK__MCA,
 	.ras_sub_block = AMDGPU_RAS_MCA_BLOCK__MP0,
 	.sysfs_name = "mp0_err_count",
@@ -76,10 +78,12 @@ static void mca_v3_0_mp1_ras_fini(struct amdgpu_device *adev)
 }
 
 const struct amdgpu_mca_ras_funcs mca_v3_0_mp1_ras_funcs = {
-	.ras_late_init = mca_v3_0_mp1_ras_late_init,
-	.ras_fini = mca_v3_0_mp1_ras_fini,
-	.query_ras_error_count = mca_v3_0_mp1_query_ras_error_count,
-	.query_ras_error_address = NULL,
+	.ops = {
+		.ras_late_init = mca_v3_0_mp1_ras_late_init,
+		.ras_fini = mca_v3_0_mp1_ras_fini,
+		.query_ras_error_count = mca_v3_0_mp1_query_ras_error_count,
+		.query_ras_error_address = NULL,
+	},
 	.ras_block = AMDGPU_RAS_BLOCK__MCA,
 	.ras_sub_block = AMDGPU_RAS_MCA_BLOCK__MP1,
 	.sysfs_name = "mp1_err_count",
@@ -104,10 +108,12 @@ static void mca_v3_0_mpio_ras_fini(struct amdgpu_device *adev)
 }
 
 const struct amdgpu_mca_ras_funcs mca_v3_0_mpio_ras_funcs = {
-	.ras_late_init = mca_v3_0_mpio_ras_late_init,
-	.ras_fini = mca_v3_0_mpio_ras_fini,
-	.query_ras_error_count = mca_v3_0_mpio_query_ras_error_count,
-	.query_ras_error_address = NULL,
+	.ops = {
+		.ras_late_init = mca_v3_0_mpio_ras_late_init,
+		.ras_fini = mca_v3_0_mpio_ras_fini,
+		.query_ras_error_count = mca_v3_0_mpio_query_ras_error_count,
+		.query_ras_error_address = NULL,
+	},
 	.ras_block = AMDGPU_RAS_BLOCK__MCA,
 	.ras_sub_block = AMDGPU_RAS_MCA_BLOCK__MPIO,
 	.sysfs_name = "mpio_err_count",
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 6/9] drm/amdgpu: Modify mmhub block to fit for the unified ras function pointers.
  2021-11-25 10:56 [PATCH 1/9] drm/amdgpu:Define the unified ras function pointers of each IP block yipechai
                   ` (3 preceding siblings ...)
  2021-11-25 10:56 ` [PATCH 5/9] drm/amdgpu: Modify mca " yipechai
@ 2021-11-25 10:56 ` yipechai
  2021-11-25 10:56 ` [PATCH 7/9] drm/amdgpu: Modify nbio " yipechai
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: yipechai @ 2021-11-25 10:56 UTC (permalink / raw)
  To: amd-gfx; +Cc: yipechai, yipechai

Modify mmhub block ras functions to fit for the unified ras function pointers.

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c    |  8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h  |  9 ++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    | 20 ++++++++++----------
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      |  4 ++--
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c    | 10 ++++++----
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c    | 14 ++++++++------
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c    | 12 +++++++-----
 8 files changed, 43 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 73ec46140d68..fcdd06bdb5d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3378,8 +3378,8 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
 			goto fail;
 
 		if (adev->mmhub.ras_funcs &&
-		    adev->mmhub.ras_funcs->reset_ras_error_count)
-			adev->mmhub.ras_funcs->reset_ras_error_count(adev);
+		    adev->mmhub.ras_funcs->ops.reset_ras_error_count)
+			adev->mmhub.ras_funcs->ops.reset_ras_error_count(adev);
 	} else {
 
 		task_barrier_full(&hive->tb);
@@ -4704,8 +4704,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 	if (!r && amdgpu_ras_intr_triggered()) {
 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
 			if (tmp_adev->mmhub.ras_funcs &&
-			    tmp_adev->mmhub.ras_funcs->reset_ras_error_count)
-				tmp_adev->mmhub.ras_funcs->reset_ras_error_count(tmp_adev);
+			    tmp_adev->mmhub.ras_funcs->ops.reset_ras_error_count)
+				tmp_adev->mmhub.ras_funcs->ops.reset_ras_error_count(tmp_adev);
 		}
 
 		amdgpu_ras_intr_cleared();
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 024342969267..7780effdf3ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -442,8 +442,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 	}
 
 	if (adev->mmhub.ras_funcs &&
-	    adev->mmhub.ras_funcs->ras_late_init) {
-		r = adev->mmhub.ras_funcs->ras_late_init(adev);
+	    adev->mmhub.ras_funcs->ops.ras_late_init) {
+		r = adev->mmhub.ras_funcs->ops.ras_late_init(adev);
 		if (r)
 			return r;
 	}
@@ -496,8 +496,8 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 		adev->umc.ras_funcs->ras_fini(adev);
 
 	if (adev->mmhub.ras_funcs &&
-	    adev->mmhub.ras_funcs->ras_fini)
-		adev->mmhub.ras_funcs->ras_fini(adev);
+	    adev->mmhub.ras_funcs->ops.ras_fini)
+		adev->mmhub.ras_funcs->ops.ras_fini(adev);
 
 	if (adev->gmc.xgmi.ras_funcs &&
 	    adev->gmc.xgmi.ras_funcs->ops.ras_fini)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
index b27fcbccce2b..ff7f28ef1d6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -20,15 +20,10 @@
  */
 #ifndef __AMDGPU_MMHUB_H__
 #define __AMDGPU_MMHUB_H__
+#include "amdgpu_ras.h"
 
 struct amdgpu_mmhub_ras_funcs {
-	int (*ras_late_init)(struct amdgpu_device *adev);
-	void (*ras_fini)(struct amdgpu_device *adev);
-	void (*query_ras_error_count)(struct amdgpu_device *adev,
-				      void *ras_error_status);
-	void (*query_ras_error_status)(struct amdgpu_device *adev);
-	void (*reset_ras_error_count)(struct amdgpu_device *adev);
-	void (*reset_ras_error_status)(struct amdgpu_device *adev);
+	struct amdgpu_ras_block_ops ops;
 };
 
 struct amdgpu_mmhub_funcs {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index e7cd2de07665..2d9ef677a2ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -929,12 +929,12 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 		break;
 	case AMDGPU_RAS_BLOCK__MMHUB:
 		if (adev->mmhub.ras_funcs &&
-		    adev->mmhub.ras_funcs->query_ras_error_count)
-			adev->mmhub.ras_funcs->query_ras_error_count(adev, &err_data);
+		    adev->mmhub.ras_funcs->ops.query_ras_error_count)
+			adev->mmhub.ras_funcs->ops.query_ras_error_count(adev, &err_data);
 
 		if (adev->mmhub.ras_funcs &&
-		    adev->mmhub.ras_funcs->query_ras_error_status)
-			adev->mmhub.ras_funcs->query_ras_error_status(adev);
+		    adev->mmhub.ras_funcs->ops.query_ras_error_status)
+			adev->mmhub.ras_funcs->ops.query_ras_error_status(adev);
 		break;
 	case AMDGPU_RAS_BLOCK__PCIE_BIF:
 		if (adev->nbio.ras_funcs &&
@@ -1027,12 +1027,12 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
 		break;
 	case AMDGPU_RAS_BLOCK__MMHUB:
 		if (adev->mmhub.ras_funcs &&
-		    adev->mmhub.ras_funcs->reset_ras_error_count)
-			adev->mmhub.ras_funcs->reset_ras_error_count(adev);
+		    adev->mmhub.ras_funcs->ops.reset_ras_error_count)
+			adev->mmhub.ras_funcs->ops.reset_ras_error_count(adev);
 
 		if (adev->mmhub.ras_funcs &&
-		    adev->mmhub.ras_funcs->reset_ras_error_status)
-			adev->mmhub.ras_funcs->reset_ras_error_status(adev);
+		    adev->mmhub.ras_funcs->ops.reset_ras_error_status)
+			adev->mmhub.ras_funcs->ops.reset_ras_error_status(adev);
 		break;
 	case AMDGPU_RAS_BLOCK__SDMA:
 		if (adev->sdma.funcs->reset_ras_error_count)
@@ -1739,8 +1739,8 @@ static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
 		break;
 	case AMDGPU_RAS_BLOCK__MMHUB:
 		if (adev->mmhub.ras_funcs &&
-		    adev->mmhub.ras_funcs->query_ras_error_status)
-			adev->mmhub.ras_funcs->query_ras_error_status(adev);
+		    adev->mmhub.ras_funcs->ops.query_ras_error_status)
+			adev->mmhub.ras_funcs->ops.query_ras_error_status(adev);
 		break;
 	default:
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index c40c669d49c3..4470049874c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1297,8 +1297,8 @@ static int gmc_v9_0_late_init(void *handle)
 
 	if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
 		if (adev->mmhub.ras_funcs &&
-		    adev->mmhub.ras_funcs->reset_ras_error_count)
-			adev->mmhub.ras_funcs->reset_ras_error_count(adev);
+		    adev->mmhub.ras_funcs->ops.reset_ras_error_count)
+			adev->mmhub.ras_funcs->ops.reset_ras_error_count(adev);
 
 		if (adev->hdp.ras_funcs &&
 		    adev->hdp.ras_funcs->ops.reset_ras_error_count)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index b3bede1dc41d..3b7133fb0cf6 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -775,10 +775,12 @@ static void mmhub_v1_0_reset_ras_error_count(struct amdgpu_device *adev)
 }
 
 const struct amdgpu_mmhub_ras_funcs mmhub_v1_0_ras_funcs = {
-	.ras_late_init = amdgpu_mmhub_ras_late_init,
-	.ras_fini = amdgpu_mmhub_ras_fini,
-	.query_ras_error_count = mmhub_v1_0_query_ras_error_count,
-	.reset_ras_error_count = mmhub_v1_0_reset_ras_error_count,
+	.ops = {
+		.ras_late_init = amdgpu_mmhub_ras_late_init,
+		.ras_fini = amdgpu_mmhub_ras_fini,
+		.query_ras_error_count = mmhub_v1_0_query_ras_error_count,
+		.reset_ras_error_count = mmhub_v1_0_reset_ras_error_count,
+	},
 };
 
 const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
index f5f7181f9af5..841fba8b0e08 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
@@ -1322,12 +1322,14 @@ static void mmhub_v1_7_reset_ras_error_status(struct amdgpu_device *adev)
 }
 
 const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = {
-	.ras_late_init = amdgpu_mmhub_ras_late_init,
-	.ras_fini = amdgpu_mmhub_ras_fini,
-	.query_ras_error_count = mmhub_v1_7_query_ras_error_count,
-	.reset_ras_error_count = mmhub_v1_7_reset_ras_error_count,
-	.query_ras_error_status = mmhub_v1_7_query_ras_error_status,
-	.reset_ras_error_status = mmhub_v1_7_reset_ras_error_status,
+	.ops = {
+		.ras_late_init = amdgpu_mmhub_ras_late_init,
+		.ras_fini = amdgpu_mmhub_ras_fini,
+		.query_ras_error_count = mmhub_v1_7_query_ras_error_count,
+		.reset_ras_error_count = mmhub_v1_7_reset_ras_error_count,
+		.query_ras_error_status = mmhub_v1_7_query_ras_error_status,
+		.reset_ras_error_status = mmhub_v1_7_reset_ras_error_status,
+	},
 };
 
 const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index ff49eeaf7882..1173190c4d8d 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -1656,11 +1656,13 @@ static void mmhub_v9_4_query_ras_error_status(struct amdgpu_device *adev)
 }
 
 const struct amdgpu_mmhub_ras_funcs mmhub_v9_4_ras_funcs = {
-	.ras_late_init = amdgpu_mmhub_ras_late_init,
-	.ras_fini = amdgpu_mmhub_ras_fini,
-	.query_ras_error_count = mmhub_v9_4_query_ras_error_count,
-	.reset_ras_error_count = mmhub_v9_4_reset_ras_error_count,
-	.query_ras_error_status = mmhub_v9_4_query_ras_error_status,
+	.ops = {
+		.ras_late_init = amdgpu_mmhub_ras_late_init,
+		.ras_fini = amdgpu_mmhub_ras_fini,
+		.query_ras_error_count = mmhub_v9_4_query_ras_error_count,
+		.reset_ras_error_count = mmhub_v9_4_reset_ras_error_count,
+		.query_ras_error_status = mmhub_v9_4_query_ras_error_status,
+	},
 };
 
 const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 7/9] drm/amdgpu: Modify nbio block to fit for the unified ras function pointers.
  2021-11-25 10:56 [PATCH 1/9] drm/amdgpu:Define the unified ras function pointers of each IP block yipechai
                   ` (4 preceding siblings ...)
  2021-11-25 10:56 ` [PATCH 6/9] drm/amdgpu: Modify mmhub " yipechai
@ 2021-11-25 10:56 ` yipechai
  2021-11-25 10:57 ` [PATCH 8/9] drm/amdgpu: Modify umc " yipechai
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: yipechai @ 2021-11-25 10:56 UTC (permalink / raw)
  To: amd-gfx; +Cc: yipechai, yipechai

Modify nbio block ras functions to fit for the unified ras function pointers.

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 7 ++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  | 4 ++--
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   | 8 +++++---
 drivers/gpu/drm/amd/amdgpu/soc15.c       | 8 ++++----
 4 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index 843052205bd5..21574493afff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -22,7 +22,7 @@
  */
 #ifndef __AMDGPU_NBIO_H__
 #define __AMDGPU_NBIO_H__
-
+#include "amdgpu_ras.h"
 /*
  * amdgpu nbio functions
  */
@@ -48,14 +48,11 @@ struct nbio_hdp_flush_reg {
 };
 
 struct amdgpu_nbio_ras_funcs {
+	struct amdgpu_ras_block_ops ops;
 	void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev);
 	void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev);
 	int (*init_ras_controller_interrupt)(struct amdgpu_device *adev);
 	int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev);
-	void (*query_ras_error_count)(struct amdgpu_device *adev,
-				      void *ras_error_status);
-	int (*ras_late_init)(struct amdgpu_device *adev);
-	void (*ras_fini)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_nbio_funcs {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 2d9ef677a2ef..2c79172f6031 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -938,8 +938,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 		break;
 	case AMDGPU_RAS_BLOCK__PCIE_BIF:
 		if (adev->nbio.ras_funcs &&
-		    adev->nbio.ras_funcs->query_ras_error_count)
-			adev->nbio.ras_funcs->query_ras_error_count(adev, &err_data);
+		    adev->nbio.ras_funcs->ops.query_ras_error_count)
+			adev->nbio.ras_funcs->ops.query_ras_error_count(adev, &err_data);
 		break;
 	case AMDGPU_RAS_BLOCK__XGMI_WAFL:
 		if (adev->gmc.xgmi.ras_funcs &&
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 91b3afa946f5..ebbe78d2ca52 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -643,9 +643,11 @@ const struct amdgpu_nbio_ras_funcs nbio_v7_4_ras_funcs = {
 	.handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring,
 	.init_ras_controller_interrupt = nbio_v7_4_init_ras_controller_interrupt,
 	.init_ras_err_event_athub_interrupt = nbio_v7_4_init_ras_err_event_athub_interrupt,
-	.query_ras_error_count = nbio_v7_4_query_ras_error_count,
-	.ras_late_init = amdgpu_nbio_ras_late_init,
-	.ras_fini = amdgpu_nbio_ras_fini,
+	.ops = {
+		.query_ras_error_count = nbio_v7_4_query_ras_error_count,
+		.ras_late_init = amdgpu_nbio_ras_late_init,
+		.ras_fini = amdgpu_nbio_ras_fini,
+	},
 };
 
 static void nbio_v7_4_program_ltr(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index f9d92b6deef0..99176af847f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1386,8 +1386,8 @@ static int soc15_common_late_init(void *handle)
 		xgpu_ai_mailbox_get_irq(adev);
 
 	if (adev->nbio.ras_funcs &&
-	    adev->nbio.ras_funcs->ras_late_init)
-		r = adev->nbio.ras_funcs->ras_late_init(adev);
+	    adev->nbio.ras_funcs->ops.ras_late_init)
+		r = adev->nbio.ras_funcs->ops.ras_late_init(adev);
 
 	return r;
 }
@@ -1409,8 +1409,8 @@ static int soc15_common_sw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	if (adev->nbio.ras_funcs &&
-	    adev->nbio.ras_funcs->ras_fini)
-		adev->nbio.ras_funcs->ras_fini(adev);
+	    adev->nbio.ras_funcs->ops.ras_fini)
+		adev->nbio.ras_funcs->ops.ras_fini(adev);
 	adev->df.funcs->sw_fini(adev);
 	return 0;
 }
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 8/9] drm/amdgpu: Modify umc block to fit for the unified ras function pointers.
  2021-11-25 10:56 [PATCH 1/9] drm/amdgpu:Define the unified ras function pointers of each IP block yipechai
                   ` (5 preceding siblings ...)
  2021-11-25 10:56 ` [PATCH 7/9] drm/amdgpu: Modify nbio " yipechai
@ 2021-11-25 10:57 ` yipechai
  2021-11-25 10:57 ` [PATCH 9/9] drm/amdgpu: Modify sdma " yipechai
  2021-11-25 11:41 ` [PATCH 1/9] drm/amdgpu:Define the unified ras function pointers of each IP block Lazar, Lijo
  8 siblings, 0 replies; 11+ messages in thread
From: yipechai @ 2021-11-25 10:57 UTC (permalink / raw)
  To: amd-gfx; +Cc: yipechai, yipechai

Modify umc block ras functions to fit for the unified ras function pointers.

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c |  8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 12 ++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c |  8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h |  9 ++-------
 drivers/gpu/drm/amd/amdgpu/umc_v6_1.c   | 10 ++++++----
 drivers/gpu/drm/amd/amdgpu/umc_v6_7.c   | 12 +++++++-----
 drivers/gpu/drm/amd/amdgpu/umc_v8_7.c   | 11 ++++++-----
 7 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 7780effdf3ac..4499cc5186cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -435,8 +435,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 	int r;
 
 	if (adev->umc.ras_funcs &&
-	    adev->umc.ras_funcs->ras_late_init) {
-		r = adev->umc.ras_funcs->ras_late_init(adev);
+	    adev->umc.ras_funcs->ops.ras_late_init) {
+		r = adev->umc.ras_funcs->ops.ras_late_init(adev);
 		if (r)
 			return r;
 	}
@@ -492,8 +492,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 {
 	if (adev->umc.ras_funcs &&
-	    adev->umc.ras_funcs->ras_fini)
-		adev->umc.ras_funcs->ras_fini(adev);
+	    adev->umc.ras_funcs->ops.ras_fini)
+		adev->umc.ras_funcs->ops.ras_fini(adev);
 
 	if (adev->mmhub.ras_funcs &&
 	    adev->mmhub.ras_funcs->ops.ras_fini)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 2c79172f6031..65306e0079af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -902,14 +902,14 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 	switch (info->head.block) {
 	case AMDGPU_RAS_BLOCK__UMC:
 		if (adev->umc.ras_funcs &&
-		    adev->umc.ras_funcs->query_ras_error_count)
-			adev->umc.ras_funcs->query_ras_error_count(adev, &err_data);
+		    adev->umc.ras_funcs->ops.query_ras_error_count)
+			adev->umc.ras_funcs->ops.query_ras_error_count(adev, &err_data);
 		/* umc query_ras_error_address is also responsible for clearing
 		 * error status
 		 */
 		if (adev->umc.ras_funcs &&
-		    adev->umc.ras_funcs->query_ras_error_address)
-			adev->umc.ras_funcs->query_ras_error_address(adev, &err_data);
+		    adev->umc.ras_funcs->ops.query_ras_error_address)
+			adev->umc.ras_funcs->ops.query_ras_error_address(adev, &err_data);
 		break;
 	case AMDGPU_RAS_BLOCK__SDMA:
 		if (adev->sdma.funcs->query_ras_error_count) {
@@ -2341,11 +2341,11 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 	if (adev->df.funcs &&
 	    adev->df.funcs->query_ras_poison_mode &&
 	    adev->umc.ras_funcs &&
-	    adev->umc.ras_funcs->query_ras_poison_mode) {
+	    adev->umc.ras_funcs->ops.query_ras_poison_mode) {
 		df_poison =
 			adev->df.funcs->query_ras_poison_mode(adev);
 		umc_poison =
-			adev->umc.ras_funcs->query_ras_poison_mode(adev);
+			adev->umc.ras_funcs->ops.query_ras_poison_mode(adev);
 		/* Only poison is set in both DF and UMC, we can support it */
 		if (df_poison && umc_poison)
 			con->poison_supported = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 0c7c56a91b25..9a44c410be06 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -98,11 +98,11 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
 
 	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
 	if (adev->umc.ras_funcs &&
-	    adev->umc.ras_funcs->query_ras_error_count)
-	    adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status);
+	    adev->umc.ras_funcs->ops.query_ras_error_count)
+	    adev->umc.ras_funcs->ops.query_ras_error_count(adev, ras_error_status);
 
 	if (adev->umc.ras_funcs &&
-	    adev->umc.ras_funcs->query_ras_error_address &&
+	    adev->umc.ras_funcs->ops.query_ras_error_address &&
 	    adev->umc.max_ras_err_cnt_per_query) {
 		err_data->err_addr =
 			kcalloc(adev->umc.max_ras_err_cnt_per_query,
@@ -118,7 +118,7 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
 		/* umc query_ras_error_address is also responsible for clearing
 		 * error status
 		 */
-		adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status);
+		adev->umc.ras_funcs->ops.query_ras_error_address(adev, ras_error_status);
 	}
 
 	/* only uncorrectable error needs gpu reset */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 1f5fe2315236..d6d0d92f8fc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -20,6 +20,7 @@
  */
 #ifndef __AMDGPU_UMC_H__
 #define __AMDGPU_UMC_H__
+#include "amdgpu_ras.h"
 
 /*
  * (addr / 256) * 4096, the higher 26 bits in ErrorAddr
@@ -41,14 +42,8 @@
 #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))
 
 struct amdgpu_umc_ras_funcs {
+	struct amdgpu_ras_block_ops ops;
 	void (*err_cnt_init)(struct amdgpu_device *adev);
-	int (*ras_late_init)(struct amdgpu_device *adev);
-	void (*ras_fini)(struct amdgpu_device *adev);
-	void (*query_ras_error_count)(struct amdgpu_device *adev,
-				      void *ras_error_status);
-	void (*query_ras_error_address)(struct amdgpu_device *adev,
-					void *ras_error_status);
-	bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_umc_funcs {
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 921da7dffb1c..2451b6d025e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -467,8 +467,10 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
 
 const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs = {
 	.err_cnt_init = umc_v6_1_err_cnt_init,
-	.ras_late_init = amdgpu_umc_ras_late_init,
-	.ras_fini = amdgpu_umc_ras_fini,
-	.query_ras_error_count = umc_v6_1_query_ras_error_count,
-	.query_ras_error_address = umc_v6_1_query_ras_error_address,
+	.ops = {
+		.ras_late_init = amdgpu_umc_ras_late_init,
+		.ras_fini = amdgpu_umc_ras_fini,
+		.query_ras_error_count = umc_v6_1_query_ras_error_count,
+		.query_ras_error_address = umc_v6_1_query_ras_error_address,
+	},
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index f7ec3fe134e5..a4786de6186f 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -322,9 +322,11 @@ static bool umc_v6_7_query_ras_poison_mode(struct amdgpu_device *adev)
 }
 
 const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs = {
-	.ras_late_init = amdgpu_umc_ras_late_init,
-	.ras_fini = amdgpu_umc_ras_fini,
-	.query_ras_error_count = umc_v6_7_query_ras_error_count,
-	.query_ras_error_address = umc_v6_7_query_ras_error_address,
-	.query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
+	.ops = {
+		.ras_late_init = amdgpu_umc_ras_late_init,
+		.ras_fini = amdgpu_umc_ras_fini,
+		.query_ras_error_count = umc_v6_7_query_ras_error_count,
+		.query_ras_error_address = umc_v6_7_query_ras_error_address,
+		.query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
+	},
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
index af59a35788e3..2ae97edf9a47 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
@@ -325,9 +325,10 @@ static void umc_v8_7_err_cnt_init(struct amdgpu_device *adev)
 }
 
 const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs = {
-	.err_cnt_init = umc_v8_7_err_cnt_init,
-	.ras_late_init = amdgpu_umc_ras_late_init,
-	.ras_fini = amdgpu_umc_ras_fini,
-	.query_ras_error_count = umc_v8_7_query_ras_error_count,
-	.query_ras_error_address = umc_v8_7_query_ras_error_address,
+	.ops = {
+		.ras_late_init = amdgpu_umc_ras_late_init,
+		.ras_fini = amdgpu_umc_ras_fini,
+		.query_ras_error_count = umc_v8_7_query_ras_error_count,
+		.query_ras_error_address = umc_v8_7_query_ras_error_address,
+	},
 };
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 9/9] drm/amdgpu: Modify sdma block to fit for the unified ras function pointers.
  2021-11-25 10:56 [PATCH 1/9] drm/amdgpu:Define the unified ras function pointers of each IP block yipechai
                   ` (6 preceding siblings ...)
  2021-11-25 10:57 ` [PATCH 8/9] drm/amdgpu: Modify umc " yipechai
@ 2021-11-25 10:57 ` yipechai
  2021-11-25 11:41 ` [PATCH 1/9] drm/amdgpu:Define the unified ras function pointers of each IP block Lazar, Lijo
  8 siblings, 0 replies; 11+ messages in thread
From: yipechai @ 2021-11-25 10:57 UTC (permalink / raw)
  To: amd-gfx; +Cc: yipechai, yipechai

Modify sdma block ras functions to fit for the unified ras function pointers.

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  | 11 +++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 11 +++----
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 42 ++++++++++++++++--------
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c   | 25 +++++++++++---
 4 files changed, 56 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 65306e0079af..e6d82e6e702c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -912,11 +912,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 			adev->umc.ras_funcs->ops.query_ras_error_address(adev, &err_data);
 		break;
 	case AMDGPU_RAS_BLOCK__SDMA:
-		if (adev->sdma.funcs->query_ras_error_count) {
-			for (i = 0; i < adev->sdma.num_instances; i++)
-				adev->sdma.funcs->query_ras_error_count(adev, i,
-									&err_data);
-		}
+		if (adev->sdma.ras_funcs->ops.query_ras_error_count)
+			adev->sdma.ras_funcs->ops.query_ras_error_count(adev, &err_data);
 		break;
 	case AMDGPU_RAS_BLOCK__GFX:
 		if (adev->gfx.ras_funcs &&
@@ -1035,8 +1032,8 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
 			adev->mmhub.ras_funcs->ops.reset_ras_error_status(adev);
 		break;
 	case AMDGPU_RAS_BLOCK__SDMA:
-		if (adev->sdma.funcs->reset_ras_error_count)
-			adev->sdma.funcs->reset_ras_error_count(adev);
+		if (adev->sdma.ras_funcs->ops.reset_ras_error_count)
+			adev->sdma.ras_funcs->ops.reset_ras_error_count(adev);
 		break;
 	case AMDGPU_RAS_BLOCK__HDP:
 		if (adev->hdp.ras_funcs &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index f8fb755e3aa6..a76c63520ca0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -23,6 +23,7 @@
 
 #ifndef __AMDGPU_SDMA_H__
 #define __AMDGPU_SDMA_H__
+#include "amdgpu_ras.h"
 
 /* max number of IP instances */
 #define AMDGPU_MAX_SDMA_INSTANCES		8
@@ -51,12 +52,8 @@ struct amdgpu_sdma_instance {
 };
 
 struct amdgpu_sdma_ras_funcs {
-	int (*ras_late_init)(struct amdgpu_device *adev,
-			void *ras_ih_info);
-	void (*ras_fini)(struct amdgpu_device *adev);
-	int (*query_ras_error_count)(struct amdgpu_device *adev,
-			uint32_t instance, void *ras_error_status);
-	void (*reset_ras_error_count)(struct amdgpu_device *adev);
+	struct amdgpu_ras_block_ops ops;
+	int (*sdma_ras_late_init)(struct amdgpu_device *adev, void *ras_ih_info);
 };
 
 struct amdgpu_sdma {
@@ -73,7 +70,7 @@ struct amdgpu_sdma {
 	uint32_t                    srbm_soft_reset;
 	bool			has_page_queue;
 	struct ras_common_if	*ras_if;
-	const struct amdgpu_sdma_ras_funcs	*funcs;
+	const struct amdgpu_sdma_ras_funcs	*ras_funcs;
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 69c9e460c1eb..d5bd23b57f5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1898,13 +1898,13 @@ static int sdma_v4_0_late_init(void *handle)
 	sdma_v4_0_setup_ulv(adev);
 
 	if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
-		if (adev->sdma.funcs &&
-		    adev->sdma.funcs->reset_ras_error_count)
-			adev->sdma.funcs->reset_ras_error_count(adev);
+		if (adev->sdma.ras_funcs &&
+		    adev->sdma.ras_funcs->ops.reset_ras_error_count)
+			adev->sdma.ras_funcs->ops.reset_ras_error_count(adev);
 	}
 
-	if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
-		return adev->sdma.funcs->ras_late_init(adev, &ih_info);
+	if (adev->sdma.ras_funcs && adev->sdma.ras_funcs->sdma_ras_late_init)
+		return adev->sdma.ras_funcs->sdma_ras_late_init(adev, &ih_info);
 	else
 		return 0;
 }
@@ -2007,8 +2007,8 @@ static int sdma_v4_0_sw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int i;
 
-	if (adev->sdma.funcs && adev->sdma.funcs->ras_fini)
-		adev->sdma.funcs->ras_fini(adev);
+	if (adev->sdma.ras_funcs && adev->sdma.ras_funcs->ops.ras_fini)
+		adev->sdma.ras_funcs->ops.ras_fini(adev);
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
@@ -2745,7 +2745,7 @@ static void sdma_v4_0_get_ras_error_count(uint32_t value,
 	}
 }
 
-static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
+static int sdma_v4_0_query_ras_error_count_by_instance(struct amdgpu_device *adev,
 			uint32_t instance, void *ras_error_status)
 {
 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
@@ -2778,11 +2778,25 @@ static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
 	}
 }
 
+static void sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,  void *ras_error_status)
+{
+	int i = 0;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		if (sdma_v4_0_query_ras_error_count_by_instance(adev, i, ras_error_status))
+		{
+			dev_err(adev->dev, "Query ras error count failed in SDMA%d \n", i);
+			return;
+		}
+	}
+}
+
 static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = {
-	.ras_late_init = amdgpu_sdma_ras_late_init,
-	.ras_fini = amdgpu_sdma_ras_fini,
-	.query_ras_error_count = sdma_v4_0_query_ras_error_count,
-	.reset_ras_error_count = sdma_v4_0_reset_ras_error_count,
+	.ops = {
+		.ras_fini = amdgpu_sdma_ras_fini,
+		.query_ras_error_count = sdma_v4_0_query_ras_error_count,
+		.reset_ras_error_count = sdma_v4_0_reset_ras_error_count,
+	},
+	.sdma_ras_late_init = amdgpu_sdma_ras_late_init,
 };
 
 static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
@@ -2790,10 +2804,10 @@ static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
 	switch (adev->asic_type) {
 	case CHIP_VEGA20:
 	case CHIP_ARCTURUS:
-		adev->sdma.funcs = &sdma_v4_0_ras_funcs;
+		adev->sdma.ras_funcs = &sdma_v4_0_ras_funcs;
 		break;
 	case CHIP_ALDEBARAN:
-		adev->sdma.funcs = &sdma_v4_4_ras_funcs;
+		adev->sdma.ras_funcs = &sdma_v4_4_ras_funcs;
 		break;
 	default:
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
index bf95007f0843..a4b05dbb88ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
@@ -188,7 +188,7 @@ static void sdma_v4_4_get_ras_error_count(struct amdgpu_device *adev,
 	}
 }
 
-static int sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev,
+static int sdma_v4_4_query_ras_error_count_by_instance(struct amdgpu_device *adev,
 					   uint32_t instance,
 					   void *ras_error_status)
 {
@@ -245,9 +245,24 @@ static void sdma_v4_4_reset_ras_error_count(struct amdgpu_device *adev)
 	}
 }
 
+static void sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev,  void *ras_error_status)
+{
+	int i = 0;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		if (sdma_v4_4_query_ras_error_count_by_instance(adev, i, ras_error_status))
+		{
+			dev_err(adev->dev, "Query ras error count failed in SDMA%d \n", i);
+			return;
+		}
+	}
+
+}
+
 const struct amdgpu_sdma_ras_funcs sdma_v4_4_ras_funcs = {
-	.ras_late_init = amdgpu_sdma_ras_late_init,
-	.ras_fini = amdgpu_sdma_ras_fini,
-	.query_ras_error_count = sdma_v4_4_query_ras_error_count,
-	.reset_ras_error_count = sdma_v4_4_reset_ras_error_count,
+	.ops = {
+		.ras_fini = amdgpu_sdma_ras_fini,
+		.query_ras_error_count = sdma_v4_4_query_ras_error_count,
+		.reset_ras_error_count = sdma_v4_4_reset_ras_error_count,
+	},
+	.sdma_ras_late_init = amdgpu_sdma_ras_late_init,
 };
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/9] drm/amdgpu:Define the unified ras function pointers of each IP block
  2021-11-25 10:56 [PATCH 1/9] drm/amdgpu:Define the unified ras function pointers of each IP block yipechai
                   ` (7 preceding siblings ...)
  2021-11-25 10:57 ` [PATCH 9/9] drm/amdgpu: Modify sdma " yipechai
@ 2021-11-25 11:41 ` Lazar, Lijo
  2021-11-26  2:39   ` Chai, Thomas
  8 siblings, 1 reply; 11+ messages in thread
From: Lazar, Lijo @ 2021-11-25 11:41 UTC (permalink / raw)
  To: yipechai, amd-gfx; +Cc: yipechai



On 11/25/2021 4:26 PM, yipechai wrote:
> Define an unified ras function pointers for each ip block to adapt.
> 
> Signed-off-by: yipechai <YiPeng.Chai@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 20 ++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 36 ++++++++++++-------------
>   2 files changed, 37 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 90f0db3b4f65..dc6c8130e2d7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2739,3 +2739,23 @@ static void amdgpu_register_bad_pages_mca_notifier(void)
>           }
>   }
>   #endif
> +
> +/* check if ras is supported on block, say, sdma, gfx */
> +int amdgpu_ras_is_supported(struct amdgpu_device *adev,
> +		unsigned int block)
> +{
> +	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> +
> +	if (block >= AMDGPU_RAS_BLOCK_COUNT)
> +		return 0;
> +	return ras && (adev->ras_enabled & (1 << block));
> +}
> +
> +int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
> +{
> +	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> +
> +	if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
> +		schedule_work(&ras->recovery_work);
> +	return 0;
> +}

These changes look unrelated to this patch. Maybe move these functions from 
the .h file to the .c file in a separate patch.

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> index cdd0010a5389..4b7da40dd837 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> @@ -469,6 +469,19 @@ struct ras_debug_if {
>   	};
>   	int op;
>   };
> +
> +struct amdgpu_ras_block_ops {
> +	int (*ras_late_init)(struct amdgpu_device *adev);
> +	void (*ras_fini)(struct amdgpu_device *adev);
> +	int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
> +	void  (*query_ras_error_count)(struct amdgpu_device *adev,void *ras_error_status);
> +	void (*query_ras_error_status)(struct amdgpu_device *adev);
> +	bool  (*query_ras_poison_mode)(struct amdgpu_device *adev);
> +	void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status);
> +	void (*reset_ras_error_count)(struct amdgpu_device *adev);
> +	void (*reset_ras_error_status)(struct amdgpu_device *adev);
> +};
> +

Generic comment - Since all the operations are consolidated under _ops, 
it makes sense to rename the <ip>_ras_funcs to <ip>_ras.

Ex: amdgpu_gfx_ras_funcs => amdgpu_gfx_ras, amdgpu_xgmi_ras_funcs => 
amdgpu_xgmi_ras and so forth.

In future, these ras blocks may have data members to keep IP specific 
ras data.

Thanks,
Lijo

>   /* work flow
>    * vbios
>    * 1: ras feature enable (enabled by default)
> @@ -486,16 +499,6 @@ struct ras_debug_if {
>   #define amdgpu_ras_get_context(adev)		((adev)->psp.ras_context.ras)
>   #define amdgpu_ras_set_context(adev, ras_con)	((adev)->psp.ras_context.ras = (ras_con))
>   
> -/* check if ras is supported on block, say, sdma, gfx */
> -static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
> -		unsigned int block)
> -{
> -	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> -
> -	if (block >= AMDGPU_RAS_BLOCK_COUNT)
> -		return 0;
> -	return ras && (adev->ras_enabled & (1 << block));
> -}
>   
>   int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
>   
> @@ -512,15 +515,6 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
>   
>   int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev);
>   
> -static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
> -{
> -	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> -
> -	if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
> -		schedule_work(&ras->recovery_work);
> -	return 0;
> -}
> -
>   static inline enum ta_ras_block
>   amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
>   	switch (block) {
> @@ -652,4 +646,8 @@ const char *get_ras_block_str(struct ras_common_if *ras_block);
>   
>   bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev);
>   
> +int amdgpu_ras_is_supported(struct amdgpu_device *adev,	unsigned int block);
> +
> +int amdgpu_ras_reset_gpu(struct amdgpu_device *adev);
> +
>   #endif
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH 1/9] drm/amdgpu:Define the unified ras function pointers of each IP block
  2021-11-25 11:41 ` [PATCH 1/9] drm/amdgpu:Define the unified ras function pointers of each IP block Lazar, Lijo
@ 2021-11-26  2:39   ` Chai, Thomas
  0 siblings, 0 replies; 11+ messages in thread
From: Chai, Thomas @ 2021-11-26  2:39 UTC (permalink / raw)
  To: Lazar, Lijo, amd-gfx

Hi Lijo:
   I have added my replies after your comments.

Thanks,
Thomas
-----Original Message-----
From: Lazar, Lijo <Lijo.Lazar@amd.com> 
Sent: Thursday, November 25, 2021 7:41 PM
To: Chai, Thomas <YiPeng.Chai@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Chai, Thomas <YiPeng.Chai@amd.com>
Subject: Re: [PATCH 1/9] drm/amdgpu:Define the unified ras function pointers of each IP block



On 11/25/2021 4:26 PM, yipechai wrote:
> Define an unified ras function pointers for each ip block to adapt.
> 
> Signed-off-by: yipechai <YiPeng.Chai@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 20 ++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 36 ++++++++++++-------------
>   2 files changed, 37 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 90f0db3b4f65..dc6c8130e2d7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2739,3 +2739,23 @@ static void amdgpu_register_bad_pages_mca_notifier(void)
>           }
>   }
>   #endif
> +
> +/* check if ras is supported on block, say, sdma, gfx */ int 
> +amdgpu_ras_is_supported(struct amdgpu_device *adev,
> +		unsigned int block)
> +{
> +	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> +
> +	if (block >= AMDGPU_RAS_BLOCK_COUNT)
> +		return 0;
> +	return ras && (adev->ras_enabled & (1 << block)); }
> +
> +int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) {
> +	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> +
> +	if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
> +		schedule_work(&ras->recovery_work);
> +	return 0;
> +}

>These changes look unrelated. Maybe as another patch to move from .h file to .c file.
   When amdgpu_ras.h is included in the .h files of other IP blocks (such as amdgpu_gfx.h, amdgpu_xgmi.h, ...) so that those blocks can use 'struct amdgpu_ras_block_ops', the compilation fails with this error:
   	“drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h:499:46: error: dereferencing pointer to incomplete type ‘struct amdgpu_device’
 	 499 | #define amdgpu_ras_get_context(adev)  ((adev)->psp.ras_context.ras)”
   struct amdgpu_device is defined in amdgpu.h, and amdgpu.h is already included in amdgpu_ras.h, so there seems to be a problem with the header files including each other (circular inclusion). Since amdgpu_ras_get_context(adev) is only used in the functions 'amdgpu_ras_is_supported' and 'amdgpu_ras_reset_gpu', moving these two functions to the .c file makes the compilation succeed.

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> index cdd0010a5389..4b7da40dd837 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> @@ -469,6 +469,19 @@ struct ras_debug_if {
>   	};
>   	int op;
>   };
> +
> +struct amdgpu_ras_block_ops {
> +	int (*ras_late_init)(struct amdgpu_device *adev);
> +	void (*ras_fini)(struct amdgpu_device *adev);
> +	int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
> +	void  (*query_ras_error_count)(struct amdgpu_device *adev,void *ras_error_status);
> +	void (*query_ras_error_status)(struct amdgpu_device *adev);
> +	bool  (*query_ras_poison_mode)(struct amdgpu_device *adev);
> +	void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status);
> +	void (*reset_ras_error_count)(struct amdgpu_device *adev);
> +	void (*reset_ras_error_status)(struct amdgpu_device *adev); };
> +

>Generic comment - Since all the operations are consolidated under _ops, it makes sense to rename the <ip>_ras_funcs to <ip>_ras.

>Ex: amdgpu_gfx_ras_funcs => amdgpu_gfx_ras, amdgpu_xgmi_ras_funcs => amdgpu_xgmi_ras and so forth.

>In future, these ras blocks may have data members to keep IP specific ras data.

OK, I will do it.

Thanks,
Lijo

>   /* work flow
>    * vbios
>    * 1: ras feature enable (enabled by default) @@ -486,16 +499,6 @@ 
> struct ras_debug_if {
>   #define amdgpu_ras_get_context(adev)		((adev)->psp.ras_context.ras)
>   #define amdgpu_ras_set_context(adev, ras_con)	((adev)->psp.ras_context.ras = (ras_con))
>   
> -/* check if ras is supported on block, say, sdma, gfx */ -static 
> inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
> -		unsigned int block)
> -{
> -	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> -
> -	if (block >= AMDGPU_RAS_BLOCK_COUNT)
> -		return 0;
> -	return ras && (adev->ras_enabled & (1 << block));
> -}
>   
>   int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
>   
> @@ -512,15 +515,6 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device 
> *adev,
>   
>   int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev);
>   
> -static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) -{
> -	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> -
> -	if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
> -		schedule_work(&ras->recovery_work);
> -	return 0;
> -}
> -
>   static inline enum ta_ras_block
>   amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
>   	switch (block) {
> @@ -652,4 +646,8 @@ const char *get_ras_block_str(struct ras_common_if 
> *ras_block);
>   
>   bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device 
> *adev);
>   
> +int amdgpu_ras_is_supported(struct amdgpu_device *adev,	unsigned int block);
> +
> +int amdgpu_ras_reset_gpu(struct amdgpu_device *adev);
> +
>   #endif
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-11-26  2:40 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-25 10:56 [PATCH 1/9] drm/amdgpu:Define the unified ras function pointers of each IP block yipechai
2021-11-25 10:56 ` [PATCH 2/9] drm/amdgpu: Modify gfx block to fit for the unified ras function pointers yipechai
2021-11-25 10:56 ` [PATCH 3/9] drm/amdgpu: Modify gmc " yipechai
2021-11-25 10:56 ` [PATCH 4/9] drm/amdgpu: Modify hdp " yipechai
2021-11-25 10:56 ` [PATCH 5/9] drm/amdgpu: Modify mca " yipechai
2021-11-25 10:56 ` [PATCH 6/9] drm/amdgpu: Modify mmhub " yipechai
2021-11-25 10:56 ` [PATCH 7/9] drm/amdgpu: Modify nbio " yipechai
2021-11-25 10:57 ` [PATCH 8/9] drm/amdgpu: Modify umc " yipechai
2021-11-25 10:57 ` [PATCH 9/9] drm/amdgpu: Modify sdma " yipechai
2021-11-25 11:41 ` [PATCH 1/9] drm/amdgpu:Define the unified ras function pointers of each IP block Lazar, Lijo
2021-11-26  2:39   ` Chai, Thomas

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.