* [PATCH 01/26] drm/amdgpu: move some ras data structure to amdgpu_ras.h
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
@ 2019-07-31 17:57 ` Alex Deucher
2019-07-31 17:57 ` [PATCH 02/26] drm/amdgpu: init RSMU and UMC ip base address for vega20 Alex Deucher
` (24 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:57 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Dennis Li, Hawking Zhang
From: Hawking Zhang <Hawking.Zhang@amd.com>
These are common structures that can be included by IP specific
source files
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 68 ------------------------
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 69 ++++++++++++++++++++++++-
2 files changed, 68 insertions(+), 69 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index b45aaf04a574..3be306bf1603 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -28,74 +28,6 @@
#include "amdgpu_ras.h"
#include "amdgpu_atomfirmware.h"
-struct ras_ih_data {
- /* interrupt bottom half */
- struct work_struct ih_work;
- int inuse;
- /* IP callback */
- ras_ih_cb cb;
- /* full of entries */
- unsigned char *ring;
- unsigned int ring_size;
- unsigned int element_size;
- unsigned int aligned_element_size;
- unsigned int rptr;
- unsigned int wptr;
-};
-
-struct ras_fs_data {
- char sysfs_name[32];
- char debugfs_name[32];
-};
-
-struct ras_err_data {
- unsigned long ue_count;
- unsigned long ce_count;
-};
-
-struct ras_err_handler_data {
- /* point to bad pages array */
- struct {
- unsigned long bp;
- struct amdgpu_bo *bo;
- } *bps;
- /* the count of entries */
- int count;
- /* the space can place new entries */
- int space_left;
- /* last reserved entry's index + 1 */
- int last_reserved;
-};
-
-struct ras_manager {
- struct ras_common_if head;
- /* reference count */
- int use;
- /* ras block link */
- struct list_head node;
- /* the device */
- struct amdgpu_device *adev;
- /* debugfs */
- struct dentry *ent;
- /* sysfs */
- struct device_attribute sysfs_attr;
- int attr_inuse;
-
- /* fs node name */
- struct ras_fs_data fs_data;
-
- /* IH data */
- struct ras_ih_data ih_data;
-
- struct ras_err_data err_data;
-};
-
-struct ras_badpage {
- unsigned int bp;
- unsigned int size;
- unsigned int flags;
-};
-
const char *ras_error_string[] = {
"none",
"parity",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index b2841195bd3b..80e94d604a2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -108,8 +108,75 @@ struct amdgpu_ras {
uint32_t flags;
};
-/* interfaces for IP */
+struct ras_ih_data {
+ /* interrupt bottom half */
+ struct work_struct ih_work;
+ int inuse;
+ /* IP callback */
+ ras_ih_cb cb;
+ /* full of entries */
+ unsigned char *ring;
+ unsigned int ring_size;
+ unsigned int element_size;
+ unsigned int aligned_element_size;
+ unsigned int rptr;
+ unsigned int wptr;
+};
+
+struct ras_fs_data {
+ char sysfs_name[32];
+ char debugfs_name[32];
+};
+
+struct ras_err_data {
+ unsigned long ue_count;
+ unsigned long ce_count;
+};
+
+struct ras_err_handler_data {
+ /* point to bad pages array */
+ struct {
+ unsigned long bp;
+ struct amdgpu_bo *bo;
+ } *bps;
+ /* the count of entries */
+ int count;
+ /* the space can place new entries */
+ int space_left;
+ /* last reserved entry's index + 1 */
+ int last_reserved;
+};
+struct ras_manager {
+ struct ras_common_if head;
+ /* reference count */
+ int use;
+ /* ras block link */
+ struct list_head node;
+ /* the device */
+ struct amdgpu_device *adev;
+ /* debugfs */
+ struct dentry *ent;
+ /* sysfs */
+ struct device_attribute sysfs_attr;
+ int attr_inuse;
+
+ /* fs node name */
+ struct ras_fs_data fs_data;
+
+ /* IH data */
+ struct ras_ih_data ih_data;
+
+ struct ras_err_data err_data;
+};
+
+struct ras_badpage {
+ unsigned int bp;
+ unsigned int size;
+ unsigned int flags;
+};
+
+/* interfaces for IP */
struct ras_fs_if {
struct ras_common_if head;
char sysfs_name[32];
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 02/26] drm/amdgpu: init RSMU and UMC ip base address for vega20
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
2019-07-31 17:57 ` [PATCH 01/26] drm/amdgpu: move some ras data structure to amdgpu_ras.h Alex Deucher
@ 2019-07-31 17:57 ` Alex Deucher
2019-07-31 17:57 ` [PATCH 03/26] drm/amdgpu: add amdgpu_umc_functions structure Alex Deucher
` (23 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:57 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Dennis Li, Hawking Zhang
From: Hawking Zhang <Hawking.Zhang@amd.com>
the driver needs to program RSMU and UMC registers to
support vega20 RAS feature
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 2 ++
2 files changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 121cc5544b2b..a197f4b33eda 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -755,6 +755,8 @@ enum amd_hw_ip_block_type {
NBIF_HWIP,
THM_HWIP,
CLK_HWIP,
+ UMC_HWIP,
+ RSMU_HWIP,
MAX_HWIP
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
index 79223188bd47..587e33f5dcce 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -50,6 +50,8 @@ int vega20_reg_base_init(struct amdgpu_device *adev)
adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
+ adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i]));
+ adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i]));
}
return 0;
}
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 03/26] drm/amdgpu: add amdgpu_umc_functions structure
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
2019-07-31 17:57 ` [PATCH 01/26] drm/amdgpu: move some ras data structure to amdgpu_ras.h Alex Deucher
2019-07-31 17:57 ` [PATCH 02/26] drm/amdgpu: init RSMU and UMC ip base address for vega20 Alex Deucher
@ 2019-07-31 17:57 ` Alex Deucher
2019-07-31 17:57 ` [PATCH 04/26] drm/amdgpu: add rsmu v_0_0_2 ip headers Alex Deucher
` (22 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:57 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Dennis Li, Hawking Zhang
From: Hawking Zhang <Hawking.Zhang@amd.com>
This is common structure as UMC callback function
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 29 +++++++++++++++++++++++++
2 files changed, 31 insertions(+)
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a197f4b33eda..72d0331f4ca1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -86,6 +86,7 @@
#include "amdgpu_smu.h"
#include "amdgpu_discovery.h"
#include "amdgpu_mes.h"
+#include "amdgpu_umc.h"
#define MAX_GPU_INSTANCE 16
@@ -970,6 +971,7 @@ struct amdgpu_device {
const struct amdgpu_nbio_funcs *nbio_funcs;
const struct amdgpu_df_funcs *df_funcs;
+ const struct amdgpu_umc_funcs *umc_funcs;
/* delayed work_func for deferring clockgating during resume */
struct delayed_work delayed_init_work;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
new file mode 100644
index 000000000000..1ee1a00e5ac8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __AMDGPU_UMC_H__
+#define __AMDGPU_UMC_H__
+
+struct amdgpu_umc_funcs {
+ void (*query_ras_error_count)(struct amdgpu_device *adev,
+ void *ras_error_status);
+};
+
+#endif
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 04/26] drm/amdgpu: add rsmu v_0_0_2 ip headers
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (2 preceding siblings ...)
2019-07-31 17:57 ` [PATCH 03/26] drm/amdgpu: add amdgpu_umc_functions structure Alex Deucher
@ 2019-07-31 17:57 ` Alex Deucher
2019-07-31 17:57 ` [PATCH 05/26] drm/amdgpu: add umc v6_1_1 IP headers Alex Deucher
` (21 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:57 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Dennis Li, Hawking Zhang
From: Hawking Zhang <Hawking.Zhang@amd.com>
remote smu (rsmu) is a sub-block used as ip register interface,
error handling, reset generation.etc
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
---
.../include/asic_reg/rsmu/rsmu_0_0_2_offset.h | 27 ++++++++++++++++
.../asic_reg/rsmu/rsmu_0_0_2_sh_mask.h | 32 +++++++++++++++++++
2 files changed, 59 insertions(+)
create mode 100644 drivers/gpu/drm/amd/include/asic_reg/rsmu/rsmu_0_0_2_offset.h
create mode 100644 drivers/gpu/drm/amd/include/asic_reg/rsmu/rsmu_0_0_2_sh_mask.h
diff --git a/drivers/gpu/drm/amd/include/asic_reg/rsmu/rsmu_0_0_2_offset.h b/drivers/gpu/drm/amd/include/asic_reg/rsmu/rsmu_0_0_2_offset.h
new file mode 100644
index 000000000000..46466ae77f19
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/rsmu/rsmu_0_0_2_offset.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _rsmu_0_0_2_OFFSET_HEADER
+#define _rsmu_0_0_2_OFFSET_HEADER
+
+#define mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU 0x0d91
+#define mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU_BASE_IDX 0
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/rsmu/rsmu_0_0_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/rsmu/rsmu_0_0_2_sh_mask.h
new file mode 100644
index 000000000000..ea0acb598254
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/rsmu/rsmu_0_0_2_sh_mask.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _rsmu_0_0_2_SH_MASK_HEADER
+#define _rsmu_0_0_2_SH_MASK_HEADER
+
+//RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU
+#define RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU__RSMU_UMC_INDEX_WREN__SHIFT 0x0
+#define RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU__RSMU_UMC_INDEX_INSTANCE__SHIFT 0x10
+#define RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU__RSMU_UMC_INDEX_MODE_EN__SHIFT 0x1f
+#define RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU__RSMU_UMC_INDEX_WREN_MASK 0x0000FFFFL
+#define RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU__RSMU_UMC_INDEX_INSTANCE_MASK 0x000F0000L
+#define RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU__RSMU_UMC_INDEX_MODE_EN_MASK 0x80000000L
+
+#endif
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 05/26] drm/amdgpu: add umc v6_1_1 IP headers
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (3 preceding siblings ...)
2019-07-31 17:57 ` [PATCH 04/26] drm/amdgpu: add rsmu v_0_0_2 ip headers Alex Deucher
@ 2019-07-31 17:57 ` Alex Deucher
2019-07-31 17:57 ` [PATCH 06/26] drm/amdgpu: add umc v6_1 query error count support Alex Deucher
` (20 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:57 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Dennis Li, Hawking Zhang
From: Hawking Zhang <Hawking.Zhang@amd.com>
the change introduces IP headers for unified memory controller (umc)
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
---
.../include/asic_reg/umc/umc_6_1_1_offset.h | 31 +++++++
.../include/asic_reg/umc/umc_6_1_1_sh_mask.h | 91 +++++++++++++++++++
2 files changed, 122 insertions(+)
create mode 100644 drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h
create mode 100644 drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_sh_mask.h
diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h
new file mode 100644
index 000000000000..043aa695d63f
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _umc_6_1_1_OFFSET_HEADER
+#define _umc_6_1_1_OFFSET_HEADER
+
+#define mmUMCCH0_0_EccErrCntSel 0x0360
+#define mmUMCCH0_0_EccErrCntSel_BASE_IDX 0
+#define mmUMCCH0_0_EccErrCnt 0x0361
+#define mmUMCCH0_0_EccErrCnt_BASE_IDX 0
+#define mmMCA_UMC_UMC0_MCUMC_STATUST0 0x03c2
+#define mmMCA_UMC_UMC0_MCUMC_STATUST0_BASE_IDX 0
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_sh_mask.h
new file mode 100644
index 000000000000..45c888280af9
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_sh_mask.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _umc_6_1_1_SH_MASK_HEADER
+#define _umc_6_1_1_SH_MASK_HEADER
+
+//UMCCH0_0_EccErrCntSel
+#define UMCCH0_0_EccErrCntSel__EccErrCntCsSel__SHIFT 0x0
+#define UMCCH0_0_EccErrCntSel__EccErrInt__SHIFT 0xc
+#define UMCCH0_0_EccErrCntSel__EccErrCntEn__SHIFT 0xf
+#define UMCCH0_0_EccErrCntSel__EccErrCntCsSel_MASK 0x0000000FL
+#define UMCCH0_0_EccErrCntSel__EccErrInt_MASK 0x00003000L
+#define UMCCH0_0_EccErrCntSel__EccErrCntEn_MASK 0x00008000L
+//UMCCH0_0_EccErrCnt
+#define UMCCH0_0_EccErrCnt__EccErrCnt__SHIFT 0x0
+#define UMCCH0_0_EccErrCnt__EccErrCnt_MASK 0x0000FFFFL
+//MCA_UMC_UMC0_MCUMC_STATUST0
+#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCode__SHIFT 0x0
+#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCodeExt__SHIFT 0x10
+#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV0__SHIFT 0x16
+#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreId__SHIFT 0x20
+#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV1__SHIFT 0x26
+#define MCA_UMC_UMC0_MCUMC_STATUST0__Scrub__SHIFT 0x28
+#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV2__SHIFT 0x29
+#define MCA_UMC_UMC0_MCUMC_STATUST0__Poison__SHIFT 0x2b
+#define MCA_UMC_UMC0_MCUMC_STATUST0__Deferred__SHIFT 0x2c
+#define MCA_UMC_UMC0_MCUMC_STATUST0__UECC__SHIFT 0x2d
+#define MCA_UMC_UMC0_MCUMC_STATUST0__CECC__SHIFT 0x2e
+#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV3__SHIFT 0x2f
+#define MCA_UMC_UMC0_MCUMC_STATUST0__Transparent__SHIFT 0x34
+#define MCA_UMC_UMC0_MCUMC_STATUST0__SyndV__SHIFT 0x35
+#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV4__SHIFT 0x36
+#define MCA_UMC_UMC0_MCUMC_STATUST0__TCC__SHIFT 0x37
+#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreIdVal__SHIFT 0x38
+#define MCA_UMC_UMC0_MCUMC_STATUST0__PCC__SHIFT 0x39
+#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrV__SHIFT 0x3a
+#define MCA_UMC_UMC0_MCUMC_STATUST0__MiscV__SHIFT 0x3b
+#define MCA_UMC_UMC0_MCUMC_STATUST0__En__SHIFT 0x3c
+#define MCA_UMC_UMC0_MCUMC_STATUST0__UC__SHIFT 0x3d
+#define MCA_UMC_UMC0_MCUMC_STATUST0__Overflow__SHIFT 0x3e
+#define MCA_UMC_UMC0_MCUMC_STATUST0__Val__SHIFT 0x3f
+#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCode_MASK 0x000000000000FFFFL
+#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCodeExt_MASK 0x00000000003F0000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV0_MASK 0x00000000FFC00000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreId_MASK 0x0000003F00000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV1_MASK 0x000000C000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__Scrub_MASK 0x0000010000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV2_MASK 0x0000060000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__Poison_MASK 0x0000080000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__Deferred_MASK 0x0000100000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__UECC_MASK 0x0000200000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__CECC_MASK 0x0000400000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV3_MASK 0x000F800000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__Transparent_MASK 0x0010000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__SyndV_MASK 0x0020000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV4_MASK 0x0040000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__TCC_MASK 0x0080000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreIdVal_MASK 0x0100000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__PCC_MASK 0x0200000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrV_MASK 0x0400000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__MiscV_MASK 0x0800000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__En_MASK 0x1000000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__UC_MASK 0x2000000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__Overflow_MASK 0x4000000000000000L
+#define MCA_UMC_UMC0_MCUMC_STATUST0__Val_MASK 0x8000000000000000L
+//MCA_UMC_UMC0_MCUMC_ADDRT0
+#define MCA_UMC_UMC0_MCUMC_ADDRT0__ErrorAddr__SHIFT 0x0
+#define MCA_UMC_UMC0_MCUMC_ADDRT0__LSB__SHIFT 0x38
+#define MCA_UMC_UMC0_MCUMC_ADDRT0__Reserved__SHIFT 0x3e
+#define MCA_UMC_UMC0_MCUMC_ADDRT0__ErrorAddr_MASK 0x00FFFFFFFFFFFFFFL
+#define MCA_UMC_UMC0_MCUMC_ADDRT0__LSB_MASK 0x3F00000000000000L
+#define MCA_UMC_UMC0_MCUMC_ADDRT0__Reserved_MASK 0xC000000000000000L
+
+#endif
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 06/26] drm/amdgpu: add umc v6_1 query error count support
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (4 preceding siblings ...)
2019-07-31 17:57 ` [PATCH 05/26] drm/amdgpu: add umc v6_1_1 IP headers Alex Deucher
@ 2019-07-31 17:57 ` Alex Deucher
2019-07-31 17:57 ` [PATCH 07/26] drm/amdgpu: init umc v6_1 functions for vega20 Alex Deucher
` (19 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:57 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Tao Zhou, Dennis Li, Hawking Zhang
From: Hawking Zhang <Hawking.Zhang@amd.com>
Implement umc query_ras_error_count function to support querry
both correctable and uncorrectable error
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
---
drivers/gpu/drm/amd/amdgpu/Makefile | 4 +
drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 162 ++++++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/umc_v6_1.h | 39 +++++++
3 files changed, 205 insertions(+)
create mode 100644 drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
create mode 100644 drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 7a1a78c7b329..cc38a6836825 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -81,6 +81,10 @@ amdgpu-y += \
gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \
gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o
+# add UMC block
+amdgpu-y += \
+ umc_v6_1.o
+
# add IH block
amdgpu-y += \
amdgpu_irq.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
new file mode 100644
index 000000000000..1ca5ae642946
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v6_1.h"
+#include "amdgpu_ras.h"
+#include "amdgpu.h"
+
+#include "rsmu/rsmu_0_0_2_offset.h"
+#include "rsmu/rsmu_0_0_2_sh_mask.h"
+#include "umc/umc_6_1_1_offset.h"
+#include "umc/umc_6_1_1_sh_mask.h"
+
+static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev,
+ uint32_t umc_instance)
+{
+ uint32_t rsmu_umc_index;
+
+ rsmu_umc_index = RREG32_SOC15(RSMU, 0,
+ mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
+ rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index,
+ RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+ RSMU_UMC_INDEX_MODE_EN, 1);
+ rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index,
+ RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+ RSMU_UMC_INDEX_INSTANCE, umc_instance);
+ rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index,
+ RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+ RSMU_UMC_INDEX_WREN, 1 << umc_instance);
+ WREG32_SOC15(RSMU, 0, mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+ rsmu_umc_index);
+}
+
+static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+ RSMU_UMC_INDEX_MODE_EN, 0);
+}
+
+static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t ecc_err_cnt, ecc_err_cnt_addr;
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ /* select the lower chip and check the error count */
+ ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 0);
+ WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
+ ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset);
+ *error_count +=
+ REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt);
+ /* clear the lower chip err count */
+ WREG32(ecc_err_cnt_addr + umc_reg_offset, 0);
+
+ /* select the higher chip and check the err counter */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 1);
+ WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
+ ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset);
+ *error_count +=
+ REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt);
+ /* clear the higher chip err count */
+ WREG32(ecc_err_cnt_addr + umc_reg_offset, 0);
+
+ /* check for SRAM correctable error
+ MCUMC_STATUS is a 64 bit register */
+ mc_umc_status =
+ RREG32(mc_umc_status_addr + umc_reg_offset);
+ mc_umc_status |=
+ (uint64_t)RREG32(mc_umc_status_addr + umc_reg_offset + 1) << 32;
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+ *error_count += 1;
+
+ /* clear the MCUMC_STATUS */
+ WREG32(mc_umc_status_addr + umc_reg_offset, 0);
+ WREG32(mc_umc_status_addr + umc_reg_offset + 1, 0);
+}
+
+static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ /* check the MCUMC_STATUS */
+ mc_umc_status = RREG32(mc_umc_status_addr + umc_reg_offset);
+ mc_umc_status |=
+ (uint64_t)RREG32(mc_umc_status_addr + umc_reg_offset + 1) << 32;
+
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
+ *error_count += 1;
+
+ /* clear the MCUMC_STATUS */
+ WREG32(mc_umc_status_addr + umc_reg_offset, 0);
+ WREG32(mc_umc_status_addr + umc_reg_offset + 1, 0);
+}
+
+static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t umc_inst, channel_inst, umc_reg_offset;
+
+ for (umc_inst = 0; umc_inst < UMC_V6_1_UMC_INSTANCE_NUM; umc_inst++) {
+ /* enable the index mode to query eror count per channel */
+ umc_v6_1_enable_umc_index_mode(adev, umc_inst);
+ for (channel_inst = 0; channel_inst < UMC_V6_1_CHANNEL_INSTANCE_NUM; channel_inst++) {
+ /* calc the register offset according to channel instance */
+ umc_reg_offset = UMC_V6_1_PER_CHANNEL_OFFSET * channel_inst;
+ umc_v6_1_query_correctable_error_count(adev, umc_reg_offset,
+ &(err_data->ce_count));
+ umc_v6_1_querry_uncorrectable_error_count(adev, umc_reg_offset,
+ &(err_data->ue_count));
+ }
+ }
+ umc_v6_1_disable_umc_index_mode(adev);
+}
+
+const struct amdgpu_umc_funcs umc_v6_1_funcs = {
+ .query_ras_error_count = umc_v6_1_query_ras_error_count,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
new file mode 100644
index 000000000000..d25ae414f4d8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V6_1_H__
+#define __UMC_V6_1_H__
+
+#include "soc15_common.h"
+
+/* HBM Memory Channel Width */
+#define UMC_V6_1_HBM_MEMORY_CHANNEL_WIDTH 128
+/* number of umc channel instance with memory map register access */
+#define UMC_V6_1_CHANNEL_INSTANCE_NUM 4
+/* number of umc instance with memory map register access */
+#define UMC_V6_1_UMC_INSTANCE_NUM 8
+/* UMC regiser per channel offset */
+#define UMC_V6_1_PER_CHANNEL_OFFSET 0x800
+
+extern const struct amdgpu_umc_funcs umc_v6_1_funcs;
+
+#endif
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 07/26] drm/amdgpu: init umc v6_1 functions for vega20
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (5 preceding siblings ...)
2019-07-31 17:57 ` [PATCH 06/26] drm/amdgpu: add umc v6_1 query error count support Alex Deucher
@ 2019-07-31 17:57 ` Alex Deucher
2019-07-31 17:58 ` [PATCH 08/26] drm/amdgpu: querry umc error count Alex Deucher
` (18 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:57 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Dennis Li, Hawking Zhang
From: Hawking Zhang <Hawking.Zhang@amd.com>
init umc callback function for vega20 in sw early init phase
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 5bd937ffc3ad..4116841eb0e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -45,6 +45,7 @@
#include "mmhub_v1_0.h"
#include "gfxhub_v1_1.h"
#include "mmhub_v9_4.h"
+#include "umc_v6_1.h"
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
@@ -623,12 +624,24 @@ static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
}
+static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ adev->umc_funcs = &umc_v6_1_funcs;
+ break;
+ default:
+ break;
+ }
+}
+
static int gmc_v9_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
gmc_v9_0_set_gmc_funcs(adev);
gmc_v9_0_set_irq_funcs(adev);
+ gmc_v9_0_set_umc_funcs(adev);
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
adev->gmc.shared_aperture_end =
@@ -717,6 +730,7 @@ static int gmc_v9_0_ecc_late_init(void *handle)
amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
return 0;
}
+
/* handle resume path. */
if (*ras_if) {
/* resend ras TA enable cmd during resume.
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 08/26] drm/amdgpu: querry umc error count
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (6 preceding siblings ...)
2019-07-31 17:57 ` [PATCH 07/26] drm/amdgpu: init umc v6_1 functions for vega20 Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
2019-07-31 17:58 ` [PATCH 09/26] drm/amdgpu: add ras error count after each query (v2) Alex Deucher
` (17 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Dennis Li, Hawking Zhang
From: Hawking Zhang <Hawking.Zhang@amd.com>
check umc error count in both ras querry function and
ras interrupt handler
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 11 ++++++++++-
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 +++
2 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 3be306bf1603..845e75f35b19 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -586,11 +586,19 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
struct ras_query_if *info)
{
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
+ struct ras_err_data err_data = {0, 0};
if (!obj)
return -EINVAL;
- /* TODO might read the register to read the count */
+ switch (info->head.block) {
+ case AMDGPU_RAS_BLOCK__UMC:
+ if (adev->umc_funcs->query_ras_error_count)
+ adev->umc_funcs->query_ras_error_count(adev, &err_data);
+ break;
+ default:
+ break;
+ }
info->ue_count = obj->err_data.ue_count;
info->ce_count = obj->err_data.ce_count;
@@ -984,6 +992,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
struct ras_ih_data *data = &obj->ih_data;
struct amdgpu_iv_entry entry;
int ret;
+ struct ras_err_data err_data = {0, 0};
while (data->rptr != data->wptr) {
rmb();
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 4116841eb0e3..2748bd110fab 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -241,7 +241,10 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
+ struct ras_err_data err_data = {0, 0};
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ if (adev->umc_funcs->query_ras_error_count)
+ adev->umc_funcs->query_ras_error_count(adev, &err_data);
amdgpu_ras_reset_gpu(adev, 0);
return AMDGPU_RAS_UE;
}
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 09/26] drm/amdgpu: add ras error count after each query (v2)
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (7 preceding siblings ...)
2019-07-31 17:58 ` [PATCH 08/26] drm/amdgpu: querry umc error count Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
2019-07-31 17:58 ` [PATCH 10/26] drm/amdgpu: add RREG64/WREG64(_PCIE) operations Alex Deucher
` (16 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Tao Zhou, Dennis Li, Hawking Zhang
From: Tao Zhou <tao.zhou1@amd.com>
v1: increase ras ce/ue error count
v2: log the number of correctable and uncorrectable errors
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 845e75f35b19..4f81b1f6d09f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -599,9 +599,20 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
default:
break;
}
+
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+
info->ue_count = obj->err_data.ue_count;
info->ce_count = obj->err_data.ce_count;
+ if (err_data.ce_count)
+ dev_info(adev->dev, "%ld correctable errors detected in %s block\n",
+ obj->err_data.ce_count, ras_block_str(info->head.block));
+ if (err_data.ue_count)
+ dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n",
+ obj->err_data.ue_count, ras_block_str(info->head.block));
+
return 0;
}
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 10/26] drm/amdgpu: add RREG64/WREG64(_PCIE) operations
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (8 preceding siblings ...)
2019-07-31 17:58 ` [PATCH 09/26] drm/amdgpu: add ras error count after each query (v2) Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
2019-07-31 17:58 ` [PATCH 11/26] drm/amdgpu: use 64bit operation macros for umc Alex Deucher
` (15 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Tao Zhou, Dennis Li
From: Tao Zhou <tao.zhou1@amd.com>
add 64 bits register access functions
v2: implement 64 bit functions in low level
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 ++++
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 73 ++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/soc15.c | 45 +++++++++++++
3 files changed, 129 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 72d0331f4ca1..3e2b623d86c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -640,6 +640,9 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t);
typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
+typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t);
+typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t);
+
typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
@@ -833,6 +836,8 @@ struct amdgpu_device {
amdgpu_wreg_t pcie_wreg;
amdgpu_rreg_t pciep_rreg;
amdgpu_wreg_t pciep_wreg;
+ amdgpu_rreg64_t pcie_rreg64;
+ amdgpu_wreg64_t pcie_wreg64;
/* protects concurrent UVD register access */
spinlock_t uvd_ctx_idx_lock;
amdgpu_rreg_t uvd_ctx_rreg;
@@ -1033,6 +1038,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags);
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
+uint64_t amdgpu_mm_rreg64(struct amdgpu_device *adev, uint32_t reg);
+void amdgpu_mm_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v);
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg);
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);
@@ -1060,12 +1067,16 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0))
#define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 0)
#define WREG32_IDX(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_IDX)
+#define RREG64(reg) amdgpu_mm_rreg64(adev, (reg))
+#define WREG64(reg, v) amdgpu_mm_wreg64(adev, (reg), (v))
#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
#define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
#define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
#define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v))
+#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg))
+#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v))
#define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))
#define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v))
#define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 127ed01ed8fd..08ba05c34782 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -260,6 +260,43 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
}
}
+/**
+ * amdgpu_mm_rreg64 - read a 64 bit memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ *
+ * Returns the 64 bit value from the offset specified.
+ */
+uint64_t amdgpu_mm_rreg64(struct amdgpu_device *adev, uint32_t reg)
+{
+ uint64_t ret;
+
+ if ((reg * 4) < adev->rmmio_size)
+ ret = readq(((void __iomem *)adev->rmmio) + (reg * 4));
+ else
+ BUG();
+
+ return ret;
+}
+
+/**
+ * amdgpu_mm_wreg64 - write to a 64 bit memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @v: 64 bit value to write to the register
+ *
+ * Writes the value specified to the offset specified.
+ */
+void amdgpu_mm_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
+{
+ if ((reg * 4) < adev->rmmio_size)
+ writeq(v, ((void __iomem *)adev->rmmio) + (reg * 4));
+ else
+ BUG();
+}
+
/**
* amdgpu_io_rreg - read an IO register
*
@@ -415,6 +452,40 @@ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32
BUG();
}
+/**
+ * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
+ *
+ * @adev: amdgpu device pointer
+ * @reg: offset of register
+ *
+ * Dummy register read function. Used for register blocks
+ * that certain asics don't have (all asics).
+ * Returns the value in the register.
+ */
+static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
+{
+ DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
+ BUG();
+ return 0;
+}
+
+/**
+ * amdgpu_invalid_wreg64 - dummy reg write function
+ *
+ * @adev: amdgpu device pointer
+ * @reg: offset of register
+ * @v: value to write to the register
+ *
+ * Dummy register read function. Used for register blocks
+ * that certain asics don't have (all asics).
+ */
+static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
+{
+ DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
+ reg, v);
+ BUG();
+}
+
/**
* amdgpu_block_invalid_rreg - dummy reg read function
*
@@ -2536,6 +2607,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->pcie_wreg = &amdgpu_invalid_wreg;
adev->pciep_rreg = &amdgpu_invalid_rreg;
adev->pciep_wreg = &amdgpu_invalid_wreg;
+ adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
+ adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
adev->didt_rreg = &amdgpu_invalid_rreg;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 347a44f2757a..214fc9d880e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -115,6 +115,49 @@ static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
+static u64 soc15_pcie_rreg64(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags, address, data;
+ u64 r;
+ address = adev->nbio_funcs->get_pcie_index_offset(adev);
+ data = adev->nbio_funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ /* read low 32 bit */
+ WREG32(address, reg);
+ (void)RREG32(address);
+ r = RREG32(data);
+
+ /* read high 32 bit*/
+ WREG32(address, reg + 4);
+ (void)RREG32(address);
+ r |= ((u64)RREG32(data) << 32);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ return r;
+}
+
+static void soc15_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v)
+{
+ unsigned long flags, address, data;
+
+ address = adev->nbio_funcs->get_pcie_index_offset(adev);
+ data = adev->nbio_funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ /* write low 32 bit */
+ WREG32(address, reg);
+ (void)RREG32(address);
+ WREG32(data, (u32)(v & 0xffffffffULL));
+ (void)RREG32(data);
+
+ /* write high 32 bit */
+ WREG32(address, reg + 4);
+ (void)RREG32(address);
+ WREG32(data, (u32)(v >> 32));
+ (void)RREG32(data);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
static u32 soc15_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
{
unsigned long flags, address, data;
@@ -865,6 +908,8 @@ static int soc15_common_early_init(void *handle)
adev->smc_wreg = NULL;
adev->pcie_rreg = &soc15_pcie_rreg;
adev->pcie_wreg = &soc15_pcie_wreg;
+ adev->pcie_rreg64 = &soc15_pcie_rreg64;
+ adev->pcie_wreg64 = &soc15_pcie_wreg64;
adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg;
adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg;
adev->didt_rreg = &soc15_didt_rreg;
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 11/26] drm/amdgpu: use 64bit operation macros for umc
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (9 preceding siblings ...)
2019-07-31 17:58 ` [PATCH 10/26] drm/amdgpu: add RREG64/WREG64(_PCIE) operations Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
2019-07-31 17:58 ` [PATCH 12/26] drm/amdgpu: switch to amdgpu_umc structure Alex Deucher
` (14 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Tao Zhou, Dennis Li
From: Tao Zhou <tao.zhou1@amd.com>
replace some 32bit macros with 64bit operations to simplify code
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
---
drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 25 ++++++++-----------------
1 file changed, 8 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 1ca5ae642946..8fbd81d3ce70 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -94,18 +94,11 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
/* check for SRAM correctable error
MCUMC_STATUS is a 64 bit register */
- mc_umc_status =
- RREG32(mc_umc_status_addr + umc_reg_offset);
- mc_umc_status |=
- (uint64_t)RREG32(mc_umc_status_addr + umc_reg_offset + 1) << 32;
+ mc_umc_status = RREG64(mc_umc_status_addr + umc_reg_offset);
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
*error_count += 1;
-
- /* clear the MCUMC_STATUS */
- WREG32(mc_umc_status_addr + umc_reg_offset, 0);
- WREG32(mc_umc_status_addr + umc_reg_offset + 1, 0);
}
static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev,
@@ -119,10 +112,7 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev
SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
/* check the MCUMC_STATUS */
- mc_umc_status = RREG32(mc_umc_status_addr + umc_reg_offset);
- mc_umc_status |=
- (uint64_t)RREG32(mc_umc_status_addr + umc_reg_offset + 1) << 32;
-
+ mc_umc_status = RREG64(mc_umc_status_addr + umc_reg_offset);
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
@@ -130,17 +120,16 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
*error_count += 1;
-
- /* clear the MCUMC_STATUS */
- WREG32(mc_umc_status_addr + umc_reg_offset, 0);
- WREG32(mc_umc_status_addr + umc_reg_offset + 1, 0);
}
static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
- uint32_t umc_inst, channel_inst, umc_reg_offset;
+ uint32_t umc_inst, channel_inst, umc_reg_offset, mc_umc_status_addr;
+
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
for (umc_inst = 0; umc_inst < UMC_V6_1_UMC_INSTANCE_NUM; umc_inst++) {
/* enable the index mode to query eror count per channel */
@@ -152,6 +141,8 @@ static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
&(err_data->ce_count));
umc_v6_1_querry_uncorrectable_error_count(adev, umc_reg_offset,
&(err_data->ue_count));
+ /* clear umc status */
+ WREG64(mc_umc_status_addr + umc_reg_offset, 0x0ULL);
}
}
umc_v6_1_disable_umc_index_mode(adev);
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 12/26] drm/amdgpu: switch to amdgpu_umc structure
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (10 preceding siblings ...)
2019-07-31 17:58 ` [PATCH 11/26] drm/amdgpu: use 64bit operation macros for umc Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
2019-07-31 17:58 ` [PATCH 13/26] drm/amdgpu: update algorithm of umc uncorrectable error counting Alex Deucher
` (13 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Tao Zhou, Dennis Li, Hawking Zhang
From: Tao Zhou <tao.zhou1@amd.com>
create new amdgpu_umc structure to for more umc
settings in future and switch to the new structure
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +++-
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ++--
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 6 ++++++
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++++---
4 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 3e2b623d86c7..41f677613ffa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -951,6 +951,9 @@ struct amdgpu_device {
/* KFD */
struct amdgpu_kfd_dev kfd;
+ /* UMC */
+ struct amdgpu_umc umc;
+
/* display related functionality */
struct amdgpu_display_manager dm;
@@ -976,7 +979,6 @@ struct amdgpu_device {
const struct amdgpu_nbio_funcs *nbio_funcs;
const struct amdgpu_df_funcs *df_funcs;
- const struct amdgpu_umc_funcs *umc_funcs;
/* delayed work_func for deferring clockgating during resume */
struct delayed_work delayed_init_work;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 4f81b1f6d09f..e087da46fc24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -593,8 +593,8 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
switch (info->head.block) {
case AMDGPU_RAS_BLOCK__UMC:
- if (adev->umc_funcs->query_ras_error_count)
- adev->umc_funcs->query_ras_error_count(adev, &err_data);
+ if (adev->umc.funcs->query_ras_error_count)
+ adev->umc.funcs->query_ras_error_count(adev, &err_data);
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 1ee1a00e5ac8..f5d6def96414 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -26,4 +26,10 @@ struct amdgpu_umc_funcs {
void *ras_error_status);
};
+struct amdgpu_umc {
+ /* max error count in one ras query call */
+ uint32_t max_ras_err_cnt_per_query;
+ const struct amdgpu_umc_funcs *funcs;
+};
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 2748bd110fab..a02bc633a89a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -243,8 +243,8 @@ static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
{
struct ras_err_data err_data = {0, 0};
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
- if (adev->umc_funcs->query_ras_error_count)
- adev->umc_funcs->query_ras_error_count(adev, &err_data);
+ if (adev->umc.funcs->query_ras_error_count)
+ adev->umc.funcs->query_ras_error_count(adev, &err_data);
amdgpu_ras_reset_gpu(adev, 0);
return AMDGPU_RAS_UE;
}
@@ -631,7 +631,9 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_VEGA20:
- adev->umc_funcs = &umc_v6_1_funcs;
+ adev->umc.max_ras_err_cnt_per_query =
+ UMC_V6_1_UMC_INSTANCE_NUM * UMC_V6_1_CHANNEL_INSTANCE_NUM;
+ adev->umc.funcs = &umc_v6_1_funcs;
break;
default:
break;
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 13/26] drm/amdgpu: update algorithm of umc uncorrectable error counting
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (11 preceding siblings ...)
2019-07-31 17:58 ` [PATCH 12/26] drm/amdgpu: switch to amdgpu_umc structure Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
2019-07-31 17:58 ` [PATCH 14/26] drm/amdgpu: add support for recording ras error address Alex Deucher
` (12 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Tao Zhou, Dennis Li, Hawking Zhang
From: Tao Zhou <tao.zhou1@amd.com>
remove the check of ErrorCodeExt
v2: refine the if condition for ue counting
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 8fbd81d3ce70..5b1ccb81b3a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -113,12 +113,12 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev
/* check the MCUMC_STATUS */
mc_umc_status = RREG64(mc_umc_status_addr + umc_reg_offset);
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
- (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
+ if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
*error_count += 1;
}
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 14/26] drm/amdgpu: add support for recording ras error address
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (12 preceding siblings ...)
2019-07-31 17:58 ` [PATCH 13/26] drm/amdgpu: update algorithm of umc uncorrectable error counting Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
2019-07-31 17:58 ` [PATCH 15/26] drm/amdgpu: add structures for umc error address translation Alex Deucher
` (11 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Tao Zhou, Dennis Li, Hawking Zhang
From: Tao Zhou <tao.zhou1@amd.com>
more than one error address may be recorded in one query
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 ++
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index e087da46fc24..1914f37bee59 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -586,7 +586,7 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
struct ras_query_if *info)
{
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
- struct ras_err_data err_data = {0, 0};
+ struct ras_err_data err_data = {0, 0, 0, NULL};
if (!obj)
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 80e94d604a2e..0920db7aff34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -131,6 +131,8 @@ struct ras_fs_data {
struct ras_err_data {
unsigned long ue_count;
unsigned long ce_count;
+ unsigned long err_addr_cnt;
+ uint64_t *err_addr;
};
struct ras_err_handler_data {
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 15/26] drm/amdgpu: add structures for umc error address translation
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (13 preceding siblings ...)
2019-07-31 17:58 ` [PATCH 14/26] drm/amdgpu: add support for recording ras error address Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
2019-07-31 17:58 ` [PATCH 16/26] drm/amdgpu: query umc ras error address Alex Deucher
` (10 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Tao Zhou, Hawking Zhang
From: Tao Zhou <tao.zhou1@amd.com>
add related registers, callback function and channel index table
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 2 ++
drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 10 ++++++++++
2 files changed, 12 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index f5d6def96414..dfa1a39e57af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -24,6 +24,8 @@
struct amdgpu_umc_funcs {
void (*query_ras_error_count)(struct amdgpu_device *adev,
void *ras_error_status);
+ void (*query_ras_error_address)(struct amdgpu_device *adev,
+ void *ras_error_status);
};
struct amdgpu_umc {
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 5b1ccb81b3a2..e05f3e68edb0 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -29,6 +29,16 @@
#include "umc/umc_6_1_1_offset.h"
#include "umc/umc_6_1_1_sh_mask.h"
+#define smnMCA_UMC0_MCUMC_ADDRT0 0x50f10
+
+static uint32_t
+ umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = {
+ {2, 18, 11, 27}, {4, 20, 13, 29},
+ {1, 17, 8, 24}, {7, 23, 14, 30},
+ {10, 26, 3, 19}, {12, 28, 5, 21},
+ {9, 25, 0, 16}, {15, 31, 6, 22}
+};
+
static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev,
uint32_t umc_instance)
{
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 16/26] drm/amdgpu: query umc ras error address
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (14 preceding siblings ...)
2019-07-31 17:58 ` [PATCH 15/26] drm/amdgpu: add structures for umc error address translation Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
2019-07-31 17:58 ` [PATCH 17/26] drm/amdgpu: allow ras interrupt callback to return error data Alex Deucher
` (9 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Tao Zhou, Dennis Li, Hawking Zhang
From: Tao Zhou <tao.zhou1@amd.com>
query umc ras error address, translate it to gpu 4k page view
and save it.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
---
drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 80 +++++++++++++++++++++++++++
1 file changed, 80 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index e05f3e68edb0..bff1a12f2cc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -31,6 +31,16 @@
#define smnMCA_UMC0_MCUMC_ADDRT0 0x50f10
+/*
+ * (addr / 256) * 8192, the higher 26 bits in ErrorAddr
+ * is the index of 8KB block
+ */
+#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5)
+/* channel index is the index of 256B block */
+#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8)
+/* offset in 256B block */
+#define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL)
+
static uint32_t
umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = {
{2, 18, 11, 27}, {4, 20, 13, 29},
@@ -158,6 +168,76 @@ static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
umc_v6_1_disable_umc_index_mode(adev);
}
+static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset, uint32_t channel_index,
+ struct ras_err_data *err_data)
+{
+ uint32_t lsb;
+ uint64_t mc_umc_status, err_addr;
+ uint32_t mc_umc_status_addr;
+
+ /* skip error address process if -ENOMEM */
+ if (!err_data->err_addr)
+ return;
+
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ mc_umc_status = RREG64(mc_umc_status_addr + umc_reg_offset);
+
+ /* calculate error address if ue/ce error is detected */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
+ err_addr = RREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4);
+
+ /* the lowest lsb bits should be ignored */
+ lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+ err_addr &= ~((0x1ULL << lsb) - 1);
+
+ /* translate umc channel address to soc pa, 3 parts are included */
+ err_data->err_addr[err_data->err_addr_cnt] =
+ ADDR_OF_8KB_BLOCK(err_addr)
+ | ADDR_OF_256B_BLOCK(channel_index)
+ | OFFSET_IN_256B_BLOCK(err_addr);
+
+ err_data->err_addr_cnt++;
+ }
+}
+
+static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t umc_inst, channel_inst, umc_reg_offset;
+ uint32_t channel_index, mc_umc_status_addr;
+
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ for (umc_inst = 0; umc_inst < UMC_V6_1_UMC_INSTANCE_NUM; umc_inst++) {
+ /* enable the index mode to query eror count per channel */
+ umc_v6_1_enable_umc_index_mode(adev, umc_inst);
+ for (channel_inst = 0; channel_inst < UMC_V6_1_CHANNEL_INSTANCE_NUM; channel_inst++) {
+ /* calc the register offset according to channel instance */
+ umc_reg_offset = UMC_V6_1_PER_CHANNEL_OFFSET * channel_inst;
+ /* get channel index of interleaved memory */
+ channel_index = umc_v6_1_channel_idx_tbl[umc_inst][channel_inst];
+
+ umc_v6_1_query_error_address(adev, umc_reg_offset,
+ channel_index, err_data);
+
+ /* clear umc status */
+ WREG64(mc_umc_status_addr + umc_reg_offset, 0x0ULL);
+ /* clear error address register */
+ WREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4, 0x0ULL);
+ }
+ }
+
+ umc_v6_1_disable_umc_index_mode(adev);
+}
+
const struct amdgpu_umc_funcs umc_v6_1_funcs = {
.query_ras_error_count = umc_v6_1_query_ras_error_count,
+ .query_ras_error_address = umc_v6_1_query_ras_error_address,
};
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 17/26] drm/amdgpu: allow ras interrupt callback to return error data
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (15 preceding siblings ...)
2019-07-31 17:58 ` [PATCH 16/26] drm/amdgpu: query umc ras error address Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
2019-07-31 17:58 ` [PATCH 18/26] drm/amdgpu: update interrupt callback for all ras clients Alex Deucher
` (8 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Tao Zhou, Dennis Li, Hawking Zhang
From: Tao Zhou <tao.zhou1@amd.com>
add error data as parameter for ras interrupt cb and process it
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 ++--
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 37 +++++++++++++------------
2 files changed, 22 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1914f37bee59..0eeb85d8399d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1003,7 +1003,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
struct ras_ih_data *data = &obj->ih_data;
struct amdgpu_iv_entry entry;
int ret;
- struct ras_err_data err_data = {0, 0};
+ struct ras_err_data err_data = {0, 0, 0, NULL};
while (data->rptr != data->wptr) {
rmb();
@@ -1018,14 +1018,14 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
* from the callback to udpate the error type/count, etc
*/
if (data->cb) {
- ret = data->cb(obj->adev, &entry);
+ ret = data->cb(obj->adev, &err_data, &entry);
/* ue will trigger an interrupt, and in that case
* we need do a reset to recovery the whole system.
* But leave IP do that recovery, here we just dispatch
* the error.
*/
if (ret == AMDGPU_RAS_UE) {
- obj->err_data.ue_count++;
+ obj->err_data.ue_count += err_data.ue_count;
}
/* Might need get ce count by register, but not all IP
* saves ce count, some IP just use one bit or two bits
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 0920db7aff34..2c86a5135ec9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -76,9 +76,6 @@ struct ras_common_if {
char name[32];
};
-typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
- struct amdgpu_iv_entry *entry);
-
struct amdgpu_ras {
/* ras infrastructure */
/* for ras itself. */
@@ -108,21 +105,6 @@ struct amdgpu_ras {
uint32_t flags;
};
-struct ras_ih_data {
- /* interrupt bottom half */
- struct work_struct ih_work;
- int inuse;
- /* IP callback */
- ras_ih_cb cb;
- /* full of entries */
- unsigned char *ring;
- unsigned int ring_size;
- unsigned int element_size;
- unsigned int aligned_element_size;
- unsigned int rptr;
- unsigned int wptr;
-};
-
struct ras_fs_data {
char sysfs_name[32];
char debugfs_name[32];
@@ -149,6 +131,25 @@ struct ras_err_handler_data {
int last_reserved;
};
+typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ struct amdgpu_iv_entry *entry);
+
+struct ras_ih_data {
+ /* interrupt bottom half */
+ struct work_struct ih_work;
+ int inuse;
+ /* IP callback */
+ ras_ih_cb cb;
+ /* full of entries */
+ unsigned char *ring;
+ unsigned int ring_size;
+ unsigned int element_size;
+ unsigned int aligned_element_size;
+ unsigned int rptr;
+ unsigned int wptr;
+};
+
struct ras_manager {
struct ras_common_if head;
/* reference count */
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 18/26] drm/amdgpu: update interrupt callback for all ras clients
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (16 preceding siblings ...)
2019-07-31 17:58 ` [PATCH 17/26] drm/amdgpu: allow ras interrupt callback to return error data Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
2019-07-31 17:58 ` [PATCH 19/26] drm/amdgpu: add check for ras error type Alex Deucher
` (7 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Tao Zhou, Dennis Li, Hawking Zhang
From: Tao Zhou <tao.zhou1@amd.com>
add err_data parameter in interrupt cb for ras clients
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 ++
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++--
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 ++
3 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index bd7f431a24d9..048803c75048 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3951,6 +3951,7 @@ static int gfx_v9_0_early_init(void *handle)
}
static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
struct amdgpu_iv_entry *entry);
static int gfx_v9_0_ecc_late_init(void *handle)
@@ -5265,6 +5266,7 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
}
static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
struct amdgpu_iv_entry *entry)
{
/* TODO ue will trigger an interrupt. */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index a02bc633a89a..ee06cbe2a7e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -239,12 +239,12 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
}
static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
struct amdgpu_iv_entry *entry)
{
- struct ras_err_data err_data = {0, 0};
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
if (adev->umc.funcs->query_ras_error_count)
- adev->umc.funcs->query_ras_error_count(adev, &err_data);
+ adev->umc.funcs->query_ras_error_count(adev, err_data);
amdgpu_ras_reset_gpu(adev, 0);
return AMDGPU_RAS_UE;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 36dc5025c461..2ffc9a41d8b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1624,6 +1624,7 @@ static int sdma_v4_0_early_init(void *handle)
}
static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
struct amdgpu_iv_entry *entry);
static int sdma_v4_0_late_init(void *handle)
@@ -1957,6 +1958,7 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
}
static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
struct amdgpu_iv_entry *entry)
{
uint32_t instance, err_source;
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 19/26] drm/amdgpu: add check for ras error type
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (17 preceding siblings ...)
2019-07-31 17:58 ` [PATCH 18/26] drm/amdgpu: update interrupt callback for all ras clients Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
2019-07-31 17:58 ` [PATCH 20/26] drm/amdgpu: remove ras_reserve_vram in ras injection Alex Deucher
` (6 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Tao Zhou, Dennis Li, Hawking Zhang
From: Tao Zhou <tao.zhou1@amd.com>
only ue and ce errors are supported
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 0eeb85d8399d..a87deb7be414 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -153,9 +153,14 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
return -EINVAL;
data->head.block = block_id;
- data->head.type = memcmp("ue", err, 2) == 0 ?
- AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE :
- AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
+ /* only ue and ce errors are supported */
+ if (!memcmp("ue", err, 2))
+ data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ else if (!memcmp("ce", err, 2))
+ data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
+ else
+ return -EINVAL;
+
data->op = op;
if (op == 2) {
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 20/26] drm/amdgpu: remove ras_reserve_vram in ras injection
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (18 preceding siblings ...)
2019-07-31 17:58 ` [PATCH 19/26] drm/amdgpu: add check for ras error type Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
2019-07-31 17:58 ` [PATCH 21/26] drm/amd/include: add bitfield define for EDC registers Alex Deucher
` (5 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Tao Zhou, Dennis Li, Hawking Zhang
From: Tao Zhou <tao.zhou1@amd.com>
error injection address is not in gpu address space
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 21 ++++++++++-----------
1 file changed, 10 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index a87deb7be414..ccd5863bca88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -60,6 +60,9 @@ const char *ras_block_string[] = {
#define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2
#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
+/* inject address is 52 bits */
+#define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52)
+
static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev,
uint64_t offset, uint64_t size,
struct amdgpu_bo **bo_ptr);
@@ -245,7 +248,6 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
{
struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
struct ras_debug_if data;
- struct amdgpu_bo *bo;
int ret = 0;
ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data);
@@ -263,17 +265,14 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
break;
case 2:
- ret = amdgpu_ras_reserve_vram(adev,
- data.inject.address, PAGE_SIZE, &bo);
- if (ret) {
- /* address was offset, now it is absolute.*/
- data.inject.address += adev->gmc.vram_start;
- if (data.inject.address > adev->gmc.vram_end)
- break;
- } else
- data.inject.address = amdgpu_bo_gpu_offset(bo);
+ if ((data.inject.address >= adev->gmc.mc_vram_size) ||
+ (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
+ ret = -EINVAL;
+ break;
+ }
+
+ /* data.inject.address is offset instead of absolute gpu address */
ret = amdgpu_ras_error_inject(adev, &data.inject);
- amdgpu_ras_release_vram(adev, &bo);
break;
default:
ret = -EINVAL;
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 21/26] drm/amd/include: add bitfield define for EDC registers
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (19 preceding siblings ...)
2019-07-31 17:58 ` [PATCH 20/26] drm/amdgpu: remove ras_reserve_vram in ras injection Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
2019-07-31 17:58 ` [PATCH 22/26] drm/amd/include: add define of TCP_EDC_CNT_NEW Alex Deucher
` (4 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Tao Zhou, Dennis Li, Hawking Zhang
From: Dennis Li <Dennis.Li@amd.com>
Add EDC registers to support VEGA20 RAS
Change-Id: Iafa8029135aa407edc0c77f1779a1cb9982c1492
Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
.../amd/include/asic_reg/gc/gc_9_0_sh_mask.h | 157 ++++++++++++++++++
1 file changed, 157 insertions(+)
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h
index 2e1214be67a2..064c4bb1dc62 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h
@@ -21,6 +21,105 @@
#ifndef _gc_9_0_SH_MASK_HEADER
#define _gc_9_0_SH_MASK_HEADER
+//GCEA_EDC_CNT
+#define GCEA_EDC_CNT__DRAMRD_CMDMEM_SEC_COUNT__SHIFT 0x0
+#define GCEA_EDC_CNT__DRAMRD_CMDMEM_DED_COUNT__SHIFT 0x2
+#define GCEA_EDC_CNT__DRAMWR_CMDMEM_SEC_COUNT__SHIFT 0x4
+#define GCEA_EDC_CNT__DRAMWR_CMDMEM_DED_COUNT__SHIFT 0x6
+#define GCEA_EDC_CNT__DRAMWR_DATAMEM_SEC_COUNT__SHIFT 0x8
+#define GCEA_EDC_CNT__DRAMWR_DATAMEM_DED_COUNT__SHIFT 0xa
+#define GCEA_EDC_CNT__RRET_TAGMEM_SEC_COUNT__SHIFT 0xc
+#define GCEA_EDC_CNT__RRET_TAGMEM_DED_COUNT__SHIFT 0xe
+#define GCEA_EDC_CNT__WRET_TAGMEM_SEC_COUNT__SHIFT 0x10
+#define GCEA_EDC_CNT__WRET_TAGMEM_DED_COUNT__SHIFT 0x12
+#define GCEA_EDC_CNT__DRAMRD_PAGEMEM_SED_COUNT__SHIFT 0x14
+#define GCEA_EDC_CNT__DRAMWR_PAGEMEM_SED_COUNT__SHIFT 0x16
+#define GCEA_EDC_CNT__IORD_CMDMEM_SED_COUNT__SHIFT 0x18
+#define GCEA_EDC_CNT__IOWR_CMDMEM_SED_COUNT__SHIFT 0x1a
+#define GCEA_EDC_CNT__IOWR_DATAMEM_SED_COUNT__SHIFT 0x1c
+#define GCEA_EDC_CNT__DRAMRD_CMDMEM_SEC_COUNT_MASK 0x00000003L
+#define GCEA_EDC_CNT__DRAMRD_CMDMEM_DED_COUNT_MASK 0x0000000CL
+#define GCEA_EDC_CNT__DRAMWR_CMDMEM_SEC_COUNT_MASK 0x00000030L
+#define GCEA_EDC_CNT__DRAMWR_CMDMEM_DED_COUNT_MASK 0x000000C0L
+#define GCEA_EDC_CNT__DRAMWR_DATAMEM_SEC_COUNT_MASK 0x00000300L
+#define GCEA_EDC_CNT__DRAMWR_DATAMEM_DED_COUNT_MASK 0x00000C00L
+#define GCEA_EDC_CNT__RRET_TAGMEM_SEC_COUNT_MASK 0x00003000L
+#define GCEA_EDC_CNT__RRET_TAGMEM_DED_COUNT_MASK 0x0000C000L
+#define GCEA_EDC_CNT__WRET_TAGMEM_SEC_COUNT_MASK 0x00030000L
+#define GCEA_EDC_CNT__WRET_TAGMEM_DED_COUNT_MASK 0x000C0000L
+#define GCEA_EDC_CNT__DRAMRD_PAGEMEM_SED_COUNT_MASK 0x00300000L
+#define GCEA_EDC_CNT__DRAMWR_PAGEMEM_SED_COUNT_MASK 0x00C00000L
+#define GCEA_EDC_CNT__IORD_CMDMEM_SED_COUNT_MASK 0x03000000L
+#define GCEA_EDC_CNT__IOWR_CMDMEM_SED_COUNT_MASK 0x0C000000L
+#define GCEA_EDC_CNT__IOWR_DATAMEM_SED_COUNT_MASK 0x30000000L
+
+#define GCEA_EDC_CNT2__GMIRD_CMDMEM_SEC_COUNT__SHIFT 0x0
+#define GCEA_EDC_CNT2__GMIRD_CMDMEM_DED_COUNT__SHIFT 0x2
+#define GCEA_EDC_CNT2__GMIWR_CMDMEM_SEC_COUNT__SHIFT 0x4
+#define GCEA_EDC_CNT2__GMIWR_CMDMEM_DED_COUNT__SHIFT 0x6
+#define GCEA_EDC_CNT2__GMIWR_DATAMEM_SEC_COUNT__SHIFT 0x8
+#define GCEA_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT__SHIFT 0xa
+#define GCEA_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT__SHIFT 0xc
+#define GCEA_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT__SHIFT 0xe
+#define GCEA_EDC_CNT2__MAM_D0MEM_SED_COUNT__SHIFT 0x10
+#define GCEA_EDC_CNT2__MAM_D1MEM_SED_COUNT__SHIFT 0x12
+#define GCEA_EDC_CNT2__MAM_D2MEM_SED_COUNT__SHIFT 0x14
+#define GCEA_EDC_CNT2__MAM_D3MEM_SED_COUNT__SHIFT 0x16
+#define GCEA_EDC_CNT2__GMIRD_CMDMEM_SEC_COUNT_MASK 0x00000003L
+#define GCEA_EDC_CNT2__GMIRD_CMDMEM_DED_COUNT_MASK 0x0000000CL
+#define GCEA_EDC_CNT2__GMIWR_CMDMEM_SEC_COUNT_MASK 0x00000030L
+#define GCEA_EDC_CNT2__GMIWR_CMDMEM_DED_COUNT_MASK 0x000000C0L
+#define GCEA_EDC_CNT2__GMIWR_DATAMEM_SEC_COUNT_MASK 0x00000300L
+#define GCEA_EDC_CNT2__GMIWR_DATAMEM_DED_COUNT_MASK 0x00000C00L
+#define GCEA_EDC_CNT2__GMIRD_PAGEMEM_SED_COUNT_MASK 0x00003000L
+#define GCEA_EDC_CNT2__GMIWR_PAGEMEM_SED_COUNT_MASK 0x0000C000L
+#define GCEA_EDC_CNT2__MAM_D0MEM_SED_COUNT_MASK 0x00030000L
+#define GCEA_EDC_CNT2__MAM_D1MEM_SED_COUNT_MASK 0x000C0000L
+#define GCEA_EDC_CNT2__MAM_D2MEM_SED_COUNT_MASK 0x00300000L
+#define GCEA_EDC_CNT2__MAM_D3MEM_SED_COUNT_MASK 0x00C00000L
+
+// addressBlock: gc_cppdec2
+//CPF_EDC_TAG_CNT
+#define CPF_EDC_TAG_CNT__DED_COUNT__SHIFT 0x0
+#define CPF_EDC_TAG_CNT__SEC_COUNT__SHIFT 0x2
+#define CPF_EDC_TAG_CNT__DED_COUNT_MASK 0x00000003L
+#define CPF_EDC_TAG_CNT__SEC_COUNT_MASK 0x0000000CL
+//CPF_EDC_ROQ_CNT
+#define CPF_EDC_ROQ_CNT__COUNT_ME1__SHIFT 0x0
+#define CPF_EDC_ROQ_CNT__COUNT_ME2__SHIFT 0x2
+#define CPF_EDC_ROQ_CNT__COUNT_ME1_MASK 0x00000003L
+#define CPF_EDC_ROQ_CNT__COUNT_ME2_MASK 0x0000000CL
+//CPG_EDC_TAG_CNT
+#define CPG_EDC_TAG_CNT__DED_COUNT__SHIFT 0x0
+#define CPG_EDC_TAG_CNT__SEC_COUNT__SHIFT 0x2
+#define CPG_EDC_TAG_CNT__DED_COUNT_MASK 0x00000003L
+#define CPG_EDC_TAG_CNT__SEC_COUNT_MASK 0x0000000CL
+//CPG_EDC_DMA_CNT
+#define CPG_EDC_DMA_CNT__ROQ_COUNT__SHIFT 0x0
+#define CPG_EDC_DMA_CNT__TAG_DED_COUNT__SHIFT 0x2
+#define CPG_EDC_DMA_CNT__TAG_SEC_COUNT__SHIFT 0x4
+#define CPG_EDC_DMA_CNT__ROQ_COUNT_MASK 0x00000003L
+#define CPG_EDC_DMA_CNT__TAG_DED_COUNT_MASK 0x0000000CL
+#define CPG_EDC_DMA_CNT__TAG_SEC_COUNT_MASK 0x00000030L
+//CPC_EDC_SCRATCH_CNT
+#define CPC_EDC_SCRATCH_CNT__DED_COUNT__SHIFT 0x0
+#define CPC_EDC_SCRATCH_CNT__SEC_COUNT__SHIFT 0x2
+#define CPC_EDC_SCRATCH_CNT__DED_COUNT_MASK 0x00000003L
+#define CPC_EDC_SCRATCH_CNT__SEC_COUNT_MASK 0x0000000CL
+//CPC_EDC_UCODE_CNT
+#define CPC_EDC_UCODE_CNT__DED_COUNT__SHIFT 0x0
+#define CPC_EDC_UCODE_CNT__SEC_COUNT__SHIFT 0x2
+#define CPC_EDC_UCODE_CNT__DED_COUNT_MASK 0x00000003L
+#define CPC_EDC_UCODE_CNT__SEC_COUNT_MASK 0x0000000CL
+//DC_EDC_STATE_CNT
+#define DC_EDC_STATE_CNT__COUNT_ME1__SHIFT 0x0
+#define DC_EDC_STATE_CNT__COUNT_ME1_MASK 0x00000003L
+//DC_EDC_CSINVOC_CNT
+#define DC_EDC_CSINVOC_CNT__COUNT_ME1__SHIFT 0x0
+#define DC_EDC_CSINVOC_CNT__COUNT_ME1_MASK 0x00000003L
+//DC_EDC_RESTORE_CNT
+#define DC_EDC_RESTORE_CNT__COUNT_ME1__SHIFT 0x0
+#define DC_EDC_RESTORE_CNT__COUNT_ME1_MASK 0x00000003L
// addressBlock: gc_grbmdec
//GRBM_CNTL
@@ -9033,11 +9132,15 @@
#define TCC_EDC_CNT2__SRC_FIFO_NEXT_RAM_SED_COUNT__SHIFT 0x4
#define TCC_EDC_CNT2__LATENCY_FIFO_NEXT_RAM_SED_COUNT__SHIFT 0x6
#define TCC_EDC_CNT2__CACHE_TAG_PROBE_FIFO_SED_COUNT__SHIFT 0x8
+#define TCC_EDC_CNT2__WRRET_TAG_WRITE_RETURN_SED_COUNT__SHIFT 0xa
+#define TCC_EDC_CNT2__ATOMIC_RETURN_BUFFER_SED_COUNT__SHIFT 0xc
#define TCC_EDC_CNT2__WRITE_RETURN_SED_COUNT_MASK 0x00000003L
#define TCC_EDC_CNT2__WRITE_CACHE_READ_SED_COUNT_MASK 0x0000000CL
#define TCC_EDC_CNT2__SRC_FIFO_NEXT_RAM_SED_COUNT_MASK 0x00000030L
#define TCC_EDC_CNT2__LATENCY_FIFO_NEXT_RAM_SED_COUNT_MASK 0x000000C0L
#define TCC_EDC_CNT2__CACHE_TAG_PROBE_FIFO_SED_COUNT_MASK 0x00000300L
+#define TCC_EDC_CNT2__WRRET_TAG_WRITE_RETURN_SED_COUNT_MASK 0x00000C00L
+#define TCC_EDC_CNT2__ATOMIC_RETURN_BUFFER_SED_COUNT_MASK 0x00003000L
//TCC_REDUNDANCY
#define TCC_REDUNDANCY__MC_SEL0__SHIFT 0x0
#define TCC_REDUNDANCY__MC_SEL1__SHIFT 0x1
@@ -29818,6 +29921,60 @@
#define DIDT_DBR_STALL_EVENT_COUNTER__DIDT_STALL_EVENT_COUNTER__SHIFT 0x0
#define DIDT_DBR_STALL_EVENT_COUNTER__DIDT_STALL_EVENT_COUNTER_MASK 0xFFFFFFFFL
+//TA_EDC_CNT
+#define TA_EDC_CNT__TA_FS_DFIFO_SEC_COUNT__SHIFT 0x0
+#define TA_EDC_CNT__TA_FS_DFIFO_DED_COUNT__SHIFT 0x2
+#define TA_EDC_CNT__TA_FS_AFIFO_SED_COUNT__SHIFT 0x4
+#define TA_EDC_CNT__TA_FL_LFIFO_SED_COUNT__SHIFT 0x6
+#define TA_EDC_CNT__TA_FX_LFIFO_SED_COUNT__SHIFT 0x8
+#define TA_EDC_CNT__TA_FS_CFIFO_SED_COUNT__SHIFT 0xa
+#define TA_EDC_CNT__TA_FS_DFIFO_SEC_COUNT_MASK 0x00000003L
+#define TA_EDC_CNT__TA_FS_DFIFO_DED_COUNT_MASK 0x0000000CL
+#define TA_EDC_CNT__TA_FS_AFIFO_SED_COUNT_MASK 0x00000030L
+#define TA_EDC_CNT__TA_FL_LFIFO_SED_COUNT_MASK 0x000000C0L
+#define TA_EDC_CNT__TA_FX_LFIFO_SED_COUNT_MASK 0x00000300L
+#define TA_EDC_CNT__TA_FS_CFIFO_SED_COUNT_MASK 0x00000C00L
+
+//TCI_EDC_CNT
+#define TCI_EDC_CNT__WRITE_RAM_SED_COUNT__SHIFT 0x0
+#define TCI_EDC_CNT__WRITE_RAM_SED_COUNT_MASK 0x00000003L
+
+//TCP_EDC_CNT_NEW
+#define TCP_EDC_CNT_NEW__CACHE_RAM_SEC_COUNT__SHIFT 0x0
+#define TCP_EDC_CNT_NEW__CACHE_RAM_DED_COUNT__SHIFT 0x2
+#define TCP_EDC_CNT_NEW__LFIFO_RAM_SEC_COUNT__SHIFT 0x4
+#define TCP_EDC_CNT_NEW__LFIFO_RAM_DED_COUNT__SHIFT 0x6
+#define TCP_EDC_CNT_NEW__CMD_FIFO_SED_COUNT__SHIFT 0x8
+#define TCP_EDC_CNT_NEW__VM_FIFO_SEC_COUNT__SHIFT 0xa
+#define TCP_EDC_CNT_NEW__VM_FIFO_DED_COUNT__SHIFT 0xc
+#define TCP_EDC_CNT_NEW__DB_RAM_SED_COUNT__SHIFT 0xe
+#define TCP_EDC_CNT_NEW__UTCL1_LFIFO0_SEC_COUNT__SHIFT 0x10
+#define TCP_EDC_CNT_NEW__UTCL1_LFIFO0_DED_COUNT__SHIFT 0x12
+#define TCP_EDC_CNT_NEW__UTCL1_LFIFO1_SEC_COUNT__SHIFT 0x14
+#define TCP_EDC_CNT_NEW__UTCL1_LFIFO1_DED_COUNT__SHIFT 0x16
+#define TCP_EDC_CNT_NEW__CACHE_RAM_SEC_COUNT_MASK 0x00000003L
+#define TCP_EDC_CNT_NEW__CACHE_RAM_DED_COUNT_MASK 0x0000000CL
+#define TCP_EDC_CNT_NEW__LFIFO_RAM_SEC_COUNT_MASK 0x00000030L
+#define TCP_EDC_CNT_NEW__LFIFO_RAM_DED_COUNT_MASK 0x000000C0L
+#define TCP_EDC_CNT_NEW__CMD_FIFO_SED_COUNT_MASK 0x00000300L
+#define TCP_EDC_CNT_NEW__VM_FIFO_SEC_COUNT_MASK 0x00000C00L
+#define TCP_EDC_CNT_NEW__VM_FIFO_DED_COUNT_MASK 0x00003000L
+#define TCP_EDC_CNT_NEW__DB_RAM_SED_COUNT_MASK 0x0000C000L
+#define TCP_EDC_CNT_NEW__UTCL1_LFIFO0_SEC_COUNT_MASK 0x00030000L
+#define TCP_EDC_CNT_NEW__UTCL1_LFIFO0_DED_COUNT_MASK 0x000C0000L
+#define TCP_EDC_CNT_NEW__UTCL1_LFIFO1_SEC_COUNT_MASK 0x00300000L
+#define TCP_EDC_CNT_NEW__UTCL1_LFIFO1_DED_COUNT_MASK 0x00C00000L
+//TD_EDC_CNT
+#define TD_EDC_CNT__SS_FIFO_LO_SEC_COUNT__SHIFT 0x0
+#define TD_EDC_CNT__SS_FIFO_LO_DED_COUNT__SHIFT 0x2
+#define TD_EDC_CNT__SS_FIFO_HI_SEC_COUNT__SHIFT 0x4
+#define TD_EDC_CNT__SS_FIFO_HI_DED_COUNT__SHIFT 0x6
+#define TD_EDC_CNT__CS_FIFO_SED_COUNT__SHIFT 0x8
+#define TD_EDC_CNT__SS_FIFO_LO_SEC_COUNT_MASK 0x00000003L
+#define TD_EDC_CNT__SS_FIFO_LO_DED_COUNT_MASK 0x0000000CL
+#define TD_EDC_CNT__SS_FIFO_HI_SEC_COUNT_MASK 0x00000030L
+#define TD_EDC_CNT__SS_FIFO_HI_DED_COUNT_MASK 0x000000C0L
+#define TD_EDC_CNT__CS_FIFO_SED_COUNT_MASK 0x00000300L
#endif
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 22/26] drm/amd/include: add define of TCP_EDC_CNT_NEW
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (20 preceding siblings ...)
2019-07-31 17:58 ` [PATCH 21/26] drm/amd/include: add bitfield define for EDC registers Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
2019-07-31 17:58 ` [PATCH 23/26] drm/amdgpu: add define for gfx ras subblock Alex Deucher
` (3 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Tao Zhou, Dennis Li, Hawking Zhang
From: Dennis Li <Dennis.Li@amd.com>
Change-Id: Iedd4bac2187e3b800662485d4623ace246af3f36
Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
index f1d048e0ed2c..ca16d9125fbc 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
@@ -1700,6 +1700,8 @@
#define mmTCP_BUFFER_ADDR_HASH_CNTL_BASE_IDX 0
#define mmTCP_EDC_CNT 0x0b17
#define mmTCP_EDC_CNT_BASE_IDX 0
+#define mmTCP_EDC_CNT_NEW 0x0b18
+#define mmTCP_EDC_CNT_NEW_BASE_IDX 0
#define mmTC_CFG_L1_LOAD_POLICY0 0x0b1a
#define mmTC_CFG_L1_LOAD_POLICY0_BASE_IDX 0
#define mmTC_CFG_L1_LOAD_POLICY1 0x0b1b
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 23/26] drm/amdgpu: add define for gfx ras subblock
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (21 preceding siblings ...)
2019-07-31 17:58 ` [PATCH 22/26] drm/amd/include: add define of TCP_EDC_CNT_NEW Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
2019-07-31 17:58 ` [PATCH 24/26] drm/amdgpu: add RAS callback for gfx Alex Deucher
` (2 subsequent siblings)
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Tao Zhou, Dennis Li, Hawking Zhang
From: Dennis Li <Dennis.Li@amd.com>
Change-Id: Ib4b019b2bcbe6ef0b85ef170e7cf032bfa400553
Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 230 ++++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 201 +++++++++++++++++++++
2 files changed, 431 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 2c86a5135ec9..2765f2dbb1e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -52,6 +52,236 @@ enum amdgpu_ras_block {
#define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST
#define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1)
+enum amdgpu_ras_gfx_subblock {
+ /* CPC */
+ AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
+ AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH =
+ AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
+ AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1,
+ AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
+ AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1,
+ AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2,
+ AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
+ AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
+ AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
+ /* CPF */
+ AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 =
+ AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1,
+ AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
+ AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
+ /* CPG */
+ AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ =
+ AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG,
+ AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
+ AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
+ /* GDS */
+ AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
+ AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
+ /* SPI */
+ AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM,
+ /* SQ */
+ AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
+ AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I,
+ AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
+ AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
+ /* SQC (3 ranges) */
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
+ /* SQC range 0 */
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END =
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
+ /* SQC range 1 */
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END =
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
+ /* SQC range 2 */
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END =
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END,
+ /* TA */
+ AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO =
+ AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO,
+ AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
+ AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
+ /* TCA */
+ AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO =
+ AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
+ /* TCC (5 sub-ranges) */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
+ /* TCC range 0 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
+ AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
+ AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
+ /* TCC range 1 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
+ /* TCC range 2 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
+ AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
+ AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
+ AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
+ /* TCC range 3 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
+ /* TCC range 4 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END,
+ /* TCI */
+ AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM,
+ /* TCP */
+ AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM =
+ AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
+ AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM,
+ AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
+ AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
+ AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
+ /* TD */
+ AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO =
+ AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
+ AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
+ /* EA (3 sub-ranges) */
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
+ /* EA range 0 */
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END =
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
+ /* EA range 1 */
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END =
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
+ /* EA range 2 */
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END =
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END,
+ /* UTC VM L2 bank */
+ AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE,
+ /* UTC VM walker */
+ AMDGPU_RAS_BLOCK__UTC_VML2_WALKER,
+ /* UTC ATC L2 2MB cache */
+ AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
+ /* UTC ATC L2 4KB cache */
+ AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
+ AMDGPU_RAS_BLOCK__GFX_MAX
+};
+
enum amdgpu_ras_error_type {
AMDGPU_RAS_ERROR__NONE = 0,
AMDGPU_RAS_ERROR__PARITY = 1,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 048803c75048..fba552b93cc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -117,6 +117,207 @@ MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
#define mmTCP_CHAN_STEER_5_ARCT 0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0
+enum ta_ras_gfx_subblock {
+ /*CPC*/
+ TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
+ TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
+ TA_RAS_BLOCK__GFX_CPC_UCODE,
+ TA_RAS_BLOCK__GFX_DC_STATE_ME1,
+ TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
+ TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
+ TA_RAS_BLOCK__GFX_DC_STATE_ME2,
+ TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
+ TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
+ TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
+ /* CPF*/
+ TA_RAS_BLOCK__GFX_CPF_INDEX_START,
+ TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
+ TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
+ TA_RAS_BLOCK__GFX_CPF_TAG,
+ TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
+ /* CPG*/
+ TA_RAS_BLOCK__GFX_CPG_INDEX_START,
+ TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
+ TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
+ TA_RAS_BLOCK__GFX_CPG_TAG,
+ TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
+ /* GDS*/
+ TA_RAS_BLOCK__GFX_GDS_INDEX_START,
+ TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
+ TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
+ TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
+ TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
+ TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
+ TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
+ /* SPI*/
+ TA_RAS_BLOCK__GFX_SPI_SR_MEM,
+ /* SQ*/
+ TA_RAS_BLOCK__GFX_SQ_INDEX_START,
+ TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
+ TA_RAS_BLOCK__GFX_SQ_LDS_D,
+ TA_RAS_BLOCK__GFX_SQ_LDS_I,
+ TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
+ TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
+ /* SQC (3 ranges)*/
+ TA_RAS_BLOCK__GFX_SQC_INDEX_START,
+ /* SQC range 0*/
+ TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
+ TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
+ TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
+ TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
+ /* SQC range 1*/
+ TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
+ TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
+ TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
+ /* SQC range 2*/
+ TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
+ TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
+ TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
+ TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
+ /* TA*/
+ TA_RAS_BLOCK__GFX_TA_INDEX_START,
+ TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
+ TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
+ TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
+ TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
+ TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
+ TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
+ /* TCA*/
+ TA_RAS_BLOCK__GFX_TCA_INDEX_START,
+ TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
+ TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
+ TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
+ /* TCC (5 sub-ranges)*/
+ TA_RAS_BLOCK__GFX_TCC_INDEX_START,
+ /* TCC range 0*/
+ TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
+ TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
+ TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
+ TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
+ /* TCC range 1*/
+ TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
+ TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
+ TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
+ TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
+ TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
+ /* TCC range 2*/
+ TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
+ TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
+ TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
+ TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
+ TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
+ TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
+ TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
+ TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
+ TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
+ TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
+ /* TCC range 3*/
+ TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
+ TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
+ TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
+ TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
+ TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
+ /* TCC range 4*/
+ TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
+ TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
+ TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
+ TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
+ TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
+ TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
+ TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
+ /* TCI*/
+ TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
+ /* TCP*/
+ TA_RAS_BLOCK__GFX_TCP_INDEX_START,
+ TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
+ TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
+ TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
+ TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
+ TA_RAS_BLOCK__GFX_TCP_DB_RAM,
+ TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
+ TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
+ TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
+ /* TD*/
+ TA_RAS_BLOCK__GFX_TD_INDEX_START,
+ TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
+ TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
+ TA_RAS_BLOCK__GFX_TD_CS_FIFO,
+ TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
+ /* EA (3 sub-ranges)*/
+ TA_RAS_BLOCK__GFX_EA_INDEX_START,
+ /* EA range 0*/
+ TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
+ TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
+ TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
+ TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
+ TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
+ TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
+ TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
+ TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
+ TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
+ TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
+ /* EA range 1*/
+ TA_RAS_BLOCK__GFX_EA_INDEX1_START,
+ TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
+ TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
+ TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
+ TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
+ TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
+ TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
+ TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
+ TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
+ /* EA range 2*/
+ TA_RAS_BLOCK__GFX_EA_INDEX2_START,
+ TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
+ TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
+ TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
+ TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
+ TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
+ TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
+ /* UTC VM L2 bank*/
+ TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
+ /* UTC VM walker*/
+ TA_RAS_BLOCK__UTC_VML2_WALKER,
+ /* UTC ATC L2 2MB cache*/
+ TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
+ /* UTC ATC L2 4KB cache*/
+ TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
+ TA_RAS_BLOCK__GFX_MAX
+};
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 24/26] drm/amdgpu: add RAS callback for gfx
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (22 preceding siblings ...)
2019-07-31 17:58 ` [PATCH 23/26] drm/amdgpu: add define for gfx ras subblock Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
[not found] ` <20190731175818.20159-25-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
2019-07-31 17:58 ` [PATCH 25/26] drm/amdgpu: support gfx ras error injection and err_cnt query Alex Deucher
2019-07-31 17:58 ` [PATCH 26/26] drm/amdgpu: disable inject for failed subblocks of gfx Alex Deucher
25 siblings, 1 reply; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Tao Zhou, Dennis Li, Hawking Zhang
From: Dennis Li <Dennis.Li@amd.com>
Add functions for RAS error inject and query error counter
Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 +
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 530 +++++++++++++++++++++++-
2 files changed, 531 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 1199b5828b90..554a59b3c4a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -196,6 +196,8 @@ struct amdgpu_gfx_funcs {
uint32_t *dst);
void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe,
u32 queue, u32 vmid);
+ int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
+ int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status);
};
struct amdgpu_ngg_buf {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index fba552b93cc8..d7902e782be4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -318,6 +318,135 @@ enum ta_ras_gfx_subblock {
TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
TA_RAS_BLOCK__GFX_MAX
};
+
+struct ras_gfx_subblock {
+ unsigned char *name;
+ int ta_subblock;
+ int supported_error_type;
+};
+
+#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d) \
+ [AMDGPU_RAS_BLOCK__##subblock] = { \
+ #subblock, \
+ TA_RAS_BLOCK__##subblock, \
+ ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
+ }
+
+static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1),
+};
+
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
@@ -536,6 +665,10 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
+static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status);
+static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
+ void *inject_if);
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
@@ -1621,7 +1754,9 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
.read_wave_data = &gfx_v9_0_read_wave_data,
.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
- .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
+ .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
+ .ras_error_inject = &gfx_v9_0_ras_error_inject,
+ .query_ras_error_count = &gfx_v9_0_query_ras_error_count
};
static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
@@ -5476,6 +5611,399 @@ static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
return AMDGPU_RAS_UE;
}
+static const struct {
+ const char *name;
+ uint32_t ip;
+ uint32_t inst;
+ uint32_t seg;
+ uint32_t reg_offset;
+ uint32_t per_se_instance;
+ int32_t num_instance;
+ uint32_t sec_count_mask;
+ uint32_t ded_count_mask;
+} gfx_ras_edc_regs[] = {
+ { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1,
+ REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
+ REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
+ { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1,
+ REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT),
+ REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) },
+ { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
+ REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 },
+ { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
+ REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 },
+ { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1,
+ REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT),
+ REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) },
+ { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
+ REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 },
+ { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
+ REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
+ REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) },
+ { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1,
+ REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT),
+ REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) },
+ { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1,
+ REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 },
+ { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1,
+ REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 },
+ { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1,
+ REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 },
+ { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
+ REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC),
+ REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) },
+ { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
+ REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 },
+ { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+ 0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
+ REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
+ { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
+ REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
+ REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
+ { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
+ REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 },
+ { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
+ REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
+ REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
+ { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
+ REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
+ REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
+ { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
+ REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
+ REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
+ { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
+ REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
+ REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
+ { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1,
+ REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 },
+ { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
+ REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
+ REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
+ { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
+ REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 },
+ { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
+ REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 },
+ { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
+ REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 },
+ { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
+ REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 },
+ { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
+ REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 },
+ { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
+ REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 },
+ { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
+ REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
+ REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
+ { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
+ REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
+ REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
+ { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
+ REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
+ REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
+ { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
+ REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
+ REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
+ { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
+ REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
+ REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
+ { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
+ REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 },
+ { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
+ REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 },
+ { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
+ REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 },
+ { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
+ REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 },
+ { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
+ REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 },
+ { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
+ REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 },
+ { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
+ REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 },
+ { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
+ REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 },
+ { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
+ 16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 },
+ { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
+ 0 },
+ { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
+ 16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 },
+ { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
+ 0 },
+ { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
+ 16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 },
+ { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72,
+ REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 },
+ { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
+ { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
+ { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 },
+ { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 },
+ { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 },
+ { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
+ { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
+ REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
+ { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
+ REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
+ REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
+ { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
+ REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
+ REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
+ { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
+ REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 },
+ { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
+ REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT),
+ REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) },
+ { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
+ REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT),
+ REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) },
+ { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
+ REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT),
+ REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) },
+ { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
+ REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT),
+ REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) },
+ { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
+ REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT),
+ REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) },
+ { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
+ REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT),
+ REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) },
+ { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
+ REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT),
+ REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) },
+ { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
+ REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
+ 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
+ REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
+ REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
+ 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
+ REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
+ REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
+ 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
+ REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
+ 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
+ REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
+ { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
+ 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
+ REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
+ 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
+ REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
+ 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
+ REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
+ { "SQC_INST_BANKA_UTCL1_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
+ REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
+ 0 },
+ { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
+ 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 },
+ { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
+ 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 },
+ { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
+ 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 },
+ { "SQC_DATA_BANKA_DIRTY_BIT_RAM",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
+ REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 },
+ { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
+ REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
+ REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
+ 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
+ REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
+ { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
+ 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
+ REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
+ 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
+ REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
+ 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
+ REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
+ { "SQC_INST_BANKB_UTCL1_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
+ REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
+ 0 },
+ { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
+ 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 },
+ { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
+ 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 },
+ { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
+ 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 },
+ { "SQC_DATA_BANKB_DIRTY_BIT_RAM",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
+ REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 },
+ { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
+ { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
+ { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
+ { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
+ { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
+ { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 },
+ { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 },
+ { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 },
+ { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 },
+ { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 },
+ { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
+ { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
+ { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
+ { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 },
+ { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 },
+ { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 },
+ { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 },
+ { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 },
+ { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
+ REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 },
+};
+
+static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
+ void *inject_if)
+{
+ struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
+ int ret;
+ struct ta_ras_trigger_error_input block_info = { 0 };
+
+ if (adev->asic_type != CHIP_VEGA20)
+ return -EINVAL;
+
+ if (!ras_gfx_subblocks[info->head.sub_block_index].name)
+ return -EPERM;
+
+ if (!(ras_gfx_subblocks[info->head.sub_block_index].supported_error_type &
+ info->head.type))
+ return -EPERM;
+
+ block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
+ block_info.sub_block_index =
+ ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
+ block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
+ block_info.address = info->address;
+ block_info.value = info->value;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ ret = psp_ras_trigger_error(&adev->psp, &block_info);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return ret;
+}
+
+static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count, ded_count;
+ uint32_t i;
+ uint32_t reg_value;
+ uint32_t se_id, instance_id;
+
+ if (adev->asic_type != CHIP_VEGA20)
+ return -EINVAL;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
+ for (instance_id = 0; instance_id < 256; instance_id++) {
+ for (i = 0;
+ i < sizeof(gfx_ras_edc_regs) / sizeof(gfx_ras_edc_regs[0]);
+ i++) {
+ if (se_id != 0 &&
+ !gfx_ras_edc_regs[i].per_se_instance)
+ continue;
+ if (instance_id >= gfx_ras_edc_regs[i].num_instance)
+ continue;
+
+ gfx_v9_0_select_se_sh(adev, se_id, 0,
+ instance_id);
+
+ reg_value = RREG32(
+ adev->reg_offset[gfx_ras_edc_regs[i].ip]
+ [gfx_ras_edc_regs[i].inst]
+ [gfx_ras_edc_regs[i].seg] +
+ gfx_ras_edc_regs[i].reg_offset);
+ sec_count = reg_value &
+ gfx_ras_edc_regs[i].sec_count_mask;
+ ded_count = reg_value &
+ gfx_ras_edc_regs[i].ded_count_mask;
+ if (sec_count) {
+ DRM_INFO(
+ "Instance[%d][%d]: SubBlock %s, SEC %d\n",
+ se_id, instance_id,
+ gfx_ras_edc_regs[i].name,
+ sec_count);
+ err_data->ce_count++;
+ }
+
+ if (ded_count) {
+ DRM_INFO(
+ "Instance[%d][%d]: SubBlock %s, DED %d\n",
+ se_id, instance_id,
+ gfx_ras_edc_regs[i].name,
+ ded_count);
+ err_data->ue_count++;
+ }
+ }
+ }
+ }
+ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 25/26] drm/amdgpu: support gfx ras error injection and err_cnt query
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (23 preceding siblings ...)
2019-07-31 17:58 ` [PATCH 24/26] drm/amdgpu: add RAS callback for gfx Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
2019-07-31 17:58 ` [PATCH 26/26] drm/amdgpu: disable inject for failed subblocks of gfx Alex Deucher
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Tao Zhou, Dennis Li, Hawking Zhang
From: Dennis Li <Dennis.Li@amd.com>
check gfx error count in both ras querry function and
ras interrupt handler.
gfx ras is still disabled by default due to known stability
issue found in gpu reset.
Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 19 ++++++++++++++++---
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 ++
2 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index ccd5863bca88..a96b0f17c619 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -600,6 +600,10 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
if (adev->umc.funcs->query_ras_error_count)
adev->umc.funcs->query_ras_error_count(adev, &err_data);
break;
+ case AMDGPU_RAS_BLOCK__GFX:
+ if (adev->gfx.funcs->query_ras_error_count)
+ adev->gfx.funcs->query_ras_error_count(adev, &err_data);
+ break;
default:
break;
}
@@ -637,13 +641,22 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
if (!obj)
return -EINVAL;
- if (block_info.block_id != TA_RAS_BLOCK__UMC) {
+ switch (info->head.block) {
+ case AMDGPU_RAS_BLOCK__GFX:
+ if (adev->gfx.funcs->ras_error_inject)
+ ret = adev->gfx.funcs->ras_error_inject(adev, info);
+ else
+ ret = -EINVAL;
+ break;
+ case AMDGPU_RAS_BLOCK__UMC:
+ ret = psp_ras_trigger_error(&adev->psp, &block_info);
+ break;
+ default:
DRM_INFO("%s error injection is not supported yet\n",
ras_block_str(info->head.block));
- return -EINVAL;
+ ret = -EINVAL;
}
- ret = psp_ras_trigger_error(&adev->psp, &block_info);
if (ret)
DRM_ERROR("RAS ERROR: inject %s error failed ret %d\n",
ras_block_str(info->head.block),
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index d7902e782be4..206176710f79 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5607,6 +5607,8 @@ static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
{
/* TODO ue will trigger an interrupt. */
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ if (adev->gfx.funcs->query_ras_error_count)
+ adev->gfx.funcs->query_ras_error_count(adev, err_data);
amdgpu_ras_reset_gpu(adev, 0);
return AMDGPU_RAS_UE;
}
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 26/26] drm/amdgpu: disable inject for failed subblocks of gfx
[not found] ` <20190731175818.20159-1-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
` (24 preceding siblings ...)
2019-07-31 17:58 ` [PATCH 25/26] drm/amdgpu: support gfx ras error injection and err_cnt query Alex Deucher
@ 2019-07-31 17:58 ` Alex Deucher
25 siblings, 0 replies; 28+ messages in thread
From: Alex Deucher @ 2019-07-31 17:58 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Tao Zhou, Dennis Li, Hawking Zhang
From: Dennis Li <Dennis.Li@amd.com>
some subblocks of gfx fail in inject test, disable them
Change-Id: I54176e291cf5d58a94838ec688a96289c6cebb46
Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 281 +++++++++++++++-----------
1 file changed, 165 insertions(+), 116 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 206176710f79..bcd0301eee1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -322,129 +322,166 @@ enum ta_ras_gfx_subblock {
struct ras_gfx_subblock {
unsigned char *name;
int ta_subblock;
- int supported_error_type;
+ int hw_supported_error_type;
+ int sw_supported_error_type;
};
-#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d) \
+#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
[AMDGPU_RAS_BLOCK__##subblock] = { \
#subblock, \
TA_RAS_BLOCK__##subblock, \
((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
+ (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \
}
static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
- AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1),
- AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1),
- AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
+ 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
+ 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
+ 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
+ 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
+ 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
+ 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
+ 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
+ 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
+ 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
+ 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
+ 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
+ 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
@@ -5923,9 +5960,21 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
if (!ras_gfx_subblocks[info->head.sub_block_index].name)
return -EPERM;
- if (!(ras_gfx_subblocks[info->head.sub_block_index].supported_error_type &
- info->head.type))
+ if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
+ info->head.type)) {
+ DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
+ ras_gfx_subblocks[info->head.sub_block_index].name,
+ info->head.type);
return -EPERM;
+ }
+
+ if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
+ info->head.type)) {
+ DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
+ ras_gfx_subblocks[info->head.sub_block_index].name,
+ info->head.type);
+ return -EPERM;
+ }
block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
block_info.sub_block_index =
--
2.20.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 28+ messages in thread