* [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers
@ 2024-04-17 19:53 Alex Deucher
2024-04-18 17:54 ` Felix Kuehling
0 siblings, 1 reply; 7+ messages in thread
From: Alex Deucher @ 2024-04-17 19:53 UTC (permalink / raw)
To: amd-gfx; +Cc: Alex Deucher
Makes it easier to review the logs when there are MES
errors.
v2: use dbg for emitted, add helpers for fetching strings
v3: fix missing commas (Harish)
Reviewed by Shaoyun.liu <Shaoyun.liu@amd.com> (v2)
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 78 ++++++++++++++++++++++++--
1 file changed, 74 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 81833395324a0..414b7beff397f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -100,18 +100,72 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
.insert_nop = amdgpu_ring_insert_nop,
};
+static const char *mes_v11_0_opcodes[] = {
+ "MES_SCH_API_SET_HW_RSRC",
+ "MES_SCH_API_SET_SCHEDULING_CONFIG",
+ "MES_SCH_API_ADD_QUEUE",
+ "MES_SCH_API_REMOVE_QUEUE",
+ "MES_SCH_API_PERFORM_YIELD",
+ "MES_SCH_API_SET_GANG_PRIORITY_LEVEL",
+ "MES_SCH_API_SUSPEND",
+ "MES_SCH_API_RESUME",
+ "MES_SCH_API_RESET",
+ "MES_SCH_API_SET_LOG_BUFFER",
+ "MES_SCH_API_CHANGE_GANG_PRORITY",
+ "MES_SCH_API_QUERY_SCHEDULER_STATUS",
+ "MES_SCH_API_PROGRAM_GDS",
+ "MES_SCH_API_SET_DEBUG_VMID",
+ "MES_SCH_API_MISC",
+ "MES_SCH_API_UPDATE_ROOT_PAGE_TABLE",
+ "MES_SCH_API_AMD_LOG",
+};
+
+static const char *mes_v11_0_misc_opcodes[] = {
+ "MESAPI_MISC__WRITE_REG",
+ "MESAPI_MISC__INV_GART",
+ "MESAPI_MISC__QUERY_STATUS",
+ "MESAPI_MISC__READ_REG",
+ "MESAPI_MISC__WAIT_REG_MEM",
+ "MESAPI_MISC__SET_SHADER_DEBUGGER",
+};
+
+static const char *mes_v11_0_get_op_string(union MESAPI__MISC *x_pkt)
+{
+ const char *op_str = NULL;
+
+ if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
+ op_str = mes_v11_0_opcodes[x_pkt->header.opcode];
+
+ return op_str;
+}
+
+static const char *mes_v11_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
+{
+ const char *op_str = NULL;
+
+ if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
+ (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes)))
+ op_str = mes_v11_0_misc_opcodes[x_pkt->opcode];
+
+ return op_str;
+}
+
static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
void *pkt, int size,
int api_status_off)
{
int ndw = size / 4;
signed long r;
- union MESAPI__ADD_QUEUE *x_pkt = pkt;
+ union MESAPI__MISC *x_pkt = pkt;
struct MES_API_STATUS *api_status;
struct amdgpu_device *adev = mes->adev;
struct amdgpu_ring *ring = &mes->ring;
unsigned long flags;
signed long timeout = 3000000; /* 3000 ms */
+ const char *op_str, *misc_op_str;
+
+ if (x_pkt->header.opcode >= MES_SCH_API_MAX)
+ return -EINVAL;
if (amdgpu_emu_mode) {
timeout *= 100;
@@ -135,13 +189,29 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&mes->ring_lock, flags);
- DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
+ op_str = mes_v11_0_get_op_string(x_pkt);
+ misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);
+
+ if (misc_op_str)
+ dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, misc_op_str);
+ else if (op_str)
+ dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
+ else
+ dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
timeout);
if (r < 1) {
- DRM_ERROR("MES failed to response msg=%d\n",
- x_pkt->header.opcode);
+
+ if (misc_op_str)
+ dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
+ op_str, misc_op_str);
+ else if (op_str)
+ dev_err(adev->dev, "MES failed to respond to msg=%s\n",
+ op_str);
+ else
+ dev_err(adev->dev, "MES failed to respond to msg=%d\n",
+ x_pkt->header.opcode);
while (halt_if_hws_hang)
schedule();
--
2.44.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers
2024-04-17 19:53 [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers Alex Deucher
@ 2024-04-18 17:54 ` Felix Kuehling
0 siblings, 0 replies; 7+ messages in thread
From: Felix Kuehling @ 2024-04-18 17:54 UTC (permalink / raw)
To: amd-gfx, Deucher, Alexander
On 2024-04-17 15:53, Alex Deucher wrote:
> Makes it easier to review the logs when there are MES
> errors.
>
> v2: use dbg for emitted, add helpers for fetching strings
> v3: fix missing commas (Harish)
>
> Reviewed by Shaoyun.liu <Shaoyun.liu@amd.com> (v2)
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 78 ++++++++++++++++++++++++--
> 1 file changed, 74 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index 81833395324a0..414b7beff397f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -100,18 +100,72 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
> .insert_nop = amdgpu_ring_insert_nop,
> };
>
> +static const char *mes_v11_0_opcodes[] = {
> + "MES_SCH_API_SET_HW_RSRC",
> + "MES_SCH_API_SET_SCHEDULING_CONFIG",
> + "MES_SCH_API_ADD_QUEUE",
> + "MES_SCH_API_REMOVE_QUEUE",
> + "MES_SCH_API_PERFORM_YIELD",
> + "MES_SCH_API_SET_GANG_PRIORITY_LEVEL",
> + "MES_SCH_API_SUSPEND",
> + "MES_SCH_API_RESUME",
> + "MES_SCH_API_RESET",
> + "MES_SCH_API_SET_LOG_BUFFER",
> + "MES_SCH_API_CHANGE_GANG_PRORITY",
> + "MES_SCH_API_QUERY_SCHEDULER_STATUS",
> + "MES_SCH_API_PROGRAM_GDS",
> + "MES_SCH_API_SET_DEBUG_VMID",
> + "MES_SCH_API_MISC",
> + "MES_SCH_API_UPDATE_ROOT_PAGE_TABLE",
> + "MES_SCH_API_AMD_LOG",
Maybe drop the prefixes. They don't add any information value and only
bloat the log messages and module binary size. Other than that, the patch is
Acked-by: Felix Kuehling <felix.kuehling@amd.com>
> +};
> +
> +static const char *mes_v11_0_misc_opcodes[] = {
> + "MESAPI_MISC__WRITE_REG",
> + "MESAPI_MISC__INV_GART",
> + "MESAPI_MISC__QUERY_STATUS",
> + "MESAPI_MISC__READ_REG",
> + "MESAPI_MISC__WAIT_REG_MEM",
> + "MESAPI_MISC__SET_SHADER_DEBUGGER",
> +};
> +
> +static const char *mes_v11_0_get_op_string(union MESAPI__MISC *x_pkt)
> +{
> + const char *op_str = NULL;
> +
> + if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
> + op_str = mes_v11_0_opcodes[x_pkt->header.opcode];
> +
> + return op_str;
> +}
> +
> +static const char *mes_v11_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
> +{
> + const char *op_str = NULL;
> +
> + if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
> + (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes)))
> + op_str = mes_v11_0_misc_opcodes[x_pkt->opcode];
> +
> + return op_str;
> +}
> +
> static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
> void *pkt, int size,
> int api_status_off)
> {
> int ndw = size / 4;
> signed long r;
> - union MESAPI__ADD_QUEUE *x_pkt = pkt;
> + union MESAPI__MISC *x_pkt = pkt;
> struct MES_API_STATUS *api_status;
> struct amdgpu_device *adev = mes->adev;
> struct amdgpu_ring *ring = &mes->ring;
> unsigned long flags;
> signed long timeout = 3000000; /* 3000 ms */
> + const char *op_str, *misc_op_str;
> +
> + if (x_pkt->header.opcode >= MES_SCH_API_MAX)
> + return -EINVAL;
>
> if (amdgpu_emu_mode) {
> timeout *= 100;
> @@ -135,13 +189,29 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
> amdgpu_ring_commit(ring);
> spin_unlock_irqrestore(&mes->ring_lock, flags);
>
> - DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
> + op_str = mes_v11_0_get_op_string(x_pkt);
> + misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);
> +
> + if (misc_op_str)
> + dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, misc_op_str);
> + else if (op_str)
> + dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
> + else
> + dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
>
> r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
> timeout);
> if (r < 1) {
> - DRM_ERROR("MES failed to response msg=%d\n",
> - x_pkt->header.opcode);
> +
> + if (misc_op_str)
> + dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
> + op_str, misc_op_str);
> + else if (op_str)
> + dev_err(adev->dev, "MES failed to respond to msg=%s\n",
> + op_str);
> + else
> + dev_err(adev->dev, "MES failed to respond to msg=%d\n",
> + x_pkt->header.opcode);
>
> while (halt_if_hws_hang)
> schedule();
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers
2024-04-08 18:25 Alex Deucher
@ 2024-04-12 4:16 ` Alex Deucher
0 siblings, 0 replies; 7+ messages in thread
From: Alex Deucher @ 2024-04-12 4:16 UTC (permalink / raw)
To: Alex Deucher; +Cc: amd-gfx
Ping?
On Mon, Apr 8, 2024 at 3:02 PM Alex Deucher <alexander.deucher@amd.com> wrote:
>
> Makes it easier to review the logs when there are MES
> errors.
>
> v2: use dbg for emitted, add helpers for fetching strings
>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 78 ++++++++++++++++++++++++--
> 1 file changed, 74 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index 072c478665ade..69d39ba726e12 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -100,18 +100,72 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
> .insert_nop = amdgpu_ring_insert_nop,
> };
>
> +static const char *mes_v11_0_opcodes[] = {
> + "MES_SCH_API_SET_HW_RSRC",
> + "MES_SCH_API_SET_SCHEDULING_CONFIG",
> + "MES_SCH_API_ADD_QUEUE"
> + "MES_SCH_API_REMOVE_QUEUE"
> + "MES_SCH_API_PERFORM_YIELD"
> + "MES_SCH_API_SET_GANG_PRIORITY_LEVEL"
> + "MES_SCH_API_SUSPEND"
> + "MES_SCH_API_RESUME"
> + "MES_SCH_API_RESET"
> + "MES_SCH_API_SET_LOG_BUFFER"
> + "MES_SCH_API_CHANGE_GANG_PRORITY"
> + "MES_SCH_API_QUERY_SCHEDULER_STATUS"
> + "MES_SCH_API_PROGRAM_GDS"
> + "MES_SCH_API_SET_DEBUG_VMID"
> + "MES_SCH_API_MISC"
> + "MES_SCH_API_UPDATE_ROOT_PAGE_TABLE"
> + "MES_SCH_API_AMD_LOG"
> +};
> +
> +static const char *mes_v11_0_misc_opcodes[] = {
> + "MESAPI_MISC__WRITE_REG",
> + "MESAPI_MISC__INV_GART",
> + "MESAPI_MISC__QUERY_STATUS",
> + "MESAPI_MISC__READ_REG",
> + "MESAPI_MISC__WAIT_REG_MEM",
> + "MESAPI_MISC__SET_SHADER_DEBUGGER",
> +};
> +
> +static const char *mes_v11_0_get_op_string(union MESAPI__MISC *x_pkt)
> +{
> + const char *op_str = NULL;
> +
> + if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
> + op_str = mes_v11_0_opcodes[x_pkt->header.opcode];
> +
> + return op_str;
> +}
> +
> +static const char *mes_v11_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
> +{
> + const char *op_str = NULL;
> +
> + if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
> + (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes)))
> + op_str = mes_v11_0_misc_opcodes[x_pkt->opcode];
> +
> + return op_str;
> +}
> +
> static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
> void *pkt, int size,
> int api_status_off)
> {
> int ndw = size / 4;
> signed long r;
> - union MESAPI__ADD_QUEUE *x_pkt = pkt;
> + union MESAPI__MISC *x_pkt = pkt;
> struct MES_API_STATUS *api_status;
> struct amdgpu_device *adev = mes->adev;
> struct amdgpu_ring *ring = &mes->ring;
> unsigned long flags;
> signed long timeout = adev->usec_timeout;
> + const char *op_str, *misc_op_str;
> +
> + if (x_pkt->header.opcode >= MES_SCH_API_MAX)
> + return -EINVAL;
>
> if (amdgpu_emu_mode) {
> timeout *= 100;
> @@ -135,13 +189,29 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
> amdgpu_ring_commit(ring);
> spin_unlock_irqrestore(&mes->ring_lock, flags);
>
> - DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
> + op_str = mes_v11_0_get_op_string(x_pkt);
> + misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);
> +
> + if (misc_op_str)
> + dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, misc_op_str);
> + else if (op_str)
> + dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
> + else
> + dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
>
> r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
> timeout);
> if (r < 1) {
> - DRM_ERROR("MES failed to response msg=%d\n",
> - x_pkt->header.opcode);
> +
> + if (misc_op_str)
> + dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
> + op_str, misc_op_str);
> + else if (op_str)
> + dev_err(adev->dev, "MES failed to respond to msg=%s\n",
> + op_str);
> + else
> + dev_err(adev->dev, "MES failed to respond to msg=%d\n",
> + x_pkt->header.opcode);
>
> while (halt_if_hws_hang)
> schedule();
> --
> 2.44.0
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers
@ 2024-04-08 18:25 Alex Deucher
2024-04-12 4:16 ` Alex Deucher
0 siblings, 1 reply; 7+ messages in thread
From: Alex Deucher @ 2024-04-08 18:25 UTC (permalink / raw)
To: amd-gfx; +Cc: Alex Deucher
Makes it easier to review the logs when there are MES
errors.
v2: use dbg for emitted, add helpers for fetching strings
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 78 ++++++++++++++++++++++++--
1 file changed, 74 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 072c478665ade..69d39ba726e12 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -100,18 +100,72 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
.insert_nop = amdgpu_ring_insert_nop,
};
+static const char *mes_v11_0_opcodes[] = {
+ "MES_SCH_API_SET_HW_RSRC",
+ "MES_SCH_API_SET_SCHEDULING_CONFIG",
+ "MES_SCH_API_ADD_QUEUE"
+ "MES_SCH_API_REMOVE_QUEUE"
+ "MES_SCH_API_PERFORM_YIELD"
+ "MES_SCH_API_SET_GANG_PRIORITY_LEVEL"
+ "MES_SCH_API_SUSPEND"
+ "MES_SCH_API_RESUME"
+ "MES_SCH_API_RESET"
+ "MES_SCH_API_SET_LOG_BUFFER"
+ "MES_SCH_API_CHANGE_GANG_PRORITY"
+ "MES_SCH_API_QUERY_SCHEDULER_STATUS"
+ "MES_SCH_API_PROGRAM_GDS"
+ "MES_SCH_API_SET_DEBUG_VMID"
+ "MES_SCH_API_MISC"
+ "MES_SCH_API_UPDATE_ROOT_PAGE_TABLE"
+ "MES_SCH_API_AMD_LOG"
+};
+
+static const char *mes_v11_0_misc_opcodes[] = {
+ "MESAPI_MISC__WRITE_REG",
+ "MESAPI_MISC__INV_GART",
+ "MESAPI_MISC__QUERY_STATUS",
+ "MESAPI_MISC__READ_REG",
+ "MESAPI_MISC__WAIT_REG_MEM",
+ "MESAPI_MISC__SET_SHADER_DEBUGGER",
+};
+
+static const char *mes_v11_0_get_op_string(union MESAPI__MISC *x_pkt)
+{
+ const char *op_str = NULL;
+
+ if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
+ op_str = mes_v11_0_opcodes[x_pkt->header.opcode];
+
+ return op_str;
+}
+
+static const char *mes_v11_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
+{
+ const char *op_str = NULL;
+
+ if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
+ (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes)))
+ op_str = mes_v11_0_misc_opcodes[x_pkt->opcode];
+
+ return op_str;
+}
+
static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
void *pkt, int size,
int api_status_off)
{
int ndw = size / 4;
signed long r;
- union MESAPI__ADD_QUEUE *x_pkt = pkt;
+ union MESAPI__MISC *x_pkt = pkt;
struct MES_API_STATUS *api_status;
struct amdgpu_device *adev = mes->adev;
struct amdgpu_ring *ring = &mes->ring;
unsigned long flags;
signed long timeout = adev->usec_timeout;
+ const char *op_str, *misc_op_str;
+
+ if (x_pkt->header.opcode >= MES_SCH_API_MAX)
+ return -EINVAL;
if (amdgpu_emu_mode) {
timeout *= 100;
@@ -135,13 +189,29 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&mes->ring_lock, flags);
- DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
+ op_str = mes_v11_0_get_op_string(x_pkt);
+ misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);
+
+ if (misc_op_str)
+ dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, misc_op_str);
+ else if (op_str)
+ dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
+ else
+ dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
timeout);
if (r < 1) {
- DRM_ERROR("MES failed to response msg=%d\n",
- x_pkt->header.opcode);
+
+ if (misc_op_str)
+ dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
+ op_str, misc_op_str);
+ else if (op_str)
+ dev_err(adev->dev, "MES failed to respond to msg=%s\n",
+ op_str);
+ else
+ dev_err(adev->dev, "MES failed to respond to msg=%d\n",
+ x_pkt->header.opcode);
while (halt_if_hws_hang)
schedule();
--
2.44.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers
2024-04-02 0:42 ` Liu, Shaoyun
@ 2024-04-04 10:02 ` Sharma, Shashank
0 siblings, 0 replies; 7+ messages in thread
From: Sharma, Shashank @ 2024-04-04 10:02 UTC (permalink / raw)
To: Liu, Shaoyun, Deucher, Alexander, amd-gfx
Hi Alex,
On 02/04/2024 02:42, Liu, Shaoyun wrote:
> [AMD Official Use Only - General]
>
> [AMD Official Use Only - General]
>
> Comments inline
>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Alex Deucher
> Sent: Saturday, March 30, 2024 10:01 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
> Subject: [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers
>
> Makes it easier to review the logs when there are MES errors.
>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 65 ++++++++++++++++++++++++--
> 1 file changed, 61 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index 072c478665ade..73a4bb0f5ba0f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -100,19 +100,51 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
> .insert_nop = amdgpu_ring_insert_nop,
> };
>
> +static const char *mes_v11_0_opcodes[] = {
> + "MES_SCH_API_SET_HW_RSRC",
> + "MES_SCH_API_SET_SCHEDULING_CONFIG",
> + "MES_SCH_API_ADD_QUEUE"
> + "MES_SCH_API_REMOVE_QUEUE"
> + "MES_SCH_API_PERFORM_YIELD"
> + "MES_SCH_API_SET_GANG_PRIORITY_LEVEL"
> + "MES_SCH_API_SUSPEND"
> + "MES_SCH_API_RESUME"
> + "MES_SCH_API_RESET"
> + "MES_SCH_API_SET_LOG_BUFFER"
> + "MES_SCH_API_CHANGE_GANG_PRORITY"
> + "MES_SCH_API_QUERY_SCHEDULER_STATUS"
> + "MES_SCH_API_PROGRAM_GDS"
> + "MES_SCH_API_SET_DEBUG_VMID"
> + "MES_SCH_API_MISC"
> + "MES_SCH_API_UPDATE_ROOT_PAGE_TABLE"
> + "MES_SCH_API_AMD_LOG"
> +};
> +
> +static const char *mes_v11_0_misc_opcodes[] = {
> + "MESAPI_MISC__WRITE_REG",
> + "MESAPI_MISC__INV_GART",
> + "MESAPI_MISC__QUERY_STATUS",
> + "MESAPI_MISC__READ_REG",
> + "MESAPI_MISC__WAIT_REG_MEM",
> + "MESAPI_MISC__SET_SHADER_DEBUGGER",
> +};
> +
> static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
> void *pkt, int size,
> int api_status_off)
> {
> int ndw = size / 4;
> signed long r;
> - union MESAPI__ADD_QUEUE *x_pkt = pkt;
> + union MESAPI__MISC *x_pkt = pkt;
> struct MES_API_STATUS *api_status;
> struct amdgpu_device *adev = mes->adev;
> struct amdgpu_ring *ring = &mes->ring;
> unsigned long flags;
> signed long timeout = adev->usec_timeout;
>
> + if (x_pkt->header.opcode >= MES_SCH_API_MAX)
> + return -EINVAL;
> +
> if (amdgpu_emu_mode) {
> timeout *= 100;
> } else if (amdgpu_sriov_vf(adev)) {
> @@ -135,13 +167,38 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
> amdgpu_ring_commit(ring);
> spin_unlock_irqrestore(&mes->ring_lock, fl
> - DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
> + if (x_pkt->header.opcode == MES_SCH_API_MISC) {
> + if (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes))
> + dev_err(adev->dev, "MES msg=%s (%s) was emitted\n",
>
> [shaoyunl] Shouldn't we use DRM_DEBUG for valid condition ?
>
> Regards
> Shaoyun.liu
>
> + mes_v11_0_opcodes[x_pkt->header.opcode],
> + mes_v11_0_misc_opcodes[x_pkt->opcode]);
> + else
> + dev_err(adev->dev, "MES msg=%s (%d) was emitted\n",
> + mes_v11_0_opcodes[x_pkt->header.opcode],
> + x_pkt->opcode);
> + } else if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
> + dev_err(adev->dev, "MES msg=%s was emitted\n",
> + mes_v11_0_opcodes[x_pkt->header.opcode]);
> + else
> + dev_err(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
>
> r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
> timeout);
> if (r < 1) {
> - DRM_ERROR("MES failed to response msg=%d\n",
> - x_pkt->header.opcode);
> + if (x_pkt->header.opcode == MES_SCH_API_MISC) {
> + if (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes))
> + dev_err(adev->dev, "MES failed to response msg=%s (%s)\n",
> + mes_v11_0_opcodes[x_pkt->header.opcode],
> + mes_v11_0_misc_opcodes[x_pkt->opcode]);
> + else
> + dev_err(adev->dev, "MES failed to response msg=%s (%d)\n",
> + mes_v11_0_opcodes[x_pkt->header.opcode], x_pkt->opcode);
> + } else if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
> + dev_err(adev->dev, "MES failed to response msg=%s\n",
> + mes_v11_0_opcodes[x_pkt->header.opcode]);
> + else
> + dev_err(adev->dev, "MES failed to response msg=%d\n",
> + x_pkt->header.opcode);
Please consider this small reformatting here for better readability:
static const char *
amdgpu_mes_find_op_name(union MESAPI__MISC *x_pkt)
{
const char *op_name = NULL;
if (x_pkt->header.opcode == MES_SCH_API_MISC) {
if (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes))
op_name = mes_v11_0_misc_opcodes[x_pkt->opcode];
} else {
if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
op_name = mes_v11_0_opcodes[x_pkt->header.opcode];
}
return op_name;
}
op_name = amdgpu_mes_find_op_name(x_pkt)
if (op_name)
DRM_DEBUG_DRIVER("MES msg=%s was emitted\n", op_name);
else
DRM_DEBUG_DRIVER("MES msg=%d was emitted\n", x_pkt->header.opcode);
r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout);
if (r < 1) {
if (op_name)
dev_err("MES failed to response msg=%s \n", op_name);
else
dev_err(adev->dev, "MES failed to response msg=%d\n",
x_pkt->header.opcode);
}
- Shashank
> while (halt_if_hws_hang)
> schedule();
> --
> 2.44.0
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers
2024-03-30 14:01 Alex Deucher
@ 2024-04-02 0:42 ` Liu, Shaoyun
2024-04-04 10:02 ` Sharma, Shashank
0 siblings, 1 reply; 7+ messages in thread
From: Liu, Shaoyun @ 2024-04-02 0:42 UTC (permalink / raw)
To: Deucher, Alexander, amd-gfx; +Cc: Deucher, Alexander
[AMD Official Use Only - General]
Comments inline
-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Alex Deucher
Sent: Saturday, March 30, 2024 10:01 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
Subject: [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers
Makes it easier to review the logs when there are MES errors.
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 65 ++++++++++++++++++++++++--
1 file changed, 61 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 072c478665ade..73a4bb0f5ba0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -100,19 +100,51 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
.insert_nop = amdgpu_ring_insert_nop,
};
+static const char *mes_v11_0_opcodes[] = {
+ "MES_SCH_API_SET_HW_RSRC",
+ "MES_SCH_API_SET_SCHEDULING_CONFIG",
+ "MES_SCH_API_ADD_QUEUE"
+ "MES_SCH_API_REMOVE_QUEUE"
+ "MES_SCH_API_PERFORM_YIELD"
+ "MES_SCH_API_SET_GANG_PRIORITY_LEVEL"
+ "MES_SCH_API_SUSPEND"
+ "MES_SCH_API_RESUME"
+ "MES_SCH_API_RESET"
+ "MES_SCH_API_SET_LOG_BUFFER"
+ "MES_SCH_API_CHANGE_GANG_PRORITY"
+ "MES_SCH_API_QUERY_SCHEDULER_STATUS"
+ "MES_SCH_API_PROGRAM_GDS"
+ "MES_SCH_API_SET_DEBUG_VMID"
+ "MES_SCH_API_MISC"
+ "MES_SCH_API_UPDATE_ROOT_PAGE_TABLE"
+ "MES_SCH_API_AMD_LOG"
+};
+
+static const char *mes_v11_0_misc_opcodes[] = {
+ "MESAPI_MISC__WRITE_REG",
+ "MESAPI_MISC__INV_GART",
+ "MESAPI_MISC__QUERY_STATUS",
+ "MESAPI_MISC__READ_REG",
+ "MESAPI_MISC__WAIT_REG_MEM",
+ "MESAPI_MISC__SET_SHADER_DEBUGGER",
+};
+
static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
void *pkt, int size,
int api_status_off)
{
int ndw = size / 4;
signed long r;
- union MESAPI__ADD_QUEUE *x_pkt = pkt;
+ union MESAPI__MISC *x_pkt = pkt;
struct MES_API_STATUS *api_status;
struct amdgpu_device *adev = mes->adev;
struct amdgpu_ring *ring = &mes->ring;
unsigned long flags;
signed long timeout = adev->usec_timeout;
+ if (x_pkt->header.opcode >= MES_SCH_API_MAX)
+ return -EINVAL;
+
if (amdgpu_emu_mode) {
timeout *= 100;
} else if (amdgpu_sriov_vf(adev)) {
@@ -135,13 +167,38 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&mes->ring_lock, fl
- DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
+ if (x_pkt->header.opcode == MES_SCH_API_MISC) {
+ if (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes))
+ dev_err(adev->dev, "MES msg=%s (%s) was emitted\n",
[shaoyunl] Shouldn't we use DRM_DEBUG for valid condition ?
Regards
Shaoyun.liu
+ mes_v11_0_opcodes[x_pkt->header.opcode],
+ mes_v11_0_misc_opcodes[x_pkt->opcode]);
+ else
+ dev_err(adev->dev, "MES msg=%s (%d) was emitted\n",
+ mes_v11_0_opcodes[x_pkt->header.opcode],
+ x_pkt->opcode);
+ } else if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
+ dev_err(adev->dev, "MES msg=%s was emitted\n",
+ mes_v11_0_opcodes[x_pkt->header.opcode]);
+ else
+ dev_err(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
timeout);
if (r < 1) {
- DRM_ERROR("MES failed to response msg=%d\n",
- x_pkt->header.opcode);
+ if (x_pkt->header.opcode == MES_SCH_API_MISC) {
+ if (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes))
+ dev_err(adev->dev, "MES failed to response msg=%s (%s)\n",
+ mes_v11_0_opcodes[x_pkt->header.opcode],
+ mes_v11_0_misc_opcodes[x_pkt->opcode]);
+ else
+ dev_err(adev->dev, "MES failed to response msg=%s (%d)\n",
+ mes_v11_0_opcodes[x_pkt->header.opcode], x_pkt->opcode);
+ } else if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
+ dev_err(adev->dev, "MES failed to response msg=%s\n",
+ mes_v11_0_opcodes[x_pkt->header.opcode]);
+ else
+ dev_err(adev->dev, "MES failed to response msg=%d\n",
+ x_pkt->header.opcode);
while (halt_if_hws_hang)
schedule();
--
2.44.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers
@ 2024-03-30 14:01 Alex Deucher
2024-04-02 0:42 ` Liu, Shaoyun
0 siblings, 1 reply; 7+ messages in thread
From: Alex Deucher @ 2024-03-30 14:01 UTC (permalink / raw)
To: amd-gfx; +Cc: Alex Deucher
Makes it easier to review the logs when there are MES
errors.
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 65 ++++++++++++++++++++++++--
1 file changed, 61 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 072c478665ade..73a4bb0f5ba0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -100,19 +100,51 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
.insert_nop = amdgpu_ring_insert_nop,
};
+static const char *mes_v11_0_opcodes[] = {
+ "MES_SCH_API_SET_HW_RSRC",
+ "MES_SCH_API_SET_SCHEDULING_CONFIG",
+ "MES_SCH_API_ADD_QUEUE"
+ "MES_SCH_API_REMOVE_QUEUE"
+ "MES_SCH_API_PERFORM_YIELD"
+ "MES_SCH_API_SET_GANG_PRIORITY_LEVEL"
+ "MES_SCH_API_SUSPEND"
+ "MES_SCH_API_RESUME"
+ "MES_SCH_API_RESET"
+ "MES_SCH_API_SET_LOG_BUFFER"
+ "MES_SCH_API_CHANGE_GANG_PRORITY"
+ "MES_SCH_API_QUERY_SCHEDULER_STATUS"
+ "MES_SCH_API_PROGRAM_GDS"
+ "MES_SCH_API_SET_DEBUG_VMID"
+ "MES_SCH_API_MISC"
+ "MES_SCH_API_UPDATE_ROOT_PAGE_TABLE"
+ "MES_SCH_API_AMD_LOG"
+};
+
+static const char *mes_v11_0_misc_opcodes[] = {
+ "MESAPI_MISC__WRITE_REG",
+ "MESAPI_MISC__INV_GART",
+ "MESAPI_MISC__QUERY_STATUS",
+ "MESAPI_MISC__READ_REG",
+ "MESAPI_MISC__WAIT_REG_MEM",
+ "MESAPI_MISC__SET_SHADER_DEBUGGER",
+};
+
static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
void *pkt, int size,
int api_status_off)
{
int ndw = size / 4;
signed long r;
- union MESAPI__ADD_QUEUE *x_pkt = pkt;
+ union MESAPI__MISC *x_pkt = pkt;
struct MES_API_STATUS *api_status;
struct amdgpu_device *adev = mes->adev;
struct amdgpu_ring *ring = &mes->ring;
unsigned long flags;
signed long timeout = adev->usec_timeout;
+ if (x_pkt->header.opcode >= MES_SCH_API_MAX)
+ return -EINVAL;
+
if (amdgpu_emu_mode) {
timeout *= 100;
} else if (amdgpu_sriov_vf(adev)) {
@@ -135,13 +167,38 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&mes->ring_lock, flags);
- DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
+ if (x_pkt->header.opcode == MES_SCH_API_MISC) {
+ if (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes))
+ dev_err(adev->dev, "MES msg=%s (%s) was emitted\n",
+ mes_v11_0_opcodes[x_pkt->header.opcode],
+ mes_v11_0_misc_opcodes[x_pkt->opcode]);
+ else
+ dev_err(adev->dev, "MES msg=%s (%d) was emitted\n",
+ mes_v11_0_opcodes[x_pkt->header.opcode],
+ x_pkt->opcode);
+ } else if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
+ dev_err(adev->dev, "MES msg=%s was emitted\n",
+ mes_v11_0_opcodes[x_pkt->header.opcode]);
+ else
+ dev_err(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
timeout);
if (r < 1) {
- DRM_ERROR("MES failed to response msg=%d\n",
- x_pkt->header.opcode);
+ if (x_pkt->header.opcode == MES_SCH_API_MISC) {
+ if (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes))
+ dev_err(adev->dev, "MES failed to response msg=%s (%s)\n",
+ mes_v11_0_opcodes[x_pkt->header.opcode],
+ mes_v11_0_misc_opcodes[x_pkt->opcode]);
+ else
+ dev_err(adev->dev, "MES failed to response msg=%s (%d)\n",
+ mes_v11_0_opcodes[x_pkt->header.opcode], x_pkt->opcode);
+ } else if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
+ dev_err(adev->dev, "MES failed to response msg=%s\n",
+ mes_v11_0_opcodes[x_pkt->header.opcode]);
+ else
+ dev_err(adev->dev, "MES failed to response msg=%d\n",
+ x_pkt->header.opcode);
while (halt_if_hws_hang)
schedule();
--
2.44.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
end of thread, other threads:[~2024-04-18 17:54 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-17 19:53 [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers Alex Deucher
2024-04-18 17:54 ` Felix Kuehling
-- strict thread matches above, loose matches on Subject: below --
2024-04-08 18:25 Alex Deucher
2024-04-12 4:16 ` Alex Deucher
2024-03-30 14:01 Alex Deucher
2024-04-02 0:42 ` Liu, Shaoyun
2024-04-04 10:02 ` Sharma, Shashank
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).