amd-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers
@ 2024-04-17 19:53 Alex Deucher
  2024-04-18 17:54 ` Felix Kuehling
  0 siblings, 1 reply; 7+ messages in thread
From: Alex Deucher @ 2024-04-17 19:53 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher

Makes it easier to review the logs when there are MES
errors.

v2: use dbg for emitted, add helpers for fetching strings
v3: fix missing commas (Harish)

Reviewed-by: Shaoyun.liu <Shaoyun.liu@amd.com> (v2)
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 78 ++++++++++++++++++++++++--
 1 file changed, 74 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 81833395324a0..414b7beff397f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -100,18 +100,72 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
 	.insert_nop = amdgpu_ring_insert_nop,
 };
 
+static const char *mes_v11_0_opcodes[] = {
+	"MES_SCH_API_SET_HW_RSRC",
+	"MES_SCH_API_SET_SCHEDULING_CONFIG",
+	"MES_SCH_API_ADD_QUEUE",
+	"MES_SCH_API_REMOVE_QUEUE",
+	"MES_SCH_API_PERFORM_YIELD",
+	"MES_SCH_API_SET_GANG_PRIORITY_LEVEL",
+	"MES_SCH_API_SUSPEND",
+	"MES_SCH_API_RESUME",
+	"MES_SCH_API_RESET",
+	"MES_SCH_API_SET_LOG_BUFFER",
+	"MES_SCH_API_CHANGE_GANG_PRORITY",
+	"MES_SCH_API_QUERY_SCHEDULER_STATUS",
+	"MES_SCH_API_PROGRAM_GDS",
+	"MES_SCH_API_SET_DEBUG_VMID",
+	"MES_SCH_API_MISC",
+	"MES_SCH_API_UPDATE_ROOT_PAGE_TABLE",
+	"MES_SCH_API_AMD_LOG",
+};
+
+static const char *mes_v11_0_misc_opcodes[] = {
+	"MESAPI_MISC__WRITE_REG",
+	"MESAPI_MISC__INV_GART",
+	"MESAPI_MISC__QUERY_STATUS",
+	"MESAPI_MISC__READ_REG",
+	"MESAPI_MISC__WAIT_REG_MEM",
+	"MESAPI_MISC__SET_SHADER_DEBUGGER",
+};
+
+static const char *mes_v11_0_get_op_string(union MESAPI__MISC *x_pkt)
+{
+	const char *op_str = NULL;
+
+	if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
+		op_str = mes_v11_0_opcodes[x_pkt->header.opcode];
+
+	return op_str;
+}
+
+static const char *mes_v11_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
+{
+	const char *op_str = NULL;
+
+	if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
+	    (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes)))
+		op_str = mes_v11_0_misc_opcodes[x_pkt->opcode];
+
+	return op_str;
+}
+
 static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 						    void *pkt, int size,
 						    int api_status_off)
 {
 	int ndw = size / 4;
 	signed long r;
-	union MESAPI__ADD_QUEUE *x_pkt = pkt;
+	union MESAPI__MISC *x_pkt = pkt;
 	struct MES_API_STATUS *api_status;
 	struct amdgpu_device *adev = mes->adev;
 	struct amdgpu_ring *ring = &mes->ring;
 	unsigned long flags;
 	signed long timeout = 3000000; /* 3000 ms */
+	const char *op_str, *misc_op_str;
+
+	if (x_pkt->header.opcode >= MES_SCH_API_MAX)
+		return -EINVAL;
 
 	if (amdgpu_emu_mode) {
 		timeout *= 100;
@@ -135,13 +189,29 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 	amdgpu_ring_commit(ring);
 	spin_unlock_irqrestore(&mes->ring_lock, flags);
 
-	DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
+	op_str = mes_v11_0_get_op_string(x_pkt);
+	misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);
+
+	if (misc_op_str)
+		dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, misc_op_str);
+	else if (op_str)
+		dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
+	else
+		dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
 
 	r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
 		      timeout);
 	if (r < 1) {
-		DRM_ERROR("MES failed to response msg=%d\n",
-			  x_pkt->header.opcode);
+
+		if (misc_op_str)
+			dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
+				op_str, misc_op_str);
+		else if (op_str)
+			dev_err(adev->dev, "MES failed to respond to msg=%s\n",
+				op_str);
+		else
+			dev_err(adev->dev, "MES failed to respond to msg=%d\n",
+				x_pkt->header.opcode);
 
 		while (halt_if_hws_hang)
 			schedule();
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers
  2024-04-17 19:53 [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers Alex Deucher
@ 2024-04-18 17:54 ` Felix Kuehling
  0 siblings, 0 replies; 7+ messages in thread
From: Felix Kuehling @ 2024-04-18 17:54 UTC (permalink / raw)
  To: amd-gfx, Deucher, Alexander


On 2024-04-17 15:53, Alex Deucher wrote:
> Makes it easier to review the logs when there are MES
> errors.
>
> v2: use dbg for emitted, add helpers for fetching strings
> v3: fix missing commas (Harish)
>
> Reviewed-by: Shaoyun.liu <Shaoyun.liu@amd.com> (v2)
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 78 ++++++++++++++++++++++++--
>   1 file changed, 74 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index 81833395324a0..414b7beff397f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -100,18 +100,72 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
>   	.insert_nop = amdgpu_ring_insert_nop,
>   };
>   
> +static const char *mes_v11_0_opcodes[] = {
> +	"MES_SCH_API_SET_HW_RSRC",
> +	"MES_SCH_API_SET_SCHEDULING_CONFIG",
> +	"MES_SCH_API_ADD_QUEUE",
> +	"MES_SCH_API_REMOVE_QUEUE",
> +	"MES_SCH_API_PERFORM_YIELD",
> +	"MES_SCH_API_SET_GANG_PRIORITY_LEVEL",
> +	"MES_SCH_API_SUSPEND",
> +	"MES_SCH_API_RESUME",
> +	"MES_SCH_API_RESET",
> +	"MES_SCH_API_SET_LOG_BUFFER",
> +	"MES_SCH_API_CHANGE_GANG_PRORITY",
> +	"MES_SCH_API_QUERY_SCHEDULER_STATUS",
> +	"MES_SCH_API_PROGRAM_GDS",
> +	"MES_SCH_API_SET_DEBUG_VMID",
> +	"MES_SCH_API_MISC",
> +	"MES_SCH_API_UPDATE_ROOT_PAGE_TABLE",
> +	"MES_SCH_API_AMD_LOG",

Maybe drop the prefixes. They don't add any informational value and only 
bloat the log messages and module binary size. Other than that, the patch is

Acked-by: Felix Kuehling <felix.kuehling@amd.com>


> +};
> +
> +static const char *mes_v11_0_misc_opcodes[] = {
> +	"MESAPI_MISC__WRITE_REG",
> +	"MESAPI_MISC__INV_GART",
> +	"MESAPI_MISC__QUERY_STATUS",
> +	"MESAPI_MISC__READ_REG",
> +	"MESAPI_MISC__WAIT_REG_MEM",
> +	"MESAPI_MISC__SET_SHADER_DEBUGGER",
> +};
> +
> +static const char *mes_v11_0_get_op_string(union MESAPI__MISC *x_pkt)
> +{
> +	const char *op_str = NULL;
> +
> +	if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
> +		op_str = mes_v11_0_opcodes[x_pkt->header.opcode];
> +
> +	return op_str;
> +}
> +
> +static const char *mes_v11_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
> +{
> +	const char *op_str = NULL;
> +
> +	if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
> +	    (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes)))
> +		op_str = mes_v11_0_misc_opcodes[x_pkt->opcode];
> +
> +	return op_str;
> +}
> +
>   static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
>   						    void *pkt, int size,
>   						    int api_status_off)
>   {
>   	int ndw = size / 4;
>   	signed long r;
> -	union MESAPI__ADD_QUEUE *x_pkt = pkt;
> +	union MESAPI__MISC *x_pkt = pkt;
>   	struct MES_API_STATUS *api_status;
>   	struct amdgpu_device *adev = mes->adev;
>   	struct amdgpu_ring *ring = &mes->ring;
>   	unsigned long flags;
>   	signed long timeout = 3000000; /* 3000 ms */
> +	const char *op_str, *misc_op_str;
> +
> +	if (x_pkt->header.opcode >= MES_SCH_API_MAX)
> +		return -EINVAL;
>   
>   	if (amdgpu_emu_mode) {
>   		timeout *= 100;
> @@ -135,13 +189,29 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
>   	amdgpu_ring_commit(ring);
>   	spin_unlock_irqrestore(&mes->ring_lock, flags);
>   
> -	DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
> +	op_str = mes_v11_0_get_op_string(x_pkt);
> +	misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);
> +
> +	if (misc_op_str)
> +		dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, misc_op_str);
> +	else if (op_str)
> +		dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
> +	else
> +		dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
>   
>   	r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
>   		      timeout);
>   	if (r < 1) {
> -		DRM_ERROR("MES failed to response msg=%d\n",
> -			  x_pkt->header.opcode);
> +
> +		if (misc_op_str)
> +			dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
> +				op_str, misc_op_str);
> +		else if (op_str)
> +			dev_err(adev->dev, "MES failed to respond to msg=%s\n",
> +				op_str);
> +		else
> +			dev_err(adev->dev, "MES failed to respond to msg=%d\n",
> +				x_pkt->header.opcode);
>   
>   		while (halt_if_hws_hang)
>   			schedule();

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers
  2024-04-08 18:25 Alex Deucher
@ 2024-04-12  4:16 ` Alex Deucher
  0 siblings, 0 replies; 7+ messages in thread
From: Alex Deucher @ 2024-04-12  4:16 UTC (permalink / raw)
  To: Alex Deucher; +Cc: amd-gfx

Ping?

On Mon, Apr 8, 2024 at 3:02 PM Alex Deucher <alexander.deucher@amd.com> wrote:
>
> Makes it easier to review the logs when there are MES
> errors.
>
> v2: use dbg for emitted, add helpers for fetching strings
>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 78 ++++++++++++++++++++++++--
>  1 file changed, 74 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index 072c478665ade..69d39ba726e12 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -100,18 +100,72 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
>         .insert_nop = amdgpu_ring_insert_nop,
>  };
>
> +static const char *mes_v11_0_opcodes[] = {
> +       "MES_SCH_API_SET_HW_RSRC",
> +       "MES_SCH_API_SET_SCHEDULING_CONFIG",
> +       "MES_SCH_API_ADD_QUEUE"
> +       "MES_SCH_API_REMOVE_QUEUE"
> +       "MES_SCH_API_PERFORM_YIELD"
> +       "MES_SCH_API_SET_GANG_PRIORITY_LEVEL"
> +       "MES_SCH_API_SUSPEND"
> +       "MES_SCH_API_RESUME"
> +       "MES_SCH_API_RESET"
> +       "MES_SCH_API_SET_LOG_BUFFER"
> +       "MES_SCH_API_CHANGE_GANG_PRORITY"
> +       "MES_SCH_API_QUERY_SCHEDULER_STATUS"
> +       "MES_SCH_API_PROGRAM_GDS"
> +       "MES_SCH_API_SET_DEBUG_VMID"
> +       "MES_SCH_API_MISC"
> +       "MES_SCH_API_UPDATE_ROOT_PAGE_TABLE"
> +       "MES_SCH_API_AMD_LOG"
> +};
> +
> +static const char *mes_v11_0_misc_opcodes[] = {
> +       "MESAPI_MISC__WRITE_REG",
> +       "MESAPI_MISC__INV_GART",
> +       "MESAPI_MISC__QUERY_STATUS",
> +       "MESAPI_MISC__READ_REG",
> +       "MESAPI_MISC__WAIT_REG_MEM",
> +       "MESAPI_MISC__SET_SHADER_DEBUGGER",
> +};
> +
> +static const char *mes_v11_0_get_op_string(union MESAPI__MISC *x_pkt)
> +{
> +       const char *op_str = NULL;
> +
> +       if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
> +               op_str = mes_v11_0_opcodes[x_pkt->header.opcode];
> +
> +       return op_str;
> +}
> +
> +static const char *mes_v11_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
> +{
> +       const char *op_str = NULL;
> +
> +       if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
> +           (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes)))
> +               op_str = mes_v11_0_misc_opcodes[x_pkt->opcode];
> +
> +       return op_str;
> +}
> +
>  static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
>                                                     void *pkt, int size,
>                                                     int api_status_off)
>  {
>         int ndw = size / 4;
>         signed long r;
> -       union MESAPI__ADD_QUEUE *x_pkt = pkt;
> +       union MESAPI__MISC *x_pkt = pkt;
>         struct MES_API_STATUS *api_status;
>         struct amdgpu_device *adev = mes->adev;
>         struct amdgpu_ring *ring = &mes->ring;
>         unsigned long flags;
>         signed long timeout = adev->usec_timeout;
> +       const char *op_str, *misc_op_str;
> +
> +       if (x_pkt->header.opcode >= MES_SCH_API_MAX)
> +               return -EINVAL;
>
>         if (amdgpu_emu_mode) {
>                 timeout *= 100;
> @@ -135,13 +189,29 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
>         amdgpu_ring_commit(ring);
>         spin_unlock_irqrestore(&mes->ring_lock, flags);
>
> -       DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
> +       op_str = mes_v11_0_get_op_string(x_pkt);
> +       misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);
> +
> +       if (misc_op_str)
> +               dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, misc_op_str);
> +       else if (op_str)
> +               dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
> +       else
> +               dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
>
>         r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
>                       timeout);
>         if (r < 1) {
> -               DRM_ERROR("MES failed to response msg=%d\n",
> -                         x_pkt->header.opcode);
> +
> +               if (misc_op_str)
> +                       dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
> +                               op_str, misc_op_str);
> +               else if (op_str)
> +                       dev_err(adev->dev, "MES failed to respond to msg=%s\n",
> +                               op_str);
> +               else
> +                       dev_err(adev->dev, "MES failed to respond to msg=%d\n",
> +                               x_pkt->header.opcode);
>
>                 while (halt_if_hws_hang)
>                         schedule();
> --
> 2.44.0
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers
@ 2024-04-08 18:25 Alex Deucher
  2024-04-12  4:16 ` Alex Deucher
  0 siblings, 1 reply; 7+ messages in thread
From: Alex Deucher @ 2024-04-08 18:25 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher

Makes it easier to review the logs when there are MES
errors.

v2: use dbg for emitted, add helpers for fetching strings

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 78 ++++++++++++++++++++++++--
 1 file changed, 74 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 072c478665ade..69d39ba726e12 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -100,18 +100,72 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
 	.insert_nop = amdgpu_ring_insert_nop,
 };
 
+static const char *mes_v11_0_opcodes[] = {
+	"MES_SCH_API_SET_HW_RSRC",
+	"MES_SCH_API_SET_SCHEDULING_CONFIG",
+	"MES_SCH_API_ADD_QUEUE"
+	"MES_SCH_API_REMOVE_QUEUE"
+	"MES_SCH_API_PERFORM_YIELD"
+	"MES_SCH_API_SET_GANG_PRIORITY_LEVEL"
+	"MES_SCH_API_SUSPEND"
+	"MES_SCH_API_RESUME"
+	"MES_SCH_API_RESET"
+	"MES_SCH_API_SET_LOG_BUFFER"
+	"MES_SCH_API_CHANGE_GANG_PRORITY"
+	"MES_SCH_API_QUERY_SCHEDULER_STATUS"
+	"MES_SCH_API_PROGRAM_GDS"
+	"MES_SCH_API_SET_DEBUG_VMID"
+	"MES_SCH_API_MISC"
+	"MES_SCH_API_UPDATE_ROOT_PAGE_TABLE"
+	"MES_SCH_API_AMD_LOG"
+};
+
+static const char *mes_v11_0_misc_opcodes[] = {
+	"MESAPI_MISC__WRITE_REG",
+	"MESAPI_MISC__INV_GART",
+	"MESAPI_MISC__QUERY_STATUS",
+	"MESAPI_MISC__READ_REG",
+	"MESAPI_MISC__WAIT_REG_MEM",
+	"MESAPI_MISC__SET_SHADER_DEBUGGER",
+};
+
+static const char *mes_v11_0_get_op_string(union MESAPI__MISC *x_pkt)
+{
+	const char *op_str = NULL;
+
+	if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
+		op_str = mes_v11_0_opcodes[x_pkt->header.opcode];
+
+	return op_str;
+}
+
+static const char *mes_v11_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
+{
+	const char *op_str = NULL;
+
+	if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
+	    (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes)))
+		op_str = mes_v11_0_misc_opcodes[x_pkt->opcode];
+
+	return op_str;
+}
+
 static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 						    void *pkt, int size,
 						    int api_status_off)
 {
 	int ndw = size / 4;
 	signed long r;
-	union MESAPI__ADD_QUEUE *x_pkt = pkt;
+	union MESAPI__MISC *x_pkt = pkt;
 	struct MES_API_STATUS *api_status;
 	struct amdgpu_device *adev = mes->adev;
 	struct amdgpu_ring *ring = &mes->ring;
 	unsigned long flags;
 	signed long timeout = adev->usec_timeout;
+	const char *op_str, *misc_op_str;
+
+	if (x_pkt->header.opcode >= MES_SCH_API_MAX)
+		return -EINVAL;
 
 	if (amdgpu_emu_mode) {
 		timeout *= 100;
@@ -135,13 +189,29 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 	amdgpu_ring_commit(ring);
 	spin_unlock_irqrestore(&mes->ring_lock, flags);
 
-	DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
+	op_str = mes_v11_0_get_op_string(x_pkt);
+	misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);
+
+	if (misc_op_str)
+		dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, misc_op_str);
+	else if (op_str)
+		dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
+	else
+		dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
 
 	r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
 		      timeout);
 	if (r < 1) {
-		DRM_ERROR("MES failed to response msg=%d\n",
-			  x_pkt->header.opcode);
+
+		if (misc_op_str)
+			dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
+				op_str, misc_op_str);
+		else if (op_str)
+			dev_err(adev->dev, "MES failed to respond to msg=%s\n",
+				op_str);
+		else
+			dev_err(adev->dev, "MES failed to respond to msg=%d\n",
+				x_pkt->header.opcode);
 
 		while (halt_if_hws_hang)
 			schedule();
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers
  2024-04-02  0:42 ` Liu, Shaoyun
@ 2024-04-04 10:02   ` Sharma, Shashank
  0 siblings, 0 replies; 7+ messages in thread
From: Sharma, Shashank @ 2024-04-04 10:02 UTC (permalink / raw)
  To: Liu, Shaoyun, Deucher, Alexander, amd-gfx

Hi Alex,

On 02/04/2024 02:42, Liu, Shaoyun wrote:
> [AMD Official Use Only - General]
>
> [AMD Official Use Only - General]
>
> Comments inline
>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Alex Deucher
> Sent: Saturday, March 30, 2024 10:01 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
> Subject: [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers
>
> Makes it easier to review the logs when there are MES errors.
>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 65 ++++++++++++++++++++++++--
>   1 file changed, 61 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index 072c478665ade..73a4bb0f5ba0f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -100,19 +100,51 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
>          .insert_nop = amdgpu_ring_insert_nop,
>   };
>
> +static const char *mes_v11_0_opcodes[] = {
> +       "MES_SCH_API_SET_HW_RSRC",
> +       "MES_SCH_API_SET_SCHEDULING_CONFIG",
> +       "MES_SCH_API_ADD_QUEUE"
> +       "MES_SCH_API_REMOVE_QUEUE"
> +       "MES_SCH_API_PERFORM_YIELD"
> +       "MES_SCH_API_SET_GANG_PRIORITY_LEVEL"
> +       "MES_SCH_API_SUSPEND"
> +       "MES_SCH_API_RESUME"
> +       "MES_SCH_API_RESET"
> +       "MES_SCH_API_SET_LOG_BUFFER"
> +       "MES_SCH_API_CHANGE_GANG_PRORITY"
> +       "MES_SCH_API_QUERY_SCHEDULER_STATUS"
> +       "MES_SCH_API_PROGRAM_GDS"
> +       "MES_SCH_API_SET_DEBUG_VMID"
> +       "MES_SCH_API_MISC"
> +       "MES_SCH_API_UPDATE_ROOT_PAGE_TABLE"
> +       "MES_SCH_API_AMD_LOG"
> +};
> +
> +static const char *mes_v11_0_misc_opcodes[] = {
> +       "MESAPI_MISC__WRITE_REG",
> +       "MESAPI_MISC__INV_GART",
> +       "MESAPI_MISC__QUERY_STATUS",
> +       "MESAPI_MISC__READ_REG",
> +       "MESAPI_MISC__WAIT_REG_MEM",
> +       "MESAPI_MISC__SET_SHADER_DEBUGGER",
> +};
> +
>   static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
>                                                      void *pkt, int size,
>                                                      int api_status_off)
>   {
>          int ndw = size / 4;
>          signed long r;
> -       union MESAPI__ADD_QUEUE *x_pkt = pkt;
> +       union MESAPI__MISC *x_pkt = pkt;
>          struct MES_API_STATUS *api_status;
>          struct amdgpu_device *adev = mes->adev;
>          struct amdgpu_ring *ring = &mes->ring;
>          unsigned long flags;
>          signed long timeout = adev->usec_timeout;
>
> +       if (x_pkt->header.opcode >= MES_SCH_API_MAX)
> +               return -EINVAL;
> +
>          if (amdgpu_emu_mode) {
>                  timeout *= 100;
>          } else if (amdgpu_sriov_vf(adev)) {
> @@ -135,13 +167,38 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
>          amdgpu_ring_commit(ring);
>          spin_unlock_irqrestore(&mes->ring_lock, flags);
>
> -       DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
> +       if (x_pkt->header.opcode == MES_SCH_API_MISC) {
> +               if (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes))
> +                       dev_err(adev->dev, "MES msg=%s (%s) was emitted\n",
>
> [shaoyunl] Shouldn't we use DRM_DEBUG for the valid condition?
>
> Regards
> Shaoyun.liu
>
> +                               mes_v11_0_opcodes[x_pkt->header.opcode],
> +                               mes_v11_0_misc_opcodes[x_pkt->opcode]);
> +               else
> +                       dev_err(adev->dev, "MES msg=%s (%d) was emitted\n",
> +                               mes_v11_0_opcodes[x_pkt->header.opcode],
> +                               x_pkt->opcode);
> +       } else if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
> +               dev_err(adev->dev, "MES msg=%s was emitted\n",
> +                       mes_v11_0_opcodes[x_pkt->header.opcode]);
> +       else
> +               dev_err(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
>
>          r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
>                        timeout);
>          if (r < 1) {
> -               DRM_ERROR("MES failed to response msg=%d\n",
> -                         x_pkt->header.opcode);
> +               if (x_pkt->header.opcode == MES_SCH_API_MISC) {
> +                       if (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes))
> +                               dev_err(adev->dev, "MES failed to response msg=%s (%s)\n",
> +                                       mes_v11_0_opcodes[x_pkt->header.opcode],
> +                                       mes_v11_0_misc_opcodes[x_pkt->opcode]);
> +                       else
> +                               dev_err(adev->dev, "MES failed to response msg=%s (%d)\n",
> +                                       mes_v11_0_opcodes[x_pkt->header.opcode], x_pkt->opcode);
> +               } else if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
> +                       dev_err(adev->dev, "MES failed to response msg=%s\n",
> +                               mes_v11_0_opcodes[x_pkt->header.opcode]);
> +               else
> +                       dev_err(adev->dev, "MES failed to response msg=%d\n",
> +                               x_pkt->header.opcode);

Please consider this small reformatting here for better readability:

static const char *
amdgpu_mes_find_op_name(union MESAPI__MISC *x_pkt)
{
         const char *op_name = NULL;

         if (x_pkt->header.opcode == MES_SCH_API_MISC) {
                 if (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes))
                         op_name = mes_v11_0_misc_opcodes[x_pkt->opcode];
         } else {
                 if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
                         op_name = mes_v11_0_opcodes[x_pkt->header.opcode];
         }

         return op_name;
}

/* Resolve the name once; it is reused by both messages below. */
op_name = amdgpu_mes_find_op_name(x_pkt);
if (op_name)
         DRM_DEBUG_DRIVER("MES msg=%s was emitted\n", op_name);
else
         /* No table entry for this opcode: fall back to the raw number. */
         DRM_DEBUG_DRIVER("MES msg=%d was emitted\n", x_pkt->header.opcode);

r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout);
if (r < 1) {
         /* dev_err() takes the struct device as its first argument. */
         if (op_name)
                 dev_err(adev->dev, "MES failed to response msg=%s \n", op_name);
         else
                 dev_err(adev->dev, "MES failed to response msg=%d\n",
                         x_pkt->header.opcode);
}

- Shashank

>                  while (halt_if_hws_hang)
>                          schedule();
> --
> 2.44.0
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers
  2024-03-30 14:01 Alex Deucher
@ 2024-04-02  0:42 ` Liu, Shaoyun
  2024-04-04 10:02   ` Sharma, Shashank
  0 siblings, 1 reply; 7+ messages in thread
From: Liu, Shaoyun @ 2024-04-02  0:42 UTC (permalink / raw)
  To: Deucher, Alexander, amd-gfx; +Cc: Deucher, Alexander

[AMD Official Use Only - General]

Comments inline

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Alex Deucher
Sent: Saturday, March 30, 2024 10:01 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
Subject: [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers

Makes it easier to review the logs when there are MES errors.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 65 ++++++++++++++++++++++++--
 1 file changed, 61 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 072c478665ade..73a4bb0f5ba0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -100,19 +100,51 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
        .insert_nop = amdgpu_ring_insert_nop,
 };

+static const char *mes_v11_0_opcodes[] = {
+       "MES_SCH_API_SET_HW_RSRC",
+       "MES_SCH_API_SET_SCHEDULING_CONFIG",
+       "MES_SCH_API_ADD_QUEUE"
+       "MES_SCH_API_REMOVE_QUEUE"
+       "MES_SCH_API_PERFORM_YIELD"
+       "MES_SCH_API_SET_GANG_PRIORITY_LEVEL"
+       "MES_SCH_API_SUSPEND"
+       "MES_SCH_API_RESUME"
+       "MES_SCH_API_RESET"
+       "MES_SCH_API_SET_LOG_BUFFER"
+       "MES_SCH_API_CHANGE_GANG_PRORITY"
+       "MES_SCH_API_QUERY_SCHEDULER_STATUS"
+       "MES_SCH_API_PROGRAM_GDS"
+       "MES_SCH_API_SET_DEBUG_VMID"
+       "MES_SCH_API_MISC"
+       "MES_SCH_API_UPDATE_ROOT_PAGE_TABLE"
+       "MES_SCH_API_AMD_LOG"
+};
+
+static const char *mes_v11_0_misc_opcodes[] = {
+       "MESAPI_MISC__WRITE_REG",
+       "MESAPI_MISC__INV_GART",
+       "MESAPI_MISC__QUERY_STATUS",
+       "MESAPI_MISC__READ_REG",
+       "MESAPI_MISC__WAIT_REG_MEM",
+       "MESAPI_MISC__SET_SHADER_DEBUGGER",
+};
+
 static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
                                                    void *pkt, int size,
                                                    int api_status_off)
 {
        int ndw = size / 4;
        signed long r;
-       union MESAPI__ADD_QUEUE *x_pkt = pkt;
+       union MESAPI__MISC *x_pkt = pkt;
        struct MES_API_STATUS *api_status;
        struct amdgpu_device *adev = mes->adev;
        struct amdgpu_ring *ring = &mes->ring;
        unsigned long flags;
        signed long timeout = adev->usec_timeout;

+       if (x_pkt->header.opcode >= MES_SCH_API_MAX)
+               return -EINVAL;
+
        if (amdgpu_emu_mode) {
                timeout *= 100;
        } else if (amdgpu_sriov_vf(adev)) {
@@ -135,13 +167,38 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
        amdgpu_ring_commit(ring);
        spin_unlock_irqrestore(&mes->ring_lock, flags);

-       DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
+       if (x_pkt->header.opcode == MES_SCH_API_MISC) {
+               if (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes))
+                       dev_err(adev->dev, "MES msg=%s (%s) was emitted\n",

[shaoyunl] Shouldn't we use DRM_DEBUG for the valid condition?

Regards
Shaoyun.liu

+                               mes_v11_0_opcodes[x_pkt->header.opcode],
+                               mes_v11_0_misc_opcodes[x_pkt->opcode]);
+               else
+                       dev_err(adev->dev, "MES msg=%s (%d) was emitted\n",
+                               mes_v11_0_opcodes[x_pkt->header.opcode],
+                               x_pkt->opcode);
+       } else if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
+               dev_err(adev->dev, "MES msg=%s was emitted\n",
+                       mes_v11_0_opcodes[x_pkt->header.opcode]);
+       else
+               dev_err(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);

        r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
                      timeout);
        if (r < 1) {
-               DRM_ERROR("MES failed to response msg=%d\n",
-                         x_pkt->header.opcode);
+               if (x_pkt->header.opcode == MES_SCH_API_MISC) {
+                       if (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes))
+                               dev_err(adev->dev, "MES failed to response msg=%s (%s)\n",
+                                       mes_v11_0_opcodes[x_pkt->header.opcode],
+                                       mes_v11_0_misc_opcodes[x_pkt->opcode]);
+                       else
+                               dev_err(adev->dev, "MES failed to response msg=%s (%d)\n",
+                                       mes_v11_0_opcodes[x_pkt->header.opcode], x_pkt->opcode);
+               } else if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
+                       dev_err(adev->dev, "MES failed to response msg=%s\n",
+                               mes_v11_0_opcodes[x_pkt->header.opcode]);
+               else
+                       dev_err(adev->dev, "MES failed to response msg=%d\n",
+                               x_pkt->header.opcode);

                while (halt_if_hws_hang)
                        schedule();
--
2.44.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers
@ 2024-03-30 14:01 Alex Deucher
  2024-04-02  0:42 ` Liu, Shaoyun
  0 siblings, 1 reply; 7+ messages in thread
From: Alex Deucher @ 2024-03-30 14:01 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher

Makes it easier to review the logs when there are MES
errors.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 65 ++++++++++++++++++++++++--
 1 file changed, 61 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 072c478665ade..73a4bb0f5ba0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -100,19 +100,51 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
 	.insert_nop = amdgpu_ring_insert_nop,
 };
 
+static const char *mes_v11_0_opcodes[] = {
+	"MES_SCH_API_SET_HW_RSRC",
+	"MES_SCH_API_SET_SCHEDULING_CONFIG",
+	"MES_SCH_API_ADD_QUEUE"
+	"MES_SCH_API_REMOVE_QUEUE"
+	"MES_SCH_API_PERFORM_YIELD"
+	"MES_SCH_API_SET_GANG_PRIORITY_LEVEL"
+	"MES_SCH_API_SUSPEND"
+	"MES_SCH_API_RESUME"
+	"MES_SCH_API_RESET"
+	"MES_SCH_API_SET_LOG_BUFFER"
+	"MES_SCH_API_CHANGE_GANG_PRORITY"
+	"MES_SCH_API_QUERY_SCHEDULER_STATUS"
+	"MES_SCH_API_PROGRAM_GDS"
+	"MES_SCH_API_SET_DEBUG_VMID"
+	"MES_SCH_API_MISC"
+	"MES_SCH_API_UPDATE_ROOT_PAGE_TABLE"
+	"MES_SCH_API_AMD_LOG"
+};
+
+static const char *mes_v11_0_misc_opcodes[] = {
+	"MESAPI_MISC__WRITE_REG",
+	"MESAPI_MISC__INV_GART",
+	"MESAPI_MISC__QUERY_STATUS",
+	"MESAPI_MISC__READ_REG",
+	"MESAPI_MISC__WAIT_REG_MEM",
+	"MESAPI_MISC__SET_SHADER_DEBUGGER",
+};
+
 static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 						    void *pkt, int size,
 						    int api_status_off)
 {
 	int ndw = size / 4;
 	signed long r;
-	union MESAPI__ADD_QUEUE *x_pkt = pkt;
+	union MESAPI__MISC *x_pkt = pkt;
 	struct MES_API_STATUS *api_status;
 	struct amdgpu_device *adev = mes->adev;
 	struct amdgpu_ring *ring = &mes->ring;
 	unsigned long flags;
 	signed long timeout = adev->usec_timeout;
 
+	if (x_pkt->header.opcode >= MES_SCH_API_MAX)
+		return -EINVAL;
+
 	if (amdgpu_emu_mode) {
 		timeout *= 100;
 	} else if (amdgpu_sriov_vf(adev)) {
@@ -135,13 +167,38 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 	amdgpu_ring_commit(ring);
 	spin_unlock_irqrestore(&mes->ring_lock, flags);
 
-	DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
+	if (x_pkt->header.opcode == MES_SCH_API_MISC) {
+		if (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes))
+			dev_err(adev->dev, "MES msg=%s (%s) was emitted\n",
+				mes_v11_0_opcodes[x_pkt->header.opcode],
+				mes_v11_0_misc_opcodes[x_pkt->opcode]);
+		else
+			dev_err(adev->dev, "MES msg=%s (%d) was emitted\n",
+				mes_v11_0_opcodes[x_pkt->header.opcode],
+				x_pkt->opcode);
+	} else if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
+		dev_err(adev->dev, "MES msg=%s was emitted\n",
+			mes_v11_0_opcodes[x_pkt->header.opcode]);
+	else
+		dev_err(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
 
 	r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
 		      timeout);
 	if (r < 1) {
-		DRM_ERROR("MES failed to response msg=%d\n",
-			  x_pkt->header.opcode);
+		if (x_pkt->header.opcode == MES_SCH_API_MISC) {
+			if (x_pkt->opcode <= ARRAY_SIZE(mes_v11_0_misc_opcodes))
+				dev_err(adev->dev, "MES failed to response msg=%s (%s)\n",
+					mes_v11_0_opcodes[x_pkt->header.opcode],
+					mes_v11_0_misc_opcodes[x_pkt->opcode]);
+			else
+				dev_err(adev->dev, "MES failed to response msg=%s (%d)\n",
+					mes_v11_0_opcodes[x_pkt->header.opcode], x_pkt->opcode);
+		} else if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
+			dev_err(adev->dev, "MES failed to response msg=%s\n",
+				mes_v11_0_opcodes[x_pkt->header.opcode]);
+		else
+			dev_err(adev->dev, "MES failed to response msg=%d\n",
+				x_pkt->header.opcode);
 
 		while (halt_if_hws_hang)
 			schedule();
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2024-04-18 17:54 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-17 19:53 [PATCH] drm/amdgpu/mes11: print MES opcodes rather than numbers Alex Deucher
2024-04-18 17:54 ` Felix Kuehling
  -- strict thread matches above, loose matches on Subject: below --
2024-04-08 18:25 Alex Deucher
2024-04-12  4:16 ` Alex Deucher
2024-03-30 14:01 Alex Deucher
2024-04-02  0:42 ` Liu, Shaoyun
2024-04-04 10:02   ` Sharma, Shashank

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).