amd-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [bug report] drm/amdgpu/mes: use ring for kernel queue submission
@ 2022-05-09  9:07 Dan Carpenter
  0 siblings, 0 replies; 3+ messages in thread
From: Dan Carpenter @ 2022-05-09  9:07 UTC (permalink / raw)
  To: Jack.Xiao; +Cc: amd-gfx

Hello Jack Xiao,

The patch d0c423b64765: "drm/amdgpu/mes: use ring for kernel queue
submission" from Mar 27, 2020, leads to the following Smatch static
checker warning:

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:924 amdgpu_mes_add_ring() error: format string overflow. buf_size: 16 length: 39
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:927 amdgpu_mes_add_ring() error: format string overflow. buf_size: 16 length: 43
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:930 amdgpu_mes_add_ring() error: format string overflow. buf_size: 16 length: 40

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
    848 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
    849                         int queue_type, int idx,
    850                         struct amdgpu_mes_ctx_data *ctx_data,
    851                         struct amdgpu_ring **out)
    852 {
    853         struct amdgpu_ring *ring;
    854         struct amdgpu_mes_gang *gang;
    855         struct amdgpu_mes_queue_properties qprops = {0};
    856         int r, queue_id, pasid;
    857 
    858         /*
    859          * Avoid taking any other locks under MES lock to avoid circular
    860          * lock dependencies.
    861          */
    862         amdgpu_mes_lock(&adev->mes);
    863         gang = idr_find(&adev->mes.gang_id_idr, gang_id);
    864         if (!gang) {
    865                 DRM_ERROR("gang id %d doesn't exist\n", gang_id);
    866                 amdgpu_mes_unlock(&adev->mes);
    867                 return -EINVAL;
    868         }
    869         pasid = gang->process->pasid;
    870 
    871         ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
    872         if (!ring) {
    873                 amdgpu_mes_unlock(&adev->mes);
    874                 return -ENOMEM;
    875         }
    876 
    877         ring->ring_obj = NULL;
    878         ring->use_doorbell = true;
    879         ring->is_mes_queue = true;
    880         ring->mes_ctx = ctx_data;
    881         ring->idx = idx;
    882         ring->no_scheduler = true;
    883 
    884         if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
    885                 int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
    886                                       compute[ring->idx].mec_hpd);
    887                 ring->eop_gpu_addr =
    888                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
    889         }
    890 
    891         switch (queue_type) {
    892         case AMDGPU_RING_TYPE_GFX:
    893                 ring->funcs = adev->gfx.gfx_ring[0].funcs;
    894                 break;
    895         case AMDGPU_RING_TYPE_COMPUTE:
    896                 ring->funcs = adev->gfx.compute_ring[0].funcs;
    897                 break;
    898         case AMDGPU_RING_TYPE_SDMA:
    899                 ring->funcs = adev->sdma.instance[0].ring.funcs;
    900                 break;
    901         default:
    902                 BUG();
    903         }
    904 
    905         r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
    906                              AMDGPU_RING_PRIO_DEFAULT, NULL);
    907         if (r)
    908                 goto clean_up_memory;
    909 
    910         amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
    911 
    912         dma_fence_wait(gang->process->vm->last_update, false);
    913         dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
    914         amdgpu_mes_unlock(&adev->mes);
    915 
    916         r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
    917         if (r)
    918                 goto clean_up_ring;
    919 
    920         ring->hw_queue_id = queue_id;
    921         ring->doorbell_index = qprops.doorbell_off;
    922 
    923         if (queue_type == AMDGPU_RING_TYPE_GFX)
--> 924                 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id);

Using sprintf() is always ill-advised.  Better to use snprintf().

"gfx_.." 6 characters.
passid is capped at USHRT_MAX so 5 characters
gang_id is capped at INT_MAX so 10 characters
queue_id is up to 10 characters as well.
1 char for the NUL terminator

Smatch is saying that it can be 39 characters but depending on the
implementation of idr_alloc() this could reach up to 32 characters.
Still that's well past the 16 characters avaliable.

    925         else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
    926                 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
    927                         queue_id);

Same

    928         else if (queue_type == AMDGPU_RING_TYPE_SDMA)
    929                 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
    930                         queue_id);

Same

    931         else
    932                 BUG();
    933 
    934         *out = ring;
    935         return 0;
    936 
    937 clean_up_ring:
    938         amdgpu_ring_fini(ring);
    939 clean_up_memory:
    940         kfree(ring);
    941         amdgpu_mes_unlock(&adev->mes);
    942         return r;
    943 }

regards,
dan carpenter

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [bug report] drm/amdgpu/mes: use ring for kernel queue submission
@ 2022-10-26  9:35 Dan Carpenter
  0 siblings, 0 replies; 3+ messages in thread
From: Dan Carpenter @ 2022-10-26  9:35 UTC (permalink / raw)
  To: Jack.Xiao; +Cc: amd-gfx

Hello Jack Xiao,

The patch d0c423b64765: "drm/amdgpu/mes: use ring for kernel queue
submission" from Mar 27, 2020, leads to the following Smatch static
checker warning:

	drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1056 amdgpu_mes_add_ring()
	error: format string overflow. buf_size: 16 length: 38 [user data]

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
    980 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
    981                         int queue_type, int idx,
    982                         struct amdgpu_mes_ctx_data *ctx_data,
    983                         struct amdgpu_ring **out)
    984 {
    985         struct amdgpu_ring *ring;
    986         struct amdgpu_mes_gang *gang;
    987         struct amdgpu_mes_queue_properties qprops = {0};
    988         int r, queue_id, pasid;
    989 
    990         /*
    991          * Avoid taking any other locks under MES lock to avoid circular
    992          * lock dependencies.
    993          */
    994         amdgpu_mes_lock(&adev->mes);
    995         gang = idr_find(&adev->mes.gang_id_idr, gang_id);
    996         if (!gang) {
    997                 DRM_ERROR("gang id %d doesn't exist\n", gang_id);
    998                 amdgpu_mes_unlock(&adev->mes);
    999                 return -EINVAL;
    1000         }
    1001         pasid = gang->process->pasid;
    1002 
    1003         ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
    1004         if (!ring) {
    1005                 amdgpu_mes_unlock(&adev->mes);
    1006                 return -ENOMEM;
    1007         }
    1008 
    1009         ring->ring_obj = NULL;
    1010         ring->use_doorbell = true;
    1011         ring->is_mes_queue = true;
    1012         ring->mes_ctx = ctx_data;
    1013         ring->idx = idx;
    1014         ring->no_scheduler = true;
    1015 
    1016         if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
    1017                 int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
    1018                                       compute[ring->idx].mec_hpd);
    1019                 ring->eop_gpu_addr =
    1020                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
    1021         }
    1022 
    1023         switch (queue_type) {
    1024         case AMDGPU_RING_TYPE_GFX:
    1025                 ring->funcs = adev->gfx.gfx_ring[0].funcs;
    1026                 break;
    1027         case AMDGPU_RING_TYPE_COMPUTE:
    1028                 ring->funcs = adev->gfx.compute_ring[0].funcs;
    1029                 break;
    1030         case AMDGPU_RING_TYPE_SDMA:
    1031                 ring->funcs = adev->sdma.instance[0].ring.funcs;
    1032                 break;
    1033         default:
    1034                 BUG();
    1035         }
    1036 
    1037         r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
    1038                              AMDGPU_RING_PRIO_DEFAULT, NULL);
    1039         if (r)
    1040                 goto clean_up_memory;
    1041 
    1042         amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
    1043 
    1044         dma_fence_wait(gang->process->vm->last_update, false);
    1045         dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
    1046         amdgpu_mes_unlock(&adev->mes);
    1047 
    1048         r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
    1049         if (r)
    1050                 goto clean_up_ring;
    1051 
    1052         ring->hw_queue_id = queue_id;
    1053         ring->doorbell_index = qprops.doorbell_off;
    1054 
    1055         if (queue_type == AMDGPU_RING_TYPE_GFX)
--> 1056                 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id);

I'm not sure why this is warning now instead of in 2020.  But the bug is
definitely real.  "gang_id" is capped at INT_MAX so that can overflow
already even if the values of "pasid" and "queue_id" are zero.

Using snprintf() is safer but also probably the buffer should be larger.

    1057         else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
    1058                 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
    1059                         queue_id);
    1060         else if (queue_type == AMDGPU_RING_TYPE_SDMA)
    1061                 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
    1062                         queue_id);
    1063         else
    1064                 BUG();
    1065 
    1066         *out = ring;
    1067         return 0;
    1068 
    1069 clean_up_ring:
    1070         amdgpu_ring_fini(ring);
    1071 clean_up_memory:
    1072         kfree(ring);
    1073         amdgpu_mes_unlock(&adev->mes);
    1074         return r;
    1075 }

regards,
dan carpenter

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [bug report] drm/amdgpu/mes: use ring for kernel queue submission
@ 2022-05-09  7:41 Dan Carpenter
  0 siblings, 0 replies; 3+ messages in thread
From: Dan Carpenter @ 2022-05-09  7:41 UTC (permalink / raw)
  To: Jack.Xiao; +Cc: amd-gfx

Hello Jack Xiao,

The patch d0c423b64765: "drm/amdgpu/mes: use ring for kernel queue
submission" from Mar 27, 2020, leads to the following Smatch static
checker warning:

	drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:941 amdgpu_mes_add_ring()
	error: double unlocked '&adev->mes.mutex_hidden' (orig line 914)

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
    848 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
    849                         int queue_type, int idx,
    850                         struct amdgpu_mes_ctx_data *ctx_data,
    851                         struct amdgpu_ring **out)
    852 {
    853         struct amdgpu_ring *ring;
    854         struct amdgpu_mes_gang *gang;
    855         struct amdgpu_mes_queue_properties qprops = {0};
    856         int r, queue_id, pasid;
    857 
    858         /*
    859          * Avoid taking any other locks under MES lock to avoid circular
    860          * lock dependencies.
    861          */
    862         amdgpu_mes_lock(&adev->mes);
    863         gang = idr_find(&adev->mes.gang_id_idr, gang_id);
    864         if (!gang) {
    865                 DRM_ERROR("gang id %d doesn't exist\n", gang_id);
    866                 amdgpu_mes_unlock(&adev->mes);
    867                 return -EINVAL;
    868         }
    869         pasid = gang->process->pasid;
    870 
    871         ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
    872         if (!ring) {
    873                 amdgpu_mes_unlock(&adev->mes);
    874                 return -ENOMEM;
    875         }
    876 
    877         ring->ring_obj = NULL;
    878         ring->use_doorbell = true;
    879         ring->is_mes_queue = true;
    880         ring->mes_ctx = ctx_data;
    881         ring->idx = idx;
    882         ring->no_scheduler = true;
    883 
    884         if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
    885                 int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
    886                                       compute[ring->idx].mec_hpd);
    887                 ring->eop_gpu_addr =
    888                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
    889         }
    890 
    891         switch (queue_type) {
    892         case AMDGPU_RING_TYPE_GFX:
    893                 ring->funcs = adev->gfx.gfx_ring[0].funcs;
    894                 break;
    895         case AMDGPU_RING_TYPE_COMPUTE:
    896                 ring->funcs = adev->gfx.compute_ring[0].funcs;
    897                 break;
    898         case AMDGPU_RING_TYPE_SDMA:
    899                 ring->funcs = adev->sdma.instance[0].ring.funcs;
    900                 break;
    901         default:
    902                 BUG();
    903         }
    904 
    905         r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
    906                              AMDGPU_RING_PRIO_DEFAULT, NULL);
    907         if (r)
    908                 goto clean_up_memory;
    909 
    910         amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
    911 
    912         dma_fence_wait(gang->process->vm->last_update, false);
    913         dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
    914         amdgpu_mes_unlock(&adev->mes);

Unlocked

    915 
    916         r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
    917         if (r)
    918                 goto clean_up_ring;

Double unlocked by goto.  It's weird that this warning is only showing
up now but my guess is that this code was only just released to the
public?

    919 
    920         ring->hw_queue_id = queue_id;
    921         ring->doorbell_index = qprops.doorbell_off;
    922 
    923         if (queue_type == AMDGPU_RING_TYPE_GFX)
    924                 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id);
    925         else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
    926                 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
    927                         queue_id);
    928         else if (queue_type == AMDGPU_RING_TYPE_SDMA)
    929                 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
    930                         queue_id);
    931         else
    932                 BUG();
    933 
    934         *out = ring;
    935         return 0;
    936 
    937 clean_up_ring:
    938         amdgpu_ring_fini(ring);
    939 clean_up_memory:
    940         kfree(ring);
--> 941         amdgpu_mes_unlock(&adev->mes);
    942         return r;
    943 }

regards,
dan carpenter

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-10-26  9:36 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-09  9:07 [bug report] drm/amdgpu/mes: use ring for kernel queue submission Dan Carpenter
  -- strict thread matches above, loose matches on Subject: below --
2022-10-26  9:35 Dan Carpenter
2022-05-09  7:41 Dan Carpenter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).