All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation
@ 2024-04-22 14:37 Alex Deucher
  2024-04-22 14:37 ` [PATCH 2/2] drm/amdgpu/mes11: Use a separate fence per transaction Alex Deucher
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Alex Deucher @ 2024-04-22 14:37 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher

As we use wb slots more dynamically, we need to lock
access to avoid racing on allocation or free.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cac0ca64367b..f87d53e183c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -502,6 +502,7 @@ struct amdgpu_wb {
 	uint64_t		gpu_addr;
 	u32			num_wb;	/* Number of wb slots actually reserved for amdgpu. */
 	unsigned long		used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
+	spinlock_t		lock;
 };
 
 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f8a34db5d9e3..869256394136 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1482,13 +1482,17 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev)
  */
 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
 {
-	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
+	unsigned long flags, offset;
 
+	spin_lock_irqsave(&adev->wb.lock, flags);
+	offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
 	if (offset < adev->wb.num_wb) {
 		__set_bit(offset, adev->wb.used);
+		spin_unlock_irqrestore(&adev->wb.lock, flags);
 		*wb = offset << 3; /* convert to dw offset */
 		return 0;
 	} else {
+		spin_unlock_irqrestore(&adev->wb.lock, flags);
 		return -EINVAL;
 	}
 }
@@ -1503,9 +1507,13 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
  */
 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
 {
+	unsigned long flags;
+
 	wb >>= 3;
+	spin_lock_irqsave(&adev->wb.lock, flags);
 	if (wb < adev->wb.num_wb)
 		__clear_bit(wb, adev->wb.used);
+	spin_unlock_irqrestore(&adev->wb.lock, flags);
 }
 
 /**
@@ -4061,6 +4069,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	spin_lock_init(&adev->se_cac_idx_lock);
 	spin_lock_init(&adev->audio_endpt_idx_lock);
 	spin_lock_init(&adev->mm_stats.lock);
+	spin_lock_init(&adev->wb.lock);
 
 	INIT_LIST_HEAD(&adev->shadow_list);
 	mutex_init(&adev->shadow_list_lock);
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/2] drm/amdgpu/mes11: Use a separate fence per transaction
  2024-04-22 14:37 [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation Alex Deucher
@ 2024-04-22 14:37 ` Alex Deucher
  2024-04-22 18:56 ` [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation Liu, Shaoyun
  2024-04-23  6:12 ` Christian König
  2 siblings, 0 replies; 7+ messages in thread
From: Alex Deucher @ 2024-04-22 14:37 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher

We can't use a shared fence location because each transaction
should be considered independently.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 12 ++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  4 ++++
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c  | 21 +++++++++++++++++----
 3 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 78e4f88f5134..92c6fae780f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -32,6 +32,18 @@
 #define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
 #define AMDGPU_ONE_DOORBELL_SIZE 8
 
+signed long amdgpu_mes_fence_wait_polling(u64 *fence,
+					  u64 wait_seq,
+					  signed long timeout)
+{
+
+	while ((s64)(wait_seq - *fence) > 0 && timeout > 0) {
+		udelay(2);
+		timeout -= 2;
+	}
+	return timeout > 0 ? timeout : 0;
+}
+
 int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
 {
 	return roundup(AMDGPU_ONE_DOORBELL_SIZE *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 6b3e1844eac5..b99a2b3cffe3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -340,6 +340,10 @@ struct amdgpu_mes_funcs {
 #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
 #define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))
 
+signed long amdgpu_mes_fence_wait_polling(u64 *fence,
+					  u64 wait_seq,
+					  signed long timeout);
+
 int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);
 
 int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index eb25af46622e..09193aee71c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -163,6 +163,10 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 	unsigned long flags;
 	signed long timeout = 3000000; /* 3000 ms */
 	const char *op_str, *misc_op_str;
+	u32 fence_offset;
+	u64 fence_gpu_addr;
+	u64 *fence_ptr;
+	int ret;
 
 	if (x_pkt->header.opcode >= MES_SCH_API_MAX)
 		return -EINVAL;
@@ -175,15 +179,24 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 	}
 	BUG_ON(size % 4 != 0);
 
+	ret = amdgpu_device_wb_get(adev, &fence_offset);
+	if (ret)
+		return ret;
+	fence_gpu_addr =
+		adev->wb.gpu_addr + (fence_offset * 4);
+	fence_ptr = (u64 *)&adev->wb.wb[fence_offset];
+	*fence_ptr = 0;
+
 	spin_lock_irqsave(&mes->ring_lock, flags);
 	if (amdgpu_ring_alloc(ring, ndw)) {
 		spin_unlock_irqrestore(&mes->ring_lock, flags);
+		amdgpu_device_wb_free(adev, fence_offset);
 		return -ENOMEM;
 	}
 
 	api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
-	api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr;
-	api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq;
+	api_status->api_completion_fence_addr = fence_gpu_addr;
+	api_status->api_completion_fence_value = 1;
 
 	amdgpu_ring_write_multiple(ring, pkt, ndw);
 	amdgpu_ring_commit(ring);
@@ -199,8 +212,8 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 	else
 		dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
 
-	r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
-		      timeout);
+	r = amdgpu_mes_fence_wait_polling(fence_ptr, (u64)1, timeout);
+	amdgpu_device_wb_free(adev, fence_offset);
 	if (r < 1) {
 
 		if (misc_op_str)
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation
  2024-04-22 14:37 [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation Alex Deucher
  2024-04-22 14:37 ` [PATCH 2/2] drm/amdgpu/mes11: Use a separate fence per transaction Alex Deucher
@ 2024-04-22 18:56 ` Liu, Shaoyun
  2024-04-23  6:12 ` Christian König
  2 siblings, 0 replies; 7+ messages in thread
From: Liu, Shaoyun @ 2024-04-22 18:56 UTC (permalink / raw)
  To: Deucher, Alexander, amd-gfx; +Cc: Deucher, Alexander

[AMD Official Use Only - General]

These two patches look good to me.

Reviewed-by: Shaoyun Liu <shaoyunl@amd.com>

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Alex Deucher
Sent: Monday, April 22, 2024 10:38 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
Subject: [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation

As we use wb slots more dynamically, we need to lock access to avoid racing on allocation or free.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cac0ca64367b..f87d53e183c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -502,6 +502,7 @@ struct amdgpu_wb {
        uint64_t                gpu_addr;
        u32                     num_wb; /* Number of wb slots actually reserved for amdgpu. */
        unsigned long           used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
+       spinlock_t              lock;
 };

 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f8a34db5d9e3..869256394136 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1482,13 +1482,17 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev)
  */
 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)  {
-       unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
+       unsigned long flags, offset;

+       spin_lock_irqsave(&adev->wb.lock, flags);
+       offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
        if (offset < adev->wb.num_wb) {
                __set_bit(offset, adev->wb.used);
+               spin_unlock_irqrestore(&adev->wb.lock, flags);
                *wb = offset << 3; /* convert to dw offset */
                return 0;
        } else {
+               spin_unlock_irqrestore(&adev->wb.lock, flags);
                return -EINVAL;
        }
 }
@@ -1503,9 +1507,13 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
  */
 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)  {
+       unsigned long flags;
+
        wb >>= 3;
+       spin_lock_irqsave(&adev->wb.lock, flags);
        if (wb < adev->wb.num_wb)
                __clear_bit(wb, adev->wb.used);
+       spin_unlock_irqrestore(&adev->wb.lock, flags);
 }

 /**
@@ -4061,6 +4069,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        spin_lock_init(&adev->se_cac_idx_lock);
        spin_lock_init(&adev->audio_endpt_idx_lock);
        spin_lock_init(&adev->mm_stats.lock);
+       spin_lock_init(&adev->wb.lock);

        INIT_LIST_HEAD(&adev->shadow_list);
        mutex_init(&adev->shadow_list_lock);
--
2.44.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation
  2024-04-22 14:37 [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation Alex Deucher
  2024-04-22 14:37 ` [PATCH 2/2] drm/amdgpu/mes11: Use a separate fence per transaction Alex Deucher
  2024-04-22 18:56 ` [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation Liu, Shaoyun
@ 2024-04-23  6:12 ` Christian König
  2024-04-23 13:18   ` Alex Deucher
  2 siblings, 1 reply; 7+ messages in thread
From: Christian König @ 2024-04-23  6:12 UTC (permalink / raw)
  To: Alex Deucher, amd-gfx

Am 22.04.24 um 16:37 schrieb Alex Deucher:
> As we use wb slots more dynamically, we need to lock
> access to avoid racing on allocation or free.

Wait a second. Why are we using the wb slots dynamically?

The number of slots made available is statically calculated, when this 
is suddenly used dynamically we have quite a bug here.

Regards,
Christian.

>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++++++++++-
>   2 files changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index cac0ca64367b..f87d53e183c3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -502,6 +502,7 @@ struct amdgpu_wb {
>   	uint64_t		gpu_addr;
>   	u32			num_wb;	/* Number of wb slots actually reserved for amdgpu. */
>   	unsigned long		used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
> +	spinlock_t		lock;
>   };
>   
>   int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index f8a34db5d9e3..869256394136 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1482,13 +1482,17 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev)
>    */
>   int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
>   {
> -	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
> +	unsigned long flags, offset;
>   
> +	spin_lock_irqsave(&adev->wb.lock, flags);
> +	offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
>   	if (offset < adev->wb.num_wb) {
>   		__set_bit(offset, adev->wb.used);
> +		spin_unlock_irqrestore(&adev->wb.lock, flags);
>   		*wb = offset << 3; /* convert to dw offset */
>   		return 0;
>   	} else {
> +		spin_unlock_irqrestore(&adev->wb.lock, flags);
>   		return -EINVAL;
>   	}
>   }
> @@ -1503,9 +1507,13 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
>    */
>   void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
>   {
> +	unsigned long flags;
> +
>   	wb >>= 3;
> +	spin_lock_irqsave(&adev->wb.lock, flags);
>   	if (wb < adev->wb.num_wb)
>   		__clear_bit(wb, adev->wb.used);
> +	spin_unlock_irqrestore(&adev->wb.lock, flags);
>   }
>   
>   /**
> @@ -4061,6 +4069,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   	spin_lock_init(&adev->se_cac_idx_lock);
>   	spin_lock_init(&adev->audio_endpt_idx_lock);
>   	spin_lock_init(&adev->mm_stats.lock);
> +	spin_lock_init(&adev->wb.lock);
>   
>   	INIT_LIST_HEAD(&adev->shadow_list);
>   	mutex_init(&adev->shadow_list_lock);


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation
  2024-04-23  6:12 ` Christian König
@ 2024-04-23 13:18   ` Alex Deucher
  2024-04-23 13:58     ` Christian König
  0 siblings, 1 reply; 7+ messages in thread
From: Alex Deucher @ 2024-04-23 13:18 UTC (permalink / raw)
  To: Christian König; +Cc: Alex Deucher, amd-gfx

On Tue, Apr 23, 2024 at 2:57 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 22.04.24 um 16:37 schrieb Alex Deucher:
> > As we use wb slots more dynamically, we need to lock
> > access to avoid racing on allocation or free.
>
> Wait a second. Why are we using the wb slots dynamically?
>

See patch 2.  I needed a way to allocate small GPU accessible memory
locations on the fly.  Using WB seems like a good solution.

Alex

> The number of slots made available is statically calculated, when this
> is suddenly used dynamically we have quite a bug here.
>
> Regards,
> Christian.
>
> >
> > Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++++++++++-
> >   2 files changed, 11 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index cac0ca64367b..f87d53e183c3 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -502,6 +502,7 @@ struct amdgpu_wb {
> >       uint64_t                gpu_addr;
> >       u32                     num_wb; /* Number of wb slots actually reserved for amdgpu. */
> >       unsigned long           used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
> > +     spinlock_t              lock;
> >   };
> >
> >   int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index f8a34db5d9e3..869256394136 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -1482,13 +1482,17 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev)
> >    */
> >   int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
> >   {
> > -     unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
> > +     unsigned long flags, offset;
> >
> > +     spin_lock_irqsave(&adev->wb.lock, flags);
> > +     offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
> >       if (offset < adev->wb.num_wb) {
> >               __set_bit(offset, adev->wb.used);
> > +             spin_unlock_irqrestore(&adev->wb.lock, flags);
> >               *wb = offset << 3; /* convert to dw offset */
> >               return 0;
> >       } else {
> > +             spin_unlock_irqrestore(&adev->wb.lock, flags);
> >               return -EINVAL;
> >       }
> >   }
> > @@ -1503,9 +1507,13 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
> >    */
> >   void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
> >   {
> > +     unsigned long flags;
> > +
> >       wb >>= 3;
> > +     spin_lock_irqsave(&adev->wb.lock, flags);
> >       if (wb < adev->wb.num_wb)
> >               __clear_bit(wb, adev->wb.used);
> > +     spin_unlock_irqrestore(&adev->wb.lock, flags);
> >   }
> >
> >   /**
> > @@ -4061,6 +4069,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> >       spin_lock_init(&adev->se_cac_idx_lock);
> >       spin_lock_init(&adev->audio_endpt_idx_lock);
> >       spin_lock_init(&adev->mm_stats.lock);
> > +     spin_lock_init(&adev->wb.lock);
> >
> >       INIT_LIST_HEAD(&adev->shadow_list);
> >       mutex_init(&adev->shadow_list_lock);
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation
  2024-04-23 13:18   ` Alex Deucher
@ 2024-04-23 13:58     ` Christian König
  2024-04-23 14:09       ` Alex Deucher
  0 siblings, 1 reply; 7+ messages in thread
From: Christian König @ 2024-04-23 13:58 UTC (permalink / raw)
  To: Alex Deucher; +Cc: Alex Deucher, amd-gfx

Am 23.04.24 um 15:18 schrieb Alex Deucher:
> On Tue, Apr 23, 2024 at 2:57 AM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Am 22.04.24 um 16:37 schrieb Alex Deucher:
>>> As we use wb slots more dynamically, we need to lock
>>> access to avoid racing on allocation or free.
>> Wait a second. Why are we using the wb slots dynamically?
>>
> See patch 2.  I needed a way to allocate small GPU accessible memory
> locations on the fly.  Using WB seems like a good solution.

That's probably better done with the seq64 allocator. At least the 
original idea was that it is self containing and can be used by many 
threads at the same time.

Apart from that I really think we need to talk with the MES guys about 
changing that behavior ASAP. This is really a bug we need to fix and not 
work around like that.

Christian.

>
> Alex
>
>> The number of slots made available is statically calculated, when this
>> is suddenly used dynamically we have quite a bug here.
>>
>> Regards,
>> Christian.
>>
>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++++++++++-
>>>    2 files changed, 11 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index cac0ca64367b..f87d53e183c3 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -502,6 +502,7 @@ struct amdgpu_wb {
>>>        uint64_t                gpu_addr;
>>>        u32                     num_wb; /* Number of wb slots actually reserved for amdgpu. */
>>>        unsigned long           used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
>>> +     spinlock_t              lock;
>>>    };
>>>
>>>    int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index f8a34db5d9e3..869256394136 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -1482,13 +1482,17 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev)
>>>     */
>>>    int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
>>>    {
>>> -     unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
>>> +     unsigned long flags, offset;
>>>
>>> +     spin_lock_irqsave(&adev->wb.lock, flags);
>>> +     offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
>>>        if (offset < adev->wb.num_wb) {
>>>                __set_bit(offset, adev->wb.used);
>>> +             spin_unlock_irqrestore(&adev->wb.lock, flags);
>>>                *wb = offset << 3; /* convert to dw offset */
>>>                return 0;
>>>        } else {
>>> +             spin_unlock_irqrestore(&adev->wb.lock, flags);
>>>                return -EINVAL;
>>>        }
>>>    }
>>> @@ -1503,9 +1507,13 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
>>>     */
>>>    void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
>>>    {
>>> +     unsigned long flags;
>>> +
>>>        wb >>= 3;
>>> +     spin_lock_irqsave(&adev->wb.lock, flags);
>>>        if (wb < adev->wb.num_wb)
>>>                __clear_bit(wb, adev->wb.used);
>>> +     spin_unlock_irqrestore(&adev->wb.lock, flags);
>>>    }
>>>
>>>    /**
>>> @@ -4061,6 +4069,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>>        spin_lock_init(&adev->se_cac_idx_lock);
>>>        spin_lock_init(&adev->audio_endpt_idx_lock);
>>>        spin_lock_init(&adev->mm_stats.lock);
>>> +     spin_lock_init(&adev->wb.lock);
>>>
>>>        INIT_LIST_HEAD(&adev->shadow_list);
>>>        mutex_init(&adev->shadow_list_lock);


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation
  2024-04-23 13:58     ` Christian König
@ 2024-04-23 14:09       ` Alex Deucher
  0 siblings, 0 replies; 7+ messages in thread
From: Alex Deucher @ 2024-04-23 14:09 UTC (permalink / raw)
  To: Christian König; +Cc: Alex Deucher, amd-gfx

On Tue, Apr 23, 2024 at 9:58 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 23.04.24 um 15:18 schrieb Alex Deucher:
> > On Tue, Apr 23, 2024 at 2:57 AM Christian König
> > <ckoenig.leichtzumerken@gmail.com> wrote:
> >> Am 22.04.24 um 16:37 schrieb Alex Deucher:
> >>> As we use wb slots more dynamically, we need to lock
> >>> access to avoid racing on allocation or free.
> >> Wait a second. Why are we using the wb slots dynamically?
> >>
> > See patch 2.  I needed a way to allocate small GPU accessible memory
> > locations on the fly.  Using WB seems like a good solution.
>
> That's probably better done with the seq64 allocator. At least the
> original idea was that it is self containing and can be used by many
> threads at the same time.

Sure, but that is mapped into the user VMs and has sequence numbers
handling which we don't really need for handling the MES.  It also
makes seq64 a requirement for this fix which makes it harder to port
back to older kernels.

Alex

>
> Apart from that I really think we need to talk with the MES guys about
> changing that behavior ASAP. This is really a bug we need to fix and not
> work around like that.
>
> Christian.
>
> >
> > Alex
> >
> >> The number of slots made available is statically calculated, when this
> >> is suddenly used dynamically we have quite a bug here.
> >>
> >> Regards,
> >> Christian.
> >>
> >>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> >>> ---
> >>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
> >>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++++++++++-
> >>>    2 files changed, 11 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> index cac0ca64367b..f87d53e183c3 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> @@ -502,6 +502,7 @@ struct amdgpu_wb {
> >>>        uint64_t                gpu_addr;
> >>>        u32                     num_wb; /* Number of wb slots actually reserved for amdgpu. */
> >>>        unsigned long           used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
> >>> +     spinlock_t              lock;
> >>>    };
> >>>
> >>>    int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> index f8a34db5d9e3..869256394136 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> @@ -1482,13 +1482,17 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev)
> >>>     */
> >>>    int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
> >>>    {
> >>> -     unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
> >>> +     unsigned long flags, offset;
> >>>
> >>> +     spin_lock_irqsave(&adev->wb.lock, flags);
> >>> +     offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
> >>>        if (offset < adev->wb.num_wb) {
> >>>                __set_bit(offset, adev->wb.used);
> >>> +             spin_unlock_irqrestore(&adev->wb.lock, flags);
> >>>                *wb = offset << 3; /* convert to dw offset */
> >>>                return 0;
> >>>        } else {
> >>> +             spin_unlock_irqrestore(&adev->wb.lock, flags);
> >>>                return -EINVAL;
> >>>        }
> >>>    }
> >>> @@ -1503,9 +1507,13 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
> >>>     */
> >>>    void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
> >>>    {
> >>> +     unsigned long flags;
> >>> +
> >>>        wb >>= 3;
> >>> +     spin_lock_irqsave(&adev->wb.lock, flags);
> >>>        if (wb < adev->wb.num_wb)
> >>>                __clear_bit(wb, adev->wb.used);
> >>> +     spin_unlock_irqrestore(&adev->wb.lock, flags);
> >>>    }
> >>>
> >>>    /**
> >>> @@ -4061,6 +4069,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> >>>        spin_lock_init(&adev->se_cac_idx_lock);
> >>>        spin_lock_init(&adev->audio_endpt_idx_lock);
> >>>        spin_lock_init(&adev->mm_stats.lock);
> >>> +     spin_lock_init(&adev->wb.lock);
> >>>
> >>>        INIT_LIST_HEAD(&adev->shadow_list);
> >>>        mutex_init(&adev->shadow_list_lock);
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2024-04-23 14:09 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-22 14:37 [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation Alex Deucher
2024-04-22 14:37 ` [PATCH 2/2] drm/amdgpu/mes11: Use a separate fence per transaction Alex Deucher
2024-04-22 18:56 ` [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation Liu, Shaoyun
2024-04-23  6:12 ` Christian König
2024-04-23 13:18   ` Alex Deucher
2024-04-23 13:58     ` Christian König
2024-04-23 14:09       ` Alex Deucher

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.