amd-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Set the 64-bit address of the wptr of SDMA doorbell properly
@ 2022-04-25  9:15 Haohui Mai
  2022-04-25 11:01 ` Christian König
  0 siblings, 1 reply; 4+ messages in thread
From: Haohui Mai @ 2022-04-25  9:15 UTC (permalink / raw)
  To: amd-gfx; +Cc: ckoenig.leichtzumerken, emily.deng

Computing the address of the doorbell should be done before instead of after
separating the 64-bit address into the higher and lower half. The
current code sets the MMIO registers incorrectly if the address of the
doorbell is above 1G.

Signed-off-by: Haohui Mai <ricetons@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c  | 4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 8 ++++----
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
 drivers/gpu/drm/amd/amdgpu/si_dma.c    | 4 ++--
 7 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index c8ebd108548d..df863d346995 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
  struct amdgpu_device *adev = ring->adev;

  WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
-        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
+        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
 }

 static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -487,7 +487,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
  WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);

  ring->wptr = 0;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));

  /* enable DMA RB */
  WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 1d8bbcbd7a37..b83fd00466fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -223,7 +223,7 @@ static void sdma_v2_4_ring_set_wptr(struct
amdgpu_ring *ring)
 {
  struct amdgpu_device *adev = ring->adev;

- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
lower_32_bits(ring->wptr << 2));
 }

 static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -465,7 +465,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
  WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);

  ring->wptr = 0;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));

  /* enable DMA RB */
  rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 4ef4feff5649..557a7d5174b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -389,14 +389,14 @@ static void sdma_v3_0_ring_set_wptr(struct
amdgpu_ring *ring)
  if (ring->use_doorbell) {
  u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
  /* XXX check if swapping is necessary on BE */
- WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
- WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
+ WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr << 2));
  } else if (ring->use_pollmem) {
  u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];

- WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
+ WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
  } else {
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
lower_32_bits(ring->wptr << 2));
  }
 }

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index d7e8f7232364..ff86c43b63d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
amdgpu_ring *ring)

  DRM_DEBUG("Using doorbell -- "
  "wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ "lower_32_bits(ring->wptr << 2) == 0x%08x "
+ "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
  ring->wptr_offs,
  lower_32_bits(ring->wptr << 2),
  upper_32_bits(ring->wptr << 2));
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index a8d49c005f73..627eb1f147c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
amdgpu_ring *ring)
  if (ring->use_doorbell) {
  DRM_DEBUG("Using doorbell -- "
  "wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ "lower_32_bits(ring->wptr << 2) == 0x%08x "
+ "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
  ring->wptr_offs,
  lower_32_bits(ring->wptr << 2),
  upper_32_bits(ring->wptr << 2));
@@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)

  if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
for wptr */
  WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
-        lower_32_bits(ring->wptr) << 2);
+        lower_32_bits(ring->wptr << 2));
  WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
-        upper_32_bits(ring->wptr) << 2);
+        upper_32_bits(ring->wptr << 2));
  }

  doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
mmSDMA0_GFX_DOORBELL));
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 824eace69884..a5eb82bfeaa8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
amdgpu_ring *ring)
  if (ring->use_doorbell) {
  DRM_DEBUG("Using doorbell -- "
  "wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ "lower_32_bits(ring->wptr << 2) == 0x%08x "
+ "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
  ring->wptr_offs,
  lower_32_bits(ring->wptr << 2),
  upper_32_bits(ring->wptr << 2));
@@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
  WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);

  if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
for wptr */
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
lower_32_bits(ring->wptr) << 2);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
upper_32_bits(ring->wptr) << 2);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
upper_32_bits(ring->wptr << 2));
  }

  doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_DOORBELL));
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c
b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 195b45bcb8ad..0af11d3b00e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -57,7 +57,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
  u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;

  WREG32(DMA_RB_WPTR + sdma_offsets[me],
-        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
+        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
 }

 static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
@@ -175,7 +175,7 @@ static int si_dma_start(struct amdgpu_device *adev)
  WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);

  ring->wptr = 0;
- WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
+ WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
  WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);

  ring->sched.ready = true;
--
2.25.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] Set the 64-bit address of the wptr of SDMA doorbell properly
  2022-04-25  9:15 [PATCH] Set the 64-bit address of the wptr of SDMA doorbell properly Haohui Mai
@ 2022-04-25 11:01 ` Christian König
  2022-04-25 11:14   ` Haohui Mai
  0 siblings, 1 reply; 4+ messages in thread
From: Christian König @ 2022-04-25 11:01 UTC (permalink / raw)
  To: Haohui Mai, amd-gfx; +Cc: emily.deng

Am 25.04.22 um 11:15 schrieb Haohui Mai:
> Computing the address of the doorbell should be done before instead of after
> separating the 64-bit address into the higher and lower half. The
> current code sets the MMIO registers incorrectly if the address of the
> doorbell is above 1G.

That doesn't make any sense at all. The address of the doorbell is 
completely irrelevant to the value you write into it.

What we could do is to stop using the lower_32_bits() function, since 
the WPTR can't handle more than 16, 20 or 24 bits (IIRC) depending on hw 
generation anyway.

Regards,
Christian.

>
> Signed-off-by: Haohui Mai <ricetons@gmail.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/cik_sdma.c  | 4 ++--
>   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
>   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 8 ++++----
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
>   drivers/gpu/drm/amd/amdgpu/si_dma.c    | 4 ++--
>   7 files changed, 20 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> index c8ebd108548d..df863d346995 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> @@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
>    struct amdgpu_device *adev = ring->adev;
>
>    WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
> +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
>   }
>
>   static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> @@ -487,7 +487,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
>    WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
>
>    ring->wptr = 0;
> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
>
>    /* enable DMA RB */
>    WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> index 1d8bbcbd7a37..b83fd00466fe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> @@ -223,7 +223,7 @@ static void sdma_v2_4_ring_set_wptr(struct
> amdgpu_ring *ring)
>   {
>    struct amdgpu_device *adev = ring->adev;
>
> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> lower_32_bits(ring->wptr) << 2);
> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> lower_32_bits(ring->wptr << 2));
>   }
>
>   static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> @@ -465,7 +465,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
>    WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
>
>    ring->wptr = 0;
> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
>
>    /* enable DMA RB */
>    rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> index 4ef4feff5649..557a7d5174b0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> @@ -389,14 +389,14 @@ static void sdma_v3_0_ring_set_wptr(struct
> amdgpu_ring *ring)
>    if (ring->use_doorbell) {
>    u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
>    /* XXX check if swapping is necessary on BE */
> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
> - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
> + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
> + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr << 2));
>    } else if (ring->use_pollmem) {
>    u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
>
> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
> + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
>    } else {
> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> lower_32_bits(ring->wptr) << 2);
> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> lower_32_bits(ring->wptr << 2));
>    }
>   }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index d7e8f7232364..ff86c43b63d1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
> amdgpu_ring *ring)
>
>    DRM_DEBUG("Using doorbell -- "
>    "wptr_offs == 0x%08x "
> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>    ring->wptr_offs,
>    lower_32_bits(ring->wptr << 2),
>    upper_32_bits(ring->wptr << 2));
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index a8d49c005f73..627eb1f147c2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
> amdgpu_ring *ring)
>    if (ring->use_doorbell) {
>    DRM_DEBUG("Using doorbell -- "
>    "wptr_offs == 0x%08x "
> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>    ring->wptr_offs,
>    lower_32_bits(ring->wptr << 2),
>    upper_32_bits(ring->wptr << 2));
> @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
>
>    if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
> for wptr */
>    WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> -        lower_32_bits(ring->wptr) << 2);
> +        lower_32_bits(ring->wptr << 2));
>    WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> -        upper_32_bits(ring->wptr) << 2);
> +        upper_32_bits(ring->wptr << 2));
>    }
>
>    doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
> mmSDMA0_GFX_DOORBELL));
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index 824eace69884..a5eb82bfeaa8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
> amdgpu_ring *ring)
>    if (ring->use_doorbell) {
>    DRM_DEBUG("Using doorbell -- "
>    "wptr_offs == 0x%08x "
> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>    ring->wptr_offs,
>    lower_32_bits(ring->wptr << 2),
>    upper_32_bits(ring->wptr << 2));
> @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
>    WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
>
>    if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
> for wptr */
> - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> lower_32_bits(ring->wptr) << 2);
> - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> upper_32_bits(ring->wptr) << 2);
> + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> lower_32_bits(ring->wptr << 2));
> + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> upper_32_bits(ring->wptr << 2));
>    }
>
>    doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_DOORBELL));
> diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> index 195b45bcb8ad..0af11d3b00e7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> @@ -57,7 +57,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
>    u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
>
>    WREG32(DMA_RB_WPTR + sdma_offsets[me],
> -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
> +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
>   }
>
>   static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
> @@ -175,7 +175,7 @@ static int si_dma_start(struct amdgpu_device *adev)
>    WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
>
>    ring->wptr = 0;
> - WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> + WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
>    WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
>
>    ring->sched.ready = true;
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Set the 64-bit address of the wptr of SDMA doorbell properly
  2022-04-25 11:01 ` Christian König
@ 2022-04-25 11:14   ` Haohui Mai
  2022-04-25 11:20     ` Christian König
  0 siblings, 1 reply; 4+ messages in thread
From: Haohui Mai @ 2022-04-25 11:14 UTC (permalink / raw)
  To: Christian König; +Cc: emily.deng, amd-gfx

Sorry for the confusion. I misread the code, but it still seems to me
it is a valid issue. What the patch tries to do is to fix the
following pattern:

-                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
-                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));

I agree with you that ring->wptr is an offset to the ring. Just
looking at the above lines it seems that they are incorrect when
ring->wptr is larger than 1GB.

As you pointed out that ring->wptr cannot be larger than (1 << 24), it
can be resolved via either (1) fixing the patterns in the provided
patch, or (2) clamping the results to (1 << 24) - 1 and getting rid of
lower_32_bits() / higher_32_bits() at all.

What's your suggestion of moving forward?

Thanks,
Haohui

On Mon, Apr 25, 2022 at 7:02 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 25.04.22 um 11:15 schrieb Haohui Mai:
> > Computing the address of the doorbell should be done before instead of after
> > separating the 64-bit address into the higher and lower half. The
> > current code sets the MMIO registers incorrectly if the address of the
> > doorbell is above 1G.
>
> That doesn't make any sense at all. The address of the doorbell is
> completely irrelevant to the value you write into it.
>
> What we could do is to stop using the lower_32_bits() function, since
> the WPTR can't handle more than 16, 20 or 24 bits (IIRC) depending on hw
> generation anyway.
>
> Regards,
> Christian.
>
> >
> > Signed-off-by: Haohui Mai <ricetons@gmail.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/cik_sdma.c  | 4 ++--
> >   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
> >   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 8 ++++----
> >   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
> >   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
> >   drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
> >   drivers/gpu/drm/amd/amdgpu/si_dma.c    | 4 ++--
> >   7 files changed, 20 insertions(+), 20 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > index c8ebd108548d..df863d346995 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > @@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
> >    struct amdgpu_device *adev = ring->adev;
> >
> >    WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
> > +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
> >   }
> >
> >   static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> > @@ -487,7 +487,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
> >    WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
> >
> >    ring->wptr = 0;
> > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
> >
> >    /* enable DMA RB */
> >    WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > index 1d8bbcbd7a37..b83fd00466fe 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > @@ -223,7 +223,7 @@ static void sdma_v2_4_ring_set_wptr(struct
> > amdgpu_ring *ring)
> >   {
> >    struct amdgpu_device *adev = ring->adev;
> >
> > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > lower_32_bits(ring->wptr) << 2);
> > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > lower_32_bits(ring->wptr << 2));
> >   }
> >
> >   static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> > @@ -465,7 +465,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
> >    WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
> >
> >    ring->wptr = 0;
> > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
> >
> >    /* enable DMA RB */
> >    rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > index 4ef4feff5649..557a7d5174b0 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > @@ -389,14 +389,14 @@ static void sdma_v3_0_ring_set_wptr(struct
> > amdgpu_ring *ring)
> >    if (ring->use_doorbell) {
> >    u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
> >    /* XXX check if swapping is necessary on BE */
> > - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
> > - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
> > + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
> > + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr << 2));
> >    } else if (ring->use_pollmem) {
> >    u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
> >
> > - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
> > + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
> >    } else {
> > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > lower_32_bits(ring->wptr) << 2);
> > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > lower_32_bits(ring->wptr << 2));
> >    }
> >   }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > index d7e8f7232364..ff86c43b63d1 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
> > amdgpu_ring *ring)
> >
> >    DRM_DEBUG("Using doorbell -- "
> >    "wptr_offs == 0x%08x "
> > - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> > - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> > + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> > + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >    ring->wptr_offs,
> >    lower_32_bits(ring->wptr << 2),
> >    upper_32_bits(ring->wptr << 2));
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > index a8d49c005f73..627eb1f147c2 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
> > amdgpu_ring *ring)
> >    if (ring->use_doorbell) {
> >    DRM_DEBUG("Using doorbell -- "
> >    "wptr_offs == 0x%08x "
> > - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> > - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> > + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> > + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >    ring->wptr_offs,
> >    lower_32_bits(ring->wptr << 2),
> >    upper_32_bits(ring->wptr << 2));
> > @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
> >
> >    if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
> > for wptr */
> >    WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> > -        lower_32_bits(ring->wptr) << 2);
> > +        lower_32_bits(ring->wptr << 2));
> >    WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> > -        upper_32_bits(ring->wptr) << 2);
> > +        upper_32_bits(ring->wptr << 2));
> >    }
> >
> >    doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
> > mmSDMA0_GFX_DOORBELL));
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > index 824eace69884..a5eb82bfeaa8 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
> > amdgpu_ring *ring)
> >    if (ring->use_doorbell) {
> >    DRM_DEBUG("Using doorbell -- "
> >    "wptr_offs == 0x%08x "
> > - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> > - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> > + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> > + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >    ring->wptr_offs,
> >    lower_32_bits(ring->wptr << 2),
> >    upper_32_bits(ring->wptr << 2));
> > @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
> >    WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> > mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
> >
> >    if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
> > for wptr */
> > - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> > lower_32_bits(ring->wptr) << 2);
> > - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> > upper_32_bits(ring->wptr) << 2);
> > + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> > lower_32_bits(ring->wptr << 2));
> > + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> > upper_32_bits(ring->wptr << 2));
> >    }
> >
> >    doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> > mmSDMA0_GFX_DOORBELL));
> > diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> > b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> > index 195b45bcb8ad..0af11d3b00e7 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> > @@ -57,7 +57,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
> >    u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
> >
> >    WREG32(DMA_RB_WPTR + sdma_offsets[me],
> > -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
> > +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
> >   }
> >
> >   static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
> > @@ -175,7 +175,7 @@ static int si_dma_start(struct amdgpu_device *adev)
> >    WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
> >
> >    ring->wptr = 0;
> > - WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> > + WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
> >    WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
> >
> >    ring->sched.ready = true;
> > --
> > 2.25.1
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Set the 64-bit address of the wptr of SDMA doorbell properly
  2022-04-25 11:14   ` Haohui Mai
@ 2022-04-25 11:20     ` Christian König
  0 siblings, 0 replies; 4+ messages in thread
From: Christian König @ 2022-04-25 11:20 UTC (permalink / raw)
  To: Haohui Mai; +Cc: emily.deng, amd-gfx

Am 25.04.22 um 13:14 schrieb Haohui Mai:
> Sorry for the confusion. I misread the code, but it still seems to me
> it is a valid issue. What the patch tries to do is to fix the
> following pattern:
>
> -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
> -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
> +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
> +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));

Ah, yes that's indeed a bug.

>
> I agree with you that ring->wptr is an offset to the ring. Just
> looking at the above lines it seems that they are incorrect when
> ring->wptr is larger than 1GB.
>
> As you pointed out that ring->wptr cannot be larger than (1 << 24), it
> can be resolved via either (1) fixing the patterns in the provided
> patch, or (2) clamping the results to (1 << 24) - 1 and getting rid of
> lower_32_bits() / higher_32_bits() at all.
>
> What's your suggestion of moving forward?

It depends on the hardware generation. We used to have a bunch of older 
hardware where the wptr was one 32bit register where actually only 16,20 
or 24bits where used.

Then we have the newer hardware which has two 32bit registers for the 
WPTR and those really have 64bits in them.

Clamping the value actually doesn't much sense because the hardware will 
do it anyway and if the value is larger than supported it won't work 
either way.

So my suggestion is that you generate one patch where you fix all the 
lower/upper and shift mixup for the 64bit cases.

And then maybe another patch where all the lower+masking for the 32bit 
cases is removed as a cleanup.

Regards,
Christian.

>
> Thanks,
> Haohui
>
> On Mon, Apr 25, 2022 at 7:02 PM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Am 25.04.22 um 11:15 schrieb Haohui Mai:
>>> Computing the address of the doorbell should be done before instead of after
>>> separating the 64-bit address into the higher and lower half. The
>>> current code sets the MMIO registers incorrectly if the address of the
>>> doorbell is above 1G.
>> That doesn't make any sense at all. The address of the doorbell is
>> completely irrelevant to the value you write into it.
>>
>> What we could do is to stop using the lower_32_bits() function, since
>> the WPTR can't handle more than 16, 20 or 24 bits (IIRC) depending on hw
>> generation anyway.
>>
>> Regards,
>> Christian.
>>
>>> Signed-off-by: Haohui Mai <ricetons@gmail.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/cik_sdma.c  | 4 ++--
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 8 ++++----
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
>>>    drivers/gpu/drm/amd/amdgpu/si_dma.c    | 4 ++--
>>>    7 files changed, 20 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> index c8ebd108548d..df863d346995 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> @@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
>>>     struct amdgpu_device *adev = ring->adev;
>>>
>>>     WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>> -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
>>> +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
>>>    }
>>>
>>>    static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>>> @@ -487,7 +487,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
>>>     WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
>>>
>>>     ring->wptr = 0;
>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
>>>
>>>     /* enable DMA RB */
>>>     WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> index 1d8bbcbd7a37..b83fd00466fe 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> @@ -223,7 +223,7 @@ static void sdma_v2_4_ring_set_wptr(struct
>>> amdgpu_ring *ring)
>>>    {
>>>     struct amdgpu_device *adev = ring->adev;
>>>
>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>> lower_32_bits(ring->wptr) << 2);
>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>> lower_32_bits(ring->wptr << 2));
>>>    }
>>>
>>>    static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>>> @@ -465,7 +465,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
>>>     WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
>>>
>>>     ring->wptr = 0;
>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
>>>
>>>     /* enable DMA RB */
>>>     rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> index 4ef4feff5649..557a7d5174b0 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> @@ -389,14 +389,14 @@ static void sdma_v3_0_ring_set_wptr(struct
>>> amdgpu_ring *ring)
>>>     if (ring->use_doorbell) {
>>>     u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
>>>     /* XXX check if swapping is necessary on BE */
>>> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
>>> - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
>>> + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
>>> + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr << 2));
>>>     } else if (ring->use_pollmem) {
>>>     u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
>>>
>>> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
>>> + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
>>>     } else {
>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>> lower_32_bits(ring->wptr) << 2);
>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>> lower_32_bits(ring->wptr << 2));
>>>     }
>>>    }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> index d7e8f7232364..ff86c43b63d1 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
>>> amdgpu_ring *ring)
>>>
>>>     DRM_DEBUG("Using doorbell -- "
>>>     "wptr_offs == 0x%08x "
>>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
>>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
>>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
>>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>>>     ring->wptr_offs,
>>>     lower_32_bits(ring->wptr << 2),
>>>     upper_32_bits(ring->wptr << 2));
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> index a8d49c005f73..627eb1f147c2 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
>>> amdgpu_ring *ring)
>>>     if (ring->use_doorbell) {
>>>     DRM_DEBUG("Using doorbell -- "
>>>     "wptr_offs == 0x%08x "
>>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
>>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
>>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
>>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>>>     ring->wptr_offs,
>>>     lower_32_bits(ring->wptr << 2),
>>>     upper_32_bits(ring->wptr << 2));
>>> @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
>>>
>>>     if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
>>> for wptr */
>>>     WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
>>> -        lower_32_bits(ring->wptr) << 2);
>>> +        lower_32_bits(ring->wptr << 2));
>>>     WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
>>> -        upper_32_bits(ring->wptr) << 2);
>>> +        upper_32_bits(ring->wptr << 2));
>>>     }
>>>
>>>     doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
>>> mmSDMA0_GFX_DOORBELL));
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>> index 824eace69884..a5eb82bfeaa8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>> @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
>>> amdgpu_ring *ring)
>>>     if (ring->use_doorbell) {
>>>     DRM_DEBUG("Using doorbell -- "
>>>     "wptr_offs == 0x%08x "
>>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
>>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
>>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
>>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>>>     ring->wptr_offs,
>>>     lower_32_bits(ring->wptr << 2),
>>>     upper_32_bits(ring->wptr << 2));
>>> @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
>>>     WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
>>> mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
>>>
>>>     if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
>>> for wptr */
>>> - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
>>> lower_32_bits(ring->wptr) << 2);
>>> - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
>>> upper_32_bits(ring->wptr) << 2);
>>> + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
>>> lower_32_bits(ring->wptr << 2));
>>> + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
>>> upper_32_bits(ring->wptr << 2));
>>>     }
>>>
>>>     doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
>>> mmSDMA0_GFX_DOORBELL));
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c
>>> b/drivers/gpu/drm/amd/amdgpu/si_dma.c
>>> index 195b45bcb8ad..0af11d3b00e7 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
>>> @@ -57,7 +57,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
>>>     u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
>>>
>>>     WREG32(DMA_RB_WPTR + sdma_offsets[me],
>>> -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
>>> +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
>>>    }
>>>
>>>    static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
>>> @@ -175,7 +175,7 @@ static int si_dma_start(struct amdgpu_device *adev)
>>>     WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
>>>
>>>     ring->wptr = 0;
>>> - WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
>>> + WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
>>>     WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
>>>
>>>     ring->sched.ready = true;
>>> --
>>> 2.25.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-04-25 11:20 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-25  9:15 [PATCH] Set the 64-bit address of the wptr of SDMA doorbell properly Haohui Mai
2022-04-25 11:01 ` Christian König
2022-04-25 11:14   ` Haohui Mai
2022-04-25 11:20     ` Christian König

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).