All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] Fix incorrect calculations of the wptr of the doorbells
@ 2022-04-25 11:47 Haohui Mai
  2022-04-25 11:52 ` Christian König
  0 siblings, 1 reply; 8+ messages in thread
From: Haohui Mai @ 2022-04-25 11:47 UTC (permalink / raw)
  To: amd-gfx; +Cc: ckoenig.leichtzumerken, emily.deng

Updated the commit messages based on the previous discussion.

Signed-off-by: Haohui Mai <ricetons@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c  | 4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 8 ++++----
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
 drivers/gpu/drm/amd/amdgpu/si_dma.c    | 4 ++--
 7 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index c8ebd108548d..df863d346995 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
  struct amdgpu_device *adev = ring->adev;

  WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
-        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
+        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
 }

 static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -487,7 +487,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
  WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);

  ring->wptr = 0;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));

  /* enable DMA RB */
  WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 1d8bbcbd7a37..b83fd00466fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -223,7 +223,7 @@ static void sdma_v2_4_ring_set_wptr(struct
amdgpu_ring *ring)
 {
  struct amdgpu_device *adev = ring->adev;

- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
lower_32_bits(ring->wptr << 2));
 }

 static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -465,7 +465,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
  WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);

  ring->wptr = 0;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));

  /* enable DMA RB */
  rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 4ef4feff5649..557a7d5174b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -389,14 +389,14 @@ static void sdma_v3_0_ring_set_wptr(struct
amdgpu_ring *ring)
  if (ring->use_doorbell) {
  u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
  /* XXX check if swapping is necessary on BE */
- WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
- WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
+ WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr << 2));
  } else if (ring->use_pollmem) {
  u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];

- WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
+ WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
  } else {
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
lower_32_bits(ring->wptr << 2));
  }
 }

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index d7e8f7232364..ff86c43b63d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
amdgpu_ring *ring)

  DRM_DEBUG("Using doorbell -- "
  "wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ "lower_32_bits(ring->wptr << 2) == 0x%08x "
+ "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
  ring->wptr_offs,
  lower_32_bits(ring->wptr << 2),
  upper_32_bits(ring->wptr << 2));
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index a8d49c005f73..627eb1f147c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
amdgpu_ring *ring)
  if (ring->use_doorbell) {
  DRM_DEBUG("Using doorbell -- "
  "wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ "lower_32_bits(ring->wptr << 2) == 0x%08x "
+ "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
  ring->wptr_offs,
  lower_32_bits(ring->wptr << 2),
  upper_32_bits(ring->wptr << 2));
@@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)

  if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
for wptr */
  WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
-        lower_32_bits(ring->wptr) << 2);
+        lower_32_bits(ring->wptr << 2));
  WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
-        upper_32_bits(ring->wptr) << 2);
+        upper_32_bits(ring->wptr << 2));
  }

  doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
mmSDMA0_GFX_DOORBELL));
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 824eace69884..a5eb82bfeaa8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
amdgpu_ring *ring)
  if (ring->use_doorbell) {
  DRM_DEBUG("Using doorbell -- "
  "wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ "lower_32_bits(ring->wptr << 2) == 0x%08x "
+ "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
  ring->wptr_offs,
  lower_32_bits(ring->wptr << 2),
  upper_32_bits(ring->wptr << 2));
@@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
  WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);

  if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
for wptr */
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
lower_32_bits(ring->wptr) << 2);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
upper_32_bits(ring->wptr) << 2);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
upper_32_bits(ring->wptr << 2));
  }

  doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_DOORBELL));
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c
b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 195b45bcb8ad..0af11d3b00e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -57,7 +57,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
  u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;

  WREG32(DMA_RB_WPTR + sdma_offsets[me],
-        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
+        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
 }

 static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
@@ -175,7 +175,7 @@ static int si_dma_start(struct amdgpu_device *adev)
  WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);

  ring->wptr = 0;
- WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
+ WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
  WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);

  ring->sched.ready = true;
--
2.25.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] Fix incorrect calculations of the wptr of the doorbells
  2022-04-25 11:47 [PATCH 1/2] Fix incorrect calculations of the wptr of the doorbells Haohui Mai
@ 2022-04-25 11:52 ` Christian König
  2022-04-25 12:19   ` Haohui Mai
  0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2022-04-25 11:52 UTC (permalink / raw)
  To: Haohui Mai, amd-gfx; +Cc: emily.deng

Am 25.04.22 um 13:47 schrieb Haohui Mai:
> Updated the commit messages based on the previous discussion.

Please drop all the changes for pre SDMA v4 hardware (e.g. the ones with 
only a 32bit register), so that we only have the changes for the 64bit 
hw versions in here.

Apart from that looks good to me.

Thanks,
Christian.

>
> Signed-off-by: Haohui Mai <ricetons@gmail.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/cik_sdma.c  | 4 ++--
>   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
>   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 8 ++++----
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
>   drivers/gpu/drm/amd/amdgpu/si_dma.c    | 4 ++--
>   7 files changed, 20 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> index c8ebd108548d..df863d346995 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> @@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
>    struct amdgpu_device *adev = ring->adev;
>
>    WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
> +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
>   }
>
>   static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> @@ -487,7 +487,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
>    WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
>
>    ring->wptr = 0;
> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
>
>    /* enable DMA RB */
>    WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> index 1d8bbcbd7a37..b83fd00466fe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> @@ -223,7 +223,7 @@ static void sdma_v2_4_ring_set_wptr(struct
> amdgpu_ring *ring)
>   {
>    struct amdgpu_device *adev = ring->adev;
>
> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> lower_32_bits(ring->wptr) << 2);
> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> lower_32_bits(ring->wptr << 2));
>   }
>
>   static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> @@ -465,7 +465,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
>    WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
>
>    ring->wptr = 0;
> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
>
>    /* enable DMA RB */
>    rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> index 4ef4feff5649..557a7d5174b0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> @@ -389,14 +389,14 @@ static void sdma_v3_0_ring_set_wptr(struct
> amdgpu_ring *ring)
>    if (ring->use_doorbell) {
>    u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
>    /* XXX check if swapping is necessary on BE */
> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
> - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
> + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
> + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr << 2));
>    } else if (ring->use_pollmem) {
>    u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
>
> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
> + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
>    } else {
> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> lower_32_bits(ring->wptr) << 2);
> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> lower_32_bits(ring->wptr << 2));
>    }
>   }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index d7e8f7232364..ff86c43b63d1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
> amdgpu_ring *ring)
>
>    DRM_DEBUG("Using doorbell -- "
>    "wptr_offs == 0x%08x "
> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>    ring->wptr_offs,
>    lower_32_bits(ring->wptr << 2),
>    upper_32_bits(ring->wptr << 2));
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index a8d49c005f73..627eb1f147c2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
> amdgpu_ring *ring)
>    if (ring->use_doorbell) {
>    DRM_DEBUG("Using doorbell -- "
>    "wptr_offs == 0x%08x "
> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>    ring->wptr_offs,
>    lower_32_bits(ring->wptr << 2),
>    upper_32_bits(ring->wptr << 2));
> @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
>
>    if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
> for wptr */
>    WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> -        lower_32_bits(ring->wptr) << 2);
> +        lower_32_bits(ring->wptr << 2));
>    WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> -        upper_32_bits(ring->wptr) << 2);
> +        upper_32_bits(ring->wptr << 2));
>    }
>
>    doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
> mmSDMA0_GFX_DOORBELL));
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index 824eace69884..a5eb82bfeaa8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
> amdgpu_ring *ring)
>    if (ring->use_doorbell) {
>    DRM_DEBUG("Using doorbell -- "
>    "wptr_offs == 0x%08x "
> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>    ring->wptr_offs,
>    lower_32_bits(ring->wptr << 2),
>    upper_32_bits(ring->wptr << 2));
> @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
>    WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
>
>    if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
> for wptr */
> - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> lower_32_bits(ring->wptr) << 2);
> - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> upper_32_bits(ring->wptr) << 2);
> + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> lower_32_bits(ring->wptr << 2));
> + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> upper_32_bits(ring->wptr << 2));
>    }
>
>    doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_DOORBELL));
> diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> index 195b45bcb8ad..0af11d3b00e7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> @@ -57,7 +57,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
>    u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
>
>    WREG32(DMA_RB_WPTR + sdma_offsets[me],
> -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
> +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
>   }
>
>   static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
> @@ -175,7 +175,7 @@ static int si_dma_start(struct amdgpu_device *adev)
>    WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
>
>    ring->wptr = 0;
> - WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> + WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
>    WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
>
>    ring->sched.ready = true;
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] Fix incorrect calculations of the wptr of the doorbells
  2022-04-25 11:52 ` Christian König
@ 2022-04-25 12:19   ` Haohui Mai
  2022-04-25 12:33     ` Christian König
  0 siblings, 1 reply; 8+ messages in thread
From: Haohui Mai @ 2022-04-25 12:19 UTC (permalink / raw)
  To: Christian König; +Cc: emily.deng, amd-gfx

Dropped the changes of older generations.

Signed-off-by: Haohui Mai <ricetons@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index d7e8f7232364..ff86c43b63d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
amdgpu_ring *ring)

                DRM_DEBUG("Using doorbell -- "
                                "wptr_offs == 0x%08x "
-                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
-                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
+                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
                                ring->wptr_offs,
                                lower_32_bits(ring->wptr << 2),
                                upper_32_bits(ring->wptr << 2));
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index a8d49c005f73..627eb1f147c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
amdgpu_ring *ring)
        if (ring->use_doorbell) {
                DRM_DEBUG("Using doorbell -- "
                                "wptr_offs == 0x%08x "
-                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
-                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
+                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
                                ring->wptr_offs,
                                lower_32_bits(ring->wptr << 2),
                                upper_32_bits(ring->wptr << 2));
@@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)

                if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
register write for wptr */
                        WREG32(sdma_v5_0_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR),
-                              lower_32_bits(ring->wptr) << 2);
+                              lower_32_bits(ring->wptr << 2));
                        WREG32(sdma_v5_0_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR_HI),
-                              upper_32_bits(ring->wptr) << 2);
+                              upper_32_bits(ring->wptr << 2));
                }

                doorbell = RREG32_SOC15_IP(GC,
sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 824eace69884..a5eb82bfeaa8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
amdgpu_ring *ring)
        if (ring->use_doorbell) {
                DRM_DEBUG("Using doorbell -- "
                                "wptr_offs == 0x%08x "
-                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
-                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
+                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
                                ring->wptr_offs,
                                lower_32_bits(ring->wptr << 2),
                                upper_32_bits(ring->wptr << 2));
@@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
                WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);

                if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
register write for wptr */
-                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
-                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
                }

                doorbell = RREG32_SOC15_IP(GC,
sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
--
2.25.1

On Mon, Apr 25, 2022 at 7:52 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 25.04.22 um 13:47 schrieb Haohui Mai:
> > Updated the commit messages based on the previous discussion.
>
> Please drop all the changes for pre SDMA v4 hardware (e.g. the ones with
> only a 32bit register), so that we only have the changes for the 64bit
> hw versions in here.
>
> Apart from that looks good to me.
>
> Thanks,
> Christian.
>
> >
> > Signed-off-by: Haohui Mai <ricetons@gmail.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/cik_sdma.c  | 4 ++--
> >   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
> >   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 8 ++++----
> >   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
> >   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
> >   drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
> >   drivers/gpu/drm/amd/amdgpu/si_dma.c    | 4 ++--
> >   7 files changed, 20 insertions(+), 20 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > index c8ebd108548d..df863d346995 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > @@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
> >    struct amdgpu_device *adev = ring->adev;
> >
> >    WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
> > +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
> >   }
> >
> >   static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> > @@ -487,7 +487,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
> >    WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
> >
> >    ring->wptr = 0;
> > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
> >
> >    /* enable DMA RB */
> >    WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > index 1d8bbcbd7a37..b83fd00466fe 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > @@ -223,7 +223,7 @@ static void sdma_v2_4_ring_set_wptr(struct
> > amdgpu_ring *ring)
> >   {
> >    struct amdgpu_device *adev = ring->adev;
> >
> > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > lower_32_bits(ring->wptr) << 2);
> > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > lower_32_bits(ring->wptr << 2));
> >   }
> >
> >   static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> > @@ -465,7 +465,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
> >    WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
> >
> >    ring->wptr = 0;
> > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
> >
> >    /* enable DMA RB */
> >    rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > index 4ef4feff5649..557a7d5174b0 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > @@ -389,14 +389,14 @@ static void sdma_v3_0_ring_set_wptr(struct
> > amdgpu_ring *ring)
> >    if (ring->use_doorbell) {
> >    u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
> >    /* XXX check if swapping is necessary on BE */
> > - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
> > - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
> > + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
> > + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr << 2));
> >    } else if (ring->use_pollmem) {
> >    u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
> >
> > - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
> > + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
> >    } else {
> > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > lower_32_bits(ring->wptr) << 2);
> > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > lower_32_bits(ring->wptr << 2));
> >    }
> >   }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > index d7e8f7232364..ff86c43b63d1 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
> > amdgpu_ring *ring)
> >
> >    DRM_DEBUG("Using doorbell -- "
> >    "wptr_offs == 0x%08x "
> > - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> > - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> > + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> > + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >    ring->wptr_offs,
> >    lower_32_bits(ring->wptr << 2),
> >    upper_32_bits(ring->wptr << 2));
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > index a8d49c005f73..627eb1f147c2 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
> > amdgpu_ring *ring)
> >    if (ring->use_doorbell) {
> >    DRM_DEBUG("Using doorbell -- "
> >    "wptr_offs == 0x%08x "
> > - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> > - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> > + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> > + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >    ring->wptr_offs,
> >    lower_32_bits(ring->wptr << 2),
> >    upper_32_bits(ring->wptr << 2));
> > @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
> >
> >    if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
> > for wptr */
> >    WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> > -        lower_32_bits(ring->wptr) << 2);
> > +        lower_32_bits(ring->wptr << 2));
> >    WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> > -        upper_32_bits(ring->wptr) << 2);
> > +        upper_32_bits(ring->wptr << 2));
> >    }
> >
> >    doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
> > mmSDMA0_GFX_DOORBELL));
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > index 824eace69884..a5eb82bfeaa8 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
> > amdgpu_ring *ring)
> >    if (ring->use_doorbell) {
> >    DRM_DEBUG("Using doorbell -- "
> >    "wptr_offs == 0x%08x "
> > - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> > - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> > + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> > + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >    ring->wptr_offs,
> >    lower_32_bits(ring->wptr << 2),
> >    upper_32_bits(ring->wptr << 2));
> > @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
> >    WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> > mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
> >
> >    if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
> > for wptr */
> > - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> > lower_32_bits(ring->wptr) << 2);
> > - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> > upper_32_bits(ring->wptr) << 2);
> > + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> > lower_32_bits(ring->wptr << 2));
> > + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> > upper_32_bits(ring->wptr << 2));
> >    }
> >
> >    doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> > mmSDMA0_GFX_DOORBELL));
> > diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> > b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> > index 195b45bcb8ad..0af11d3b00e7 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> > @@ -57,7 +57,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
> >    u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
> >
> >    WREG32(DMA_RB_WPTR + sdma_offsets[me],
> > -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
> > +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
> >   }
> >
> >   static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
> > @@ -175,7 +175,7 @@ static int si_dma_start(struct amdgpu_device *adev)
> >    WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
> >
> >    ring->wptr = 0;
> > - WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> > + WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
> >    WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
> >
> >    ring->sched.ready = true;
> > --
> > 2.25.1
>

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] Fix incorrect calculations of the wptr of the doorbells
  2022-04-25 12:19   ` Haohui Mai
@ 2022-04-25 12:33     ` Christian König
  2022-04-25 12:41       ` Haohui Mai
  0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2022-04-25 12:33 UTC (permalink / raw)
  To: Haohui Mai; +Cc: emily.deng, amd-gfx

Am 25.04.22 um 14:19 schrieb Haohui Mai:
> Dropped the changes of older generations.
>
> Signed-off-by: Haohui Mai <ricetons@gmail.com>

Please update the commit messages to include all the background we just 
discussed.

With that done the series is Reviewed-by: Christian König 
<christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
>   3 files changed, 10 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index d7e8f7232364..ff86c43b63d1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
> amdgpu_ring *ring)
>
>                  DRM_DEBUG("Using doorbell -- "
>                                  "wptr_offs == 0x%08x "
> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>                                  ring->wptr_offs,
>                                  lower_32_bits(ring->wptr << 2),
>                                  upper_32_bits(ring->wptr << 2));
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index a8d49c005f73..627eb1f147c2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
> amdgpu_ring *ring)
>          if (ring->use_doorbell) {
>                  DRM_DEBUG("Using doorbell -- "
>                                  "wptr_offs == 0x%08x "
> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>                                  ring->wptr_offs,
>                                  lower_32_bits(ring->wptr << 2),
>                                  upper_32_bits(ring->wptr << 2));
> @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
>
>                  if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
> register write for wptr */
>                          WREG32(sdma_v5_0_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR),
> -                              lower_32_bits(ring->wptr) << 2);
> +                              lower_32_bits(ring->wptr << 2));
>                          WREG32(sdma_v5_0_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR_HI),
> -                              upper_32_bits(ring->wptr) << 2);
> +                              upper_32_bits(ring->wptr << 2));
>                  }
>
>                  doorbell = RREG32_SOC15_IP(GC,
> sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index 824eace69884..a5eb82bfeaa8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
> amdgpu_ring *ring)
>          if (ring->use_doorbell) {
>                  DRM_DEBUG("Using doorbell -- "
>                                  "wptr_offs == 0x%08x "
> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>                                  ring->wptr_offs,
>                                  lower_32_bits(ring->wptr << 2),
>                                  upper_32_bits(ring->wptr << 2));
> @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
>                  WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
>
>                  if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
> register write for wptr */
> -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
> -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
> +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
> +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
>                  }
>
>                  doorbell = RREG32_SOC15_IP(GC,
> sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> --
> 2.25.1
>
> On Mon, Apr 25, 2022 at 7:52 PM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Am 25.04.22 um 13:47 schrieb Haohui Mai:
>>> Updated the commit messages based on the previous discussion.
>> Please drop all the changes for pre SDMA v4 hardware (e.g. the ones with
>> only a 32bit register), so that we only have the changes for the 64bit
>> hw versions in here.
>>
>> Apart from that looks good to me.
>>
>> Thanks,
>> Christian.
>>
>>> Signed-off-by: Haohui Mai <ricetons@gmail.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/cik_sdma.c  | 4 ++--
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 8 ++++----
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
>>>    drivers/gpu/drm/amd/amdgpu/si_dma.c    | 4 ++--
>>>    7 files changed, 20 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> index c8ebd108548d..df863d346995 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> @@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
>>>     struct amdgpu_device *adev = ring->adev;
>>>
>>>     WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>> -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
>>> +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
>>>    }
>>>
>>>    static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>>> @@ -487,7 +487,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
>>>     WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
>>>
>>>     ring->wptr = 0;
>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
>>>
>>>     /* enable DMA RB */
>>>     WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> index 1d8bbcbd7a37..b83fd00466fe 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> @@ -223,7 +223,7 @@ static void sdma_v2_4_ring_set_wptr(struct
>>> amdgpu_ring *ring)
>>>    {
>>>     struct amdgpu_device *adev = ring->adev;
>>>
>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>> lower_32_bits(ring->wptr) << 2);
>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>> lower_32_bits(ring->wptr << 2));
>>>    }
>>>
>>>    static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>>> @@ -465,7 +465,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
>>>     WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
>>>
>>>     ring->wptr = 0;
>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
>>>
>>>     /* enable DMA RB */
>>>     rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> index 4ef4feff5649..557a7d5174b0 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> @@ -389,14 +389,14 @@ static void sdma_v3_0_ring_set_wptr(struct
>>> amdgpu_ring *ring)
>>>     if (ring->use_doorbell) {
>>>     u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
>>>     /* XXX check if swapping is necessary on BE */
>>> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
>>> - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
>>> + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
>>> + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr << 2));
>>>     } else if (ring->use_pollmem) {
>>>     u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
>>>
>>> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
>>> + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
>>>     } else {
>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>> lower_32_bits(ring->wptr) << 2);
>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>> lower_32_bits(ring->wptr << 2));
>>>     }
>>>    }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> index d7e8f7232364..ff86c43b63d1 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
>>> amdgpu_ring *ring)
>>>
>>>     DRM_DEBUG("Using doorbell -- "
>>>     "wptr_offs == 0x%08x "
>>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
>>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
>>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
>>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>>>     ring->wptr_offs,
>>>     lower_32_bits(ring->wptr << 2),
>>>     upper_32_bits(ring->wptr << 2));
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> index a8d49c005f73..627eb1f147c2 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
>>> amdgpu_ring *ring)
>>>     if (ring->use_doorbell) {
>>>     DRM_DEBUG("Using doorbell -- "
>>>     "wptr_offs == 0x%08x "
>>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
>>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
>>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
>>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>>>     ring->wptr_offs,
>>>     lower_32_bits(ring->wptr << 2),
>>>     upper_32_bits(ring->wptr << 2));
>>> @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
>>>
>>>     if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
>>> for wptr */
>>>     WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
>>> -        lower_32_bits(ring->wptr) << 2);
>>> +        lower_32_bits(ring->wptr << 2));
>>>     WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
>>> -        upper_32_bits(ring->wptr) << 2);
>>> +        upper_32_bits(ring->wptr << 2));
>>>     }
>>>
>>>     doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
>>> mmSDMA0_GFX_DOORBELL));
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>> index 824eace69884..a5eb82bfeaa8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>> @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
>>> amdgpu_ring *ring)
>>>     if (ring->use_doorbell) {
>>>     DRM_DEBUG("Using doorbell -- "
>>>     "wptr_offs == 0x%08x "
>>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
>>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
>>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
>>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>>>     ring->wptr_offs,
>>>     lower_32_bits(ring->wptr << 2),
>>>     upper_32_bits(ring->wptr << 2));
>>> @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
>>>     WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
>>> mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
>>>
>>>     if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
>>> for wptr */
>>> - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
>>> lower_32_bits(ring->wptr) << 2);
>>> - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
>>> upper_32_bits(ring->wptr) << 2);
>>> + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
>>> lower_32_bits(ring->wptr << 2));
>>> + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
>>> upper_32_bits(ring->wptr << 2));
>>>     }
>>>
>>>     doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
>>> mmSDMA0_GFX_DOORBELL));
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c
>>> b/drivers/gpu/drm/amd/amdgpu/si_dma.c
>>> index 195b45bcb8ad..0af11d3b00e7 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
>>> @@ -57,7 +57,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
>>>     u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
>>>
>>>     WREG32(DMA_RB_WPTR + sdma_offsets[me],
>>> -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
>>> +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
>>>    }
>>>
>>>    static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
>>> @@ -175,7 +175,7 @@ static int si_dma_start(struct amdgpu_device *adev)
>>>     WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
>>>
>>>     ring->wptr = 0;
>>> - WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
>>> + WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
>>>     WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
>>>
>>>     ring->sched.ready = true;
>>> --
>>> 2.25.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] Fix incorrect calculations of the wptr of the doorbells
  2022-04-25 12:33     ` Christian König
@ 2022-04-25 12:41       ` Haohui Mai
  2022-04-25 12:44         ` Haohui Mai
  0 siblings, 1 reply; 8+ messages in thread
From: Haohui Mai @ 2022-04-25 12:41 UTC (permalink / raw)
  To: Christian König; +Cc: emily.deng, amd-gfx

This patch fixes the issue where the driver miscomputes the 64-bit
values of the wptr of the SDMA doorbell when initializing the
hardware. SDMA engines v4 and later on have full 64-bit registers for
wptr thus they should be set properly.

Older generation hardwares like CIK / SI have only 16 / 20 / 24bits
for the WPTR, where the calls of lower_32_bits() will be removed in a
following patch.

Signed-off-by: Haohui Mai <ricetons@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
 3 files changed, 10 insertions(+), 10 deletions(-)


diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index d7e8f7232364..ff86c43b63d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
amdgpu_ring *ring)

                DRM_DEBUG("Using doorbell -- "
                                "wptr_offs == 0x%08x "
-                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
-                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
+                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
                                ring->wptr_offs,
                                lower_32_bits(ring->wptr << 2),
                                upper_32_bits(ring->wptr << 2));
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index a8d49c005f73..627eb1f147c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
amdgpu_ring *ring)
        if (ring->use_doorbell) {
                DRM_DEBUG("Using doorbell -- "
                                "wptr_offs == 0x%08x "
-                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
-                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
+                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
                                ring->wptr_offs,
                                lower_32_bits(ring->wptr << 2),
                                upper_32_bits(ring->wptr << 2));
@@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)

                if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
register write for wptr */
                        WREG32(sdma_v5_0_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR),
-                              lower_32_bits(ring->wptr) << 2);
+                              lower_32_bits(ring->wptr << 2));
                        WREG32(sdma_v5_0_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR_HI),
-                              upper_32_bits(ring->wptr) << 2);
+                              upper_32_bits(ring->wptr << 2));
                }

                doorbell = RREG32_SOC15_IP(GC,
sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 824eace69884..a5eb82bfeaa8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
amdgpu_ring *ring)
        if (ring->use_doorbell) {
                DRM_DEBUG("Using doorbell -- "
                                "wptr_offs == 0x%08x "
-                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
-                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
+                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
                                ring->wptr_offs,
                                lower_32_bits(ring->wptr << 2),
                                upper_32_bits(ring->wptr << 2));
@@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
                WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);

                if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
register write for wptr */
-                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
-                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
                }

                doorbell = RREG32_SOC15_IP(GC,
sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
--
2.25.1

On Mon, Apr 25, 2022 at 8:33 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 25.04.22 um 14:19 schrieb Haohui Mai:
> > Dropped the changes of older generations.
> >
> > Signed-off-by: Haohui Mai <ricetons@gmail.com>
>
> Please update the commit messages to include all the background we just
> discussed.
>
> With that done the series is Reviewed-by: Christian König
> <christian.koenig@amd.com>
>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
> >   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
> >   drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
> >   3 files changed, 10 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > index d7e8f7232364..ff86c43b63d1 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
> > amdgpu_ring *ring)
> >
> >                  DRM_DEBUG("Using doorbell -- "
> >                                  "wptr_offs == 0x%08x "
> > -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> > -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> > +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> > +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >                                  ring->wptr_offs,
> >                                  lower_32_bits(ring->wptr << 2),
> >                                  upper_32_bits(ring->wptr << 2));
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > index a8d49c005f73..627eb1f147c2 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
> > amdgpu_ring *ring)
> >          if (ring->use_doorbell) {
> >                  DRM_DEBUG("Using doorbell -- "
> >                                  "wptr_offs == 0x%08x "
> > -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> > -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> > +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> > +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >                                  ring->wptr_offs,
> >                                  lower_32_bits(ring->wptr << 2),
> >                                  upper_32_bits(ring->wptr << 2));
> > @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
> >
> >                  if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
> > register write for wptr */
> >                          WREG32(sdma_v5_0_get_reg_offset(adev, i,
> > mmSDMA0_GFX_RB_WPTR),
> > -                              lower_32_bits(ring->wptr) << 2);
> > +                              lower_32_bits(ring->wptr << 2));
> >                          WREG32(sdma_v5_0_get_reg_offset(adev, i,
> > mmSDMA0_GFX_RB_WPTR_HI),
> > -                              upper_32_bits(ring->wptr) << 2);
> > +                              upper_32_bits(ring->wptr << 2));
> >                  }
> >
> >                  doorbell = RREG32_SOC15_IP(GC,
> > sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > index 824eace69884..a5eb82bfeaa8 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
> > amdgpu_ring *ring)
> >          if (ring->use_doorbell) {
> >                  DRM_DEBUG("Using doorbell -- "
> >                                  "wptr_offs == 0x%08x "
> > -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> > -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> > +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> > +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >                                  ring->wptr_offs,
> >                                  lower_32_bits(ring->wptr << 2),
> >                                  upper_32_bits(ring->wptr << 2));
> > @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
> >                  WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> > mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
> >
> >                  if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
> > register write for wptr */
> > -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> > mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
> > -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> > mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
> > +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> > mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
> > +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> > mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
> >                  }
> >
> >                  doorbell = RREG32_SOC15_IP(GC,
> > sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> > --
> > 2.25.1
> >
> > On Mon, Apr 25, 2022 at 7:52 PM Christian König
> > <ckoenig.leichtzumerken@gmail.com> wrote:
> >> Am 25.04.22 um 13:47 schrieb Haohui Mai:
> >>> Updated the commit messages based on the previous discussion.
> >> Please drop all the changes for pre SDMA v4 hardware (e.g. the ones with
> >> only a 32bit register), so that we only have the changes for the 64bit
> >> hw versions in here.
> >>
> >> Apart from that looks good to me.
> >>
> >> Thanks,
> >> Christian.
> >>
> >>> Signed-off-by: Haohui Mai <ricetons@gmail.com>
> >>> ---
> >>>    drivers/gpu/drm/amd/amdgpu/cik_sdma.c  | 4 ++--
> >>>    drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
> >>>    drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 8 ++++----
> >>>    drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
> >>>    drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
> >>>    drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
> >>>    drivers/gpu/drm/amd/amdgpu/si_dma.c    | 4 ++--
> >>>    7 files changed, 20 insertions(+), 20 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> >>> b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> >>> index c8ebd108548d..df863d346995 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> >>> @@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
> >>>     struct amdgpu_device *adev = ring->adev;
> >>>
> >>>     WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> >>> -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
> >>> +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
> >>>    }
> >>>
> >>>    static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> >>> @@ -487,7 +487,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
> >>>     WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
> >>>
> >>>     ring->wptr = 0;
> >>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> >>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
> >>>
> >>>     /* enable DMA RB */
> >>>     WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> >>> b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> >>> index 1d8bbcbd7a37..b83fd00466fe 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> >>> @@ -223,7 +223,7 @@ static void sdma_v2_4_ring_set_wptr(struct
> >>> amdgpu_ring *ring)
> >>>    {
> >>>     struct amdgpu_device *adev = ring->adev;
> >>>
> >>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> >>> lower_32_bits(ring->wptr) << 2);
> >>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> >>> lower_32_bits(ring->wptr << 2));
> >>>    }
> >>>
> >>>    static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> >>> @@ -465,7 +465,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
> >>>     WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
> >>>
> >>>     ring->wptr = 0;
> >>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> >>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
> >>>
> >>>     /* enable DMA RB */
> >>>     rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> >>> b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> >>> index 4ef4feff5649..557a7d5174b0 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> >>> @@ -389,14 +389,14 @@ static void sdma_v3_0_ring_set_wptr(struct
> >>> amdgpu_ring *ring)
> >>>     if (ring->use_doorbell) {
> >>>     u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
> >>>     /* XXX check if swapping is necessary on BE */
> >>> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
> >>> - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
> >>> + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
> >>> + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr << 2));
> >>>     } else if (ring->use_pollmem) {
> >>>     u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
> >>>
> >>> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
> >>> + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
> >>>     } else {
> >>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> >>> lower_32_bits(ring->wptr) << 2);
> >>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> >>> lower_32_bits(ring->wptr << 2));
> >>>     }
> >>>    }
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> >>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> >>> index d7e8f7232364..ff86c43b63d1 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> >>> @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
> >>> amdgpu_ring *ring)
> >>>
> >>>     DRM_DEBUG("Using doorbell -- "
> >>>     "wptr_offs == 0x%08x "
> >>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> >>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> >>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> >>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >>>     ring->wptr_offs,
> >>>     lower_32_bits(ring->wptr << 2),
> >>>     upper_32_bits(ring->wptr << 2));
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> >>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> >>> index a8d49c005f73..627eb1f147c2 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> >>> @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
> >>> amdgpu_ring *ring)
> >>>     if (ring->use_doorbell) {
> >>>     DRM_DEBUG("Using doorbell -- "
> >>>     "wptr_offs == 0x%08x "
> >>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> >>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> >>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> >>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >>>     ring->wptr_offs,
> >>>     lower_32_bits(ring->wptr << 2),
> >>>     upper_32_bits(ring->wptr << 2));
> >>> @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
> >>>
> >>>     if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
> >>> for wptr */
> >>>     WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> >>> -        lower_32_bits(ring->wptr) << 2);
> >>> +        lower_32_bits(ring->wptr << 2));
> >>>     WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> >>> -        upper_32_bits(ring->wptr) << 2);
> >>> +        upper_32_bits(ring->wptr << 2));
> >>>     }
> >>>
> >>>     doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
> >>> mmSDMA0_GFX_DOORBELL));
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> >>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> >>> index 824eace69884..a5eb82bfeaa8 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> >>> @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
> >>> amdgpu_ring *ring)
> >>>     if (ring->use_doorbell) {
> >>>     DRM_DEBUG("Using doorbell -- "
> >>>     "wptr_offs == 0x%08x "
> >>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> >>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> >>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> >>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >>>     ring->wptr_offs,
> >>>     lower_32_bits(ring->wptr << 2),
> >>>     upper_32_bits(ring->wptr << 2));
> >>> @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
> >>>     WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> >>> mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
> >>>
> >>>     if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
> >>> for wptr */
> >>> - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> >>> lower_32_bits(ring->wptr) << 2);
> >>> - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> >>> upper_32_bits(ring->wptr) << 2);
> >>> + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> >>> lower_32_bits(ring->wptr << 2));
> >>> + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> >>> upper_32_bits(ring->wptr << 2));
> >>>     }
> >>>
> >>>     doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> >>> mmSDMA0_GFX_DOORBELL));
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> >>> b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> >>> index 195b45bcb8ad..0af11d3b00e7 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> >>> @@ -57,7 +57,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
> >>>     u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
> >>>
> >>>     WREG32(DMA_RB_WPTR + sdma_offsets[me],
> >>> -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
> >>> +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
> >>>    }
> >>>
> >>>    static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
> >>> @@ -175,7 +175,7 @@ static int si_dma_start(struct amdgpu_device *adev)
> >>>     WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
> >>>
> >>>     ring->wptr = 0;
> >>> - WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> >>> + WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
> >>>     WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
> >>>
> >>>     ring->sched.ready = true;
> >>> --
> >>> 2.25.1
>

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] Fix incorrect calculations of the wptr of the doorbells
  2022-04-25 12:41       ` Haohui Mai
@ 2022-04-25 12:44         ` Haohui Mai
  2022-04-25 16:03           ` Christian König
  0 siblings, 1 reply; 8+ messages in thread
From: Haohui Mai @ 2022-04-25 12:44 UTC (permalink / raw)
  To: Christian König; +Cc: emily.deng, amd-gfx

Your prompt reviews are highly appreciated. Thanks.

A little bit off-topic -- I'm not too familiar with the whole process.
Just wondering, what else needs to be done in order to ensure the
patches get picked up in the next available merge window?

Thanks,
Haohui

On Mon, Apr 25, 2022 at 8:41 PM Haohui Mai <ricetons@gmail.com> wrote:
>
> This patch fixes the issue where the driver miscomputes the 64-bit
> values of the wptr of the SDMA doorbell when initializing the
> hardware. SDMA engines v4 and later on have full 64-bit registers for
> wptr thus they should be set properly.
>
> Older generation hardwares like CIK / SI have only 16 / 20 / 24bits
> for the WPTR, where the calls of lower_32_bits() will be removed in a
> following patch.
>
> Signed-off-by: Haohui Mai <ricetons@gmail.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
>  drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
>  drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
>  3 files changed, 10 insertions(+), 10 deletions(-)
>
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index d7e8f7232364..ff86c43b63d1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
> amdgpu_ring *ring)
>
>                 DRM_DEBUG("Using doorbell -- "
>                                 "wptr_offs == 0x%08x "
> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>                                 ring->wptr_offs,
>                                 lower_32_bits(ring->wptr << 2),
>                                 upper_32_bits(ring->wptr << 2));
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index a8d49c005f73..627eb1f147c2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
> amdgpu_ring *ring)
>         if (ring->use_doorbell) {
>                 DRM_DEBUG("Using doorbell -- "
>                                 "wptr_offs == 0x%08x "
> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>                                 ring->wptr_offs,
>                                 lower_32_bits(ring->wptr << 2),
>                                 upper_32_bits(ring->wptr << 2));
> @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
>
>                 if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
> register write for wptr */
>                         WREG32(sdma_v5_0_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR),
> -                              lower_32_bits(ring->wptr) << 2);
> +                              lower_32_bits(ring->wptr << 2));
>                         WREG32(sdma_v5_0_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR_HI),
> -                              upper_32_bits(ring->wptr) << 2);
> +                              upper_32_bits(ring->wptr << 2));
>                 }
>
>                 doorbell = RREG32_SOC15_IP(GC,
> sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index 824eace69884..a5eb82bfeaa8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
> amdgpu_ring *ring)
>         if (ring->use_doorbell) {
>                 DRM_DEBUG("Using doorbell -- "
>                                 "wptr_offs == 0x%08x "
> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>                                 ring->wptr_offs,
>                                 lower_32_bits(ring->wptr << 2),
>                                 upper_32_bits(ring->wptr << 2));
> @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
>                 WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
>
>                 if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
> register write for wptr */
> -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
> -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
> +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
> +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
>                 }
>
>                 doorbell = RREG32_SOC15_IP(GC,
> sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> --
> 2.25.1
>
> On Mon, Apr 25, 2022 at 8:33 PM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
> >
> > Am 25.04.22 um 14:19 schrieb Haohui Mai:
> > > Dropped the changes of older generations.
> > >
> > > Signed-off-by: Haohui Mai <ricetons@gmail.com>
> >
> > Please update the commit messages to include all the background we just
> > discussed.
> >
> > With that done the series is Reviewed-by: Christian König
> > <christian.koenig@amd.com>
> >
> > > ---
> > >   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
> > >   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
> > >   drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
> > >   3 files changed, 10 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > > index d7e8f7232364..ff86c43b63d1 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > > @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
> > > amdgpu_ring *ring)
> > >
> > >                  DRM_DEBUG("Using doorbell -- "
> > >                                  "wptr_offs == 0x%08x "
> > > -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> > > -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> > > +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> > > +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> > >                                  ring->wptr_offs,
> > >                                  lower_32_bits(ring->wptr << 2),
> > >                                  upper_32_bits(ring->wptr << 2));
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > > b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > > index a8d49c005f73..627eb1f147c2 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > > @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
> > > amdgpu_ring *ring)
> > >          if (ring->use_doorbell) {
> > >                  DRM_DEBUG("Using doorbell -- "
> > >                                  "wptr_offs == 0x%08x "
> > > -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> > > -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> > > +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> > > +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> > >                                  ring->wptr_offs,
> > >                                  lower_32_bits(ring->wptr << 2),
> > >                                  upper_32_bits(ring->wptr << 2));
> > > @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
> > >
> > >                  if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
> > > register write for wptr */
> > >                          WREG32(sdma_v5_0_get_reg_offset(adev, i,
> > > mmSDMA0_GFX_RB_WPTR),
> > > -                              lower_32_bits(ring->wptr) << 2);
> > > +                              lower_32_bits(ring->wptr << 2));
> > >                          WREG32(sdma_v5_0_get_reg_offset(adev, i,
> > > mmSDMA0_GFX_RB_WPTR_HI),
> > > -                              upper_32_bits(ring->wptr) << 2);
> > > +                              upper_32_bits(ring->wptr << 2));
> > >                  }
> > >
> > >                  doorbell = RREG32_SOC15_IP(GC,
> > > sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > > b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > > index 824eace69884..a5eb82bfeaa8 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > > @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
> > > amdgpu_ring *ring)
> > >          if (ring->use_doorbell) {
> > >                  DRM_DEBUG("Using doorbell -- "
> > >                                  "wptr_offs == 0x%08x "
> > > -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> > > -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> > > +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> > > +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> > >                                  ring->wptr_offs,
> > >                                  lower_32_bits(ring->wptr << 2),
> > >                                  upper_32_bits(ring->wptr << 2));
> > > @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
> > >                  WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> > > mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
> > >
> > >                  if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
> > > register write for wptr */
> > > -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> > > mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
> > > -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> > > mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
> > > +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> > > mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
> > > +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> > > mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
> > >                  }
> > >
> > >                  doorbell = RREG32_SOC15_IP(GC,
> > > sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> > > --
> > > 2.25.1
> > >
> > > On Mon, Apr 25, 2022 at 7:52 PM Christian König
> > > <ckoenig.leichtzumerken@gmail.com> wrote:
> > >> Am 25.04.22 um 13:47 schrieb Haohui Mai:
> > >>> Updated the commit messages based on the previous discussion.
> > >> Please drop all the changes for pre SDMA v4 hardware (e.g. the ones with
> > >> only a 32bit register), so that we only have the changes for the 64bit
> > >> hw versions in here.
> > >>
> > >> Apart from that looks good to me.
> > >>
> > >> Thanks,
> > >> Christian.
> > >>
> > >>> Signed-off-by: Haohui Mai <ricetons@gmail.com>
> > >>> ---
> > >>>    drivers/gpu/drm/amd/amdgpu/cik_sdma.c  | 4 ++--
> > >>>    drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
> > >>>    drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 8 ++++----
> > >>>    drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
> > >>>    drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
> > >>>    drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
> > >>>    drivers/gpu/drm/amd/amdgpu/si_dma.c    | 4 ++--
> > >>>    7 files changed, 20 insertions(+), 20 deletions(-)
> > >>>
> > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > >>> b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > >>> index c8ebd108548d..df863d346995 100644
> > >>> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > >>> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > >>> @@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
> > >>>     struct amdgpu_device *adev = ring->adev;
> > >>>
> > >>>     WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > >>> -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
> > >>> +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
> > >>>    }
> > >>>
> > >>>    static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> > >>> @@ -487,7 +487,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
> > >>>     WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
> > >>>
> > >>>     ring->wptr = 0;
> > >>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> > >>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
> > >>>
> > >>>     /* enable DMA RB */
> > >>>     WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
> > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > >>> b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > >>> index 1d8bbcbd7a37..b83fd00466fe 100644
> > >>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > >>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > >>> @@ -223,7 +223,7 @@ static void sdma_v2_4_ring_set_wptr(struct
> > >>> amdgpu_ring *ring)
> > >>>    {
> > >>>     struct amdgpu_device *adev = ring->adev;
> > >>>
> > >>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > >>> lower_32_bits(ring->wptr) << 2);
> > >>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > >>> lower_32_bits(ring->wptr << 2));
> > >>>    }
> > >>>
> > >>>    static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> > >>> @@ -465,7 +465,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
> > >>>     WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
> > >>>
> > >>>     ring->wptr = 0;
> > >>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> > >>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
> > >>>
> > >>>     /* enable DMA RB */
> > >>>     rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
> > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > >>> b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > >>> index 4ef4feff5649..557a7d5174b0 100644
> > >>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > >>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > >>> @@ -389,14 +389,14 @@ static void sdma_v3_0_ring_set_wptr(struct
> > >>> amdgpu_ring *ring)
> > >>>     if (ring->use_doorbell) {
> > >>>     u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
> > >>>     /* XXX check if swapping is necessary on BE */
> > >>> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
> > >>> - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
> > >>> + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
> > >>> + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr << 2));
> > >>>     } else if (ring->use_pollmem) {
> > >>>     u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
> > >>>
> > >>> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
> > >>> + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
> > >>>     } else {
> > >>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > >>> lower_32_bits(ring->wptr) << 2);
> > >>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > >>> lower_32_bits(ring->wptr << 2));
> > >>>     }
> > >>>    }
> > >>>
> > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > >>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > >>> index d7e8f7232364..ff86c43b63d1 100644
> > >>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > >>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > >>> @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
> > >>> amdgpu_ring *ring)
> > >>>
> > >>>     DRM_DEBUG("Using doorbell -- "
> > >>>     "wptr_offs == 0x%08x "
> > >>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> > >>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> > >>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> > >>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> > >>>     ring->wptr_offs,
> > >>>     lower_32_bits(ring->wptr << 2),
> > >>>     upper_32_bits(ring->wptr << 2));
> > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > >>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > >>> index a8d49c005f73..627eb1f147c2 100644
> > >>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > >>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > >>> @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
> > >>> amdgpu_ring *ring)
> > >>>     if (ring->use_doorbell) {
> > >>>     DRM_DEBUG("Using doorbell -- "
> > >>>     "wptr_offs == 0x%08x "
> > >>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> > >>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> > >>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> > >>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> > >>>     ring->wptr_offs,
> > >>>     lower_32_bits(ring->wptr << 2),
> > >>>     upper_32_bits(ring->wptr << 2));
> > >>> @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
> > >>>
> > >>>     if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
> > >>> for wptr */
> > >>>     WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> > >>> -        lower_32_bits(ring->wptr) << 2);
> > >>> +        lower_32_bits(ring->wptr << 2));
> > >>>     WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> > >>> -        upper_32_bits(ring->wptr) << 2);
> > >>> +        upper_32_bits(ring->wptr << 2));
> > >>>     }
> > >>>
> > >>>     doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
> > >>> mmSDMA0_GFX_DOORBELL));
> > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > >>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > >>> index 824eace69884..a5eb82bfeaa8 100644
> > >>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > >>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > >>> @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
> > >>> amdgpu_ring *ring)
> > >>>     if (ring->use_doorbell) {
> > >>>     DRM_DEBUG("Using doorbell -- "
> > >>>     "wptr_offs == 0x%08x "
> > >>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> > >>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> > >>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> > >>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> > >>>     ring->wptr_offs,
> > >>>     lower_32_bits(ring->wptr << 2),
> > >>>     upper_32_bits(ring->wptr << 2));
> > >>> @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
> > >>>     WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> > >>> mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
> > >>>
> > >>>     if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
> > >>> for wptr */
> > >>> - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> > >>> lower_32_bits(ring->wptr) << 2);
> > >>> - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> > >>> upper_32_bits(ring->wptr) << 2);
> > >>> + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> > >>> lower_32_bits(ring->wptr << 2));
> > >>> + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> > >>> upper_32_bits(ring->wptr << 2));
> > >>>     }
> > >>>
> > >>>     doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> > >>> mmSDMA0_GFX_DOORBELL));
> > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> > >>> b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> > >>> index 195b45bcb8ad..0af11d3b00e7 100644
> > >>> --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> > >>> +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> > >>> @@ -57,7 +57,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
> > >>>     u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
> > >>>
> > >>>     WREG32(DMA_RB_WPTR + sdma_offsets[me],
> > >>> -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
> > >>> +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
> > >>>    }
> > >>>
> > >>>    static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
> > >>> @@ -175,7 +175,7 @@ static int si_dma_start(struct amdgpu_device *adev)
> > >>>     WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
> > >>>
> > >>>     ring->wptr = 0;
> > >>> - WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> > >>> + WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
> > >>>     WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
> > >>>
> > >>>     ring->sched.ready = true;
> > >>> --
> > >>> 2.25.1
> >

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] Fix incorrect calculations of the wptr of the doorbells
  2022-04-25 12:44         ` Haohui Mai
@ 2022-04-25 16:03           ` Christian König
  2022-04-25 17:55             ` Alex Deucher
  0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2022-04-25 16:03 UTC (permalink / raw)
  To: Haohui Mai; +Cc: emily.deng, amd-gfx

Alex is usually picking up patches like this one here from the mailing list.

Feel free to add a Reviewed-by: Christian König 
<christian.koenig@amd.com> to the series.

Thanks for the help,
Christian.

Am 25.04.22 um 14:44 schrieb Haohui Mai:
> Your prompt reviews are highly appreciated. Thanks.
>
> A little bit off-topic -- I'm not too familiar with the whole process.
> Just wondering, what else needs to be done in order to ensure the
> patches get picked up in the next available merge window?
>
> Thanks,
> Haohui
>
> On Mon, Apr 25, 2022 at 8:41 PM Haohui Mai <ricetons@gmail.com> wrote:
>> This patch fixes the issue where the driver miscomputes the 64-bit
>> values of the wptr of the SDMA doorbell when initializing the
>> hardware. SDMA engines v4 and later on have full 64-bit registers for
>> wptr thus they should be set properly.
>>
>> Older generation hardwares like CIK / SI have only 16 / 20 / 24bits
>> for the WPTR, where the calls of lower_32_bits() will be removed in a
>> following patch.
>>
>> Signed-off-by: Haohui Mai <ricetons@gmail.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
>>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
>>   drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
>>   3 files changed, 10 insertions(+), 10 deletions(-)
>>
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>> index d7e8f7232364..ff86c43b63d1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>> @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
>> amdgpu_ring *ring)
>>
>>                  DRM_DEBUG("Using doorbell -- "
>>                                  "wptr_offs == 0x%08x "
>> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
>> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
>> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
>> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>>                                  ring->wptr_offs,
>>                                  lower_32_bits(ring->wptr << 2),
>>                                  upper_32_bits(ring->wptr << 2));
>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>> index a8d49c005f73..627eb1f147c2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>> @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
>> amdgpu_ring *ring)
>>          if (ring->use_doorbell) {
>>                  DRM_DEBUG("Using doorbell -- "
>>                                  "wptr_offs == 0x%08x "
>> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
>> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
>> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
>> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>>                                  ring->wptr_offs,
>>                                  lower_32_bits(ring->wptr << 2),
>>                                  upper_32_bits(ring->wptr << 2));
>> @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
>>
>>                  if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
>> register write for wptr */
>>                          WREG32(sdma_v5_0_get_reg_offset(adev, i,
>> mmSDMA0_GFX_RB_WPTR),
>> -                              lower_32_bits(ring->wptr) << 2);
>> +                              lower_32_bits(ring->wptr << 2));
>>                          WREG32(sdma_v5_0_get_reg_offset(adev, i,
>> mmSDMA0_GFX_RB_WPTR_HI),
>> -                              upper_32_bits(ring->wptr) << 2);
>> +                              upper_32_bits(ring->wptr << 2));
>>                  }
>>
>>                  doorbell = RREG32_SOC15_IP(GC,
>> sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>> index 824eace69884..a5eb82bfeaa8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>> @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
>> amdgpu_ring *ring)
>>          if (ring->use_doorbell) {
>>                  DRM_DEBUG("Using doorbell -- "
>>                                  "wptr_offs == 0x%08x "
>> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
>> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
>> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
>> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>>                                  ring->wptr_offs,
>>                                  lower_32_bits(ring->wptr << 2),
>>                                  upper_32_bits(ring->wptr << 2));
>> @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
>>                  WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
>> mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
>>
>>                  if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
>> register write for wptr */
>> -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
>> mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
>> -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
>> mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
>> +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
>> mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
>> +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
>> mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
>>                  }
>>
>>                  doorbell = RREG32_SOC15_IP(GC,
>> sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
>> --
>> 2.25.1
>>
>> On Mon, Apr 25, 2022 at 8:33 PM Christian König
>> <ckoenig.leichtzumerken@gmail.com> wrote:
>>> Am 25.04.22 um 14:19 schrieb Haohui Mai:
>>>> Dropped the changes of older generations.
>>>>
>>>> Signed-off-by: Haohui Mai <ricetons@gmail.com>
>>> Please update the commit messages to include all the background we just
>>> discussed.
>>>
>>> With that done the series is Reviewed-by: Christian König
>>> <christian.koenig@amd.com>
>>>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
>>>>    drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
>>>>    drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
>>>>    3 files changed, 10 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>>> index d7e8f7232364..ff86c43b63d1 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>>> @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
>>>> amdgpu_ring *ring)
>>>>
>>>>                   DRM_DEBUG("Using doorbell -- "
>>>>                                   "wptr_offs == 0x%08x "
>>>> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
>>>> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
>>>> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
>>>> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>>>>                                   ring->wptr_offs,
>>>>                                   lower_32_bits(ring->wptr << 2),
>>>>                                   upper_32_bits(ring->wptr << 2));
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>>> index a8d49c005f73..627eb1f147c2 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>>> @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
>>>> amdgpu_ring *ring)
>>>>           if (ring->use_doorbell) {
>>>>                   DRM_DEBUG("Using doorbell -- "
>>>>                                   "wptr_offs == 0x%08x "
>>>> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
>>>> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
>>>> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
>>>> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>>>>                                   ring->wptr_offs,
>>>>                                   lower_32_bits(ring->wptr << 2),
>>>>                                   upper_32_bits(ring->wptr << 2));
>>>> @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
>>>>
>>>>                   if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
>>>> register write for wptr */
>>>>                           WREG32(sdma_v5_0_get_reg_offset(adev, i,
>>>> mmSDMA0_GFX_RB_WPTR),
>>>> -                              lower_32_bits(ring->wptr) << 2);
>>>> +                              lower_32_bits(ring->wptr << 2));
>>>>                           WREG32(sdma_v5_0_get_reg_offset(adev, i,
>>>> mmSDMA0_GFX_RB_WPTR_HI),
>>>> -                              upper_32_bits(ring->wptr) << 2);
>>>> +                              upper_32_bits(ring->wptr << 2));
>>>>                   }
>>>>
>>>>                   doorbell = RREG32_SOC15_IP(GC,
>>>> sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>>> index 824eace69884..a5eb82bfeaa8 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>>> @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
>>>> amdgpu_ring *ring)
>>>>           if (ring->use_doorbell) {
>>>>                   DRM_DEBUG("Using doorbell -- "
>>>>                                   "wptr_offs == 0x%08x "
>>>> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
>>>> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
>>>> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
>>>> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>>>>                                   ring->wptr_offs,
>>>>                                   lower_32_bits(ring->wptr << 2),
>>>>                                   upper_32_bits(ring->wptr << 2));
>>>> @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
>>>>                   WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
>>>> mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
>>>>
>>>>                   if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
>>>> register write for wptr */
>>>> -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
>>>> mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
>>>> -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
>>>> mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
>>>> +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
>>>> mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
>>>> +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
>>>> mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
>>>>                   }
>>>>
>>>>                   doorbell = RREG32_SOC15_IP(GC,
>>>> sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
>>>> --
>>>> 2.25.1
>>>>
>>>> On Mon, Apr 25, 2022 at 7:52 PM Christian König
>>>> <ckoenig.leichtzumerken@gmail.com> wrote:
>>>>> Am 25.04.22 um 13:47 schrieb Haohui Mai:
>>>>>> Updated the commit messages based on the previous discussion.
>>>>> Please drop all the changes for pre SDMA v4 hardware (e.g. the ones with
>>>>> only a 32bit register), so that we only have the changes for the 64bit
>>>>> hw versions in here.
>>>>>
>>>>> Apart from that looks good to me.
>>>>>
>>>>> Thanks,
>>>>> Christian.
>>>>>
>>>>>> Signed-off-by: Haohui Mai <ricetons@gmail.com>
>>>>>> ---
>>>>>>     drivers/gpu/drm/amd/amdgpu/cik_sdma.c  | 4 ++--
>>>>>>     drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
>>>>>>     drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 8 ++++----
>>>>>>     drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
>>>>>>     drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
>>>>>>     drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
>>>>>>     drivers/gpu/drm/amd/amdgpu/si_dma.c    | 4 ++--
>>>>>>     7 files changed, 20 insertions(+), 20 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>>>>> index c8ebd108548d..df863d346995 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>>>>> @@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
>>>>>>      struct amdgpu_device *adev = ring->adev;
>>>>>>
>>>>>>      WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>>>>> -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
>>>>>> +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
>>>>>>     }
>>>>>>
>>>>>>     static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>>>>>> @@ -487,7 +487,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
>>>>>>      WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
>>>>>>
>>>>>>      ring->wptr = 0;
>>>>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
>>>>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
>>>>>>
>>>>>>      /* enable DMA RB */
>>>>>>      WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>>>>> index 1d8bbcbd7a37..b83fd00466fe 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>>>>> @@ -223,7 +223,7 @@ static void sdma_v2_4_ring_set_wptr(struct
>>>>>> amdgpu_ring *ring)
>>>>>>     {
>>>>>>      struct amdgpu_device *adev = ring->adev;
>>>>>>
>>>>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>>>>> lower_32_bits(ring->wptr) << 2);
>>>>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>>>>> lower_32_bits(ring->wptr << 2));
>>>>>>     }
>>>>>>
>>>>>>     static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>>>>>> @@ -465,7 +465,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
>>>>>>      WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
>>>>>>
>>>>>>      ring->wptr = 0;
>>>>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
>>>>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
>>>>>>
>>>>>>      /* enable DMA RB */
>>>>>>      rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>>>>> index 4ef4feff5649..557a7d5174b0 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>>>>> @@ -389,14 +389,14 @@ static void sdma_v3_0_ring_set_wptr(struct
>>>>>> amdgpu_ring *ring)
>>>>>>      if (ring->use_doorbell) {
>>>>>>      u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
>>>>>>      /* XXX check if swapping is necessary on BE */
>>>>>> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
>>>>>> - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
>>>>>> + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
>>>>>> + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr << 2));
>>>>>>      } else if (ring->use_pollmem) {
>>>>>>      u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
>>>>>>
>>>>>> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
>>>>>> + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
>>>>>>      } else {
>>>>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>>>>> lower_32_bits(ring->wptr) << 2);
>>>>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>>>>> lower_32_bits(ring->wptr << 2));
>>>>>>      }
>>>>>>     }
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>>>>> index d7e8f7232364..ff86c43b63d1 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>>>>> @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
>>>>>> amdgpu_ring *ring)
>>>>>>
>>>>>>      DRM_DEBUG("Using doorbell -- "
>>>>>>      "wptr_offs == 0x%08x "
>>>>>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
>>>>>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
>>>>>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
>>>>>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>>>>>>      ring->wptr_offs,
>>>>>>      lower_32_bits(ring->wptr << 2),
>>>>>>      upper_32_bits(ring->wptr << 2));
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>>>>> index a8d49c005f73..627eb1f147c2 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>>>>> @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
>>>>>> amdgpu_ring *ring)
>>>>>>      if (ring->use_doorbell) {
>>>>>>      DRM_DEBUG("Using doorbell -- "
>>>>>>      "wptr_offs == 0x%08x "
>>>>>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
>>>>>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
>>>>>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
>>>>>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>>>>>>      ring->wptr_offs,
>>>>>>      lower_32_bits(ring->wptr << 2),
>>>>>>      upper_32_bits(ring->wptr << 2));
>>>>>> @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
>>>>>>
>>>>>>      if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
>>>>>> for wptr */
>>>>>>      WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
>>>>>> -        lower_32_bits(ring->wptr) << 2);
>>>>>> +        lower_32_bits(ring->wptr << 2));
>>>>>>      WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
>>>>>> -        upper_32_bits(ring->wptr) << 2);
>>>>>> +        upper_32_bits(ring->wptr << 2));
>>>>>>      }
>>>>>>
>>>>>>      doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
>>>>>> mmSDMA0_GFX_DOORBELL));
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>>>>> index 824eace69884..a5eb82bfeaa8 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>>>>> @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
>>>>>> amdgpu_ring *ring)
>>>>>>      if (ring->use_doorbell) {
>>>>>>      DRM_DEBUG("Using doorbell -- "
>>>>>>      "wptr_offs == 0x%08x "
>>>>>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
>>>>>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
>>>>>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
>>>>>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>>>>>>      ring->wptr_offs,
>>>>>>      lower_32_bits(ring->wptr << 2),
>>>>>>      upper_32_bits(ring->wptr << 2));
>>>>>> @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
>>>>>>      WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
>>>>>> mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
>>>>>>
>>>>>>      if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
>>>>>> for wptr */
>>>>>> - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
>>>>>> lower_32_bits(ring->wptr) << 2);
>>>>>> - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
>>>>>> upper_32_bits(ring->wptr) << 2);
>>>>>> + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
>>>>>> lower_32_bits(ring->wptr << 2));
>>>>>> + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
>>>>>> upper_32_bits(ring->wptr << 2));
>>>>>>      }
>>>>>>
>>>>>>      doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
>>>>>> mmSDMA0_GFX_DOORBELL));
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/si_dma.c
>>>>>> index 195b45bcb8ad..0af11d3b00e7 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
>>>>>> @@ -57,7 +57,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
>>>>>>      u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
>>>>>>
>>>>>>      WREG32(DMA_RB_WPTR + sdma_offsets[me],
>>>>>> -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
>>>>>> +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
>>>>>>     }
>>>>>>
>>>>>>     static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
>>>>>> @@ -175,7 +175,7 @@ static int si_dma_start(struct amdgpu_device *adev)
>>>>>>      WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
>>>>>>
>>>>>>      ring->wptr = 0;
>>>>>> - WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
>>>>>> + WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
>>>>>>      WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
>>>>>>
>>>>>>      ring->sched.ready = true;
>>>>>> --
>>>>>> 2.25.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] Fix incorrect calculations of the wptr of the doorbells
  2022-04-25 16:03           ` Christian König
@ 2022-04-25 17:55             ` Alex Deucher
  0 siblings, 0 replies; 8+ messages in thread
From: Alex Deucher @ 2022-04-25 17:55 UTC (permalink / raw)
  To: Christian König; +Cc: Emily Deng, Haohui Mai, amd-gfx list

I applied the patches, but I had to manually munge them to make them
apply since the formatting was all messed up.  Please use git
send-email in the future.

Alex

On Mon, Apr 25, 2022 at 12:03 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Alex is usually picking up patches like this one here from the mailing list.
>
> Feel free to add a Reviewed-by: Christian König
> <christian.koenig@amd.com> to the series.
>
> Thanks for the help,
> Christian.
>
> Am 25.04.22 um 14:44 schrieb Haohui Mai:
> > Your prompt reviews are highly appreciated. Thanks.
> >
> > A little bit off-topic -- I'm not too familiar with the whole process.
> > Just wondering, what else needs to be done in order to ensure the
> > patches get picked up in the next available merge window?
> >
> > Thanks,
> > Haohui
> >
> > On Mon, Apr 25, 2022 at 8:41 PM Haohui Mai <ricetons@gmail.com> wrote:
> >> This patch fixes the issue where the driver miscomputes the 64-bit
> >> values of the wptr of the SDMA doorbell when initializing the
> >> hardware. SDMA engines v4 and later on have full 64-bit registers for
> >> wptr thus they should be set properly.
> >>
> >> Older generation hardwares like CIK / SI have only 16 / 20 / 24bits
> >> for the WPTR, where the calls of lower_32_bits() will be removed in a
> >> following patch.
> >>
> >> Signed-off-by: Haohui Mai <ricetons@gmail.com>
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
> >>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
> >>   drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
> >>   3 files changed, 10 insertions(+), 10 deletions(-)
> >>
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> >> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> >> index d7e8f7232364..ff86c43b63d1 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> >> @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
> >> amdgpu_ring *ring)
> >>
> >>                  DRM_DEBUG("Using doorbell -- "
> >>                                  "wptr_offs == 0x%08x "
> >> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> >> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> >> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> >> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >>                                  ring->wptr_offs,
> >>                                  lower_32_bits(ring->wptr << 2),
> >>                                  upper_32_bits(ring->wptr << 2));
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> >> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> >> index a8d49c005f73..627eb1f147c2 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> >> @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
> >> amdgpu_ring *ring)
> >>          if (ring->use_doorbell) {
> >>                  DRM_DEBUG("Using doorbell -- "
> >>                                  "wptr_offs == 0x%08x "
> >> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> >> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> >> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> >> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >>                                  ring->wptr_offs,
> >>                                  lower_32_bits(ring->wptr << 2),
> >>                                  upper_32_bits(ring->wptr << 2));
> >> @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
> >>
> >>                  if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
> >> register write for wptr */
> >>                          WREG32(sdma_v5_0_get_reg_offset(adev, i,
> >> mmSDMA0_GFX_RB_WPTR),
> >> -                              lower_32_bits(ring->wptr) << 2);
> >> +                              lower_32_bits(ring->wptr << 2));
> >>                          WREG32(sdma_v5_0_get_reg_offset(adev, i,
> >> mmSDMA0_GFX_RB_WPTR_HI),
> >> -                              upper_32_bits(ring->wptr) << 2);
> >> +                              upper_32_bits(ring->wptr << 2));
> >>                  }
> >>
> >>                  doorbell = RREG32_SOC15_IP(GC,
> >> sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> >> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> >> index 824eace69884..a5eb82bfeaa8 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> >> @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
> >> amdgpu_ring *ring)
> >>          if (ring->use_doorbell) {
> >>                  DRM_DEBUG("Using doorbell -- "
> >>                                  "wptr_offs == 0x%08x "
> >> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> >> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> >> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> >> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >>                                  ring->wptr_offs,
> >>                                  lower_32_bits(ring->wptr << 2),
> >>                                  upper_32_bits(ring->wptr << 2));
> >> @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
> >>                  WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> >> mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
> >>
> >>                  if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
> >> register write for wptr */
> >> -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> >> mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
> >> -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> >> mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
> >> +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> >> mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
> >> +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> >> mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
> >>                  }
> >>
> >>                  doorbell = RREG32_SOC15_IP(GC,
> >> sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> >> --
> >> 2.25.1
> >>
> >> On Mon, Apr 25, 2022 at 8:33 PM Christian König
> >> <ckoenig.leichtzumerken@gmail.com> wrote:
> >>> Am 25.04.22 um 14:19 schrieb Haohui Mai:
> >>>> Dropped the changes of older generations.
> >>>>
> >>>> Signed-off-by: Haohui Mai <ricetons@gmail.com>
> >>> Please update the commit messages to include all the background we just
> >>> discussed.
> >>>
> >>> With that done the series is Reviewed-by: Christian König
> >>> <christian.koenig@amd.com>
> >>>
> >>>> ---
> >>>>    drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
> >>>>    drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
> >>>>    drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
> >>>>    3 files changed, 10 insertions(+), 10 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> >>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> >>>> index d7e8f7232364..ff86c43b63d1 100644
> >>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> >>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> >>>> @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
> >>>> amdgpu_ring *ring)
> >>>>
> >>>>                   DRM_DEBUG("Using doorbell -- "
> >>>>                                   "wptr_offs == 0x%08x "
> >>>> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> >>>> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> >>>> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> >>>> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >>>>                                   ring->wptr_offs,
> >>>>                                   lower_32_bits(ring->wptr << 2),
> >>>>                                   upper_32_bits(ring->wptr << 2));
> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> >>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> >>>> index a8d49c005f73..627eb1f147c2 100644
> >>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> >>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> >>>> @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
> >>>> amdgpu_ring *ring)
> >>>>           if (ring->use_doorbell) {
> >>>>                   DRM_DEBUG("Using doorbell -- "
> >>>>                                   "wptr_offs == 0x%08x "
> >>>> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> >>>> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> >>>> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> >>>> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >>>>                                   ring->wptr_offs,
> >>>>                                   lower_32_bits(ring->wptr << 2),
> >>>>                                   upper_32_bits(ring->wptr << 2));
> >>>> @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
> >>>>
> >>>>                   if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
> >>>> register write for wptr */
> >>>>                           WREG32(sdma_v5_0_get_reg_offset(adev, i,
> >>>> mmSDMA0_GFX_RB_WPTR),
> >>>> -                              lower_32_bits(ring->wptr) << 2);
> >>>> +                              lower_32_bits(ring->wptr << 2));
> >>>>                           WREG32(sdma_v5_0_get_reg_offset(adev, i,
> >>>> mmSDMA0_GFX_RB_WPTR_HI),
> >>>> -                              upper_32_bits(ring->wptr) << 2);
> >>>> +                              upper_32_bits(ring->wptr << 2));
> >>>>                   }
> >>>>
> >>>>                   doorbell = RREG32_SOC15_IP(GC,
> >>>> sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> >>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> >>>> index 824eace69884..a5eb82bfeaa8 100644
> >>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> >>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> >>>> @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
> >>>> amdgpu_ring *ring)
> >>>>           if (ring->use_doorbell) {
> >>>>                   DRM_DEBUG("Using doorbell -- "
> >>>>                                   "wptr_offs == 0x%08x "
> >>>> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> >>>> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> >>>> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> >>>> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >>>>                                   ring->wptr_offs,
> >>>>                                   lower_32_bits(ring->wptr << 2),
> >>>>                                   upper_32_bits(ring->wptr << 2));
> >>>> @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
> >>>>                   WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> >>>> mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
> >>>>
> >>>>                   if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
> >>>> register write for wptr */
> >>>> -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> >>>> mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
> >>>> -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> >>>> mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
> >>>> +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> >>>> mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
> >>>> +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> >>>> mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
> >>>>                   }
> >>>>
> >>>>                   doorbell = RREG32_SOC15_IP(GC,
> >>>> sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> >>>> --
> >>>> 2.25.1
> >>>>
> >>>> On Mon, Apr 25, 2022 at 7:52 PM Christian König
> >>>> <ckoenig.leichtzumerken@gmail.com> wrote:
> >>>>> Am 25.04.22 um 13:47 schrieb Haohui Mai:
> >>>>>> Updated the commit messages based on the previous discussion.
> >>>>> Please drop all the changes for pre SDMA v4 hardware (e.g. the ones with
> >>>>> only a 32bit register), so that we only have the changes for the 64bit
> >>>>> hw versions in here.
> >>>>>
> >>>>> Apart from that looks good to me.
> >>>>>
> >>>>> Thanks,
> >>>>> Christian.
> >>>>>
> >>>>>> Signed-off-by: Haohui Mai <ricetons@gmail.com>
> >>>>>> ---
> >>>>>>     drivers/gpu/drm/amd/amdgpu/cik_sdma.c  | 4 ++--
> >>>>>>     drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
> >>>>>>     drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 8 ++++----
> >>>>>>     drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
> >>>>>>     drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
> >>>>>>     drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
> >>>>>>     drivers/gpu/drm/amd/amdgpu/si_dma.c    | 4 ++--
> >>>>>>     7 files changed, 20 insertions(+), 20 deletions(-)
> >>>>>>
> >>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> >>>>>> b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> >>>>>> index c8ebd108548d..df863d346995 100644
> >>>>>> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> >>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> >>>>>> @@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
> >>>>>>      struct amdgpu_device *adev = ring->adev;
> >>>>>>
> >>>>>>      WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> >>>>>> -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
> >>>>>> +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
> >>>>>>     }
> >>>>>>
> >>>>>>     static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> >>>>>> @@ -487,7 +487,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
> >>>>>>      WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
> >>>>>>
> >>>>>>      ring->wptr = 0;
> >>>>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> >>>>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
> >>>>>>
> >>>>>>      /* enable DMA RB */
> >>>>>>      WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
> >>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> >>>>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> >>>>>> index 1d8bbcbd7a37..b83fd00466fe 100644
> >>>>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> >>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> >>>>>> @@ -223,7 +223,7 @@ static void sdma_v2_4_ring_set_wptr(struct
> >>>>>> amdgpu_ring *ring)
> >>>>>>     {
> >>>>>>      struct amdgpu_device *adev = ring->adev;
> >>>>>>
> >>>>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> >>>>>> lower_32_bits(ring->wptr) << 2);
> >>>>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> >>>>>> lower_32_bits(ring->wptr << 2));
> >>>>>>     }
> >>>>>>
> >>>>>>     static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> >>>>>> @@ -465,7 +465,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
> >>>>>>      WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
> >>>>>>
> >>>>>>      ring->wptr = 0;
> >>>>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> >>>>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
> >>>>>>
> >>>>>>      /* enable DMA RB */
> >>>>>>      rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
> >>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> >>>>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> >>>>>> index 4ef4feff5649..557a7d5174b0 100644
> >>>>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> >>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> >>>>>> @@ -389,14 +389,14 @@ static void sdma_v3_0_ring_set_wptr(struct
> >>>>>> amdgpu_ring *ring)
> >>>>>>      if (ring->use_doorbell) {
> >>>>>>      u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
> >>>>>>      /* XXX check if swapping is necessary on BE */
> >>>>>> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
> >>>>>> - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
> >>>>>> + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
> >>>>>> + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr << 2));
> >>>>>>      } else if (ring->use_pollmem) {
> >>>>>>      u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
> >>>>>>
> >>>>>> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
> >>>>>> + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
> >>>>>>      } else {
> >>>>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> >>>>>> lower_32_bits(ring->wptr) << 2);
> >>>>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> >>>>>> lower_32_bits(ring->wptr << 2));
> >>>>>>      }
> >>>>>>     }
> >>>>>>
> >>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> >>>>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> >>>>>> index d7e8f7232364..ff86c43b63d1 100644
> >>>>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> >>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> >>>>>> @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
> >>>>>> amdgpu_ring *ring)
> >>>>>>
> >>>>>>      DRM_DEBUG("Using doorbell -- "
> >>>>>>      "wptr_offs == 0x%08x "
> >>>>>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> >>>>>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> >>>>>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> >>>>>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >>>>>>      ring->wptr_offs,
> >>>>>>      lower_32_bits(ring->wptr << 2),
> >>>>>>      upper_32_bits(ring->wptr << 2));
> >>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> >>>>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> >>>>>> index a8d49c005f73..627eb1f147c2 100644
> >>>>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> >>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> >>>>>> @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
> >>>>>> amdgpu_ring *ring)
> >>>>>>      if (ring->use_doorbell) {
> >>>>>>      DRM_DEBUG("Using doorbell -- "
> >>>>>>      "wptr_offs == 0x%08x "
> >>>>>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> >>>>>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> >>>>>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> >>>>>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >>>>>>      ring->wptr_offs,
> >>>>>>      lower_32_bits(ring->wptr << 2),
> >>>>>>      upper_32_bits(ring->wptr << 2));
> >>>>>> @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
> >>>>>>
> >>>>>>      if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
> >>>>>> for wptr */
> >>>>>>      WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> >>>>>> -        lower_32_bits(ring->wptr) << 2);
> >>>>>> +        lower_32_bits(ring->wptr << 2));
> >>>>>>      WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> >>>>>> -        upper_32_bits(ring->wptr) << 2);
> >>>>>> +        upper_32_bits(ring->wptr << 2));
> >>>>>>      }
> >>>>>>
> >>>>>>      doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
> >>>>>> mmSDMA0_GFX_DOORBELL));
> >>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> >>>>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> >>>>>> index 824eace69884..a5eb82bfeaa8 100644
> >>>>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> >>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> >>>>>> @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
> >>>>>> amdgpu_ring *ring)
> >>>>>>      if (ring->use_doorbell) {
> >>>>>>      DRM_DEBUG("Using doorbell -- "
> >>>>>>      "wptr_offs == 0x%08x "
> >>>>>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> >>>>>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> >>>>>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> >>>>>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >>>>>>      ring->wptr_offs,
> >>>>>>      lower_32_bits(ring->wptr << 2),
> >>>>>>      upper_32_bits(ring->wptr << 2));
> >>>>>> @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
> >>>>>>      WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> >>>>>> mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
> >>>>>>
> >>>>>>      if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
> >>>>>> for wptr */
> >>>>>> - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> >>>>>> lower_32_bits(ring->wptr) << 2);
> >>>>>> - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> >>>>>> upper_32_bits(ring->wptr) << 2);
> >>>>>> + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> >>>>>> lower_32_bits(ring->wptr << 2));
> >>>>>> + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> >>>>>> upper_32_bits(ring->wptr << 2));
> >>>>>>      }
> >>>>>>
> >>>>>>      doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> >>>>>> mmSDMA0_GFX_DOORBELL));
> >>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> >>>>>> b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> >>>>>> index 195b45bcb8ad..0af11d3b00e7 100644
> >>>>>> --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> >>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> >>>>>> @@ -57,7 +57,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
> >>>>>>      u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
> >>>>>>
> >>>>>>      WREG32(DMA_RB_WPTR + sdma_offsets[me],
> >>>>>> -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
> >>>>>> +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
> >>>>>>     }
> >>>>>>
> >>>>>>     static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
> >>>>>> @@ -175,7 +175,7 @@ static int si_dma_start(struct amdgpu_device *adev)
> >>>>>>      WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
> >>>>>>
> >>>>>>      ring->wptr = 0;
> >>>>>> - WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> >>>>>> + WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
> >>>>>>      WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
> >>>>>>
> >>>>>>      ring->sched.ready = true;
> >>>>>> --
> >>>>>> 2.25.1
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-04-25 17:56 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-25 11:47 [PATCH 1/2] Fix incorrect calculations of the wptr of the doorbells Haohui Mai
2022-04-25 11:52 ` Christian König
2022-04-25 12:19   ` Haohui Mai
2022-04-25 12:33     ` Christian König
2022-04-25 12:41       ` Haohui Mai
2022-04-25 12:44         ` Haohui Mai
2022-04-25 16:03           ` Christian König
2022-04-25 17:55             ` Alex Deucher

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.