* [PATCH v6] drm/amdgpu: Ensure the DMA engine is deactivated during set ups
@ 2022-04-30  7:34 ricetons
  2022-05-04 22:48 ` Haohui Mai
  2022-05-05 16:33 ` Alex Deucher
  0 siblings, 2 replies; 11+ messages in thread
From: ricetons @ 2022-04-30  7:34 UTC (permalink / raw)
  To: amd-gfx
  Cc: yifan1.zhang, Guchun.Chen, ckoenig.leichtzumerken, Haohui Mai,
	lang.yu, Hawking.Zhang

From: Haohui Mai <ricetons@gmail.com>

The patch fully deactivates the DMA engine before setting up the ring
buffer to avoid potential data races and crashes.

Signed-off-by: Haohui Mai <ricetons@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 109 +++++++++++++++----------
 1 file changed, 64 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 013d2dec81d0..1fac9d8e99de 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -459,7 +459,6 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
 	}
 }
 
-
 /**
  * sdma_v5_2_gfx_stop - stop the gfx async dma engines
  *
@@ -505,17 +504,21 @@ static void sdma_v5_2_rlc_stop(struct amdgpu_device *adev)
 }
 
 /**
- * sdma_v5_2_ctx_switch_enable - stop the async dma engines context switch
+ * sdma_v5_2_ctx_switch_enable_for_instance - start the async dma engines
+ * context switch for an instance
  *
  * @adev: amdgpu_device pointer
- * @enable: enable/disable the DMA MEs context switch.
+ * @instance_idx: the index of the SDMA instance
  *
- * Halt or unhalt the async dma engines context switch.
+ * Unhalt the async dma engines context switch.
  */
-static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
+static void sdma_v5_2_ctx_switch_enable_for_instance(struct amdgpu_device *adev, int instance_idx)
 {
 	u32 f32_cntl, phase_quantum = 0;
-	int i;
+
+	if (WARN_ON(instance_idx >= adev->sdma.num_instances)) {
+		return;
+	}
 
 	if (amdgpu_sdma_phase_quantum) {
 		unsigned value = amdgpu_sdma_phase_quantum;
@@ -539,50 +542,68 @@ static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
 		phase_quantum =
 			value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
 			unit  << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
-	}
-
-	for (i = 0; i < adev->sdma.num_instances; i++) {
-		if (enable && amdgpu_sdma_phase_quantum) {
-			WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
-			       phase_quantum);
-			WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
-			       phase_quantum);
-			WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
-			       phase_quantum);
-		}
 
-		if (!amdgpu_sriov_vf(adev)) {
-			f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
-			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
-					AUTO_CTXSW_ENABLE, enable ? 1 : 0);
-			WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
-		}
+		WREG32_SOC15_IP(GC,
+			sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE0_QUANTUM),
+			phase_quantum);
+		WREG32_SOC15_IP(GC,
+			sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE1_QUANTUM),
+		    phase_quantum);
+		WREG32_SOC15_IP(GC,
+			sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE2_QUANTUM),
+		    phase_quantum);
 	}
 
+	if (!amdgpu_sriov_vf(adev)) {
+		f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_CNTL));
+		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+				AUTO_CTXSW_ENABLE, 1);
+		WREG32(sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_CNTL), f32_cntl);
+	}
 }
 
 /**
- * sdma_v5_2_enable - stop the async dma engines
+ * sdma_v5_2_ctx_switch_disable_all - stop the async dma engines context switch
  *
  * @adev: amdgpu_device pointer
- * @enable: enable/disable the DMA MEs.
  *
- * Halt or unhalt the async dma engines.
+ * Halt the async dma engines context switch.
  */
-static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
+static void sdma_v5_2_ctx_switch_disable_all(struct amdgpu_device *adev)
 {
 	u32 f32_cntl;
 	int i;
 
-	if (!enable) {
-		sdma_v5_2_gfx_stop(adev);
-		sdma_v5_2_rlc_stop(adev);
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
+		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+				AUTO_CTXSW_ENABLE, 0);
+		WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
 	}
+}
+
+/**
+ * sdma_v5_2_halt - stop the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Halt the async dma engines.
+ */
+static void sdma_v5_2_halt(struct amdgpu_device *adev)
+{
+	int i;
+	u32 f32_cntl;
+
+	sdma_v5_2_gfx_stop(adev);
+	sdma_v5_2_rlc_stop(adev);
 
 	if (!amdgpu_sriov_vf(adev)) {
 		for (i = 0; i < adev->sdma.num_instances; i++) {
 			f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
-			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
+			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
 			WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
 		}
 	}
@@ -594,6 +615,9 @@ static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
  * @adev: amdgpu_device pointer
  *
  * Set up the gfx DMA ring buffers and enable them.
+ * It assumes that the dma engine is stopped for each instance.
+ * The function enables the engine and preemption sequentially for each instance.
+ *
  * Returns 0 for success, error for failure.
  */
 static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
@@ -737,10 +761,7 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
 
 		ring->sched.ready = true;
 
-		if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
-			sdma_v5_2_ctx_switch_enable(adev, true);
-			sdma_v5_2_enable(adev, true);
-		}
+		sdma_v5_2_ctx_switch_enable_for_instance(adev, i);
 
 		r = amdgpu_ring_test_ring(ring);
 		if (r) {
@@ -784,7 +805,7 @@ static int sdma_v5_2_load_microcode(struct amdgpu_device *adev)
 	int i, j;
 
 	/* halt the MEs */
-	sdma_v5_2_enable(adev, false);
+	sdma_v5_2_halt(adev);
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		if (!adev->sdma.instance[i].fw)
@@ -856,8 +877,8 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
 	int r = 0;
 
 	if (amdgpu_sriov_vf(adev)) {
-		sdma_v5_2_ctx_switch_enable(adev, false);
-		sdma_v5_2_enable(adev, false);
+		sdma_v5_2_ctx_switch_disable_all(adev);
+		sdma_v5_2_halt(adev);
 
 		/* set RB registers */
 		r = sdma_v5_2_gfx_resume(adev);
@@ -881,12 +902,10 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
 		amdgpu_gfx_off_ctrl(adev, false);
 
 	sdma_v5_2_soft_reset(adev);
-	/* unhalt the MEs */
-	sdma_v5_2_enable(adev, true);
-	/* enable sdma ring preemption */
-	sdma_v5_2_ctx_switch_enable(adev, true);
 
-	/* start the gfx rings and rlc compute queues */
+	/* Soft reset is supposed to disable the dma engine and preemption.
+	 * Now start the gfx rings and rlc compute queues.
+	 */
 	r = sdma_v5_2_gfx_resume(adev);
 	if (adev->in_s0ix)
 		amdgpu_gfx_off_ctrl(adev, true);
@@ -1340,8 +1359,8 @@ static int sdma_v5_2_hw_fini(void *handle)
 	if (amdgpu_sriov_vf(adev))
 		return 0;
 
-	sdma_v5_2_ctx_switch_enable(adev, false);
-	sdma_v5_2_enable(adev, false);
+	sdma_v5_2_ctx_switch_disable_all(adev);
+	sdma_v5_2_halt(adev);
 
 	return 0;
 }
-- 
2.25.1



* Re: [PATCH v6] drm/amdgpu: Ensure the DMA engine is deactivated during set ups
  2022-04-30  7:34 [PATCH v6] drm/amdgpu: Ensure the DMA engine is deactivated during set ups ricetons
@ 2022-05-04 22:48 ` Haohui Mai
  2022-05-05 16:33 ` Alex Deucher
  1 sibling, 0 replies; 11+ messages in thread
From: Haohui Mai @ 2022-05-04 22:48 UTC (permalink / raw)
  To: amd-gfx
  Cc: ckoenig.leichtzumerken, Hawking.Zhang, lang.yu, Guchun.Chen,
	yifan1.zhang


Ping...


* Re: [PATCH v6] drm/amdgpu: Ensure the DMA engine is deactivated during set ups
  2022-04-30  7:34 [PATCH v6] drm/amdgpu: Ensure the DMA engine is deactivated during set ups ricetons
  2022-05-04 22:48 ` Haohui Mai
@ 2022-05-05 16:33 ` Alex Deucher
  2022-05-06  5:11   ` Haohui Mai
  1 sibling, 1 reply; 11+ messages in thread
From: Alex Deucher @ 2022-05-05 16:33 UTC (permalink / raw)
  To: Haohui Mai
  Cc: Yifan Zhang, Chen, Guchun, Christian König, amd-gfx list,
	Lang Yu, Hawking Zhang

On Sat, Apr 30, 2022 at 3:34 AM <ricetons@gmail.com> wrote:
>
> From: Haohui Mai <ricetons@gmail.com>
>
> The patch fully deactivates the DMA engine before setting up the ring
> buffer to avoid potential data races and crashes.

Does this actually fix an issue you are seeing?  I don't think it will
hurt anything, but I also don't think it's strictly necessary.  AFAIK,
only the HALT bit really matters.  So the commit message might be
somewhat misleading.

Alex
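
(For reference, the HALT bit in question is the per-instance field of
SDMA0_F32_CNTL that the patch factors into sdma_v5_2_halt(). A minimal
sketch of that write, reusing the register helpers already used in the
driver; the function name here is illustrative only:

static void halt_all_sdma_instances(struct amdgpu_device *adev)
{
	u32 f32_cntl;
	int i;

	/* Set HALT=1 in SDMA0_F32_CNTL for every SDMA instance,
	 * as sdma_v5_2_halt() in the patch does.
	 */
	for (i = 0; i < adev->sdma.num_instances; i++) {
		f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
		WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
	}
})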



* Re: [PATCH v6] drm/amdgpu: Ensure the DMA engine is deactivated during set ups
  2022-05-05 16:33 ` Alex Deucher
@ 2022-05-06  5:11   ` Haohui Mai
  2022-05-06 13:36     ` Alex Deucher
  0 siblings, 1 reply; 11+ messages in thread
From: Haohui Mai @ 2022-05-06  5:11 UTC (permalink / raw)
  To: Alex Deucher
  Cc: Yifan Zhang, Chen, Guchun, Christian König, amd-gfx list,
	Lang Yu, Hawking Zhang

The only thing that matters is that the IP should be halted before
programming the ring buffers.

What about rephrasing the commit messages to highlight the issue a
little bit better?
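
Concretely, the invariant under discussion, expressed with the helpers
introduced by the patch, is roughly the following sketch (error handling
and the bare-metal vs. SR-IOV differences are glossed over; the function
name is illustrative only):

static int sdma_v5_2_setup_sketch(struct amdgpu_device *adev)
{
	/* Quiesce the engines before touching any ring buffer registers. */
	sdma_v5_2_ctx_switch_disable_all(adev);
	sdma_v5_2_halt(adev);

	/* sdma_v5_2_gfx_resume() then programs the RB registers and, for
	 * each instance in turn, unhalts the engine and re-enables context
	 * switching via sdma_v5_2_ctx_switch_enable_for_instance().
	 */
	return sdma_v5_2_gfx_resume(adev);
}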

On Fri, May 6, 2022 at 12:33 AM Alex Deucher <alexdeucher@gmail.com> wrote:
>
> On Sat, Apr 30, 2022 at 3:34 AM <ricetons@gmail.com> wrote:
> >
> > From: Haohui Mai <ricetons@gmail.com>
> >
> > The patch fully deactivates the DMA engine before setting up the ring
> > buffer to avoid potential data races and crashes.
>
> Does this actually fix an issue you are seeing?  I don't think it will
> hurt anything, but I also don't think it's strictly necessary.  AFAIK,
> only the HALT bit really matters.  So the commit message might be
> somewhat misleading.
>
> Alex
>


* Re: [PATCH v6] drm/amdgpu: Ensure the DMA engine is deactivated during set ups
  2022-05-06  5:11   ` Haohui Mai
@ 2022-05-06 13:36     ` Alex Deucher
  2022-05-07  2:30       ` Haohui Mai
  0 siblings, 1 reply; 11+ messages in thread
From: Alex Deucher @ 2022-05-06 13:36 UTC (permalink / raw)
  To: Haohui Mai
  Cc: Yifan Zhang, Chen, Guchun, Christian König, amd-gfx list,
	Lang Yu, Hawking Zhang

On Fri, May 6, 2022 at 1:11 AM Haohui Mai <ricetons@gmail.com> wrote:
>
> The only thing that matters is that the IP should be halted before
> programming the ring buffers.
>
> What about rephrasing the commit messages to highlight the issue a
> little bit better?

Yeah, that is fine.  Thanks!

Alex



* Re: [PATCH v6] drm/amdgpu: Ensure the DMA engine is deactivated during set ups
  2022-05-06 13:36     ` Alex Deucher
@ 2022-05-07  2:30       ` Haohui Mai
  2022-05-09 14:48         ` Alex Deucher
  0 siblings, 1 reply; 11+ messages in thread
From: Haohui Mai @ 2022-05-07  2:30 UTC (permalink / raw)
  To: Alex Deucher
  Cc: Yifan Zhang, Chen, Guchun, Christian König, amd-gfx list,
	Lang Yu, Hawking Zhang

What about

Setting the HALT bit of SDMA_F32_CNTL in all paths before programming
the ring buffer of the SDMA engine.

No other changes are required in the patch.

~Haohui

On Fri, May 6, 2022 at 9:36 PM Alex Deucher <alexdeucher@gmail.com> wrote:
>
> On Fri, May 6, 2022 at 1:11 AM Haohui Mai <ricetons@gmail.com> wrote:
> >
> > The only thing that matters is that the IP should be halted before
> > programming the ring buffers.
> >
> > What about rephrasing the commit messages to highlight the issue a
> > little bit better?
>
> Yeah, that is fine.  Thanks!
>
> Alex

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v6] drm/amdgpu: Ensure the DMA engine is deactivated during set ups
  2022-05-07  2:30       ` Haohui Mai
@ 2022-05-09 14:48         ` Alex Deucher
  2022-05-10 10:52           ` Haohui Mai
  0 siblings, 1 reply; 11+ messages in thread
From: Alex Deucher @ 2022-05-09 14:48 UTC (permalink / raw)
  To: Haohui Mai
  Cc: Yifan Zhang, Chen, Guchun, Christian König, amd-gfx list,
	Lang Yu, Hawking Zhang

On Fri, May 6, 2022 at 10:30 PM Haohui Mai <ricetons@gmail.com> wrote:
>
> What about
>
> Setting the HALT bit of SDMA_F32_CNTL in all paths before programming
> the ring buffer of the SDMA engine.
>

Sounds fine to me.

Alex

> No other changes are required in the patch.
>
> ~Haohui
>
> On Fri, May 6, 2022 at 9:36 PM Alex Deucher <alexdeucher@gmail.com> wrote:
> >
> > On Fri, May 6, 2022 at 1:11 AM Haohui Mai <ricetons@gmail.com> wrote:
> > >
> > > The only thing that matters is that the IP should be halted before
> > > programming the ring buffers.
> > >
> > > What about rephrasing the commit messages to highlight the issue a
> > > little bit better?
> >
> > Yeah, that is fine.  Thanks!
> >
> > Alex
> >
> > >
> > > On Fri, May 6, 2022 at 12:33 AM Alex Deucher <alexdeucher@gmail.com> wrote:
> > > >
> > > > On Sat, Apr 30, 2022 at 3:34 AM <ricetons@gmail.com> wrote:
> > > > >
> > > > > From: Haohui Mai <ricetons@gmail.com>
> > > > >
> > > > > The patch fully deactivates the DMA engine before setting up the ring
> > > > > buffer to avoid potential data races and crashes.
> > > >
> > > > Does this actually fix an issue you are seeing?  I don't think it will
> > > > hurt anything, but I also don't think it's strictly necessary.  AFAIK,
> > > > only the HALT bit really matters.  So the commit message might be
> > > > somewhat misleading.
> > > >
> > > > Alex
> > > >
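A minimal sketch of the ordering being agreed on here -- set the HALT bit of
SDMA0_F32_CNTL on every SDMA instance before any ring-buffer register is
written, and clear it only once the ring is programmed.  The register helpers
are the ones visible in the patch (sdma_v5_2_get_reg_offset, REG_SET_FIELD,
RREG32/WREG32); the function name is made up for illustration, and SR-IOV
handling, phase-quantum setup and error paths are omitted, so this is a sketch
rather than the applied code:

static void sdma_v5_2_halt_then_program_sketch(struct amdgpu_device *adev)
{
	u32 f32_cntl;
	int i;

	/* 1) Halt every instance before touching the ring-buffer registers. */
	for (i = 0; i < adev->sdma.num_instances; i++) {
		f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
		WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
	}

	/* 2) Program RB base, rptr/wptr, doorbell, etc. while halted
	 *    (what sdma_v5_2_gfx_resume() does per instance). */

	/* 3) Clear HALT so the engine starts fetching from the ring again. */
	for (i = 0; i < adev->sdma.num_instances; i++) {
		f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 0);
		WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
	}
}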

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v6] drm/amdgpu: Ensure the DMA engine is deactivated during set ups
  2022-05-09 14:48         ` Alex Deucher
@ 2022-05-10 10:52           ` Haohui Mai
  2022-05-10 14:14             ` Alex Deucher
  0 siblings, 1 reply; 11+ messages in thread
From: Haohui Mai @ 2022-05-10 10:52 UTC (permalink / raw)
  To: Alex Deucher
  Cc: Yifan Zhang, Chen, Guchun, Christian König, amd-gfx list,
	Lang Yu, Hawking Zhang

Hi Alex,

Is there anything open before it can be merged?

Thanks,
Haohui

On Mon, May 9, 2022 at 10:48 PM Alex Deucher <alexdeucher@gmail.com> wrote:
>
> On Fri, May 6, 2022 at 10:30 PM Haohui Mai <ricetons@gmail.com> wrote:
> >
> > What about
> >
> > Setting the HALT bit of SDMA_F32_CNTL in all paths before programming
> > the ring buffer of the SDMA engine.
> >
>
> Sounds fine to me.
>
> Alex
>
> > No other changes are required in the patch.
> >
> > ~Haohui
> >
> > On Fri, May 6, 2022 at 9:36 PM Alex Deucher <alexdeucher@gmail.com> wrote:
> > >
> > > On Fri, May 6, 2022 at 1:11 AM Haohui Mai <ricetons@gmail.com> wrote:
> > > >
> > > > The only thing that matters is that the IP should be halted before
> > > > programming the ring buffers.
> > > >
> > > > What about rephrasing the commit messages to highlight the issue a
> > > > little bit better?
> > >
> > > Yeah, that is fine.  Thanks!
> > >
> > > Alex
> > >
> > > >
> > > > On Fri, May 6, 2022 at 12:33 AM Alex Deucher <alexdeucher@gmail.com> wrote:
> > > > >
> > > > > On Sat, Apr 30, 2022 at 3:34 AM <ricetons@gmail.com> wrote:
> > > > > >
> > > > > > From: Haohui Mai <ricetons@gmail.com>
> > > > > >
> > > > > > The patch fully deactivates the DMA engine before setting up the ring
> > > > > > buffer to avoid potential data races and crashes.
> > > > >
> > > > > Does this actually fix an issue you are seeing?  I don't think it will
> > > > > hurt anything, but I also don't think it's strictly necessary.  AFAIK,
> > > > > only the HALT bit really matters.  So the commit message might be
> > > > > somewhat misleading.
> > > > >
> > > > > Alex
> > > > >

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v6] drm/amdgpu: Ensure the DMA engine is deactivated during set ups
  2022-05-10 10:52           ` Haohui Mai
@ 2022-05-10 14:14             ` Alex Deucher
  2022-05-11  4:05               ` Haohui Mai
  0 siblings, 1 reply; 11+ messages in thread
From: Alex Deucher @ 2022-05-10 14:14 UTC (permalink / raw)
  To: Haohui Mai
  Cc: Yifan Zhang, Chen, Guchun, Christian König, amd-gfx list,
	Lang Yu, Hawking Zhang

On Tue, May 10, 2022 at 6:53 AM Haohui Mai <ricetons@gmail.com> wrote:
>
> Hi Alex,
>
> Is there anything open before it can be merged?

Were you going to send an updated patch?

Alex

>
> Thanks,
> Haohui
>
> On Mon, May 9, 2022 at 10:48 PM Alex Deucher <alexdeucher@gmail.com> wrote:
> >
> > On Fri, May 6, 2022 at 10:30 PM Haohui Mai <ricetons@gmail.com> wrote:
> > >
> > > What about
> > >
> > > Setting the HALT bit of SDMA_F32_CNTL in all paths before programming
> > > the ring buffer of the SDMA engine.
> > >
> >
> > Sounds fine to me.
> >
> > Alex
> >
> > > No other changes are required in the patch.
> > >
> > > ~Haohui
> > >
> > > On Fri, May 6, 2022 at 9:36 PM Alex Deucher <alexdeucher@gmail.com> wrote:
> > > >
> > > > On Fri, May 6, 2022 at 1:11 AM Haohui Mai <ricetons@gmail.com> wrote:
> > > > >
> > > > > The only thing that matters is that the IP should be halted before
> > > > > programming the ring buffers.
> > > > >
> > > > > What about rephrasing the commit messages to highlight the issue a
> > > > > little bit better?
> > > >
> > > > Yeah, that is fine.  Thanks!
> > > >
> > > > Alex
> > > >
> > > > >
> > > > > On Fri, May 6, 2022 at 12:33 AM Alex Deucher <alexdeucher@gmail.com> wrote:
> > > > > >
> > > > > > On Sat, Apr 30, 2022 at 3:34 AM <ricetons@gmail.com> wrote:
> > > > > > >
> > > > > > > From: Haohui Mai <ricetons@gmail.com>
> > > > > > >
> > > > > > > The patch fully deactivates the DMA engine before setting up the ring
> > > > > > > buffer to avoid potential data races and crashes.
> > > > > >
> > > > > > Does this actually fix an issue you are seeing?  I don't think it will
> > > > > > hurt anything, but I also don't think it's strictly necessary.  AFAIK,
> > > > > > only the HALT bit really matters.  So the commit message might be
> > > > > > somewhat misleading.
> > > > > >
> > > > > > Alex
> > > > > >

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v6] drm/amdgpu: Ensure the DMA engine is deactivated during set ups
  2022-05-10 14:14             ` Alex Deucher
@ 2022-05-11  4:05               ` Haohui Mai
  2022-05-11 15:23                 ` Alex Deucher
  0 siblings, 1 reply; 11+ messages in thread
From: Haohui Mai @ 2022-05-11  4:05 UTC (permalink / raw)
  To: Alex Deucher
  Cc: Yifan Zhang, Chen, Guchun, Christian König, amd-gfx list,
	Lang Yu, Hawking Zhang

It should be an identical patch except for the commit message. Do you
want me to send out a new one? Either way is fine with me.

~Haohui

On Tue, May 10, 2022 at 10:14 PM Alex Deucher <alexdeucher@gmail.com> wrote:
>
> On Tue, May 10, 2022 at 6:53 AM Haohui Mai <ricetons@gmail.com> wrote:
> >
> > Hi Alex,
> >
> > Is there anything open before it can be merged?
>
> Were you going to send an updated patch?
>
> Alex
>
> >
> > Thanks,
> > Haohui
> >
> > On Mon, May 9, 2022 at 10:48 PM Alex Deucher <alexdeucher@gmail.com> wrote:
> > >
> > > On Fri, May 6, 2022 at 10:30 PM Haohui Mai <ricetons@gmail.com> wrote:
> > > >
> > > > What about
> > > >
> > > > Setting the HALT bit of SDMA_F32_CNTL in all paths before programming
> > > > the ring buffer of the SDMA engine.
> > > >
> > >
> > > Sounds fine to me.
> > >
> > > Alex
> > >
> > > > No other changes are required in the patch.
> > > >
> > > > ~Haohui
> > > >
> > > > On Fri, May 6, 2022 at 9:36 PM Alex Deucher <alexdeucher@gmail.com> wrote:
> > > > >
> > > > > On Fri, May 6, 2022 at 1:11 AM Haohui Mai <ricetons@gmail.com> wrote:
> > > > > >
> > > > > > The only thing that matters is that the IP should be halted before
> > > > > > programming the ring buffers.
> > > > > >
> > > > > > What about rephrasing the commit messages to highlight the issue a
> > > > > > little bit better?
> > > > >
> > > > > Yeah, that is fine.  Thanks!
> > > > >
> > > > > Alex
> > > > >
> > > > > >
> > > > > > On Fri, May 6, 2022 at 12:33 AM Alex Deucher <alexdeucher@gmail.com> wrote:
> > > > > > >
> > > > > > > On Sat, Apr 30, 2022 at 3:34 AM <ricetons@gmail.com> wrote:
> > > > > > > >
> > > > > > > > From: Haohui Mai <ricetons@gmail.com>
> > > > > > > >
> > > > > > > > The patch fully deactivates the DMA engine before setting up the ring
> > > > > > > > buffer to avoid potential data races and crashes.
> > > > > > >
> > > > > > > Does this actually fix an issue you are seeing?  I don't think it will
> > > > > > > hurt anything, but I also don't think it's strictly necessary.  AFAIK,
> > > > > > > only the HALT bit really matters.  So the commit message might be
> > > > > > > somewhat misleading.
> > > > > > >
> > > > > > > Alex
> > > > > > >

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v6] drm/amdgpu: Ensure the DMA engine is deactivated during set ups
  2022-05-11  4:05               ` Haohui Mai
@ 2022-05-11 15:23                 ` Alex Deucher
  0 siblings, 0 replies; 11+ messages in thread
From: Alex Deucher @ 2022-05-11 15:23 UTC (permalink / raw)
  To: Haohui Mai
  Cc: Yifan Zhang, Chen, Guchun, Christian König, amd-gfx list,
	Lang Yu, Hawking Zhang

I've applied it with the change to the commit message.  Sorry for the confusion.

Alex

On Wed, May 11, 2022 at 12:05 AM Haohui Mai <ricetons@gmail.com> wrote:
>
> It should be an identical patch except for the commit message. Do you
> want me to send out a new one? Either way is fine with me.
>
> ~Haohui
>
> On Tue, May 10, 2022 at 10:14 PM Alex Deucher <alexdeucher@gmail.com> wrote:
> >
> > On Tue, May 10, 2022 at 6:53 AM Haohui Mai <ricetons@gmail.com> wrote:
> > >
> > > Hi Alex,
> > >
> > > Is there anything open before it can be merged?
> >
> > Were you going to send an updated patch?
> >
> > Alex
> >
> > >
> > > Thanks,
> > > Haohui
> > >
> > > On Mon, May 9, 2022 at 10:48 PM Alex Deucher <alexdeucher@gmail.com> wrote:
> > > >
> > > > On Fri, May 6, 2022 at 10:30 PM Haohui Mai <ricetons@gmail.com> wrote:
> > > > >
> > > > > What about
> > > > >
> > > > > Setting the HALT bit of SDMA_F32_CNTL in all paths before programming
> > > > > the ring buffer of the SDMA engine.
> > > > >
> > > >
> > > > Sounds fine to me.
> > > >
> > > > Alex
> > > >
> > > > > No other changes are required in the patch.
> > > > >
> > > > > ~Haohui
> > > > >
> > > > > On Fri, May 6, 2022 at 9:36 PM Alex Deucher <alexdeucher@gmail.com> wrote:
> > > > > >
> > > > > > On Fri, May 6, 2022 at 1:11 AM Haohui Mai <ricetons@gmail.com> wrote:
> > > > > > >
> > > > > > > The only thing that matters is that the IP should be halted before
> > > > > > > programming the ring buffers.
> > > > > > >
> > > > > > > What about rephrasing the commit messages to highlight the issue a
> > > > > > > little bit better?
> > > > > >
> > > > > > Yeah, that is fine.  Thanks!
> > > > > >
> > > > > > Alex
> > > > > >
> > > > > > >
> > > > > > > On Fri, May 6, 2022 at 12:33 AM Alex Deucher <alexdeucher@gmail.com> wrote:
> > > > > > > >
> > > > > > > > On Sat, Apr 30, 2022 at 3:34 AM <ricetons@gmail.com> wrote:
> > > > > > > > >
> > > > > > > > > From: Haohui Mai <ricetons@gmail.com>
> > > > > > > > >
> > > > > > > > > The patch fully deactivates the DMA engine before setting up the ring
> > > > > > > > > buffer to avoid potential data races and crashes.
> > > > > > > >
> > > > > > > > Does this actually fix an issue you are seeing?  I don't think it will
> > > > > > > > hurt anything, but I also don't think it's strictly necessary.  AFAIK,
> > > > > > > > only the HALT bit really matters.  So the commit message might be
> > > > > > > > somewhat misleading.
> > > > > > > >
> > > > > > > > Alex
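
To make the "only the HALT bit really matters" point concrete, a sketch of the halt step on bare metal (SR-IOV guests skip the SDMA0_F32_CNTL write), using register helpers that already exist in the file; this is roughly what sdma_v5_2_halt() in the patch below does per instance, leaving out the gfx/rlc queue stop that the full helper performs first:

	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		u32 f32_cntl;

		/* Read-modify-write SDMA0_F32_CNTL so the engine stops
		 * fetching before the ring registers are reprogrammed. */
		f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
		WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
	}
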
> > > > > > > >
> > > > > > > > >
> > > > > > > > > Signed-off-by: Haohui Mai <ricetons@gmail.com>
> > > > > > > > > ---
> > > > > > > > >  drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 109 +++++++++++++++----------
> > > > > > > > >  1 file changed, 64 insertions(+), 45 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > > > > > > > > index 013d2dec81d0..1fac9d8e99de 100644
> > > > > > > > > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > > > > > > > > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > > > > > > > > @@ -459,7 +459,6 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
> > > > > > > > >         }
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > -
> > > > > > > > >  /**
> > > > > > > > >   * sdma_v5_2_gfx_stop - stop the gfx async dma engines
> > > > > > > > >   *
> > > > > > > > > @@ -505,17 +504,21 @@ static void sdma_v5_2_rlc_stop(struct amdgpu_device *adev)
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > >  /**
> > > > > > > > > - * sdma_v5_2_ctx_switch_enable - stop the async dma engines context switch
> > > > > > > > > + * sdma_v5_2_ctx_switch_enable_for_instance - start the async dma engines
> > > > > > > > > + * context switch for an instance
> > > > > > > > >   *
> > > > > > > > >   * @adev: amdgpu_device pointer
> > > > > > > > > - * @enable: enable/disable the DMA MEs context switch.
> > > > > > > > > + * @instance_idx: the index of the SDMA instance
> > > > > > > > >   *
> > > > > > > > > - * Halt or unhalt the async dma engines context switch.
> > > > > > > > > + * Unhalt the async dma engines context switch.
> > > > > > > > >   */
> > > > > > > > > -static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
> > > > > > > > > +static void sdma_v5_2_ctx_switch_enable_for_instance(struct amdgpu_device *adev, int instance_idx)
> > > > > > > > >  {
> > > > > > > > >         u32 f32_cntl, phase_quantum = 0;
> > > > > > > > > -       int i;
> > > > > > > > > +
> > > > > > > > > +       if (WARN_ON(instance_idx >= adev->sdma.num_instances)) {
> > > > > > > > > +               return;
> > > > > > > > > +       }
> > > > > > > > >
> > > > > > > > >         if (amdgpu_sdma_phase_quantum) {
> > > > > > > > >                 unsigned value = amdgpu_sdma_phase_quantum;
> > > > > > > > > @@ -539,50 +542,68 @@ static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
> > > > > > > > >                 phase_quantum =
> > > > > > > > >                         value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
> > > > > > > > >                         unit  << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
> > > > > > > > > -       }
> > > > > > > > > -
> > > > > > > > > -       for (i = 0; i < adev->sdma.num_instances; i++) {
> > > > > > > > > -               if (enable && amdgpu_sdma_phase_quantum) {
> > > > > > > > > -                       WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
> > > > > > > > > -                              phase_quantum);
> > > > > > > > > -                       WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
> > > > > > > > > -                              phase_quantum);
> > > > > > > > > -                       WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
> > > > > > > > > -                              phase_quantum);
> > > > > > > > > -               }
> > > > > > > > >
> > > > > > > > > -               if (!amdgpu_sriov_vf(adev)) {
> > > > > > > > > -                       f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
> > > > > > > > > -                       f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
> > > > > > > > > -                                       AUTO_CTXSW_ENABLE, enable ? 1 : 0);
> > > > > > > > > -                       WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
> > > > > > > > > -               }
> > > > > > > > > +               WREG32_SOC15_IP(GC,
> > > > > > > > > +                       sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE0_QUANTUM),
> > > > > > > > > +                       phase_quantum);
> > > > > > > > > +               WREG32_SOC15_IP(GC,
> > > > > > > > > +                       sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE1_QUANTUM),
> > > > > > > > > +                   phase_quantum);
> > > > > > > > > +               WREG32_SOC15_IP(GC,
> > > > > > > > > +                       sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE2_QUANTUM),
> > > > > > > > > +                   phase_quantum);
> > > > > > > > >         }
> > > > > > > > >
> > > > > > > > > +       if (!amdgpu_sriov_vf(adev)) {
> > > > > > > > > +               f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_CNTL));
> > > > > > > > > +               f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
> > > > > > > > > +                               AUTO_CTXSW_ENABLE, 1);
> > > > > > > > > +               WREG32(sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_CNTL), f32_cntl);
> > > > > > > > > +       }
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > >  /**
> > > > > > > > > - * sdma_v5_2_enable - stop the async dma engines
> > > > > > > > > + * sdma_v5_2_ctx_switch_disable_all - stop the async dma engines context switch
> > > > > > > > >   *
> > > > > > > > >   * @adev: amdgpu_device pointer
> > > > > > > > > - * @enable: enable/disable the DMA MEs.
> > > > > > > > >   *
> > > > > > > > > - * Halt or unhalt the async dma engines.
> > > > > > > > > + * Halt the async dma engines context switch.
> > > > > > > > >   */
> > > > > > > > > -static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
> > > > > > > > > +static void sdma_v5_2_ctx_switch_disable_all(struct amdgpu_device *adev)
> > > > > > > > >  {
> > > > > > > > >         u32 f32_cntl;
> > > > > > > > >         int i;
> > > > > > > > >
> > > > > > > > > -       if (!enable) {
> > > > > > > > > -               sdma_v5_2_gfx_stop(adev);
> > > > > > > > > -               sdma_v5_2_rlc_stop(adev);
> > > > > > > > > +       if (amdgpu_sriov_vf(adev))
> > > > > > > > > +               return;
> > > > > > > > > +
> > > > > > > > > +       for (i = 0; i < adev->sdma.num_instances; i++) {
> > > > > > > > > +               f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
> > > > > > > > > +               f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
> > > > > > > > > +                               AUTO_CTXSW_ENABLE, 0);
> > > > > > > > > +               WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
> > > > > > > > >         }
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > > +/**
> > > > > > > > > + * sdma_v5_2_halt - stop the async dma engines
> > > > > > > > > + *
> > > > > > > > > + * @adev: amdgpu_device pointer
> > > > > > > > > + *
> > > > > > > > > + * Halt the async dma engines.
> > > > > > > > > + */
> > > > > > > > > +static void sdma_v5_2_halt(struct amdgpu_device *adev)
> > > > > > > > > +{
> > > > > > > > > +       int i;
> > > > > > > > > +       u32 f32_cntl;
> > > > > > > > > +
> > > > > > > > > +       sdma_v5_2_gfx_stop(adev);
> > > > > > > > > +       sdma_v5_2_rlc_stop(adev);
> > > > > > > > >
> > > > > > > > >         if (!amdgpu_sriov_vf(adev)) {
> > > > > > > > >                 for (i = 0; i < adev->sdma.num_instances; i++) {
> > > > > > > > >                         f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
> > > > > > > > > -                       f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
> > > > > > > > > +                       f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
> > > > > > > > >                         WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
> > > > > > > > >                 }
> > > > > > > > >         }
> > > > > > > > > @@ -594,6 +615,9 @@ static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
> > > > > > > > >   * @adev: amdgpu_device pointer
> > > > > > > > >   *
> > > > > > > > >   * Set up the gfx DMA ring buffers and enable them.
> > > > > > > > > + * It assumes that the dma engine is stopped for each instance.
> > > > > > > > > + * The function enables the engine and preemptions sequentially for each instance.
> > > > > > > > > + *
> > > > > > > > >   * Returns 0 for success, error for failure.
> > > > > > > > >   */
> > > > > > > > >  static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
> > > > > > > > > @@ -737,10 +761,7 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
> > > > > > > > >
> > > > > > > > >                 ring->sched.ready = true;
> > > > > > > > >
> > > > > > > > > -               if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
> > > > > > > > > -                       sdma_v5_2_ctx_switch_enable(adev, true);
> > > > > > > > > -                       sdma_v5_2_enable(adev, true);
> > > > > > > > > -               }
> > > > > > > > > +               sdma_v5_2_ctx_switch_enable_for_instance(adev, i);
> > > > > > > > >
> > > > > > > > >                 r = amdgpu_ring_test_ring(ring);
> > > > > > > > >                 if (r) {
> > > > > > > > > @@ -784,7 +805,7 @@ static int sdma_v5_2_load_microcode(struct amdgpu_device *adev)
> > > > > > > > >         int i, j;
> > > > > > > > >
> > > > > > > > >         /* halt the MEs */
> > > > > > > > > -       sdma_v5_2_enable(adev, false);
> > > > > > > > > +       sdma_v5_2_halt(adev);
> > > > > > > > >
> > > > > > > > >         for (i = 0; i < adev->sdma.num_instances; i++) {
> > > > > > > > >                 if (!adev->sdma.instance[i].fw)
> > > > > > > > > @@ -856,8 +877,8 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
> > > > > > > > >         int r = 0;
> > > > > > > > >
> > > > > > > > >         if (amdgpu_sriov_vf(adev)) {
> > > > > > > > > -               sdma_v5_2_ctx_switch_enable(adev, false);
> > > > > > > > > -               sdma_v5_2_enable(adev, false);
> > > > > > > > > +               sdma_v5_2_ctx_switch_disable_all(adev);
> > > > > > > > > +               sdma_v5_2_halt(adev);
> > > > > > > > >
> > > > > > > > >                 /* set RB registers */
> > > > > > > > >                 r = sdma_v5_2_gfx_resume(adev);
> > > > > > > > > @@ -881,12 +902,10 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
> > > > > > > > >                 amdgpu_gfx_off_ctrl(adev, false);
> > > > > > > > >
> > > > > > > > >         sdma_v5_2_soft_reset(adev);
> > > > > > > > > -       /* unhalt the MEs */
> > > > > > > > > -       sdma_v5_2_enable(adev, true);
> > > > > > > > > -       /* enable sdma ring preemption */
> > > > > > > > > -       sdma_v5_2_ctx_switch_enable(adev, true);
> > > > > > > > >
> > > > > > > > > -       /* start the gfx rings and rlc compute queues */
> > > > > > > > > +       /* Soft reset is supposed to disable the dma engine and preemption.
> > > > > > > > > +        * Now start the gfx rings and rlc compute queues.
> > > > > > > > > +        */
> > > > > > > > >         r = sdma_v5_2_gfx_resume(adev);
> > > > > > > > >         if (adev->in_s0ix)
> > > > > > > > >                 amdgpu_gfx_off_ctrl(adev, true);
> > > > > > > > > @@ -1340,8 +1359,8 @@ static int sdma_v5_2_hw_fini(void *handle)
> > > > > > > > >         if (amdgpu_sriov_vf(adev))
> > > > > > > > >                 return 0;
> > > > > > > > >
> > > > > > > > > -       sdma_v5_2_ctx_switch_enable(adev, false);
> > > > > > > > > -       sdma_v5_2_enable(adev, false);
> > > > > > > > > +       sdma_v5_2_ctx_switch_disable_all(adev);
> > > > > > > > > +       sdma_v5_2_halt(adev);
> > > > > > > > >
> > > > > > > > >         return 0;
> > > > > > > > >  }
> > > > > > > > > --
> > > > > > > > > 2.25.1
> > > > > > > > >


end of thread

Thread overview: 11+ messages
2022-04-30  7:34 [PATCH v6] drm/amdgpu: Ensure the DMA engine is deactivated during set ups ricetons
2022-05-04 22:48 ` Haohui Mai
2022-05-05 16:33 ` Alex Deucher
2022-05-06  5:11   ` Haohui Mai
2022-05-06 13:36     ` Alex Deucher
2022-05-07  2:30       ` Haohui Mai
2022-05-09 14:48         ` Alex Deucher
2022-05-10 10:52           ` Haohui Mai
2022-05-10 14:14             ` Alex Deucher
2022-05-11  4:05               ` Haohui Mai
2022-05-11 15:23                 ` Alex Deucher
