* [PATCH 1/3] drm/amdgpu: modify mcbp implement for gfx9
@ 2022-07-15  8:43 jiadong.zhu
  2022-07-15  8:43 ` [PATCH 2/3] drm/amdgpu: add mcbp support for sdma v4.0 jiadong.zhu
  2022-07-15  8:43 ` [PATCH 3/3] drm/amdgpu: skip put fence if signal fails jiadong.zhu
  0 siblings, 2 replies; 8+ messages in thread
From: jiadong.zhu @ 2022-07-15  8:43 UTC (permalink / raw)
  To: amd-gfx; +Cc: Ray.Huang, Jiadong.Zhu, aaron.liu

From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>

1. Use the unmap_queues packet to trigger preemption on gfx9.
   Add a trailing fence to track when the preemption completes
   (sequence outlined below).
2. Modify the emit_ce_meta and emit_de_meta functions to handle
   resumed IBs.
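
A condensed sketch of that preemption sequence (it mirrors the
gfx_v9_0_ring_preempt_ib hunk added below; locking and error handling are
omitted, so treat it as an outline rather than the exact driver code):

/* 1. block new IBs via the conditional-execution word,
 * 2. emit a trailing fence with the EXEC bit so it only signals once the
 *    preempted work has actually stopped,
 * 3. ask the KIQ to preempt the queue without unmapping it,
 * 4. poll the trailing fence to learn that the preemption completed. */
amdgpu_ring_set_preempt_cond_exec(ring, false);
ring->trail_seq += 1;
gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
                         ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC);
kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
                           ring->trail_fence_gpu_addr, ring->trail_seq);
for (i = 0; i < adev->usec_timeout; i++) {
        if (ring->trail_seq == le32_to_cpu(*ring->trail_fence_cpu_addr))
                break;          /* preemption completed */
        udelay(1);
}
amdgpu_ring_set_preempt_cond_exec(ring, true);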
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 156 ++++++++++++++++++++---
 drivers/gpu/drm/amd/amdgpu/soc15d.h      |   2 +
 3 files changed, 138 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 7d89a52091c0..2b402a8bc4fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -59,6 +59,7 @@ enum amdgpu_ring_priority_level {
 #define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT           (1 << 1)
 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY    (1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC          (1 << 3)
 
 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 603bfa52e6e8..d6106d480d0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -751,7 +751,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 				struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 					  void *ras_error_status);
@@ -824,9 +824,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
-		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-		amdgpu_ring_write(kiq_ring, seq);
+		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
+		amdgpu_ring_write(kiq_ring, 0);
+		amdgpu_ring_write(kiq_ring, 0);
+
 	} else {
 		amdgpu_ring_write(kiq_ring, 0);
 		amdgpu_ring_write(kiq_ring, 0);
@@ -5463,11 +5464,15 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 
 	control |= ib->length_dw | (vmid << 24);
 
-	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+	if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
 		control |= INDIRECT_BUFFER_PRE_ENB(1);
 
+		if (flags & AMDGPU_IB_PREEMPTED)
+			control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
-			gfx_v9_0_ring_emit_de_meta(ring);
+			gfx_v9_0_ring_emit_de_meta(ring,
+				 (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
 	}
 
 	amdgpu_ring_write(ring, header);
@@ -5522,6 +5527,7 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
 
 	/* RELEASE_MEM - flush caches, send int */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
@@ -5532,6 +5538,7 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
 					       EOP_TC_WB_ACTION_EN |
 					       EOP_TC_MD_ACTION_EN)) |
 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+				 (exec ? EOP_EXEC : 0x0) |
 				 EVENT_INDEX(5)));
 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
 
@@ -5637,33 +5644,132 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, 0);
 }
 
-static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
 {
+	struct amdgpu_device *adev = ring->adev;
 	struct v9_ce_ib_state ce_payload = {0};
-	uint64_t csa_addr;
+	uint64_t offset, ce_payload_gpu_addr;
+	void *ce_payload_cpu_addr;
 	int cnt;
 
 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
-	csa_addr = amdgpu_csa_vaddr(ring->adev);
+
+	if (ring->is_mes_queue) {
+		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+				  gfx[0].gfx_meta_data) +
+			offsetof(struct v9_gfx_meta_data, ce_payload);
+		ce_payload_gpu_addr =
+			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+		ce_payload_cpu_addr =
+			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+	} else {
+		offset = offsetof(struct v9_gfx_meta_data, ce_payload);
+		ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+	}
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
 				 WRITE_DATA_DST_SEL(8) |
 				 WR_CONFIRM) |
 				 WRITE_DATA_CACHE_POLICY(0));
-	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
-	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
-	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
+	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
+
+	if (resume)
+		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
+					   sizeof(ce_payload) >> 2);
+	else
+		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
+					   sizeof(ce_payload) >> 2);
+}
+
+static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+	int i, r = 0;
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+	struct amdgpu_ring *kiq_ring = &kiq->ring;
+	unsigned long flags;
+
+	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+		return -EINVAL;
+
+	spin_lock_irqsave(&kiq->ring_lock, flags);
+
+	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+		spin_unlock_irqrestore(&kiq->ring_lock, flags);
+		return -ENOMEM;
+	}
+
+	/* assert preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+	ring->trail_seq += 1;
+	amdgpu_ring_alloc(ring, 8);
+	gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+				  ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC);
+	/* assert IB preemption, emit the trailing fence */
+	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+				   ring->trail_fence_gpu_addr,
+				   ring->trail_seq);
+
+	amdgpu_ring_commit(kiq_ring);
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+	/* poll the trailing fence */
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (ring->trail_seq ==
+		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+			break;
+		udelay(1);
+	}
+
+	if (i >= adev->usec_timeout) {
+		r = -EINVAL;
+		DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
+	}
+
+	amdgpu_ring_commit(ring);
+	/*reset the CP_VMID_PREEMPT after trailing fence*/
+	WREG32_SOC15(GC, 0, mmCP_VMID_PREEMPT, 0x0);
+
+	/* deassert preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, true);
+	return r;
 }
 
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
 {
+	struct amdgpu_device *adev = ring->adev;
 	struct v9_de_ib_state de_payload = {0};
-	uint64_t csa_addr, gds_addr;
+	uint64_t offset, gds_addr, de_payload_gpu_addr;
+	void *de_payload_cpu_addr;
 	int cnt;
 
-	csa_addr = amdgpu_csa_vaddr(ring->adev);
-	gds_addr = csa_addr + 4096;
+	if (ring->is_mes_queue) {
+		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+				  gfx[0].gfx_meta_data) +
+			offsetof(struct v9_gfx_meta_data, de_payload);
+		de_payload_gpu_addr =
+			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+		de_payload_cpu_addr =
+			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+				  gfx[0].gds_backup) +
+			offsetof(struct v9_gfx_meta_data, de_payload);
+		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+	} else {
+		offset = offsetof(struct v9_gfx_meta_data, de_payload);
+		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
+				 PAGE_SIZE);
+	}
+
 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
 
@@ -5673,9 +5779,15 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 				 WRITE_DATA_DST_SEL(8) |
 				 WR_CONFIRM) |
 				 WRITE_DATA_CACHE_POLICY(0));
-	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
-	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
-	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
+	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
+
+	if (resume)
+		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
+					   sizeof(de_payload) >> 2);
+	else
+		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
+					   sizeof(de_payload) >> 2);
 }
 
 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
@@ -5691,8 +5803,9 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
 {
 	uint32_t dw2 = 0;
 
-	if (amdgpu_sriov_vf(ring->adev))
-		gfx_v9_0_ring_emit_ce_meta(ring);
+	if (amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp)
+		gfx_v9_0_ring_emit_ce_meta(ring,
+				    (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
 
 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
@@ -7041,6 +7154,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
+	.preempt_ib = gfx_v9_0_ring_preempt_ib,
 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index 799925d22fc8..614e9f8467fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -162,6 +162,7 @@
 		 * 2 - Bypass
 		 */
 #define     INDIRECT_BUFFER_PRE_ENB(x)		 ((x) << 21)
+#define     INDIRECT_BUFFER_PRE_RESUME(x)           ((x) << 30)
 #define	PACKET3_COPY_DATA				0x40
 #define	PACKET3_PFP_SYNC_ME				0x42
 #define	PACKET3_COND_WRITE				0x45
@@ -184,6 +185,7 @@
 #define		EOP_TC_ACTION_EN                        (1 << 17) /* L2 */
 #define		EOP_TC_NC_ACTION_EN			(1 << 19)
 #define		EOP_TC_MD_ACTION_EN			(1 << 21) /* L2 metadata */
+#define		EOP_EXEC					(1 << 28) /* For Trailing Fence */
 
 #define		DATA_SEL(x)                             ((x) << 29)
 		/* 0 - discard
-- 
2.25.1



* [PATCH 2/3] drm/amdgpu: add mcbp support for sdma v4.0
  2022-07-15  8:43 [PATCH 1/3] drm/amdgpu: modify mcbp implement for gfx9 jiadong.zhu
@ 2022-07-15  8:43 ` jiadong.zhu
  2022-07-15  8:43 ` [PATCH 3/3] drm/amdgpu: skip put fence if signal fails jiadong.zhu
  1 sibling, 0 replies; 8+ messages in thread
From: jiadong.zhu @ 2022-07-15  8:43 UTC (permalink / raw)
  To: amd-gfx; +Cc: Ray.Huang, Jiadong.Zhu, aaron.liu

From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>

Set the MIDCMD_PREEMPT_ENABLE field of SDMA0_CNTL to enable MCBP when the
amdgpu_mcbp module parameter is set.
Add an sdma preempt_ib function, used by the debugfs preemption test
(see the sketch below).
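
Unlike the gfx ring in patch 1, which requests preemption through a KIQ
unmap_queues packet, the SDMA engine is preempted by writing its
per-instance GFX_PREEMPT register directly. A minimal sketch of that part
of the flow (condensed from the sdma_v4_0_ring_preempt_ib hunk below;
illustrative only):

u32 preempt_reg = (index == 0) ? mmSDMA0_GFX_PREEMPT : mmSDMA1_GFX_PREEMPT;

WREG32(preempt_reg, 1);         /* assert preemption */
/* ... poll ring->trail_fence_cpu_addr until it reaches ring->trail_seq,
 * exactly as in the gfx9 path from patch 1 ... */
WREG32(preempt_reg, 0);         /* deassert preemption */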
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 53 ++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index d35f18536da2..bc69af4b4ada 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1502,6 +1502,11 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
 		/* set utc l1 enable flag always to 1 */
 		temp = RREG32_SDMA(i, mmSDMA0_CNTL);
 		temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+
+		if (amdgpu_mcbp){
+			/* enable MCBP */
+			temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
+		}
 		WREG32_SDMA(i, mmSDMA0_CNTL, temp);
 
 		if (!amdgpu_sriov_vf(adev)) {
@@ -2102,6 +2107,53 @@ static int sdma_v4_0_soft_reset(void *handle)
 	return 0;
 }
 
+static int sdma_v4_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+	int i, r = 0;
+	struct amdgpu_device *adev = ring->adev;
+	u32 index = 0;
+	u64 sdma_gfx_preempt;
+
+	amdgpu_sdma_get_index_from_ring(ring, &index);
+	if (index == 0)
+		sdma_gfx_preempt = mmSDMA0_GFX_PREEMPT;
+	else
+		sdma_gfx_preempt = mmSDMA1_GFX_PREEMPT;
+
+	/* assert preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+	/* emit the trailing fence */
+	ring->trail_seq += 1;
+	amdgpu_ring_alloc(ring, 10);
+	sdma_v4_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+				  ring->trail_seq, 0);
+	amdgpu_ring_commit(ring);
+
+	/* assert IB preemption */
+	WREG32(sdma_gfx_preempt, 1);
+
+	/* poll the trailing fence */
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (ring->trail_seq ==
+		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+			break;
+		udelay(1);
+	}
+
+	if (i >= adev->usec_timeout) {
+		r = -EINVAL;
+		DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
+	}
+
+	/* deassert IB preemption */
+	WREG32(sdma_gfx_preempt, 0);
+
+	/* deassert the preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, true);
+	return r;
+}
+
 static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev,
 					struct amdgpu_irq_src *source,
 					unsigned type,
@@ -2435,6 +2487,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
 	.emit_wreg = sdma_v4_0_ring_emit_wreg,
 	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+	.preempt_ib = sdma_v4_0_ring_preempt_ib,
 };
 
 /*
-- 
2.25.1



* [PATCH 3/3] drm/amdgpu: skip put fence if signal fails
  2022-07-15  8:43 [PATCH 1/3] drm/amdgpu: modify mcbp implement for gfx9 jiadong.zhu
  2022-07-15  8:43 ` [PATCH 2/3] drm/amdgpu: add mcbp support for sdma v4.0 jiadong.zhu
@ 2022-07-15  8:43 ` jiadong.zhu
  2022-07-15  8:48   ` Christian König
  1 sibling, 1 reply; 8+ messages in thread
From: jiadong.zhu @ 2022-07-15  8:43 UTC (permalink / raw)
  To: amd-gfx; +Cc: Ray.Huang, Jiadong.Zhu, aaron.liu

From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>

dma_fence_signal returning non-zero indicates that the fence was already
signaled, and its reference dropped, somewhere else.
Skip dma_fence_put in that case to keep the fence refcount correct.
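
For reference, dma_fence_signal() returns 0 when this call signals the
fence and -EINVAL when the fence was already signaled (or is NULL); the
return value says nothing about reference ownership. A minimal sketch of
the proposed pattern with that caveat spelled out (illustrative only; see
the review discussion further down in the thread):

/* Only drop our reference when we were the one to signal the fence.
 * Caveat: a non-zero return only means "already signaled", not "already
 * put", so this couples refcounting to signaling state, which is the
 * point the review below objects to. */
if (!dma_fence_signal(fence))
        dma_fence_put(fence);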

Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index f4ed0785d523..93c1a5e83835 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1500,8 +1500,8 @@ static void amdgpu_ib_preempt_signal_fences(struct dma_fence **fences,
 		fence = fences[i];
 		if (!fence)
 			continue;
-		dma_fence_signal(fence);
-		dma_fence_put(fence);
+		if (!dma_fence_signal(fence))
+			dma_fence_put(fence);
 	}
 }
 
-- 
2.25.1



* Re: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails
  2022-07-15  8:43 ` [PATCH 3/3] drm/amdgpu: skip put fence if signal fails jiadong.zhu
@ 2022-07-15  8:48   ` Christian König
  2022-07-15  9:12     ` Zhu, Jiadong
  0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2022-07-15  8:48 UTC (permalink / raw)
  To: jiadong.zhu, amd-gfx, Andrey Grodzovsky; +Cc: Ray.Huang, aaron.liu

On 15.07.22 at 10:43, jiadong.zhu@amd.com wrote:
> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>
> Dma_fence_signal returning non-zero indicates
> that the fence is signaled and put somewhere else.
> Skip dma_fence_put to make the fence refcount correct.

Well quite a big NAK on this.

Reference counting should be completely independent of where a fence signals.

Andrey can you take a look at this as well?

Thanks,
Christian.

>
> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index f4ed0785d523..93c1a5e83835 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -1500,8 +1500,8 @@ static void amdgpu_ib_preempt_signal_fences(struct dma_fence **fences,
>   		fence = fences[i];
>   		if (!fence)
>   			continue;
> -		dma_fence_signal(fence);
> -		dma_fence_put(fence);
> +		if (!dma_fence_signal(fence))
> +			dma_fence_put(fence);
>   	}
>   }
>   



* RE: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails
  2022-07-15  8:48   ` Christian König
@ 2022-07-15  9:12     ` Zhu, Jiadong
  2022-07-15  9:28       ` Zhu, Jiadong
  0 siblings, 1 reply; 8+ messages in thread
From: Zhu, Jiadong @ 2022-07-15  9:12 UTC (permalink / raw)
  To: Christian König, amd-gfx, Grodzovsky,  Andrey; +Cc: Huang, Ray, Liu, Aaron


Hi Christian,

When amdgpu_ib_preempt_job_recovery resubmits a job, run_job() returns the same hw fence because of this commit:

static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched)
{
        struct drm_sched_job *s_job;
        struct dma_fence *fence;

        spin_lock(&sched->job_list_lock);
        list_for_each_entry(s_job, &sched->pending_list, list) {
                fence = sched->ops->run_job(s_job);       //fence returned has the same address with swapped fences
                dma_fence_put(fence);
        }
        spin_unlock(&sched->job_list_lock);
}



commit c530b02f39850a639b72d01ebbf7e5d745c60831
Author: Jack Zhang <Jack.Zhang1@amd.com>
Date:   Wed May 12 15:06:35 2021 +0800

    drm/amd/amdgpu embed hw_fence into amdgpu_job

    Why: Previously hw fence is alloced separately with job.
    It caused historical lifetime issues and corner cases.
    The ideal situation is to take fence to manage both job
    and fence's lifetime, and simplify the design of gpu-scheduler.

    How:
    We propose to embed hw_fence into amdgpu_job.
    1. We cover the normal job submission by this method.
    2. For ib_test, and submit without a parent job keep the
    legacy way to create a hw fence separately.
    v2:
    use AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT to show that the fence is
    embedded in a job.
    v3:
    remove redundant variable ring in amdgpu_job
    v4:
    add tdr sequence support for this feature. Add a job_run_counter to
    indicate whether this job is a resubmit job.
    v5
    add missing handling in amdgpu_fence_enable_signaling

    Signed-off-by: Jingwen Chen <Jingwen.Chen2@amd.com>
    Signed-off-by: Jack Zhang <Jack.Zhang7@hotmail.com>
    Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
    Reviewed by: Monk Liu <monk.liu@amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher@amd.com>


Thus the fence we swapped out is signaled and put twice in the following two functions, and we get "refcount_t: underflow; use-after-free." errors later.

                /* wait for jobs finished */
                amdgpu_fence_wait_empty(ring); //signal and put the resubmitted jobs fence.

                /* signal the old fences */
                amdgpu_ib_preempt_signal_fences(fences, length);   //signal and put the previous swapped fence, signal would return -22.
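
In other words, because the hw fence is now embedded in the amdgpu_job,
amdgpu_fence_emit for a resubmitted job hands back the very fence object
we swapped out, instead of allocating a fresh one. Roughly (a simplified
sketch of the logic introduced by the commit above, not the verbatim code):

/* inside amdgpu_fence_emit() */
if (job == NULL) {
        /* standalone submissions (e.g. IB tests) still allocate a
         * separate fence from the fence slab */
        ...
} else {
        /* normal jobs use the fence embedded in amdgpu_job */
        fence = &job->hw_fence;
}

if (job && job->job_run_counter) {
        /* resubmitted job: reuse the embedded fence and only refresh
         * its seqno instead of initializing a new fence */
        fence->seqno = seq;
} else {
        dma_fence_init(fence, &amdgpu_fence_ops, &ring->fence_drv.lock,
                       adev->fence_context + ring->idx, seq);
}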

Thanks,
Jiadong


-----Original Message-----
From: Christian König <ckoenig.leichtzumerken@gmail.com>
Sent: Friday, July 15, 2022 4:48 PM
To: Zhu, Jiadong <Jiadong.Zhu@amd.com>; amd-gfx@lists.freedesktop.org; Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
Cc: Huang, Ray <Ray.Huang@amd.com>; Liu, Aaron <Aaron.Liu@amd.com>
Subject: Re: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails


On 15.07.22 at 10:43, jiadong.zhu@amd.com wrote:
> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>
> Dma_fence_signal returning non-zero indicates that the fence is
> signaled and put somewhere else.
> Skip dma_fence_put to make the fence refcount correct.

Well quite a big NAK on this.

Reference counting should be completely independent of where a fence signals.

Andrey can you take a look at this as well?

Thanks,
Christian.

>
> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index f4ed0785d523..93c1a5e83835 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -1500,8 +1500,8 @@ static void amdgpu_ib_preempt_signal_fences(struct dma_fence **fences,
>               fence = fences[i];
>               if (!fence)
>                       continue;
> -             dma_fence_signal(fence);
> -             dma_fence_put(fence);
> +             if (!dma_fence_signal(fence))
> +                     dma_fence_put(fence);
>       }
>   }
>



* RE: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails
  2022-07-15  9:12     ` Zhu, Jiadong
@ 2022-07-15  9:28       ` Zhu, Jiadong
  2022-07-15 15:42         ` Andrey Grodzovsky
  0 siblings, 1 reply; 8+ messages in thread
From: Zhu, Jiadong @ 2022-07-15  9:28 UTC (permalink / raw)
  To: Christian König, amd-gfx, Grodzovsky,  Andrey; +Cc: Huang, Ray, Liu, Aaron


Updated some comments

-----Original Message-----
From: Zhu, Jiadong
Sent: Friday, July 15, 2022 5:13 PM
To: Christian König <ckoenig.leichtzumerken@gmail.com>; amd-gfx@lists.freedesktop.org; Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
Cc: Huang, Ray <Ray.Huang@amd.com>; Liu, Aaron <Aaron.Liu@amd.com>
Subject: RE: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails

Hi Christian,

When amdgpu_ib_preempt_job_recovery resubmits a job, run_job() returns the same hw fence because of this commit:

static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched) {
        struct drm_sched_job *s_job;
        struct dma_fence *fence;

        spin_lock(&sched->job_list_lock);
        list_for_each_entry(s_job, &sched->pending_list, list) {
                fence = sched->ops->run_job(s_job);       //fence returned has the same address with swapped fences
                dma_fence_put(fence);
        }
        spin_unlock(&sched->job_list_lock);
}



commit c530b02f39850a639b72d01ebbf7e5d745c60831
Author: Jack Zhang <Jack.Zhang1@amd.com>
Date:   Wed May 12 15:06:35 2021 +0800

    drm/amd/amdgpu embed hw_fence into amdgpu_job

    Why: Previously hw fence is alloced separately with job.
    It caused historical lifetime issues and corner cases.
    The ideal situation is to take fence to manage both job
    and fence's lifetime, and simplify the design of gpu-scheduler.

    How:
    We propose to embed hw_fence into amdgpu_job.
    1. We cover the normal job submission by this method.
    2. For ib_test, and submit without a parent job keep the
    legacy way to create a hw fence separately.
    v2:
    use AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT to show that the fence is
    embedded in a job.
    v3:
    remove redundant variable ring in amdgpu_job
    v4:
    add tdr sequence support for this feature. Add a job_run_counter to
    indicate whether this job is a resubmit job.
    v5
    add missing handling in amdgpu_fence_enable_signaling

    Signed-off-by: Jingwen Chen <Jingwen.Chen2@amd.com>
    Signed-off-by: Jack Zhang <Jack.Zhang7@hotmail.com>
    Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
    Reviewed by: Monk Liu <monk.liu@amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher@amd.com>


Thus the fence we swapped out is signaled and put twice in the following two functions, and we get "refcount_t: underflow; use-after-free." errors later.

                /* wait for jobs finished */
                amdgpu_fence_wait_empty(ring); //wait on the resubmitted fence which is signaled and put somewhere else. The refcount decreased by 1 after amdgpu_fence_wait_empty.

                /* signal the old fences */
                amdgpu_ib_preempt_signal_fences(fences, length);   //signal and put the previous swapped fence, signal would return -22.

Thanks,
Jiadong


-----Original Message-----
From: Christian König <ckoenig.leichtzumerken@gmail.com>
Sent: Friday, July 15, 2022 4:48 PM
To: Zhu, Jiadong <Jiadong.Zhu@amd.com>; amd-gfx@lists.freedesktop.org; Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
Cc: Huang, Ray <Ray.Huang@amd.com>; Liu, Aaron <Aaron.Liu@amd.com>
Subject: Re: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails


On 15.07.22 at 10:43, jiadong.zhu@amd.com wrote:
> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>
> Dma_fence_signal returning non-zero indicates that the fence is
> signaled and put somewhere else.
> Skip dma_fence_put to make the fence refcount correct.

Well quite a big NAK on this.

Reference counting should be completely independent of where a fence signals.

Andrey can you take a look at this as well?

Thanks,
Christian.

>
> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index f4ed0785d523..93c1a5e83835 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -1500,8 +1500,8 @@ static void amdgpu_ib_preempt_signal_fences(struct dma_fence **fences,
>               fence = fences[i];
>               if (!fence)
>                       continue;
> -             dma_fence_signal(fence);
> -             dma_fence_put(fence);
> +             if (!dma_fence_signal(fence))
> +                     dma_fence_put(fence);
>       }
>   }
>



* Re: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails
  2022-07-15  9:28       ` Zhu, Jiadong
@ 2022-07-15 15:42         ` Andrey Grodzovsky
  2022-07-18  4:53           ` Zhu, Jiadong
  0 siblings, 1 reply; 8+ messages in thread
From: Andrey Grodzovsky @ 2022-07-15 15:42 UTC (permalink / raw)
  To: Zhu, Jiadong, Christian König, amd-gfx; +Cc: Huang, Ray, Liu, Aaron


On 2022-07-15 05:28, Zhu, Jiadong wrote:
> [AMD Official Use Only - General]
>
> Updated some comments
>
> -----Original Message-----
> From: Zhu, Jiadong
> Sent: Friday, July 15, 2022 5:13 PM
> To: Christian König <ckoenig.leichtzumerken@gmail.com>; amd-gfx@lists.freedesktop.org; Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
> Cc: Huang, Ray <Ray.Huang@amd.com>; Liu, Aaron <Aaron.Liu@amd.com>
> Subject: RE: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails
>
> Hi Christian,
>
> The resubmitted job in function amdgpu_ib_preempt_job_recovery returns the same hw fence because of this commit:
>
> static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched) {
>          struct drm_sched_job *s_job;
>          struct dma_fence *fence;
>
>          spin_lock(&sched->job_list_lock);
>          list_for_each_entry(s_job, &sched->pending_list, list) {
>                  fence = sched->ops->run_job(s_job);       //fence returned has the same address with swapped fences
>                  dma_fence_put(fence);
>          }
>          spin_unlock(&sched->job_list_lock);
> }
>
>
>
> commit c530b02f39850a639b72d01ebbf7e5d745c60831
> Author: Jack Zhang <Jack.Zhang1@amd.com>
> Date:   Wed May 12 15:06:35 2021 +0800
>
>      drm/amd/amdgpu embed hw_fence into amdgpu_job
>
>      Why: Previously hw fence is alloced separately with job.
>      It caused historical lifetime issues and corner cases.
>      The ideal situation is to take fence to manage both job
>      and fence's lifetime, and simplify the design of gpu-scheduler.
>
>      How:
>      We propose to embed hw_fence into amdgpu_job.
>      1. We cover the normal job submission by this method.
>      2. For ib_test, and submit without a parent job keep the
>      legacy way to create a hw fence separately.
>      v2:
>      use AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT to show that the fence is
>      embedded in a job.
>      v3:
>      remove redundant variable ring in amdgpu_job
>      v4:
>      add tdr sequence support for this feature. Add a job_run_counter to
>      indicate whether this job is a resubmit job.
>      v5
>      add missing handling in amdgpu_fence_enable_signaling
>
>      Signed-off-by: Jingwen Chen <Jingwen.Chen2@amd.com>
>      Signed-off-by: Jack Zhang <Jack.Zhang7@hotmail.com>
>      Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>      Reviewed by: Monk Liu <monk.liu@amd.com>
>      Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>
>
> Thus the fence we swapped out is signaled and put twice in the following 2 functions and we get " refcount_t: underflow; use-after-free. " errors latter.
>
>                  /* wait for jobs finished */
>                  amdgpu_fence_wait_empty(ring); //wait on the resubmitted fence which is signaled and put somewhere else. The refcount decreased by 1 after amdgpu_fence_wait_empty.
>
>                  /* signal the old fences */
>                  amdgpu_ib_preempt_signal_fences(fences, length);   //signal and put the previous swapped fence, signal would return -22.
>
> Thanks,
> Jiadong


Did you have the commit 'drm/amdgpu: Follow up change to previous drm
scheduler change.' in your branch when you encountered this problem?
I don't see an underflow issue for the preempted job when inspecting the
code with this commit in mind -

amdgpu_fence_emit
             dma_fence_init 1
             dma_fence_get(fence) 2
             rcu_assign_pointer(*ptr, dma_fence_get(fence) 3

drm_sched_main
     s_fence->parent = dma_fence_get(fence); 4
     dma_fence_put(fence); 3

amdgpu_ib_preempt_job_recovery
     amdgpu_fence_emit
         if (job && job->job_run_counter) -> dma_fence_get(fence); 4
         rcu_assign_pointer(*ptr, dma_fence_get(fence)); 5

     dma_fence_put(fence); 4

amdgpu_fence_wait_empty
     dma_fence_get_rcu(fence) 5
     dma_fence_put(fence) 4

amdgpu_process_fence (EOP interrupt for re-submission of preempted job)
     dma_fence_put 3

amdgpu_ib_preempt_signal_fences
     dma_fence_put 2

amdgpu_job_free_cb
     dma_fence_put(&job->hw_fence) 1

drm_sched_fence_release_scheduled
     dma_fence_put(fence->parent); 0

Also take a look here for reference - 
https://drive.google.com/file/d/1yEoeW6OQC9WnwmzFW6NBLhFP_jD0xcHm/view

Andrey


>
>
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Friday, July 15, 2022 4:48 PM
> To: Zhu, Jiadong <Jiadong.Zhu@amd.com>; amd-gfx@lists.freedesktop.org; Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
> Cc: Huang, Ray <Ray.Huang@amd.com>; Liu, Aaron <Aaron.Liu@amd.com>
> Subject: Re: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails
>
> [CAUTION: External Email]
>
> Am 15.07.22 um 10:43 schrieb jiadong.zhu@amd.com:
>> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>>
>> Dma_fence_signal returning non-zero indicates that the fence is
>> signaled and put somewhere else.
>> Skip dma_fence_put to make the fence refcount correct.
> Well quite a big NAK on this.
>
> Reference counting should be completely independent where a fence signals.
>
> Andrey can you take a look at this as well?
>
> Thanks,
> Christian.
>
>> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++--
>>    1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> index f4ed0785d523..93c1a5e83835 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> @@ -1500,8 +1500,8 @@ static void amdgpu_ib_preempt_signal_fences(struct dma_fence **fences,
>>                fence = fences[i];
>>                if (!fence)
>>                        continue;
>> -             dma_fence_signal(fence);
>> -             dma_fence_put(fence);
>> +             if (!dma_fence_signal(fence))
>> +                     dma_fence_put(fence);
>>        }
>>    }
>>


* RE: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails
  2022-07-15 15:42         ` Andrey Grodzovsky
@ 2022-07-18  4:53           ` Zhu, Jiadong
  0 siblings, 0 replies; 8+ messages in thread
From: Zhu, Jiadong @ 2022-07-18  4:53 UTC (permalink / raw)
  To: Grodzovsky, Andrey, Christian König, amd-gfx; +Cc: Huang, Ray, Liu, Aaron


Hi Andrey,

Thank you for the clarification.
The refcounting done by amdgpu_fence_emit on my side was different.
After updating my code to pick up the patch 'drm/amdgpu: Follow up change to previous drm scheduler change.', the "underflow" disappears.

My patch is not needed.


Thanks,
Jiadong


-----Original Message-----
From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
Sent: Friday, July 15, 2022 11:43 PM
To: Zhu, Jiadong <Jiadong.Zhu@amd.com>; Christian König <ckoenig.leichtzumerken@gmail.com>; amd-gfx@lists.freedesktop.org
Cc: Huang, Ray <Ray.Huang@amd.com>; Liu, Aaron <Aaron.Liu@amd.com>
Subject: Re: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails


On 2022-07-15 05:28, Zhu, Jiadong wrote:
> [AMD Official Use Only - General]
>
> Updated some comments
>
> -----Original Message-----
> From: Zhu, Jiadong
> Sent: Friday, July 15, 2022 5:13 PM
> To: Christian König <ckoenig.leichtzumerken@gmail.com>;
> amd-gfx@lists.freedesktop.org; Grodzovsky, Andrey
> <Andrey.Grodzovsky@amd.com>
> Cc: Huang, Ray <Ray.Huang@amd.com>; Liu, Aaron <Aaron.Liu@amd.com>
> Subject: RE: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails
>
> Hi Christian,
>
> The resubmitted job in function amdgpu_ib_preempt_job_recovery returns the same hw fence because of this commit:
>
> static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched) {
>          struct drm_sched_job *s_job;
>          struct dma_fence *fence;
>
>          spin_lock(&sched->job_list_lock);
>          list_for_each_entry(s_job, &sched->pending_list, list) {
>                  fence = sched->ops->run_job(s_job);       //fence returned has the same address with swapped fences
>                  dma_fence_put(fence);
>          }
>          spin_unlock(&sched->job_list_lock);
> }
>
>
>
> commit c530b02f39850a639b72d01ebbf7e5d745c60831
> Author: Jack Zhang <Jack.Zhang1@amd.com>
> Date:   Wed May 12 15:06:35 2021 +0800
>
>      drm/amd/amdgpu embed hw_fence into amdgpu_job
>
>      Why: Previously hw fence is alloced separately with job.
>      It caused historical lifetime issues and corner cases.
>      The ideal situation is to take fence to manage both job
>      and fence's lifetime, and simplify the design of gpu-scheduler.
>
>      How:
>      We propose to embed hw_fence into amdgpu_job.
>      1. We cover the normal job submission by this method.
>      2. For ib_test, and submit without a parent job keep the
>      legacy way to create a hw fence separately.
>      v2:
>      use AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT to show that the fence is
>      embedded in a job.
>      v3:
>      remove redundant variable ring in amdgpu_job
>      v4:
>      add tdr sequence support for this feature. Add a job_run_counter to
>      indicate whether this job is a resubmit job.
>      v5
>      add missing handling in amdgpu_fence_enable_signaling
>
>      Signed-off-by: Jingwen Chen <Jingwen.Chen2@amd.com>
>      Signed-off-by: Jack Zhang <Jack.Zhang7@hotmail.com>
>      Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>      Reviewed by: Monk Liu <monk.liu@amd.com>
>      Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>
>
> Thus the fence we swapped out is signaled and put twice in the following 2 functions and we get " refcount_t: underflow; use-after-free. " errors latter.
>
>                  /* wait for jobs finished */
>                  amdgpu_fence_wait_empty(ring); //wait on the resubmitted fence which is signaled and put somewhere else. The refcount decreased by 1 after amdgpu_fence_wait_empty.
>
>                  /* signal the old fences */
>                  amdgpu_ib_preempt_signal_fences(fences, length);   //signal and put the previous swapped fence, signal would return -22.
>
> Thanks,
> Jiadong


Did you have the commit 'drm/amdgpu: Follow up change to previous drm
scheduler change.' in your branch when you encountered this problem?
I don't see an underflow issue for the preempted job when inspecting the
code with this commit in mind -

amdgpu_fence_emit
             dma_fence_init 1
             dma_fence_get(fence) 2
             rcu_assign_pointer(*ptr, dma_fence_get(fence) 3

drm_sched_main
     s_fence->parent = dma_fence_get(fence); 4
     dma_fence_put(fence); 3

amdgpu_ib_preempt_job_recovery
     amdgpu_fence_emit
         if (job && job->job_run_counter) -> dma_fence_get(fence); 4
         rcu_assign_pointer(*ptr, dma_fence_get(fence)); 5

     dma_fence_put(fence); 4

amdgpu_fence_wait_empty
     dma_fence_get_rcu(fence) 5
     dma_fence_put(fence) 4

amdgpu_process_fence (EOP interrupt for re-submission of preempted job)
     dma_fence_put 3

amdgpu_ib_preempt_signal_fences
     dma_fence_put 2

amdgpu_job_free_cb
     dma_fence_put(&job->hw_fence) 1

drm_sched_fence_release_scheduled
     dma_fence_put(fence->parent); 0

Also take a look here for reference -
https://drive.google.com/file/d/1yEoeW6OQC9WnwmzFW6NBLhFP_jD0xcHm/view

Andrey


>
>
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Friday, July 15, 2022 4:48 PM
> To: Zhu, Jiadong <Jiadong.Zhu@amd.com>; amd-gfx@lists.freedesktop.org;
> Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
> Cc: Huang, Ray <Ray.Huang@amd.com>; Liu, Aaron <Aaron.Liu@amd.com>
> Subject: Re: [PATCH 3/3] drm/amdgpu: skip put fence if signal fails
>
> [CAUTION: External Email]
>
> Am 15.07.22 um 10:43 schrieb jiadong.zhu@amd.com:
>> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
>>
>> Dma_fence_signal returning non-zero indicates that the fence is
>> signaled and put somewhere else.
>> Skip dma_fence_put to make the fence refcount correct.
> Well quite a big NAK on this.
>
> Reference counting should be completely independent where a fence signals.
>
> Andrey can you take a look at this as well?
>
> Thanks,
> Christian.
>
>> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++--
>>    1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> index f4ed0785d523..93c1a5e83835 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> @@ -1500,8 +1500,8 @@ static void amdgpu_ib_preempt_signal_fences(struct dma_fence **fences,
>>                fence = fences[i];
>>                if (!fence)
>>                        continue;
>> -             dma_fence_signal(fence);
>> -             dma_fence_put(fence);
>> +             if (!dma_fence_signal(fence))
>> +                     dma_fence_put(fence);
>>        }
>>    }
>>

