linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] drm/msm: Improved hang detection
@ 2022-10-31 22:54 Rob Clark
  2022-10-31 22:54 ` [PATCH 1/2] drm/msm/adreno: Simplify read64/write64 helpers Rob Clark
  2022-10-31 22:54 ` [PATCH 2/2] drm/msm: Hangcheck progress detection Rob Clark
  0 siblings, 2 replies; 7+ messages in thread
From: Rob Clark @ 2022-10-31 22:54 UTC (permalink / raw)
  To: dri-devel
  Cc: linux-arm-msm, freedreno, Rob Clark, Akhil P Oommen, Chia-I Wu,
	Dmitry Baryshkov, Douglas Anderson, Konrad Dybcio, open list,
	Sean Paul, Vladimir Lypak

From: Rob Clark <robdclark@chromium.org>

Try to detect when submit jobs are making forward progress and give them
a bit more time.

Rob Clark (2):
  drm/msm/adreno: Simplify read64/write64 helpers
  drm/msm: Hangcheck progress detection

 drivers/gpu/drm/msm/adreno/a4xx_gpu.c       |  3 +-
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c       | 27 ++++------
 drivers/gpu/drm/msm/adreno/a5xx_preempt.c   |  4 +-
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c       | 58 +++++++++++++++------
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c |  3 +-
 drivers/gpu/drm/msm/msm_drv.h               |  8 ++-
 drivers/gpu/drm/msm/msm_gpu.c               | 20 ++++++-
 drivers/gpu/drm/msm/msm_gpu.h               | 17 +++---
 drivers/gpu/drm/msm/msm_ringbuffer.h        | 24 +++++++++
 9 files changed, 115 insertions(+), 49 deletions(-)

-- 
2.37.3


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/2] drm/msm/adreno: Simplify read64/write64 helpers
  2022-10-31 22:54 [PATCH 0/2] drm/msm: Improved hang detection Rob Clark
@ 2022-10-31 22:54 ` Rob Clark
  2022-11-01  0:46   ` Dmitry Baryshkov
  2022-11-01 20:03   ` Akhil P Oommen
  2022-10-31 22:54 ` [PATCH 2/2] drm/msm: Hangcheck progress detection Rob Clark
  1 sibling, 2 replies; 7+ messages in thread
From: Rob Clark @ 2022-10-31 22:54 UTC (permalink / raw)
  To: dri-devel
  Cc: linux-arm-msm, freedreno, Rob Clark, Rob Clark, Abhinav Kumar,
	Dmitry Baryshkov, Sean Paul, David Airlie, Daniel Vetter,
	Akhil P Oommen, Vladimir Lypak, Douglas Anderson, Chia-I Wu,
	Konrad Dybcio, open list

From: Rob Clark <robdclark@chromium.org>

The _HI reg is always following the _LO reg, so no need to pass these
offsets seprately.

Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/a4xx_gpu.c       |  3 +--
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c       | 27 ++++++++-------------
 drivers/gpu/drm/msm/adreno/a5xx_preempt.c   |  4 +--
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c       | 24 ++++++------------
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c |  3 +--
 drivers/gpu/drm/msm/msm_gpu.h               | 12 ++++-----
 6 files changed, 27 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index 7cb8d9849c07..a10feb8a4194 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -606,8 +606,7 @@ static int a4xx_pm_suspend(struct msm_gpu *gpu) {
 
 static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 {
-	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
-		REG_A4XX_RBBM_PERFCTR_CP_0_HI);
+	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 3dcec7acb384..ba22d3c918bc 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -605,11 +605,9 @@ static int a5xx_ucode_init(struct msm_gpu *gpu)
 		a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
 	}
 
-	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
-		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
+	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO, a5xx_gpu->pm4_iova);
 
-	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
-		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
+	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO, a5xx_gpu->pfp_iova);
 
 	return 0;
 }
@@ -868,8 +866,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 	 * memory rendering at this point in time and we don't want to block off
 	 * part of the virtual memory space.
 	 */
-	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
-		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
+	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
 
 	/* Put the GPU into 64 bit by default */
@@ -908,8 +905,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 		return ret;
 
 	/* Set the ringbuffer address */
-	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
-		gpu->rb[0]->iova);
+	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, gpu->rb[0]->iova);
 
 	/*
 	 * If the microcode supports the WHERE_AM_I opcode then we can use that
@@ -936,7 +932,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 		}
 
 		gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
-			REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
+			    shadowptr(a5xx_gpu, gpu->rb[0]));
 	} else if (gpu->nr_rings > 1) {
 		/* Disable preemption if WHERE_AM_I isn't available */
 		a5xx_preempt_fini(gpu);
@@ -1239,9 +1235,9 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
 		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
 		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
 		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
-		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
+		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE),
 		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
-		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
+		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE),
 		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
 
 	/* Turn off the hangcheck timer to keep it from bothering us */
@@ -1427,8 +1423,7 @@ static int a5xx_pm_suspend(struct msm_gpu *gpu)
 
 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 {
-	*value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
-		REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
+	*value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO);
 
 	return 0;
 }
@@ -1465,8 +1460,7 @@ static int a5xx_crashdumper_run(struct msm_gpu *gpu,
 	if (IS_ERR_OR_NULL(dumper->ptr))
 		return -EINVAL;
 
-	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
-		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
+	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);
 
 	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
 
@@ -1666,8 +1660,7 @@ static u64 a5xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
 {
 	u64 busy_cycles;
 
-	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
-			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
+	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO);
 	*out_sample_rate = clk_get_rate(gpu->core_clk);
 
 	return busy_cycles;
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
index 8abc9a2b114a..7658e89844b4 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
@@ -137,7 +137,6 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu)
 
 	/* Set the address of the incoming preemption record */
 	gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
-		REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
 		a5xx_gpu->preempt_iova[ring->id]);
 
 	a5xx_gpu->next_ring = ring;
@@ -211,8 +210,7 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu)
 	}
 
 	/* Write a 0 to signal that we aren't switching pagetables */
-	gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
-		REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, 0);
+	gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO, 0);
 
 	/* Reset the preemption state */
 	set_preempt_state(a5xx_gpu, PREEMPT_NONE);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index fdc578016e0b..1ff605c18ee6 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -247,8 +247,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 	OUT_RING(ring, submit->seqno);
 
 	trace_msm_gpu_submit_flush(submit,
-		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
-			REG_A6XX_CP_ALWAYS_ON_COUNTER_HI));
+		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO));
 
 	a6xx_flush(gpu, ring);
 }
@@ -947,8 +946,7 @@ static int a6xx_ucode_init(struct msm_gpu *gpu)
 		}
 	}
 
-	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE,
-		REG_A6XX_CP_SQE_INSTR_BASE+1, a6xx_gpu->sqe_iova);
+	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
 
 	return 0;
 }
@@ -999,8 +997,7 @@ static int hw_init(struct msm_gpu *gpu)
 	 * memory rendering at this point in time and we don't want to block off
 	 * part of the virtual memory space.
 	 */
-	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
-		REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
+	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
 	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
 
 	/* Turn on 64 bit addressing for all blocks */
@@ -1049,11 +1046,9 @@ static int hw_init(struct msm_gpu *gpu)
 
 	if (!adreno_is_a650_family(adreno_gpu)) {
 		/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
-		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO,
-			REG_A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x00100000);
+		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
 
 		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO,
-			REG_A6XX_UCHE_GMEM_RANGE_MAX_HI,
 			0x00100000 + adreno_gpu->gmem - 1);
 	}
 
@@ -1145,8 +1140,7 @@ static int hw_init(struct msm_gpu *gpu)
 		goto out;
 
 	/* Set the ringbuffer address */
-	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, REG_A6XX_CP_RB_BASE_HI,
-		gpu->rb[0]->iova);
+	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
 
 	/* Targets that support extended APRIV can use the RPTR shadow from
 	 * hardware but all the other ones need to disable the feature. Targets
@@ -1178,7 +1172,6 @@ static int hw_init(struct msm_gpu *gpu)
 		}
 
 		gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR_LO,
-			REG_A6XX_CP_RB_RPTR_ADDR_HI,
 			shadowptr(a6xx_gpu, gpu->rb[0]));
 	}
 
@@ -1499,9 +1492,9 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
 		gpu_read(gpu, REG_A6XX_RBBM_STATUS),
 		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
 		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
-		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE, REG_A6XX_CP_IB1_BASE_HI),
+		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
 		gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
-		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE, REG_A6XX_CP_IB2_BASE_HI),
+		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
 		gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
 
 	/* Turn off the hangcheck timer to keep it from bothering us */
@@ -1712,8 +1705,7 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 	/* Force the GPU power on so we can read this register */
 	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
 
-	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
-			    REG_A6XX_CP_ALWAYS_ON_COUNTER_HI);
+	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO);
 
 	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
 
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index a5c3d1ed255a..a023d5f962dc 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -147,8 +147,7 @@ static int a6xx_crashdumper_run(struct msm_gpu *gpu,
 	/* Make sure all pending memory writes are posted */
 	wmb();
 
-	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
-		REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
+	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);
 
 	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 58a72e6b1400..585fd9c8d45a 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -540,7 +540,7 @@ static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
 	msm_rmw(gpu->mmio + (reg << 2), mask, or);
 }
 
-static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
+static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg)
 {
 	u64 val;
 
@@ -558,17 +558,17 @@ static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
 	 * when the lo is read, so make sure to read the lo first to trigger
 	 * that
 	 */
-	val = (u64) msm_readl(gpu->mmio + (lo << 2));
-	val |= ((u64) msm_readl(gpu->mmio + (hi << 2)) << 32);
+	val = (u64) msm_readl(gpu->mmio + (reg << 2));
+	val |= ((u64) msm_readl(gpu->mmio + ((reg + 1) << 2)) << 32);
 
 	return val;
 }
 
-static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
+static inline void gpu_write64(struct msm_gpu *gpu, u32 reg, u64 val)
 {
 	/* Why not a writeq here? Read the screed above */
-	msm_writel(lower_32_bits(val), gpu->mmio + (lo << 2));
-	msm_writel(upper_32_bits(val), gpu->mmio + (hi << 2));
+	msm_writel(lower_32_bits(val), gpu->mmio + (reg << 2));
+	msm_writel(upper_32_bits(val), gpu->mmio + ((reg + 1) << 2));
 }
 
 int msm_gpu_pm_suspend(struct msm_gpu *gpu);
-- 
2.37.3


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/2] drm/msm: Hangcheck progress detection
  2022-10-31 22:54 [PATCH 0/2] drm/msm: Improved hang detection Rob Clark
  2022-10-31 22:54 ` [PATCH 1/2] drm/msm/adreno: Simplify read64/write64 helpers Rob Clark
@ 2022-10-31 22:54 ` Rob Clark
  2022-11-01 19:54   ` Akhil P Oommen
  1 sibling, 1 reply; 7+ messages in thread
From: Rob Clark @ 2022-10-31 22:54 UTC (permalink / raw)
  To: dri-devel
  Cc: linux-arm-msm, freedreno, Rob Clark, Rob Clark, Abhinav Kumar,
	Dmitry Baryshkov, Sean Paul, David Airlie, Daniel Vetter,
	Akhil P Oommen, Chia-I Wu, Konrad Dybcio, Douglas Anderson,
	open list

From: Rob Clark <robdclark@chromium.org>

If the hangcheck timer expires, check if the fw's position in the
cmdstream has advanced (changed) since last timer expiration, and
allow it up to three additional "extensions" to it's alotted time.
The intention is to continue to catch "shader stuck in a loop" type
hangs quickly, but allow more time for things that are actually
making forward progress.

Because we need to sample the CP state twice to detect if there has
not been progress, this also cuts the the timer's duration in half.

Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 34 +++++++++++++++++++++++++++
 drivers/gpu/drm/msm/msm_drv.h         |  8 ++++++-
 drivers/gpu/drm/msm/msm_gpu.c         | 20 +++++++++++++++-
 drivers/gpu/drm/msm/msm_gpu.h         |  5 +++-
 drivers/gpu/drm/msm/msm_ringbuffer.h  | 24 +++++++++++++++++++
 5 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 1ff605c18ee6..3b8fb7a11dff 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1843,6 +1843,39 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 	return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
 }
 
+static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+	struct msm_cp_state cp_state = {
+		.ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
+		.ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
+		.ib1_rem  = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
+		.ib2_rem  = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
+	};
+	bool progress;
+
+	/*
+	 * Adjust the remaining data to account for what has already been
+	 * fetched from memory, but not yet consumed by the SQE.
+	 *
+	 * This is not *technically* correct, the amount buffered could
+	 * exceed the IB size due to hw prefetching ahead, but:
+	 *
+	 * (1) We aren't trying to find the exact position, just whether
+	 *     progress has been made
+	 * (2) The CP_REG_TO_MEM at the end of a submit should be enough
+	 *     to prevent prefetching into an unrelated submit.  (And
+	 *     either way, at some point the ROQ will be full.)
+	 */
+	cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB1_STAT) >> 16;
+	cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB1_STAT) >> 16;
+
+	progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
+
+	ring->last_cp_state = cp_state;
+
+	return progress;
+}
+
 static u32 a618_get_speed_bin(u32 fuse)
 {
 	if (fuse == 0)
@@ -1961,6 +1994,7 @@ static const struct adreno_gpu_funcs funcs = {
 		.create_address_space = a6xx_create_address_space,
 		.create_private_address_space = a6xx_create_private_address_space,
 		.get_rptr = a6xx_get_rptr,
+		.progress = a6xx_progress,
 	},
 	.get_timestamp = a6xx_get_timestamp,
 };
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index efcd7260f428..970a1a0ab34f 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -226,7 +226,13 @@ struct msm_drm_private {
 
 	struct drm_atomic_state *pm_state;
 
-	/* For hang detection, in ms */
+	/**
+	 * hangcheck_period: For hang detection, in ms
+	 *
+	 * Note that in practice, a submit/job will get at least two hangcheck
+	 * periods, due to checking for progress being implemented as simply
+	 * "have the CP position registers changed since last time?"
+	 */
 	unsigned int hangcheck_period;
 
 	/**
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 3dffee54a951..136f5977b0bf 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -500,6 +500,21 @@ static void hangcheck_timer_reset(struct msm_gpu *gpu)
 			round_jiffies_up(jiffies + msecs_to_jiffies(priv->hangcheck_period)));
 }
 
+static bool made_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+	if (ring->hangcheck_progress_retries >= DRM_MSM_HANGCHECK_PROGRESS_RETRIES)
+		return false;
+
+	if (!gpu->funcs->progress)
+		return false;
+
+	if (!gpu->funcs->progress(gpu, ring))
+		return false;
+
+	ring->hangcheck_progress_retries++;
+	return true;
+}
+
 static void hangcheck_handler(struct timer_list *t)
 {
 	struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);
@@ -511,9 +526,12 @@ static void hangcheck_handler(struct timer_list *t)
 	if (fence != ring->hangcheck_fence) {
 		/* some progress has been made.. ya! */
 		ring->hangcheck_fence = fence;
-	} else if (fence_before(fence, ring->fctx->last_fence)) {
+		ring->hangcheck_progress_retries = 0;
+	} else if (fence_before(fence, ring->fctx->last_fence) &&
+			!made_progress(gpu, ring)) {
 		/* no progress and not done.. hung! */
 		ring->hangcheck_fence = fence;
+		ring->hangcheck_progress_retries = 0;
 		DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
 				gpu->name, ring->id);
 		DRM_DEV_ERROR(dev->dev, "%s:     completed fence: %u\n",
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 585fd9c8d45a..d8f355e9f0b2 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -78,6 +78,8 @@ struct msm_gpu_funcs {
 	struct msm_gem_address_space *(*create_private_address_space)
 		(struct msm_gpu *gpu);
 	uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
+
+	bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
 };
 
 /* Additional state for iommu faults: */
@@ -236,7 +238,8 @@ struct msm_gpu {
 	 */
 #define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */
 
-#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */
+#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 250 /* in ms */
+#define DRM_MSM_HANGCHECK_PROGRESS_RETRIES 3
 	struct timer_list hangcheck_timer;
 
 	/* Fault info for most recent iova fault: */
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h
index 2a5045abe46e..e3d33bae3380 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.h
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
@@ -35,6 +35,11 @@ struct msm_rbmemptrs {
 	volatile u64 ttbr0;
 };
 
+struct msm_cp_state {
+	uint64_t ib1_base, ib2_base;
+	uint32_t ib1_rem, ib2_rem;
+};
+
 struct msm_ringbuffer {
 	struct msm_gpu *gpu;
 	int id;
@@ -64,6 +69,25 @@ struct msm_ringbuffer {
 	uint64_t memptrs_iova;
 	struct msm_fence_context *fctx;
 
+	/**
+	 * hangcheck_progress_retries:
+	 *
+	 * The number of extra hangcheck duration cycles that we have given
+	 * due to it appearing that the GPU is making forward progress.
+	 *
+	 * If the GPU appears to be making progress (ie. the CP has advanced
+	 * in the command stream, we'll allow up to DRM_MSM_HANGCHECK_PROGRESS_RETRIES
+	 * expirations of the hangcheck timer before killing the job.  In other
+	 * words we'll let the submit run for up to
+	 * DRM_MSM_HANGCHECK_DEFAULT_PERIOD *  DRM_MSM_HANGCHECK_PROGRESS_RETRIES
+	 */
+	int hangcheck_progress_retries;
+
+	/**
+	 * last_cp_state: The state of the CP at the last call to gpu->progress()
+	 */
+	struct msm_cp_state last_cp_state;
+
 	/*
 	 * preempt_lock protects preemption and serializes wptr updates against
 	 * preemption.  Can be aquired from irq context.
-- 
2.37.3


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] drm/msm/adreno: Simplify read64/write64 helpers
  2022-10-31 22:54 ` [PATCH 1/2] drm/msm/adreno: Simplify read64/write64 helpers Rob Clark
@ 2022-11-01  0:46   ` Dmitry Baryshkov
  2022-11-01 20:03   ` Akhil P Oommen
  1 sibling, 0 replies; 7+ messages in thread
From: Dmitry Baryshkov @ 2022-11-01  0:46 UTC (permalink / raw)
  To: Rob Clark, dri-devel
  Cc: linux-arm-msm, freedreno, Rob Clark, Abhinav Kumar, Sean Paul,
	David Airlie, Daniel Vetter, Akhil P Oommen, Vladimir Lypak,
	Douglas Anderson, Chia-I Wu, Konrad Dybcio, open list

On 01/11/2022 01:54, Rob Clark wrote:
> From: Rob Clark <robdclark@chromium.org>
> 
> The _HI reg is always following the _LO reg, so no need to pass these
> offsets seprately.
> 
> Signed-off-by: Rob Clark <robdclark@chromium.org>
> ---
>   drivers/gpu/drm/msm/adreno/a4xx_gpu.c       |  3 +--
>   drivers/gpu/drm/msm/adreno/a5xx_gpu.c       | 27 ++++++++-------------
>   drivers/gpu/drm/msm/adreno/a5xx_preempt.c   |  4 +--
>   drivers/gpu/drm/msm/adreno/a6xx_gpu.c       | 24 ++++++------------
>   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c |  3 +--
>   drivers/gpu/drm/msm/msm_gpu.h               | 12 ++++-----
>   6 files changed, 27 insertions(+), 46 deletions(-)

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>

-- 
With best wishes
Dmitry


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] drm/msm: Hangcheck progress detection
  2022-10-31 22:54 ` [PATCH 2/2] drm/msm: Hangcheck progress detection Rob Clark
@ 2022-11-01 19:54   ` Akhil P Oommen
  2022-11-01 22:29     ` Rob Clark
  0 siblings, 1 reply; 7+ messages in thread
From: Akhil P Oommen @ 2022-11-01 19:54 UTC (permalink / raw)
  To: Rob Clark, dri-devel
  Cc: linux-arm-msm, freedreno, Rob Clark, Abhinav Kumar,
	Dmitry Baryshkov, Sean Paul, David Airlie, Daniel Vetter,
	Chia-I Wu, Konrad Dybcio, Douglas Anderson, open list

On 11/1/2022 4:24 AM, Rob Clark wrote:
> From: Rob Clark <robdclark@chromium.org>
>
> If the hangcheck timer expires, check if the fw's position in the
> cmdstream has advanced (changed) since last timer expiration, and
> allow it up to three additional "extensions" to it's alotted time.
> The intention is to continue to catch "shader stuck in a loop" type
> hangs quickly, but allow more time for things that are actually
> making forward progress.
>
> Because we need to sample the CP state twice to detect if there has
> not been progress, this also cuts the the timer's duration in half.
>
> Signed-off-by: Rob Clark <robdclark@chromium.org>
> ---
>   drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 34 +++++++++++++++++++++++++++
>   drivers/gpu/drm/msm/msm_drv.h         |  8 ++++++-
>   drivers/gpu/drm/msm/msm_gpu.c         | 20 +++++++++++++++-
>   drivers/gpu/drm/msm/msm_gpu.h         |  5 +++-
>   drivers/gpu/drm/msm/msm_ringbuffer.h  | 24 +++++++++++++++++++
>   5 files changed, 88 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index 1ff605c18ee6..3b8fb7a11dff 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -1843,6 +1843,39 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
>   	return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
>   }
>   
> +static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
> +{
> +	struct msm_cp_state cp_state = {
> +		.ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
> +		.ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
> +		.ib1_rem  = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
> +		.ib2_rem  = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
> +	};
> +	bool progress;
> +
> +	/*
> +	 * Adjust the remaining data to account for what has already been
> +	 * fetched from memory, but not yet consumed by the SQE.
> +	 *
> +	 * This is not *technically* correct, the amount buffered could
> +	 * exceed the IB size due to hw prefetching ahead, but:
> +	 *
> +	 * (1) We aren't trying to find the exact position, just whether
> +	 *     progress has been made
> +	 * (2) The CP_REG_TO_MEM at the end of a submit should be enough
> +	 *     to prevent prefetching into an unrelated submit.  (And
> +	 *     either way, at some point the ROQ will be full.)
> +	 */
> +	cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB1_STAT) >> 16;
> +	cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB1_STAT) >> 16;
REG_A6XX_CP_CSQ_IB1_STAT -> REG_A6XX_CP_CSQ_IB2_STAT

With that, Reviewed-by: Akhil P Oommen <quic_akhilpo@quicinc.com>

-Akhil.
> +
> +	progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
> +
> +	ring->last_cp_state = cp_state;
> +
> +	return progress;
> +}
> +
>   static u32 a618_get_speed_bin(u32 fuse)
>   {
>   	if (fuse == 0)
> @@ -1961,6 +1994,7 @@ static const struct adreno_gpu_funcs funcs = {
>   		.create_address_space = a6xx_create_address_space,
>   		.create_private_address_space = a6xx_create_private_address_space,
>   		.get_rptr = a6xx_get_rptr,
> +		.progress = a6xx_progress,
>   	},
>   	.get_timestamp = a6xx_get_timestamp,
>   };
> diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
> index efcd7260f428..970a1a0ab34f 100644
> --- a/drivers/gpu/drm/msm/msm_drv.h
> +++ b/drivers/gpu/drm/msm/msm_drv.h
> @@ -226,7 +226,13 @@ struct msm_drm_private {
>   
>   	struct drm_atomic_state *pm_state;
>   
> -	/* For hang detection, in ms */
> +	/**
> +	 * hangcheck_period: For hang detection, in ms
> +	 *
> +	 * Note that in practice, a submit/job will get at least two hangcheck
> +	 * periods, due to checking for progress being implemented as simply
> +	 * "have the CP position registers changed since last time?"
> +	 */
>   	unsigned int hangcheck_period;
>   
>   	/**
> diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
> index 3dffee54a951..136f5977b0bf 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.c
> +++ b/drivers/gpu/drm/msm/msm_gpu.c
> @@ -500,6 +500,21 @@ static void hangcheck_timer_reset(struct msm_gpu *gpu)
>   			round_jiffies_up(jiffies + msecs_to_jiffies(priv->hangcheck_period)));
>   }
>   
> +static bool made_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
> +{
> +	if (ring->hangcheck_progress_retries >= DRM_MSM_HANGCHECK_PROGRESS_RETRIES)
> +		return false;
> +
> +	if (!gpu->funcs->progress)
> +		return false;
> +
> +	if (!gpu->funcs->progress(gpu, ring))
> +		return false;
> +
> +	ring->hangcheck_progress_retries++;
> +	return true;
> +}
> +
>   static void hangcheck_handler(struct timer_list *t)
>   {
>   	struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);
> @@ -511,9 +526,12 @@ static void hangcheck_handler(struct timer_list *t)
>   	if (fence != ring->hangcheck_fence) {
>   		/* some progress has been made.. ya! */
>   		ring->hangcheck_fence = fence;
> -	} else if (fence_before(fence, ring->fctx->last_fence)) {
> +		ring->hangcheck_progress_retries = 0;
> +	} else if (fence_before(fence, ring->fctx->last_fence) &&
> +			!made_progress(gpu, ring)) {
>   		/* no progress and not done.. hung! */
>   		ring->hangcheck_fence = fence;
> +		ring->hangcheck_progress_retries = 0;
>   		DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
>   				gpu->name, ring->id);
>   		DRM_DEV_ERROR(dev->dev, "%s:     completed fence: %u\n",
> diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> index 585fd9c8d45a..d8f355e9f0b2 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.h
> +++ b/drivers/gpu/drm/msm/msm_gpu.h
> @@ -78,6 +78,8 @@ struct msm_gpu_funcs {
>   	struct msm_gem_address_space *(*create_private_address_space)
>   		(struct msm_gpu *gpu);
>   	uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
> +
> +	bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
>   };
>   
>   /* Additional state for iommu faults: */
> @@ -236,7 +238,8 @@ struct msm_gpu {
>   	 */
>   #define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */
>   
> -#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */
> +#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 250 /* in ms */
> +#define DRM_MSM_HANGCHECK_PROGRESS_RETRIES 3
>   	struct timer_list hangcheck_timer;
>   
>   	/* Fault info for most recent iova fault: */
> diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h
> index 2a5045abe46e..e3d33bae3380 100644
> --- a/drivers/gpu/drm/msm/msm_ringbuffer.h
> +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
> @@ -35,6 +35,11 @@ struct msm_rbmemptrs {
>   	volatile u64 ttbr0;
>   };
>   
> +struct msm_cp_state {
> +	uint64_t ib1_base, ib2_base;
> +	uint32_t ib1_rem, ib2_rem;
> +};
> +
I think we can add CP_RB_RPTR too here.
>   struct msm_ringbuffer {
>   	struct msm_gpu *gpu;
>   	int id;
> @@ -64,6 +69,25 @@ struct msm_ringbuffer {
>   	uint64_t memptrs_iova;
>   	struct msm_fence_context *fctx;
>   
> +	/**
> +	 * hangcheck_progress_retries:
> +	 *
> +	 * The number of extra hangcheck duration cycles that we have given
> +	 * due to it appearing that the GPU is making forward progress.
> +	 *
> +	 * If the GPU appears to be making progress (ie. the CP has advanced
> +	 * in the command stream, we'll allow up to DRM_MSM_HANGCHECK_PROGRESS_RETRIES
> +	 * expirations of the hangcheck timer before killing the job.  In other
> +	 * words we'll let the submit run for up to
> +	 * DRM_MSM_HANGCHECK_DEFAULT_PERIOD *  DRM_MSM_HANGCHECK_PROGRESS_RETRIES
> +	 */
> +	int hangcheck_progress_retries;
> +
> +	/**
> +	 * last_cp_state: The state of the CP at the last call to gpu->progress()
> +	 */
> +	struct msm_cp_state last_cp_state;
> +
>   	/*
>   	 * preempt_lock protects preemption and serializes wptr updates against
>   	 * preemption.  Can be aquired from irq context.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] drm/msm/adreno: Simplify read64/write64 helpers
  2022-10-31 22:54 ` [PATCH 1/2] drm/msm/adreno: Simplify read64/write64 helpers Rob Clark
  2022-11-01  0:46   ` Dmitry Baryshkov
@ 2022-11-01 20:03   ` Akhil P Oommen
  1 sibling, 0 replies; 7+ messages in thread
From: Akhil P Oommen @ 2022-11-01 20:03 UTC (permalink / raw)
  To: Rob Clark, dri-devel
  Cc: linux-arm-msm, freedreno, Rob Clark, Abhinav Kumar,
	Dmitry Baryshkov, Sean Paul, David Airlie, Daniel Vetter,
	Vladimir Lypak, Douglas Anderson, Chia-I Wu, Konrad Dybcio,
	open list

On 11/1/2022 4:24 AM, Rob Clark wrote:
> From: Rob Clark <robdclark@chromium.org>
>
> The _HI reg is always following the _LO reg, so no need to pass these
> offsets seprately.
>
> Signed-off-by: Rob Clark <robdclark@chromium.org>
> ---
>   drivers/gpu/drm/msm/adreno/a4xx_gpu.c       |  3 +--
>   drivers/gpu/drm/msm/adreno/a5xx_gpu.c       | 27 ++++++++-------------
>   drivers/gpu/drm/msm/adreno/a5xx_preempt.c   |  4 +--
>   drivers/gpu/drm/msm/adreno/a6xx_gpu.c       | 24 ++++++------------
>   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c |  3 +--
>   drivers/gpu/drm/msm/msm_gpu.h               | 12 ++++-----
>   6 files changed, 27 insertions(+), 46 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
> index 7cb8d9849c07..a10feb8a4194 100644
> --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
> @@ -606,8 +606,7 @@ static int a4xx_pm_suspend(struct msm_gpu *gpu) {
>   
>   static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
>   {
> -	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
> -		REG_A4XX_RBBM_PERFCTR_CP_0_HI);
> +	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO);
>   
>   	return 0;
>   }
> diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> index 3dcec7acb384..ba22d3c918bc 100644
> --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> @@ -605,11 +605,9 @@ static int a5xx_ucode_init(struct msm_gpu *gpu)
>   		a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
>   	}
>   
> -	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
> -		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
> +	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO, a5xx_gpu->pm4_iova);
>   
> -	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
> -		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
> +	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO, a5xx_gpu->pfp_iova);
>   
>   	return 0;
>   }
> @@ -868,8 +866,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
>   	 * memory rendering at this point in time and we don't want to block off
>   	 * part of the virtual memory space.
>   	 */
> -	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
> -		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
> +	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
>   	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
>   
>   	/* Put the GPU into 64 bit by default */
> @@ -908,8 +905,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
>   		return ret;
>   
>   	/* Set the ringbuffer address */
> -	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
> -		gpu->rb[0]->iova);
> +	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, gpu->rb[0]->iova);
>   
>   	/*
>   	 * If the microcode supports the WHERE_AM_I opcode then we can use that
> @@ -936,7 +932,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
>   		}
>   
>   		gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
> -			REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
> +			    shadowptr(a5xx_gpu, gpu->rb[0]));
>   	} else if (gpu->nr_rings > 1) {
>   		/* Disable preemption if WHERE_AM_I isn't available */
>   		a5xx_preempt_fini(gpu);
> @@ -1239,9 +1235,9 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
>   		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
>   		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
>   		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
> -		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
> +		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE),
>   		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
> -		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
> +		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE),
>   		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
>   
>   	/* Turn off the hangcheck timer to keep it from bothering us */
> @@ -1427,8 +1423,7 @@ static int a5xx_pm_suspend(struct msm_gpu *gpu)
>   
>   static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
>   {
> -	*value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
> -		REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
> +	*value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO);
>   
>   	return 0;
>   }
> @@ -1465,8 +1460,7 @@ static int a5xx_crashdumper_run(struct msm_gpu *gpu,
>   	if (IS_ERR_OR_NULL(dumper->ptr))
>   		return -EINVAL;
>   
> -	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
> -		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
> +	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);
>   
>   	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
>   
> @@ -1666,8 +1660,7 @@ static u64 a5xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
>   {
>   	u64 busy_cycles;
>   
> -	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
> -			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
> +	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO);
>   	*out_sample_rate = clk_get_rate(gpu->core_clk);
>   
>   	return busy_cycles;
> diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
> index 8abc9a2b114a..7658e89844b4 100644
> --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
> +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
> @@ -137,7 +137,6 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu)
>   
>   	/* Set the address of the incoming preemption record */
>   	gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
> -		REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
>   		a5xx_gpu->preempt_iova[ring->id]);
>   
>   	a5xx_gpu->next_ring = ring;
> @@ -211,8 +210,7 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu)
>   	}
>   
>   	/* Write a 0 to signal that we aren't switching pagetables */
> -	gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
> -		REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, 0);
> +	gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO, 0);
>   
>   	/* Reset the preemption state */
>   	set_preempt_state(a5xx_gpu, PREEMPT_NONE);
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index fdc578016e0b..1ff605c18ee6 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -247,8 +247,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
>   	OUT_RING(ring, submit->seqno);
>   
>   	trace_msm_gpu_submit_flush(submit,
> -		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
> -			REG_A6XX_CP_ALWAYS_ON_COUNTER_HI));
> +		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO));
>   
>   	a6xx_flush(gpu, ring);
>   }
> @@ -947,8 +946,7 @@ static int a6xx_ucode_init(struct msm_gpu *gpu)
>   		}
>   	}
>   
> -	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE,
> -		REG_A6XX_CP_SQE_INSTR_BASE+1, a6xx_gpu->sqe_iova);
> +	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
>   
>   	return 0;
>   }
> @@ -999,8 +997,7 @@ static int hw_init(struct msm_gpu *gpu)
>   	 * memory rendering at this point in time and we don't want to block off
>   	 * part of the virtual memory space.
>   	 */
> -	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
> -		REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
> +	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
>   	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
>   
>   	/* Turn on 64 bit addressing for all blocks */
> @@ -1049,11 +1046,9 @@ static int hw_init(struct msm_gpu *gpu)
>   
>   	if (!adreno_is_a650_family(adreno_gpu)) {
>   		/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
> -		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO,
> -			REG_A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x00100000);
> +		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
>   
>   		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO,
> -			REG_A6XX_UCHE_GMEM_RANGE_MAX_HI,
>   			0x00100000 + adreno_gpu->gmem - 1);
>   	}
>   
> @@ -1145,8 +1140,7 @@ static int hw_init(struct msm_gpu *gpu)
>   		goto out;
>   
>   	/* Set the ringbuffer address */
> -	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, REG_A6XX_CP_RB_BASE_HI,
> -		gpu->rb[0]->iova);
> +	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
>   
>   	/* Targets that support extended APRIV can use the RPTR shadow from
>   	 * hardware but all the other ones need to disable the feature. Targets
> @@ -1178,7 +1172,6 @@ static int hw_init(struct msm_gpu *gpu)
>   		}
>   
>   		gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR_LO,
> -			REG_A6XX_CP_RB_RPTR_ADDR_HI,
>   			shadowptr(a6xx_gpu, gpu->rb[0]));
>   	}
>   
> @@ -1499,9 +1492,9 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
>   		gpu_read(gpu, REG_A6XX_RBBM_STATUS),
>   		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
>   		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
> -		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE, REG_A6XX_CP_IB1_BASE_HI),
> +		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
>   		gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
> -		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE, REG_A6XX_CP_IB2_BASE_HI),
> +		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
>   		gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
>   
>   	/* Turn off the hangcheck timer to keep it from bothering us */
> @@ -1712,8 +1705,7 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
>   	/* Force the GPU power on so we can read this register */
>   	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
>   
> -	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
> -			    REG_A6XX_CP_ALWAYS_ON_COUNTER_HI);
> +	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO);
>   
>   	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
>   
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
> index a5c3d1ed255a..a023d5f962dc 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
> @@ -147,8 +147,7 @@ static int a6xx_crashdumper_run(struct msm_gpu *gpu,
>   	/* Make sure all pending memory writes are posted */
>   	wmb();
>   
> -	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
> -		REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
> +	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);
>   
>   	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
>   
> diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> index 58a72e6b1400..585fd9c8d45a 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.h
> +++ b/drivers/gpu/drm/msm/msm_gpu.h
> @@ -540,7 +540,7 @@ static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
>   	msm_rmw(gpu->mmio + (reg << 2), mask, or);
>   }
>   
> -static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
> +static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg)
>   {
>   	u64 val;
>   
> @@ -558,17 +558,17 @@ static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
>   	 * when the lo is read, so make sure to read the lo first to trigger
>   	 * that
>   	 */
> -	val = (u64) msm_readl(gpu->mmio + (lo << 2));
> -	val |= ((u64) msm_readl(gpu->mmio + (hi << 2)) << 32);
> +	val = (u64) msm_readl(gpu->mmio + (reg << 2));
> +	val |= ((u64) msm_readl(gpu->mmio + ((reg + 1) << 2)) << 32);
>   
>   	return val;
>   }
>   
> -static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
> +static inline void gpu_write64(struct msm_gpu *gpu, u32 reg, u64 val)
>   {
>   	/* Why not a writeq here? Read the screed above */
> -	msm_writel(lower_32_bits(val), gpu->mmio + (lo << 2));
> -	msm_writel(upper_32_bits(val), gpu->mmio + (hi << 2));
> +	msm_writel(lower_32_bits(val), gpu->mmio + (reg << 2));
> +	msm_writel(upper_32_bits(val), gpu->mmio + ((reg + 1) << 2));
>   }
>   
>   int msm_gpu_pm_suspend(struct msm_gpu *gpu);

Reviewed-by: Akhil P Oommen <quic_akhilpo@quicinc.com>


-Akhil.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] drm/msm: Hangcheck progress detection
  2022-11-01 19:54   ` Akhil P Oommen
@ 2022-11-01 22:29     ` Rob Clark
  0 siblings, 0 replies; 7+ messages in thread
From: Rob Clark @ 2022-11-01 22:29 UTC (permalink / raw)
  To: Akhil P Oommen
  Cc: dri-devel, linux-arm-msm, freedreno, Rob Clark, Abhinav Kumar,
	Dmitry Baryshkov, Sean Paul, David Airlie, Daniel Vetter,
	Chia-I Wu, Konrad Dybcio, Douglas Anderson, open list

On Tue, Nov 1, 2022 at 12:54 PM Akhil P Oommen <quic_akhilpo@quicinc.com> wrote:
>
> On 11/1/2022 4:24 AM, Rob Clark wrote:
> > From: Rob Clark <robdclark@chromium.org>
> >
> > If the hangcheck timer expires, check if the fw's position in the
> > cmdstream has advanced (changed) since last timer expiration, and
> > allow it up to three additional "extensions" to it's alotted time.
> > The intention is to continue to catch "shader stuck in a loop" type
> > hangs quickly, but allow more time for things that are actually
> > making forward progress.
> >
> > Because we need to sample the CP state twice to detect if there has
> > not been progress, this also cuts the the timer's duration in half.
> >
> > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > ---
> >   drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 34 +++++++++++++++++++++++++++
> >   drivers/gpu/drm/msm/msm_drv.h         |  8 ++++++-
> >   drivers/gpu/drm/msm/msm_gpu.c         | 20 +++++++++++++++-
> >   drivers/gpu/drm/msm/msm_gpu.h         |  5 +++-
> >   drivers/gpu/drm/msm/msm_ringbuffer.h  | 24 +++++++++++++++++++
> >   5 files changed, 88 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> > index 1ff605c18ee6..3b8fb7a11dff 100644
> > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> > @@ -1843,6 +1843,39 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
> >       return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
> >   }
> >
> > +static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
> > +{
> > +     struct msm_cp_state cp_state = {
> > +             .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
> > +             .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
> > +             .ib1_rem  = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
> > +             .ib2_rem  = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
> > +     };
> > +     bool progress;
> > +
> > +     /*
> > +      * Adjust the remaining data to account for what has already been
> > +      * fetched from memory, but not yet consumed by the SQE.
> > +      *
> > +      * This is not *technically* correct, the amount buffered could
> > +      * exceed the IB size due to hw prefetching ahead, but:
> > +      *
> > +      * (1) We aren't trying to find the exact position, just whether
> > +      *     progress has been made
> > +      * (2) The CP_REG_TO_MEM at the end of a submit should be enough
> > +      *     to prevent prefetching into an unrelated submit.  (And
> > +      *     either way, at some point the ROQ will be full.)
> > +      */
> > +     cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB1_STAT) >> 16;
> > +     cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB1_STAT) >> 16;
> REG_A6XX_CP_CSQ_IB1_STAT -> REG_A6XX_CP_CSQ_IB2_STAT

oops, will fix in v2

> With that, Reviewed-by: Akhil P Oommen <quic_akhilpo@quicinc.com>
>
> -Akhil.
> > +
> > +     progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
> > +
> > +     ring->last_cp_state = cp_state;
> > +
> > +     return progress;
> > +}
> > +
> >   static u32 a618_get_speed_bin(u32 fuse)
> >   {
> >       if (fuse == 0)
> > @@ -1961,6 +1994,7 @@ static const struct adreno_gpu_funcs funcs = {
> >               .create_address_space = a6xx_create_address_space,
> >               .create_private_address_space = a6xx_create_private_address_space,
> >               .get_rptr = a6xx_get_rptr,
> > +             .progress = a6xx_progress,
> >       },
> >       .get_timestamp = a6xx_get_timestamp,
> >   };
> > diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
> > index efcd7260f428..970a1a0ab34f 100644
> > --- a/drivers/gpu/drm/msm/msm_drv.h
> > +++ b/drivers/gpu/drm/msm/msm_drv.h
> > @@ -226,7 +226,13 @@ struct msm_drm_private {
> >
> >       struct drm_atomic_state *pm_state;
> >
> > -     /* For hang detection, in ms */
> > +     /**
> > +      * hangcheck_period: For hang detection, in ms
> > +      *
> > +      * Note that in practice, a submit/job will get at least two hangcheck
> > +      * periods, due to checking for progress being implemented as simply
> > +      * "have the CP position registers changed since last time?"
> > +      */
> >       unsigned int hangcheck_period;
> >
> >       /**
> > diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
> > index 3dffee54a951..136f5977b0bf 100644
> > --- a/drivers/gpu/drm/msm/msm_gpu.c
> > +++ b/drivers/gpu/drm/msm/msm_gpu.c
> > @@ -500,6 +500,21 @@ static void hangcheck_timer_reset(struct msm_gpu *gpu)
> >                       round_jiffies_up(jiffies + msecs_to_jiffies(priv->hangcheck_period)));
> >   }
> >
> > +static bool made_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
> > +{
> > +     if (ring->hangcheck_progress_retries >= DRM_MSM_HANGCHECK_PROGRESS_RETRIES)
> > +             return false;
> > +
> > +     if (!gpu->funcs->progress)
> > +             return false;
> > +
> > +     if (!gpu->funcs->progress(gpu, ring))
> > +             return false;
> > +
> > +     ring->hangcheck_progress_retries++;
> > +     return true;
> > +}
> > +
> >   static void hangcheck_handler(struct timer_list *t)
> >   {
> >       struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);
> > @@ -511,9 +526,12 @@ static void hangcheck_handler(struct timer_list *t)
> >       if (fence != ring->hangcheck_fence) {
> >               /* some progress has been made.. ya! */
> >               ring->hangcheck_fence = fence;
> > -     } else if (fence_before(fence, ring->fctx->last_fence)) {
> > +             ring->hangcheck_progress_retries = 0;
> > +     } else if (fence_before(fence, ring->fctx->last_fence) &&
> > +                     !made_progress(gpu, ring)) {
> >               /* no progress and not done.. hung! */
> >               ring->hangcheck_fence = fence;
> > +             ring->hangcheck_progress_retries = 0;
> >               DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
> >                               gpu->name, ring->id);
> >               DRM_DEV_ERROR(dev->dev, "%s:     completed fence: %u\n",
> > diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> > index 585fd9c8d45a..d8f355e9f0b2 100644
> > --- a/drivers/gpu/drm/msm/msm_gpu.h
> > +++ b/drivers/gpu/drm/msm/msm_gpu.h
> > @@ -78,6 +78,8 @@ struct msm_gpu_funcs {
> >       struct msm_gem_address_space *(*create_private_address_space)
> >               (struct msm_gpu *gpu);
> >       uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
> > +
> > +     bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
> >   };
> >
> >   /* Additional state for iommu faults: */
> > @@ -236,7 +238,8 @@ struct msm_gpu {
> >        */
> >   #define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */
> >
> > -#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */
> > +#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 250 /* in ms */
> > +#define DRM_MSM_HANGCHECK_PROGRESS_RETRIES 3
> >       struct timer_list hangcheck_timer;
> >
> >       /* Fault info for most recent iova fault: */
> > diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h
> > index 2a5045abe46e..e3d33bae3380 100644
> > --- a/drivers/gpu/drm/msm/msm_ringbuffer.h
> > +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
> > @@ -35,6 +35,11 @@ struct msm_rbmemptrs {
> >       volatile u64 ttbr0;
> >   };
> >
> > +struct msm_cp_state {
> > +     uint64_t ib1_base, ib2_base;
> > +     uint32_t ib1_rem, ib2_rem;
> > +};
> > +
> I think we can add CP_RB_RPTR too here.

I originally was going to, but I figured if it is making progress in
the RB, probably fences will be getting updated.  And if not at least
IB1 state should be changing, since we don't write all that much into
RB other than CP_INDIRECT_BRANCH (ie. nothing that would take long
enough to run for hangcheck timer to expire)

I guess I could at least add a comment.

BR,
-R

> >   struct msm_ringbuffer {
> >       struct msm_gpu *gpu;
> >       int id;
> > @@ -64,6 +69,25 @@ struct msm_ringbuffer {
> >       uint64_t memptrs_iova;
> >       struct msm_fence_context *fctx;
> >
> > +     /**
> > +      * hangcheck_progress_retries:
> > +      *
> > +      * The number of extra hangcheck duration cycles that we have given
> > +      * due to it appearing that the GPU is making forward progress.
> > +      *
> > +      * If the GPU appears to be making progress (ie. the CP has advanced
> > +      * in the command stream, we'll allow up to DRM_MSM_HANGCHECK_PROGRESS_RETRIES
> > +      * expirations of the hangcheck timer before killing the job.  In other
> > +      * words we'll let the submit run for up to
> > +      * DRM_MSM_HANGCHECK_DEFAULT_PERIOD *  DRM_MSM_HANGCHECK_PROGRESS_RETRIES
> > +      */
> > +     int hangcheck_progress_retries;
> > +
> > +     /**
> > +      * last_cp_state: The state of the CP at the last call to gpu->progress()
> > +      */
> > +     struct msm_cp_state last_cp_state;
> > +
> >       /*
> >        * preempt_lock protects preemption and serializes wptr updates against
> >        * preemption.  Can be aquired from irq context.
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-11-01 22:29 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-31 22:54 [PATCH 0/2] drm/msm: Improved hang detection Rob Clark
2022-10-31 22:54 ` [PATCH 1/2] drm/msm/adreno: Simplify read64/write64 helpers Rob Clark
2022-11-01  0:46   ` Dmitry Baryshkov
2022-11-01 20:03   ` Akhil P Oommen
2022-10-31 22:54 ` [PATCH 2/2] drm/msm: Hangcheck progress detection Rob Clark
2022-11-01 19:54   ` Akhil P Oommen
2022-11-01 22:29     ` Rob Clark

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).