linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] drm/msm: Add tracking for faults associated with an address space
@ 2022-01-27 17:10 Rob Clark
  2022-01-27 17:10 ` [PATCH 1/2] drm/msm/gpu: Add ctx to get_param() Rob Clark
  2022-01-27 17:10 ` [PATCH 2/2] drm/msm/gpu: Add param to get address space faults Rob Clark
  0 siblings, 2 replies; 3+ messages in thread
From: Rob Clark @ 2022-01-27 17:10 UTC (permalink / raw)
  To: dri-devel
  Cc: freedreno, linux-arm-msm, Rob Clark, Abhinav Kumar,
	Akhil P Oommen, AngeloGioacchino Del Regno, Bjorn Andersson,
	Christian König, Jonathan Marek, Jordan Crouse, open list,
	Sai Prakash Ranjan, Stephen Boyd, Vladimir Lypak, Yangtao Li

From: Rob Clark <robdclark@chromium.org>

Currently, for GL_EXT_robustness userspace uses the global and per-
submitqueue fault counters to determine GUILTY_CONTEXT_RESET_EXT vs
INNOCENT_CONTEXT_RESET_EXT.  But that is a bit overly paranoid, in
that a fault in a different process's context (when it has it's own
isolated address space) should not hurt anything.

This is particularly annoying with CrOS and chrome's exit_on_context_lost quirk,
while running deqp in the android container, as the deqp-egl suite has
tests that intentionally trigger gpu hangs (for the purpose of testing
the robustness extension), which triggers chrome to restart, which
restarts the android container!

This new param gives userspace a way to ignore faults triggered by other
processes.

Applies on top of https://patchwork.freedesktop.org/series/98907/

Rob Clark (2):
  drm/msm/gpu: Add ctx to get_param()
  drm/msm/gpu: Add param to get address space faults

 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 6 +++++-
 drivers/gpu/drm/msm/adreno/adreno_gpu.h | 3 ++-
 drivers/gpu/drm/msm/msm_drv.c           | 4 +++-
 drivers/gpu/drm/msm/msm_gem.h           | 3 +++
 drivers/gpu/drm/msm/msm_gpu.c           | 1 +
 drivers/gpu/drm/msm/msm_gpu.h           | 3 ++-
 drivers/gpu/drm/msm/msm_rd.c            | 6 ++++--
 include/uapi/drm/msm_drm.h              | 3 ++-
 8 files changed, 22 insertions(+), 7 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] drm/msm/gpu: Add ctx to get_param()
  2022-01-27 17:10 [PATCH 0/2] drm/msm: Add tracking for faults associated with an address space Rob Clark
@ 2022-01-27 17:10 ` Rob Clark
  2022-01-27 17:10 ` [PATCH 2/2] drm/msm/gpu: Add param to get address space faults Rob Clark
  1 sibling, 0 replies; 3+ messages in thread
From: Rob Clark @ 2022-01-27 17:10 UTC (permalink / raw)
  To: dri-devel
  Cc: freedreno, linux-arm-msm, Rob Clark, Rob Clark, Sean Paul,
	Abhinav Kumar, David Airlie, Daniel Vetter, Jonathan Marek,
	Jordan Crouse, Sai Prakash Ranjan, Akhil P Oommen, Yangtao Li,
	AngeloGioacchino Del Regno, Bjorn Andersson, Vladimir Lypak,
	open list

From: Rob Clark <robdclark@chromium.org>

Prep work for next patch.

Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 3 ++-
 drivers/gpu/drm/msm/adreno/adreno_gpu.h | 3 ++-
 drivers/gpu/drm/msm/msm_drv.c           | 3 ++-
 drivers/gpu/drm/msm/msm_gpu.h           | 3 ++-
 drivers/gpu/drm/msm/msm_rd.c            | 6 ++++--
 5 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index f33cfa4ef1c8..caa9076197de 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -227,7 +227,8 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu,
 	return aspace;
 }
 
-int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
+int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
+		     uint32_t param, uint64_t *value)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index cffabe7d33c1..432590036b31 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -279,7 +279,8 @@ static inline int adreno_is_a650_family(struct adreno_gpu *gpu)
 	       adreno_is_a660_family(gpu);
 }
 
-int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value);
+int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
+		     uint32_t param, uint64_t *value);
 const struct firmware *adreno_request_fw(struct adreno_gpu *adreno_gpu,
 		const char *fwname);
 struct drm_gem_object *adreno_fw_create_bo(struct msm_gpu *gpu,
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 555666e3f960..72060247e43c 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -763,7 +763,8 @@ static int msm_ioctl_get_param(struct drm_device *dev, void *data,
 	if (!gpu)
 		return -ENXIO;
 
-	return gpu->funcs->get_param(gpu, args->param, &args->value);
+	return gpu->funcs->get_param(gpu, file->driver_priv,
+				     args->param, &args->value);
 }
 
 static int msm_ioctl_gem_new(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 92aa1e9196c6..ba8407231340 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -42,7 +42,8 @@ struct msm_gpu_config {
  *    + z180_gpu
  */
 struct msm_gpu_funcs {
-	int (*get_param)(struct msm_gpu *gpu, uint32_t param, uint64_t *value);
+	int (*get_param)(struct msm_gpu *gpu, struct msm_file_private *ctx,
+			 uint32_t param, uint64_t *value);
 	int (*hw_init)(struct msm_gpu *gpu);
 	int (*pm_suspend)(struct msm_gpu *gpu);
 	int (*pm_resume)(struct msm_gpu *gpu);
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index 7e4d6460719e..dd3605b46264 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -197,13 +197,15 @@ static int rd_open(struct inode *inode, struct file *file)
 
 	/* the parsing tools need to know gpu-id to know which
 	 * register database to load.
+	 *
+	 * Note: These particular param does not require a context
 	 */
-	gpu->funcs->get_param(gpu, MSM_PARAM_GPU_ID, &val);
+	gpu->funcs->get_param(gpu, NULL, MSM_PARAM_GPU_ID, &val);
 	gpu_id = val;
 
 	rd_write_section(rd, RD_GPU_ID, &gpu_id, sizeof(gpu_id));
 
-	gpu->funcs->get_param(gpu, MSM_PARAM_CHIP_ID, &val);
+	gpu->funcs->get_param(gpu, NULL, MSM_PARAM_CHIP_ID, &val);
 	rd_write_section(rd, RD_CHIP_ID, &val, sizeof(val));
 
 out:
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] drm/msm/gpu: Add param to get address space faults
  2022-01-27 17:10 [PATCH 0/2] drm/msm: Add tracking for faults associated with an address space Rob Clark
  2022-01-27 17:10 ` [PATCH 1/2] drm/msm/gpu: Add ctx to get_param() Rob Clark
@ 2022-01-27 17:10 ` Rob Clark
  1 sibling, 0 replies; 3+ messages in thread
From: Rob Clark @ 2022-01-27 17:10 UTC (permalink / raw)
  To: dri-devel
  Cc: freedreno, linux-arm-msm, Rob Clark, Rob Clark, Sean Paul,
	Abhinav Kumar, David Airlie, Daniel Vetter, Jonathan Marek,
	Jordan Crouse, Sai Prakash Ranjan, Christian König,
	Yangtao Li, Stephen Boyd, open list

From: Rob Clark <robdclark@chromium.org>

Add a param so that userspace can query the fault count for faults that
might effect them (ie. any context sharing the same address space).

Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 3 +++
 drivers/gpu/drm/msm/msm_drv.c           | 1 +
 drivers/gpu/drm/msm/msm_gem.h           | 3 +++
 drivers/gpu/drm/msm/msm_gpu.c           | 1 +
 include/uapi/drm/msm_drm.h              | 3 ++-
 5 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index caa9076197de..05899c77ca38 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -271,6 +271,9 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
 	case MSM_PARAM_FAULTS:
 		*value = gpu->global_faults;
 		return 0;
+	case MSM_PARAM_AS_FAULTS:
+		*value = ctx->aspace->faults;
+		return 0;
 	case MSM_PARAM_SUSPENDS:
 		*value = gpu->suspend_count;
 		return 0;
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 72060247e43c..39ab9a361d7f 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -41,6 +41,7 @@
  * - 1.6.0 - Syncobj support
  * - 1.7.0 - Add MSM_PARAM_SUSPENDS to access suspend count
  * - 1.8.0 - Add MSM_BO_CACHED_COHERENT for supported GPUs (a6xx)
+ * - 1.9.0 - Add MSM_PARAM_AS_FAULTS
  */
 #define MSM_VERSION_MAJOR	1
 #define MSM_VERSION_MINOR	8
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 54ca0817d807..af612add5264 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -35,6 +35,9 @@ struct msm_gem_address_space {
 	 * will be non-NULL:
 	 */
 	struct pid *pid;
+
+	/* @faults: the number of GPU hangs associated with this address space */
+	int faults;
 };
 
 struct msm_gem_vma {
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 2c1049c0ea14..1029a51cb016 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -372,6 +372,7 @@ static void recover_worker(struct kthread_work *work)
 		/* Increment the fault counts */
 		gpu->global_faults++;
 		submit->queue->faults++;
+		submit->aspace->faults++;
 
 		task = get_pid_task(submit->pid, PIDTYPE_PID);
 		if (task) {
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 6b8fffc28a50..b7d92868b945 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -75,8 +75,9 @@ struct drm_msm_timespec {
 #define MSM_PARAM_GMEM_BASE  0x06
 #define MSM_PARAM_PRIORITIES 0x07  /* The # of priority levels */
 #define MSM_PARAM_PP_PGTABLE 0x08  /* => 1 for per-process pagetables, else 0 */
-#define MSM_PARAM_FAULTS     0x09
+#define MSM_PARAM_FAULTS     0x09  /* global fault count */
 #define MSM_PARAM_SUSPENDS   0x0a
+#define MSM_PARAM_AS_FAULTS  0x0b  /* faults in any context sharing same address space */
 
 /* For backwards compat.  The original support for preemption was based on
  * a single ring per priority level so # of priority levels equals the #
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-01-27 17:10 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-27 17:10 [PATCH 0/2] drm/msm: Add tracking for faults associated with an address space Rob Clark
2022-01-27 17:10 ` [PATCH 1/2] drm/msm/gpu: Add ctx to get_param() Rob Clark
2022-01-27 17:10 ` [PATCH 2/2] drm/msm/gpu: Add param to get address space faults Rob Clark

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).