* [PATCH 1/3] drm/amdgpu: Fix entities for disabled HW blocks.
@ 2019-01-29 10:20 Bas Nieuwenhuizen
From: Bas Nieuwenhuizen
  To: amd-gfx@lists.freedesktop.org; +Cc: Bas Nieuwenhuizen

If we have some disabled HW blocks (say VCN), then the rings are
not initialized. This results in entities that refer to uninitialized
run queues (rqs).

In normal usage this does not result in issues because userspace
generally knows to ignore the unsupported blocks, but e.g. setting
the priorities on all the entities resulted in a NULL access while
locking the rq spinlock.

This could probably also be triggered by actually submitting a job to
one of these blocks with less careful userspace.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 31 +++++++++++++++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  1 +
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index d85184b5b35c..6f72ce785b32 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -169,9 +169,13 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 		for (j = 0; j < num_rings; ++j)
 			rqs[j] = &rings[j]->sched.sched_rq[priority];
 
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-						  rqs, num_rings, &ctx->guilty);
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			ctx->entities[i][j].enabled = rings[0]->adev != NULL;
+			if (ctx->entities[i][j].enabled) {
+				r = drm_sched_entity_init(&ctx->entities[i][j].entity,
+							  rqs, num_rings, &ctx->guilty);
+			}
+		}
 		if (r)
 			goto error_cleanup_entities;
 	}
@@ -180,7 +184,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 
 error_cleanup_entities:
 	for (i = 0; i < num_entities; ++i)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+		if (ctx->entities[0][i].enabled)
+			drm_sched_entity_destroy(&ctx->entities[0][i].entity);
 	kfree(ctx->entities[0]);
 
 error_free_fences:
@@ -229,6 +234,11 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 		return -EINVAL;
 	}
 
+	if (!ctx->entities[hw_ip][ring].enabled) {
+		DRM_DEBUG("disabled ring: %d %d\n", hw_ip, ring);
+		return -EINVAL;
+	}
+
 	*entity = &ctx->entities[hw_ip][ring].entity;
 	return 0;
 }
@@ -279,7 +289,8 @@ static void amdgpu_ctx_do_release(struct kref *ref)
 		num_entities += amdgpu_ctx_num_entities[i];
 
 	for (i = 0; i < num_entities; i++)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+		if (ctx->entities[0][i].enabled)
+			drm_sched_entity_destroy(&ctx->entities[0][i].entity);
 
 	amdgpu_ctx_fini(ref);
 }
@@ -505,7 +516,9 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 	for (i = 0; i < num_entities; i++) {
 		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
 
-		drm_sched_entity_set_priority(entity, ctx_prio);
+
+			if (ctx->entities[0][i].enabled)
+			drm_sched_entity_set_priority(entity, ctx_prio);
 	}
 }
 
@@ -557,6 +570,9 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
 		for (i = 0; i < num_entities; i++) {
 			struct drm_sched_entity *entity;
 
+			if (!ctx->entities[0][i].enabled)
+				continue;
+
 			entity = &ctx->entities[0][i].entity;
 			max_wait = drm_sched_entity_flush(entity, max_wait);
 		}
@@ -584,7 +600,8 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 		}
 
 		for (i = 0; i < num_entities; i++)
-			drm_sched_entity_fini(&ctx->entities[0][i].entity);
+			if (ctx->entities[0][i].enabled)
+				drm_sched_entity_fini(&ctx->entities[0][i].entity);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index b3b012c0a7da..183a783aedd8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -33,6 +33,7 @@ struct amdgpu_ctx_entity {
 	uint64_t		sequence;
 	struct dma_fence	**fences;
 	struct drm_sched_entity	entity;
+	bool                    enabled;
 };
 
 struct amdgpu_ctx {
-- 
2.20.1
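
[Editor's note: a minimal userspace sketch of the crash path described in
the commit message; it is not part of the thread. It assumes the libdrm
uapi headers (build with the libdrm include path and -ldrm), a device at
/dev/dri/card0, and DRM master privileges, since the sched ioctl is
restricted to the master.]

/*
 * Sketch: create a context, then override priorities on every context
 * of the target fd. Before this patch, the override walked entities
 * whose rqs pointed into schedulers of rings that were never
 * initialized (e.g. a fused-off VCN block), crashing on the rq lock.
 */
#include <fcntl.h>
#include <string.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>

int main(void)
{
	int master = open("/dev/dri/card0", O_RDWR); /* must be DRM master */
	int victim = open("/dev/dri/card0", O_RDWR);
	union drm_amdgpu_ctx ctx;
	union drm_amdgpu_sched sched;

	if (master < 0 || victim < 0)
		return 1;

	/* A fresh context allocates entities for all HW blocks. */
	memset(&ctx, 0, sizeof(ctx));
	ctx.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
	if (drmIoctl(victim, DRM_IOCTL_AMDGPU_CTX, &ctx))
		return 1;

	/* Touch every entity of every context owned by 'victim'. */
	memset(&sched, 0, sizeof(sched));
	sched.in.op = AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE;
	sched.in.fd = victim;
	sched.in.priority = AMDGPU_CTX_PRIORITY_HIGH;
	return drmIoctl(master, DRM_IOCTL_AMDGPU_SCHED, &sched);
}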


* [PATCH 2/3] drm/amdgpu: Check if fd really is an amdgpu fd.
@ 2019-01-29 10:20 Bas Nieuwenhuizen
From: Bas Nieuwenhuizen
  To: amd-gfx@lists.freedesktop.org; +Cc: Bas Nieuwenhuizen

Otherwise we would interpret the file's private data as drm & amdgpu
data even when it is not, which could result in memory corruption.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   | 16 ++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 10 +++++++---
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d67f8b1dfe80..17290cdb8ed8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -411,6 +411,8 @@ struct amdgpu_fpriv {
 	struct amdgpu_ctx_mgr	ctx_mgr;
 };
 
+int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
+
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		  unsigned size, struct amdgpu_ib *ib);
 void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index c806f984bcc5..90a520034c89 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1176,6 +1176,22 @@ static const struct file_operations amdgpu_driver_kms_fops = {
 #endif
 };
 
+int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
+{
+	struct drm_file *file;
+
+	if (!filp)
+		return -EINVAL;
+
+	if (filp->f_op != &amdgpu_driver_kms_fops) {
+		return -EINVAL;
+	}
+
+	file = filp->private_data;
+	*fpriv = file->driver_priv;
+	return 0;
+}
+
 static bool
 amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
 				 bool in_vblank_irq, int *vpos, int *hpos,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 1cafe8d83a4d..0b70410488b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -54,16 +54,20 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
 						  enum drm_sched_priority priority)
 {
 	struct file *filp = fget(fd);
-	struct drm_file *file;
 	struct amdgpu_fpriv *fpriv;
 	struct amdgpu_ctx *ctx;
 	uint32_t id;
+	int r;
 
 	if (!filp)
 		return -EINVAL;
 
-	file = filp->private_data;
-	fpriv = file->driver_priv;
+	r = amdgpu_file_to_fpriv(filp, &fpriv);
+	if (r) {
+		fput(filp);
+		return r;
+	}
+
 	idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id)
 		amdgpu_ctx_priority_override(ctx, priority);
 
-- 
2.20.1
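
[Editor's note: an illustrative check of the fix, not part of the thread;
same build assumptions as the sketch under patch 1. With this patch
applied, handing a non-amdgpu fd to the sched ioctl should fail cleanly
with EINVAL instead of the kernel misinterpreting foreign private_data.]

#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>

int main(void)
{
	int master = open("/dev/dri/card0", O_RDWR); /* must be DRM master */
	int bogus = open("/dev/null", O_RDWR);       /* anything non-amdgpu */
	union drm_amdgpu_sched sched;
	int r;

	if (master < 0 || bogus < 0)
		return 1;

	memset(&sched, 0, sizeof(sched));
	sched.in.op = AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE;
	sched.in.fd = bogus;
	sched.in.priority = AMDGPU_CTX_PRIORITY_NORMAL;

	/* Expect -1/EINVAL from amdgpu_file_to_fpriv rejecting the fd. */
	r = drmIoctl(master, DRM_IOCTL_AMDGPU_SCHED, &sched);
	return (r == -1 && errno == EINVAL) ? 0 : 1;
}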


* [PATCH 3/3] drm/amdgpu: Add command to override the context priority.
@ 2019-01-29 10:20 Bas Nieuwenhuizen
From: Bas Nieuwenhuizen
  To: amd-gfx@lists.freedesktop.org; +Cc: Bas Nieuwenhuizen

Given a master fd we can then override the priority of the context
in another fd.

Christian recommended using these overrides instead of trying to
submit from a master fd, and this adds a way to override a single
context instead of the entire process, so we can upgrade a single
Vulkan queue rather than effectively the whole process.

The flags field is reused, as it was required to be 0 anyway, so
nothing used it. This is source-incompatible (due to the name change),
but ABI-compatible.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 44 ++++++++++++++++++++++-
 include/uapi/drm/amdgpu_drm.h             |  3 +-
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 0b70410488b6..78f5a42f36e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -76,6 +76,39 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
 	return 0;
 }
 
+static int amdgpu_sched_context_priority_override(struct amdgpu_device *adev,
+						  int fd,
+						  unsigned ctx_id,
+						  enum drm_sched_priority priority)
+{
+	struct file *filp = fget(fd);
+	struct amdgpu_fpriv *fpriv;
+	struct amdgpu_ctx *ctx;
+	int r;
+
+	if (!filp)
+		return -EINVAL;
+
+	r = amdgpu_file_to_fpriv(filp, &fpriv);
+	if (r) {
+		fput(filp);
+		return r;
+	}
+
+	ctx = amdgpu_ctx_get(fpriv, ctx_id);
+
+	if (!ctx) {
+		fput(filp);
+		return -EINVAL;
+	}
+
+	amdgpu_ctx_priority_override(ctx, priority);
+	amdgpu_ctx_put(ctx);
+	fput(filp);
+
+	return 0;
+}
+
 int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *filp)
 {
@@ -85,15 +118,24 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
 	int r;
 
 	priority = amdgpu_to_sched_priority(args->in.priority);
-	if (args->in.flags || priority == DRM_SCHED_PRIORITY_INVALID)
+	if (priority == DRM_SCHED_PRIORITY_INVALID)
 		return -EINVAL;
 
+	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+		return -EINVAL;
+
 	switch (args->in.op) {
 	case AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE:
 		r = amdgpu_sched_process_priority_override(adev,
 							   args->in.fd,
 							   priority);
 		break;
+	case AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE:
+		r = amdgpu_sched_context_priority_override(adev,
+							   args->in.fd,
+							   args->in.ctx_id,
+							   priority);
+		break;
 	default:
 		DRM_ERROR("Invalid sched op specified: %d\n", args->in.op);
 		r = -EINVAL;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index faaad04814e4..30fa340790b2 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -275,13 +275,14 @@ union drm_amdgpu_vm {
 
 /* sched ioctl */
 #define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE	1
+#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE	2
 
 struct drm_amdgpu_sched_in {
 	/* AMDGPU_SCHED_OP_* */
 	__u32	op;
 	__u32	fd;
 	__s32	priority;
-	__u32	flags;
+	__u32	ctx_id;
 };
 
 union drm_amdgpu_sched {
-- 
2.20.1
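
[Editor's note: a hedged usage sketch for the new op, not part of the
thread; same headers as the earlier sketches. 'master_fd', 'other_fd'
and 'ctx_id' are hypothetical. The idea is that a privileged fd raises
the priority of one specific context, e.g. the one backing a single
Vulkan queue, rather than every context of the target process.]

static int upgrade_one_context(int master_fd, int other_fd, uint32_t ctx_id)
{
	union drm_amdgpu_sched sched;

	memset(&sched, 0, sizeof(sched));
	sched.in.op = AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE;
	sched.in.fd = other_fd;   /* fd that owns the context */
	sched.in.ctx_id = ctx_id; /* the one context to upgrade */
	sched.in.priority = AMDGPU_CTX_PRIORITY_HIGH;
	return drmIoctl(master_fd, DRM_IOCTL_AMDGPU_SCHED, &sched);
}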


* Re: [PATCH 1/3] drm/amdgpu: Fix entities for disabled HW blocks.
@ 2019-01-29 10:33 Christian König
From: Christian König
  To: Bas Nieuwenhuizen, amd-gfx@lists.freedesktop.org

Am 29.01.19 um 11:20 schrieb Bas Nieuwenhuizen:
> If we have some disabled HW blocks (say VCN), then the rings are
> not initialized. This results in entities that refer to uninitialized
> run queues (rqs).
>
> In normal usage this does not result in issues because userspace
> generally knows to ignore the unsupported blocks, but e.g. setting
> the priorities on all the entities resulted in a NULL access while
> locking the rq spinlock.
>
> This could probably also be triggered by actually submitting a job to
> one of these blocks with less careful userspace.

In general I agree that we need to improve the handling here. But this 
looks completely incorrect to me.

We should always initialize all entities, but only with rqs that are
initialized as well.

When this results in zero rqs, we can handle that gracefully later on.

Regards,
Christian.
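
[Editor's note: a rough sketch of the direction suggested above, built
only from the code visible in this patch ('rings', 'rqs', 'num_rings',
'priority', 'i', 'j' come from amdgpu_ctx_init); 'num_rqs' is a new
hypothetical local. Uninitialized rings are skipped when building the
rq list, and the entity is initialized with however many remain,
possibly zero.]

	unsigned num_rqs = 0;

	for (j = 0; j < num_rings; ++j) {
		if (!rings[j]->adev) /* ring of a disabled block, skip it */
			continue;
		rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
	}

	for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
		r = drm_sched_entity_init(&ctx->entities[i][j].entity,
					  rqs, num_rqs, &ctx->guilty);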

> [snip]


* Re: [PATCH 1/3] drm/amdgpu: Fix entities for disabled HW blocks.
@ 2019-01-29 10:41 Bas Nieuwenhuizen
From: Bas Nieuwenhuizen
  To: Christian König; +Cc: amd-gfx mailing list

On Tue, Jan 29, 2019 at 11:33 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 29.01.19 um 11:20 schrieb Bas Nieuwenhuizen:
> > If we have some disabled HW blocks (say VCN), then the rings are
> > not initialized. This results in entities that refer to uninitialized
> > run queues (rqs).
> >
> > In normal usage this does not result in issues because userspace
> > generally knows to ignore the unsupported blocks, but e.g. setting
> > the priorities on all the entities resulted in a NULL access while
> > locking the rq spinlock.
> >
> > This could probably also be triggered by actually submitting a job to
> > one of these blocks with less careful userspace.
>
> In general I agree that we need to improve the handling here. But this
> looks completely incorrect to me.

In what sense is this incorrect?

I'm fencing off all access to entities which use an uninitialized ring
(and therefore rq) and do not initialize/deinitialize them.

>
> We should always initialize all entities, but only with rq which are
> initialized as well.
>
> When this results in zero rq then we can later on handle that gracefully.
>
> Regards,
> Christian.
>
> > [snip]
>

* Re: [PATCH 1/3] drm/amdgpu: Fix entities for disabled HW blocks.
@ 2019-01-29 12:33 Koenig, Christian
From: Koenig, Christian
  To: Bas Nieuwenhuizen; +Cc: amd-gfx mailing list

Am 29.01.19 um 11:41 schrieb Bas Nieuwenhuizen:
> On Tue, Jan 29, 2019 at 11:33 AM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Am 29.01.19 um 11:20 schrieb Bas Nieuwenhuizen:
>>> If we have some disabled HW blocks (say VCN), then the rings are
>>> not initialized. This results in entities that refer to uninitialized
>>> run queues (rqs).
>>>
>>> In normal usage this does not result in issues because userspace
>>> generally knows to ignore the unsupported blocks, but e.g. setting
>>> the priorities on all the entities resulted in a NULL access while
>>> locking the rq spinlock.
>>>
>>> This could probably also be triggered by actually submitting a job to
>>> one of these blocks with less careful userspace.
>> In general I agree that we need to improve the handling here. But this
>> looks completely incorrect to me.
> In what sense is this incorrect?
>
> I'm fencing of all access to entities which use an uninitialized ring
> (and therefore rq) and do not initialize/deinitialize them.

Ok, my fault, you don't seem to understand the problem here :) An
entity is not associated with one runqueue, but with multiple ones.

And right now it is perfectly normal that one or more of those are not
present. For example, most AMD GPUs have only one SDMA block with two
instances, but we add all of them to the rq list anyway.

We should probably stop doing that, and then make sure that entities
can also initialize properly with zero runqueues in the list,
returning an error if you try to use those.

Christian.
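
[Editor's note: illustrative only. Assuming the scheduler entity of this
era keeps its run queues in 'rq_list'/'num_rq_list', the graceful
handling at use time could then be a check along these lines in the
submission path.]

	if (!entity->num_rq_list) {
		DRM_DEBUG("entity has no usable run queues\n");
		return -ENOENT;
	}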

>
>> [snip]
