* [PATCH 1/2] drm/amdgpu: individualize amdgpu entity allocation per HW IP
From: Nirmoy Das @ 2020-01-21 16:50 UTC
  To: amd-gfx; +Cc: alexander.deucher, kenny.ho, nirmoy.das, christian.koenig

Do not allocate all the entities at once. This is required for
dynamic amdgpu_ctx_entity creation.
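
For reference, a minimal sketch of the layout change (lifted from the
hunks below; error handling elided):

	/* Before: one block, with the per-IP pointers offset into it. */
	ctx->entities[0] = kcalloc(num_entities,
				   sizeof(struct amdgpu_ctx_entity),
				   GFP_KERNEL);
	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
		ctx->entities[i] = ctx->entities[i - 1] +
			amdgpu_ctx_num_entities[i - 1];

	/* After: one allocation per HW IP, each freeable on its own. */
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
		ctx->entities[i] = kcalloc(amdgpu_ctx_num_entities[i],
					   sizeof(struct amdgpu_ctx_entity),
					   GFP_KERNEL);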

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 130 ++++++++++++------------
 1 file changed, 65 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 05c2af61e7de..91638a2a5163 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -42,16 +42,6 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
 	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
 };
 
-static int amdgpu_ctx_total_num_entities(void)
-{
-	unsigned i, num_entities = 0;
-
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
-		num_entities += amdgpu_ctx_num_entities[i];
-
-	return num_entities;
-}
-
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 				      enum drm_sched_priority priority)
 {
@@ -73,7 +63,6 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			   struct drm_file *filp,
 			   struct amdgpu_ctx *ctx)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	unsigned i, j;
 	int r;
 
@@ -87,28 +76,29 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	memset(ctx, 0, sizeof(*ctx));
 	ctx->adev = adev;
 
-
-	ctx->entities[0] = kcalloc(num_entities,
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		ctx->entities[i] = kcalloc(amdgpu_ctx_num_entities[i],
 				   sizeof(struct amdgpu_ctx_entity),
 				   GFP_KERNEL);
-	if (!ctx->entities[0])
-		return -ENOMEM;
 
+		if (!ctx->entities[0]) {
+			r =  -ENOMEM;
+			goto error_cleanup_entity_memory;
+		}
+	}
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
 
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
 
-		entity->sequence = 1;
-		entity->fences = kcalloc(amdgpu_sched_jobs,
+			entity->sequence = 1;
+			entity->fences = kcalloc(amdgpu_sched_jobs,
 					 sizeof(struct dma_fence*), GFP_KERNEL);
-		if (!entity->fences) {
-			r = -ENOMEM;
-			goto error_cleanup_memory;
+			if (!entity->fences) {
+				r = -ENOMEM;
+				goto error_cleanup_memory;
+			}
 		}
-	}
-	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
-		ctx->entities[i] = ctx->entities[i - 1] +
-			amdgpu_ctx_num_entities[i - 1];
 
 	kref_init(&ctx->refcount);
 	spin_lock_init(&ctx->ring_lock);
@@ -179,44 +169,52 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	return 0;
 
 error_cleanup_entities:
-	for (i = 0; i < num_entities; ++i)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
+			drm_sched_entity_destroy(&ctx->entities[i][j].entity);
 
 error_cleanup_memory:
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
 
-		kfree(entity->fences);
-		entity->fences = NULL;
+			kfree(entity->fences);
+			entity->fences = NULL;
+		}
+
+error_cleanup_entity_memory:
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		kfree(ctx->entities[i]);
+		ctx->entities[i] = NULL;
 	}
 
-	kfree(ctx->entities[0]);
-	ctx->entities[0] = NULL;
 	return r;
 }
 
 static void amdgpu_ctx_fini(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_device *adev = ctx->adev;
-	unsigned i, j;
+	unsigned i, j, k;
 
 	if (!adev)
 		return;
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
 
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+			for (k = 0; k < amdgpu_sched_jobs; ++k)
+				dma_fence_put(entity->fences[k]);
 
-		for (j = 0; j < amdgpu_sched_jobs; ++j)
-			dma_fence_put(entity->fences[j]);
+			kfree(entity->fences);
+		}
 
-		kfree(entity->fences);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		kfree(ctx->entities[i]);
+		ctx->entities[i] = NULL;
 	}
 
-	kfree(ctx->entities[0]);
 	mutex_destroy(&ctx->lock);
-
 	kfree(ctx);
 }
 
@@ -279,14 +277,12 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx;
-	unsigned num_entities;
-	u32 i;
+	u32 i, j;
 
 	ctx = container_of(ref, struct amdgpu_ctx, refcount);
-
-	num_entities = amdgpu_ctx_total_num_entities();
-	for (i = 0; i < num_entities; i++)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
+			drm_sched_entity_destroy(&ctx->entities[i][j].entity);
 
 	amdgpu_ctx_fini(ref);
 }
@@ -516,20 +512,21 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 				  enum drm_sched_priority priority)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	enum drm_sched_priority ctx_prio;
-	unsigned i;
+	unsigned i, j;
 
 	ctx->override_priority = priority;
 
 	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
 			ctx->init_priority : ctx->override_priority;
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
 
-	for (i = 0; i < num_entities; i++) {
-		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
+			struct drm_sched_entity *entity =
+				&ctx->entities[i][j].entity;
 
-		drm_sched_entity_set_priority(entity, ctx_prio);
-	}
+			drm_sched_entity_set_priority(entity, ctx_prio);
+		}
 }
 
 int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
@@ -564,20 +561,20 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;
 
 	idp = &mgr->ctx_handles;
 
 	mutex_lock(&mgr->lock);
 	idr_for_each_entry(idp, ctx, id) {
-		for (i = 0; i < num_entities; i++) {
-			struct drm_sched_entity *entity;
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;
 
-			entity = &ctx->entities[0][i].entity;
-			timeout = drm_sched_entity_flush(entity, timeout);
+				entity = &ctx->entities[i][j].entity;
+				timeout = drm_sched_entity_flush(entity, timeout);
 		}
 	}
 	mutex_unlock(&mgr->lock);
@@ -586,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;
 
 	idp = &mgr->ctx_handles;
 
@@ -598,9 +594,13 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 			DRM_ERROR("ctx %p is still alive\n", ctx);
 			continue;
 		}
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;
 
-		for (i = 0; i < num_entities; i++)
-			drm_sched_entity_fini(&ctx->entities[0][i].entity);
+				entity = &ctx->entities[i][j].entity;
+				drm_sched_entity_fini(entity);
+			}
 	}
 }
 
-- 
2.24.1


* [PATCH 2/2] drm/amdgpu: allocate entities on demand
From: Nirmoy Das @ 2020-01-21 16:50 UTC
  To: amd-gfx; +Cc: alexander.deucher, kenny.ho, nirmoy.das, christian.koenig

Currently we pre-allocate entities and fences for all the HW IPs on
context creation, some of which might never be used.

This patch avoids that waste by creating the entities for a HW IP only
when they are first required.
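
The core of the change, as a minimal sketch (mirroring the
amdgpu_ctx_get_entity() hunk below; argument validation elided):

	/* The first user of a HW IP triggers allocation of its entities. */
	if (ctx->entities[hw_ip] == NULL)
		amdgpu_ctx_init_entity(ctx, hw_ip);

	*entity = &ctx->entities[hw_ip][ring].entity;
	return 0;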

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 163 +++++++++++++-----------
 1 file changed, 92 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 91638a2a5163..43f1266b1b2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -58,64 +58,37 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 	return -EACCES;
 }
 
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
-			   enum drm_sched_priority priority,
-			   struct drm_file *filp,
-			   struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip)
 {
-	unsigned i, j;
-	int r;
+	struct amdgpu_device *adev = ctx->adev;
+	struct drm_gpu_scheduler **scheds;
+	struct drm_gpu_scheduler *sched;
+	unsigned num_scheds = 0;
+	enum drm_sched_priority priority;
+	int j, r;
 
-	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
-		return -EINVAL;
+	ctx->entities[hw_ip] = kcalloc(amdgpu_ctx_num_entities[hw_ip],
+			sizeof(struct amdgpu_ctx_entity), GFP_KERNEL);
 
-	r = amdgpu_ctx_priority_permit(filp, priority);
-	if (r)
-		return r;
+	if (!ctx->entities[hw_ip])
+		return  -ENOMEM;
 
-	memset(ctx, 0, sizeof(*ctx));
-	ctx->adev = adev;
+	for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
 
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-		ctx->entities[i] = kcalloc(amdgpu_ctx_num_entities[i],
-				   sizeof(struct amdgpu_ctx_entity),
-				   GFP_KERNEL);
+		struct amdgpu_ctx_entity *entity = &ctx->entities[hw_ip][j];
 
-		if (!ctx->entities[0]) {
-			r =  -ENOMEM;
-			goto error_cleanup_entity_memory;
+		entity->sequence = 1;
+		entity->fences = kcalloc(amdgpu_sched_jobs,
+				sizeof(struct dma_fence*), GFP_KERNEL);
+		if (!entity->fences) {
+			r = -ENOMEM;
+			goto error_cleanup_memory;
 		}
 	}
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
-
-			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
-
-			entity->sequence = 1;
-			entity->fences = kcalloc(amdgpu_sched_jobs,
-					 sizeof(struct dma_fence*), GFP_KERNEL);
-			if (!entity->fences) {
-				r = -ENOMEM;
-				goto error_cleanup_memory;
-			}
-		}
 
-	kref_init(&ctx->refcount);
-	spin_lock_init(&ctx->ring_lock);
-	mutex_init(&ctx->lock);
-
-	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
-	ctx->reset_counter_query = ctx->reset_counter;
-	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
-	ctx->init_priority = priority;
-	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
-
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-		struct drm_gpu_scheduler **scheds;
-		struct drm_gpu_scheduler *sched;
-		unsigned num_scheds = 0;
-
-		switch (i) {
+	priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+				ctx->init_priority : ctx->override_priority;
+	switch (hw_ip) {
 		case AMDGPU_HW_IP_GFX:
 			sched = &adev->gfx.gfx_ring[0].sched;
 			scheds = &sched;
@@ -156,12 +129,12 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			scheds = adev->jpeg.jpeg_sched;
 			num_scheds =  adev->jpeg.num_jpeg_sched;
 			break;
-		}
+	}
 
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-						  priority, scheds,
-						  num_scheds, &ctx->guilty);
+	for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
+		r = drm_sched_entity_init(&ctx->entities[hw_ip][j].entity,
+				priority, scheds,
+				num_scheds, &ctx->guilty);
 		if (r)
 			goto error_cleanup_entities;
 	}
@@ -169,28 +142,54 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	return 0;
 
 error_cleanup_entities:
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-			drm_sched_entity_destroy(&ctx->entities[i][j].entity);
+	for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j)
+		drm_sched_entity_destroy(&ctx->entities[hw_ip][j].entity);
 
 error_cleanup_memory:
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
-			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
+	for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
+		struct amdgpu_ctx_entity *entity = &ctx->entities[hw_ip][j];
 
-			kfree(entity->fences);
-			entity->fences = NULL;
-		}
-
-error_cleanup_entity_memory:
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-		kfree(ctx->entities[i]);
-		ctx->entities[i] = NULL;
+		kfree(entity->fences);
+		entity->fences = NULL;
 	}
 
+	kfree(ctx->entities[hw_ip]);
+	ctx->entities[hw_ip] = NULL;
+
 	return r;
 }
 
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+			   enum drm_sched_priority priority,
+			   struct drm_file *filp,
+			   struct amdgpu_ctx *ctx)
+{
+	int r;
+
+	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+		return -EINVAL;
+
+	r = amdgpu_ctx_priority_permit(filp, priority);
+	if (r)
+		return r;
+
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->adev = adev;
+
+	kref_init(&ctx->refcount);
+	spin_lock_init(&ctx->ring_lock);
+	mutex_init(&ctx->lock);
+
+	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+	ctx->reset_counter_query = ctx->reset_counter;
+	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+	ctx->init_priority = priority;
+	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+
+	return 0;
+
+}
+
 static void amdgpu_ctx_fini(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
@@ -201,10 +200,18 @@ static void amdgpu_ctx_fini(struct kref *ref)
 		return;
 	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
 		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
-			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
+			struct amdgpu_ctx_entity *entity;
+
+			if (!ctx->entities[i])
+				continue;
+
+			entity = &ctx->entities[i][j];
+			if (!entity->fences)
+				continue;
 
-			for (k = 0; k < amdgpu_sched_jobs; ++k)
+			for (k = 0; k < amdgpu_sched_jobs; ++k) {
 				dma_fence_put(entity->fences[k]);
+			}
 
 			kfree(entity->fences);
 		}
@@ -237,6 +244,11 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 		return -EINVAL;
 	}
 
+	if (ctx->entities[hw_ip] == NULL) {
+		amdgpu_ctx_init_entity(ctx, hw_ip);
+	}
+
+
 	*entity = &ctx->entities[hw_ip][ring].entity;
 	return 0;
 }
@@ -281,8 +293,11 @@ static void amdgpu_ctx_do_release(struct kref *ref)
 
 	ctx = container_of(ref, struct amdgpu_ctx, refcount);
 	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			if (!ctx->entities[i])
+				continue;
 			drm_sched_entity_destroy(&ctx->entities[i][j].entity);
+		}
 
 	amdgpu_ctx_fini(ref);
 }
@@ -573,6 +588,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
 				struct drm_sched_entity *entity;
 
+				if (!ctx->entities[i])
+					continue;
+
 				entity = &ctx->entities[i][j].entity;
 				timeout = drm_sched_entity_flush(entity, timeout);
 		}
@@ -598,6 +616,9 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
 				struct drm_sched_entity *entity;
 
+				if (!ctx->entities[i])
+					continue;
+
 				entity = &ctx->entities[i][j].entity;
 				drm_sched_entity_fini(entity);
 			}
-- 
2.24.1


* Re: [PATCH 1/2] drm/amdgpu: individualize amdgpu entity allocation per HW IP
From: Luben Tuikov @ 2020-01-21 23:34 UTC
  To: Nirmoy Das, amd-gfx
  Cc: alexander.deucher, kenny.ho, nirmoy.das, christian.koenig

On 2020-01-21 11:50 a.m., Nirmoy Das wrote:
> Do not allocate all the entities at once. This is required for
> dynamic amdgpu_ctx_entity creation.
> 
> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 130 ++++++++++++------------
>  1 file changed, 65 insertions(+), 65 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 05c2af61e7de..91638a2a5163 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -42,16 +42,6 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
>  	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
>  };
>  
> -static int amdgpu_ctx_total_num_entities(void)
> -{
> -	unsigned i, num_entities = 0;
> -
> -	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> -		num_entities += amdgpu_ctx_num_entities[i];
> -
> -	return num_entities;
> -}
> -
>  static int amdgpu_ctx_priority_permit(struct drm_file *filp,
>  				      enum drm_sched_priority priority)
>  {
> @@ -73,7 +63,6 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>  			   struct drm_file *filp,
>  			   struct amdgpu_ctx *ctx)
>  {
> -	unsigned num_entities = amdgpu_ctx_total_num_entities();
>  	unsigned i, j;
>  	int r;
>  
> @@ -87,28 +76,29 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>  	memset(ctx, 0, sizeof(*ctx));
>  	ctx->adev = adev;
>  
> -
> -	ctx->entities[0] = kcalloc(num_entities,
> +	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> +		ctx->entities[i] = kcalloc(amdgpu_ctx_num_entities[i],
>  				   sizeof(struct amdgpu_ctx_entity),
>  				   GFP_KERNEL);

Are these lines indented to the argument list column? It seems
they are not...

> -	if (!ctx->entities[0])
> -		return -ENOMEM;
>  
> +		if (!ctx->entities[0]) {
> +			r =  -ENOMEM;
> +			goto error_cleanup_entity_memory;
> +		}
> +	}

Break the paragraphs with an empty line here.

> +	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> +		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {

Please use braces for the outer for-loop, the i-counter.
That style gives each of the two indentation levels
its own closing row/column.
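
For instance, with both loops braced (the v2 repost at the end of this
thread adopts this form):

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
			...
		}
	}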

>  
> -	for (i = 0; i < num_entities; ++i) {
> -		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> +			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
>  
> -		entity->sequence = 1;
> -		entity->fences = kcalloc(amdgpu_sched_jobs,
> +			entity->sequence = 1;
> +			entity->fences = kcalloc(amdgpu_sched_jobs,
>  					 sizeof(struct dma_fence*), GFP_KERNEL);

The indentation of sizeof(... seems incorrect.

> -		if (!entity->fences) {
> -			r = -ENOMEM;
> -			goto error_cleanup_memory;
> +			if (!entity->fences) {
> +				r = -ENOMEM;
> +				goto error_cleanup_memory;
> +			}
>  		}
> -	}

This brace would stay...

> -	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
> -		ctx->entities[i] = ctx->entities[i - 1] +
> -			amdgpu_ctx_num_entities[i - 1];
>  
>  	kref_init(&ctx->refcount);
>  	spin_lock_init(&ctx->ring_lock);
> @@ -179,44 +169,52 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>  	return 0;
>  
>  error_cleanup_entities:

Notice this label sits after the "return 0;", so it really
is an "unroll" and "free" operation.

> -	for (i = 0; i < num_entities; ++i)
> -		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> +	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> +		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> +			drm_sched_entity_destroy(&ctx->entities[i][j].entity);
>  
>  error_cleanup_memory:
> -	for (i = 0; i < num_entities; ++i) {
> -		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> +	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)

Add the brace back in for completeness and style.

> +		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> +			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
>  
> -		kfree(entity->fences);
> -		entity->fences = NULL;
> +			kfree(entity->fences);
> +			entity->fences = NULL;
> +		}
> +
> +error_cleanup_entity_memory:

"cleanup" refers to something spilled, or something to be collected.
(Or winning in a wagered game of chance.) Also at "module_exit", maybe.

The kernel has traditionally used "unroll" and "free" for this. Since
you're unrolling the loop (notice that it sits after the "return 0;"),
you can backtrack and name it like this:

Error_unroll_free1:
	for ( ; i >= 0; i--)
		free(my_array[i]);
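
A slightly fuller sketch of that unwind idiom against this allocation
(hypothetical label name; since the counters in this function are
unsigned, a countdown test avoids an always-true i >= 0 comparison):

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		ctx->entities[i] = kcalloc(amdgpu_ctx_num_entities[i],
					   sizeof(struct amdgpu_ctx_entity),
					   GFP_KERNEL);
		if (!ctx->entities[i]) {
			r = -ENOMEM;
			goto err_unroll_free_entities;
		}
	}
	return 0;

err_unroll_free_entities:
	while (i--)		/* frees only the indices already allocated */
		kfree(ctx->entities[i]);
	return r;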

> +	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> +		kfree(ctx->entities[i]);
> +		ctx->entities[i] = NULL;
>  	}
>  
> -	kfree(ctx->entities[0]);
> -	ctx->entities[0] = NULL;
>  	return r;
>  }
>  
>  static void amdgpu_ctx_fini(struct kref *ref)
>  {
>  	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
> -	unsigned num_entities = amdgpu_ctx_total_num_entities();
>  	struct amdgpu_device *adev = ctx->adev;
> -	unsigned i, j;
> +	unsigned i, j, k;
>  
>  	if (!adev)
>  		return;
> +	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> +		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> +			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
>  
> -	for (i = 0; i < num_entities; ++i) {
> -		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> +			for (k = 0; k < amdgpu_sched_jobs; ++k)
> +				dma_fence_put(entity->fences[k]);
>  
> -		for (j = 0; j < amdgpu_sched_jobs; ++j)
> -			dma_fence_put(entity->fences[j]);
> +			kfree(entity->fences);
> +		}
>  
> -		kfree(entity->fences);
> +	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> +		kfree(ctx->entities[i]);
> +		ctx->entities[i] = NULL;
>  	}
>  
> -	kfree(ctx->entities[0]);
>  	mutex_destroy(&ctx->lock);
> -
>  	kfree(ctx);
>  }
>  
> @@ -279,14 +277,12 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
>  static void amdgpu_ctx_do_release(struct kref *ref)
>  {
>  	struct amdgpu_ctx *ctx;
> -	unsigned num_entities;
> -	u32 i;
> +	u32 i, j;
>  
>  	ctx = container_of(ref, struct amdgpu_ctx, refcount);
> -
> -	num_entities = amdgpu_ctx_total_num_entities();
> -	for (i = 0; i < num_entities; i++)
> -		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> +	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> +		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> +			drm_sched_entity_destroy(&ctx->entities[i][j].entity);
>  
>  	amdgpu_ctx_fini(ref);
>  }
> @@ -516,20 +512,21 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
>  void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
>  				  enum drm_sched_priority priority)
>  {
> -	unsigned num_entities = amdgpu_ctx_total_num_entities();
>  	enum drm_sched_priority ctx_prio;
> -	unsigned i;
> +	unsigned i, j;
>  
>  	ctx->override_priority = priority;
>  
>  	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
>  			ctx->init_priority : ctx->override_priority;
> +	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)

Brace.

> +		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>  
> -	for (i = 0; i < num_entities; i++) {
> -		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
> +			struct drm_sched_entity *entity =
> +				&ctx->entities[i][j].entity;
>  
> -		drm_sched_entity_set_priority(entity, ctx_prio);
> -	}
> +			drm_sched_entity_set_priority(entity, ctx_prio);
> +		}
	} ;-)
>  }
>  
>  int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
> @@ -564,20 +561,20 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
>  
>  long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
>  {
> -	unsigned num_entities = amdgpu_ctx_total_num_entities();
>  	struct amdgpu_ctx *ctx;
>  	struct idr *idp;
> -	uint32_t id, i;
> +	uint32_t id, i, j;
>  
>  	idp = &mgr->ctx_handles;
>  
>  	mutex_lock(&mgr->lock);
>  	idr_for_each_entry(idp, ctx, id) {
> -		for (i = 0; i < num_entities; i++) {
> -			struct drm_sched_entity *entity;
> +		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)

Brace.

> +			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> +				struct drm_sched_entity *entity;
>  
> -			entity = &ctx->entities[0][i].entity;
> -			timeout = drm_sched_entity_flush(entity, timeout);
> +				entity = &ctx->entities[i][j].entity;
> +				timeout = drm_sched_entity_flush(entity, timeout);
>  		}
>  	}
>  	mutex_unlock(&mgr->lock);
> @@ -586,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
>  
>  void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
>  {
> -	unsigned num_entities = amdgpu_ctx_total_num_entities();
>  	struct amdgpu_ctx *ctx;
>  	struct idr *idp;
> -	uint32_t id, i;
> +	uint32_t id, i, j;
>  
>  	idp = &mgr->ctx_handles;
>  
> @@ -598,9 +594,13 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
>  			DRM_ERROR("ctx %p is still alive\n", ctx);
>  			continue;
>  		}
> +		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)

Brace.

> +			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> +				struct drm_sched_entity *entity;
>  
> -		for (i = 0; i < num_entities; i++)
> -			drm_sched_entity_fini(&ctx->entities[0][i].entity);
> +				entity = &ctx->entities[i][j].entity;
> +				drm_sched_entity_fini(entity);
> +			}
>  	}
>  }
>  
> 

Regards,
Luben

* Re: [PATCH 2/2] drm/amdgpu: allocate entities on demand
From: Luben Tuikov @ 2020-01-22  0:07 UTC
  To: Nirmoy Das, amd-gfx
  Cc: alexander.deucher, kenny.ho, nirmoy.das, christian.koenig

On 2020-01-21 11:50 a.m., Nirmoy Das wrote:
> Currently we pre-allocate entities and fences for all the HW IPs on
> context creation, some of which might never be used.
> 
> This patch avoids that waste by creating the entities for a HW IP only
> when they are first required.
> 
> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 163 +++++++++++++-----------
>  1 file changed, 92 insertions(+), 71 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 91638a2a5163..43f1266b1b2e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -58,64 +58,37 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
>  	return -EACCES;
>  }
>  
> -static int amdgpu_ctx_init(struct amdgpu_device *adev,
> -			   enum drm_sched_priority priority,
> -			   struct drm_file *filp,
> -			   struct amdgpu_ctx *ctx)
> +static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip)

I believe we can set "hw_ip" to "const u32 hw_ip", to protect it
from accidental changes.

>  {
> -	unsigned i, j;
> -	int r;
> +	struct amdgpu_device *adev = ctx->adev;
> +	struct drm_gpu_scheduler **scheds;
> +	struct drm_gpu_scheduler *sched;
> +	unsigned num_scheds = 0;
> +	enum drm_sched_priority priority;
> +	int j, r;
>  
> -	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
> -		return -EINVAL;
> +	ctx->entities[hw_ip] = kcalloc(amdgpu_ctx_num_entities[hw_ip],
> +			sizeof(struct amdgpu_ctx_entity), GFP_KERNEL);
>  
> -	r = amdgpu_ctx_priority_permit(filp, priority);
> -	if (r)
> -		return r;
> +	if (!ctx->entities[hw_ip])
> +		return  -ENOMEM;

There seem to be two spaces here.

>  
> -	memset(ctx, 0, sizeof(*ctx));
> -	ctx->adev = adev;
> +	for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
>  
> -	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> -		ctx->entities[i] = kcalloc(amdgpu_ctx_num_entities[i],
> -				   sizeof(struct amdgpu_ctx_entity),
> -				   GFP_KERNEL);
> +		struct amdgpu_ctx_entity *entity = &ctx->entities[hw_ip][j];
>  
> -		if (!ctx->entities[0]) {
> -			r =  -ENOMEM;
> -			goto error_cleanup_entity_memory;
> +		entity->sequence = 1;
> +		entity->fences = kcalloc(amdgpu_sched_jobs,
> +				sizeof(struct dma_fence*), GFP_KERNEL);

The indent here seems wrong...

> +		if (!entity->fences) {
> +			r = -ENOMEM;
> +			goto error_cleanup_memory;
>  		}
>  	}
> -	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> -		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> -
> -			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
> -
> -			entity->sequence = 1;
> -			entity->fences = kcalloc(amdgpu_sched_jobs,
> -					 sizeof(struct dma_fence*), GFP_KERNEL);
> -			if (!entity->fences) {
> -				r = -ENOMEM;
> -				goto error_cleanup_memory;
> -			}
> -		}
>  
> -	kref_init(&ctx->refcount);
> -	spin_lock_init(&ctx->ring_lock);
> -	mutex_init(&ctx->lock);
> -
> -	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
> -	ctx->reset_counter_query = ctx->reset_counter;
> -	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
> -	ctx->init_priority = priority;
> -	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
> -
> -	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> -		struct drm_gpu_scheduler **scheds;
> -		struct drm_gpu_scheduler *sched;
> -		unsigned num_scheds = 0;
> -
> -		switch (i) {
> +	priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
> +				ctx->init_priority : ctx->override_priority;

You don't need parentheses around the relational equality operator used here.
It has higher precedence than the ternary conditional, in which it is embedded.
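
That is, the two forms parse identically:

	priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;

	priority = ctx->override_priority == DRM_SCHED_PRIORITY_UNSET ?
			ctx->init_priority : ctx->override_priority;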

> +	switch (hw_ip) {
>  		case AMDGPU_HW_IP_GFX:
>  			sched = &adev->gfx.gfx_ring[0].sched;
>  			scheds = &sched;
> @@ -156,12 +129,12 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>  			scheds = adev->jpeg.jpeg_sched;
>  			num_scheds =  adev->jpeg.num_jpeg_sched;
>  			break;
> -		}
> +	}
>  
> -		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> -			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
> -						  priority, scheds,
> -						  num_scheds, &ctx->guilty);
> +	for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
> +		r = drm_sched_entity_init(&ctx->entities[hw_ip][j].entity,
> +				priority, scheds,
> +				num_scheds, &ctx->guilty);

The indent here seems off...

>  		if (r)
>  			goto error_cleanup_entities;
>  	}
> @@ -169,28 +142,54 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>  	return 0;
>  
>  error_cleanup_entities:
> -	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> -		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> -			drm_sched_entity_destroy(&ctx->entities[i][j].entity);
> +	for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j)
> +		drm_sched_entity_destroy(&ctx->entities[hw_ip][j].entity);
>  
>  error_cleanup_memory:
> -	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> -		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> -			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
> +	for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
> +		struct amdgpu_ctx_entity *entity = &ctx->entities[hw_ip][j];
>  
> -			kfree(entity->fences);
> -			entity->fences = NULL;
> -		}
> -
> -error_cleanup_entity_memory:
> -	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> -		kfree(ctx->entities[i]);
> -		ctx->entities[i] = NULL;
> +		kfree(entity->fences);
> +		entity->fences = NULL;
>  	}
>  
> +	kfree(ctx->entities[hw_ip]);
> +	ctx->entities[hw_ip] = NULL;
> +
>  	return r;
>  }
>  
> +static int amdgpu_ctx_init(struct amdgpu_device *adev,
> +			   enum drm_sched_priority priority,
> +			   struct drm_file *filp,
> +			   struct amdgpu_ctx *ctx)

The indent of the argument list here seems off...

> +{
> +	int r;
> +
> +	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
> +		return -EINVAL;

This shouldn't be possible since it is an enum...
But why not do this check in "amdgpu_ctx_priority_permit()"
which is introduced by this patchset and used in the immediately
following line?

Or perhaps fix up amdgpu_to_sched_priority() where it is massaged
from the ioctl argument which is an integer.
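
For instance, a sketch of folding the range check into the permit
helper (its remaining body, elided here, keeps the existing -EACCES
logic):

	static int amdgpu_ctx_priority_permit(struct drm_file *filp,
					      enum drm_sched_priority priority)
	{
		if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
			return -EINVAL;

		/* ...existing permission checks... */
		return -EACCES;
	}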

On a side note: I noticed that the enum drm_sched_priority
has no DRM_SCHED_PRIORITY_NONE.

I've found value in setting the first value of an enum to
"_NONE" (unless zero actually has a meaning as set by HW/etc., anyway).
Enum drm_sched_priority starts with "_MIN" and "_LOW" which
are both set to the same (zero) value.

So having DRM_SCHED_PRIORITY_NONE, could be used to denote
that no priority was given and any is fine, or to mean
that if the priority was given out of range, set it to "none",
to mean pick any.
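
A hypothetical sketch of that enum shape (not the actual
include/drm/gpu_scheduler.h definition):

	enum drm_sched_priority {
		DRM_SCHED_PRIORITY_NONE,	/* 0: none given, pick any */
		DRM_SCHED_PRIORITY_MIN,
		DRM_SCHED_PRIORITY_LOW = DRM_SCHED_PRIORITY_MIN,
		/* ... */
		DRM_SCHED_PRIORITY_MAX,
	};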

> +
> +	r = amdgpu_ctx_priority_permit(filp, priority);
> +	if (r)
> +		return r;
> +
> +	memset(ctx, 0, sizeof(*ctx));
> +	ctx->adev = adev;
> +
> +	kref_init(&ctx->refcount);
> +	spin_lock_init(&ctx->ring_lock);
> +	mutex_init(&ctx->lock);
> +
> +	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
> +	ctx->reset_counter_query = ctx->reset_counter;
> +	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
> +	ctx->init_priority = priority;
> +	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
> +
> +	return 0;
> +
> +}
> +
>  static void amdgpu_ctx_fini(struct kref *ref)
>  {
>  	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
> @@ -201,10 +200,18 @@ static void amdgpu_ctx_fini(struct kref *ref)
>  		return;
>  	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
>  		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> -			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
> +			struct amdgpu_ctx_entity *entity;
> +
> +			if (!ctx->entities[i])
> +				continue;
> +
> +			entity = &ctx->entities[i][j];
> +			if (!entity->fences)
> +				continue;
>  
> -			for (k = 0; k < amdgpu_sched_jobs; ++k)
> +			for (k = 0; k < amdgpu_sched_jobs; ++k) {
>  				dma_fence_put(entity->fences[k]);
> +			}

LKCS: A single non-compound statement as the body of a loop doesn't
warrant braces. So you can leave this as it was.
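
That is, simply:

	for (k = 0; k < amdgpu_sched_jobs; ++k)
		dma_fence_put(entity->fences[k]);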

>  
>  			kfree(entity->fences);
>  		}
> @@ -237,6 +244,11 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
>  		return -EINVAL;
>  	}
>  
> +	if (ctx->entities[hw_ip] == NULL) {
> +		amdgpu_ctx_init_entity(ctx, hw_ip);
> +	}

No need for braces.

> +
> +
>  	*entity = &ctx->entities[hw_ip][ring].entity;
>  	return 0;
>  }
> @@ -281,8 +293,11 @@ static void amdgpu_ctx_do_release(struct kref *ref)
>  
>  	ctx = container_of(ref, struct amdgpu_ctx, refcount);
>  	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> -		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> +		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> +			if (!ctx->entities[i])
> +				continue;
>  			drm_sched_entity_destroy(&ctx->entities[i][j].entity);
> +		}
>  
>  	amdgpu_ctx_fini(ref);
>  }
> @@ -573,6 +588,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
>  			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>  				struct drm_sched_entity *entity;
>  
> +				if (!ctx->entities[i])
> +					continue;
> +
>  				entity = &ctx->entities[i][j].entity;
>  				timeout = drm_sched_entity_flush(entity, timeout);
>  		}
> @@ -598,6 +616,9 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
>  			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>  				struct drm_sched_entity *entity;
>  
> +				if (!ctx->entities[i])
> +					continue;
> +
>  				entity = &ctx->entities[i][j].entity;
>  				drm_sched_entity_fini(entity);
>  			}
> 

I think these changes are good and in the right direction.

Regards,
Luben

* Re: [PATCH 2/2] drm/amdgpu: allocate entities on demand
From: Nirmoy @ 2020-01-22  9:25 UTC
  To: Luben Tuikov, amd-gfx
  Cc: alexander.deucher, kenny.ho, nirmoy.das, christian.koenig

Hi Luben,

Thanks for reviewing. Was expecting a lot from vim auto-indent :/

On 1/22/20 1:07 AM, Luben Tuikov wrote:
> On 2020-01-21 11:50 a.m., Nirmoy Das wrote:
>
>> -		switch (i) {
>> +	priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
>> +				ctx->init_priority : ctx->override_priority;
> You don't need parentheses around the relational equality operator used here.
> It has higher precedence than the ternary conditional, in which it is embedded.

Parentheses make it more human-readable.


>> +	int r;
>> +
>> +	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
>> +		return -EINVAL;
> This shouldn't be possible since it is an enum...
What should not be possible?
> But why not do this check in "amdgpu_ctx_priority_permit()"
> which is introduced by this patchset and used in the immediately
> following line?
Makes sense.
>
> Or perhaps fix up amdgpu_to_sched_priority() where it is massaged
> from the ioctl argument which is an integer.
>
> On a side note: I noticed that the enum drm_sched_priority
> has no DRM_SCHED_PRIORITY_NONE.
>
> I've found value in setting the first value of an enum to
> "_NONE" (unless zero actually has a meaning as set by HW/etc., anyway).
> Enum drm_sched_priority starts with "_MIN" and "_LOW" which
> are both set to the same (zero) value.
>
> So having DRM_SCHED_PRIORITY_NONE, could be used to denote
> that no priority was given and any is fine, or to mean
> that if the priority was given out of range, set it to "none",
> to mean pick any.

Not sure if the HW accepts 0.


Regards,

Nirmoy



* Re: [PATCH 1/2] drm/amdgpu: individualize amdgpu entity allocation per HW IP
From: Nirmoy @ 2020-01-22  9:32 UTC
  To: Luben Tuikov, Nirmoy Das, amd-gfx
  Cc: alexander.deucher, kenny.ho, nirmoy.das, christian.koenig


On 1/22/20 12:34 AM, Luben Tuikov wrote:
>
>> +		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> +			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
>>   
>> -		kfree(entity->fences);
>> -		entity->fences = NULL;
>> +			kfree(entity->fences);
>> +			entity->fences = NULL;
>> +		}
>> +
>> +error_cleanup_entity_memory:
> "cleanup" refers to something spilled, or something to be collected.
It is collecting memory.
> (Or winning in a wagered game of chance.) Also at "module_exit", maybe.
>
> The kernel has traditionally used "unroll" and "free" for this. Since
> you're unrolling the loop (notice that it sits after the "return 0;"),
> you can backtrack and name it like this:
>
> Error_unroll_free1:
> 	for ( ; i >= 0; i--)
> 		free(my_array[i]);
>
I prefer the existing style; it makes things more readable.


Regards,

Nirmoy


* Re: [PATCH 2/2] drm/amdgpu: allocate entities on demand
From: Luben Tuikov @ 2020-01-22 15:51 UTC
  To: Nirmoy, amd-gfx; +Cc: alexander.deucher, kenny.ho, nirmoy.das, christian.koenig

On 2020-01-22 4:25 a.m., Nirmoy wrote:
> Hi Luben,
> 
> Thanks for reviewing. Was expecting a lot from vim auto-indent :/

Yes, no problem. It's a learning experience for me as well.

If you use Emacs, pressing TAB anywhere on a line, will indent
it according to the mode. (It runs the (c-indent-line-or-region)
Lisp function, when the major mode is "C".) Pressing TAB anywhere
in a line which has already been indented according to mode, does
nothing.

You can also indent the whole file (region) by pressing C-M-\.

> 
> On 1/22/20 1:07 AM, Luben Tuikov wrote:
>> On 2020-01-21 11:50 a.m., Nirmoy Das wrote:
>>
>>> -		switch (i) {
>>> +	priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
>>> +				ctx->init_priority : ctx->override_priority;
>> You don't need parentheses around the relational equality operator used here.
>> It has higher precedence than the ternary conditional, in which it is embedded.
> 
> Parentheses make it more human-readable.
> 
> 
>>> +	int r;
>>> +
>>> +	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
>>> +		return -EINVAL;
>> This shouldn't be possible since it is an enum...
> What should not be possible?
>> But why not do this check in "amdgpu_ctx_priority_permit()"
>> which is introduced by this patchset and used in the immediately
>> following line?
> Makes sense.
>>
>> Or perhaps fix up amdgpu_to_sched_priority() where it is massaged
>> from the ioctl argument which is an integer.
>>
>> On a side note: I noticed that the enum drm_sched_priority
>> has no DRM_SCHED_PRIORITY_NONE.
>>
>> I've found value in setting the first value of an enum to
>> "_NONE" (unless zero actually has a meaning as set by HW/etc., anyway).
>> Enum drm_sched_priority starts with "_MIN" and "_LOW" which
>> are both set to the same (zero) value.
>>
>> So having DRM_SCHED_PRIORITY_NONE, could be used to denote
>> that no priority was given and any is fine, or to mean
>> that if the priority was given out of range, set it to "none",
>> to mean pick any.
> 
> Not sure if the HW accepts 0.

Exactly! We use "0" to mean, as I described above,
"that no priority was given and any is fine, or to mean
that if the priority given [by the user] is out of range,
set it to ``none'' to mean pick any", that is so that
we actually input to hardware a meaningful value.

It's just a conveyance.

Using/adding "_NONE" in enums (usually as the first enumerated literal, 0)
really changes algorithms, as it makes representing concepts easier.
(Kind of like when we said, let i = sqrt(-1).)

Regards,
Luben

* Re: [PATCH 1/2] drm/amdgpu: individualize amdgpu entity allocation per HW IP
From: Luben Tuikov @ 2020-01-22 16:24 UTC
  To: Nirmoy, Nirmoy Das, amd-gfx
  Cc: alexander.deucher, kenny.ho, nirmoy.das, christian.koenig

On 2020-01-22 4:32 a.m., Nirmoy wrote:
> 
> On 1/22/20 12:34 AM, Luben Tuikov wrote:
>>
>>> +		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>>> +			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
>>>   
>>> -		kfree(entity->fences);
>>> -		entity->fences = NULL;
>>> +			kfree(entity->fences);
>>> +			entity->fences = NULL;
>>> +		}
>>> +
>>> +error_cleanup_entity_memory:
>> "cleanup" refers to something spilled, or something to be collected.
> It is collecting memory.

But it really is doing a "kfree()": freeing previously allocated
memory. The traditional wording is the most accessible, in my opinion.

>> (Or winning in a wagered game of chance.) Also at "module_exit", maybe.
>>
>> The kernel has traditionally used "unroll" and "free" for this. Since
>> you're unrolling the loop (notice that it sits after the "return 0;"),
>> you can backtrack and name it like this:
>>
>> Error_unroll_free1:
>> 	for ( ; i >= 0; i--)
>> 		free(my_array[i]);
>>
> I prefer the existing style; it makes things more readable.
> 
> 
> Regards,
> 
> Nirmoy
> 


* [PATCH 1/2] drm/amdgpu: individualize amdgpu entity allocation per HW IP
From: Nirmoy Das @ 2020-01-22  9:33 UTC
  To: amd-gfx; +Cc: alexander.deucher, kenny.ho, nirmoy.das, christian.koenig

Do not allocate all the entities at once. This is required for
dynamic amdgpu_ctx_entity creation.

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 143 +++++++++++++-----------
 1 file changed, 76 insertions(+), 67 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 05c2af61e7de..eecbd68db986 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -42,16 +42,6 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
 	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
 };
 
-static int amdgpu_ctx_total_num_entities(void)
-{
-	unsigned i, num_entities = 0;
-
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
-		num_entities += amdgpu_ctx_num_entities[i];
-
-	return num_entities;
-}
-
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 				      enum drm_sched_priority priority)
 {
@@ -73,7 +63,6 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			   struct drm_file *filp,
 			   struct amdgpu_ctx *ctx)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	unsigned i, j;
 	int r;
 
@@ -87,28 +76,30 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	memset(ctx, 0, sizeof(*ctx));
 	ctx->adev = adev;
 
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		ctx->entities[i] = kcalloc(amdgpu_ctx_num_entities[i],
+					   sizeof(struct amdgpu_ctx_entity),
+					   GFP_KERNEL);
 
-	ctx->entities[0] = kcalloc(num_entities,
-				   sizeof(struct amdgpu_ctx_entity),
-				   GFP_KERNEL);
-	if (!ctx->entities[0])
-		return -ENOMEM;
-
-
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+		if (!ctx->entities[0]) {
+			r =  -ENOMEM;
+			goto error_cleanup_entity_memory;
+		}
+	}
 
-		entity->sequence = 1;
-		entity->fences = kcalloc(amdgpu_sched_jobs,
-					 sizeof(struct dma_fence*), GFP_KERNEL);
-		if (!entity->fences) {
-			r = -ENOMEM;
-			goto error_cleanup_memory;
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
+
+			entity->sequence = 1;
+			entity->fences = kcalloc(amdgpu_sched_jobs,
+						 sizeof(struct dma_fence*), GFP_KERNEL);
+			if (!entity->fences) {
+				r = -ENOMEM;
+				goto error_cleanup_memory;
+			}
 		}
 	}
-	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
-		ctx->entities[i] = ctx->entities[i - 1] +
-			amdgpu_ctx_num_entities[i - 1];
 
 	kref_init(&ctx->refcount);
 	spin_lock_init(&ctx->ring_lock);
@@ -179,44 +170,55 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	return 0;
 
 error_cleanup_entities:
-	for (i = 0; i < num_entities; ++i)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
+			drm_sched_entity_destroy(&ctx->entities[i][j].entity);
+	}
 
 error_cleanup_memory:
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
 
-		kfree(entity->fences);
-		entity->fences = NULL;
+			kfree(entity->fences);
+			entity->fences = NULL;
+		}
+	}
+
+error_cleanup_entity_memory:
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		kfree(ctx->entities[i]);
+		ctx->entities[i] = NULL;
 	}
 
-	kfree(ctx->entities[0]);
-	ctx->entities[0] = NULL;
 	return r;
 }
 
 static void amdgpu_ctx_fini(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_device *adev = ctx->adev;
-	unsigned i, j;
+	unsigned i, j, k;
 
 	if (!adev)
 		return;
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
 
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+			for (k = 0; k < amdgpu_sched_jobs; ++k)
+				dma_fence_put(entity->fences[k]);
 
-		for (j = 0; j < amdgpu_sched_jobs; ++j)
-			dma_fence_put(entity->fences[j]);
+			kfree(entity->fences);
+		}
+	}
 
-		kfree(entity->fences);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		kfree(ctx->entities[i]);
+		ctx->entities[i] = NULL;
 	}
 
-	kfree(ctx->entities[0]);
 	mutex_destroy(&ctx->lock);
-
 	kfree(ctx);
 }
 
@@ -279,14 +281,13 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx;
-	unsigned num_entities;
-	u32 i;
+	u32 i, j;
 
 	ctx = container_of(ref, struct amdgpu_ctx, refcount);
-
-	num_entities = amdgpu_ctx_total_num_entities();
-	for (i = 0; i < num_entities; i++)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
+			drm_sched_entity_destroy(&ctx->entities[i][j].entity);
+	}
 
 	amdgpu_ctx_fini(ref);
 }
@@ -516,19 +517,21 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 				  enum drm_sched_priority priority)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	enum drm_sched_priority ctx_prio;
-	unsigned i;
+	unsigned i, j;
 
 	ctx->override_priority = priority;
 
 	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
 			ctx->init_priority : ctx->override_priority;
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
 
-	for (i = 0; i < num_entities; i++) {
-		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
+			struct drm_sched_entity *entity =
+				&ctx->entities[i][j].entity;
 
-		drm_sched_entity_set_priority(entity, ctx_prio);
+			drm_sched_entity_set_priority(entity, ctx_prio);
+		}
 	}
 }
 
@@ -564,20 +567,21 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;
 
 	idp = &mgr->ctx_handles;
 
 	mutex_lock(&mgr->lock);
 	idr_for_each_entry(idp, ctx, id) {
-		for (i = 0; i < num_entities; i++) {
-			struct drm_sched_entity *entity;
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;
 
-			entity = &ctx->entities[0][i].entity;
-			timeout = drm_sched_entity_flush(entity, timeout);
+				entity = &ctx->entities[i][j].entity;
+				timeout = drm_sched_entity_flush(entity, timeout);
+			}
 		}
 	}
 	mutex_unlock(&mgr->lock);
@@ -586,10 +590,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;
 
 	idp = &mgr->ctx_handles;
 
@@ -599,8 +602,14 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 			continue;
 		}
 
-		for (i = 0; i < num_entities; i++)
-			drm_sched_entity_fini(&ctx->entities[0][i].entity);
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;
+
+				entity = &ctx->entities[i][j].entity;
+				drm_sched_entity_fini(entity);
+			}
+		}
 	}
 }
 
-- 
2.24.1

