* [RFC PATCH] drm/amdgpu: allocate entities on demand
From: Nirmoy Das @ 2019-11-25 23:17 UTC
  To: alexander.deucher, kenny.ho, christian.koenig; +Cc: nirmoy.das, amd-gfx

Currently we pre-allocate entities for all the HW IPs on
context creation, some of which might never be used.

This patch resolves that entity wastage by creating the
entities for a HW IP only when they are first required.

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 142 ++++++++++++++----------
 1 file changed, 81 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index a0d3d7b756eb..0a390ebe873f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -74,7 +74,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			   struct amdgpu_ctx *ctx)
 {
 	unsigned num_entities = amdgpu_ctx_total_num_entities();
-	unsigned i, j, k;
+	unsigned i;
 	int r;
 
 	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
@@ -103,7 +103,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	for (i = 0; i < num_entities; ++i) {
 		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
 
-		entity->sequence = 1;
+		entity->sequence = -1;
 		entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
 	}
 	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
@@ -120,25 +120,59 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	ctx->init_priority = priority;
 	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
 
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-		struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
-		struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
-		unsigned num_rings = 0;
-		unsigned num_rqs = 0;
+	return 0;
+
+error_free_fences:
+	kfree(ctx->fences);
+	ctx->fences = NULL;
+	return r;
+}
+
+static void amdgpu_ctx_fini(struct kref *ref)
+{
+	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
+	unsigned num_entities = amdgpu_ctx_total_num_entities();
+	struct amdgpu_device *adev = ctx->adev;
+	unsigned i, j;
+
+	if (!adev)
+		return;
+
+	for (i = 0; i < num_entities; ++i)
+		for (j = 0; j < amdgpu_sched_jobs; ++j)
+			dma_fence_put(ctx->entities[0][i].fences[j]);
+	kfree(ctx->fences);
+	kfree(ctx->entities[0]);
+
+	mutex_destroy(&ctx->lock);
+
+	kfree(ctx);
+}
 
-		switch (i) {
+
+int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip)
+{
+	struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
+	struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
+	struct amdgpu_device *adev = ctx->adev;
+	unsigned num_rings = 0;
+	unsigned num_rqs = 0;
+	unsigned i, j;
+	int r = 0;
+
+	switch (hw_ip) {
 		case AMDGPU_HW_IP_GFX:
 			rings[0] = &adev->gfx.gfx_ring[0];
 			num_rings = 1;
 			break;
 		case AMDGPU_HW_IP_COMPUTE:
-			for (j = 0; j < adev->gfx.num_compute_rings; ++j)
-				rings[j] = &adev->gfx.compute_ring[j];
+			for (i = 0; i < adev->gfx.num_compute_rings; ++i)
+				rings[i] = &adev->gfx.compute_ring[i];
 			num_rings = adev->gfx.num_compute_rings;
 			break;
 		case AMDGPU_HW_IP_DMA:
-			for (j = 0; j < adev->sdma.num_instances; ++j)
-				rings[j] = &adev->sdma.instance[j].ring;
+			for (i = 0; i < adev->sdma.num_instances; ++i)
+				rings[i] = &adev->sdma.instance[i].ring;
 			num_rings = adev->sdma.num_instances;
 			break;
 		case AMDGPU_HW_IP_UVD:
@@ -154,80 +188,59 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			num_rings = 1;
 			break;
 		case AMDGPU_HW_IP_VCN_DEC:
-			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-				if (adev->vcn.harvest_config & (1 << j))
+			for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+				if (adev->vcn.harvest_config & (1 << i))
 					continue;
-				rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
+				rings[num_rings++] = &adev->vcn.inst[i].ring_dec;
 			}
 			break;
 		case AMDGPU_HW_IP_VCN_ENC:
-			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-				if (adev->vcn.harvest_config & (1 << j))
+			for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+				if (adev->vcn.harvest_config & (1 << i))
 					continue;
-				for (k = 0; k < adev->vcn.num_enc_rings; ++k)
-					rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k];
+				for (j = 0; j < adev->vcn.num_enc_rings; ++j)
+					rings[num_rings++] = &adev->vcn.inst[i].ring_enc[j];
 			}
 			break;
 		case AMDGPU_HW_IP_VCN_JPEG:
-			for (j = 0; j < adev->jpeg.num_jpeg_inst; ++j) {
-				if (adev->vcn.harvest_config & (1 << j))
+			for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+				if (adev->vcn.harvest_config & (1 << i))
 					continue;
-				rings[num_rings++] = &adev->jpeg.inst[j].ring_dec;
+				rings[num_rings++] = &adev->jpeg.inst[i].ring_dec;
 			}
 			break;
-		}
-
-		for (j = 0; j < num_rings; ++j) {
-			if (!rings[j]->adev)
-				continue;
+	}
 
-			rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
-		}
+	for (i = 0; i < num_rings; ++i) {
+		if (!rings[i]->adev)
+			continue;
 
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-						  rqs, num_rqs, &ctx->guilty);
-		if (r)
-			goto error_cleanup_entities;
+		rqs[num_rqs++] = &rings[i]->sched.sched_rq[ctx->init_priority];
 	}
 
+	for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
+		r = drm_sched_entity_init(&ctx->entities[hw_ip][i].entity,
+				rqs, num_rqs, &ctx->guilty);
+	if (r)
+		goto error_cleanup_entities;
+
+	for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
+		ctx->entities[hw_ip][i].sequence = 1;
+
 	return 0;
 
 error_cleanup_entities:
-	for (i = 0; i < num_entities; ++i)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
-	kfree(ctx->entities[0]);
+	for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
+		drm_sched_entity_destroy(&ctx->entities[hw_ip][i].entity);
 
-error_free_fences:
-	kfree(ctx->fences);
-	ctx->fences = NULL;
 	return r;
 }
 
-static void amdgpu_ctx_fini(struct kref *ref)
-{
-	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
-	struct amdgpu_device *adev = ctx->adev;
-	unsigned i, j;
-
-	if (!adev)
-		return;
-
-	for (i = 0; i < num_entities; ++i)
-		for (j = 0; j < amdgpu_sched_jobs; ++j)
-			dma_fence_put(ctx->entities[0][i].fences[j]);
-	kfree(ctx->fences);
-	kfree(ctx->entities[0]);
-
-	mutex_destroy(&ctx->lock);
-
-	kfree(ctx);
-}
-
 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 			  u32 ring, struct drm_sched_entity **entity)
 {
+	int r;
+
 	if (hw_ip >= AMDGPU_HW_IP_NUM) {
 		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
 		return -EINVAL;
@@ -244,6 +257,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 		return -EINVAL;
 	}
 
+	if (ctx->entities[hw_ip][ring].sequence == -1) {
+		r = amdgpu_ctx_init_entity(ctx, hw_ip);
+
+		if (r)
+			return r;
+	}
+
 	*entity = &ctx->entities[hw_ip][ring].entity;
 	return 0;
 }
-- 
2.23.0


* Re: [RFC PATCH] drm/amdgpu: allocate entities on demand
From: Nirmoy @ 2019-11-25 23:31 UTC
  To: Nirmoy Das, alexander.deucher, kenny.ho, christian.koenig
  Cc: nirmoy.das, amd-gfx

Ran amdgpu_test(drm) successfully multiple times to test this. But I am 
pretty sure I am missing some corner case here.


Regards,

Nirmoy

On 11/26/19 12:17 AM, Nirmoy Das wrote:
> Currently we pre-allocate entities for all the HW IPs on
> context creation, some of which might never be used.
>
> This patch resolves that entity wastage by creating the
> entities for a HW IP only when they are first required.
>
> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 142 ++++++++++++++----------
>   1 file changed, 81 insertions(+), 61 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index a0d3d7b756eb..0a390ebe873f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -74,7 +74,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>   			   struct amdgpu_ctx *ctx)
>   {
>   	unsigned num_entities = amdgpu_ctx_total_num_entities();
> -	unsigned i, j, k;
> +	unsigned i;
>   	int r;
>   
>   	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
> @@ -103,7 +103,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>   	for (i = 0; i < num_entities; ++i) {
>   		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
>   
> -		entity->sequence = 1;
> +		entity->sequence = -1;
>   		entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
>   	}
>   	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
> @@ -120,25 +120,59 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>   	ctx->init_priority = priority;
>   	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
>   
> -	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> -		struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
> -		struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
> -		unsigned num_rings = 0;
> -		unsigned num_rqs = 0;
> +	return 0;
> +
> +error_free_fences:
> +	kfree(ctx->fences);
> +	ctx->fences = NULL;
> +	return r;
> +}
> +
> +static void amdgpu_ctx_fini(struct kref *ref)
> +{
> +	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
> +	unsigned num_entities = amdgpu_ctx_total_num_entities();
> +	struct amdgpu_device *adev = ctx->adev;
> +	unsigned i, j;
> +
> +	if (!adev)
> +		return;
> +
> +	for (i = 0; i < num_entities; ++i)
> +		for (j = 0; j < amdgpu_sched_jobs; ++j)
> +			dma_fence_put(ctx->entities[0][i].fences[j]);
> +	kfree(ctx->fences);
> +	kfree(ctx->entities[0]);
> +
> +	mutex_destroy(&ctx->lock);
> +
> +	kfree(ctx);
> +}
>   
> -		switch (i) {
> +
> +int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip)
This should be a static function which I forgot to change
> +{
> +	struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
> +	struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
> +	struct amdgpu_device *adev = ctx->adev;
> +	unsigned num_rings = 0;
> +	unsigned num_rqs = 0;
> +	unsigned i, j;
> +	int r = 0;
> +
> +	switch (hw_ip) {
>   		case AMDGPU_HW_IP_GFX:
>   			rings[0] = &adev->gfx.gfx_ring[0];
>   			num_rings = 1;
>   			break;
>   		case AMDGPU_HW_IP_COMPUTE:
> -			for (j = 0; j < adev->gfx.num_compute_rings; ++j)
> -				rings[j] = &adev->gfx.compute_ring[j];
> +			for (i = 0; i < adev->gfx.num_compute_rings; ++i)
> +				rings[i] = &adev->gfx.compute_ring[i];
>   			num_rings = adev->gfx.num_compute_rings;
>   			break;
>   		case AMDGPU_HW_IP_DMA:
> -			for (j = 0; j < adev->sdma.num_instances; ++j)
> -				rings[j] = &adev->sdma.instance[j].ring;
> +			for (i = 0; i < adev->sdma.num_instances; ++i)
> +				rings[i] = &adev->sdma.instance[i].ring;
>   			num_rings = adev->sdma.num_instances;
>   			break;
>   		case AMDGPU_HW_IP_UVD:
> @@ -154,80 +188,59 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>   			num_rings = 1;
>   			break;
>   		case AMDGPU_HW_IP_VCN_DEC:
> -			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
> -				if (adev->vcn.harvest_config & (1 << j))
> +			for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> +				if (adev->vcn.harvest_config & (1 << i))
>   					continue;
> -				rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
> +				rings[num_rings++] = &adev->vcn.inst[i].ring_dec;
>   			}
>   			break;
>   		case AMDGPU_HW_IP_VCN_ENC:
> -			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
> -				if (adev->vcn.harvest_config & (1 << j))
> +			for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> +				if (adev->vcn.harvest_config & (1 << i))
>   					continue;
> -				for (k = 0; k < adev->vcn.num_enc_rings; ++k)
> -					rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k];
> +				for (j = 0; j < adev->vcn.num_enc_rings; ++j)
> +					rings[num_rings++] = &adev->vcn.inst[i].ring_enc[j];
>   			}
>   			break;
>   		case AMDGPU_HW_IP_VCN_JPEG:
> -			for (j = 0; j < adev->jpeg.num_jpeg_inst; ++j) {
> -				if (adev->vcn.harvest_config & (1 << j))
> +			for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
> +				if (adev->vcn.harvest_config & (1 << i))
>   					continue;
> -				rings[num_rings++] = &adev->jpeg.inst[j].ring_dec;
> +				rings[num_rings++] = &adev->jpeg.inst[i].ring_dec;
>   			}
>   			break;
> -		}
> -
> -		for (j = 0; j < num_rings; ++j) {
> -			if (!rings[j]->adev)
> -				continue;
> +	}
>   
> -			rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
> -		}
> +	for (i = 0; i < num_rings; ++i) {
> +		if (!rings[i]->adev)
> +			continue;
>   
> -		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> -			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
> -						  rqs, num_rqs, &ctx->guilty);
> -		if (r)
> -			goto error_cleanup_entities;
> +		rqs[num_rqs++] = &rings[i]->sched.sched_rq[ctx->init_priority];
>   	}
>   
> +	for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
> +		r = drm_sched_entity_init(&ctx->entities[hw_ip][i].entity,
> +				rqs, num_rqs, &ctx->guilty);
> +	if (r)
> +		goto error_cleanup_entities;
> +
> +	for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
> +		ctx->entities[hw_ip][i].sequence = 1;
> +
>   	return 0;
>   
>   error_cleanup_entities:
> -	for (i = 0; i < num_entities; ++i)
> -		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> -	kfree(ctx->entities[0]);
> +	for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
> +		drm_sched_entity_destroy(&ctx->entities[hw_ip][i].entity);
>   
> -error_free_fences:
> -	kfree(ctx->fences);
> -	ctx->fences = NULL;
>   	return r;
>   }
>   
> -static void amdgpu_ctx_fini(struct kref *ref)
> -{
> -	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
> -	unsigned num_entities = amdgpu_ctx_total_num_entities();
> -	struct amdgpu_device *adev = ctx->adev;
> -	unsigned i, j;
> -
> -	if (!adev)
> -		return;
> -
> -	for (i = 0; i < num_entities; ++i)
> -		for (j = 0; j < amdgpu_sched_jobs; ++j)
> -			dma_fence_put(ctx->entities[0][i].fences[j]);
> -	kfree(ctx->fences);
> -	kfree(ctx->entities[0]);
> -
> -	mutex_destroy(&ctx->lock);
> -
> -	kfree(ctx);
> -}
> -
>   int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
>   			  u32 ring, struct drm_sched_entity **entity)
>   {
> +	int r;
> +
>   	if (hw_ip >= AMDGPU_HW_IP_NUM) {
>   		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
>   		return -EINVAL;
> @@ -244,6 +257,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
>   		return -EINVAL;
>   	}
>   
> +	if (ctx->entities[hw_ip][ring].sequence == -1) {
> +		r = amdgpu_ctx_init_entity(ctx, hw_ip);
> +
> +		if (r)
> +			return r;
> +	}
> +
>   	*entity = &ctx->entities[hw_ip][ring].entity;
>   	return 0;
>   }

* Re: [RFC PATCH] drm/amdgpu: allocate entities on demand
From: Christian König @ 2019-11-26  9:45 UTC
  To: Nirmoy, Nirmoy Das, alexander.deucher, kenny.ho, christian.koenig
  Cc: nirmoy.das, amd-gfx

It looks like a start, but there are numerous things which need to be fixed.

Question number one is: What's that good for? Entities are not the 
problem here. The real issue is the fence ring and the rq_list.

The rq_list could actually be made constant since it should never be 
changed by the entity. It is only changed for backward compatibility in 
drm_sched_entity_set_priority().

So I would start there and clean up drm_sched_entity_set_priority() to
actually just set a new constant rq list instead.
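
A rough sketch of the direction, just to illustrate (it reuses the
rq_list/num_rq_list/rq_lock fields drm_sched_entity already has; the
function name is made up and the exact interface is up for discussion):

/*
 * The per-priority rq lists are built once by the driver and never
 * modified afterwards, so changing the priority just swaps which
 * constant list the entity points at.
 */
void drm_sched_entity_set_rq_list(struct drm_sched_entity *entity,
				  struct drm_sched_rq **rq_list,
				  unsigned int num_rq_list)
{
	spin_lock(&entity->rq_lock);
	entity->rq_list = rq_list;
	entity->num_rq_list = num_rq_list;
	spin_unlock(&entity->rq_lock);
}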

Then we could embed the fences in amdgpu_ctx_entity as a dynamic array
at the end of the structure.

And last we can start to dynamically allocate and initialize the
amdgpu_ctx_entity structures.
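
For the last two points, something like this could work (again only a
sketch: the field order and the helper name are illustrative, and it
assumes amdgpu_sched_jobs stays a global module parameter):

struct amdgpu_ctx_entity {
	uint64_t		sequence;
	struct drm_sched_entity	entity;
	struct dma_fence	*fences[];	/* amdgpu_sched_jobs entries */
};

static struct amdgpu_ctx_entity *amdgpu_ctx_entity_alloc(void)
{
	struct amdgpu_ctx_entity *e;

	/* one allocation covers both the entity and its fence ring */
	e = kzalloc(struct_size(e, fences, amdgpu_sched_jobs), GFP_KERNEL);
	return e;
}

That would also let the separate ctx->fences allocation disappear
completely.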

Regards,
Christian.

On 26.11.19 at 00:31, Nirmoy wrote:
> Ran amdgpu_test(drm) successfully multiple times to test this. But I 
> am pretty sure I am missing some corner case here.
>
>
> Regards,
>
> Nirmoy
>
> On 11/26/19 12:17 AM, Nirmoy Das wrote:
>> Currently we pre-allocate entities for all the HW IPs on
>> context creation, some of which might never be used.
>>
>> This patch resolves that entity wastage by creating the
>> entities for a HW IP only when they are first required.
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 142 ++++++++++++++----------
>>   1 file changed, 81 insertions(+), 61 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> index a0d3d7b756eb..0a390ebe873f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> @@ -74,7 +74,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>>                  struct amdgpu_ctx *ctx)
>>   {
>>       unsigned num_entities = amdgpu_ctx_total_num_entities();
>> -    unsigned i, j, k;
>> +    unsigned i;
>>       int r;
>>         if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
>> @@ -103,7 +103,7 @@ static int amdgpu_ctx_init(struct amdgpu_device 
>> *adev,
>>       for (i = 0; i < num_entities; ++i) {
>>           struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
>>   -        entity->sequence = 1;
>> +        entity->sequence = -1;
>>           entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
>>       }
>>       for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
>> @@ -120,25 +120,59 @@ static int amdgpu_ctx_init(struct amdgpu_device 
>> *adev,
>>       ctx->init_priority = priority;
>>       ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
>>   -    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> -        struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
>> -        struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
>> -        unsigned num_rings = 0;
>> -        unsigned num_rqs = 0;
>> +    return 0;
>> +
>> +error_free_fences:
>> +    kfree(ctx->fences);
>> +    ctx->fences = NULL;
>> +    return r;
>> +}
>> +
>> +static void amdgpu_ctx_fini(struct kref *ref)
>> +{
>> +    struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, 
>> refcount);
>> +    unsigned num_entities = amdgpu_ctx_total_num_entities();
>> +    struct amdgpu_device *adev = ctx->adev;
>> +    unsigned i, j;
>> +
>> +    if (!adev)
>> +        return;
>> +
>> +    for (i = 0; i < num_entities; ++i)
>> +        for (j = 0; j < amdgpu_sched_jobs; ++j)
>> +            dma_fence_put(ctx->entities[0][i].fences[j]);
>> +    kfree(ctx->fences);
>> +    kfree(ctx->entities[0]);
>> +
>> +    mutex_destroy(&ctx->lock);
>> +
>> +    kfree(ctx);
>> +}
>>   -        switch (i) {
>> +
>> +int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip)
> This should be a static function which I forgot to change
>> +{
>> +    struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
>> +    struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
>> +    struct amdgpu_device *adev = ctx->adev;
>> +    unsigned num_rings = 0;
>> +    unsigned num_rqs = 0;
>> +    unsigned i, j;
>> +    int r = 0;
>> +
>> +    switch (hw_ip) {
>>           case AMDGPU_HW_IP_GFX:
>>               rings[0] = &adev->gfx.gfx_ring[0];
>>               num_rings = 1;
>>               break;
>>           case AMDGPU_HW_IP_COMPUTE:
>> -            for (j = 0; j < adev->gfx.num_compute_rings; ++j)
>> -                rings[j] = &adev->gfx.compute_ring[j];
>> +            for (i = 0; i < adev->gfx.num_compute_rings; ++i)
>> +                rings[i] = &adev->gfx.compute_ring[i];
>>               num_rings = adev->gfx.num_compute_rings;
>>               break;
>>           case AMDGPU_HW_IP_DMA:
>> -            for (j = 0; j < adev->sdma.num_instances; ++j)
>> -                rings[j] = &adev->sdma.instance[j].ring;
>> +            for (i = 0; i < adev->sdma.num_instances; ++i)
>> +                rings[i] = &adev->sdma.instance[i].ring;
>>               num_rings = adev->sdma.num_instances;
>>               break;
>>           case AMDGPU_HW_IP_UVD:
>> @@ -154,80 +188,59 @@ static int amdgpu_ctx_init(struct amdgpu_device 
>> *adev,
>>               num_rings = 1;
>>               break;
>>           case AMDGPU_HW_IP_VCN_DEC:
>> -            for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
>> -                if (adev->vcn.harvest_config & (1 << j))
>> +            for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
>> +                if (adev->vcn.harvest_config & (1 << i))
>>                       continue;
>> -                rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
>> +                rings[num_rings++] = &adev->vcn.inst[i].ring_dec;
>>               }
>>               break;
>>           case AMDGPU_HW_IP_VCN_ENC:
>> -            for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
>> -                if (adev->vcn.harvest_config & (1 << j))
>> +            for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
>> +                if (adev->vcn.harvest_config & (1 << i))
>>                       continue;
>> -                for (k = 0; k < adev->vcn.num_enc_rings; ++k)
>> -                    rings[num_rings++] = 
>> &adev->vcn.inst[j].ring_enc[k];
>> +                for (j = 0; j < adev->vcn.num_enc_rings; ++j)
>> +                    rings[num_rings++] = 
>> &adev->vcn.inst[i].ring_enc[j];
>>               }
>>               break;
>>           case AMDGPU_HW_IP_VCN_JPEG:
>> -            for (j = 0; j < adev->jpeg.num_jpeg_inst; ++j) {
>> -                if (adev->vcn.harvest_config & (1 << j))
>> +            for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
>> +                if (adev->vcn.harvest_config & (1 << i))
>>                       continue;
>> -                rings[num_rings++] = &adev->jpeg.inst[j].ring_dec;
>> +                rings[num_rings++] = &adev->jpeg.inst[i].ring_dec;
>>               }
>>               break;
>> -        }
>> -
>> -        for (j = 0; j < num_rings; ++j) {
>> -            if (!rings[j]->adev)
>> -                continue;
>> +    }
>>   -            rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
>> -        }
>> +    for (i = 0; i < num_rings; ++i) {
>> +        if (!rings[i]->adev)
>> +            continue;
>>   -        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
>> -            r = drm_sched_entity_init(&ctx->entities[i][j].entity,
>> -                          rqs, num_rqs, &ctx->guilty);
>> -        if (r)
>> -            goto error_cleanup_entities;
>> +        rqs[num_rqs++] = &rings[i]->sched.sched_rq[ctx->init_priority];
>>       }
>>   +    for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
>> +        r = drm_sched_entity_init(&ctx->entities[hw_ip][i].entity,
>> +                rqs, num_rqs, &ctx->guilty);
>> +    if (r)
>> +        goto error_cleanup_entities;
>> +
>> +    for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
>> +        ctx->entities[hw_ip][i].sequence = 1;
>> +
>>       return 0;
>>     error_cleanup_entities:
>> -    for (i = 0; i < num_entities; ++i)
>> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
>> -    kfree(ctx->entities[0]);
>> +    for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
>> + drm_sched_entity_destroy(&ctx->entities[hw_ip][i].entity);
>>   -error_free_fences:
>> -    kfree(ctx->fences);
>> -    ctx->fences = NULL;
>>       return r;
>>   }
>>   -static void amdgpu_ctx_fini(struct kref *ref)
>> -{
>> -    struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, 
>> refcount);
>> -    unsigned num_entities = amdgpu_ctx_total_num_entities();
>> -    struct amdgpu_device *adev = ctx->adev;
>> -    unsigned i, j;
>> -
>> -    if (!adev)
>> -        return;
>> -
>> -    for (i = 0; i < num_entities; ++i)
>> -        for (j = 0; j < amdgpu_sched_jobs; ++j)
>> -            dma_fence_put(ctx->entities[0][i].fences[j]);
>> -    kfree(ctx->fences);
>> -    kfree(ctx->entities[0]);
>> -
>> -    mutex_destroy(&ctx->lock);
>> -
>> -    kfree(ctx);
>> -}
>> -
>>   int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 
>> instance,
>>                 u32 ring, struct drm_sched_entity **entity)
>>   {
>> +    int r;
>> +
>>       if (hw_ip >= AMDGPU_HW_IP_NUM) {
>>           DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
>>           return -EINVAL;
>> @@ -244,6 +257,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx 
>> *ctx, u32 hw_ip, u32 instance,
>>           return -EINVAL;
>>       }
>>   +    if (ctx->entities[hw_ip][ring].sequence == -1) {
>> +        r = amdgpu_ctx_init_entity(ctx, hw_ip);
>> +
>> +        if (r)
>> +            return r;
>> +    }
>> +
>>       *entity = &ctx->entities[hw_ip][ring].entity;
>>       return 0;
>>   }

* Re: [RFC PATCH] drm/amdgpu: allocate entities on demand
From: Nirmoy @ 2019-11-26 14:41 UTC
  To: christian.koenig, Nirmoy Das, alexander.deucher, kenny.ho
  Cc: nirmoy.das, amd-gfx

Thanks Christian for reviewing it. I will try to clean up
drm_sched_entity_set_priority() and come up with another patch.

On 11/26/19 10:45 AM, Christian König wrote:
> It looks like a start, but there are numerous things which need to be fixed.
>
> Question number one is: What's that good for? Entities are not the 
> problem here. The real issue is the fence ring and the rq_list.
>
> The rq_list could actually be made constant since it should never be 
> changed by the entity. It is only changed for backward compatibility 
> in drm_sched_entity_set_priority().
>
> So I would start there and clean up drm_sched_entity_set_priority() to
> actually just set a new constant rq list instead.
>
> Then we could embed the fences in amdgpu_ctx_entity as a dynamic array
> at the end of the structure.
>
> And last we can start to dynamically allocate and initialize the
> amdgpu_ctx_entity structures.
>
> Regards,
> Christian.
>
> On 26.11.19 at 00:31, Nirmoy wrote:
>> Ran amdgpu_test(drm) successfully multiple times to test this. But I 
>> am pretty sure I am missing some corner case here.
>>
>>
>> Regards,
>>
>> Nirmoy
>>
>> On 11/26/19 12:17 AM, Nirmoy Das wrote:
>>> Currently we pre-allocate entities for all the HW IPs on
>>> context creation, some of which might never be used.
>>>
>>> This patch resolves that entity wastage by creating the
>>> entities for a HW IP only when they are first required.
>>>
>>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 142 
>>> ++++++++++++++----------
>>>   1 file changed, 81 insertions(+), 61 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> index a0d3d7b756eb..0a390ebe873f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> @@ -74,7 +74,7 @@ static int amdgpu_ctx_init(struct amdgpu_device 
>>> *adev,
>>>                  struct amdgpu_ctx *ctx)
>>>   {
>>>       unsigned num_entities = amdgpu_ctx_total_num_entities();
>>> -    unsigned i, j, k;
>>> +    unsigned i;
>>>       int r;
>>>         if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
>>> @@ -103,7 +103,7 @@ static int amdgpu_ctx_init(struct amdgpu_device 
>>> *adev,
>>>       for (i = 0; i < num_entities; ++i) {
>>>           struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
>>>   -        entity->sequence = 1;
>>> +        entity->sequence = -1;
>>>           entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
>>>       }
>>>       for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
>>> @@ -120,25 +120,59 @@ static int amdgpu_ctx_init(struct 
>>> amdgpu_device *adev,
>>>       ctx->init_priority = priority;
>>>       ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
>>>   -    for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>>> -        struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
>>> -        struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
>>> -        unsigned num_rings = 0;
>>> -        unsigned num_rqs = 0;
>>> +    return 0;
>>> +
>>> +error_free_fences:
>>> +    kfree(ctx->fences);
>>> +    ctx->fences = NULL;
>>> +    return r;
>>> +}
>>> +
>>> +static void amdgpu_ctx_fini(struct kref *ref)
>>> +{
>>> +    struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, 
>>> refcount);
>>> +    unsigned num_entities = amdgpu_ctx_total_num_entities();
>>> +    struct amdgpu_device *adev = ctx->adev;
>>> +    unsigned i, j;
>>> +
>>> +    if (!adev)
>>> +        return;
>>> +
>>> +    for (i = 0; i < num_entities; ++i)
>>> +        for (j = 0; j < amdgpu_sched_jobs; ++j)
>>> +            dma_fence_put(ctx->entities[0][i].fences[j]);
>>> +    kfree(ctx->fences);
>>> +    kfree(ctx->entities[0]);
>>> +
>>> +    mutex_destroy(&ctx->lock);
>>> +
>>> +    kfree(ctx);
>>> +}
>>>   -        switch (i) {
>>> +
>>> +int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip)
>> This should be a static function which I forgot to change
>>> +{
>>> +    struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
>>> +    struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
>>> +    struct amdgpu_device *adev = ctx->adev;
>>> +    unsigned num_rings = 0;
>>> +    unsigned num_rqs = 0;
>>> +    unsigned i, j;
>>> +    int r = 0;
>>> +
>>> +    switch (hw_ip) {
>>>           case AMDGPU_HW_IP_GFX:
>>>               rings[0] = &adev->gfx.gfx_ring[0];
>>>               num_rings = 1;
>>>               break;
>>>           case AMDGPU_HW_IP_COMPUTE:
>>> -            for (j = 0; j < adev->gfx.num_compute_rings; ++j)
>>> -                rings[j] = &adev->gfx.compute_ring[j];
>>> +            for (i = 0; i < adev->gfx.num_compute_rings; ++i)
>>> +                rings[i] = &adev->gfx.compute_ring[i];
>>>               num_rings = adev->gfx.num_compute_rings;
>>>               break;
>>>           case AMDGPU_HW_IP_DMA:
>>> -            for (j = 0; j < adev->sdma.num_instances; ++j)
>>> -                rings[j] = &adev->sdma.instance[j].ring;
>>> +            for (i = 0; i < adev->sdma.num_instances; ++i)
>>> +                rings[i] = &adev->sdma.instance[i].ring;
>>>               num_rings = adev->sdma.num_instances;
>>>               break;
>>>           case AMDGPU_HW_IP_UVD:
>>> @@ -154,80 +188,59 @@ static int amdgpu_ctx_init(struct 
>>> amdgpu_device *adev,
>>>               num_rings = 1;
>>>               break;
>>>           case AMDGPU_HW_IP_VCN_DEC:
>>> -            for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
>>> -                if (adev->vcn.harvest_config & (1 << j))
>>> +            for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
>>> +                if (adev->vcn.harvest_config & (1 << i))
>>>                       continue;
>>> -                rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
>>> +                rings[num_rings++] = &adev->vcn.inst[i].ring_dec;
>>>               }
>>>               break;
>>>           case AMDGPU_HW_IP_VCN_ENC:
>>> -            for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
>>> -                if (adev->vcn.harvest_config & (1 << j))
>>> +            for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
>>> +                if (adev->vcn.harvest_config & (1 << i))
>>>                       continue;
>>> -                for (k = 0; k < adev->vcn.num_enc_rings; ++k)
>>> -                    rings[num_rings++] = 
>>> &adev->vcn.inst[j].ring_enc[k];
>>> +                for (j = 0; j < adev->vcn.num_enc_rings; ++j)
>>> +                    rings[num_rings++] = 
>>> &adev->vcn.inst[i].ring_enc[j];
>>>               }
>>>               break;
>>>           case AMDGPU_HW_IP_VCN_JPEG:
>>> -            for (j = 0; j < adev->jpeg.num_jpeg_inst; ++j) {
>>> -                if (adev->vcn.harvest_config & (1 << j))
>>> +            for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
>>> +                if (adev->vcn.harvest_config & (1 << i))
>>>                       continue;
>>> -                rings[num_rings++] = &adev->jpeg.inst[j].ring_dec;
>>> +                rings[num_rings++] = &adev->jpeg.inst[i].ring_dec;
>>>               }
>>>               break;
>>> -        }
>>> -
>>> -        for (j = 0; j < num_rings; ++j) {
>>> -            if (!rings[j]->adev)
>>> -                continue;
>>> +    }
>>>   -            rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
>>> -        }
>>> +    for (i = 0; i < num_rings; ++i) {
>>> +        if (!rings[i]->adev)
>>> +            continue;
>>>   -        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
>>> -            r = drm_sched_entity_init(&ctx->entities[i][j].entity,
>>> -                          rqs, num_rqs, &ctx->guilty);
>>> -        if (r)
>>> -            goto error_cleanup_entities;
>>> +        rqs[num_rqs++] = 
>>> &rings[i]->sched.sched_rq[ctx->init_priority];
>>>       }
>>>   +    for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
>>> +        r = drm_sched_entity_init(&ctx->entities[hw_ip][i].entity,
>>> +                rqs, num_rqs, &ctx->guilty);
>>> +    if (r)
>>> +        goto error_cleanup_entities;
>>> +
>>> +    for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
>>> +        ctx->entities[hw_ip][i].sequence = 1;
>>> +
>>>       return 0;
>>>     error_cleanup_entities:
>>> -    for (i = 0; i < num_entities; ++i)
>>> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
>>> -    kfree(ctx->entities[0]);
>>> +    for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
>>> + drm_sched_entity_destroy(&ctx->entities[hw_ip][i].entity);
>>>   -error_free_fences:
>>> -    kfree(ctx->fences);
>>> -    ctx->fences = NULL;
>>>       return r;
>>>   }
>>>   -static void amdgpu_ctx_fini(struct kref *ref)
>>> -{
>>> -    struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, 
>>> refcount);
>>> -    unsigned num_entities = amdgpu_ctx_total_num_entities();
>>> -    struct amdgpu_device *adev = ctx->adev;
>>> -    unsigned i, j;
>>> -
>>> -    if (!adev)
>>> -        return;
>>> -
>>> -    for (i = 0; i < num_entities; ++i)
>>> -        for (j = 0; j < amdgpu_sched_jobs; ++j)
>>> -            dma_fence_put(ctx->entities[0][i].fences[j]);
>>> -    kfree(ctx->fences);
>>> -    kfree(ctx->entities[0]);
>>> -
>>> -    mutex_destroy(&ctx->lock);
>>> -
>>> -    kfree(ctx);
>>> -}
>>> -
>>>   int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 
>>> instance,
>>>                 u32 ring, struct drm_sched_entity **entity)
>>>   {
>>> +    int r;
>>> +
>>>       if (hw_ip >= AMDGPU_HW_IP_NUM) {
>>>           DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
>>>           return -EINVAL;
>>> @@ -244,6 +257,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx 
>>> *ctx, u32 hw_ip, u32 instance,
>>>           return -EINVAL;
>>>       }
>>>   +    if (ctx->entities[hw_ip][ring].sequence == -1) {
>>> +        r = amdgpu_ctx_init_entity(ctx, hw_ip);
>>> +
>>> +        if (r)
>>> +            return r;
>>> +    }
>>> +
>>>       *entity = &ctx->entities[hw_ip][ring].entity;
>>>       return 0;
>>>   }
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] drm/amdgpu: allocate entities on demand
  2019-11-26  9:45         ` Christian König
  (?)
  (?)
@ 2019-11-29 14:29         ` Nirmoy
  2019-11-29 18:42           ` Christian König
  -1 siblings, 1 reply; 18+ messages in thread
From: Nirmoy @ 2019-11-29 14:29 UTC (permalink / raw)
  To: christian.koenig, alexander.deucher, kenny.ho; +Cc: nirmoy.das, amd-gfx

Hi Christian,

On 11/26/19 10:45 AM, Christian König wrote:
> It looks like a start, but there are numerous things which need to be
> fixed.
>
> Question number one is: What's that good for? Entities are not the 
> problem here. The real issue is the fence ring and the rq_list.
>
> The rq_list could actually be made constant since it should never be 
> changed by the entity. It is only changed for backward compatibility 
> in drm_sched_entity_set_priority().
>
> So I would start there and clean up drm_sched_entity_set_priority()
> to actually just set a new constant rq list instead.

I am missing some context here. Can you please explain a bit more? I
looked it over and over again, but I still don't understand what you
mean by a new constant rq list :/

>
> Then we could embed the fences in amdgpu_ctx_entity as dynamic array 
> at the end of the structure.
>
> And last we can start to dynamic allocate and initialize the 
> amdgpu_ctx_entity() structures.
>
> Regards,
> Christian.
>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] drm/amdgpu: allocate entities on demand
  2019-11-29 14:29         ` Nirmoy
@ 2019-11-29 18:42           ` Christian König
  2019-12-02 14:43             ` Nirmoy
  0 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2019-11-29 18:42 UTC (permalink / raw)
  To: Nirmoy, alexander.deucher, kenny.ho; +Cc: nirmoy.das, amd-gfx

Am 29.11.19 um 15:29 schrieb Nirmoy:
> Hi Christian,
>
> On 11/26/19 10:45 AM, Christian König wrote:
>> It looks like a start, but there are numerous things which need to
>> be fixed.
>>
>> Question number one is: What's that good for? Entities are not the 
>> problem here. The real issue is the fence ring and the rq_list.
>>
>> The rq_list could actually be made constant since it should never be 
>> changed by the entity. It is only changed for backward compatibility 
>> in drm_sched_entity_set_priority().
>>
>> So I would start there and clean up
>> drm_sched_entity_set_priority() to actually just set a new constant
>> rq list instead.
>
> I am missing some context here. Can you please explain a bit more? I
> looked it over and over again, but I still don't understand what you
> mean by a new constant rq list :/

Ok that needs a bit wider explanation.

The GPU scheduler consists mainly of drm_gpu_scheduler instances. Each
of those instances contains multiple runqueues with different
priorities (5 IIRC).

Now for each entity we give a list of runqueues on which this entity
can be served, i.e. where the jobs which are pushed to the entity are
executed.
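
For reference, the structures under discussion look roughly like this
(abridged sketch, trimmed to the fields relevant here; the full
definitions live in include/drm/gpu_scheduler.h):

struct drm_gpu_scheduler {
	/* one runqueue per priority level */
	struct drm_sched_rq	sched_rq[DRM_SCHED_PRIORITY_MAX];
	/* ... */
};

struct drm_sched_entity {
	struct drm_sched_rq	*rq;		/* runqueue currently in use */
	struct drm_sched_rq	**rq_list;	/* the per-entity copy */
	unsigned int		num_rq_list;
	/* ... */
};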

The entity itself keeps a copy of that runqueue list because we have the 
drm_sched_entity_set_priority() which modifies this runqueue list.

But essentially that is complete overkill; the runqueue lists are
constant for each amdgpu device, e.g. all contexts should use SDMA0 and
SDMA1 in the same way.

In other words, building the list of runqueues should happen only once
and not for each context.

Multiple approaches to fix this would be possible. One rather elegant
solution would be to change the rq list into a scheduler instance list
+ priority.

This way we would also fix the age-old bug that changing the priority
of a context could actually mess up already scheduled jobs.

The alternative I noted before would be to drop 
drm_sched_entity_set_priority() or change it into 
drm_sched_entity_set_runqueues().
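
(A minimal sketch of what drm_sched_entity_set_priority() could shrink
to under the scheduler list + priority approach; illustrative only, and
it assumes the entity->priority field proposed further down in this
thread:)

void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
				   enum drm_sched_priority priority)
{
	spin_lock(&entity->rq_lock);
	/* The constant scheduler list stays untouched; the matching
	 * runqueue is picked from it at job submission time. */
	entity->priority = priority;
	spin_unlock(&entity->rq_lock);
}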

Regards,
Christian.

>
>>
>> Then we could embed the fences in amdgpu_ctx_entity as dynamic array 
>> at the end of the structure.
>>
>> And last we can start to dynamic allocate and initialize the 
>> amdgpu_ctx_entity() structures.
>>
>> Regards,
>> Christian.
>>
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] drm/amdgpu: allocate entities on demand
  2019-11-29 18:42           ` Christian König
@ 2019-12-02 14:43             ` Nirmoy
  2019-12-02 14:59               ` Christian König
  0 siblings, 1 reply; 18+ messages in thread
From: Nirmoy @ 2019-12-02 14:43 UTC (permalink / raw)
  To: Christian König, alexander.deucher, kenny.ho; +Cc: nirmoy.das, amd-gfx


On 11/29/19 7:42 PM, Christian König wrote:
> Am 29.11.19 um 15:29 schrieb Nirmoy:
>> Hi Christian,
>>
>> On 11/26/19 10:45 AM, Christian König wrote:
>>> It looks like a start, but there are numerous things which need to
>>> be fixed.
>>>
>>> Question number one is: What's that good for? Entities are not the 
>>> problem here. The real issue is the fence ring and the rq_list.
>>>
>>> The rq_list could actually be made constant since it should never be 
>>> changed by the entity. It is only changed for backward compatibility 
>>> in drm_sched_entity_set_priority().
>>>
>>> So I would start there and clean up
>>> drm_sched_entity_set_priority() to actually just set a new constant
>>> rq list instead.
>>
>> I am missing some context here. Can you please explain a bit more?
>> I looked it over and over again, but I still don't understand what
>> you mean by a new constant rq list :/
>
> Ok that needs a bit wider explanation.
>
> The GPU scheduler consists mainly of drm_gpu_scheduler instances.
> Each of those instances contains multiple runqueues with different
> priorities (5 IIRC).
>
> Now for each entity we give a list of runqueues on which this entity
> can be served, i.e. where the jobs which are pushed to the entity are
> executed.
>
> The entity itself keeps a copy of that runqueue list because we have 
> the drm_sched_entity_set_priority() which modifies this runqueue list.
>
> But essentially that is complete overkill; the runqueue lists are
> constant for each amdgpu device, e.g. all contexts should use SDMA0
> and SDMA1 in the same way.
>
> In other words, building the list of runqueues should happen only
> once and not for each context.
Okay, now I understand the real problem. Thanks for the detailed
explanation.
>
> Multiple approaches to fix this would be possible. One rather
> elegant solution would be to change the rq list into a scheduler
> instance list + priority.

Do you mean something like

diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 684692a8ed76..ac67f8f098fa 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -81,7 +81,7 @@ enum drm_sched_priority {
  struct drm_sched_entity {
         struct list_head                list;
         struct drm_sched_rq             *rq;
-       struct drm_sched_rq             **rq_list;
+      struct drm_gpu_scheduler        **sched;
         unsigned int                    num_rq_list;
         spinlock_t                      rq_lock;

>
>
> This way we would also fix the age-old bug that changing the
> priority of a context could actually mess up already scheduled jobs.
>
> The alternative I noted before would be to drop 
> drm_sched_entity_set_priority() or change it into 
> drm_sched_entity_set_runqueues().
I was working on it but then I got stuck on a "BUG: kernel NULL
pointer dereference, address:" which I am still trying to figure out.
>
> Regards,
> Christian.
>
>>
>>>
>>> Then we could embed the fences in amdgpu_ctx_entity as dynamic array 
>>> at the end of the structure.
>>>
>>> And last we can start to dynamic allocate and initialize the 
>>> amdgpu_ctx_entity() structures.
>>>
>>> Regards,
>>> Christian.
>>>
>>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] drm/amdgpu: allocate entities on demand
  2019-12-02 14:43             ` Nirmoy
@ 2019-12-02 14:59               ` Christian König
  2019-12-03 15:02                 ` Nirmoy
  0 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2019-12-02 14:59 UTC (permalink / raw)
  To: Nirmoy, Christian König, alexander.deucher, kenny.ho
  Cc: nirmoy.das, amd-gfx

Am 02.12.19 um 15:43 schrieb Nirmoy:
>
> On 11/29/19 7:42 PM, Christian König wrote:
>> Am 29.11.19 um 15:29 schrieb Nirmoy:
>>> Hi Christian,
>>>
>>> On 11/26/19 10:45 AM, Christian König wrote:
>>>> It looks like a start, but there are numerous things which need
>>>> to be fixed.
>>>>
>>>> Question number one is: What's that good for? Entities are not the 
>>>> problem here. The real issue is the fence ring and the rq_list.
>>>>
>>>> The rq_list could actually be made constant since it should never 
>>>> be changed by the entity. It is only changed for backward 
>>>> compatibility in drm_sched_entity_set_priority().
>>>>
>>>> So I would start there and clean up
>>>> drm_sched_entity_set_priority() to actually just set a new
>>>> constant rq list instead.
>>>
>>> I am missing some context here. Can you please explain a bit more?
>>> I looked it over and over again, but I still don't understand what
>>> you mean by a new constant rq list :/
>>
>> Ok that needs a bit wider explanation.
>>
>> The GPU scheduler consists mainly of drm_gpu_scheduler instances.
>> Each of those instances contains multiple runqueues with different
>> priorities (5 IIRC).
>>
>> Now for each entity we give a list of runqueues on which this
>> entity can be served, i.e. where the jobs which are pushed to the
>> entity are executed.
>>
>> The entity itself keeps a copy of that runqueue list because we have 
>> the drm_sched_entity_set_priority() which modifies this runqueue list.
>>
>> But essentially that is complete overkill; the runqueue lists are
>> constant for each amdgpu device, e.g. all contexts should use SDMA0
>> and SDMA1 in the same way.
>>
>> In other words, building the list of runqueues should happen only
>> once and not for each context.
> Okay, now I understand the real problem. Thanks for the detailed
> explanation.
>>
>> Multiple approaches to fix this would be possible. One rather
>> elegant solution would be to change the rq list into a scheduler
>> instance list + priority.
>
> Do you mean something like
>
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 684692a8ed76..ac67f8f098fa 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -81,7 +81,7 @@ enum drm_sched_priority {
>  struct drm_sched_entity {
>         struct list_head                list;
>         struct drm_sched_rq             *rq;
> -       struct drm_sched_rq             **rq_list;
> +      struct drm_gpu_scheduler        **sched;
>         unsigned int                    num_rq_list;
>         spinlock_t                      rq_lock;

Yes, exactly. Problem is that I'm not 100% sure if that really works 
with all users of the rq_list.

Regards,
Christian.

>
>>
>>
>> This way we would also fix the age-old bug that changing the
>> priority of a context could actually mess up already scheduled jobs.
>>
>> The alternative I noted before would be to drop 
>> drm_sched_entity_set_priority() or change it into 
>> drm_sched_entity_set_runqueues().
> I was working on it but then I got stuck on a "BUG: kernel NULL
> pointer dereference, address:" which I am still trying to figure out.
>>
>> Regards,
>> Christian.
>>
>>>
>>>>
>>>> Then we could embed the fences in amdgpu_ctx_entity as dynamic 
>>>> array at the end of the structure.
>>>>
>>>> And last we can start to dynamic allocate and initialize the 
>>>> amdgpu_ctx_entity() structures.
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>
>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] drm/amdgpu: allocate entities on demand
  2019-12-02 14:59               ` Christian König
@ 2019-12-03 15:02                 ` Nirmoy
  2019-12-03 17:33                   ` Christian König
  0 siblings, 1 reply; 18+ messages in thread
From: Nirmoy @ 2019-12-03 15:02 UTC (permalink / raw)
  To: christian.koenig, alexander.deucher, kenny.ho; +Cc: nirmoy.das, amd-gfx

Hi Christian,

On 12/2/19 3:59 PM, Christian König wrote:
> Am 02.12.19 um 15:43 schrieb Nirmoy:
>>
>> Do you mean something like
>>
>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index 684692a8ed76..ac67f8f098fa 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -81,7 +81,7 @@ enum drm_sched_priority {
>>  struct drm_sched_entity {
>>         struct list_head                list;
>>         struct drm_sched_rq             *rq;
>> -       struct drm_sched_rq             **rq_list;
>> +      struct drm_gpu_scheduler        **sched;
>>         unsigned int                    num_rq_list;
>>         spinlock_t                      rq_lock;
>
> Yes, exactly. Problem is that I'm not 100% sure if that really works 
> with all users of the rq_list.

Currently the rq_list users perform two main tasks:

1. change the rq priority for a context on user request

2. help the drm scheduler find the rq with the least load

Can you please check the diff below? It doesn't really work because I
get a kernel panic. But do you think it matches your idea?

test@install:~/linux> git diff drivers/gpu/drm/scheduler/sched_entity.c 
|tee
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
b/drivers/gpu/drm/scheduler/sched_entity.c
index 1a5153197fe9..0bbd8ddd6c83 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -37,9 +37,9 @@
   * submit to HW ring.
   *
   * @entity: scheduler entity to init
- * @rq_list: the list of run queue on which jobs from this
+ * @sched_list: the list of drm scheds on which jobs from this
   *           entity can be submitted
- * @num_rq_list: number of run queue in rq_list
+ * @num_sched_list: number of drm sched in sched_list
   * @guilty: atomic_t set to 1 when a job on this queue
   *          is found to be guilty causing a timeout
   *
@@ -49,30 +49,24 @@
   * Returns 0 on success or a negative error code on failure.
   */
  int drm_sched_entity_init(struct drm_sched_entity *entity,
-              struct drm_sched_rq **rq_list,
-              unsigned int num_rq_list,
-              atomic_t *guilty)
+              struct drm_gpu_scheduler **sched_list,
+              unsigned int num_sched_list,
+              atomic_t *guilty, enum drm_sched_priority priority)
  {
-    int i;

-    if (!(entity && rq_list && (num_rq_list == 0 || rq_list[0])))
+    if (!(entity && sched_list && (num_sched_list == 0 || sched_list[0])))
          return -EINVAL;

      memset(entity, 0, sizeof(struct drm_sched_entity));
      INIT_LIST_HEAD(&entity->list);
      entity->rq = NULL;
      entity->guilty = guilty;
-    entity->num_rq_list = num_rq_list;
-    entity->rq_list = kcalloc(num_rq_list, sizeof(struct drm_sched_rq *),
-                GFP_KERNEL);
-    if (!entity->rq_list)
-        return -ENOMEM;
-
-    for (i = 0; i < num_rq_list; ++i)
-        entity->rq_list[i] = rq_list[i];
+    entity->num_sched_list = num_sched_list;
+    entity->sched_list = sched_list;
+    entity->priority = priority;

-    if (num_rq_list)
-        entity->rq = rq_list[0];
+    if (num_sched_list)
+        entity->rq = &entity->sched_list[0]->sched_rq[entity->priority];

      entity->last_scheduled = NULL;

@@ -136,10 +130,10 @@ drm_sched_entity_get_free_sched(struct 
drm_sched_entity *entity)
      unsigned int min_jobs = UINT_MAX, num_jobs;
      int i;

-    for (i = 0; i < entity->num_rq_list; ++i) {
-        struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched;
+    for (i = 0; i < entity->num_sched_list; ++i) {
+        struct drm_gpu_scheduler *sched = entity->sched_list[i];

-        if (!entity->rq_list[i]->sched->ready) {
+        if (!entity->sched_list[i]->ready) {
              DRM_WARN("sched%s is not ready, skipping", sched->name);
              continue;
          }
@@ -147,7 +141,7 @@ drm_sched_entity_get_free_sched(struct 
drm_sched_entity *entity)
          num_jobs = atomic_read(&sched->num_jobs);
          if (num_jobs < min_jobs) {
              min_jobs = num_jobs;
-            rq = entity->rq_list[i];
+            rq = &entity->sched_list[i]->sched_rq[entity->priority];
          }
      }

@@ -304,7 +298,6 @@ void drm_sched_entity_fini(struct drm_sched_entity 
*entity)

      dma_fence_put(entity->last_scheduled);
      entity->last_scheduled = NULL;
-    kfree(entity->rq_list);
  }
  EXPORT_SYMBOL(drm_sched_entity_fini);

@@ -372,8 +365,9 @@ void drm_sched_entity_set_priority(struct 
drm_sched_entity *entity,
      unsigned int i;

      spin_lock(&entity->rq_lock);
-
-    for (i = 0; i < entity->num_rq_list; ++i)
+//TODO
+/*
+    for (i = 0; i < entity->num_sched_list; ++i)
  drm_sched_entity_set_rq_priority(&entity->rq_list[i], priority);

      if (entity->rq) {
@@ -381,7 +375,7 @@ void drm_sched_entity_set_priority(struct 
drm_sched_entity *entity,
          drm_sched_entity_set_rq_priority(&entity->rq, priority);
          drm_sched_rq_add_entity(entity->rq, entity);
      }
-
+*/
      spin_unlock(&entity->rq_lock);
  }
  EXPORT_SYMBOL(drm_sched_entity_set_priority);
@@ -486,7 +480,7 @@ void drm_sched_entity_select_rq(struct 
drm_sched_entity *entity)
      struct dma_fence *fence;
      struct drm_sched_rq *rq;

-    if (spsc_queue_count(&entity->job_queue) || entity->num_rq_list <= 1)
+    if (spsc_queue_count(&entity->job_queue) || entity->num_sched_list 
<= 1)
          return;

      fence = READ_ONCE(entity->last_scheduled);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] drm/amdgpu: allocate entities on demand
  2019-12-03 15:02                 ` Nirmoy
@ 2019-12-03 17:33                   ` Christian König
  2019-12-03 17:47                     ` Christian König
  0 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2019-12-03 17:33 UTC (permalink / raw)
  To: Nirmoy, alexander.deucher, kenny.ho; +Cc: nirmoy.das, amd-gfx

Am 03.12.19 um 16:02 schrieb Nirmoy:
> Hi Christian,
>
> On 12/2/19 3:59 PM, Christian König wrote:
>> Am 02.12.19 um 15:43 schrieb Nirmoy:
>>>
>>> Do you mean something like
>>>
>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>>> index 684692a8ed76..ac67f8f098fa 100644
>>> --- a/include/drm/gpu_scheduler.h
>>> +++ b/include/drm/gpu_scheduler.h
>>> @@ -81,7 +81,7 @@ enum drm_sched_priority {
>>>  struct drm_sched_entity {
>>>         struct list_head                list;
>>>         struct drm_sched_rq             *rq;
>>> -       struct drm_sched_rq             **rq_list;
>>> +      struct drm_gpu_scheduler        **sched;
>>>         unsigned int                    num_rq_list;
>>>         spinlock_t                      rq_lock;
>>
>> Yes, exactly. Problem is that I'm not 100% sure if that really works 
>> with all users of the rq_list.
>
> Currently the rq_list users perform two main tasks:
>
> 1. change the rq priority for a context on user request
>
> 2. help the drm scheduler find the rq with the least load
>
> Can you please check the diff below? It doesn't really work because
> I get a kernel panic. But do you think it matches your idea?

Yes, that looks exactly like what I had in mind.

Christian.

>
> test@install:~/linux> git diff 
> drivers/gpu/drm/scheduler/sched_entity.c |tee
> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
> b/drivers/gpu/drm/scheduler/sched_entity.c
> index 1a5153197fe9..0bbd8ddd6c83 100644
> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> @@ -37,9 +37,9 @@
>   * submit to HW ring.
>   *
>   * @entity: scheduler entity to init
> - * @rq_list: the list of run queue on which jobs from this
> + * @sched_list: the list of drm scheds on which jobs from this
>   *           entity can be submitted
> - * @num_rq_list: number of run queue in rq_list
> + * @num_sched_list: number of drm sched in sched_list
>   * @guilty: atomic_t set to 1 when a job on this queue
>   *          is found to be guilty causing a timeout
>   *
> @@ -49,30 +49,24 @@
>   * Returns 0 on success or a negative error code on failure.
>   */
>  int drm_sched_entity_init(struct drm_sched_entity *entity,
> -              struct drm_sched_rq **rq_list,
> -              unsigned int num_rq_list,
> -              atomic_t *guilty)
> +              struct drm_gpu_scheduler **sched_list,
> +              unsigned int num_sched_list,
> +              atomic_t *guilty, enum drm_sched_priority priority)
>  {
> -    int i;
>
> -    if (!(entity && rq_list && (num_rq_list == 0 || rq_list[0])))
> +    if (!(entity && sched_list && (num_sched_list == 0 || 
> sched_list[0])))
>          return -EINVAL;
>
>      memset(entity, 0, sizeof(struct drm_sched_entity));
>      INIT_LIST_HEAD(&entity->list);
>      entity->rq = NULL;
>      entity->guilty = guilty;
> -    entity->num_rq_list = num_rq_list;
> -    entity->rq_list = kcalloc(num_rq_list, sizeof(struct drm_sched_rq 
> *),
> -                GFP_KERNEL);
> -    if (!entity->rq_list)
> -        return -ENOMEM;
> -
> -    for (i = 0; i < num_rq_list; ++i)
> -        entity->rq_list[i] = rq_list[i];
> +    entity->num_sched_list = num_sched_list;
> +    entity->sched_list = sched_list;
> +    entity->priority = priority;
>
> -    if (num_rq_list)
> -        entity->rq = rq_list[0];
> +    if (num_sched_list)
> +        entity->rq = &entity->sched_list[0]->sched_rq[entity->priority];
>
>      entity->last_scheduled = NULL;
>
> @@ -136,10 +130,10 @@ drm_sched_entity_get_free_sched(struct 
> drm_sched_entity *entity)
>      unsigned int min_jobs = UINT_MAX, num_jobs;
>      int i;
>
> -    for (i = 0; i < entity->num_rq_list; ++i) {
> -        struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched;
> +    for (i = 0; i < entity->num_sched_list; ++i) {
> +        struct drm_gpu_scheduler *sched = entity->sched_list[i];
>
> -        if (!entity->rq_list[i]->sched->ready) {
> +        if (!entity->sched_list[i]->ready) {
>              DRM_WARN("sched%s is not ready, skipping", sched->name);
>              continue;
>          }
> @@ -147,7 +141,7 @@ drm_sched_entity_get_free_sched(struct 
> drm_sched_entity *entity)
>          num_jobs = atomic_read(&sched->num_jobs);
>          if (num_jobs < min_jobs) {
>              min_jobs = num_jobs;
> -            rq = entity->rq_list[i];
> +            rq = &entity->sched_list[i]->sched_rq[entity->priority];
>          }
>      }
>
> @@ -304,7 +298,6 @@ void drm_sched_entity_fini(struct drm_sched_entity 
> *entity)
>
>      dma_fence_put(entity->last_scheduled);
>      entity->last_scheduled = NULL;
> -    kfree(entity->rq_list);
>  }
>  EXPORT_SYMBOL(drm_sched_entity_fini);
>
> @@ -372,8 +365,9 @@ void drm_sched_entity_set_priority(struct 
> drm_sched_entity *entity,
>      unsigned int i;
>
>      spin_lock(&entity->rq_lock);
> -
> -    for (i = 0; i < entity->num_rq_list; ++i)
> +//TODO
> +/*
> +    for (i = 0; i < entity->num_sched_list; ++i)
>  drm_sched_entity_set_rq_priority(&entity->rq_list[i], priority);
>
>      if (entity->rq) {
> @@ -381,7 +375,7 @@ void drm_sched_entity_set_priority(struct 
> drm_sched_entity *entity,
>          drm_sched_entity_set_rq_priority(&entity->rq, priority);
>          drm_sched_rq_add_entity(entity->rq, entity);
>      }
> -
> +*/
>      spin_unlock(&entity->rq_lock);
>  }
>  EXPORT_SYMBOL(drm_sched_entity_set_priority);
> @@ -486,7 +480,7 @@ void drm_sched_entity_select_rq(struct 
> drm_sched_entity *entity)
>      struct dma_fence *fence;
>      struct drm_sched_rq *rq;
>
> -    if (spsc_queue_count(&entity->job_queue) || entity->num_rq_list 
> <= 1)
> +    if (spsc_queue_count(&entity->job_queue) || 
> entity->num_sched_list <= 1)
>          return;
>
>      fence = READ_ONCE(entity->last_scheduled);

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] drm/amdgpu: allocate entities on demand
  2019-12-03 17:33                   ` Christian König
@ 2019-12-03 17:47                     ` Christian König
  2019-12-04 19:15                       ` Nirmoy
  0 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2019-12-03 17:47 UTC (permalink / raw)
  To: Christian König, Nirmoy, alexander.deucher, kenny.ho
  Cc: nirmoy.das, amd-gfx

Am 03.12.19 um 18:33 schrieb Christian König:
> Am 03.12.19 um 16:02 schrieb Nirmoy:
>> Hi Christian,
>>
>> On 12/2/19 3:59 PM, Christian König wrote:
>>> Am 02.12.19 um 15:43 schrieb Nirmoy:
>>>>
>>>> Do you mean something like
>>>>
>>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>>>> index 684692a8ed76..ac67f8f098fa 100644
>>>> --- a/include/drm/gpu_scheduler.h
>>>> +++ b/include/drm/gpu_scheduler.h
>>>> @@ -81,7 +81,7 @@ enum drm_sched_priority {
>>>>  struct drm_sched_entity {
>>>>         struct list_head                list;
>>>>         struct drm_sched_rq             *rq;
>>>> -       struct drm_sched_rq             **rq_list;
>>>> +      struct drm_gpu_scheduler        **sched;
>>>>         unsigned int                    num_rq_list;
>>>>         spinlock_t                      rq_lock;
>>>
>>> Yes, exactly. Problem is that I'm not 100% sure if that really works 
>>> with all users of the rq_list.
>>
>> Currently the rq_list users perform two main tasks:
>>
>> 1. change the rq priority for a context on user request
>>
>> 2. help the drm scheduler find the rq with the least load
>>
>> Can you please check the diff below? It doesn't really work because
>> I get a kernel panic. But do you think it matches your idea?
>
> Yes, that looks exactly like what I had in mind.

BTW: What does the matching amdgpu change look like?

Keep in mind that you can't allocate the list of schedulers on the stack 
any more.

That might be the reason for your kernel panic.
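
(A hypothetical illustration of that lifetime bug, assuming the
reworked drm_sched_entity_init() from the diff above, which stores the
sched_list pointer instead of copying it:)

static int init_entity_broken(struct drm_sched_entity *entity,
			      struct drm_gpu_scheduler *sched,
			      enum drm_sched_priority prio)
{
	/* Lives on the stack ... */
	struct drm_gpu_scheduler *sched_list[1] = { sched };

	/* ... but the entity keeps the pointer and dereferences it
	 * long after this function has returned. */
	return drm_sched_entity_init(entity, sched_list, 1, NULL, prio);
}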

Christian.

>
> Christian.
>
>>
>> test@install:~/linux> git diff 
>> drivers/gpu/drm/scheduler/sched_entity.c |tee
>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
>> b/drivers/gpu/drm/scheduler/sched_entity.c
>> index 1a5153197fe9..0bbd8ddd6c83 100644
>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>> @@ -37,9 +37,9 @@
>>   * submit to HW ring.
>>   *
>>   * @entity: scheduler entity to init
>> - * @rq_list: the list of run queue on which jobs from this
>> + * @sched_list: the list of drm scheds on which jobs from this
>>   *           entity can be submitted
>> - * @num_rq_list: number of run queue in rq_list
>> + * @num_sched_list: number of drm sched in sched_list
>>   * @guilty: atomic_t set to 1 when a job on this queue
>>   *          is found to be guilty causing a timeout
>>   *
>> @@ -49,30 +49,24 @@
>>   * Returns 0 on success or a negative error code on failure.
>>   */
>>  int drm_sched_entity_init(struct drm_sched_entity *entity,
>> -              struct drm_sched_rq **rq_list,
>> -              unsigned int num_rq_list,
>> -              atomic_t *guilty)
>> +              struct drm_gpu_scheduler **sched_list,
>> +              unsigned int num_sched_list,
>> +              atomic_t *guilty, enum drm_sched_priority priority)
>>  {
>> -    int i;
>>
>> -    if (!(entity && rq_list && (num_rq_list == 0 || rq_list[0])))
>> +    if (!(entity && sched_list && (num_sched_list == 0 || 
>> sched_list[0])))
>>          return -EINVAL;
>>
>>      memset(entity, 0, sizeof(struct drm_sched_entity));
>>      INIT_LIST_HEAD(&entity->list);
>>      entity->rq = NULL;
>>      entity->guilty = guilty;
>> -    entity->num_rq_list = num_rq_list;
>> -    entity->rq_list = kcalloc(num_rq_list, sizeof(struct 
>> drm_sched_rq *),
>> -                GFP_KERNEL);
>> -    if (!entity->rq_list)
>> -        return -ENOMEM;
>> -
>> -    for (i = 0; i < num_rq_list; ++i)
>> -        entity->rq_list[i] = rq_list[i];
>> +    entity->num_sched_list = num_sched_list;
>> +    entity->sched_list = sched_list;
>> +    entity->priority = priority;
>>
>> -    if (num_rq_list)
>> -        entity->rq = rq_list[0];
>> +    if (num_sched_list)
>> +        entity->rq = 
>> &entity->sched_list[0]->sched_rq[entity->priority];
>>
>>      entity->last_scheduled = NULL;
>>
>> @@ -136,10 +130,10 @@ drm_sched_entity_get_free_sched(struct 
>> drm_sched_entity *entity)
>>      unsigned int min_jobs = UINT_MAX, num_jobs;
>>      int i;
>>
>> -    for (i = 0; i < entity->num_rq_list; ++i) {
>> -        struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched;
>> +    for (i = 0; i < entity->num_sched_list; ++i) {
>> +        struct drm_gpu_scheduler *sched = entity->sched_list[i];
>>
>> -        if (!entity->rq_list[i]->sched->ready) {
>> +        if (!entity->sched_list[i]->ready) {
>>              DRM_WARN("sched%s is not ready, skipping", sched->name);
>>              continue;
>>          }
>> @@ -147,7 +141,7 @@ drm_sched_entity_get_free_sched(struct 
>> drm_sched_entity *entity)
>>          num_jobs = atomic_read(&sched->num_jobs);
>>          if (num_jobs < min_jobs) {
>>              min_jobs = num_jobs;
>> -            rq = entity->rq_list[i];
>> +            rq = &entity->sched_list[i]->sched_rq[entity->priority];
>>          }
>>      }
>>
>> @@ -304,7 +298,6 @@ void drm_sched_entity_fini(struct 
>> drm_sched_entity *entity)
>>
>>      dma_fence_put(entity->last_scheduled);
>>      entity->last_scheduled = NULL;
>> -    kfree(entity->rq_list);
>>  }
>>  EXPORT_SYMBOL(drm_sched_entity_fini);
>>
>> @@ -372,8 +365,9 @@ void drm_sched_entity_set_priority(struct 
>> drm_sched_entity *entity,
>>      unsigned int i;
>>
>>      spin_lock(&entity->rq_lock);
>> -
>> -    for (i = 0; i < entity->num_rq_list; ++i)
>> +//TODO
>> +/*
>> +    for (i = 0; i < entity->num_sched_list; ++i)
>>  drm_sched_entity_set_rq_priority(&entity->rq_list[i], priority);
>>
>>      if (entity->rq) {
>> @@ -381,7 +375,7 @@ void drm_sched_entity_set_priority(struct 
>> drm_sched_entity *entity,
>>          drm_sched_entity_set_rq_priority(&entity->rq, priority);
>>          drm_sched_rq_add_entity(entity->rq, entity);
>>      }
>> -
>> +*/
>>      spin_unlock(&entity->rq_lock);
>>  }
>>  EXPORT_SYMBOL(drm_sched_entity_set_priority);
>> @@ -486,7 +480,7 @@ void drm_sched_entity_select_rq(struct 
>> drm_sched_entity *entity)
>>      struct dma_fence *fence;
>>      struct drm_sched_rq *rq;
>>
>> -    if (spsc_queue_count(&entity->job_queue) || entity->num_rq_list 
>> <= 1)
>> +    if (spsc_queue_count(&entity->job_queue) || 
>> entity->num_sched_list <= 1)
>>          return;
>>
>>      fence = READ_ONCE(entity->last_scheduled);
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] drm/amdgpu: allocate entities on demand
  2019-12-03 17:47                     ` Christian König
@ 2019-12-04 19:15                       ` Nirmoy
  0 siblings, 0 replies; 18+ messages in thread
From: Nirmoy @ 2019-12-04 19:15 UTC (permalink / raw)
  To: christian.koenig, alexander.deucher, kenny.ho; +Cc: nirmoy.das, amd-gfx

On 12/3/19 6:47 PM, Christian König wrote:
> Am 03.12.19 um 18:33 schrieb Christian König:
>> Am 03.12.19 um 16:02 schrieb Nirmoy:
>>> Hi Christian,
>>>
>>> On 12/2/19 3:59 PM, Christian König wrote:
>>>> Am 02.12.19 um 15:43 schrieb Nirmoy:
>>>>>
>>>>> Do you mean something like
>>>>>
>>>>> diff --git a/include/drm/gpu_scheduler.h 
>>>>> b/include/drm/gpu_scheduler.h
>>>>> index 684692a8ed76..ac67f8f098fa 100644
>>>>> --- a/include/drm/gpu_scheduler.h
>>>>> +++ b/include/drm/gpu_scheduler.h
>>>>> @@ -81,7 +81,7 @@ enum drm_sched_priority {
>>>>>  struct drm_sched_entity {
>>>>>         struct list_head                list;
>>>>>         struct drm_sched_rq             *rq;
>>>>> -       struct drm_sched_rq             **rq_list;
>>>>> +      struct drm_gpu_scheduler        **sched;
>>>>>         unsigned int                    num_rq_list;
>>>>>         spinlock_t                      rq_lock;
>>>>
>>>> Yes, exactly. Problem is that I'm not 100% sure if that really 
>>>> works with all users of the rq_list.
>>>
>>> Currently the rq_list users perform two main tasks:
>>>
>>> 1. change the rq priority for a context on user request
>>>
>>> 2. help the drm scheduler find the rq with the least load
>>>
>>> Can you please check the diff below? It doesn't really work
>>> because I get a kernel panic. But do you think it matches your
>>> idea?
>>
>> Yes, that looks exactly like what I had in mind.
>
> BTW: What does the matching amdgpu change look like?
>
> Keep in mind that you can't allocate the list of schedulers on the 
> stack any more.
>
> That might be the reason for your kernel panic.

Just saw your email. I was scratching my head for a while because of
this memory corruption.

You are right, it was because of stack memory. I am keeping the kcalloc
in drm_sched_entity_init().
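
(Roughly, keeping that copy inside drm_sched_entity_init() would look
like the sketch below, mirroring the rq_list kcalloc the diff removed;
illustrative only:)

	entity->sched_list = kcalloc(num_sched_list,
				     sizeof(struct drm_gpu_scheduler *),
				     GFP_KERNEL);
	if (!entity->sched_list)
		return -ENOMEM;

	/* copy, since the caller's array may live on the stack */
	for (i = 0; i < num_sched_list; ++i)
		entity->sched_list[i] = sched_list[i];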


Regards,

Nirmoy

>
> Christian.
>
>>
>> Christian.
>>
>>>
>>> test@install:~/linux> git diff 
>>> drivers/gpu/drm/scheduler/sched_entity.c |tee
>>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
>>> b/drivers/gpu/drm/scheduler/sched_entity.c
>>> index 1a5153197fe9..0bbd8ddd6c83 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>>> @@ -37,9 +37,9 @@
>>>   * submit to HW ring.
>>>   *
>>>   * @entity: scheduler entity to init
>>> - * @rq_list: the list of run queue on which jobs from this
>>> + * @sched_list: the list of drm scheds on which jobs from this
>>>   *           entity can be submitted
>>> - * @num_rq_list: number of run queue in rq_list
>>> + * @num_sched_list: number of drm sched in sched_list
>>>   * @guilty: atomic_t set to 1 when a job on this queue
>>>   *          is found to be guilty causing a timeout
>>>   *
>>> @@ -49,30 +49,24 @@
>>>   * Returns 0 on success or a negative error code on failure.
>>>   */
>>>  int drm_sched_entity_init(struct drm_sched_entity *entity,
>>> -              struct drm_sched_rq **rq_list,
>>> -              unsigned int num_rq_list,
>>> -              atomic_t *guilty)
>>> +              struct drm_gpu_scheduler **sched_list,
>>> +              unsigned int num_sched_list,
>>> +              atomic_t *guilty, enum drm_sched_priority priority)
>>>  {
>>> -    int i;
>>>
>>> -    if (!(entity && rq_list && (num_rq_list == 0 || rq_list[0])))
>>> +    if (!(entity && sched_list && (num_sched_list == 0 || 
>>> sched_list[0])))
>>>          return -EINVAL;
>>>
>>>      memset(entity, 0, sizeof(struct drm_sched_entity));
>>>      INIT_LIST_HEAD(&entity->list);
>>>      entity->rq = NULL;
>>>      entity->guilty = guilty;
>>> -    entity->num_rq_list = num_rq_list;
>>> -    entity->rq_list = kcalloc(num_rq_list, sizeof(struct 
>>> drm_sched_rq *),
>>> -                GFP_KERNEL);
>>> -    if (!entity->rq_list)
>>> -        return -ENOMEM;
>>> -
>>> -    for (i = 0; i < num_rq_list; ++i)
>>> -        entity->rq_list[i] = rq_list[i];
>>> +    entity->num_sched_list = num_sched_list;
>>> +    entity->sched_list = sched_list;
>>> +    entity->priority = priority;
>>>
>>> -    if (num_rq_list)
>>> -        entity->rq = rq_list[0];
>>> +    if (num_sched_list)
>>> +        entity->rq = 
>>> &entity->sched_list[0]->sched_rq[entity->priority];
>>>
>>>      entity->last_scheduled = NULL;
>>>
>>> @@ -136,10 +130,10 @@ drm_sched_entity_get_free_sched(struct 
>>> drm_sched_entity *entity)
>>>      unsigned int min_jobs = UINT_MAX, num_jobs;
>>>      int i;
>>>
>>> -    for (i = 0; i < entity->num_rq_list; ++i) {
>>> -        struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched;
>>> +    for (i = 0; i < entity->num_sched_list; ++i) {
>>> +        struct drm_gpu_scheduler *sched = entity->sched_list[i];
>>>
>>> -        if (!entity->rq_list[i]->sched->ready) {
>>> +        if (!entity->sched_list[i]->ready) {
>>>              DRM_WARN("sched%s is not ready, skipping", sched->name);
>>>              continue;
>>>          }
>>> @@ -147,7 +141,7 @@ drm_sched_entity_get_free_sched(struct 
>>> drm_sched_entity *entity)
>>>          num_jobs = atomic_read(&sched->num_jobs);
>>>          if (num_jobs < min_jobs) {
>>>              min_jobs = num_jobs;
>>> -            rq = entity->rq_list[i];
>>> +            rq = &entity->sched_list[i]->sched_rq[entity->priority];
>>>          }
>>>      }
>>>
>>> @@ -304,7 +298,6 @@ void drm_sched_entity_fini(struct 
>>> drm_sched_entity *entity)
>>>
>>>      dma_fence_put(entity->last_scheduled);
>>>      entity->last_scheduled = NULL;
>>> -    kfree(entity->rq_list);
>>>  }
>>>  EXPORT_SYMBOL(drm_sched_entity_fini);
>>>
>>> @@ -372,8 +365,9 @@ void drm_sched_entity_set_priority(struct 
>>> drm_sched_entity *entity,
>>>      unsigned int i;
>>>
>>>      spin_lock(&entity->rq_lock);
>>> -
>>> -    for (i = 0; i < entity->num_rq_list; ++i)
>>> +//TODO
>>> +/*
>>> +    for (i = 0; i < entity->num_sched_list; ++i)
>>>  drm_sched_entity_set_rq_priority(&entity->rq_list[i], priority);
>>>
>>>      if (entity->rq) {
>>> @@ -381,7 +375,7 @@ void drm_sched_entity_set_priority(struct 
>>> drm_sched_entity *entity,
>>>          drm_sched_entity_set_rq_priority(&entity->rq, priority);
>>>          drm_sched_rq_add_entity(entity->rq, entity);
>>>      }
>>> -
>>> +*/
>>>      spin_unlock(&entity->rq_lock);
>>>  }
>>>  EXPORT_SYMBOL(drm_sched_entity_set_priority);
>>> @@ -486,7 +480,7 @@ void drm_sched_entity_select_rq(struct 
>>> drm_sched_entity *entity)
>>>      struct dma_fence *fence;
>>>      struct drm_sched_rq *rq;
>>>
>>> -    if (spsc_queue_count(&entity->job_queue) || entity->num_rq_list 
>>> <= 1)
>>> +    if (spsc_queue_count(&entity->job_queue) || 
>>> entity->num_sched_list <= 1)
>>>          return;
>>>
>>>      fence = READ_ONCE(entity->last_scheduled);
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] drm/amdgpu: allocate entities on demand
  2019-11-26  9:45         ` Christian König
                           ` (2 preceding siblings ...)
  (?)
@ 2020-01-20 13:05         ` Nirmoy
  2020-01-20 13:09           ` Christian König
  -1 siblings, 1 reply; 18+ messages in thread
From: Nirmoy @ 2020-01-20 13:05 UTC (permalink / raw)
  To: christian.koenig, alexander.deucher, kenny.ho; +Cc: nirmoy.das, amd-gfx

Hi Christian,

On 11/26/19 10:45 AM, Christian König wrote:
> It looks like a start, but there are numerous things which need to
> be fixed.
>
> Question number one is: What's that good for? Entities are not the 
> problem here. The real issue is the fence ring and the rq_list.
>
> The rq_list could actually be made constant since it should never be 
> changed by the entity. It is only changed for backward compatibility 
> in drm_sched_entity_set_priority().
>
> So I would start there and clean up drm_sched_entity_set_priority()
> to actually just set a new constant rq list instead.
>
> Then we could embed the fences in amdgpu_ctx_entity as dynamic array 
> at the end of the structure.
>
amdgpu_ctx_entity already contains a fence array. Do you mean another
fence array?

struct amdgpu_ctx_entity {
        uint64_t                sequence;
        struct dma_fence        **fences;
        struct drm_sched_entity entity;
};



Regards,

Nirmoy

> And last we can start to dynamic allocate and initialize the 
> amdgpu_ctx_entity() structures.
>
> Regards,
> Christian.
>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] drm/amdgpu: allocate entities on demand
  2020-01-20 13:05         ` Nirmoy
@ 2020-01-20 13:09           ` Christian König
  0 siblings, 0 replies; 18+ messages in thread
From: Christian König @ 2020-01-20 13:09 UTC (permalink / raw)
  To: Nirmoy, alexander.deucher, kenny.ho; +Cc: nirmoy.das, amd-gfx

On 20.01.20 at 14:05, Nirmoy wrote:
> Hi Christian,
>
> On 11/26/19 10:45 AM, Christian König wrote:
>> It looks like a start, but there are numerous things which need to
>> be fixed.
>>
>> Question number one is: What's that good for? Entities are not the 
>> problem here. The real issue is the fence ring and the rq_list.
>>
>> The rq_list could actually be made constant since it should never be 
>> changed by the entity. It is only changed for backward compatibility 
>> in drm_sched_entity_set_priority().
>>
>> So I would start there and clean up
>> drm_sched_entity_set_priority() to actually just set a new constant
>> rq list instead.
>>
>> Then we could embed the fences in amdgpu_ctx_entity as a dynamic
>> array at the end of the structure.
>>
> amdgpu_ctx_entity already contains a fence array. Do you mean another
> fence array?

No, I meant that one. Currently this fence array is allocated
separately because its size depends on a module parameter (IIRC it was
one big allocation for all entities).

Where we want to get to is that it is allocated together with the
amdgpu_ctx_entity as a dynamic array.
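
Something like this minimal sketch, assuming struct_size() from
linux/overflow.h and the amdgpu_sched_jobs module parameter for the
ring size (illustrative only, not the final patch):

    struct amdgpu_ctx_entity {
            uint64_t                sequence;
            struct drm_sched_entity entity;
            /* flexible array member, sized at allocation time by
             * the amdgpu_sched_jobs module parameter
             */
            struct dma_fence        *fences[];
    };

    /* one allocation covers the entity and its fence ring */
    entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
                     GFP_KERNEL);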

Regards,
Christian.

>
> struct amdgpu_ctx_entity {
>         uint64_t                sequence;
>         struct dma_fence        **fences;
>         struct drm_sched_entity entity;
> };
>
>
>
> Regards,
>
> Nirmoy
>
>> And last we can start to dynamic allocate and initialize the 
>> amdgpu_ctx_entity() structures.
>>
>> Regards,
>> Christian.
>>
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2020-01-20 13:09 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-25 23:17 [RFC PATCH] drm/amdgpu: allocate entities on demand Nirmoy Das
2019-11-25 23:17 ` Nirmoy Das
     [not found] ` <20191125231719.108949-1-nirmoy.das-5C7GfCeVMHo@public.gmane.org>
2019-11-25 23:31   ` Nirmoy
2019-11-25 23:31     ` Nirmoy
     [not found]     ` <2e514ab3-1582-6508-d81a-cbc2d12f42d7-5C7GfCeVMHo@public.gmane.org>
2019-11-26  9:45       ` Christian König
2019-11-26  9:45         ` Christian König
     [not found]         ` <74fd0faf-fd99-e33e-8d7a-95f9bb8be26a-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-11-26 14:41           ` Nirmoy
2019-11-26 14:41             ` Nirmoy
2019-11-29 14:29         ` Nirmoy
2019-11-29 18:42           ` Christian König
2019-12-02 14:43             ` Nirmoy
2019-12-02 14:59               ` Christian König
2019-12-03 15:02                 ` Nirmoy
2019-12-03 17:33                   ` Christian König
2019-12-03 17:47                     ` Christian König
2019-12-04 19:15                       ` Nirmoy
2020-01-20 13:05         ` Nirmoy
2020-01-20 13:09           ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.