* [PATCH] drm/amdgpu: allocate entities on demand
@ 2020-01-24 11:53 Nirmoy Das
2020-01-24 12:09 ` Christian König
` (3 more replies)
0 siblings, 4 replies; 14+ messages in thread
From: Nirmoy Das @ 2020-01-24 11:53 UTC (permalink / raw)
To: amd-gfx; +Cc: alexander.deucher, kenny.ho, nirmoy.das, christian.koenig
Currently we pre-allocate entities and fences for all the HW IPs on
context creation, some of which might never be used.
This patch tries to resolve entity/fences wastage by creating entity
only when needed.
Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 244 +++++++++++++-----------
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 2 +-
2 files changed, 135 insertions(+), 111 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 05c2af61e7de..73f7615df8c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
[AMDGPU_HW_IP_VCN_JPEG] = 1,
};
-static int amdgpu_ctx_total_num_entities(void)
-{
- unsigned i, num_entities = 0;
-
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
- num_entities += amdgpu_ctx_num_entities[i];
-
- return num_entities;
-}
-
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
enum drm_sched_priority priority)
{
+ if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+ return -EINVAL;
+
/* NORMAL and below are accessible by everyone */
if (priority <= DRM_SCHED_PRIORITY_NORMAL)
return 0;
@@ -68,64 +61,44 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
return -EACCES;
}
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
- enum drm_sched_priority priority,
- struct drm_file *filp,
- struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
- unsigned i, j;
+ struct amdgpu_device *adev = ctx->adev;
+ struct amdgpu_ctx_entity *entity;
+ struct drm_gpu_scheduler **scheds;
+ struct drm_gpu_scheduler *sched;
+ unsigned num_scheds = 0;
+ enum drm_sched_priority priority;
int r;
- if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
- return -EINVAL;
-
- r = amdgpu_ctx_priority_permit(filp, priority);
- if (r)
- return r;
-
- memset(ctx, 0, sizeof(*ctx));
- ctx->adev = adev;
-
-
- ctx->entities[0] = kcalloc(num_entities,
- sizeof(struct amdgpu_ctx_entity),
- GFP_KERNEL);
- if (!ctx->entities[0])
- return -ENOMEM;
-
-
- for (i = 0; i < num_entities; ++i) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
- entity->sequence = 1;
- entity->fences = kcalloc(amdgpu_sched_jobs,
- sizeof(struct dma_fence*), GFP_KERNEL);
- if (!entity->fences) {
- r = -ENOMEM;
- goto error_cleanup_memory;
- }
+ if (!ctx->entities[hw_ip]) {
+ ctx->entities[hw_ip] = kcalloc(amdgpu_ctx_num_entities[hw_ip],
+ sizeof(struct amdgpu_ctx_entity *),
+ GFP_KERNEL);
+ if (!ctx->entities[hw_ip])
+ return -ENOMEM;
}
- for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
- ctx->entities[i] = ctx->entities[i - 1] +
- amdgpu_ctx_num_entities[i - 1];
- kref_init(&ctx->refcount);
- spin_lock_init(&ctx->ring_lock);
- mutex_init(&ctx->lock);
+ ctx->entities[hw_ip][ring] = kcalloc(1, sizeof(struct amdgpu_ctx_entity),
+ GFP_KERNEL);
+ if (!ctx->entities[hw_ip][ring]) {
+ r = -ENOMEM;
+ goto error_free_entity;
+ }
- ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
- ctx->reset_counter_query = ctx->reset_counter;
- ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
- ctx->init_priority = priority;
- ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+ entity = ctx->entities[hw_ip][ring];
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- struct drm_gpu_scheduler **scheds;
- struct drm_gpu_scheduler *sched;
- unsigned num_scheds = 0;
+ entity->sequence = 1;
+ entity->fences = kcalloc(amdgpu_sched_jobs,
+ sizeof(struct dma_fence*), GFP_KERNEL);
+ if (!entity->fences) {
+ r = -ENOMEM;
+ goto error_free_entity;
+ }
- switch (i) {
+ priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+ ctx->init_priority : ctx->override_priority;
+ switch (hw_ip) {
case AMDGPU_HW_IP_GFX:
sched = &adev->gfx.gfx_ring[0].sched;
scheds = &sched;
@@ -166,57 +139,85 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
scheds = adev->jpeg.jpeg_sched;
num_scheds = adev->jpeg.num_jpeg_sched;
break;
- }
-
- for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
- r = drm_sched_entity_init(&ctx->entities[i][j].entity,
- priority, scheds,
- num_scheds, &ctx->guilty);
- if (r)
- goto error_cleanup_entities;
}
+ r = drm_sched_entity_init(&ctx->entities[hw_ip][ring]->entity,
+ priority, scheds,
+ num_scheds, &ctx->guilty);
+ if (r)
+ goto error_free_entity;
+
return 0;
-error_cleanup_entities:
- for (i = 0; i < num_entities; ++i)
- drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+error_free_entity:
+ kfree(ctx->entities[hw_ip][ring]);
+ return r;
+}
-error_cleanup_memory:
- for (i = 0; i < num_entities; ++i) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+ enum drm_sched_priority priority,
+ struct drm_file *filp,
+ struct amdgpu_ctx *ctx)
+{
+ int r;
- kfree(entity->fences);
- entity->fences = NULL;
- }
+ r = amdgpu_ctx_priority_permit(filp, priority);
+ if (r)
+ return r;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ ctx->adev = adev;
+
+ kref_init(&ctx->refcount);
+ spin_lock_init(&ctx->ring_lock);
+ mutex_init(&ctx->lock);
+
+ ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+ ctx->reset_counter_query = ctx->reset_counter;
+ ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+ ctx->init_priority = priority;
+ ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+
+ return 0;
- kfree(ctx->entities[0]);
- ctx->entities[0] = NULL;
- return r;
}
static void amdgpu_ctx_fini(struct kref *ref)
{
struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
- unsigned num_entities = amdgpu_ctx_total_num_entities();
struct amdgpu_device *adev = ctx->adev;
- unsigned i, j;
+ unsigned i, j, k;
if (!adev)
return;
- for (i = 0; i < num_entities; ++i) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct amdgpu_ctx_entity *entity;
+
+ if (!ctx->entities[i] || !ctx->entities[i][j])
+ continue;
+
+ entity = ctx->entities[i][j];
+ if (!entity->fences)
+ continue;
- for (j = 0; j < amdgpu_sched_jobs; ++j)
- dma_fence_put(entity->fences[j]);
+ for (k = 0; k < amdgpu_sched_jobs; ++k)
+ dma_fence_put(entity->fences[k]);
- kfree(entity->fences);
+ kfree(entity->fences);
+ entity->fences = NULL;
+
+ kfree(entity);
+ ctx->entities[i][j] = NULL;
+ }
+
+ kfree(ctx->entities[i]);
+ ctx->entities[i] = NULL;
}
- kfree(ctx->entities[0]);
mutex_destroy(&ctx->lock);
-
kfree(ctx);
}
@@ -239,7 +240,11 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
return -EINVAL;
}
- *entity = &ctx->entities[hw_ip][ring].entity;
+ if ((ctx->entities[hw_ip] == NULL) || (ctx->entities[hw_ip][ring] == NULL))
+ amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+
+
+ *entity = &ctx->entities[hw_ip][ring]->entity;
return 0;
}
@@ -279,14 +284,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
static void amdgpu_ctx_do_release(struct kref *ref)
{
struct amdgpu_ctx *ctx;
- unsigned num_entities;
- u32 i;
+ u32 i, j;
ctx = container_of(ref, struct amdgpu_ctx, refcount);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ if (!ctx->entities[i] || !ctx->entities[i][j])
+ continue;
- num_entities = amdgpu_ctx_total_num_entities();
- for (i = 0; i < num_entities; i++)
- drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+ drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+ }
+ }
amdgpu_ctx_fini(ref);
}
@@ -516,19 +524,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum drm_sched_priority priority)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
enum drm_sched_priority ctx_prio;
- unsigned i;
+ unsigned i, j;
ctx->override_priority = priority;
ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
- for (i = 0; i < num_entities; i++) {
- struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
+ if (!ctx->entities[i] || !ctx->entities[i][j])
+ continue;
- drm_sched_entity_set_priority(entity, ctx_prio);
+ entity = &ctx->entities[i][j]->entity;
+ drm_sched_entity_set_priority(entity, ctx_prio);
+ }
}
}
@@ -564,20 +576,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
- uint32_t id, i;
+ uint32_t id, i, j;
idp = &mgr->ctx_handles;
mutex_lock(&mgr->lock);
idr_for_each_entry(idp, ctx, id) {
- for (i = 0; i < num_entities; i++) {
- struct drm_sched_entity *entity;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
+
+ if (!ctx->entities[i] || !ctx->entities[i][j])
+ continue;
- entity = &ctx->entities[0][i].entity;
- timeout = drm_sched_entity_flush(entity, timeout);
+ entity = &ctx->entities[i][j]->entity;
+ timeout = drm_sched_entity_flush(entity, timeout);
+ }
}
}
mutex_unlock(&mgr->lock);
@@ -586,10 +602,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
- uint32_t id, i;
+ uint32_t id, i, j;
idp = &mgr->ctx_handles;
@@ -599,8 +614,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
continue;
}
- for (i = 0; i < num_entities; i++)
- drm_sched_entity_fini(&ctx->entities[0][i].entity);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
+
+ if (!ctx->entities[i] || !ctx->entities[i][j])
+ continue;
+
+ entity = &ctx->entities[i][j]->entity;
+ drm_sched_entity_fini(entity);
+ }
+ }
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index a6cd9d4b078c..ca9363e71df5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -42,7 +42,7 @@ struct amdgpu_ctx {
unsigned reset_counter_query;
uint32_t vram_lost_counter;
spinlock_t ring_lock;
- struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM];
+ struct amdgpu_ctx_entity **entities[AMDGPU_HW_IP_NUM];
bool preamble_presented;
enum drm_sched_priority init_priority;
enum drm_sched_priority override_priority;
--
2.24.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH] drm/amdgpu: allocate entities on demand
2020-01-24 11:53 [PATCH] drm/amdgpu: allocate entities on demand Nirmoy Das
@ 2020-01-24 12:09 ` Christian König
2020-01-24 12:43 ` Nirmoy
2020-01-24 12:41 ` Nirmoy Das
` (2 subsequent siblings)
3 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2020-01-24 12:09 UTC (permalink / raw)
To: Nirmoy Das, amd-gfx; +Cc: alexander.deucher, kenny.ho, nirmoy.das
Am 24.01.20 um 12:53 schrieb Nirmoy Das:
> Currently we pre-allocate entities and fences for all the HW IPs on
> context creation and some of which are might never be used.
>
> This patch tries to resolve entity/fences wastage by creating entity
> only when needed.
>
> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 244 +++++++++++++-----------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 2 +-
> 2 files changed, 135 insertions(+), 111 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 05c2af61e7de..73f7615df8c1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
> [AMDGPU_HW_IP_VCN_JPEG] = 1,
> };
>
> -static int amdgpu_ctx_total_num_entities(void)
> -{
> - unsigned i, num_entities = 0;
> -
> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> - num_entities += amdgpu_ctx_num_entities[i];
> -
> - return num_entities;
> -}
> -
> static int amdgpu_ctx_priority_permit(struct drm_file *filp,
> enum drm_sched_priority priority)
> {
> + if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
> + return -EINVAL;
> +
> /* NORMAL and below are accessible by everyone */
> if (priority <= DRM_SCHED_PRIORITY_NORMAL)
> return 0;
> @@ -68,64 +61,44 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
> return -EACCES;
> }
>
> -static int amdgpu_ctx_init(struct amdgpu_device *adev,
> - enum drm_sched_priority priority,
> - struct drm_file *filp,
> - struct amdgpu_ctx *ctx)
> +static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
> {
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> - unsigned i, j;
> + struct amdgpu_device *adev = ctx->adev;
> + struct amdgpu_ctx_entity *entity;
> + struct drm_gpu_scheduler **scheds;
> + struct drm_gpu_scheduler *sched;
> + unsigned num_scheds = 0;
> + enum drm_sched_priority priority;
> int r;
>
> - if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
> - return -EINVAL;
> -
> - r = amdgpu_ctx_priority_permit(filp, priority);
> - if (r)
> - return r;
> -
> - memset(ctx, 0, sizeof(*ctx));
> - ctx->adev = adev;
> -
> -
> - ctx->entities[0] = kcalloc(num_entities,
> - sizeof(struct amdgpu_ctx_entity),
> - GFP_KERNEL);
> - if (!ctx->entities[0])
> - return -ENOMEM;
> -
> -
> - for (i = 0; i < num_entities; ++i) {
> - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> -
> - entity->sequence = 1;
> - entity->fences = kcalloc(amdgpu_sched_jobs,
> - sizeof(struct dma_fence*), GFP_KERNEL);
> - if (!entity->fences) {
> - r = -ENOMEM;
> - goto error_cleanup_memory;
> - }
> + if (!ctx->entities[hw_ip]) {
> + ctx->entities[hw_ip] = kcalloc(amdgpu_ctx_num_entities[hw_ip],
> + sizeof(struct amdgpu_ctx_entity *),
> + GFP_KERNEL);
> + if (!ctx->entities[hw_ip])
> + return -ENOMEM;
That's complete overkill, just statically allocate the array in the
amdgpu_ctx structure.
The maximum instance should be 4 IIRC, so something like "struct
amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][4];" so a maximum of 288
bytes used.
Only alternative I see would be to allocate the array behind the
structure, see dma_resv_list_alloc() for an example on how to do this.
But I don't think that this is worth it.
Christian.
> }
> - for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
> - ctx->entities[i] = ctx->entities[i - 1] +
> - amdgpu_ctx_num_entities[i - 1];
>
> - kref_init(&ctx->refcount);
> - spin_lock_init(&ctx->ring_lock);
> - mutex_init(&ctx->lock);
> + ctx->entities[hw_ip][ring] = kcalloc(1, sizeof(struct amdgpu_ctx_entity),
> + GFP_KERNEL);
> + if (!ctx->entities[hw_ip][ring]) {
> + r = -ENOMEM;
> + goto error_free_entity;
> + }
>
> - ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
> - ctx->reset_counter_query = ctx->reset_counter;
> - ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
> - ctx->init_priority = priority;
> - ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
> + entity = ctx->entities[hw_ip][ring];
>
> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> - struct drm_gpu_scheduler **scheds;
> - struct drm_gpu_scheduler *sched;
> - unsigned num_scheds = 0;
> + entity->sequence = 1;
> + entity->fences = kcalloc(amdgpu_sched_jobs,
> + sizeof(struct dma_fence*), GFP_KERNEL);
> + if (!entity->fences) {
> + r = -ENOMEM;
> + goto error_free_entity;
> + }
>
> - switch (i) {
> + priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
> + ctx->init_priority : ctx->override_priority;
> + switch (hw_ip) {
> case AMDGPU_HW_IP_GFX:
> sched = &adev->gfx.gfx_ring[0].sched;
> scheds = &sched;
> @@ -166,57 +139,85 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
> scheds = adev->jpeg.jpeg_sched;
> num_scheds = adev->jpeg.num_jpeg_sched;
> break;
> - }
> -
> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> - r = drm_sched_entity_init(&ctx->entities[i][j].entity,
> - priority, scheds,
> - num_scheds, &ctx->guilty);
> - if (r)
> - goto error_cleanup_entities;
> }
>
> + r = drm_sched_entity_init(&ctx->entities[hw_ip][ring]->entity,
> + priority, scheds,
> + num_scheds, &ctx->guilty);
> + if (r)
> + goto error_free_entity;
> +
> return 0;
>
> -error_cleanup_entities:
> - for (i = 0; i < num_entities; ++i)
> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> +error_free_entity:
> + kfree(ctx->entities[hw_ip][ring]);
> + return r;
> +}
>
> -error_cleanup_memory:
> - for (i = 0; i < num_entities; ++i) {
> - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> +static int amdgpu_ctx_init(struct amdgpu_device *adev,
> + enum drm_sched_priority priority,
> + struct drm_file *filp,
> + struct amdgpu_ctx *ctx)
> +{
> + int r;
>
> - kfree(entity->fences);
> - entity->fences = NULL;
> - }
> + r = amdgpu_ctx_priority_permit(filp, priority);
> + if (r)
> + return r;
> +
> + memset(ctx, 0, sizeof(*ctx));
> +
> + ctx->adev = adev;
> +
> + kref_init(&ctx->refcount);
> + spin_lock_init(&ctx->ring_lock);
> + mutex_init(&ctx->lock);
> +
> + ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
> + ctx->reset_counter_query = ctx->reset_counter;
> + ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
> + ctx->init_priority = priority;
> + ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
> +
> + return 0;
>
> - kfree(ctx->entities[0]);
> - ctx->entities[0] = NULL;
> - return r;
> }
>
> static void amdgpu_ctx_fini(struct kref *ref)
> {
> struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> struct amdgpu_device *adev = ctx->adev;
> - unsigned i, j;
> + unsigned i, j, k;
>
> if (!adev)
> return;
>
> - for (i = 0; i < num_entities; ++i) {
> - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + struct amdgpu_ctx_entity *entity;
> +
> + if (!ctx->entities[i] || !ctx->entities[i][j])
> + continue;
> +
> + entity = ctx->entities[i][j];
> + if (!entity->fences)
> + continue;
>
> - for (j = 0; j < amdgpu_sched_jobs; ++j)
> - dma_fence_put(entity->fences[j]);
> + for (k = 0; k < amdgpu_sched_jobs; ++k)
> + dma_fence_put(entity->fences[k]);
>
> - kfree(entity->fences);
> + kfree(entity->fences);
> + entity->fences = NULL;
> +
> + kfree(entity);
> + ctx->entities[i][j] = NULL;
> + }
> +
> + kfree(ctx->entities[i];
> + ctx->entities[i] = NULL;
> }
>
> - kfree(ctx->entities[0]);
> mutex_destroy(&ctx->lock);
> -
> kfree(ctx);
> }
>
> @@ -239,7 +240,11 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
> return -EINVAL;
> }
>
> - *entity = &ctx->entities[hw_ip][ring].entity;
> + if ((ctx->entities[hw_ip] == NULL) || (ctx->entities[hw_ip][ring] == NULL))
> + amdgpu_ctx_init_entity(ctx, hw_ip, ring);
> +
> +
> + *entity = &ctx->entities[hw_ip][ring]->entity;
> return 0;
> }
>
> @@ -279,14 +284,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
> static void amdgpu_ctx_do_release(struct kref *ref)
> {
> struct amdgpu_ctx *ctx;
> - unsigned num_entities;
> - u32 i;
> + u32 i, j;
>
> ctx = container_of(ref, struct amdgpu_ctx, refcount);
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + if (!ctx->entities[i] || !ctx->entities[i][j])
> + continue;
>
> - num_entities = amdgpu_ctx_total_num_entities();
> - for (i = 0; i < num_entities; i++)
> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> + drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
> + }
> + }
>
> amdgpu_ctx_fini(ref);
> }
> @@ -516,19 +524,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
> void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
> enum drm_sched_priority priority)
> {
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> enum drm_sched_priority ctx_prio;
> - unsigned i;
> + unsigned i, j;
>
> ctx->override_priority = priority;
>
> ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
> ctx->init_priority : ctx->override_priority;
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + struct drm_sched_entity *entity;
>
> - for (i = 0; i < num_entities; i++) {
> - struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
> + if (!ctx->entities[i] || !ctx->entities[i][j])
> + continue;
>
> - drm_sched_entity_set_priority(entity, ctx_prio);
> + entity = &ctx->entities[i][j]->entity;
> + drm_sched_entity_set_priority(entity, ctx_prio);
> + }
> }
> }
>
> @@ -564,20 +576,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
>
> long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
> {
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> struct amdgpu_ctx *ctx;
> struct idr *idp;
> - uint32_t id, i;
> + uint32_t id, i, j;
>
> idp = &mgr->ctx_handles;
>
> mutex_lock(&mgr->lock);
> idr_for_each_entry(idp, ctx, id) {
> - for (i = 0; i < num_entities; i++) {
> - struct drm_sched_entity *entity;
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + struct drm_sched_entity *entity;
> +
> + if (!ctx->entities[i] || !ctx->entities[i][j])
> + continue;
>
> - entity = &ctx->entities[0][i].entity;
> - timeout = drm_sched_entity_flush(entity, timeout);
> + entity = &ctx->entities[i][j]->entity;
> + timeout = drm_sched_entity_flush(entity, timeout);
> + }
> }
> }
> mutex_unlock(&mgr->lock);
> @@ -586,10 +602,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
>
> void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
> {
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> struct amdgpu_ctx *ctx;
> struct idr *idp;
> - uint32_t id, i;
> + uint32_t id, i, j;
>
> idp = &mgr->ctx_handles;
>
> @@ -599,8 +614,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
> continue;
> }
>
> - for (i = 0; i < num_entities; i++)
> - drm_sched_entity_fini(&ctx->entities[0][i].entity);
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + struct drm_sched_entity *entity;
> +
> + if (!ctx->entities[i] || !ctx->entities[i][j])
> + continue;
> +
> + entity = &ctx->entities[i][j]->entity;
> + drm_sched_entity_fini(entity);
> + }
> + }
> }
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index a6cd9d4b078c..ca9363e71df5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -42,7 +42,7 @@ struct amdgpu_ctx {
> unsigned reset_counter_query;
> uint32_t vram_lost_counter;
> spinlock_t ring_lock;
> - struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM];
> + struct amdgpu_ctx_entity **entities[AMDGPU_HW_IP_NUM];
> bool preamble_presented;
> enum drm_sched_priority init_priority;
> enum drm_sched_priority override_priority;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH] drm/amdgpu: allocate entities on demand
2020-01-24 11:53 [PATCH] drm/amdgpu: allocate entities on demand Nirmoy Das
2020-01-24 12:09 ` Christian König
@ 2020-01-24 12:41 ` Nirmoy Das
2020-01-24 12:56 ` Christian König
2020-01-24 15:15 ` [PATCH V2] " Nirmoy Das
2020-01-24 16:31 ` [PATCH] " Nirmoy Das
3 siblings, 1 reply; 14+ messages in thread
From: Nirmoy Das @ 2020-01-24 12:41 UTC (permalink / raw)
To: amd-gfx; +Cc: alexander.deucher, kenny.ho, nirmoy.das, christian.koenig
Currently we pre-allocate entities and fences for all the HW IPs on
context creation, some of which might never be used.
This patch tries to resolve entity/fences wastage by creating entity
only when needed.
Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 232 +++++++++++++-----------
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 3 +-
2 files changed, 124 insertions(+), 111 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 05c2af61e7de..df7a18f12b8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
[AMDGPU_HW_IP_VCN_JPEG] = 1,
};
-static int amdgpu_ctx_total_num_entities(void)
-{
- unsigned i, num_entities = 0;
-
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
- num_entities += amdgpu_ctx_num_entities[i];
-
- return num_entities;
-}
-
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
enum drm_sched_priority priority)
{
+ if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+ return -EINVAL;
+
/* NORMAL and below are accessible by everyone */
if (priority <= DRM_SCHED_PRIORITY_NORMAL)
return 0;
@@ -68,64 +61,35 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
return -EACCES;
}
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
- enum drm_sched_priority priority,
- struct drm_file *filp,
- struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
- unsigned i, j;
+ struct amdgpu_device *adev = ctx->adev;
+ struct amdgpu_ctx_entity *entity;
+ struct drm_gpu_scheduler **scheds;
+ struct drm_gpu_scheduler *sched;
+ unsigned num_scheds = 0;
+ enum drm_sched_priority priority;
int r;
- if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
- return -EINVAL;
-
- r = amdgpu_ctx_priority_permit(filp, priority);
- if (r)
- return r;
- memset(ctx, 0, sizeof(*ctx));
- ctx->adev = adev;
+ ctx->entities[hw_ip][ring] = kcalloc(1, sizeof(struct amdgpu_ctx_entity),
+ GFP_KERNEL);
+ if (!ctx->entities[hw_ip][ring])
+ return -ENOMEM;
+ entity = ctx->entities[hw_ip][ring];
- ctx->entities[0] = kcalloc(num_entities,
- sizeof(struct amdgpu_ctx_entity),
- GFP_KERNEL);
- if (!ctx->entities[0])
- return -ENOMEM;
-
-
- for (i = 0; i < num_entities; ++i) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
- entity->sequence = 1;
- entity->fences = kcalloc(amdgpu_sched_jobs,
- sizeof(struct dma_fence*), GFP_KERNEL);
- if (!entity->fences) {
- r = -ENOMEM;
- goto error_cleanup_memory;
- }
+ entity->sequence = 1;
+ entity->fences = kcalloc(amdgpu_sched_jobs,
+ sizeof(struct dma_fence*), GFP_KERNEL);
+ if (!entity->fences) {
+ r = -ENOMEM;
+ goto error_free_entity;
}
- for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
- ctx->entities[i] = ctx->entities[i - 1] +
- amdgpu_ctx_num_entities[i - 1];
-
- kref_init(&ctx->refcount);
- spin_lock_init(&ctx->ring_lock);
- mutex_init(&ctx->lock);
- ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
- ctx->reset_counter_query = ctx->reset_counter;
- ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
- ctx->init_priority = priority;
- ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
-
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- struct drm_gpu_scheduler **scheds;
- struct drm_gpu_scheduler *sched;
- unsigned num_scheds = 0;
-
- switch (i) {
+ priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+ ctx->init_priority : ctx->override_priority;
+ switch (hw_ip) {
case AMDGPU_HW_IP_GFX:
sched = &adev->gfx.gfx_ring[0].sched;
scheds = &sched;
@@ -166,57 +130,82 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
scheds = adev->jpeg.jpeg_sched;
num_scheds = adev->jpeg.num_jpeg_sched;
break;
- }
-
- for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
- r = drm_sched_entity_init(&ctx->entities[i][j].entity,
- priority, scheds,
- num_scheds, &ctx->guilty);
- if (r)
- goto error_cleanup_entities;
}
+ r = drm_sched_entity_init(&ctx->entities[hw_ip][ring]->entity,
+ priority, scheds,
+ num_scheds, &ctx->guilty);
+ if (r)
+ goto error_free_entity;
+
return 0;
-error_cleanup_entities:
- for (i = 0; i < num_entities; ++i)
- drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+error_free_entity:
+ kfree(ctx->entities[hw_ip][ring]);
+ return r;
+}
+
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+ enum drm_sched_priority priority,
+ struct drm_file *filp,
+ struct amdgpu_ctx *ctx)
+{
+ int r;
-error_cleanup_memory:
- for (i = 0; i < num_entities; ++i) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+ r = amdgpu_ctx_priority_permit(filp, priority);
+ if (r)
+ return r;
- kfree(entity->fences);
- entity->fences = NULL;
- }
+ memset(ctx, 0, sizeof(*ctx));
+
+ ctx->adev = adev;
+
+ kref_init(&ctx->refcount);
+ spin_lock_init(&ctx->ring_lock);
+ mutex_init(&ctx->lock);
+
+ ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+ ctx->reset_counter_query = ctx->reset_counter;
+ ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+ ctx->init_priority = priority;
+ ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+
+ return 0;
- kfree(ctx->entities[0]);
- ctx->entities[0] = NULL;
- return r;
}
static void amdgpu_ctx_fini(struct kref *ref)
{
struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
- unsigned num_entities = amdgpu_ctx_total_num_entities();
struct amdgpu_device *adev = ctx->adev;
- unsigned i, j;
+ unsigned i, j, k;
if (!adev)
return;
- for (i = 0; i < num_entities; ++i) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct amdgpu_ctx_entity *entity;
+
+ if (!ctx->entities[i] || !ctx->entities[i][j])
+ continue;
+
+ entity = ctx->entities[i][j];
+ if (!entity->fences)
+ continue;
- for (j = 0; j < amdgpu_sched_jobs; ++j)
- dma_fence_put(entity->fences[j]);
+ for (k = 0; k < amdgpu_sched_jobs; ++k)
+ dma_fence_put(entity->fences[k]);
- kfree(entity->fences);
+ kfree(entity->fences);
+ entity->fences = NULL;
+
+ kfree(entity);
+ ctx->entities[i][j] = NULL;
+ }
}
- kfree(ctx->entities[0]);
mutex_destroy(&ctx->lock);
-
kfree(ctx);
}
@@ -239,7 +228,11 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
return -EINVAL;
}
- *entity = &ctx->entities[hw_ip][ring].entity;
+ if (!ctx->entities[hw_ip][ring])
+ amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+
+
+ *entity = &ctx->entities[hw_ip][ring]->entity;
return 0;
}
@@ -279,14 +272,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
static void amdgpu_ctx_do_release(struct kref *ref)
{
struct amdgpu_ctx *ctx;
- unsigned num_entities;
- u32 i;
+ u32 i, j;
ctx = container_of(ref, struct amdgpu_ctx, refcount);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ if (!ctx->entities[i][j])
+ continue;
- num_entities = amdgpu_ctx_total_num_entities();
- for (i = 0; i < num_entities; i++)
- drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+ drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+ }
+ }
amdgpu_ctx_fini(ref);
}
@@ -516,19 +512,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum drm_sched_priority priority)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
enum drm_sched_priority ctx_prio;
- unsigned i;
+ unsigned i, j;
ctx->override_priority = priority;
ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
- for (i = 0; i < num_entities; i++) {
- struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
+ if (!ctx->entities[i][j])
+ continue;
- drm_sched_entity_set_priority(entity, ctx_prio);
+ entity = &ctx->entities[i][j]->entity;
+ drm_sched_entity_set_priority(entity, ctx_prio);
+ }
}
}
@@ -564,20 +564,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
- uint32_t id, i;
+ uint32_t id, i, j;
idp = &mgr->ctx_handles;
mutex_lock(&mgr->lock);
idr_for_each_entry(idp, ctx, id) {
- for (i = 0; i < num_entities; i++) {
- struct drm_sched_entity *entity;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
+
+ if (!ctx->entities[i][j])
+ continue;
- entity = &ctx->entities[0][i].entity;
- timeout = drm_sched_entity_flush(entity, timeout);
+ entity = &ctx->entities[i][j]->entity;
+ timeout = drm_sched_entity_flush(entity, timeout);
+ }
}
}
mutex_unlock(&mgr->lock);
@@ -586,10 +590,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
- uint32_t id, i;
+ uint32_t id, i, j;
idp = &mgr->ctx_handles;
@@ -599,8 +602,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
continue;
}
- for (i = 0; i < num_entities; i++)
- drm_sched_entity_fini(&ctx->entities[0][i].entity);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
+
+ if (!ctx->entities[i][j])
+ continue;
+
+ entity = &ctx->entities[i][j]->entity;
+ drm_sched_entity_fini(entity);
+ }
+ }
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index a6cd9d4b078c..e67e522e1922 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -29,6 +29,7 @@ struct drm_device;
struct drm_file;
struct amdgpu_fpriv;
+#define AMDGPU_MAX_ENTITY_NUM 4
struct amdgpu_ctx_entity {
uint64_t sequence;
struct dma_fence **fences;
@@ -42,7 +43,7 @@ struct amdgpu_ctx {
unsigned reset_counter_query;
uint32_t vram_lost_counter;
spinlock_t ring_lock;
- struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM];
+ struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
bool preamble_presented;
enum drm_sched_priority init_priority;
enum drm_sched_priority override_priority;
--
2.24.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH] drm/amdgpu: allocate entities on demand
2020-01-24 12:09 ` Christian König
@ 2020-01-24 12:43 ` Nirmoy
2020-01-24 12:56 ` Christian König
0 siblings, 1 reply; 14+ messages in thread
From: Nirmoy @ 2020-01-24 12:43 UTC (permalink / raw)
To: Christian König, Nirmoy Das, amd-gfx
Cc: alexander.deucher, kenny.ho, nirmoy.das
[-- Attachment #1.1: Type: text/plain, Size: 595 bytes --]
On 1/24/20 1:09 PM, Christian König wrote:
>> + return -ENOMEM;
>
> That's complete overkill, just statically allocate the array in the
> amdgpu_ctx structure.
>
> The maximum instance should be 4 IIRC, so something like "struct
> amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][4];" so a maximum of 288
> bytes used.
>
> Only alternative I see would be to allocate the array behind the
> structure, see dma_resv_list_alloc() for an example on how to do this.
> But I don't think that this is worth it.
Resent with added
+#define AMDGPU_MAX_ENTITY_NUM 4
Regards,
Nirmoy
>
> Christian.
[-- Attachment #1.2: Type: text/html, Size: 1338 bytes --]
[-- Attachment #2: Type: text/plain, Size: 154 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] drm/amdgpu: allocate entities on demand
2020-01-24 12:41 ` Nirmoy Das
@ 2020-01-24 12:56 ` Christian König
2020-01-24 15:18 ` Nirmoy
0 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2020-01-24 12:56 UTC (permalink / raw)
To: Nirmoy Das, amd-gfx
Cc: alexander.deucher, kenny.ho, nirmoy.das, christian.koenig
Am 24.01.20 um 13:41 schrieb Nirmoy Das:
> Currently we pre-allocate entities and fences for all the HW IPs on
> context creation and some of which are might never be used.
>
> This patch tries to resolve entity/fences wastage by creating entity
> only when needed.
>
> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 232 +++++++++++++-----------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 3 +-
> 2 files changed, 124 insertions(+), 111 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 05c2af61e7de..df7a18f12b8e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
> [AMDGPU_HW_IP_VCN_JPEG] = 1,
> };
>
> -static int amdgpu_ctx_total_num_entities(void)
> -{
> - unsigned i, num_entities = 0;
> -
> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> - num_entities += amdgpu_ctx_num_entities[i];
> -
> - return num_entities;
> -}
> -
> static int amdgpu_ctx_priority_permit(struct drm_file *filp,
> enum drm_sched_priority priority)
> {
> + if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
> + return -EINVAL;
> +
> /* NORMAL and below are accessible by everyone */
> if (priority <= DRM_SCHED_PRIORITY_NORMAL)
> return 0;
> @@ -68,64 +61,35 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
> return -EACCES;
> }
>
> -static int amdgpu_ctx_init(struct amdgpu_device *adev,
> - enum drm_sched_priority priority,
> - struct drm_file *filp,
> - struct amdgpu_ctx *ctx)
> +static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
> {
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> - unsigned i, j;
> + struct amdgpu_device *adev = ctx->adev;
> + struct amdgpu_ctx_entity *entity;
> + struct drm_gpu_scheduler **scheds;
> + struct drm_gpu_scheduler *sched;
> + unsigned num_scheds = 0;
> + enum drm_sched_priority priority;
> int r;
>
> - if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
> - return -EINVAL;
> -
> - r = amdgpu_ctx_priority_permit(filp, priority);
> - if (r)
> - return r;
>
> - memset(ctx, 0, sizeof(*ctx));
> - ctx->adev = adev;
> + ctx->entities[hw_ip][ring] = kcalloc(1, sizeof(struct amdgpu_ctx_entity),
> + GFP_KERNEL);
> + if (!ctx->entities[hw_ip][ring])
> + return -ENOMEM;
>
> + entity = ctx->entities[hw_ip][ring];
>
> - ctx->entities[0] = kcalloc(num_entities,
> - sizeof(struct amdgpu_ctx_entity),
> - GFP_KERNEL);
> - if (!ctx->entities[0])
> - return -ENOMEM;
> -
> -
> - for (i = 0; i < num_entities; ++i) {
> - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> -
> - entity->sequence = 1;
> - entity->fences = kcalloc(amdgpu_sched_jobs,
> - sizeof(struct dma_fence*), GFP_KERNEL);
> - if (!entity->fences) {
> - r = -ENOMEM;
> - goto error_cleanup_memory;
> - }
> + entity->sequence = 1;
> + entity->fences = kcalloc(amdgpu_sched_jobs,
> + sizeof(struct dma_fence*), GFP_KERNEL);
It would be rather nice to allocate the fences array together
with the entity.
Take a look at struct dma_resv_list and the function
dma_resv_list_alloc() on how to do this.
Apart from that the patch now looks like a nice cleanup to me, but I
need to fully read it from top till bottom once more.
Regards,
Christian.
> + if (!entity->fences) {
> + r = -ENOMEM;
> + goto error_free_entity;
> }
> - for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
> - ctx->entities[i] = ctx->entities[i - 1] +
> - amdgpu_ctx_num_entities[i - 1];
> -
> - kref_init(&ctx->refcount);
> - spin_lock_init(&ctx->ring_lock);
> - mutex_init(&ctx->lock);
>
> - ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
> - ctx->reset_counter_query = ctx->reset_counter;
> - ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
> - ctx->init_priority = priority;
> - ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
> -
> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> - struct drm_gpu_scheduler **scheds;
> - struct drm_gpu_scheduler *sched;
> - unsigned num_scheds = 0;
> -
> - switch (i) {
> + priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
> + ctx->init_priority : ctx->override_priority;
> + switch (hw_ip) {
> case AMDGPU_HW_IP_GFX:
> sched = &adev->gfx.gfx_ring[0].sched;
> scheds = &sched;
> @@ -166,57 +130,82 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
> scheds = adev->jpeg.jpeg_sched;
> num_scheds = adev->jpeg.num_jpeg_sched;
> break;
> - }
> -
> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> - r = drm_sched_entity_init(&ctx->entities[i][j].entity,
> - priority, scheds,
> - num_scheds, &ctx->guilty);
> - if (r)
> - goto error_cleanup_entities;
> }
>
> + r = drm_sched_entity_init(&ctx->entities[hw_ip][ring]->entity,
> + priority, scheds,
> + num_scheds, &ctx->guilty);
> + if (r)
> + goto error_free_entity;
> +
> return 0;
>
> -error_cleanup_entities:
> - for (i = 0; i < num_entities; ++i)
> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> +error_free_entity:
> + kfree(ctx->entities[hw_ip][ring]);
> + return r;
> +}
> +
> +static int amdgpu_ctx_init(struct amdgpu_device *adev,
> + enum drm_sched_priority priority,
> + struct drm_file *filp,
> + struct amdgpu_ctx *ctx)
> +{
> + int r;
>
> -error_cleanup_memory:
> - for (i = 0; i < num_entities; ++i) {
> - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> + r = amdgpu_ctx_priority_permit(filp, priority);
> + if (r)
> + return r;
>
> - kfree(entity->fences);
> - entity->fences = NULL;
> - }
> + memset(ctx, 0, sizeof(*ctx));
> +
> + ctx->adev = adev;
> +
> + kref_init(&ctx->refcount);
> + spin_lock_init(&ctx->ring_lock);
> + mutex_init(&ctx->lock);
> +
> + ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
> + ctx->reset_counter_query = ctx->reset_counter;
> + ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
> + ctx->init_priority = priority;
> + ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
> +
> + return 0;
>
> - kfree(ctx->entities[0]);
> - ctx->entities[0] = NULL;
> - return r;
> }
>
> static void amdgpu_ctx_fini(struct kref *ref)
> {
> struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> struct amdgpu_device *adev = ctx->adev;
> - unsigned i, j;
> + unsigned i, j, k;
>
> if (!adev)
> return;
>
> - for (i = 0; i < num_entities; ++i) {
> - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + struct amdgpu_ctx_entity *entity;
> +
> + if (!ctx->entities[i] || !ctx->entities[i][j])
> + continue;
> +
> + entity = ctx->entities[i][j];
> + if (!entity->fences)
> + continue;
>
> - for (j = 0; j < amdgpu_sched_jobs; ++j)
> - dma_fence_put(entity->fences[j]);
> + for (k = 0; k < amdgpu_sched_jobs; ++k)
> + dma_fence_put(entity->fences[k]);
>
> - kfree(entity->fences);
> + kfree(entity->fences);
> + entity->fences = NULL;
> +
> + kfree(entity);
> + ctx->entities[i][j] = NULL;
> + }
> }
>
> - kfree(ctx->entities[0]);
> mutex_destroy(&ctx->lock);
> -
> kfree(ctx);
> }
>
> @@ -239,7 +228,11 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
> return -EINVAL;
> }
>
> - *entity = &ctx->entities[hw_ip][ring].entity;
> + if (!ctx->entities[hw_ip][ring])
> + amdgpu_ctx_init_entity(ctx, hw_ip, ring);
> +
> +
> + *entity = &ctx->entities[hw_ip][ring]->entity;
> return 0;
> }
>
> @@ -279,14 +272,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
> static void amdgpu_ctx_do_release(struct kref *ref)
> {
> struct amdgpu_ctx *ctx;
> - unsigned num_entities;
> - u32 i;
> + u32 i, j;
>
> ctx = container_of(ref, struct amdgpu_ctx, refcount);
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + if (!ctx->entities[i][j])
> + continue;
>
> - num_entities = amdgpu_ctx_total_num_entities();
> - for (i = 0; i < num_entities; i++)
> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> + drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
> + }
> + }
>
> amdgpu_ctx_fini(ref);
> }
> @@ -516,19 +512,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
> void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
> enum drm_sched_priority priority)
> {
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> enum drm_sched_priority ctx_prio;
> - unsigned i;
> + unsigned i, j;
>
> ctx->override_priority = priority;
>
> ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
> ctx->init_priority : ctx->override_priority;
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + struct drm_sched_entity *entity;
>
> - for (i = 0; i < num_entities; i++) {
> - struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
> + if (!ctx->entities[i][j])
> + continue;
>
> - drm_sched_entity_set_priority(entity, ctx_prio);
> + entity = &ctx->entities[i][j]->entity;
> + drm_sched_entity_set_priority(entity, ctx_prio);
> + }
> }
> }
>
> @@ -564,20 +564,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
>
> long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
> {
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> struct amdgpu_ctx *ctx;
> struct idr *idp;
> - uint32_t id, i;
> + uint32_t id, i, j;
>
> idp = &mgr->ctx_handles;
>
> mutex_lock(&mgr->lock);
> idr_for_each_entry(idp, ctx, id) {
> - for (i = 0; i < num_entities; i++) {
> - struct drm_sched_entity *entity;
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + struct drm_sched_entity *entity;
> +
> + if (!ctx->entities[i][j])
> + continue;
>
> - entity = &ctx->entities[0][i].entity;
> - timeout = drm_sched_entity_flush(entity, timeout);
> + entity = &ctx->entities[i][j]->entity;
> + timeout = drm_sched_entity_flush(entity, timeout);
> + }
> }
> }
> mutex_unlock(&mgr->lock);
> @@ -586,10 +590,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
>
> void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
> {
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> struct amdgpu_ctx *ctx;
> struct idr *idp;
> - uint32_t id, i;
> + uint32_t id, i, j;
>
> idp = &mgr->ctx_handles;
>
> @@ -599,8 +602,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
> continue;
> }
>
> - for (i = 0; i < num_entities; i++)
> - drm_sched_entity_fini(&ctx->entities[0][i].entity);
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + struct drm_sched_entity *entity;
> +
> + if (!ctx->entities[i][j])
> + continue;
> +
> + entity = &ctx->entities[i][j]->entity;
> + drm_sched_entity_fini(entity);
> + }
> + }
> }
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index a6cd9d4b078c..e67e522e1922 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -29,6 +29,7 @@ struct drm_device;
> struct drm_file;
> struct amdgpu_fpriv;
>
> +#define AMDGPU_MAX_ENTITY_NUM 4
> struct amdgpu_ctx_entity {
> uint64_t sequence;
> struct dma_fence **fences;
> @@ -42,7 +43,7 @@ struct amdgpu_ctx {
> unsigned reset_counter_query;
> uint32_t vram_lost_counter;
> spinlock_t ring_lock;
> - struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM];
> + struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
> bool preamble_presented;
> enum drm_sched_priority init_priority;
> enum drm_sched_priority override_priority;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] drm/amdgpu: allocate entities on demand
2020-01-24 12:43 ` Nirmoy
@ 2020-01-24 12:56 ` Christian König
0 siblings, 0 replies; 14+ messages in thread
From: Christian König @ 2020-01-24 12:56 UTC (permalink / raw)
To: Nirmoy, Nirmoy Das, amd-gfx; +Cc: alexander.deucher, kenny.ho, nirmoy.das
[-- Attachment #1.1: Type: text/plain, Size: 727 bytes --]
Am 24.01.20 um 13:43 schrieb Nirmoy:
>
>
> On 1/24/20 1:09 PM, Christian König wrote:
>>> + return -ENOMEM;
>>
>> That's complete overkill, just statically allocate the array in the
>> amdgpu_ctx structure.
>>
>> The maximum instance should be 4 IIRC, so something like "struct
>> amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][4];" so a maximum of
>> 288 bytes used.
>>
>> Only alternative I see would be to allocate the array behind the
>> structure, see dma_resv_list_alloc() for an example on how to do
>> this. But I don't think that this is worth it.
> Resent with added
> +#define AMDGPU_MAX_ENTITY_NUM 4
Yes, of course that is certainly a good idea as well.
Christian.
>
> Regards,
> Nirmoy
>>
>> Christian.
[-- Attachment #1.2: Type: text/html, Size: 1848 bytes --]
[-- Attachment #2: Type: text/plain, Size: 154 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH V2] drm/amdgpu: allocate entities on demand
2020-01-24 11:53 [PATCH] drm/amdgpu: allocate entities on demand Nirmoy Das
2020-01-24 12:09 ` Christian König
2020-01-24 12:41 ` Nirmoy Das
@ 2020-01-24 15:15 ` Nirmoy Das
2020-01-24 15:31 ` Christian König
2020-01-24 16:31 ` [PATCH] " Nirmoy Das
3 siblings, 1 reply; 14+ messages in thread
From: Nirmoy Das @ 2020-01-24 15:15 UTC (permalink / raw)
To: amd-gfx; +Cc: alexander.deucher, kenny.ho, nirmoy.das, christian.koenig
Currently we pre-allocate entities and fences for all the HW IPs on
context creation, some of which might never be used.
This patch tries to resolve entity/fence wastage by creating entities
only when needed.
v2: allocate memory for entity and fences together
Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 231 ++++++++++++------------
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 6 +-
2 files changed, 122 insertions(+), 115 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 05c2af61e7de..d246ae9fe0eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
[AMDGPU_HW_IP_VCN_JPEG] = 1,
};
-static int amdgpu_ctx_total_num_entities(void)
-{
- unsigned i, num_entities = 0;
-
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
- num_entities += amdgpu_ctx_num_entities[i];
-
- return num_entities;
-}
-
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
enum drm_sched_priority priority)
{
+ if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+ return -EINVAL;
+
/* NORMAL and below are accessible by everyone */
if (priority <= DRM_SCHED_PRIORITY_NORMAL)
return 0;
@@ -68,64 +61,26 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
return -EACCES;
}
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
- enum drm_sched_priority priority,
- struct drm_file *filp,
- struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
- unsigned i, j;
+ struct amdgpu_device *adev = ctx->adev;
+ struct amdgpu_ctx_entity *entity;
+ struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
+ unsigned num_scheds = 0;
+ enum drm_sched_priority priority;
int r;
- if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
- return -EINVAL;
+ entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]),
+ GFP_KERNEL);
+ if (!entity)
+ return -ENOMEM;
- r = amdgpu_ctx_priority_permit(filp, priority);
- if (r)
- return r;
-
- memset(ctx, 0, sizeof(*ctx));
- ctx->adev = adev;
-
-
- ctx->entities[0] = kcalloc(num_entities,
- sizeof(struct amdgpu_ctx_entity),
- GFP_KERNEL);
- if (!ctx->entities[0])
- return -ENOMEM;
+ entity->sequence = 1;
-
- for (i = 0; i < num_entities; ++i) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
- entity->sequence = 1;
- entity->fences = kcalloc(amdgpu_sched_jobs,
- sizeof(struct dma_fence*), GFP_KERNEL);
- if (!entity->fences) {
- r = -ENOMEM;
- goto error_cleanup_memory;
- }
- }
- for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
- ctx->entities[i] = ctx->entities[i - 1] +
- amdgpu_ctx_num_entities[i - 1];
-
- kref_init(&ctx->refcount);
- spin_lock_init(&ctx->ring_lock);
- mutex_init(&ctx->lock);
-
- ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
- ctx->reset_counter_query = ctx->reset_counter;
- ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
- ctx->init_priority = priority;
- ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
-
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- struct drm_gpu_scheduler **scheds;
- struct drm_gpu_scheduler *sched;
- unsigned num_scheds = 0;
-
- switch (i) {
+ ctx->entities[hw_ip][ring] = entity;
+ priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+ ctx->init_priority : ctx->override_priority;
+ switch (hw_ip) {
case AMDGPU_HW_IP_GFX:
sched = &adev->gfx.gfx_ring[0].sched;
scheds = &sched;
@@ -166,63 +121,87 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
scheds = adev->jpeg.jpeg_sched;
num_scheds = adev->jpeg.num_jpeg_sched;
break;
- }
-
- for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
- r = drm_sched_entity_init(&ctx->entities[i][j].entity,
- priority, scheds,
- num_scheds, &ctx->guilty);
- if (r)
- goto error_cleanup_entities;
}
+ r = drm_sched_entity_init(&ctx->entities[hw_ip][ring]->entity,
+ priority, scheds,
+ num_scheds, &ctx->guilty);
+ if (r)
+ goto error_free_entity;
+
return 0;
-error_cleanup_entities:
- for (i = 0; i < num_entities; ++i)
- drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+error_free_entity:
+ kfree(ctx->entities[hw_ip][ring]);
+ return r;
+}
-error_cleanup_memory:
- for (i = 0; i < num_entities; ++i) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+ enum drm_sched_priority priority,
+ struct drm_file *filp,
+ struct amdgpu_ctx *ctx)
+{
+ int r;
- kfree(entity->fences);
- entity->fences = NULL;
- }
+ r = amdgpu_ctx_priority_permit(filp, priority);
+ if (r)
+ return r;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ ctx->adev = adev;
+
+ kref_init(&ctx->refcount);
+ spin_lock_init(&ctx->ring_lock);
+ mutex_init(&ctx->lock);
+
+ ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+ ctx->reset_counter_query = ctx->reset_counter;
+ ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+ ctx->init_priority = priority;
+ ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+
+ return 0;
- kfree(ctx->entities[0]);
- ctx->entities[0] = NULL;
- return r;
}
static void amdgpu_ctx_fini(struct kref *ref)
{
struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
- unsigned num_entities = amdgpu_ctx_total_num_entities();
struct amdgpu_device *adev = ctx->adev;
- unsigned i, j;
+ unsigned i, j, k;
if (!adev)
return;
- for (i = 0; i < num_entities; ++i) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct amdgpu_ctx_entity *entity;
+
+ if (!ctx->entities[i] || !ctx->entities[i][j])
+ continue;
- for (j = 0; j < amdgpu_sched_jobs; ++j)
- dma_fence_put(entity->fences[j]);
+ entity = ctx->entities[i][j];
+ if (!entity->fences)
+ continue;
- kfree(entity->fences);
+ for (k = 0; k < amdgpu_sched_jobs; ++k)
+ dma_fence_put(entity->fences[k]);
+
+ kfree(entity);
+ ctx->entities[i][j] = NULL;
+ }
}
- kfree(ctx->entities[0]);
mutex_destroy(&ctx->lock);
-
kfree(ctx);
}
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
u32 ring, struct drm_sched_entity **entity)
{
+ int r;
+
if (hw_ip >= AMDGPU_HW_IP_NUM) {
DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
return -EINVAL;
@@ -239,7 +218,14 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
return -EINVAL;
}
- *entity = &ctx->entities[hw_ip][ring].entity;
+ if (ctx->entities[hw_ip][ring] == NULL) {
+ r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+ if (r)
+ return r;
+ }
+
+
+ *entity = &ctx->entities[hw_ip][ring]->entity;
return 0;
}
@@ -279,14 +265,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
static void amdgpu_ctx_do_release(struct kref *ref)
{
struct amdgpu_ctx *ctx;
- unsigned num_entities;
- u32 i;
+ u32 i, j;
ctx = container_of(ref, struct amdgpu_ctx, refcount);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ if (!ctx->entities[i][j])
+ continue;
- num_entities = amdgpu_ctx_total_num_entities();
- for (i = 0; i < num_entities; i++)
- drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+ drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+ }
+ }
amdgpu_ctx_fini(ref);
}
@@ -516,19 +505,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum drm_sched_priority priority)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
enum drm_sched_priority ctx_prio;
- unsigned i;
+ unsigned i, j;
ctx->override_priority = priority;
ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
- for (i = 0; i < num_entities; i++) {
- struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
+ if (!ctx->entities[i][j])
+ continue;
- drm_sched_entity_set_priority(entity, ctx_prio);
+ entity = &ctx->entities[i][j]->entity;
+ drm_sched_entity_set_priority(entity, ctx_prio);
+ }
}
}
@@ -564,20 +557,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
- uint32_t id, i;
+ uint32_t id, i, j;
idp = &mgr->ctx_handles;
mutex_lock(&mgr->lock);
idr_for_each_entry(idp, ctx, id) {
- for (i = 0; i < num_entities; i++) {
- struct drm_sched_entity *entity;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
+
+ if (!ctx->entities[i][j])
+ continue;
- entity = &ctx->entities[0][i].entity;
- timeout = drm_sched_entity_flush(entity, timeout);
+ entity = &ctx->entities[i][j]->entity;
+ timeout = drm_sched_entity_flush(entity, timeout);
+ }
}
}
mutex_unlock(&mgr->lock);
@@ -586,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
- uint32_t id, i;
+ uint32_t id, i, j;
idp = &mgr->ctx_handles;
@@ -599,8 +595,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
continue;
}
- for (i = 0; i < num_entities; i++)
- drm_sched_entity_fini(&ctx->entities[0][i].entity);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
+
+ if (!ctx->entities[i][j])
+ continue;
+
+ entity = &ctx->entities[i][j]->entity;
+ drm_sched_entity_fini(entity);
+ }
+ }
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index a6cd9d4b078c..de490f183af2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -29,10 +29,12 @@ struct drm_device;
struct drm_file;
struct amdgpu_fpriv;
+#define AMDGPU_MAX_ENTITY_NUM 4
+
struct amdgpu_ctx_entity {
uint64_t sequence;
- struct dma_fence **fences;
struct drm_sched_entity entity;
+ struct dma_fence *fences[];
};
struct amdgpu_ctx {
@@ -42,7 +44,7 @@ struct amdgpu_ctx {
unsigned reset_counter_query;
uint32_t vram_lost_counter;
spinlock_t ring_lock;
- struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM];
+ struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
bool preamble_presented;
enum drm_sched_priority init_priority;
enum drm_sched_priority override_priority;
--
2.24.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH] drm/amdgpu: allocate entities on demand
2020-01-24 12:56 ` Christian König
@ 2020-01-24 15:18 ` Nirmoy
0 siblings, 0 replies; 14+ messages in thread
From: Nirmoy @ 2020-01-24 15:18 UTC (permalink / raw)
To: christian.koenig, Nirmoy Das, amd-gfx
Cc: alexander.deucher, kenny.ho, nirmoy.das
On 1/24/20 1:56 PM, Christian König wrote:
>
>> + entity->fences = kcalloc(amdgpu_sched_jobs,
>> + sizeof(struct dma_fence*), GFP_KERNEL);
>
> It would be rather nice to have to allocate the fences array together
> with the entity.
>
> Take a look at struct dma_resv_list and the function
> dma_resv_list_alloc() on how to do this.
>
That is a very interesting allocation method.
Updated the patch.
Thanks,
Nirmoy
> Apart from that the patch now looks like a nice cleanup to me, but I
> need to fully read it from top till bottom once more.
>
> Regards,
> Christian.
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH V2] drm/amdgpu: allocate entities on demand
2020-01-24 15:15 ` [PATCH V2] " Nirmoy Das
@ 2020-01-24 15:31 ` Christian König
0 siblings, 0 replies; 14+ messages in thread
From: Christian König @ 2020-01-24 15:31 UTC (permalink / raw)
To: Nirmoy Das, amd-gfx; +Cc: alexander.deucher, kenny.ho, nirmoy.das
Am 24.01.20 um 16:15 schrieb Nirmoy Das:
> Currently we pre-allocate entities and fences for all the HW IPs on
> context creation and some of which are might never be used.
>
> This patch tries to resolve entity/fences wastage by creating entity
> only when needed.
>
> v2: allocate memory for entity and fences together
>
> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 231 ++++++++++++------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 6 +-
> 2 files changed, 122 insertions(+), 115 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 05c2af61e7de..d246ae9fe0eb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
> [AMDGPU_HW_IP_VCN_JPEG] = 1,
> };
>
> -static int amdgpu_ctx_total_num_entities(void)
> -{
> - unsigned i, num_entities = 0;
> -
> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> - num_entities += amdgpu_ctx_num_entities[i];
> -
> - return num_entities;
> -}
> -
> static int amdgpu_ctx_priority_permit(struct drm_file *filp,
> enum drm_sched_priority priority)
> {
> + if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
> + return -EINVAL;
> +
> /* NORMAL and below are accessible by everyone */
> if (priority <= DRM_SCHED_PRIORITY_NORMAL)
> return 0;
> @@ -68,64 +61,26 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
> return -EACCES;
> }
>
> -static int amdgpu_ctx_init(struct amdgpu_device *adev,
> - enum drm_sched_priority priority,
> - struct drm_file *filp,
> - struct amdgpu_ctx *ctx)
> +static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
> {
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> - unsigned i, j;
> + struct amdgpu_device *adev = ctx->adev;
> + struct amdgpu_ctx_entity *entity;
> + struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
> + unsigned num_scheds = 0;
> + enum drm_sched_priority priority;
> int r;
>
> - if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
> - return -EINVAL;
> + entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]),
> + GFP_KERNEL);
> + if (!entity)
> + return -ENOMEM;
>
> - r = amdgpu_ctx_priority_permit(filp, priority);
> - if (r)
> - return r;
> -
> - memset(ctx, 0, sizeof(*ctx));
> - ctx->adev = adev;
> -
> -
> - ctx->entities[0] = kcalloc(num_entities,
> - sizeof(struct amdgpu_ctx_entity),
> - GFP_KERNEL);
> - if (!ctx->entities[0])
> - return -ENOMEM;
> + entity->sequence = 1;
>
> -
> - for (i = 0; i < num_entities; ++i) {
> - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> -
> - entity->sequence = 1;
> - entity->fences = kcalloc(amdgpu_sched_jobs,
> - sizeof(struct dma_fence*), GFP_KERNEL);
> - if (!entity->fences) {
> - r = -ENOMEM;
> - goto error_cleanup_memory;
> - }
> - }
> - for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
> - ctx->entities[i] = ctx->entities[i - 1] +
> - amdgpu_ctx_num_entities[i - 1];
> -
> - kref_init(&ctx->refcount);
> - spin_lock_init(&ctx->ring_lock);
> - mutex_init(&ctx->lock);
> -
> - ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
> - ctx->reset_counter_query = ctx->reset_counter;
> - ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
> - ctx->init_priority = priority;
> - ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
> -
> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> - struct drm_gpu_scheduler **scheds;
> - struct drm_gpu_scheduler *sched;
> - unsigned num_scheds = 0;
> -
> - switch (i) {
> + ctx->entities[hw_ip][ring] = entity;
> + priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
> + ctx->init_priority : ctx->override_priority;
> + switch (hw_ip) {
> case AMDGPU_HW_IP_GFX:
> sched = &adev->gfx.gfx_ring[0].sched;
> scheds = &sched;
> @@ -166,63 +121,87 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
> scheds = adev->jpeg.jpeg_sched;
> num_scheds = adev->jpeg.num_jpeg_sched;
> break;
> - }
> -
> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> - r = drm_sched_entity_init(&ctx->entities[i][j].entity,
> - priority, scheds,
> - num_scheds, &ctx->guilty);
> - if (r)
> - goto error_cleanup_entities;
> }
>
> + r = drm_sched_entity_init(&ctx->entities[hw_ip][ring]->entity,
> + priority, scheds,
> + num_scheds, &ctx->guilty);
> + if (r)
> + goto error_free_entity;
> +
> return 0;
>
> -error_cleanup_entities:
> - for (i = 0; i < num_entities; ++i)
> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> +error_free_entity:
> + kfree(ctx->entities[hw_ip][ring]);
You also need to reset ctx->entities[hw_ip][ring] to NULL here.
Even better would be to use kfree(entity) and only assign
ctx->entities[hw_ip][ring] to entity when everything worked as expected.
> + return r;
> +}
>
> -error_cleanup_memory:
> - for (i = 0; i < num_entities; ++i) {
> - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> +static int amdgpu_ctx_init(struct amdgpu_device *adev,
> + enum drm_sched_priority priority,
> + struct drm_file *filp,
> + struct amdgpu_ctx *ctx)
> +{
> + int r;
>
> - kfree(entity->fences);
> - entity->fences = NULL;
> - }
> + r = amdgpu_ctx_priority_permit(filp, priority);
> + if (r)
> + return r;
> +
> + memset(ctx, 0, sizeof(*ctx));
> +
> + ctx->adev = adev;
> +
> + kref_init(&ctx->refcount);
> + spin_lock_init(&ctx->ring_lock);
> + mutex_init(&ctx->lock);
> +
> + ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
> + ctx->reset_counter_query = ctx->reset_counter;
> + ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
> + ctx->init_priority = priority;
> + ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
> +
> + return 0;
>
> - kfree(ctx->entities[0]);
> - ctx->entities[0] = NULL;
> - return r;
> }
>
> static void amdgpu_ctx_fini(struct kref *ref)
> {
> struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> struct amdgpu_device *adev = ctx->adev;
> - unsigned i, j;
> + unsigned i, j, k;
>
> if (!adev)
> return;
>
> - for (i = 0; i < num_entities; ++i) {
> - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
Maybe just use AMDGPU_MAX_ENTITY_NUM here.
> + struct amdgpu_ctx_entity *entity;
> +
> + if (!ctx->entities[i] || !ctx->entities[i][j])
> + continue;
>
> - for (j = 0; j < amdgpu_sched_jobs; ++j)
> - dma_fence_put(entity->fences[j]);
> + entity = ctx->entities[i][j];
> + if (!entity->fences)
> + continue;
Well that won't work, since entity->fences is now embedded into the
structure. I think you can just drop the "if".
>
> - kfree(entity->fences);
> + for (k = 0; k < amdgpu_sched_jobs; ++k)
> + dma_fence_put(entity->fences[k]);
> +
> + kfree(entity);
Maybe move that into a amdgpu_ctx_fini_entity function.
Regards,
Christian.
> + ctx->entities[i][j] = NULL;
> + }
> }
>
> - kfree(ctx->entities[0]);
> mutex_destroy(&ctx->lock);
> -
> kfree(ctx);
> }
>
> int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
> u32 ring, struct drm_sched_entity **entity)
> {
> + int r;
> +
> if (hw_ip >= AMDGPU_HW_IP_NUM) {
> DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
> return -EINVAL;
> @@ -239,7 +218,14 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
> return -EINVAL;
> }
>
> - *entity = &ctx->entities[hw_ip][ring].entity;
> + if (ctx->entities[hw_ip][ring] == NULL) {
> + r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
> + if (r)
> + return r;
> + }
> +
> +
> + *entity = &ctx->entities[hw_ip][ring]->entity;
> return 0;
> }
>
> @@ -279,14 +265,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
> static void amdgpu_ctx_do_release(struct kref *ref)
> {
> struct amdgpu_ctx *ctx;
> - unsigned num_entities;
> - u32 i;
> + u32 i, j;
>
> ctx = container_of(ref, struct amdgpu_ctx, refcount);
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + if (!ctx->entities[i][j])
> + continue;
>
> - num_entities = amdgpu_ctx_total_num_entities();
> - for (i = 0; i < num_entities; i++)
> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> + drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
> + }
> + }
>
> amdgpu_ctx_fini(ref);
> }
> @@ -516,19 +505,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
> void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
> enum drm_sched_priority priority)
> {
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> enum drm_sched_priority ctx_prio;
> - unsigned i;
> + unsigned i, j;
>
> ctx->override_priority = priority;
>
> ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
> ctx->init_priority : ctx->override_priority;
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + struct drm_sched_entity *entity;
>
> - for (i = 0; i < num_entities; i++) {
> - struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
> + if (!ctx->entities[i][j])
> + continue;
>
> - drm_sched_entity_set_priority(entity, ctx_prio);
> + entity = &ctx->entities[i][j]->entity;
> + drm_sched_entity_set_priority(entity, ctx_prio);
> + }
> }
> }
>
> @@ -564,20 +557,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
>
> long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
> {
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> struct amdgpu_ctx *ctx;
> struct idr *idp;
> - uint32_t id, i;
> + uint32_t id, i, j;
>
> idp = &mgr->ctx_handles;
>
> mutex_lock(&mgr->lock);
> idr_for_each_entry(idp, ctx, id) {
> - for (i = 0; i < num_entities; i++) {
> - struct drm_sched_entity *entity;
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + struct drm_sched_entity *entity;
> +
> + if (!ctx->entities[i][j])
> + continue;
>
> - entity = &ctx->entities[0][i].entity;
> - timeout = drm_sched_entity_flush(entity, timeout);
> + entity = &ctx->entities[i][j]->entity;
> + timeout = drm_sched_entity_flush(entity, timeout);
> + }
> }
> }
> mutex_unlock(&mgr->lock);
> @@ -586,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
>
> void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
> {
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> struct amdgpu_ctx *ctx;
> struct idr *idp;
> - uint32_t id, i;
> + uint32_t id, i, j;
>
> idp = &mgr->ctx_handles;
>
> @@ -599,8 +595,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
> continue;
> }
>
> - for (i = 0; i < num_entities; i++)
> - drm_sched_entity_fini(&ctx->entities[0][i].entity);
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + struct drm_sched_entity *entity;
> +
> + if (!ctx->entities[i][j])
> + continue;
> +
> + entity = &ctx->entities[i][j]->entity;
> + drm_sched_entity_fini(entity);
> + }
> + }
> }
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index a6cd9d4b078c..de490f183af2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -29,10 +29,12 @@ struct drm_device;
> struct drm_file;
> struct amdgpu_fpriv;
>
> +#define AMDGPU_MAX_ENTITY_NUM 4
> +
> struct amdgpu_ctx_entity {
> uint64_t sequence;
> - struct dma_fence **fences;
> struct drm_sched_entity entity;
> + struct dma_fence *fences[];
> };
>
> struct amdgpu_ctx {
> @@ -42,7 +44,7 @@ struct amdgpu_ctx {
> unsigned reset_counter_query;
> uint32_t vram_lost_counter;
> spinlock_t ring_lock;
> - struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM];
> + struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
> bool preamble_presented;
> enum drm_sched_priority init_priority;
> enum drm_sched_priority override_priority;
> --
> 2.24.1
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH] drm/amdgpu: allocate entities on demand
2020-01-24 11:53 [PATCH] drm/amdgpu: allocate entities on demand Nirmoy Das
` (2 preceding siblings ...)
2020-01-24 15:15 ` [PATCH V2] " Nirmoy Das
@ 2020-01-24 16:31 ` Nirmoy Das
2020-01-28 10:13 ` Nirmoy
3 siblings, 1 reply; 14+ messages in thread
From: Nirmoy Das @ 2020-01-24 16:31 UTC (permalink / raw)
To: amd-gfx; +Cc: alexander.deucher, kenny.ho, nirmoy.das, christian.koenig
Currently we pre-allocate entities and fences for all the HW IPs on
context creation, some of which might never be used.
This patch tries to resolve entity/fences wastage by creating entity
only when needed.
v2: allocate memory for entity and fences together
Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 235 ++++++++++++------------
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 6 +-
2 files changed, 124 insertions(+), 117 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 05c2af61e7de..94a6c42f29ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
[AMDGPU_HW_IP_VCN_JPEG] = 1,
};
-static int amdgpu_ctx_total_num_entities(void)
-{
- unsigned i, num_entities = 0;
-
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
- num_entities += amdgpu_ctx_num_entities[i];
-
- return num_entities;
-}
-
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
enum drm_sched_priority priority)
{
+ if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+ return -EINVAL;
+
/* NORMAL and below are accessible by everyone */
if (priority <= DRM_SCHED_PRIORITY_NORMAL)
return 0;
@@ -68,64 +61,24 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
return -EACCES;
}
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
- enum drm_sched_priority priority,
- struct drm_file *filp,
- struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
- unsigned i, j;
+ struct amdgpu_device *adev = ctx->adev;
+ struct amdgpu_ctx_entity *entity;
+ struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
+ unsigned num_scheds = 0;
+ enum drm_sched_priority priority;
int r;
- if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
- return -EINVAL;
-
- r = amdgpu_ctx_priority_permit(filp, priority);
- if (r)
- return r;
-
- memset(ctx, 0, sizeof(*ctx));
- ctx->adev = adev;
-
-
- ctx->entities[0] = kcalloc(num_entities,
- sizeof(struct amdgpu_ctx_entity),
- GFP_KERNEL);
- if (!ctx->entities[0])
- return -ENOMEM;
-
+ entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]),
+ GFP_KERNEL);
+ if (!entity)
+ return -ENOMEM;
- for (i = 0; i < num_entities; ++i) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
- entity->sequence = 1;
- entity->fences = kcalloc(amdgpu_sched_jobs,
- sizeof(struct dma_fence*), GFP_KERNEL);
- if (!entity->fences) {
- r = -ENOMEM;
- goto error_cleanup_memory;
- }
- }
- for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
- ctx->entities[i] = ctx->entities[i - 1] +
- amdgpu_ctx_num_entities[i - 1];
-
- kref_init(&ctx->refcount);
- spin_lock_init(&ctx->ring_lock);
- mutex_init(&ctx->lock);
-
- ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
- ctx->reset_counter_query = ctx->reset_counter;
- ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
- ctx->init_priority = priority;
- ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
-
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- struct drm_gpu_scheduler **scheds;
- struct drm_gpu_scheduler *sched;
- unsigned num_scheds = 0;
-
- switch (i) {
+ entity->sequence = 1;
+ priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+ ctx->init_priority : ctx->override_priority;
+ switch (hw_ip) {
case AMDGPU_HW_IP_GFX:
sched = &adev->gfx.gfx_ring[0].sched;
scheds = &sched;
@@ -166,63 +119,90 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
scheds = adev->jpeg.jpeg_sched;
num_scheds = adev->jpeg.num_jpeg_sched;
break;
- }
-
- for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
- r = drm_sched_entity_init(&ctx->entities[i][j].entity,
- priority, scheds,
- num_scheds, &ctx->guilty);
- if (r)
- goto error_cleanup_entities;
}
+ r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
+ &ctx->guilty);
+ if (r)
+ goto error_free_entity;
+
+ ctx->entities[hw_ip][ring] = entity;
return 0;
-error_cleanup_entities:
- for (i = 0; i < num_entities; ++i)
- drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+error_free_entity:
+ kfree(entity);
-error_cleanup_memory:
- for (i = 0; i < num_entities; ++i) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+ return r;
+}
- kfree(entity->fences);
- entity->fences = NULL;
- }
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+ enum drm_sched_priority priority,
+ struct drm_file *filp,
+ struct amdgpu_ctx *ctx)
+{
+ int r;
- kfree(ctx->entities[0]);
- ctx->entities[0] = NULL;
- return r;
+ r = amdgpu_ctx_priority_permit(filp, priority);
+ if (r)
+ return r;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ ctx->adev = adev;
+
+ kref_init(&ctx->refcount);
+ spin_lock_init(&ctx->ring_lock);
+ mutex_init(&ctx->lock);
+
+ ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+ ctx->reset_counter_query = ctx->reset_counter;
+ ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+ ctx->init_priority = priority;
+ ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+
+ return 0;
+
+}
+
+static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+{
+
+ int i;
+
+ if (!entity)
+ return;
+
+ for (i = 0; i < amdgpu_sched_jobs; ++i)
+ dma_fence_put(entity->fences[i]);
+
+ kfree(entity);
}
static void amdgpu_ctx_fini(struct kref *ref)
{
struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
- unsigned num_entities = amdgpu_ctx_total_num_entities();
struct amdgpu_device *adev = ctx->adev;
unsigned i, j;
if (!adev)
return;
- for (i = 0; i < num_entities; ++i) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
- for (j = 0; j < amdgpu_sched_jobs; ++j)
- dma_fence_put(entity->fences[j]);
-
- kfree(entity->fences);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
+ amdgpu_ctx_fini_entity(ctx->entities[i][j]);
+ ctx->entities[i][j] = NULL;
+ }
}
- kfree(ctx->entities[0]);
mutex_destroy(&ctx->lock);
-
kfree(ctx);
}
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
u32 ring, struct drm_sched_entity **entity)
{
+ int r;
+
if (hw_ip >= AMDGPU_HW_IP_NUM) {
DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
return -EINVAL;
@@ -239,7 +219,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
return -EINVAL;
}
- *entity = &ctx->entities[hw_ip][ring].entity;
+ if (ctx->entities[hw_ip][ring] == NULL) {
+ r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+ if (r)
+ return r;
+ }
+
+ *entity = &ctx->entities[hw_ip][ring]->entity;
return 0;
}
@@ -279,14 +265,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
static void amdgpu_ctx_do_release(struct kref *ref)
{
struct amdgpu_ctx *ctx;
- unsigned num_entities;
- u32 i;
+ u32 i, j;
ctx = container_of(ref, struct amdgpu_ctx, refcount);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ if (!ctx->entities[i][j])
+ continue;
- num_entities = amdgpu_ctx_total_num_entities();
- for (i = 0; i < num_entities; i++)
- drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+ drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+ }
+ }
amdgpu_ctx_fini(ref);
}
@@ -516,19 +505,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum drm_sched_priority priority)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
enum drm_sched_priority ctx_prio;
- unsigned i;
+ unsigned i, j;
ctx->override_priority = priority;
ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
- for (i = 0; i < num_entities; i++) {
- struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
+ if (!ctx->entities[i][j])
+ continue;
- drm_sched_entity_set_priority(entity, ctx_prio);
+ entity = &ctx->entities[i][j]->entity;
+ drm_sched_entity_set_priority(entity, ctx_prio);
+ }
}
}
@@ -564,20 +557,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
- uint32_t id, i;
+ uint32_t id, i, j;
idp = &mgr->ctx_handles;
mutex_lock(&mgr->lock);
idr_for_each_entry(idp, ctx, id) {
- for (i = 0; i < num_entities; i++) {
- struct drm_sched_entity *entity;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
+
+ if (!ctx->entities[i][j])
+ continue;
- entity = &ctx->entities[0][i].entity;
- timeout = drm_sched_entity_flush(entity, timeout);
+ entity = &ctx->entities[i][j]->entity;
+ timeout = drm_sched_entity_flush(entity, timeout);
+ }
}
}
mutex_unlock(&mgr->lock);
@@ -586,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
- uint32_t id, i;
+ uint32_t id, i, j;
idp = &mgr->ctx_handles;
@@ -599,8 +595,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
continue;
}
- for (i = 0; i < num_entities; i++)
- drm_sched_entity_fini(&ctx->entities[0][i].entity);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
+
+ if (!ctx->entities[i][j])
+ continue;
+
+ entity = &ctx->entities[i][j]->entity;
+ drm_sched_entity_fini(entity);
+ }
+ }
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index a6cd9d4b078c..de490f183af2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -29,10 +29,12 @@ struct drm_device;
struct drm_file;
struct amdgpu_fpriv;
+#define AMDGPU_MAX_ENTITY_NUM 4
+
struct amdgpu_ctx_entity {
uint64_t sequence;
- struct dma_fence **fences;
struct drm_sched_entity entity;
+ struct dma_fence *fences[];
};
struct amdgpu_ctx {
@@ -42,7 +44,7 @@ struct amdgpu_ctx {
unsigned reset_counter_query;
uint32_t vram_lost_counter;
spinlock_t ring_lock;
- struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM];
+ struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
bool preamble_presented;
enum drm_sched_priority init_priority;
enum drm_sched_priority override_priority;
--
2.24.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH] drm/amdgpu: allocate entities on demand
2020-01-24 16:31 ` [PATCH] " Nirmoy Das
@ 2020-01-28 10:13 ` Nirmoy
2020-01-28 12:18 ` Christian König
0 siblings, 1 reply; 14+ messages in thread
From: Nirmoy @ 2020-01-28 10:13 UTC (permalink / raw)
To: Nirmoy Das, amd-gfx
Cc: alexander.deucher, kenny.ho, nirmoy.das, christian.koenig
Gentle reminder!
On 1/24/20 5:31 PM, Nirmoy Das wrote:
> Currently we pre-allocate entities and fences for all the HW IPs on
> context creation and some of which are might never be used.
>
> This patch tries to resolve entity/fences wastage by creating entity
> only when needed.
>
> v2: allocate memory for entity and fences together
>
> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 235 ++++++++++++------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 6 +-
> 2 files changed, 124 insertions(+), 117 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 05c2af61e7de..94a6c42f29ea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
> [AMDGPU_HW_IP_VCN_JPEG] = 1,
> };
>
> -static int amdgpu_ctx_total_num_entities(void)
> -{
> - unsigned i, num_entities = 0;
> -
> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> - num_entities += amdgpu_ctx_num_entities[i];
> -
> - return num_entities;
> -}
> -
> static int amdgpu_ctx_priority_permit(struct drm_file *filp,
> enum drm_sched_priority priority)
> {
> + if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
> + return -EINVAL;
> +
> /* NORMAL and below are accessible by everyone */
> if (priority <= DRM_SCHED_PRIORITY_NORMAL)
> return 0;
> @@ -68,64 +61,24 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
> return -EACCES;
> }
>
> -static int amdgpu_ctx_init(struct amdgpu_device *adev,
> - enum drm_sched_priority priority,
> - struct drm_file *filp,
> - struct amdgpu_ctx *ctx)
> +static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
> {
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> - unsigned i, j;
> + struct amdgpu_device *adev = ctx->adev;
> + struct amdgpu_ctx_entity *entity;
> + struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
> + unsigned num_scheds = 0;
> + enum drm_sched_priority priority;
> int r;
>
> - if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
> - return -EINVAL;
> -
> - r = amdgpu_ctx_priority_permit(filp, priority);
> - if (r)
> - return r;
> -
> - memset(ctx, 0, sizeof(*ctx));
> - ctx->adev = adev;
> -
> -
> - ctx->entities[0] = kcalloc(num_entities,
> - sizeof(struct amdgpu_ctx_entity),
> - GFP_KERNEL);
> - if (!ctx->entities[0])
> - return -ENOMEM;
> -
> + entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]),
> + GFP_KERNEL);
> + if (!entity)
> + return -ENOMEM;
>
> - for (i = 0; i < num_entities; ++i) {
> - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> -
> - entity->sequence = 1;
> - entity->fences = kcalloc(amdgpu_sched_jobs,
> - sizeof(struct dma_fence*), GFP_KERNEL);
> - if (!entity->fences) {
> - r = -ENOMEM;
> - goto error_cleanup_memory;
> - }
> - }
> - for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
> - ctx->entities[i] = ctx->entities[i - 1] +
> - amdgpu_ctx_num_entities[i - 1];
> -
> - kref_init(&ctx->refcount);
> - spin_lock_init(&ctx->ring_lock);
> - mutex_init(&ctx->lock);
> -
> - ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
> - ctx->reset_counter_query = ctx->reset_counter;
> - ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
> - ctx->init_priority = priority;
> - ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
> -
> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> - struct drm_gpu_scheduler **scheds;
> - struct drm_gpu_scheduler *sched;
> - unsigned num_scheds = 0;
> -
> - switch (i) {
> + entity->sequence = 1;
> + priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
> + ctx->init_priority : ctx->override_priority;
> + switch (hw_ip) {
> case AMDGPU_HW_IP_GFX:
> sched = &adev->gfx.gfx_ring[0].sched;
> scheds = &sched;
> @@ -166,63 +119,90 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
> scheds = adev->jpeg.jpeg_sched;
> num_scheds = adev->jpeg.num_jpeg_sched;
> break;
> - }
> -
> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> - r = drm_sched_entity_init(&ctx->entities[i][j].entity,
> - priority, scheds,
> - num_scheds, &ctx->guilty);
> - if (r)
> - goto error_cleanup_entities;
> }
>
> + r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
> + &ctx->guilty);
> + if (r)
> + goto error_free_entity;
> +
> + ctx->entities[hw_ip][ring] = entity;
> return 0;
>
> -error_cleanup_entities:
> - for (i = 0; i < num_entities; ++i)
> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> +error_free_entity:
> + kfree(entity);
>
> -error_cleanup_memory:
> - for (i = 0; i < num_entities; ++i) {
> - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> + return r;
> +}
>
> - kfree(entity->fences);
> - entity->fences = NULL;
> - }
> +static int amdgpu_ctx_init(struct amdgpu_device *adev,
> + enum drm_sched_priority priority,
> + struct drm_file *filp,
> + struct amdgpu_ctx *ctx)
> +{
> + int r;
>
> - kfree(ctx->entities[0]);
> - ctx->entities[0] = NULL;
> - return r;
> + r = amdgpu_ctx_priority_permit(filp, priority);
> + if (r)
> + return r;
> +
> + memset(ctx, 0, sizeof(*ctx));
> +
> + ctx->adev = adev;
> +
> + kref_init(&ctx->refcount);
> + spin_lock_init(&ctx->ring_lock);
> + mutex_init(&ctx->lock);
> +
> + ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
> + ctx->reset_counter_query = ctx->reset_counter;
> + ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
> + ctx->init_priority = priority;
> + ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
> +
> + return 0;
> +
> +}
> +
> +static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
> +{
> +
> + int i;
> +
> + if (!entity)
> + return;
> +
> + for (i = 0; i < amdgpu_sched_jobs; ++i)
> + dma_fence_put(entity->fences[i]);
> +
> + kfree(entity);
> }
>
> static void amdgpu_ctx_fini(struct kref *ref)
> {
> struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> struct amdgpu_device *adev = ctx->adev;
> unsigned i, j;
>
> if (!adev)
> return;
>
> - for (i = 0; i < num_entities; ++i) {
> - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> -
> - for (j = 0; j < amdgpu_sched_jobs; ++j)
> - dma_fence_put(entity->fences[j]);
> -
> - kfree(entity->fences);
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
> + amdgpu_ctx_fini_entity(ctx->entities[i][j]);
> + ctx->entities[i][j] = NULL;
> + }
> }
>
> - kfree(ctx->entities[0]);
> mutex_destroy(&ctx->lock);
> -
> kfree(ctx);
> }
>
> int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
> u32 ring, struct drm_sched_entity **entity)
> {
> + int r;
> +
> if (hw_ip >= AMDGPU_HW_IP_NUM) {
> DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
> return -EINVAL;
> @@ -239,7 +219,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
> return -EINVAL;
> }
>
> - *entity = &ctx->entities[hw_ip][ring].entity;
> + if (ctx->entities[hw_ip][ring] == NULL) {
> + r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
> + if (r)
> + return r;
> + }
> +
> + *entity = &ctx->entities[hw_ip][ring]->entity;
> return 0;
> }
>
> @@ -279,14 +265,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
> static void amdgpu_ctx_do_release(struct kref *ref)
> {
> struct amdgpu_ctx *ctx;
> - unsigned num_entities;
> - u32 i;
> + u32 i, j;
>
> ctx = container_of(ref, struct amdgpu_ctx, refcount);
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + if (!ctx->entities[i][j])
> + continue;
>
> - num_entities = amdgpu_ctx_total_num_entities();
> - for (i = 0; i < num_entities; i++)
> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> + drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
> + }
> + }
>
> amdgpu_ctx_fini(ref);
> }
> @@ -516,19 +505,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
> void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
> enum drm_sched_priority priority)
> {
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> enum drm_sched_priority ctx_prio;
> - unsigned i;
> + unsigned i, j;
>
> ctx->override_priority = priority;
>
> ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
> ctx->init_priority : ctx->override_priority;
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + struct drm_sched_entity *entity;
>
> - for (i = 0; i < num_entities; i++) {
> - struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
> + if (!ctx->entities[i][j])
> + continue;
>
> - drm_sched_entity_set_priority(entity, ctx_prio);
> + entity = &ctx->entities[i][j]->entity;
> + drm_sched_entity_set_priority(entity, ctx_prio);
> + }
> }
> }
>
> @@ -564,20 +557,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
>
> long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
> {
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> struct amdgpu_ctx *ctx;
> struct idr *idp;
> - uint32_t id, i;
> + uint32_t id, i, j;
>
> idp = &mgr->ctx_handles;
>
> mutex_lock(&mgr->lock);
> idr_for_each_entry(idp, ctx, id) {
> - for (i = 0; i < num_entities; i++) {
> - struct drm_sched_entity *entity;
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + struct drm_sched_entity *entity;
> +
> + if (!ctx->entities[i][j])
> + continue;
>
> - entity = &ctx->entities[0][i].entity;
> - timeout = drm_sched_entity_flush(entity, timeout);
> + entity = &ctx->entities[i][j]->entity;
> + timeout = drm_sched_entity_flush(entity, timeout);
> + }
> }
> }
> mutex_unlock(&mgr->lock);
> @@ -586,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
>
> void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
> {
> - unsigned num_entities = amdgpu_ctx_total_num_entities();
> struct amdgpu_ctx *ctx;
> struct idr *idp;
> - uint32_t id, i;
> + uint32_t id, i, j;
>
> idp = &mgr->ctx_handles;
>
> @@ -599,8 +595,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
> continue;
> }
>
> - for (i = 0; i < num_entities; i++)
> - drm_sched_entity_fini(&ctx->entities[0][i].entity);
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + struct drm_sched_entity *entity;
> +
> + if (!ctx->entities[i][j])
> + continue;
> +
> + entity = &ctx->entities[i][j]->entity;
> + drm_sched_entity_fini(entity);
> + }
> + }
> }
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index a6cd9d4b078c..de490f183af2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -29,10 +29,12 @@ struct drm_device;
> struct drm_file;
> struct amdgpu_fpriv;
>
> +#define AMDGPU_MAX_ENTITY_NUM 4
> +
> struct amdgpu_ctx_entity {
> uint64_t sequence;
> - struct dma_fence **fences;
> struct drm_sched_entity entity;
> + struct dma_fence *fences[];
> };
>
> struct amdgpu_ctx {
> @@ -42,7 +44,7 @@ struct amdgpu_ctx {
> unsigned reset_counter_query;
> uint32_t vram_lost_counter;
> spinlock_t ring_lock;
> - struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM];
> + struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
> bool preamble_presented;
> enum drm_sched_priority init_priority;
> enum drm_sched_priority override_priority;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] drm/amdgpu: allocate entities on demand
2020-01-28 10:13 ` Nirmoy
@ 2020-01-28 12:18 ` Christian König
0 siblings, 0 replies; 14+ messages in thread
From: Christian König @ 2020-01-28 12:18 UTC (permalink / raw)
To: Nirmoy, Nirmoy Das, amd-gfx; +Cc: alexander.deucher, kenny.ho, nirmoy.das
Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
Regards,
Christian.
Am 28.01.20 um 11:13 schrieb Nirmoy:
> Gentle reminder !
>
> On 1/24/20 5:31 PM, Nirmoy Das wrote:
>> Currently we pre-allocate entities and fences for all the HW IPs on
>> context creation, some of which might never be used.
>>
>> This patch tries to resolve entity/fences wastage by creating entity
>> only when needed.
>>
>> v2: allocate memory for entity and fences together
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 235 ++++++++++++------------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 6 +-
>> 2 files changed, 124 insertions(+), 117 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> index 05c2af61e7de..94a6c42f29ea 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> @@ -42,19 +42,12 @@ const unsigned int
>> amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
>> [AMDGPU_HW_IP_VCN_JPEG] = 1,
>> };
>> -static int amdgpu_ctx_total_num_entities(void)
>> -{
>> - unsigned i, num_entities = 0;
>> -
>> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
>> - num_entities += amdgpu_ctx_num_entities[i];
>> -
>> - return num_entities;
>> -}
>> -
>> static int amdgpu_ctx_priority_permit(struct drm_file *filp,
>> enum drm_sched_priority priority)
>> {
>> + if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
>> + return -EINVAL;
>> +
>> /* NORMAL and below are accessible by everyone */
>> if (priority <= DRM_SCHED_PRIORITY_NORMAL)
>> return 0;
>> @@ -68,64 +61,24 @@ static int amdgpu_ctx_priority_permit(struct
>> drm_file *filp,
>> return -EACCES;
>> }
>> -static int amdgpu_ctx_init(struct amdgpu_device *adev,
>> - enum drm_sched_priority priority,
>> - struct drm_file *filp,
>> - struct amdgpu_ctx *ctx)
>> +static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32
>> hw_ip, const u32 ring)
>> {
>> - unsigned num_entities = amdgpu_ctx_total_num_entities();
>> - unsigned i, j;
>> + struct amdgpu_device *adev = ctx->adev;
>> + struct amdgpu_ctx_entity *entity;
>> + struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
>> + unsigned num_scheds = 0;
>> + enum drm_sched_priority priority;
>> int r;
>> - if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
>> - return -EINVAL;
>> -
>> - r = amdgpu_ctx_priority_permit(filp, priority);
>> - if (r)
>> - return r;
>> -
>> - memset(ctx, 0, sizeof(*ctx));
>> - ctx->adev = adev;
>> -
>> -
>> - ctx->entities[0] = kcalloc(num_entities,
>> - sizeof(struct amdgpu_ctx_entity),
>> - GFP_KERNEL);
>> - if (!ctx->entities[0])
>> - return -ENOMEM;
>> -
>> + entity = kcalloc(1, offsetof(typeof(*entity),
>> fences[amdgpu_sched_jobs]),
>> + GFP_KERNEL);
>> + if (!entity)
>> + return -ENOMEM;
>> - for (i = 0; i < num_entities; ++i) {
>> - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
>> -
>> - entity->sequence = 1;
>> - entity->fences = kcalloc(amdgpu_sched_jobs,
>> - sizeof(struct dma_fence*), GFP_KERNEL);
>> - if (!entity->fences) {
>> - r = -ENOMEM;
>> - goto error_cleanup_memory;
>> - }
>> - }
>> - for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
>> - ctx->entities[i] = ctx->entities[i - 1] +
>> - amdgpu_ctx_num_entities[i - 1];
>> -
>> - kref_init(&ctx->refcount);
>> - spin_lock_init(&ctx->ring_lock);
>> - mutex_init(&ctx->lock);
>> -
>> - ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
>> - ctx->reset_counter_query = ctx->reset_counter;
>> - ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
>> - ctx->init_priority = priority;
>> - ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
>> -
>> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> - struct drm_gpu_scheduler **scheds;
>> - struct drm_gpu_scheduler *sched;
>> - unsigned num_scheds = 0;
>> -
>> - switch (i) {
>> + entity->sequence = 1;
>> + priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
>> + ctx->init_priority : ctx->override_priority;
>> + switch (hw_ip) {
>> case AMDGPU_HW_IP_GFX:
>> sched = &adev->gfx.gfx_ring[0].sched;
>> scheds = &sched;
>> @@ -166,63 +119,90 @@ static int amdgpu_ctx_init(struct amdgpu_device
>> *adev,
>> scheds = adev->jpeg.jpeg_sched;
>> num_scheds = adev->jpeg.num_jpeg_sched;
>> break;
>> - }
>> -
>> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
>> - r = drm_sched_entity_init(&ctx->entities[i][j].entity,
>> - priority, scheds,
>> - num_scheds, &ctx->guilty);
>> - if (r)
>> - goto error_cleanup_entities;
>> }
>> + r = drm_sched_entity_init(&entity->entity, priority, scheds,
>> num_scheds,
>> + &ctx->guilty);
>> + if (r)
>> + goto error_free_entity;
>> +
>> + ctx->entities[hw_ip][ring] = entity;
>> return 0;
>> -error_cleanup_entities:
>> - for (i = 0; i < num_entities; ++i)
>> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
>> +error_free_entity:
>> + kfree(entity);
>> -error_cleanup_memory:
>> - for (i = 0; i < num_entities; ++i) {
>> - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
>> + return r;
>> +}
>> - kfree(entity->fences);
>> - entity->fences = NULL;
>> - }
>> +static int amdgpu_ctx_init(struct amdgpu_device *adev,
>> + enum drm_sched_priority priority,
>> + struct drm_file *filp,
>> + struct amdgpu_ctx *ctx)
>> +{
>> + int r;
>> - kfree(ctx->entities[0]);
>> - ctx->entities[0] = NULL;
>> - return r;
>> + r = amdgpu_ctx_priority_permit(filp, priority);
>> + if (r)
>> + return r;
>> +
>> + memset(ctx, 0, sizeof(*ctx));
>> +
>> + ctx->adev = adev;
>> +
>> + kref_init(&ctx->refcount);
>> + spin_lock_init(&ctx->ring_lock);
>> + mutex_init(&ctx->lock);
>> +
>> + ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
>> + ctx->reset_counter_query = ctx->reset_counter;
>> + ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
>> + ctx->init_priority = priority;
>> + ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
>> +
>> + return 0;
>> +
>> +}
>> +
>> +static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
>> +{
>> +
>> + int i;
>> +
>> + if (!entity)
>> + return;
>> +
>> + for (i = 0; i < amdgpu_sched_jobs; ++i)
>> + dma_fence_put(entity->fences[i]);
>> +
>> + kfree(entity);
>> }
>> static void amdgpu_ctx_fini(struct kref *ref)
>> {
>> struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx,
>> refcount);
>> - unsigned num_entities = amdgpu_ctx_total_num_entities();
>> struct amdgpu_device *adev = ctx->adev;
>> unsigned i, j;
>> if (!adev)
>> return;
>> - for (i = 0; i < num_entities; ++i) {
>> - struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
>> -
>> - for (j = 0; j < amdgpu_sched_jobs; ++j)
>> - dma_fence_put(entity->fences[j]);
>> -
>> - kfree(entity->fences);
>> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> + for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
>> + amdgpu_ctx_fini_entity(ctx->entities[i][j]);
>> + ctx->entities[i][j] = NULL;
>> + }
>> }
>> - kfree(ctx->entities[0]);
>> mutex_destroy(&ctx->lock);
>> -
>> kfree(ctx);
>> }
>> int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32
>> instance,
>> u32 ring, struct drm_sched_entity **entity)
>> {
>> + int r;
>> +
>> if (hw_ip >= AMDGPU_HW_IP_NUM) {
>> DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
>> return -EINVAL;
>> @@ -239,7 +219,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx
>> *ctx, u32 hw_ip, u32 instance,
>> return -EINVAL;
>> }
>> - *entity = &ctx->entities[hw_ip][ring].entity;
>> + if (ctx->entities[hw_ip][ring] == NULL) {
>> + r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
>> + if (r)
>> + return r;
>> + }
>> +
>> + *entity = &ctx->entities[hw_ip][ring]->entity;
>> return 0;
>> }
>> @@ -279,14 +265,17 @@ static int amdgpu_ctx_alloc(struct
>> amdgpu_device *adev,
>> static void amdgpu_ctx_do_release(struct kref *ref)
>> {
>> struct amdgpu_ctx *ctx;
>> - unsigned num_entities;
>> - u32 i;
>> + u32 i, j;
>> ctx = container_of(ref, struct amdgpu_ctx, refcount);
>> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> + if (!ctx->entities[i][j])
>> + continue;
>> - num_entities = amdgpu_ctx_total_num_entities();
>> - for (i = 0; i < num_entities; i++)
>> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
>> + drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
>> + }
>> + }
>> amdgpu_ctx_fini(ref);
>> }
>> @@ -516,19 +505,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct
>> amdgpu_ctx *ctx,
>> void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
>> enum drm_sched_priority priority)
>> {
>> - unsigned num_entities = amdgpu_ctx_total_num_entities();
>> enum drm_sched_priority ctx_prio;
>> - unsigned i;
>> + unsigned i, j;
>> ctx->override_priority = priority;
>> ctx_prio = (ctx->override_priority ==
>> DRM_SCHED_PRIORITY_UNSET) ?
>> ctx->init_priority : ctx->override_priority;
>> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> + struct drm_sched_entity *entity;
>> - for (i = 0; i < num_entities; i++) {
>> - struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
>> + if (!ctx->entities[i][j])
>> + continue;
>> - drm_sched_entity_set_priority(entity, ctx_prio);
>> + entity = &ctx->entities[i][j]->entity;
>> + drm_sched_entity_set_priority(entity, ctx_prio);
>> + }
>> }
>> }
>> @@ -564,20 +557,24 @@ void amdgpu_ctx_mgr_init(struct
>> amdgpu_ctx_mgr *mgr)
>> long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long
>> timeout)
>> {
>> - unsigned num_entities = amdgpu_ctx_total_num_entities();
>> struct amdgpu_ctx *ctx;
>> struct idr *idp;
>> - uint32_t id, i;
>> + uint32_t id, i, j;
>> idp = &mgr->ctx_handles;
>> mutex_lock(&mgr->lock);
>> idr_for_each_entry(idp, ctx, id) {
>> - for (i = 0; i < num_entities; i++) {
>> - struct drm_sched_entity *entity;
>> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> + struct drm_sched_entity *entity;
>> +
>> + if (!ctx->entities[i][j])
>> + continue;
>> - entity = &ctx->entities[0][i].entity;
>> - timeout = drm_sched_entity_flush(entity, timeout);
>> + entity = &ctx->entities[i][j]->entity;
>> + timeout = drm_sched_entity_flush(entity, timeout);
>> + }
>> }
>> }
>> mutex_unlock(&mgr->lock);
>> @@ -586,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct
>> amdgpu_ctx_mgr *mgr, long timeout)
>> void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
>> {
>> - unsigned num_entities = amdgpu_ctx_total_num_entities();
>> struct amdgpu_ctx *ctx;
>> struct idr *idp;
>> - uint32_t id, i;
>> + uint32_t id, i, j;
>> idp = &mgr->ctx_handles;
>> @@ -599,8 +595,17 @@ void amdgpu_ctx_mgr_entity_fini(struct
>> amdgpu_ctx_mgr *mgr)
>> continue;
>> }
>> - for (i = 0; i < num_entities; i++)
>> - drm_sched_entity_fini(&ctx->entities[0][i].entity);
>> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>> + struct drm_sched_entity *entity;
>> +
>> + if (!ctx->entities[i][j])
>> + continue;
>> +
>> + entity = &ctx->entities[i][j]->entity;
>> + drm_sched_entity_fini(entity);
>> + }
>> + }
>> }
>> }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> index a6cd9d4b078c..de490f183af2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
>> @@ -29,10 +29,12 @@ struct drm_device;
>> struct drm_file;
>> struct amdgpu_fpriv;
>> +#define AMDGPU_MAX_ENTITY_NUM 4
>> +
>> struct amdgpu_ctx_entity {
>> uint64_t sequence;
>> - struct dma_fence **fences;
>> struct drm_sched_entity entity;
>> + struct dma_fence *fences[];
>> };
>> struct amdgpu_ctx {
>> @@ -42,7 +44,7 @@ struct amdgpu_ctx {
>> unsigned reset_counter_query;
>> uint32_t vram_lost_counter;
>> spinlock_t ring_lock;
>> - struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM];
>> + struct amdgpu_ctx_entity
>> *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
>> bool preamble_presented;
>> enum drm_sched_priority init_priority;
>> enum drm_sched_priority override_priority;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] drm/amdgpu: allocate entities on demand
2020-01-23 16:13 Nirmoy Das
@ 2020-01-24 11:53 ` Nirmoy
0 siblings, 0 replies; 14+ messages in thread
From: Nirmoy @ 2020-01-24 11:53 UTC (permalink / raw)
To: Nirmoy Das, amd-gfx
Cc: alexander.deucher, kenny.ho, nirmoy.das, christian.koenig
On 1/23/20 5:13 PM, Nirmoy Das wrote:
> + kfree(ctx->entities[i];
Forgot to add the change that fixes this ^
Regards,
Nirmoy
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH] drm/amdgpu: allocate entities on demand
@ 2020-01-23 16:13 Nirmoy Das
2020-01-24 11:53 ` Nirmoy
0 siblings, 1 reply; 14+ messages in thread
From: Nirmoy Das @ 2020-01-23 16:13 UTC (permalink / raw)
To: amd-gfx; +Cc: alexander.deucher, kenny.ho, nirmoy.das, christian.koenig
Currently we pre-allocate entities and fences for all the HW IPs on
context creation, some of which might never be used.
This patch tries to resolve entity/fences wastage by creating entity
only when needed.
Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 244 +++++++++++++-----------
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 2 +-
2 files changed, 135 insertions(+), 111 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 05c2af61e7de..73f7615df8c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
[AMDGPU_HW_IP_VCN_JPEG] = 1,
};
-static int amdgpu_ctx_total_num_entities(void)
-{
- unsigned i, num_entities = 0;
-
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
- num_entities += amdgpu_ctx_num_entities[i];
-
- return num_entities;
-}
-
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
enum drm_sched_priority priority)
{
+ if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+ return -EINVAL;
+
/* NORMAL and below are accessible by everyone */
if (priority <= DRM_SCHED_PRIORITY_NORMAL)
return 0;
@@ -68,64 +61,44 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
return -EACCES;
}
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
- enum drm_sched_priority priority,
- struct drm_file *filp,
- struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
- unsigned i, j;
+ struct amdgpu_device *adev = ctx->adev;
+ struct amdgpu_ctx_entity *entity;
+ struct drm_gpu_scheduler **scheds;
+ struct drm_gpu_scheduler *sched;
+ unsigned num_scheds = 0;
+ enum drm_sched_priority priority;
int r;
- if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
- return -EINVAL;
-
- r = amdgpu_ctx_priority_permit(filp, priority);
- if (r)
- return r;
-
- memset(ctx, 0, sizeof(*ctx));
- ctx->adev = adev;
-
-
- ctx->entities[0] = kcalloc(num_entities,
- sizeof(struct amdgpu_ctx_entity),
- GFP_KERNEL);
- if (!ctx->entities[0])
- return -ENOMEM;
-
-
- for (i = 0; i < num_entities; ++i) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
- entity->sequence = 1;
- entity->fences = kcalloc(amdgpu_sched_jobs,
- sizeof(struct dma_fence*), GFP_KERNEL);
- if (!entity->fences) {
- r = -ENOMEM;
- goto error_cleanup_memory;
- }
+ if (!ctx->entities[hw_ip]) {
+ ctx->entities[hw_ip] = kcalloc(amdgpu_ctx_num_entities[hw_ip],
+ sizeof(struct amdgpu_ctx_entity *),
+ GFP_KERNEL);
+ if (!ctx->entities[hw_ip])
+ return -ENOMEM;
}
- for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
- ctx->entities[i] = ctx->entities[i - 1] +
- amdgpu_ctx_num_entities[i - 1];
- kref_init(&ctx->refcount);
- spin_lock_init(&ctx->ring_lock);
- mutex_init(&ctx->lock);
+ ctx->entities[hw_ip][ring] = kcalloc(1, sizeof(struct amdgpu_ctx_entity),
+ GFP_KERNEL);
+ if (!ctx->entities[hw_ip][ring]) {
+ r = -ENOMEM;
+ goto error_free_entity;
+ }
- ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
- ctx->reset_counter_query = ctx->reset_counter;
- ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
- ctx->init_priority = priority;
- ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+ entity = ctx->entities[hw_ip][ring];
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- struct drm_gpu_scheduler **scheds;
- struct drm_gpu_scheduler *sched;
- unsigned num_scheds = 0;
+ entity->sequence = 1;
+ entity->fences = kcalloc(amdgpu_sched_jobs,
+ sizeof(struct dma_fence*), GFP_KERNEL);
+ if (!entity->fences) {
+ r = -ENOMEM;
+ goto error_free_entity;
+ }
- switch (i) {
+ priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+ ctx->init_priority : ctx->override_priority;
+ switch (hw_ip) {
case AMDGPU_HW_IP_GFX:
sched = &adev->gfx.gfx_ring[0].sched;
scheds = &sched;
@@ -166,57 +139,85 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
scheds = adev->jpeg.jpeg_sched;
num_scheds = adev->jpeg.num_jpeg_sched;
break;
- }
-
- for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
- r = drm_sched_entity_init(&ctx->entities[i][j].entity,
- priority, scheds,
- num_scheds, &ctx->guilty);
- if (r)
- goto error_cleanup_entities;
}
+ r = drm_sched_entity_init(&ctx->entities[hw_ip][ring]->entity,
+ priority, scheds,
+ num_scheds, &ctx->guilty);
+ if (r)
+ goto error_free_entity;
+
return 0;
-error_cleanup_entities:
- for (i = 0; i < num_entities; ++i)
- drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+error_free_entity:
+ kfree(ctx->entities[hw_ip][ring]);
+ return r;
+}
-error_cleanup_memory:
- for (i = 0; i < num_entities; ++i) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+ enum drm_sched_priority priority,
+ struct drm_file *filp,
+ struct amdgpu_ctx *ctx)
+{
+ int r;
- kfree(entity->fences);
- entity->fences = NULL;
- }
+ r = amdgpu_ctx_priority_permit(filp, priority);
+ if (r)
+ return r;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ ctx->adev = adev;
+
+ kref_init(&ctx->refcount);
+ spin_lock_init(&ctx->ring_lock);
+ mutex_init(&ctx->lock);
+
+ ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+ ctx->reset_counter_query = ctx->reset_counter;
+ ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+ ctx->init_priority = priority;
+ ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+
+ return 0;
- kfree(ctx->entities[0]);
- ctx->entities[0] = NULL;
- return r;
}
static void amdgpu_ctx_fini(struct kref *ref)
{
struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
- unsigned num_entities = amdgpu_ctx_total_num_entities();
struct amdgpu_device *adev = ctx->adev;
- unsigned i, j;
+ unsigned i, j, k;
if (!adev)
return;
- for (i = 0; i < num_entities; ++i) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct amdgpu_ctx_entity *entity;
+
+ if (!ctx->entities[i] || !ctx->entities[i][j])
+ continue;
+
+ entity = ctx->entities[i][j];
+ if (!entity->fences)
+ continue;
- for (j = 0; j < amdgpu_sched_jobs; ++j)
- dma_fence_put(entity->fences[j]);
+ for (k = 0; k < amdgpu_sched_jobs; ++k)
+ dma_fence_put(entity->fences[k]);
- kfree(entity->fences);
+ kfree(entity->fences);
+ entity->fences = NULL;
+
+ kfree(entity);
+ ctx->entities[i][j] = NULL;
+ }
+
+ kfree(ctx->entities[i];
+ ctx->entities[i] = NULL;
}
- kfree(ctx->entities[0]);
mutex_destroy(&ctx->lock);
-
kfree(ctx);
}
@@ -239,7 +240,11 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
return -EINVAL;
}
- *entity = &ctx->entities[hw_ip][ring].entity;
+ if ((ctx->entities[hw_ip] == NULL) || (ctx->entities[hw_ip][ring] == NULL))
+ amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+
+
+ *entity = &ctx->entities[hw_ip][ring]->entity;
return 0;
}
@@ -279,14 +284,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
static void amdgpu_ctx_do_release(struct kref *ref)
{
struct amdgpu_ctx *ctx;
- unsigned num_entities;
- u32 i;
+ u32 i, j;
ctx = container_of(ref, struct amdgpu_ctx, refcount);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ if (!ctx->entities[i] || !ctx->entities[i][j])
+ continue;
- num_entities = amdgpu_ctx_total_num_entities();
- for (i = 0; i < num_entities; i++)
- drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+ drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+ }
+ }
amdgpu_ctx_fini(ref);
}
@@ -516,19 +524,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum drm_sched_priority priority)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
enum drm_sched_priority ctx_prio;
- unsigned i;
+ unsigned i, j;
ctx->override_priority = priority;
ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
- for (i = 0; i < num_entities; i++) {
- struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
+ if (!ctx->entities[i] || !ctx->entities[i][j])
+ continue;
- drm_sched_entity_set_priority(entity, ctx_prio);
+ entity = &ctx->entities[i][j]->entity;
+ drm_sched_entity_set_priority(entity, ctx_prio);
+ }
}
}
@@ -564,20 +576,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
- uint32_t id, i;
+ uint32_t id, i, j;
idp = &mgr->ctx_handles;
mutex_lock(&mgr->lock);
idr_for_each_entry(idp, ctx, id) {
- for (i = 0; i < num_entities; i++) {
- struct drm_sched_entity *entity;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
+
+ if (!ctx->entities[i] || !ctx->entities[i][j])
+ continue;
- entity = &ctx->entities[0][i].entity;
- timeout = drm_sched_entity_flush(entity, timeout);
+ entity = &ctx->entities[i][j]->entity;
+ timeout = drm_sched_entity_flush(entity, timeout);
+ }
}
}
mutex_unlock(&mgr->lock);
@@ -586,10 +602,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
- uint32_t id, i;
+ uint32_t id, i, j;
idp = &mgr->ctx_handles;
@@ -599,8 +614,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
continue;
}
- for (i = 0; i < num_entities; i++)
- drm_sched_entity_fini(&ctx->entities[0][i].entity);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
+
+ if (!ctx->entities[i] || !ctx->entities[i][j])
+ continue;
+
+ entity = &ctx->entities[i][j]->entity;
+ drm_sched_entity_fini(entity);
+ }
+ }
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index a6cd9d4b078c..ca9363e71df5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -42,7 +42,7 @@ struct amdgpu_ctx {
unsigned reset_counter_query;
uint32_t vram_lost_counter;
spinlock_t ring_lock;
- struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM];
+ struct amdgpu_ctx_entity **entities[AMDGPU_HW_IP_NUM];
bool preamble_presented;
enum drm_sched_priority init_priority;
enum drm_sched_priority override_priority;
--
2.24.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 14+ messages in thread
end of thread, other threads:[~2020-01-28 12:18 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-24 11:53 [PATCH] drm/amdgpu: allocate entities on demand Nirmoy Das
2020-01-24 12:09 ` Christian König
2020-01-24 12:43 ` Nirmoy
2020-01-24 12:56 ` Christian König
2020-01-24 12:41 ` Nirmoy Das
2020-01-24 12:56 ` Christian König
2020-01-24 15:18 ` Nirmoy
2020-01-24 15:15 ` [PATCH V2] " Nirmoy Das
2020-01-24 15:31 ` Christian König
2020-01-24 16:31 ` [PATCH] " Nirmoy Das
2020-01-28 10:13 ` Nirmoy
2020-01-28 12:18 ` Christian König
-- strict thread matches above, loose matches on Subject: below --
2020-01-23 16:13 Nirmoy Das
2020-01-24 11:53 ` Nirmoy
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.