* [PATCH v2 1/4] drm/sched: Fix entities with 0 rqs.
@ 2019-01-30  1:53 Bas Nieuwenhuizen
  0 siblings, 4 replies; 8+ messages in thread
From: Bas Nieuwenhuizen @ 2019-01-30  1:53 UTC
  To: amd-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org
  Cc: Bas Nieuwenhuizen

Some blocks in amdgpu can have 0 rqs.

Job creation already fails with -ENOENT when entity->rq is NULL,
so jobs cannot be pushed. Without an rq there is no scheduler to
pop jobs, and rq selection already does the right thing with a
list of length 0.

So the operations we need to fix are:
  - Creation: do not set rq to rq_list[0] if the list can have length 0.
  - Do not flush any jobs when there is no rq.
  - On entity destruction, handle the rq == NULL case.
  - On set_priority, do not try to change the rq if it is NULL.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
---
 drivers/gpu/drm/scheduler/sched_entity.c | 39 ++++++++++++++++--------
 1 file changed, 26 insertions(+), 13 deletions(-)
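
For context, the -ENOENT behaviour referenced above comes from the job
init path; roughly (a paraphrased sketch of the scheduler code of this
era, not part of this patch):

	int drm_sched_job_init(struct drm_sched_job *job,
			       struct drm_sched_entity *entity,
			       void *owner)
	{
		struct drm_gpu_scheduler *sched;

		drm_sched_entity_select_rq(entity);
		if (!entity->rq)
			return -ENOENT;	/* no rq, nothing can be pushed */

		sched = entity->rq->sched;
		/* ... */
	}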

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 4463d3826ecb..8e31b6628d09 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -52,12 +52,12 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
 {
 	int i;
 
-	if (!(entity && rq_list && num_rq_list > 0 && rq_list[0]))
+	if (!(entity && rq_list && (num_rq_list == 0 || rq_list[0])))
 		return -EINVAL;
 
 	memset(entity, 0, sizeof(struct drm_sched_entity));
 	INIT_LIST_HEAD(&entity->list);
-	entity->rq = rq_list[0];
+	entity->rq = NULL;
 	entity->guilty = guilty;
 	entity->num_rq_list = num_rq_list;
 	entity->rq_list = kcalloc(num_rq_list, sizeof(struct drm_sched_rq *),
@@ -67,6 +67,10 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
 
 	for (i = 0; i < num_rq_list; ++i)
 		entity->rq_list[i] = rq_list[i];
+
+	if (num_rq_list)
+		entity->rq = rq_list[0];
+
 	entity->last_scheduled = NULL;
 
 	spin_lock_init(&entity->rq_lock);
@@ -165,6 +169,9 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
 	struct task_struct *last_user;
 	long ret = timeout;
 
+	if (!entity->rq)
+		return 0;
+
 	sched = entity->rq->sched;
 	/**
 	 * The client will not queue more IBs during this fini, consume existing
@@ -264,20 +271,24 @@ static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity)
  */
 void drm_sched_entity_fini(struct drm_sched_entity *entity)
 {
-	struct drm_gpu_scheduler *sched;
+	struct drm_gpu_scheduler *sched = NULL;
 
-	sched = entity->rq->sched;
-	drm_sched_rq_remove_entity(entity->rq, entity);
+	if (entity->rq) {
+		sched = entity->rq->sched;
+		drm_sched_rq_remove_entity(entity->rq, entity);
+	}
 
 	/* Consumption of existing IBs wasn't completed. Forcefully
 	 * remove them here.
 	 */
 	if (spsc_queue_peek(&entity->job_queue)) {
-		/* Park the kernel for a moment to make sure it isn't processing
-		 * our enity.
-		 */
-		kthread_park(sched->thread);
-		kthread_unpark(sched->thread);
+		if (sched) {
+			/* Park the kernel thread for a moment to make sure it
+			 * isn't processing our entity.
+			 */
+			kthread_park(sched->thread);
+			kthread_unpark(sched->thread);
+		}
 		if (entity->dependency) {
 			dma_fence_remove_callback(entity->dependency,
 						  &entity->cb);
@@ -362,9 +373,11 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
 	for (i = 0; i < entity->num_rq_list; ++i)
 		drm_sched_entity_set_rq_priority(&entity->rq_list[i], priority);
 
-	drm_sched_rq_remove_entity(entity->rq, entity);
-	drm_sched_entity_set_rq_priority(&entity->rq, priority);
-	drm_sched_rq_add_entity(entity->rq, entity);
+	if (entity->rq) {
+		drm_sched_rq_remove_entity(entity->rq, entity);
+		drm_sched_entity_set_rq_priority(&entity->rq, priority);
+		drm_sched_rq_add_entity(entity->rq, entity);
+	}
 
 	spin_unlock(&entity->rq_lock);
 }
-- 
2.20.1


* [PATCH v2 2/4] drm/amdgpu: Only add rqs for initialized rings.
@ 2019-01-30  1:53 Bas Nieuwenhuizen
  0 siblings, 1 reply; 8+ messages in thread
From: Bas Nieuwenhuizen @ 2019-01-30  1:53 UTC
  To: amd-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org
  Cc: Bas Nieuwenhuizen

I don't see another way to figure out whether a ring is initialized
when the hardware block might not be initialized.

Entities have been fixed up to handle num_rqs = 0.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)
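
The initialization test used below relies on the ring's adev back-pointer;
as a hypothetical standalone helper the idea would look something like
this (sketch; assumes, as the patch does, that only amdgpu_ring_init()
sets ring->adev):

	static bool amdgpu_ring_is_initialized(struct amdgpu_ring *ring)
	{
		/* rings from a skipped/uninitialized IP block stay
		 * zero-initialized, so their adev is still NULL */
		return ring && ring->adev;
	}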

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index d85184b5b35c..30407e55593b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -124,6 +124,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 		struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
 		struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
 		unsigned num_rings;
+		unsigned num_rqs = 0;
 
 		switch (i) {
 		case AMDGPU_HW_IP_GFX:
@@ -166,12 +167,16 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			break;
 		}
 
-		for (j = 0; j < num_rings; ++j)
-			rqs[j] = &rings[j]->sched.sched_rq[priority];
+		for (j = 0; j < num_rings; ++j) {
+			if (rings[j]->adev) {
+				rqs[num_rqs++] =
+					&rings[j]->sched.sched_rq[priority];
+			}
+		}
 
 		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
 			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-						  rqs, num_rings, &ctx->guilty);
+						  rqs, num_rqs, &ctx->guilty);
 		if (r)
 			goto error_cleanup_entities;
 	}
-- 
2.20.1


* [PATCH v2 3/4] drm/amdgpu: Check if fd really is an amdgpu fd.
@ 2019-01-30  1:53 Bas Nieuwenhuizen
  3 siblings, 0 replies; 8+ messages in thread
From: Bas Nieuwenhuizen @ 2019-01-30  1:53 UTC
  To: amd-gfx, dri-devel

Otherwise we interpret the file's private data as drm and amdgpu data
while it might not be, which can lead to memory corruption.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   | 16 ++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 10 +++++++---
 3 files changed, 25 insertions(+), 3 deletions(-)
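
To illustrate what is being fixed: before this patch the sched ioctl
would treat any fd's private_data as a struct drm_file. A hypothetical
user-space repro sketch (assumes the existing uapi names from
amdgpu_drm.h; DRM-master acquisition and error handling omitted):

	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <drm/amdgpu_drm.h>

	int main(void)
	{
		/* must be the DRM master for this ioctl to be reachable */
		int card_fd = open("/dev/dri/card0", O_RDWR);
		int pipefd[2];
		union drm_amdgpu_sched args = {0};

		pipe(pipefd);
		args.in.op = AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE;
		args.in.fd = pipefd[0];	/* deliberately not an amdgpu fd */
		args.in.priority = AMDGPU_CTX_PRIORITY_NORMAL;
		ioctl(card_fd, DRM_IOCTL_AMDGPU_SCHED, &args);
		return 0;
	}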

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d67f8b1dfe80..17290cdb8ed8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -411,6 +411,8 @@ struct amdgpu_fpriv {
 	struct amdgpu_ctx_mgr	ctx_mgr;
 };
 
+int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
+
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		  unsigned size, struct amdgpu_ib *ib);
 void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index c806f984bcc5..90a520034c89 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1176,6 +1176,22 @@ static const struct file_operations amdgpu_driver_kms_fops = {
 #endif
 };
 
+int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
+{
+	struct drm_file *file;
+
+	if (!filp)
+		return -EINVAL;
+
+	if (filp->f_op != &amdgpu_driver_kms_fops) {
+		return -EINVAL;
+	}
+
+	file = filp->private_data;
+	*fpriv = file->driver_priv;
+	return 0;
+}
+
 static bool
 amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
 				 bool in_vblank_irq, int *vpos, int *hpos,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 1cafe8d83a4d..0b70410488b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -54,16 +54,20 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
 						  enum drm_sched_priority priority)
 {
 	struct file *filp = fget(fd);
-	struct drm_file *file;
 	struct amdgpu_fpriv *fpriv;
 	struct amdgpu_ctx *ctx;
 	uint32_t id;
+	int r;
 
 	if (!filp)
 		return -EINVAL;
 
-	file = filp->private_data;
-	fpriv = file->driver_priv;
+	r = amdgpu_file_to_fpriv(filp, &fpriv);
+	if (r) {
+		fput(filp);
+		return r;
+	}
+
 	idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id)
 		amdgpu_ctx_priority_override(ctx, priority);
 
-- 
2.20.1


* [PATCH v2 4/4] drm/amdgpu: Add command to override the context priority.
@ 2019-01-30  1:53 Bas Nieuwenhuizen
  3 siblings, 0 replies; 8+ messages in thread
From: Bas Nieuwenhuizen @ 2019-01-30  1:53 UTC
  To: amd-gfx, dri-devel

Given a master fd we can then override the priority of a context
belonging to another fd.

Christian recommended using these overrides instead of trying to
submit from a master fd. This adds a way to override a single context
rather than the entire process, so we can upgrade a single Vulkan
queue without effectively upgrading the whole process.

The flags field is reused, as it was already required to be 0, so
nothing used it. This is source-incompatible (due to the field
rename), but ABI compatible.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 41 ++++++++++++++++++++++-
 include/uapi/drm/amdgpu_drm.h             |  3 +-
 2 files changed, 42 insertions(+), 2 deletions(-)
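
Usage from the master side would then look roughly like this (sketch
under the same uapi assumptions as the repro in patch 3; error handling
omitted, client_fd and ctx_id are placeholders for the target context):

	union drm_amdgpu_sched args = {0};

	args.in.op = AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE;
	args.in.fd = client_fd;		/* fd that owns the context */
	args.in.ctx_id = ctx_id;	/* the single queue's context */
	args.in.priority = AMDGPU_CTX_PRIORITY_HIGH;
	ioctl(master_fd, DRM_IOCTL_AMDGPU_SCHED, &args);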

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 0b70410488b6..0767a93e4d91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -76,6 +76,39 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
 	return 0;
 }
 
+static int amdgpu_sched_context_priority_override(struct amdgpu_device *adev,
+						  int fd,
+						  unsigned ctx_id,
+						  enum drm_sched_priority priority)
+{
+	struct file *filp = fget(fd);
+	struct amdgpu_fpriv *fpriv;
+	struct amdgpu_ctx *ctx;
+	int r;
+
+	if (!filp)
+		return -EINVAL;
+
+	r = amdgpu_file_to_fpriv(filp, &fpriv);
+	if (r) {
+		fput(filp);
+		return r;
+	}
+
+	ctx = amdgpu_ctx_get(fpriv, ctx_id);
+
+	if (!ctx) {
+		fput(filp);
+		return -EINVAL;
+	}
+
+	amdgpu_ctx_priority_override(ctx, priority);
+	amdgpu_ctx_put(ctx);
+	fput(filp);
+
+	return 0;
+}
+
 int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *filp)
 {
@@ -85,7 +118,7 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
 	int r;
 
 	priority = amdgpu_to_sched_priority(args->in.priority);
-	if (args->in.flags || priority == DRM_SCHED_PRIORITY_INVALID)
+	if (priority == DRM_SCHED_PRIORITY_INVALID)
 		return -EINVAL;
 
 	switch (args->in.op) {
@@ -94,6 +127,12 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
 							   args->in.fd,
 							   priority);
 		break;
+	case AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE:
+		r = amdgpu_sched_context_priority_override(adev,
+							   args->in.fd,
+							   args->in.ctx_id,
+							   priority);
+		break;
 	default:
 		DRM_ERROR("Invalid sched op specified: %d\n", args->in.op);
 		r = -EINVAL;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index faaad04814e4..30fa340790b2 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -275,13 +275,14 @@ union drm_amdgpu_vm {
 
 /* sched ioctl */
 #define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE	1
+#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE	2
 
 struct drm_amdgpu_sched_in {
 	/* AMDGPU_SCHED_OP_* */
 	__u32	op;
 	__u32	fd;
 	__s32	priority;
-	__u32	flags;
+	__u32   ctx_id;
 };
 
 union drm_amdgpu_sched {
-- 
2.20.1


* Re: [PATCH v2 2/4] drm/amdgpu: Only add rqs for initialized rings.
@ 2019-01-30 10:42 Christian König
  0 siblings, 0 replies; 8+ messages in thread
From: Christian König @ 2019-01-30 10:42 UTC
  To: Bas Nieuwenhuizen, amd-gfx@lists.freedesktop.org,
	dri-devel@lists.freedesktop.org

On 30.01.19 02:53, Bas Nieuwenhuizen wrote:
> I don't see another way to figure out whether a ring is initialized
> when the hardware block might not be initialized.
>
> Entities have been fixed up to handle num_rqs = 0.
>
> Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 11 ++++++++---
>   1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index d85184b5b35c..30407e55593b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -124,6 +124,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>   		struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
>   		struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
>   		unsigned num_rings;
> +		unsigned num_rqs = 0;
>   
>   		switch (i) {
>   		case AMDGPU_HW_IP_GFX:
> @@ -166,12 +167,16 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>   			break;
>   		}
>   
> -		for (j = 0; j < num_rings; ++j)
> -			rqs[j] = &rings[j]->sched.sched_rq[priority];
> +		for (j = 0; j < num_rings; ++j) {
> +			if (rings[j]->adev) {

Better do "if (!rings[j]->adev) continue;".
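
That is, something like:

	for (j = 0; j < num_rings; ++j) {
		if (!rings[j]->adev)
			continue;

		rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
	}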

With that done the patch is Reviewed-by: Christian König 
<christian.koenig@amd.com>.

Regards,
Christian.

> [remainder of quoted diff snipped]


* Re: [PATCH v2 1/4] drm/sched: Fix entities with 0 rqs.
@ 2019-01-30 10:43 Christian König
  3 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2019-01-30 10:43 UTC
  To: Bas Nieuwenhuizen, amd-gfx, dri-devel

On 30.01.19 02:53, Bas Nieuwenhuizen wrote:
> Some blocks in amdgpu can have 0 rqs.
>
> Job creation already fails with -ENOENT when entity->rq is NULL,
> so jobs cannot be pushed. Without an rq there is no scheduler to
> pop jobs, and rq selection already does the right thing with a
> list of length 0.
>
> So the operations we need to fix are:
>    - Creation: do not set rq to rq_list[0] if the list can have length 0.
>    - Do not flush any jobs when there is no rq.
>    - On entity destruction, handle the rq == NULL case.
>    - On set_priority, do not try to change the rq if it is NULL.
>
> Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>

One minor comment on patch #2, apart from that the series is 
Reviewed-by: Christian König <christian.koenig@amd.com>.

I'm going to make the change on #2 and pick them up for inclusion in 
amd-staging-drm-next.

Thanks for the help,
Christian.

> [quoted diff snipped]


* Re: [PATCH v2 1/4] drm/sched: Fix entities with 0 rqs.
@ 2019-02-13 21:03 Alex Deucher via amd-gfx
  0 siblings, 1 reply; 8+ messages in thread
From: Alex Deucher via amd-gfx @ 2019-02-13 21:03 UTC
  To: Christian Koenig
  Cc: Mailing list - DRI developers, amd-gfx list, Bas Nieuwenhuizen

On Wed, Jan 30, 2019 at 5:43 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> On 30.01.19 02:53, Bas Nieuwenhuizen wrote:
> > [commit message snipped]
>
> One minor comment on patch #2, apart from that the series is
> Reviewed-by: Christian König <christian.koenig@amd.com>.
>
> I'm going to make the change on #2 and pick them up for inclusion in
> amd-staging-drm-next.

Hi Christian,

I haven't seen these land yet.  Just want to make sure they don't fall
through the cracks.

Alex

>
> Thanks for the help,
> Christian.
>
> > [quoted diff snipped]

* Re: [PATCH v2 1/4] drm/sched: Fix entities with 0 rqs.
@ 2019-02-14  9:08 Christian König via amd-gfx
  0 siblings, 0 replies; 8+ messages in thread
From: Christian König via amd-gfx @ 2019-02-14  9:08 UTC
  To: Alex Deucher, Christian Koenig
  Cc: Christian König, amd-gfx list, Mailing list - DRI developers,
	Bas Nieuwenhuizen

On 13.02.19 22:03, Alex Deucher via amd-gfx wrote:
> On Wed, Jan 30, 2019 at 5:43 AM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> On 30.01.19 02:53, Bas Nieuwenhuizen wrote:
>>> [commit message snipped]
>> One minor comment on patch #2, apart from that the series is
>> Reviewed-by: Christian König <christian.koenig@amd.com>.
>>
>> I'm going to make the change on #2 and pick them up for inclusion in
>> amd-staging-drm-next.
> Hi Christian,
>
> I haven't seen these land yet.  Just want to make sure they don't fall
> through the cracks.

Thanks for the reminder, I'm really having trouble catching up on 
applying patches lately.

Christian.

>
> Alex
>
>> Thanks for the help,
>> Christian.
>>
>>> [quoted diff snipped]
