* [PATCH 1/2] drm/scheduler: only kill entity if last user is killed v2
@ 2018-07-30 11:03 Christian König
  2018-07-30 11:03 ` [PATCH 2/2] drm/scheduler: stop setting rq to NULL Christian König
  2018-07-30 13:34 ` [PATCH 1/2] drm/scheduler: only kill entity if last user is killed v2 Nayan Deshmukh
  0 siblings, 2 replies; 26+ messages in thread
From: Christian König @ 2018-07-30 11:03 UTC (permalink / raw)
  To: dri-devel; +Cc: nayan26deshmukh

Note which task is using the entity and only kill the entity if its
last user is killed. This should prevent problems when entities are
leaked to child processes.

v2: add missing kernel doc

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/scheduler/gpu_scheduler.c | 6 +++++-
 include/drm/gpu_scheduler.h               | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index 3f2fc5e8242a..f563e4fbb4b6 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -275,6 +275,7 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
 long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
 {
 	struct drm_gpu_scheduler *sched;
+	struct task_struct *last_user;
 	long ret = timeout;
 
 	sched = entity->rq->sched;
@@ -295,7 +296,9 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
 
 
 	/* For killed process disable any more IBs enqueue right now */
-	if ((current->flags & PF_EXITING) && (current->exit_code == SIGKILL))
+	last_user = cmpxchg(&entity->last_user, current->group_leader, NULL);
+	if ((!last_user || last_user == current->group_leader) &&
+	    (current->flags & PF_EXITING) && (current->exit_code == SIGKILL))
 		drm_sched_entity_set_rq(entity, NULL);
 
 	return ret;
@@ -541,6 +544,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
 
 	trace_drm_sched_job(sched_job, entity);
 
+	WRITE_ONCE(entity->last_user, current->group_leader);
 	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
 
 	/* first job wakes up scheduler */
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 091b9afcd184..21c648b0b2a1 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -66,6 +66,7 @@ enum drm_sched_priority {
  * @guilty: points to ctx's guilty.
  * @fini_status: contains the exit status in case the process was signalled.
  * @last_scheduled: points to the finished fence of the last scheduled job.
+ * @last_user: last group leader pushing a job into the entity.
  *
  * Entities will emit jobs in order to their corresponding hardware
  * ring, and the scheduler will alternate between entities based on
@@ -85,6 +86,7 @@ struct drm_sched_entity {
 	struct dma_fence_cb		cb;
 	atomic_t			*guilty;
 	struct dma_fence                *last_scheduled;
+	struct task_struct		*last_user;
 };
 
 /**
-- 
2.14.1
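
The handshake this patch introduces can be modeled in userspace C. Below is
a minimal sketch, assuming GCC's __atomic builtins as stand-ins for the
kernel's WRITE_ONCE()/cmpxchg() and a toy struct task in place of
task_struct (all names are illustrative):

/* last_user.c - model of the entity->last_user handshake */
#include <stdio.h>

struct task { int pid; };

static struct task *last_user;	/* models entity->last_user */

/* drm_sched_entity_push_job() side: remember who pushed last */
static void push_job(struct task *leader)
{
	__atomic_store_n(&last_user, leader, __ATOMIC_RELAXED);
}

/* drm_sched_entity_flush() side: cmpxchg() returns the previous value,
 * so only the last pusher (or nobody at all) may disable the entity */
static int may_kill_entity(struct task *leader)
{
	struct task *old = leader;

	__atomic_compare_exchange_n(&last_user, &old, NULL, 0,
				    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
	/* on success or failure, 'old' holds the previous last_user */
	return !old || old == leader;
}

int main(void)
{
	struct task parent = { 1 }, child = { 2 };

	push_job(&parent);
	printf("child may kill:  %d\n", may_kill_entity(&child));	/* 0 */
	printf("parent may kill: %d\n", may_kill_entity(&parent));	/* 1 */
	return 0;
}

Any previous value other than NULL or the caller's own group leader means
another process pushed a job after us, so the entity has to stay alive.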


* [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-07-30 11:03 [PATCH 1/2] drm/scheduler: only kill entity if last user is killed v2 Christian König
@ 2018-07-30 11:03 ` Christian König
  2018-07-30 13:30   ` Nayan Deshmukh
  2018-08-01 22:25   ` Andrey Grodzovsky
  2018-07-30 13:34 ` [PATCH 1/2] drm/scheduler: only kill entity if last user is killed v2 Nayan Deshmukh
  1 sibling, 2 replies; 26+ messages in thread
From: Christian König @ 2018-07-30 11:03 UTC (permalink / raw)
  To: dri-devel; +Cc: nayan26deshmukh

We removed the redundancy of having an extra scheduler field, so we
can't set the rq to NULL any more, or we otherwise won't know which
scheduler to use for the cleanup.

Just remove the entity from the scheduling list instead.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/scheduler/gpu_scheduler.c | 35 +++++++------------------------
 1 file changed, 8 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index f563e4fbb4b6..1b733229201e 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -198,21 +198,6 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
 }
 EXPORT_SYMBOL(drm_sched_entity_init);
 
-/**
- * drm_sched_entity_is_initialized - Query if entity is initialized
- *
- * @sched: Pointer to scheduler instance
- * @entity: The pointer to a valid scheduler entity
- *
- * return true if entity is initialized, false otherwise
-*/
-static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler *sched,
-					    struct drm_sched_entity *entity)
-{
-	return entity->rq != NULL &&
-		entity->rq->sched == sched;
-}
-
 /**
  * drm_sched_entity_is_idle - Check if entity is idle
  *
@@ -224,7 +209,8 @@ static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
 {
 	rmb();
 
-	if (!entity->rq || spsc_queue_peek(&entity->job_queue) == NULL)
+	if (list_empty(&entity->list) ||
+	    spsc_queue_peek(&entity->job_queue) == NULL)
 		return true;
 
 	return false;
@@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
 	long ret = timeout;
 
 	sched = entity->rq->sched;
-	if (!drm_sched_entity_is_initialized(sched, entity))
-		return ret;
 	/**
 	 * The client will not queue more IBs during this fini, consume existing
 	 * queued IBs or discard them on SIGKILL
@@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
 	last_user = cmpxchg(&entity->last_user, current->group_leader, NULL);
 	if ((!last_user || last_user == current->group_leader) &&
 	    (current->flags & PF_EXITING) && (current->exit_code == SIGKILL))
-		drm_sched_entity_set_rq(entity, NULL);
+		drm_sched_rq_remove_entity(entity->rq, entity);
 
 	return ret;
 }
@@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity)
 	struct drm_gpu_scheduler *sched;
 
 	sched = entity->rq->sched;
-	drm_sched_entity_set_rq(entity, NULL);
+	drm_sched_rq_remove_entity(entity->rq, entity);
 
 	/* Consumption of existing IBs wasn't completed. Forcefully
 	 * remove them here.
@@ -416,15 +400,12 @@ void drm_sched_entity_set_rq(struct drm_sched_entity *entity,
 	if (entity->rq == rq)
 		return;
 
-	spin_lock(&entity->rq_lock);
-
-	if (entity->rq)
-		drm_sched_rq_remove_entity(entity->rq, entity);
+	BUG_ON(!rq);
 
+	spin_lock(&entity->rq_lock);
+	drm_sched_rq_remove_entity(entity->rq, entity);
 	entity->rq = rq;
-	if (rq)
-		drm_sched_rq_add_entity(rq, entity);
-
+	drm_sched_rq_add_entity(rq, entity);
 	spin_unlock(&entity->rq_lock);
 }
 EXPORT_SYMBOL(drm_sched_entity_set_rq);
-- 
2.14.1
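
The reworked idle check can be modeled outside the kernel. The sketch below
assumes drm_sched_rq_remove_entity() detaches the entity with
list_del_init(), so the entity's own list node points back at itself and
list_empty() on it reports "not on any run queue"; the minimal intrusive
list stands in for the kernel's list_head and all names are illustrative:

/* idle_check.c - model of the list-based drm_sched_entity_is_idle() */
#include <stdbool.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void init_list_head(struct list_head *h) { h->next = h->prev = h; }
static bool list_empty(const struct list_head *h) { return h->next == h; }

static void list_add(struct list_head *n, struct list_head *head)
{
	n->next = head->next; n->prev = head;
	head->next->prev = n; head->next = n;
}

static void list_del_init(struct list_head *n)
{
	n->next->prev = n->prev; n->prev->next = n->next;
	init_list_head(n);
}

struct entity { struct list_head list; int queued_jobs; };

/* mirrors drm_sched_entity_is_idle() after this patch */
static bool entity_is_idle(const struct entity *e)
{
	return list_empty(&e->list) || e->queued_jobs == 0;
}

int main(void)
{
	struct list_head rq;
	struct entity e = { .queued_jobs = 1 };

	init_list_head(&rq);
	init_list_head(&e.list);

	list_add(&e.list, &rq);		/* drm_sched_rq_add_entity */
	printf("on rq:  idle=%d\n", entity_is_idle(&e));	/* 0 */

	list_del_init(&e.list);		/* drm_sched_rq_remove_entity */
	printf("off rq: idle=%d\n", entity_is_idle(&e));	/* 1 */
	return 0;
}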


* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-07-30 11:03 ` [PATCH 2/2] drm/scheduler: stop setting rq to NULL Christian König
@ 2018-07-30 13:30   ` Nayan Deshmukh
  2018-07-30 20:51     ` Andrey Grodzovsky
  2018-08-01 22:25   ` Andrey Grodzovsky
  1 sibling, 1 reply; 26+ messages in thread
From: Nayan Deshmukh @ 2018-07-30 13:30 UTC (permalink / raw)
  To: ckoenig.leichtzumerken; +Cc: Maling list - DRI developers


On Mon, Jul 30, 2018 at 4:33 PM Christian König <ckoenig.leichtzumerken@gmail.com> wrote:

> We removed the redundancy of having an extra scheduler field, so we
> can't set the rq to NULL any more or otherwise won't know which
> scheduler to use for the cleanup.
>
> Just remove the entity from the scheduling list instead.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
>
Good catch.

Acked-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>

> ---
>  drivers/gpu/drm/scheduler/gpu_scheduler.c | 35
> +++++++------------------------
>  1 file changed, 8 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> index f563e4fbb4b6..1b733229201e 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> @@ -198,21 +198,6 @@ int drm_sched_entity_init(struct drm_sched_entity
> *entity,
>  }
>  EXPORT_SYMBOL(drm_sched_entity_init);
>
> -/**
> - * drm_sched_entity_is_initialized - Query if entity is initialized
> - *
> - * @sched: Pointer to scheduler instance
> - * @entity: The pointer to a valid scheduler entity
> - *
> - * return true if entity is initialized, false otherwise
> -*/
> -static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler
> *sched,
> -                                           struct drm_sched_entity
> *entity)
> -{
> -       return entity->rq != NULL &&
> -               entity->rq->sched == sched;
> -}
> -
>  /**
>   * drm_sched_entity_is_idle - Check if entity is idle
>   *
> @@ -224,7 +209,8 @@ static bool drm_sched_entity_is_idle(struct
> drm_sched_entity *entity)
>  {
>         rmb();
>
> -       if (!entity->rq || spsc_queue_peek(&entity->job_queue) == NULL)
> +       if (list_empty(&entity->list) ||
> +           spsc_queue_peek(&entity->job_queue) == NULL)
>                 return true;
>
>         return false;
> @@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct drm_sched_entity
> *entity, long timeout)
>         long ret = timeout;
>
>         sched = entity->rq->sched;
> -       if (!drm_sched_entity_is_initialized(sched, entity))
> -               return ret;
>         /**
>          * The client will not queue more IBs during this fini, consume
> existing
>          * queued IBs or discard them on SIGKILL
> @@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct drm_sched_entity
> *entity, long timeout)
>         last_user = cmpxchg(&entity->last_user, current->group_leader,
> NULL);
>         if ((!last_user || last_user == current->group_leader) &&
>             (current->flags & PF_EXITING) && (current->exit_code ==
> SIGKILL))
> -               drm_sched_entity_set_rq(entity, NULL);
> +               drm_sched_rq_remove_entity(entity->rq, entity);
>
>         return ret;
>  }
> @@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct drm_sched_entity
> *entity)
>         struct drm_gpu_scheduler *sched;
>
>         sched = entity->rq->sched;
> -       drm_sched_entity_set_rq(entity, NULL);
> +       drm_sched_rq_remove_entity(entity->rq, entity);
>
>         /* Consumption of existing IBs wasn't completed. Forcefully
>          * remove them here.
> @@ -416,15 +400,12 @@ void drm_sched_entity_set_rq(struct drm_sched_entity
> *entity,
>         if (entity->rq == rq)
>                 return;
>
> -       spin_lock(&entity->rq_lock);
> -
> -       if (entity->rq)
> -               drm_sched_rq_remove_entity(entity->rq, entity);
> +       BUG_ON(!rq);
>
> +       spin_lock(&entity->rq_lock);
> +       drm_sched_rq_remove_entity(entity->rq, entity);
>         entity->rq = rq;
> -       if (rq)
> -               drm_sched_rq_add_entity(rq, entity);
> -
> +       drm_sched_rq_add_entity(rq, entity);
>         spin_unlock(&entity->rq_lock);
>  }
>  EXPORT_SYMBOL(drm_sched_entity_set_rq);
> --
> 2.14.1
>
>


* Re: [PATCH 1/2] drm/scheduler: only kill entity if last user is killed v2
  2018-07-30 11:03 [PATCH 1/2] drm/scheduler: only kill entity if last user is killed v2 Christian König
  2018-07-30 11:03 ` [PATCH 2/2] drm/scheduler: stop setting rq to NULL Christian König
@ 2018-07-30 13:34 ` Nayan Deshmukh
  2018-07-30 20:42   ` Andrey Grodzovsky
  1 sibling, 1 reply; 26+ messages in thread
From: Nayan Deshmukh @ 2018-07-30 13:34 UTC (permalink / raw)
  To: ckoenig.leichtzumerken; +Cc: Maling list - DRI developers


Hi Christian,

The code looks good to me. But I was just wondering what will happen when
the last user is killed and some other user tries to push to the entity.

Regards,
Nayan Deshmukh

On Mon, Jul 30, 2018 at 4:33 PM Christian König <ckoenig.leichtzumerken@gmail.com> wrote:

> Note which task is using the entity and only kill it if the last user of
> the entity is killed. This should prevent problems when entities are
> leaked to
> child processes.
>
> v2: add missing kernel doc
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/scheduler/gpu_scheduler.c | 6 +++++-
>  include/drm/gpu_scheduler.h               | 2 ++
>  2 files changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> index 3f2fc5e8242a..f563e4fbb4b6 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> @@ -275,6 +275,7 @@ static void drm_sched_entity_kill_jobs_cb(struct
> dma_fence *f,
>  long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
>  {
>         struct drm_gpu_scheduler *sched;
> +       struct task_struct *last_user;
>         long ret = timeout;
>
>         sched = entity->rq->sched;
> @@ -295,7 +296,9 @@ long drm_sched_entity_flush(struct drm_sched_entity
> *entity, long timeout)
>
>
>         /* For killed process disable any more IBs enqueue right now */
> -       if ((current->flags & PF_EXITING) && (current->exit_code ==
> SIGKILL))
> +       last_user = cmpxchg(&entity->last_user, current->group_leader,
> NULL);
> +       if ((!last_user || last_user == current->group_leader) &&
> +           (current->flags & PF_EXITING) && (current->exit_code ==
> SIGKILL))
>                 drm_sched_entity_set_rq(entity, NULL);
>
>         return ret;
> @@ -541,6 +544,7 @@ void drm_sched_entity_push_job(struct drm_sched_job
> *sched_job,
>
>         trace_drm_sched_job(sched_job, entity);
>
> +       WRITE_ONCE(entity->last_user, current->group_leader);
>         first = spsc_queue_push(&entity->job_queue,
> &sched_job->queue_node);
>
>         /* first job wakes up scheduler */
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 091b9afcd184..21c648b0b2a1 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -66,6 +66,7 @@ enum drm_sched_priority {
>   * @guilty: points to ctx's guilty.
>   * @fini_status: contains the exit status in case the process was
> signalled.
>   * @last_scheduled: points to the finished fence of the last scheduled
> job.
> + * @last_user: last group leader pushing a job into the entity.
>   *
>   * Entities will emit jobs in order to their corresponding hardware
>   * ring, and the scheduler will alternate between entities based on
> @@ -85,6 +86,7 @@ struct drm_sched_entity {
>         struct dma_fence_cb             cb;
>         atomic_t                        *guilty;
>         struct dma_fence                *last_scheduled;
> +       struct task_struct              *last_user;
>  };
>
>  /**
> --
> 2.14.1
>
>


* Re: [PATCH 1/2] drm/scheduler: only kill entity if last user is killed v2
  2018-07-30 13:34 ` [PATCH 1/2] drm/scheduler: only kill entity if last user is killed v2 Nayan Deshmukh
@ 2018-07-30 20:42   ` Andrey Grodzovsky
  2018-07-31  9:11     ` Nayan Deshmukh
  0 siblings, 1 reply; 26+ messages in thread
From: Andrey Grodzovsky @ 2018-07-30 20:42 UTC (permalink / raw)
  To: Nayan Deshmukh, ckoenig.leichtzumerken; +Cc: Maling list - DRI developers


I believe that in this case the

	if (!entity->rq) {
		DRM_ERROR...
		return;
	}

clause will take place.
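
For context, that guard sits in the first-job path of
drm_sched_entity_push_job (the same hunk is quoted in full later in the
thread):

	/* first job wakes up scheduler */
	if (first) {
		/* Add the entity to the run queue */
		spin_lock(&entity->rq_lock);
		if (!entity->rq) {
			DRM_ERROR("Trying to push to a killed entity\n");
			spin_unlock(&entity->rq_lock);
			return;
		}
		drm_sched_rq_add_entity(entity->rq, entity);
		spin_unlock(&entity->rq_lock);
		drm_sched_wakeup(entity->rq->sched);
	}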

P.S. I remember we planned to actually propagate the error back to the
caller, so I guess we should take care of this sooner or later.

The change is Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

Andrey


On 07/30/2018 09:34 AM, Nayan Deshmukh wrote:
> Hi Christian,
>
> The code looks good to me. But I was just wondering what will happen 
> when the last user is killed and some other user tries to push to the 
> entity.
>
> Regards,
> Nayan Deshmukh
>
> On Mon, Jul 30, 2018 at 4:33 PM Christian König 
> <ckoenig.leichtzumerken@gmail.com 
> <mailto:ckoenig.leichtzumerken@gmail.com>> wrote:
>
>     Note which task is using the entity and only kill it if the last
>     user of
>     the entity is killed. This should prevent problems when entities
>     are leaked to
>     child processes.
>
>     v2: add missing kernel doc
>
>     Signed-off-by: Christian König <christian.koenig@amd.com
>     <mailto:christian.koenig@amd.com>>
>     ---
>      drivers/gpu/drm/scheduler/gpu_scheduler.c | 6 +++++-
>      include/drm/gpu_scheduler.h               | 2 ++
>      2 files changed, 7 insertions(+), 1 deletion(-)
>
>     diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     index 3f2fc5e8242a..f563e4fbb4b6 100644
>     --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     @@ -275,6 +275,7 @@ static void
>     drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
>      long drm_sched_entity_flush(struct drm_sched_entity *entity, long
>     timeout)
>      {
>             struct drm_gpu_scheduler *sched;
>     +       struct task_struct *last_user;
>             long ret = timeout;
>
>             sched = entity->rq->sched;
>     @@ -295,7 +296,9 @@ long drm_sched_entity_flush(struct
>     drm_sched_entity *entity, long timeout)
>
>
>             /* For killed process disable any more IBs enqueue right
>     now */
>     -       if ((current->flags & PF_EXITING) && (current->exit_code
>     == SIGKILL))
>     +       last_user = cmpxchg(&entity->last_user,
>     current->group_leader, NULL);
>     +       if ((!last_user || last_user == current->group_leader) &&
>     +           (current->flags & PF_EXITING) && (current->exit_code
>     == SIGKILL))
>                     drm_sched_entity_set_rq(entity, NULL);
>
>             return ret;
>     @@ -541,6 +544,7 @@ void drm_sched_entity_push_job(struct
>     drm_sched_job *sched_job,
>
>             trace_drm_sched_job(sched_job, entity);
>
>     +       WRITE_ONCE(entity->last_user, current->group_leader);
>             first = spsc_queue_push(&entity->job_queue,
>     &sched_job->queue_node);
>
>             /* first job wakes up scheduler */
>     diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>     index 091b9afcd184..21c648b0b2a1 100644
>     --- a/include/drm/gpu_scheduler.h
>     +++ b/include/drm/gpu_scheduler.h
>     @@ -66,6 +66,7 @@ enum drm_sched_priority {
>       * @guilty: points to ctx's guilty.
>       * @fini_status: contains the exit status in case the process was
>     signalled.
>       * @last_scheduled: points to the finished fence of the last
>     scheduled job.
>     + * @last_user: last group leader pushing a job into the entity.
>       *
>       * Entities will emit jobs in order to their corresponding hardware
>       * ring, and the scheduler will alternate between entities based on
>     @@ -85,6 +86,7 @@ struct drm_sched_entity {
>             struct dma_fence_cb             cb;
>             atomic_t                        *guilty;
>             struct dma_fence                *last_scheduled;
>     +       struct task_struct              *last_user;
>      };
>
>      /**
>     -- 
>     2.14.1
>
>
>



* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-07-30 13:30   ` Nayan Deshmukh
@ 2018-07-30 20:51     ` Andrey Grodzovsky
  2018-07-31  6:50       ` Christian König
  0 siblings, 1 reply; 26+ messages in thread
From: Andrey Grodzovsky @ 2018-07-30 20:51 UTC (permalink / raw)
  To: Nayan Deshmukh, ckoenig.leichtzumerken; +Cc: Maling list - DRI developers




On 07/30/2018 09:30 AM, Nayan Deshmukh wrote:
>
>
> On Mon, Jul 30, 2018 at 4:33 PM Christian König 
> <ckoenig.leichtzumerken@gmail.com 
> <mailto:ckoenig.leichtzumerken@gmail.com>> wrote:
>
>     We removed the redundancy of having an extra scheduler field, so we
>     can't set the rq to NULL any more or otherwise won't know which
>     scheduler to use for the cleanup.
>
>     Just remove the entity from the scheduling list instead.
>
>     Signed-off-by: Christian König <christian.koenig@amd.com
>     <mailto:christian.koenig@amd.com>>
>
> Good catch.
>
> Acked-by: Nayan Deshmukh <nayan26deshmukh@gmail.com 
> <mailto:nayan26deshmukh@gmail.com>>
>
>     ---
>      drivers/gpu/drm/scheduler/gpu_scheduler.c | 35
>     +++++++------------------------
>      1 file changed, 8 insertions(+), 27 deletions(-)
>
>     diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     index f563e4fbb4b6..1b733229201e 100644
>     --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     @@ -198,21 +198,6 @@ int drm_sched_entity_init(struct
>     drm_sched_entity *entity,
>      }
>      EXPORT_SYMBOL(drm_sched_entity_init);
>
>     -/**
>     - * drm_sched_entity_is_initialized - Query if entity is initialized
>     - *
>     - * @sched: Pointer to scheduler instance
>     - * @entity: The pointer to a valid scheduler entity
>     - *
>     - * return true if entity is initialized, false otherwise
>     -*/
>     -static bool drm_sched_entity_is_initialized(struct
>     drm_gpu_scheduler *sched,
>     -                                           struct
>     drm_sched_entity *entity)
>     -{
>     -       return entity->rq != NULL &&
>     -               entity->rq->sched == sched;
>     -}
>     -
>      /**
>       * drm_sched_entity_is_idle - Check if entity is idle
>       *
>     @@ -224,7 +209,8 @@ static bool drm_sched_entity_is_idle(struct
>     drm_sched_entity *entity)
>      {
>             rmb();
>
>     -       if (!entity->rq || spsc_queue_peek(&entity->job_queue) ==
>     NULL)
>     +       if (list_empty(&entity->list) ||
>     +           spsc_queue_peek(&entity->job_queue) == NULL)
>                     return true;
>
>             return false;
>     @@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct
>     drm_sched_entity *entity, long timeout)
>             long ret = timeout;
>
>             sched = entity->rq->sched;
>     -       if (!drm_sched_entity_is_initialized(sched, entity))
>     -               return ret;
>

Just to be clear, you remove this function because it's redundant to 
call it?

Andrey

>             /**
>              * The client will not queue more IBs during this fini,
>     consume existing
>              * queued IBs or discard them on SIGKILL
>     @@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct
>     drm_sched_entity *entity, long timeout)
>             last_user = cmpxchg(&entity->last_user,
>     current->group_leader, NULL);
>             if ((!last_user || last_user == current->group_leader) &&
>                 (current->flags & PF_EXITING) && (current->exit_code
>     == SIGKILL))
>     -               drm_sched_entity_set_rq(entity, NULL);
>     +               drm_sched_rq_remove_entity(entity->rq, entity);
>
>             return ret;
>      }
>     @@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct
>     drm_sched_entity *entity)
>             struct drm_gpu_scheduler *sched;
>
>             sched = entity->rq->sched;
>     -       drm_sched_entity_set_rq(entity, NULL);
>     +       drm_sched_rq_remove_entity(entity->rq, entity);
>
>             /* Consumption of existing IBs wasn't completed. Forcefully
>              * remove them here.
>     @@ -416,15 +400,12 @@ void drm_sched_entity_set_rq(struct
>     drm_sched_entity *entity,
>             if (entity->rq == rq)
>                     return;
>
>     -       spin_lock(&entity->rq_lock);
>     -
>     -       if (entity->rq)
>     -               drm_sched_rq_remove_entity(entity->rq, entity);
>     +       BUG_ON(!rq);
>
>     +       spin_lock(&entity->rq_lock);
>     +       drm_sched_rq_remove_entity(entity->rq, entity);
>             entity->rq = rq;
>     -       if (rq)
>     -               drm_sched_rq_add_entity(rq, entity);
>     -
>     +       drm_sched_rq_add_entity(rq, entity);
>             spin_unlock(&entity->rq_lock);
>      }
>      EXPORT_SYMBOL(drm_sched_entity_set_rq);
>     -- 
>     2.14.1
>
>
>



* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-07-30 20:51     ` Andrey Grodzovsky
@ 2018-07-31  6:50       ` Christian König
  2018-07-31 14:16         ` Andrey Grodzovsky
  0 siblings, 1 reply; 26+ messages in thread
From: Christian König @ 2018-07-31  6:50 UTC (permalink / raw)
  To: Andrey Grodzovsky, Nayan Deshmukh; +Cc: Maling list - DRI developers


On 30.07.2018 at 22:51, Andrey Grodzovsky wrote:
>
>
>
> On 07/30/2018 09:30 AM, Nayan Deshmukh wrote:
>>
>>
>> On Mon, Jul 30, 2018 at 4:33 PM Christian König 
>> <ckoenig.leichtzumerken@gmail.com 
>> <mailto:ckoenig.leichtzumerken@gmail.com>> wrote:
>>
>>     We removed the redundancy of having an extra scheduler field, so we
>>     can't set the rq to NULL any more or otherwise won't know which
>>     scheduler to use for the cleanup.
>>
>>     Just remove the entity from the scheduling list instead.
>>
>>     Signed-off-by: Christian König <christian.koenig@amd.com
>>     <mailto:christian.koenig@amd.com>>
>>
>> Good catch.
>>
>> Acked-by: Nayan Deshmukh <nayan26deshmukh@gmail.com 
>> <mailto:nayan26deshmukh@gmail.com>>
>>
>>     ---
>>      drivers/gpu/drm/scheduler/gpu_scheduler.c | 35
>>     +++++++------------------------
>>      1 file changed, 8 insertions(+), 27 deletions(-)
>>
>>     diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>     b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>     index f563e4fbb4b6..1b733229201e 100644
>>     --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>     +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>     @@ -198,21 +198,6 @@ int drm_sched_entity_init(struct
>>     drm_sched_entity *entity,
>>      }
>>      EXPORT_SYMBOL(drm_sched_entity_init);
>>
>>     -/**
>>     - * drm_sched_entity_is_initialized - Query if entity is initialized
>>     - *
>>     - * @sched: Pointer to scheduler instance
>>     - * @entity: The pointer to a valid scheduler entity
>>     - *
>>     - * return true if entity is initialized, false otherwise
>>     -*/
>>     -static bool drm_sched_entity_is_initialized(struct
>>     drm_gpu_scheduler *sched,
>>     -                                           struct
>>     drm_sched_entity *entity)
>>     -{
>>     -       return entity->rq != NULL &&
>>     -               entity->rq->sched == sched;
>>     -}
>>     -
>>      /**
>>       * drm_sched_entity_is_idle - Check if entity is idle
>>       *
>>     @@ -224,7 +209,8 @@ static bool drm_sched_entity_is_idle(struct
>>     drm_sched_entity *entity)
>>      {
>>             rmb();
>>
>>     -       if (!entity->rq || spsc_queue_peek(&entity->job_queue) ==
>>     NULL)
>>     +       if (list_empty(&entity->list) ||
>>     +           spsc_queue_peek(&entity->job_queue) == NULL)
>>                     return true;
>>
>>             return false;
>>     @@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct
>>     drm_sched_entity *entity, long timeout)
>>             long ret = timeout;
>>
>>             sched = entity->rq->sched;
>>     -       if (!drm_sched_entity_is_initialized(sched, entity))
>>     -               return ret;
>>
>
> Just to be clear, you remove this function because it's redundant to 
> call it?

Yes, exactly.

Christian.

>
> Andrey
>
>>     /**
>>              * The client will not queue more IBs during this fini,
>>     consume existing
>>              * queued IBs or discard them on SIGKILL
>>     @@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct
>>     drm_sched_entity *entity, long timeout)
>>             last_user = cmpxchg(&entity->last_user,
>>     current->group_leader, NULL);
>>             if ((!last_user || last_user == current->group_leader) &&
>>                 (current->flags & PF_EXITING) && (current->exit_code
>>     == SIGKILL))
>>     -               drm_sched_entity_set_rq(entity, NULL);
>>     +               drm_sched_rq_remove_entity(entity->rq, entity);
>>
>>             return ret;
>>      }
>>     @@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct
>>     drm_sched_entity *entity)
>>             struct drm_gpu_scheduler *sched;
>>
>>             sched = entity->rq->sched;
>>     -       drm_sched_entity_set_rq(entity, NULL);
>>     +       drm_sched_rq_remove_entity(entity->rq, entity);
>>
>>             /* Consumption of existing IBs wasn't completed. Forcefully
>>              * remove them here.
>>     @@ -416,15 +400,12 @@ void drm_sched_entity_set_rq(struct
>>     drm_sched_entity *entity,
>>             if (entity->rq == rq)
>>                     return;
>>
>>     -       spin_lock(&entity->rq_lock);
>>     -
>>     -       if (entity->rq)
>>     -               drm_sched_rq_remove_entity(entity->rq, entity);
>>     +       BUG_ON(!rq);
>>
>>     +       spin_lock(&entity->rq_lock);
>>     +       drm_sched_rq_remove_entity(entity->rq, entity);
>>             entity->rq = rq;
>>     -       if (rq)
>>     -               drm_sched_rq_add_entity(rq, entity);
>>     -
>>     +       drm_sched_rq_add_entity(rq, entity);
>>             spin_unlock(&entity->rq_lock);
>>      }
>>      EXPORT_SYMBOL(drm_sched_entity_set_rq);
>>     -- 
>>     2.14.1
>>
>>
>>
>



* Re: [PATCH 1/2] drm/scheduler: only kill entity if last user is killed v2
  2018-07-30 20:42   ` Andrey Grodzovsky
@ 2018-07-31  9:11     ` Nayan Deshmukh
  0 siblings, 0 replies; 26+ messages in thread
From: Nayan Deshmukh @ 2018-07-31  9:11 UTC (permalink / raw)
  To: Andrey.Grodzovsky; +Cc: ckoenig.leichtzumerken, Maling list - DRI developers


That makes sense. The change is Acked-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>

On Tue, Jul 31, 2018 at 2:12 AM Andrey Grodzovsky <Andrey.Grodzovsky@amd.com> wrote:

> I believe that in this case
>
> if (!entity->rq) {
>
>     DRM_ERROR...
>
>     return;
>
> }
>
> clause will take place.
>
> P.S I remember we planned to actually propagate the error back to the
> caller so i guess we should take care of this sooner or later.
>
> The change is Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>
> Andrey
>
> On 07/30/2018 09:34 AM, Nayan Deshmukh wrote:
>
> Hi Christian,
>
> The code looks good to me. But I was just wondering what will happen when
> the last user is killed and some other user tries to push to the entity.
>
> Regards,
> Nayan Deshmukh
>
> On Mon, Jul 30, 2018 at 4:33 PM Christian König <ckoenig.leichtzumerken@gmail.com> wrote:
>
>> Note which task is using the entity and only kill it if the last user of
>> the entity is killed. This should prevent problems when entities are
>> leaked to
>> child processes.
>>
>> v2: add missing kernel doc
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>  drivers/gpu/drm/scheduler/gpu_scheduler.c | 6 +++++-
>>  include/drm/gpu_scheduler.h               | 2 ++
>>  2 files changed, 7 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> index 3f2fc5e8242a..f563e4fbb4b6 100644
>> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> @@ -275,6 +275,7 @@ static void drm_sched_entity_kill_jobs_cb(struct
>> dma_fence *f,
>>  long drm_sched_entity_flush(struct drm_sched_entity *entity, long
>> timeout)
>>  {
>>         struct drm_gpu_scheduler *sched;
>> +       struct task_struct *last_user;
>>         long ret = timeout;
>>
>>         sched = entity->rq->sched;
>> @@ -295,7 +296,9 @@ long drm_sched_entity_flush(struct drm_sched_entity
>> *entity, long timeout)
>>
>>
>>         /* For killed process disable any more IBs enqueue right now */
>> -       if ((current->flags & PF_EXITING) && (current->exit_code ==
>> SIGKILL))
>> +       last_user = cmpxchg(&entity->last_user, current->group_leader,
>> NULL);
>> +       if ((!last_user || last_user == current->group_leader) &&
>> +           (current->flags & PF_EXITING) && (current->exit_code ==
>> SIGKILL))
>>                 drm_sched_entity_set_rq(entity, NULL);
>>
>>         return ret;
>> @@ -541,6 +544,7 @@ void drm_sched_entity_push_job(struct drm_sched_job
>> *sched_job,
>>
>>         trace_drm_sched_job(sched_job, entity);
>>
>> +       WRITE_ONCE(entity->last_user, current->group_leader);
>>         first = spsc_queue_push(&entity->job_queue,
>> &sched_job->queue_node);
>>
>>         /* first job wakes up scheduler */
>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index 091b9afcd184..21c648b0b2a1 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -66,6 +66,7 @@ enum drm_sched_priority {
>>   * @guilty: points to ctx's guilty.
>>   * @fini_status: contains the exit status in case the process was
>> signalled.
>>   * @last_scheduled: points to the finished fence of the last scheduled
>> job.
>> + * @last_user: last group leader pushing a job into the entity.
>>   *
>>   * Entities will emit jobs in order to their corresponding hardware
>>   * ring, and the scheduler will alternate between entities based on
>> @@ -85,6 +86,7 @@ struct drm_sched_entity {
>>         struct dma_fence_cb             cb;
>>         atomic_t                        *guilty;
>>         struct dma_fence                *last_scheduled;
>> +       struct task_struct              *last_user;
>>  };
>>
>>  /**
>> --
>> 2.14.1
>>
>>
>
>
>
>


* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-07-31  6:50       ` Christian König
@ 2018-07-31 14:16         ` Andrey Grodzovsky
  0 siblings, 0 replies; 26+ messages in thread
From: Andrey Grodzovsky @ 2018-07-31 14:16 UTC (permalink / raw)
  To: christian.koenig, Nayan Deshmukh; +Cc: Maling list - DRI developers


Change is Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

Andrey


On 07/31/2018 02:50 AM, Christian König wrote:
> On 30.07.2018 at 22:51, Andrey Grodzovsky wrote:
>>
>>
>>
>> On 07/30/2018 09:30 AM, Nayan Deshmukh wrote:
>>>
>>>
>>> On Mon, Jul 30, 2018 at 4:33 PM Christian König 
>>> <ckoenig.leichtzumerken@gmail.com 
>>> <mailto:ckoenig.leichtzumerken@gmail.com>> wrote:
>>>
>>>     We removed the redundancy of having an extra scheduler field, so we
>>>     can't set the rq to NULL any more or otherwise won't know which
>>>     scheduler to use for the cleanup.
>>>
>>>     Just remove the entity from the scheduling list instead.
>>>
>>>     Signed-off-by: Christian König <christian.koenig@amd.com
>>>     <mailto:christian.koenig@amd.com>>
>>>
>>> Good catch.
>>>
>>> Acked-by: Nayan Deshmukh <nayan26deshmukh@gmail.com 
>>> <mailto:nayan26deshmukh@gmail.com>>
>>>
>>>     ---
>>>      drivers/gpu/drm/scheduler/gpu_scheduler.c | 35
>>>     +++++++------------------------
>>>      1 file changed, 8 insertions(+), 27 deletions(-)
>>>
>>>     diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>>     b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>>     index f563e4fbb4b6..1b733229201e 100644
>>>     --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>>     +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>>     @@ -198,21 +198,6 @@ int drm_sched_entity_init(struct
>>>     drm_sched_entity *entity,
>>>      }
>>>      EXPORT_SYMBOL(drm_sched_entity_init);
>>>
>>>     -/**
>>>     - * drm_sched_entity_is_initialized - Query if entity is initialized
>>>     - *
>>>     - * @sched: Pointer to scheduler instance
>>>     - * @entity: The pointer to a valid scheduler entity
>>>     - *
>>>     - * return true if entity is initialized, false otherwise
>>>     -*/
>>>     -static bool drm_sched_entity_is_initialized(struct
>>>     drm_gpu_scheduler *sched,
>>>     -                                           struct
>>>     drm_sched_entity *entity)
>>>     -{
>>>     -       return entity->rq != NULL &&
>>>     -               entity->rq->sched == sched;
>>>     -}
>>>     -
>>>      /**
>>>       * drm_sched_entity_is_idle - Check if entity is idle
>>>       *
>>>     @@ -224,7 +209,8 @@ static bool drm_sched_entity_is_idle(struct
>>>     drm_sched_entity *entity)
>>>      {
>>>             rmb();
>>>
>>>     -       if (!entity->rq || spsc_queue_peek(&entity->job_queue)
>>>     == NULL)
>>>     +       if (list_empty(&entity->list) ||
>>>     +           spsc_queue_peek(&entity->job_queue) == NULL)
>>>                     return true;
>>>
>>>             return false;
>>>     @@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct
>>>     drm_sched_entity *entity, long timeout)
>>>             long ret = timeout;
>>>
>>>             sched = entity->rq->sched;
>>>     -       if (!drm_sched_entity_is_initialized(sched, entity))
>>>     -               return ret;
>>>
>>
>> Just to be clear, you remove this function because it's redundant to 
>> call it?
>
> Yes, exactly.
>
> Christian.
>
>>
>> Andrey
>>
>>>       /**
>>>              * The client will not queue more IBs during this fini,
>>>     consume existing
>>>              * queued IBs or discard them on SIGKILL
>>>     @@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct
>>>     drm_sched_entity *entity, long timeout)
>>>             last_user = cmpxchg(&entity->last_user,
>>>     current->group_leader, NULL);
>>>             if ((!last_user || last_user == current->group_leader) &&
>>>                 (current->flags & PF_EXITING) && (current->exit_code
>>>     == SIGKILL))
>>>     -               drm_sched_entity_set_rq(entity, NULL);
>>>     +               drm_sched_rq_remove_entity(entity->rq, entity);
>>>
>>>             return ret;
>>>      }
>>>     @@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct
>>>     drm_sched_entity *entity)
>>>             struct drm_gpu_scheduler *sched;
>>>
>>>             sched = entity->rq->sched;
>>>     -       drm_sched_entity_set_rq(entity, NULL);
>>>     +       drm_sched_rq_remove_entity(entity->rq, entity);
>>>
>>>             /* Consumption of existing IBs wasn't completed. Forcefully
>>>              * remove them here.
>>>     @@ -416,15 +400,12 @@ void drm_sched_entity_set_rq(struct
>>>     drm_sched_entity *entity,
>>>             if (entity->rq == rq)
>>>                     return;
>>>
>>>     -       spin_lock(&entity->rq_lock);
>>>     -
>>>     -       if (entity->rq)
>>>     -               drm_sched_rq_remove_entity(entity->rq, entity);
>>>     +       BUG_ON(!rq);
>>>
>>>     +       spin_lock(&entity->rq_lock);
>>>     +       drm_sched_rq_remove_entity(entity->rq, entity);
>>>             entity->rq = rq;
>>>     -       if (rq)
>>>     -               drm_sched_rq_add_entity(rq, entity);
>>>     -
>>>     +       drm_sched_rq_add_entity(rq, entity);
>>>             spin_unlock(&entity->rq_lock);
>>>      }
>>>      EXPORT_SYMBOL(drm_sched_entity_set_rq);
>>>     -- 
>>>     2.14.1
>>>
>>>
>>>
>>
>



* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-07-30 11:03 ` [PATCH 2/2] drm/scheduler: stop setting rq to NULL Christian König
  2018-07-30 13:30   ` Nayan Deshmukh
@ 2018-08-01 22:25   ` Andrey Grodzovsky
  2018-08-02  6:42     ` Christian König
  2018-08-02  6:43     ` Nayan Deshmukh
  1 sibling, 2 replies; 26+ messages in thread
From: Andrey Grodzovsky @ 2018-08-01 22:25 UTC (permalink / raw)
  To: Christian König, dri-devel; +Cc: nayan26deshmukh

Thinking again about this change and 53d5f1e "drm/scheduler: move idle
entities to scheduler with less load v2":

Looks to me like the use case for which fc9a539 "drm/scheduler: add NULL
pointer check for run queue (v2)" was done will not work anymore.

First of all, in drm_sched_entity_push_job 'if (!entity->rq)' will never
be true any more since we stopped setting entity->rq to NULL in
drm_sched_entity_flush.

Second of all, even after we removed the entity from the rq in
drm_sched_entity_flush to terminate any subsequent submissions to the
queue, the other thread doing push job can just acquire a run queue
again from drm_sched_entity_get_free_sched and continue submission, so
you can't substitute 'if (!entity->rq)' with
'if (list_empty(&entity->list))'.

What about adding a 'stopped' flag to drm_sched_entity, set in
drm_sched_entity_flush, and having drm_sched_entity_push_job check for
'if (!entity->stopped)' instead of 'if (!entity->rq)'?
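
A sketch of what that could look like (illustrative only, not a posted
patch; the flag name and its exact placement are assumptions):

	/* hypothetical new member of struct drm_sched_entity */
	bool				stopped;

	/* in drm_sched_entity_flush(): mark the entity dead before
	 * removing it from its run queue */
	if ((!last_user || last_user == current->group_leader) &&
	    (current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) {
		entity->stopped = true;
		drm_sched_rq_remove_entity(entity->rq, entity);
	}

	/* in drm_sched_entity_push_job(): reject work for a stopped entity,
	 * independent of whether it got re-added to some rq meanwhile */
	if (entity->stopped) {
		DRM_ERROR("Trying to push to a killed entity\n");
		return;
	}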

Andrey


On 07/30/2018 07:03 AM, Christian König wrote:
> We removed the redundancy of having an extra scheduler field, so we
> can't set the rq to NULL any more or otherwise won't know which
> scheduler to use for the cleanup.
>
> Just remove the entity from the scheduling list instead.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/scheduler/gpu_scheduler.c | 35 +++++++------------------------
>   1 file changed, 8 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> index f563e4fbb4b6..1b733229201e 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> @@ -198,21 +198,6 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
>   }
>   EXPORT_SYMBOL(drm_sched_entity_init);
>   
> -/**
> - * drm_sched_entity_is_initialized - Query if entity is initialized
> - *
> - * @sched: Pointer to scheduler instance
> - * @entity: The pointer to a valid scheduler entity
> - *
> - * return true if entity is initialized, false otherwise
> -*/
> -static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler *sched,
> -					    struct drm_sched_entity *entity)
> -{
> -	return entity->rq != NULL &&
> -		entity->rq->sched == sched;
> -}
> -
>   /**
>    * drm_sched_entity_is_idle - Check if entity is idle
>    *
> @@ -224,7 +209,8 @@ static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
>   {
>   	rmb();
>   
> -	if (!entity->rq || spsc_queue_peek(&entity->job_queue) == NULL)
> +	if (list_empty(&entity->list) ||
> +	    spsc_queue_peek(&entity->job_queue) == NULL)
>   		return true;
>   
>   	return false;
> @@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
>   	long ret = timeout;
>   
>   	sched = entity->rq->sched;
> -	if (!drm_sched_entity_is_initialized(sched, entity))
> -		return ret;
>   	/**
>   	 * The client will not queue more IBs during this fini, consume existing
>   	 * queued IBs or discard them on SIGKILL
> @@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
>   	last_user = cmpxchg(&entity->last_user, current->group_leader, NULL);
>   	if ((!last_user || last_user == current->group_leader) &&
>   	    (current->flags & PF_EXITING) && (current->exit_code == SIGKILL))
> -		drm_sched_entity_set_rq(entity, NULL);
> +		drm_sched_rq_remove_entity(entity->rq, entity);
>   
>   	return ret;
>   }
> @@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity)
>   	struct drm_gpu_scheduler *sched;
>   
>   	sched = entity->rq->sched;
> -	drm_sched_entity_set_rq(entity, NULL);
> +	drm_sched_rq_remove_entity(entity->rq, entity);
>   
>   	/* Consumption of existing IBs wasn't completed. Forcefully
>   	 * remove them here.
> @@ -416,15 +400,12 @@ void drm_sched_entity_set_rq(struct drm_sched_entity *entity,
>   	if (entity->rq == rq)
>   		return;
>   
> -	spin_lock(&entity->rq_lock);
> -
> -	if (entity->rq)
> -		drm_sched_rq_remove_entity(entity->rq, entity);
> +	BUG_ON(!rq);
>   
> +	spin_lock(&entity->rq_lock);
> +	drm_sched_rq_remove_entity(entity->rq, entity);
>   	entity->rq = rq;
> -	if (rq)
> -		drm_sched_rq_add_entity(rq, entity);
> -
> +	drm_sched_rq_add_entity(rq, entity);
>   	spin_unlock(&entity->rq_lock);
>   }
>   EXPORT_SYMBOL(drm_sched_entity_set_rq);


* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-08-01 22:25   ` Andrey Grodzovsky
@ 2018-08-02  6:42     ` Christian König
  2018-08-02  6:47       ` Nayan Deshmukh
  2018-08-02  6:43     ` Nayan Deshmukh
  1 sibling, 1 reply; 26+ messages in thread
From: Christian König @ 2018-08-02  6:42 UTC (permalink / raw)
  To: Andrey Grodzovsky, dri-devel; +Cc: nayan26deshmukh

On 02.08.2018 at 00:25, Andrey Grodzovsky wrote:
> Thinking again about this change and 53d5f1e "drm/scheduler: move idle
> entities to scheduler with less load v2":
>
> Looks to me like the use case for which fc9a539 "drm/scheduler: add NULL
> pointer check for run queue (v2)" was done will not work anymore.
>
> First of all, in drm_sched_entity_push_job 'if (!entity->rq)' will
> never be true any more since we stopped setting entity->rq to NULL in
> drm_sched_entity_flush.

Good point, going to remove that.

>
> Second of all, even after we removed the entity from the rq in
> drm_sched_entity_flush to terminate any subsequent submissions to the
> queue, the other thread doing push job can just acquire a run queue
> again from drm_sched_entity_get_free_sched and continue submission

That is actually desired.

When another process is now using the entity to submit jobs, adding it
back to the rq is actually the right thing to do because the entity is
still in use.

Christian.

> so you can't substitute 'if (!entity->rq)' with
> 'if (list_empty(&entity->list))'.
>
> What about adding a 'stopped' flag to drm_sched_entity, set in
> drm_sched_entity_flush, and having drm_sched_entity_push_job check for
> 'if (!entity->stopped)' instead of 'if (!entity->rq)'?
>
> Andrey
>
>
> On 07/30/2018 07:03 AM, Christian König wrote:
>> We removed the redundancy of having an extra scheduler field, so we
>> can't set the rq to NULL any more or otherwise won't know which
>> scheduler to use for the cleanup.
>>
>> Just remove the entity from the scheduling list instead.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/scheduler/gpu_scheduler.c | 35 
>> +++++++------------------------
>>   1 file changed, 8 insertions(+), 27 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c 
>> b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> index f563e4fbb4b6..1b733229201e 100644
>> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> @@ -198,21 +198,6 @@ int drm_sched_entity_init(struct 
>> drm_sched_entity *entity,
>>   }
>>   EXPORT_SYMBOL(drm_sched_entity_init);
>>   -/**
>> - * drm_sched_entity_is_initialized - Query if entity is initialized
>> - *
>> - * @sched: Pointer to scheduler instance
>> - * @entity: The pointer to a valid scheduler entity
>> - *
>> - * return true if entity is initialized, false otherwise
>> -*/
>> -static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler 
>> *sched,
>> -                        struct drm_sched_entity *entity)
>> -{
>> -    return entity->rq != NULL &&
>> -        entity->rq->sched == sched;
>> -}
>> -
>>   /**
>>    * drm_sched_entity_is_idle - Check if entity is idle
>>    *
>> @@ -224,7 +209,8 @@ static bool drm_sched_entity_is_idle(struct 
>> drm_sched_entity *entity)
>>   {
>>       rmb();
>>   -    if (!entity->rq || spsc_queue_peek(&entity->job_queue) == NULL)
>> +    if (list_empty(&entity->list) ||
>> +        spsc_queue_peek(&entity->job_queue) == NULL)
>>           return true;
>>         return false;
>> @@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct 
>> drm_sched_entity *entity, long timeout)
>>       long ret = timeout;
>>         sched = entity->rq->sched;
>> -    if (!drm_sched_entity_is_initialized(sched, entity))
>> -        return ret;
>>       /**
>>        * The client will not queue more IBs during this fini, consume 
>> existing
>>        * queued IBs or discard them on SIGKILL
>> @@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct 
>> drm_sched_entity *entity, long timeout)
>>       last_user = cmpxchg(&entity->last_user, current->group_leader, 
>> NULL);
>>       if ((!last_user || last_user == current->group_leader) &&
>>           (current->flags & PF_EXITING) && (current->exit_code == 
>> SIGKILL))
>> -        drm_sched_entity_set_rq(entity, NULL);
>> +        drm_sched_rq_remove_entity(entity->rq, entity);
>>         return ret;
>>   }
>> @@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct 
>> drm_sched_entity *entity)
>>       struct drm_gpu_scheduler *sched;
>>         sched = entity->rq->sched;
>> -    drm_sched_entity_set_rq(entity, NULL);
>> +    drm_sched_rq_remove_entity(entity->rq, entity);
>>         /* Consumption of existing IBs wasn't completed. Forcefully
>>        * remove them here.
>> @@ -416,15 +400,12 @@ void drm_sched_entity_set_rq(struct 
>> drm_sched_entity *entity,
>>       if (entity->rq == rq)
>>           return;
>>   -    spin_lock(&entity->rq_lock);
>> -
>> -    if (entity->rq)
>> -        drm_sched_rq_remove_entity(entity->rq, entity);
>> +    BUG_ON(!rq);
>>   +    spin_lock(&entity->rq_lock);
>> +    drm_sched_rq_remove_entity(entity->rq, entity);
>>       entity->rq = rq;
>> -    if (rq)
>> -        drm_sched_rq_add_entity(rq, entity);
>> -
>> +    drm_sched_rq_add_entity(rq, entity);
>>       spin_unlock(&entity->rq_lock);
>>   }
>>   EXPORT_SYMBOL(drm_sched_entity_set_rq);
>


* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-08-01 22:25   ` Andrey Grodzovsky
  2018-08-02  6:42     ` Christian König
@ 2018-08-02  6:43     ` Nayan Deshmukh
  2018-08-02 14:38       ` Andrey Grodzovsky
  1 sibling, 1 reply; 26+ messages in thread
From: Nayan Deshmukh @ 2018-08-02  6:43 UTC (permalink / raw)
  To: Andrey Grodzovsky; +Cc: ckoenig.leichtzumerken, Maling list - DRI developers


Hi Andrey,

Good catch. I guess since both Christian and I were working on it in
parallel we didn't catch this problem.

If it is only causing a problem in push_job then we can handle it with
something like this:

----------------------------------------------------------------------------------------------------------------------------
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index 05dc6ecd4003..f344ce32f128 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -563,6 +563,11 @@ void drm_sched_entity_push_job(struct drm_sched_job
*sched_job,
        first = spsc_queue_count(&entity->job_queue) == 0;
        reschedule = idle && first && (entity->num_rq_list > 1);

+       if (first && list_empty(&entity->list)) {
+               DRM_ERROR("Trying to push to a killed entity\n");
+               return;
+       }
+
        if (reschedule) {
                rq = drm_sched_entity_get_free_sched(entity);
                spin_lock(&entity->rq_lock);
@@ -580,11 +585,6 @@ void drm_sched_entity_push_job(struct drm_sched_job
*sched_job,
        if (first) {
                /* Add the entity to the run queue */
                spin_lock(&entity->rq_lock);
-               if (!entity->rq) {
-                       DRM_ERROR("Trying to push to a killed entity\n");
-                       spin_unlock(&entity->rq_lock);
-                       return;
-               }
                drm_sched_rq_add_entity(entity->rq, entity);
                spin_unlock(&entity->rq_lock);
                drm_sched_wakeup(entity->rq->sched);
-----------------------------------------------------------------------------------------------------------------------------


On Thu, Aug 2, 2018 at 3:55 AM Andrey Grodzovsky <Andrey.Grodzovsky@amd.com> wrote:

> Thinking again about this change and 53d5f1e "drm/scheduler: move idle
> entities to scheduler with less load v2":
>
> Looks to me like the use case for which fc9a539 "drm/scheduler: add NULL
> pointer check for run queue (v2)" was done will not work anymore.
>
> First of all, in drm_sched_entity_push_job 'if (!entity->rq)' will never
> be true any more since we stopped setting entity->rq to NULL in
> drm_sched_entity_flush.
>
> Second of all, even after we removed the entity from the rq in
> drm_sched_entity_flush to terminate any subsequent submissions to the
> queue, the other thread doing push job can just acquire a run queue
> again from drm_sched_entity_get_free_sched and continue submission, so
> you can't substitute 'if (!entity->rq)' with
> 'if (list_empty(&entity->list))'.
>
> What about adding a 'stopped' flag to drm_sched_entity, set in
> drm_sched_entity_flush, and having drm_sched_entity_push_job check for
> 'if (!entity->stopped)' instead of 'if (!entity->rq)'?
>
But it might be worth adding a stopped flag field if it is required at
other places as well.

Thanks,
Nayan
>
> Andrey
>
>
> On 07/30/2018 07:03 AM, Christian König wrote:
> > We removed the redundancy of having an extra scheduler field, so we
> > can't set the rq to NULL any more or otherwise won't know which
> > scheduler to use for the cleanup.
> >
> > Just remove the entity from the scheduling list instead.
> >
> > Signed-off-by: Christian König <christian.koenig@amd.com>
> > ---
> >   drivers/gpu/drm/scheduler/gpu_scheduler.c | 35
> +++++++------------------------
> >   1 file changed, 8 insertions(+), 27 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> > index f563e4fbb4b6..1b733229201e 100644
> > --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> > +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> > @@ -198,21 +198,6 @@ int drm_sched_entity_init(struct drm_sched_entity
> *entity,
> >   }
> >   EXPORT_SYMBOL(drm_sched_entity_init);
> >
> > -/**
> > - * drm_sched_entity_is_initialized - Query if entity is initialized
> > - *
> > - * @sched: Pointer to scheduler instance
> > - * @entity: The pointer to a valid scheduler entity
> > - *
> > - * return true if entity is initialized, false otherwise
> > -*/
> > -static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler
> *sched,
> > -                                         struct drm_sched_entity
> *entity)
> > -{
> > -     return entity->rq != NULL &&
> > -             entity->rq->sched == sched;
> > -}
> > -
> >   /**
> >    * drm_sched_entity_is_idle - Check if entity is idle
> >    *
> > @@ -224,7 +209,8 @@ static bool drm_sched_entity_is_idle(struct
> drm_sched_entity *entity)
> >   {
> >       rmb();
> >
> > -     if (!entity->rq || spsc_queue_peek(&entity->job_queue) == NULL)
> > +     if (list_empty(&entity->list) ||
> > +         spsc_queue_peek(&entity->job_queue) == NULL)
> >               return true;
> >
> >       return false;
> > @@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct drm_sched_entity
> *entity, long timeout)
> >       long ret = timeout;
> >
> >       sched = entity->rq->sched;
> > -     if (!drm_sched_entity_is_initialized(sched, entity))
> > -             return ret;
> >       /**
> >        * The client will not queue more IBs during this fini, consume
> existing
> >        * queued IBs or discard them on SIGKILL
> > @@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct drm_sched_entity
> *entity, long timeout)
> >       last_user = cmpxchg(&entity->last_user, current->group_leader,
> NULL);
> >       if ((!last_user || last_user == current->group_leader) &&
> >           (current->flags & PF_EXITING) && (current->exit_code ==
> SIGKILL))
> > -             drm_sched_entity_set_rq(entity, NULL);
> > +             drm_sched_rq_remove_entity(entity->rq, entity);
> >
> >       return ret;
> >   }
> > @@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct drm_sched_entity
> *entity)
> >       struct drm_gpu_scheduler *sched;
> >
> >       sched = entity->rq->sched;
> > -     drm_sched_entity_set_rq(entity, NULL);
> > +     drm_sched_rq_remove_entity(entity->rq, entity);
> >
> >       /* Consumption of existing IBs wasn't completed. Forcefully
> >        * remove them here.
> > @@ -416,15 +400,12 @@ void drm_sched_entity_set_rq(struct
> drm_sched_entity *entity,
> >       if (entity->rq == rq)
> >               return;
> >
> > -     spin_lock(&entity->rq_lock);
> > -
> > -     if (entity->rq)
> > -             drm_sched_rq_remove_entity(entity->rq, entity);
> > +     BUG_ON(!rq);
> >
> > +     spin_lock(&entity->rq_lock);
> > +     drm_sched_rq_remove_entity(entity->rq, entity);
> >       entity->rq = rq;
> > -     if (rq)
> > -             drm_sched_rq_add_entity(rq, entity);
> > -
> > +     drm_sched_rq_add_entity(rq, entity);
> >       spin_unlock(&entity->rq_lock);
> >   }
> >   EXPORT_SYMBOL(drm_sched_entity_set_rq);
>
>


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-08-02  6:42     ` Christian König
@ 2018-08-02  6:47       ` Nayan Deshmukh
  2018-08-02  7:26         ` Christian König
  2018-08-02 14:11         ` Andrey Grodzovsky
  0 siblings, 2 replies; 26+ messages in thread
From: Nayan Deshmukh @ 2018-08-02  6:47 UTC (permalink / raw)
  To: Christian König; +Cc: Maling list - DRI developers


On Thu, Aug 2, 2018 at 12:12 PM Christian König <
ckoenig.leichtzumerken@gmail.com> wrote:

> On 02.08.2018 at 00:25, Andrey Grodzovsky wrote:
> > Thinking again about this change and 53d5f1e drm/scheduler: move idle
> > entities to scheduler with less load v2
> >
> > Looks to me like use case for which fc9a539 drm/scheduler: add NULL
> > pointer check for run queue (v2) was done
> >
> > will not work anymore.
> >
> > First of all in drm_sched_entity_push_job, 'if (!entity->rq)' will
> > never be true any more since we stopped setting entity->rq to NULL in
> >
> > drm_sched_entity_flush.
>
> Good point, going to remove that.
>
> >
> > Second of all, even after we removed the entity from rq in
> > drm_sched_entity_flush to terminate any subsequent submissions
> >
> > to the queue the other thread doing push job can just acquire again a
> > run queue
> >
> > from drm_sched_entity_get_free_sched and continue submission
>
Hi Christian

> That is actually desired.
>
> When another process is now using the entity to submit jobs adding it
> back to the rq is actually the right thing to do cause the entity is
> still in use.
>
I am not aware of the use case so I might just be rambling. But if the
thread/process that created the entity has called drm_sched_entity_flush
then we shouldn't allow other processes to push jobs to that entity.

Nayan

>
> Christian.
>
> > so you can't replace ' if (!entity->rq)' with 'if
> > (list_empty(&entity->list))'.
> >
> > What about adding a 'stopped' flag to drm_sched_entity to be set in
> > drm_sched_entity_flush and in
> >
> > drm_sched_entity_push_job check for  'if (!entity->stopped)' instead
> > of  ' if (!entity->rq)' ?
> >
> > Andrey
> >
> >
> > On 07/30/2018 07:03 AM, Christian König wrote:
> >> We removed the redundancy of having an extra scheduler field, so we
> >> can't set the rq to NULL any more or otherwise won't know which
> >> scheduler to use for the cleanup.
> >>
> >> Just remove the entity from the scheduling list instead.
> >>
> >> Signed-off-by: Christian König <christian.koenig@amd.com>
> >> ---
> >>   drivers/gpu/drm/scheduler/gpu_scheduler.c | 35
> >> +++++++------------------------
> >>   1 file changed, 8 insertions(+), 27 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> >> b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> >> index f563e4fbb4b6..1b733229201e 100644
> >> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> >> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> >> @@ -198,21 +198,6 @@ int drm_sched_entity_init(struct
> >> drm_sched_entity *entity,
> >>   }
> >>   EXPORT_SYMBOL(drm_sched_entity_init);
> >>   -/**
> >> - * drm_sched_entity_is_initialized - Query if entity is initialized
> >> - *
> >> - * @sched: Pointer to scheduler instance
> >> - * @entity: The pointer to a valid scheduler entity
> >> - *
> >> - * return true if entity is initialized, false otherwise
> >> -*/
> >> -static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler
> >> *sched,
> >> -                        struct drm_sched_entity *entity)
> >> -{
> >> -    return entity->rq != NULL &&
> >> -        entity->rq->sched == sched;
> >> -}
> >> -
> >>   /**
> >>    * drm_sched_entity_is_idle - Check if entity is idle
> >>    *
> >> @@ -224,7 +209,8 @@ static bool drm_sched_entity_is_idle(struct
> >> drm_sched_entity *entity)
> >>   {
> >>       rmb();
> >>   -    if (!entity->rq || spsc_queue_peek(&entity->job_queue) == NULL)
> >> +    if (list_empty(&entity->list) ||
> >> +        spsc_queue_peek(&entity->job_queue) == NULL)
> >>           return true;
> >>         return false;
> >> @@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct
> >> drm_sched_entity *entity, long timeout)
> >>       long ret = timeout;
> >>         sched = entity->rq->sched;
> >> -    if (!drm_sched_entity_is_initialized(sched, entity))
> >> -        return ret;
> >>       /**
> >>        * The client will not queue more IBs during this fini, consume
> >> existing
> >>        * queued IBs or discard them on SIGKILL
> >> @@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct
> >> drm_sched_entity *entity, long timeout)
> >>       last_user = cmpxchg(&entity->last_user, current->group_leader,
> >> NULL);
> >>       if ((!last_user || last_user == current->group_leader) &&
> >>           (current->flags & PF_EXITING) && (current->exit_code ==
> >> SIGKILL))
> >> -        drm_sched_entity_set_rq(entity, NULL);
> >> +        drm_sched_rq_remove_entity(entity->rq, entity);
> >>         return ret;
> >>   }
> >> @@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct
> >> drm_sched_entity *entity)
> >>       struct drm_gpu_scheduler *sched;
> >>         sched = entity->rq->sched;
> >> -    drm_sched_entity_set_rq(entity, NULL);
> >> +    drm_sched_rq_remove_entity(entity->rq, entity);
> >>         /* Consumption of existing IBs wasn't completed. Forcefully
> >>        * remove them here.
> >> @@ -416,15 +400,12 @@ void drm_sched_entity_set_rq(struct
> >> drm_sched_entity *entity,
> >>       if (entity->rq == rq)
> >>           return;
> >>   -    spin_lock(&entity->rq_lock);
> >> -
> >> -    if (entity->rq)
> >> -        drm_sched_rq_remove_entity(entity->rq, entity);
> >> +    BUG_ON(!rq);
> >>   +    spin_lock(&entity->rq_lock);
> >> +    drm_sched_rq_remove_entity(entity->rq, entity);
> >>       entity->rq = rq;
> >> -    if (rq)
> >> -        drm_sched_rq_add_entity(rq, entity);
> >> -
> >> +    drm_sched_rq_add_entity(rq, entity);
> >>       spin_unlock(&entity->rq_lock);
> >>   }
> >>   EXPORT_SYMBOL(drm_sched_entity_set_rq);
> >
>
>


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-08-02  6:47       ` Nayan Deshmukh
@ 2018-08-02  7:26         ` Christian König
  2018-08-02 14:11         ` Andrey Grodzovsky
  1 sibling, 0 replies; 26+ messages in thread
From: Christian König @ 2018-08-02  7:26 UTC (permalink / raw)
  To: Nayan Deshmukh, Christian König; +Cc: Maling list - DRI developers


On 02.08.2018 at 08:47, Nayan Deshmukh wrote:
>
>
> On Thu, Aug 2, 2018 at 12:12 PM Christian König 
> <ckoenig.leichtzumerken@gmail.com 
> <mailto:ckoenig.leichtzumerken@gmail.com>> wrote:
>
>     On 02.08.2018 at 00:25, Andrey Grodzovsky wrote:
>     > Thinking again about this change and 53d5f1e drm/scheduler: move
>     idle
>     > entities to scheduler with less load v2
>     >
>     > Looks to me like use case for which fc9a539 drm/scheduler: add NULL
>     > pointer check for run queue (v2) was done
>     >
>     > will not work anymore.
>     >
>     > First of all in drm_sched_entity_push_job, 'if (!entity->rq)' will
>     > never be true any more since we stopped setting entity->rq to NULL in
>     >
>     > drm_sched_entity_flush.
>
>     Good point, going to remove that.
>
>     >
>     > Second of all, even after we removed the entity from rq in
>     > drm_sched_entity_flush to terminate any subsequent submissions
>     >
>     > to the queue the other thread doing push job can just acquire
>     again a
>     > run queue
>     >
>     > from drm_sched_entity_get_free_sched and continue submission
>
> Hi Christian
>
>     That is actually desired.
>
>     When another process is now using the entity to submit jobs adding it
>     back to the rq is actually the right thing to do cause the entity is
>     still in use.
>
> I am not aware of the use case so I might just be rambling. But if the
> thread/process that created the entity has called
> drm_sched_entity_flush then we shouldn't allow other processes to push
> jobs to that entity.

Why not?

Calling flush() only means that we should wait for all waiting jobs to
be sent to the hardware. It doesn't mean that we need to stop accepting
new jobs.

It is perfectly possible that a child process has inherited a file
descriptor from its parent and calls flush because it exits, while the
parent is still using the file descriptor.

The only tricky part is how to handle it when a process is killed, but I
think we came to a good conclusion there as well.

We remove the entity from scheduling when the last process which
submitted jobs is killed, or if there is no such process any more and
the current flushing process is killed.
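
For reference, that rule condenses to the last_user logic from patch 1.
A sketch reusing the fields from the patches in this thread (the helper
name is invented; this is an illustration, not the exact upstream code):

	/* Should a SIGKILLed task that is flushing the entity stop it? */
	static bool entity_stop_on_kill(struct drm_sched_entity *entity)
	{
		struct task_struct *last_user;

		/* Clear last_user, but only if we are still the last submitter. */
		last_user = cmpxchg(&entity->last_user, current->group_leader, NULL);

		/* Stop when no other process submitted jobs after us and the
		 * current task is exiting because of SIGKILL. */
		return (!last_user || last_user == current->group_leader) &&
		       (current->flags & PF_EXITING) &&
		       (current->exit_code == SIGKILL);
	}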

Christian.

>
> Nayan
>
>
>     Christian.
>
>     > so you can't replace ' if (!entity->rq)' with 'if
>     > (list_empty(&entity->list))'.
>     >
>     > What about adding a 'stopped' flag to drm_sched_entity to be set in
>     > drm_sched_entity_flush and in
>     >
>     > drm_sched_entity_push_job check for  'if (!entity->stopped)'
>     instead
>     > of  ' if (!entity->rq)' ?
>     >
>     > Andrey
>     >
>     >
>     > On 07/30/2018 07:03 AM, Christian König wrote:
>     >> We removed the redundancy of having an extra scheduler field, so we
>     >> can't set the rq to NULL any more or otherwise won't know which
>     >> scheduler to use for the cleanup.
>     >>
>     >> Just remove the entity from the scheduling list instead.
>     >>
>     >> Signed-off-by: Christian König <christian.koenig@amd.com
>     <mailto:christian.koenig@amd.com>>
>     >> ---
>     >>   drivers/gpu/drm/scheduler/gpu_scheduler.c | 35
>     >> +++++++------------------------
>     >>   1 file changed, 8 insertions(+), 27 deletions(-)
>     >>
>     >> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     >> b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     >> index f563e4fbb4b6..1b733229201e 100644
>     >> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     >> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     >> @@ -198,21 +198,6 @@ int drm_sched_entity_init(struct
>     >> drm_sched_entity *entity,
>     >>   }
>     >>   EXPORT_SYMBOL(drm_sched_entity_init);
>     >>   -/**
>     >> - * drm_sched_entity_is_initialized - Query if entity is
>     initialized
>     >> - *
>     >> - * @sched: Pointer to scheduler instance
>     >> - * @entity: The pointer to a valid scheduler entity
>     >> - *
>     >> - * return true if entity is initialized, false otherwise
>     >> -*/
>     >> -static bool drm_sched_entity_is_initialized(struct
>     drm_gpu_scheduler
>     >> *sched,
>     >> -                        struct drm_sched_entity *entity)
>     >> -{
>     >> -    return entity->rq != NULL &&
>     >> -        entity->rq->sched == sched;
>     >> -}
>     >> -
>     >>   /**
>     >>    * drm_sched_entity_is_idle - Check if entity is idle
>     >>    *
>     >> @@ -224,7 +209,8 @@ static bool drm_sched_entity_is_idle(struct
>     >> drm_sched_entity *entity)
>     >>   {
>     >>       rmb();
>     >>   -    if (!entity->rq || spsc_queue_peek(&entity->job_queue)
>     == NULL)
>     >> +    if (list_empty(&entity->list) ||
>     >> +        spsc_queue_peek(&entity->job_queue) == NULL)
>     >>           return true;
>     >>         return false;
>     >> @@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct
>     >> drm_sched_entity *entity, long timeout)
>     >>       long ret = timeout;
>     >>         sched = entity->rq->sched;
>     >> -    if (!drm_sched_entity_is_initialized(sched, entity))
>     >> -        return ret;
>     >>       /**
>     >>        * The client will not queue more IBs during this fini,
>     consume
>     >> existing
>     >>        * queued IBs or discard them on SIGKILL
>     >> @@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct
>     >> drm_sched_entity *entity, long timeout)
>     >>       last_user = cmpxchg(&entity->last_user,
>     current->group_leader,
>     >> NULL);
>     >>       if ((!last_user || last_user == current->group_leader) &&
>     >>           (current->flags & PF_EXITING) && (current->exit_code ==
>     >> SIGKILL))
>     >> -        drm_sched_entity_set_rq(entity, NULL);
>     >> +        drm_sched_rq_remove_entity(entity->rq, entity);
>     >>         return ret;
>     >>   }
>     >> @@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct
>     >> drm_sched_entity *entity)
>     >>       struct drm_gpu_scheduler *sched;
>     >>         sched = entity->rq->sched;
>     >> -    drm_sched_entity_set_rq(entity, NULL);
>     >> +    drm_sched_rq_remove_entity(entity->rq, entity);
>     >>         /* Consumption of existing IBs wasn't completed. Forcefully
>     >>        * remove them here.
>     >> @@ -416,15 +400,12 @@ void drm_sched_entity_set_rq(struct
>     >> drm_sched_entity *entity,
>     >>       if (entity->rq == rq)
>     >>           return;
>     >>   -    spin_lock(&entity->rq_lock);
>     >> -
>     >> -    if (entity->rq)
>     >> -        drm_sched_rq_remove_entity(entity->rq, entity);
>     >> +    BUG_ON(!rq);
>     >>   +    spin_lock(&entity->rq_lock);
>     >> +    drm_sched_rq_remove_entity(entity->rq, entity);
>     >>       entity->rq = rq;
>     >> -    if (rq)
>     >> -        drm_sched_rq_add_entity(rq, entity);
>     >> -
>     >> +    drm_sched_rq_add_entity(rq, entity);
>     >>       spin_unlock(&entity->rq_lock);
>     >>   }
>     >>   EXPORT_SYMBOL(drm_sched_entity_set_rq);
>     >
>
>
>



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-08-02  6:47       ` Nayan Deshmukh
  2018-08-02  7:26         ` Christian König
@ 2018-08-02 14:11         ` Andrey Grodzovsky
  2018-08-03 14:06           ` Christian König
  1 sibling, 1 reply; 26+ messages in thread
From: Andrey Grodzovsky @ 2018-08-02 14:11 UTC (permalink / raw)
  To: Nayan Deshmukh, Christian König; +Cc: Maling list - DRI developers




On 08/02/2018 02:47 AM, Nayan Deshmukh wrote:
>
>
> On Thu, Aug 2, 2018 at 12:12 PM Christian König 
> <ckoenig.leichtzumerken@gmail.com 
> <mailto:ckoenig.leichtzumerken@gmail.com>> wrote:
>
>     On 02.08.2018 at 00:25, Andrey Grodzovsky wrote:
>     > Thinking again about this change and 53d5f1e drm/scheduler: move
>     idle
>     > entities to scheduler with less load v2
>     >
>     > Looks to me like use case for which fc9a539 drm/scheduler: add NULL
>     > pointer check for run queue (v2) was done
>     >
>     > will not work anymore.
>     >
>     > First of all in drm_sched_entity_push_job, 'if (!entity->rq)' will
>     > never be true any more since we stopped setting entity->rq to NULL in
>     >
>     > drm_sched_entity_flush.
>
>     Good point, going to remove that.
>

So basically we don't need that if (...) { return; } in
drm_sched_entity_push_job any more?

>
>     >
>     > Second of all, even after we removed the entity from rq in
>     > drm_sched_entity_flush to terminate any subsequent submissions
>     >
>     > to the queue the other thread doing push job can just acquire
>     again a
>     > run queue
>     >
>     > from drm_sched_entity_get_free_sched and continue submission
>
> Hi Christian
>
>     That is actually desired.
>
>     When another process is now using the entity to submit jobs adding it
>     back to the rq is actually the right thing to do cause the entity is
>     still in use.
>

Yes, no problem if it's another process. But what about another thread
from the same process? Is it a possible use case that 2 threads from the
same process submit to the same entity concurrently? If so, and we
specifically kill one, the other will not stop even if we want it to,
because the current code lets it just acquire a run queue for itself.
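
A hypothetical interleaving of that case, using the function names from
the patches in this thread and assuming entity->num_rq_list > 1 so the
reschedule path in drm_sched_entity_push_job runs:

	T1: drm_sched_entity_flush()          T2: drm_sched_entity_push_job()
	-----------------------------         -------------------------------
	drm_sched_rq_remove_entity(
	        entity->rq, entity);
	                                      /* entity idle, queue empty */
	                                      rq = drm_sched_entity_get_free_sched(entity);
	                                      /* reschedule path points entity->rq at rq */
	                                      drm_sched_rq_add_entity(entity->rq, entity);

T2 simply re-acquires a run queue, so removing the entity in flush alone
does not terminate submissions from the surviving thread.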

> I am not aware of the use case so I might just be rambling. But if the
> thread/process that created the entity has called
> drm_sched_entity_flush then we shouldn't allow other processes to push
> jobs to that entity.
>
> Nayan
>
>
>     Christian.
>
We don't really know who created the entity; the creation could be by X
itself, which then passes it to another process for use. Check
'drm/scheduler: only kill entity if last user is killed v2': the idea is
that if, by the time you want to kill this entity, another process (not
another thread from your process, i.e. current->group_leader != last_user
in drm_sched_entity_flush) has already started to use this entity, just
let it be.

Andrey
>
>
>     > so you can't replace ' if (!entity->rq)' with 'if
>     > (list_empty(&entity->list))'.
>     >
>     > What about adding a 'stopped' flag to drm_sched_entity to be set in
>     > drm_sched_entity_flush and in
>     >
>     > drm_sched_entity_push_job check for  'if (!entity->stopped)'
>     instead
>     > of  ' if (!entity->rq)' ?
>     >
>     > Andrey
>     >
>     >
>     > On 07/30/2018 07:03 AM, Christian König wrote:
>     >> We removed the redundancy of having an extra scheduler field, so we
>     >> can't set the rq to NULL any more or otherwise won't know which
>     >> scheduler to use for the cleanup.
>     >>
>     >> Just remove the entity from the scheduling list instead.
>     >>
>     >> Signed-off-by: Christian König <christian.koenig@amd.com
>     <mailto:christian.koenig@amd.com>>
>     >> ---
>     >>   drivers/gpu/drm/scheduler/gpu_scheduler.c | 35
>     >> +++++++------------------------
>     >>   1 file changed, 8 insertions(+), 27 deletions(-)
>     >>
>     >> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     >> b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     >> index f563e4fbb4b6..1b733229201e 100644
>     >> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     >> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     >> @@ -198,21 +198,6 @@ int drm_sched_entity_init(struct
>     >> drm_sched_entity *entity,
>     >>   }
>     >>   EXPORT_SYMBOL(drm_sched_entity_init);
>     >>   -/**
>     >> - * drm_sched_entity_is_initialized - Query if entity is
>     initialized
>     >> - *
>     >> - * @sched: Pointer to scheduler instance
>     >> - * @entity: The pointer to a valid scheduler entity
>     >> - *
>     >> - * return true if entity is initialized, false otherwise
>     >> -*/
>     >> -static bool drm_sched_entity_is_initialized(struct
>     drm_gpu_scheduler
>     >> *sched,
>     >> -                        struct drm_sched_entity *entity)
>     >> -{
>     >> -    return entity->rq != NULL &&
>     >> -        entity->rq->sched == sched;
>     >> -}
>     >> -
>     >>   /**
>     >>    * drm_sched_entity_is_idle - Check if entity is idle
>     >>    *
>     >> @@ -224,7 +209,8 @@ static bool drm_sched_entity_is_idle(struct
>     >> drm_sched_entity *entity)
>     >>   {
>     >>       rmb();
>     >>   -    if (!entity->rq || spsc_queue_peek(&entity->job_queue)
>     == NULL)
>     >> +    if (list_empty(&entity->list) ||
>     >> +        spsc_queue_peek(&entity->job_queue) == NULL)
>     >>           return true;
>     >>         return false;
>     >> @@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct
>     >> drm_sched_entity *entity, long timeout)
>     >>       long ret = timeout;
>     >>         sched = entity->rq->sched;
>     >> -    if (!drm_sched_entity_is_initialized(sched, entity))
>     >> -        return ret;
>     >>       /**
>     >>        * The client will not queue more IBs during this fini,
>     consume
>     >> existing
>     >>        * queued IBs or discard them on SIGKILL
>     >> @@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct
>     >> drm_sched_entity *entity, long timeout)
>     >>       last_user = cmpxchg(&entity->last_user,
>     current->group_leader,
>     >> NULL);
>     >>       if ((!last_user || last_user == current->group_leader) &&
>     >>           (current->flags & PF_EXITING) && (current->exit_code ==
>     >> SIGKILL))
>     >> -        drm_sched_entity_set_rq(entity, NULL);
>     >> +        drm_sched_rq_remove_entity(entity->rq, entity);
>     >>         return ret;
>     >>   }
>     >> @@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct
>     >> drm_sched_entity *entity)
>     >>       struct drm_gpu_scheduler *sched;
>     >>         sched = entity->rq->sched;
>     >> -    drm_sched_entity_set_rq(entity, NULL);
>     >> +    drm_sched_rq_remove_entity(entity->rq, entity);
>     >>         /* Consumption of existing IBs wasn't completed. Forcefully
>     >>        * remove them here.
>     >> @@ -416,15 +400,12 @@ void drm_sched_entity_set_rq(struct
>     >> drm_sched_entity *entity,
>     >>       if (entity->rq == rq)
>     >>           return;
>     >>   -    spin_lock(&entity->rq_lock);
>     >> -
>     >> -    if (entity->rq)
>     >> -        drm_sched_rq_remove_entity(entity->rq, entity);
>     >> +    BUG_ON(!rq);
>     >>   +    spin_lock(&entity->rq_lock);
>     >> +    drm_sched_rq_remove_entity(entity->rq, entity);
>     >>       entity->rq = rq;
>     >> -    if (rq)
>     >> -        drm_sched_rq_add_entity(rq, entity);
>     >> -
>     >> +    drm_sched_rq_add_entity(rq, entity);
>     >>       spin_unlock(&entity->rq_lock);
>     >>   }
>     >>   EXPORT_SYMBOL(drm_sched_entity_set_rq);
>     >
>



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-08-02  6:43     ` Nayan Deshmukh
@ 2018-08-02 14:38       ` Andrey Grodzovsky
  2018-08-03 12:12         ` Nayan Deshmukh
  0 siblings, 1 reply; 26+ messages in thread
From: Andrey Grodzovsky @ 2018-08-02 14:38 UTC (permalink / raw)
  To: Nayan Deshmukh; +Cc: ckoenig.leichtzumerken, Maling list - DRI developers




On 08/02/2018 02:43 AM, Nayan Deshmukh wrote:
> Hi Andrey,
>
> Good catch. I guess since both Christian and I were working on it
> in parallel we didn't catch this problem.
>
> If it is only causing a problem in push_job then we can handle it
> with something like this:
>
> ----------------------------------------------------------------------------------------------------------------------------
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c 
> b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> index 05dc6ecd4003..f344ce32f128 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> @@ -563,6 +563,11 @@ void drm_sched_entity_push_job(struct 
> drm_sched_job *sched_job,
>         first = spsc_queue_count(&entity->job_queue) == 0;
>         reschedule = idle && first && (entity->num_rq_list > 1);
>
> +       if (first && list_empty(&entity->list)) {

first might be false if the other process was interrupted by SIGKILL in
the middle of wait_event_killable in drm_sched_entity_flush when there
were still items in the queue.
I don't see a good way besides adding a 'stopped' flag to drm_sched_entity.
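
A minimal sketch of that 'stopped' flag idea, against the structures used
in this thread; untested, and the exact placement is an assumption:

	/* include/drm/gpu_scheduler.h: new field in struct drm_sched_entity */
	bool				stopped;

	/* drm_sched_entity_flush(): mark the entity instead of relying on
	 * the run queue state alone */
	if ((!last_user || last_user == current->group_leader) &&
	    (current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) {
		spin_lock(&entity->rq_lock);
		entity->stopped = true;
		drm_sched_rq_remove_entity(entity->rq, entity);
		spin_unlock(&entity->rq_lock);
	}

	/* drm_sched_entity_push_job(): refuse jobs for a stopped entity,
	 * independent of whether the job queue happens to be empty */
	if (READ_ONCE(entity->stopped)) {
		DRM_ERROR("Trying to push to a killed entity\n");
		return;
	}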

Andrey

> +               DRM_ERROR("Trying to push to a killed entity\n");
> +               return;
> +       }
> +
>         if (reschedule) {
>                 rq = drm_sched_entity_get_free_sched(entity);
>                 spin_lock(&entity->rq_lock);
> @@ -580,11 +585,6 @@ void drm_sched_entity_push_job(struct 
> drm_sched_job *sched_job,
>         if (first) {
>                 /* Add the entity to the run queue */
>                 spin_lock(&entity->rq_lock);
> -               if (!entity->rq) {
> -                       DRM_ERROR("Trying to push to a killed entity\n");
> -                       spin_unlock(&entity->rq_lock);
> -                       return;
> -               }
>                 drm_sched_rq_add_entity(entity->rq, entity);
>                 spin_unlock(&entity->rq_lock);
>                 drm_sched_wakeup(entity->rq->sched);
> -----------------------------------------------------------------------------------------------------------------------------
>
>
> On Thu, Aug 2, 2018 at 3:55 AM Andrey Grodzovsky 
> <Andrey.Grodzovsky@amd.com <mailto:Andrey.Grodzovsky@amd.com>> wrote:
>
>     Thinking again about this change and 53d5f1e drm/scheduler: move idle
>     entities to scheduler with less load v2
>
>     Looks to me like use case for which fc9a539 drm/scheduler: add NULL
>     pointer check for run queue (v2) was done
>
>     will not work anymore.
>
>     First of all in drm_sched_entity_push_job, 'if (!entity->rq)' will
>     never
>     be true any more since we stopped setting entity->rq to NULL in
>
>     drm_sched_entity_flush.
>
>     Second of all, even after we removed the entity from rq in
>     drm_sched_entity_flush to terminate any subsequent submissions
>
>     to the queue the other thread doing push job can just acquire again a
>     run queue
>
>     from drm_sched_entity_get_free_sched and continue submission so you
>     can't replace ' if (!entity->rq)' with 'if
>     (list_empty(&entity->list))'.
>
>     What about adding a 'stopped' flag to drm_sched_entity to be set in
>     drm_sched_entity_flush and in
>
> But it might be worth adding a stopped flag field if it is required at 
> other places as well.
>
> Thanks,
> Nayan
>
>     drm_sched_entity_push_job check for  'if (!entity->stopped)'
>     instead of
>     ' if (!entity->rq)' ?
>
>     Andrey
>
>
>     On 07/30/2018 07:03 AM, Christian König wrote:
>     > We removed the redundancy of having an extra scheduler field, so we
>     > can't set the rq to NULL any more or otherwise won't know which
>     > scheduler to use for the cleanup.
>     >
>     > Just remove the entity from the scheduling list instead.
>     >
>     > Signed-off-by: Christian König <christian.koenig@amd.com
>     <mailto:christian.koenig@amd.com>>
>     > ---
>     >   drivers/gpu/drm/scheduler/gpu_scheduler.c | 35
>     +++++++------------------------
>     >   1 file changed, 8 insertions(+), 27 deletions(-)
>     >
>     > diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     > index f563e4fbb4b6..1b733229201e 100644
>     > --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     > +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     > @@ -198,21 +198,6 @@ int drm_sched_entity_init(struct
>     drm_sched_entity *entity,
>     >   }
>     >   EXPORT_SYMBOL(drm_sched_entity_init);
>     >
>     > -/**
>     > - * drm_sched_entity_is_initialized - Query if entity is initialized
>     > - *
>     > - * @sched: Pointer to scheduler instance
>     > - * @entity: The pointer to a valid scheduler entity
>     > - *
>     > - * return true if entity is initialized, false otherwise
>     > -*/
>     > -static bool drm_sched_entity_is_initialized(struct
>     drm_gpu_scheduler *sched,
>     > -                                         struct
>     drm_sched_entity *entity)
>     > -{
>     > -     return entity->rq != NULL &&
>     > -             entity->rq->sched == sched;
>     > -}
>     > -
>     >   /**
>     >    * drm_sched_entity_is_idle - Check if entity is idle
>     >    *
>     > @@ -224,7 +209,8 @@ static bool drm_sched_entity_is_idle(struct
>     drm_sched_entity *entity)
>     >   {
>     >       rmb();
>     >
>     > -     if (!entity->rq || spsc_queue_peek(&entity->job_queue) ==
>     NULL)
>     > +     if (list_empty(&entity->list) ||
>     > +         spsc_queue_peek(&entity->job_queue) == NULL)
>     >               return true;
>     >
>     >       return false;
>     > @@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct
>     drm_sched_entity *entity, long timeout)
>     >       long ret = timeout;
>     >
>     >       sched = entity->rq->sched;
>     > -     if (!drm_sched_entity_is_initialized(sched, entity))
>     > -             return ret;
>     >       /**
>     >        * The client will not queue more IBs during this fini,
>     consume existing
>     >        * queued IBs or discard them on SIGKILL
>     > @@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct
>     drm_sched_entity *entity, long timeout)
>     >       last_user = cmpxchg(&entity->last_user,
>     current->group_leader, NULL);
>     >       if ((!last_user || last_user == current->group_leader) &&
>     >           (current->flags & PF_EXITING) && (current->exit_code
>     == SIGKILL))
>     > -             drm_sched_entity_set_rq(entity, NULL);
>     > +             drm_sched_rq_remove_entity(entity->rq, entity);
>     >
>     >       return ret;
>     >   }
>     > @@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct
>     drm_sched_entity *entity)
>     >       struct drm_gpu_scheduler *sched;
>     >
>     >       sched = entity->rq->sched;
>     > -     drm_sched_entity_set_rq(entity, NULL);
>     > +     drm_sched_rq_remove_entity(entity->rq, entity);
>     >
>     >       /* Consumption of existing IBs wasn't completed. Forcefully
>     >        * remove them here.
>     > @@ -416,15 +400,12 @@ void drm_sched_entity_set_rq(struct
>     drm_sched_entity *entity,
>     >       if (entity->rq == rq)
>     >               return;
>     >
>     > -     spin_lock(&entity->rq_lock);
>     > -
>     > -     if (entity->rq)
>     > -             drm_sched_rq_remove_entity(entity->rq, entity);
>     > +     BUG_ON(!rq);
>     >
>     > +     spin_lock(&entity->rq_lock);
>     > +     drm_sched_rq_remove_entity(entity->rq, entity);
>     >       entity->rq = rq;
>     > -     if (rq)
>     > -             drm_sched_rq_add_entity(rq, entity);
>     > -
>     > +     drm_sched_rq_add_entity(rq, entity);
>     >       spin_unlock(&entity->rq_lock);
>     >   }
>     >   EXPORT_SYMBOL(drm_sched_entity_set_rq);
>



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-08-02 14:38       ` Andrey Grodzovsky
@ 2018-08-03 12:12         ` Nayan Deshmukh
  2018-08-03 13:47           ` Andrey Grodzovsky
  0 siblings, 1 reply; 26+ messages in thread
From: Nayan Deshmukh @ 2018-08-03 12:12 UTC (permalink / raw)
  To: Andrey Grodzovsky; +Cc: ckoenig.leichtzumerken, Maling list - DRI developers


On Thu, Aug 2, 2018, 8:09 PM Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>
wrote:

>
>
> On 08/02/2018 02:43 AM, Nayan Deshmukh wrote:
>
> Hi Andrey,
>
> Good catch. I guess since both Christian and I were working on it
> in parallel we didn't catch this problem.
>
> If it is only causing a problem in push_job then we can handle it
> with something like this:
>
>
> ----------------------------------------------------------------------------------------------------------------------------
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> index 05dc6ecd4003..f344ce32f128 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> @@ -563,6 +563,11 @@ void drm_sched_entity_push_job(struct drm_sched_job
> *sched_job,
>         first = spsc_queue_count(&entity->job_queue) == 0;
>         reschedule = idle && first && (entity->num_rq_list > 1);
>
> +       if (first && list_empty(&entity->list)) {
>
>
> first might be false if the other process was interrupted by SIGKILL in
> the middle of wait_event_killable in drm_sched_entity_flush when there
> were still items in the queue.
> I don't see a good way besides adding a 'stopped' flag to
> drm_sched_entity.
>
Sorry I missed this mail. This case might happen but this was also not
being handled previously.

Nayan

>
> Andrey
>
> +               DRM_ERROR("Trying to push to a killed entity\n");
> +               return;
> +       }
> +
>         if (reschedule) {
>                 rq = drm_sched_entity_get_free_sched(entity);
>                 spin_lock(&entity->rq_lock);
> @@ -580,11 +585,6 @@ void drm_sched_entity_push_job(struct drm_sched_job
> *sched_job,
>         if (first) {
>                 /* Add the entity to the run queue */
>                 spin_lock(&entity->rq_lock);
> -               if (!entity->rq) {
> -                       DRM_ERROR("Trying to push to a killed entity\n");
> -                       spin_unlock(&entity->rq_lock);
> -                       return;
> -               }
>                 drm_sched_rq_add_entity(entity->rq, entity);
>                 spin_unlock(&entity->rq_lock);
>                 drm_sched_wakeup(entity->rq->sched);
>
> -----------------------------------------------------------------------------------------------------------------------------
>
>
> On Thu, Aug 2, 2018 at 3:55 AM Andrey Grodzovsky <
> Andrey.Grodzovsky@amd.com> wrote:
>
>> Thinking again about this change and 53d5f1e drm/scheduler: move idle
>> entities to scheduler with less load v2
>>
>> Looks to me like use case for which fc9a539 drm/scheduler: add NULL
>> pointer check for run queue (v2) was done
>>
>> will not work anymore.
>>
>> First of all in drm_sched_entity_push_job, 'if (!entity->rq)' will never
>> be true any more since we stopped setting entity->rq to NULL in
>>
>> drm_sched_entity_flush.
>>
>> Second of all, even after we removed the entity from rq in
>> drm_sched_entity_flush to terminate any subsequent submissions
>>
>> to the queue the other thread doing push job can just acquire again a
>> run queue
>>
>> from drm_sched_entity_get_free_sched and continue submission so you
>> can't replace ' if (!entity->rq)' with 'if (list_empty(&entity->list))'.
>>
>> What about adding a 'stopped' flag to drm_sched_entity to be set in
>> drm_sched_entity_flush and in
>>
> But it might be worth adding a stopped flag field if it is required at
> other places as well.
>
> Thanks,
> Nayan
>
>> drm_sched_entity_push_job check for  'if (!entity->stopped)' instead of
>> ' if (!entity->rq)' ?
>>
>> Andrey
>>
>>
>> On 07/30/2018 07:03 AM, Christian König wrote:
>> > We removed the redundancy of having an extra scheduler field, so we
>> > can't set the rq to NULL any more or otherwise won't know which
>> > scheduler to use for the cleanup.
>> >
>> > Just remove the entity from the scheduling list instead.
>> >
>> > Signed-off-by: Christian König <christian.koenig@amd.com>
>> > ---
>> >   drivers/gpu/drm/scheduler/gpu_scheduler.c | 35
>> +++++++------------------------
>> >   1 file changed, 8 insertions(+), 27 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> > index f563e4fbb4b6..1b733229201e 100644
>> > --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> > +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> > @@ -198,21 +198,6 @@ int drm_sched_entity_init(struct drm_sched_entity
>> *entity,
>> >   }
>> >   EXPORT_SYMBOL(drm_sched_entity_init);
>> >
>> > -/**
>> > - * drm_sched_entity_is_initialized - Query if entity is initialized
>> > - *
>> > - * @sched: Pointer to scheduler instance
>> > - * @entity: The pointer to a valid scheduler entity
>> > - *
>> > - * return true if entity is initialized, false otherwise
>> > -*/
>> > -static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler
>> *sched,
>> > -                                         struct drm_sched_entity
>> *entity)
>> > -{
>> > -     return entity->rq != NULL &&
>> > -             entity->rq->sched == sched;
>> > -}
>> > -
>> >   /**
>> >    * drm_sched_entity_is_idle - Check if entity is idle
>> >    *
>> > @@ -224,7 +209,8 @@ static bool drm_sched_entity_is_idle(struct
>> drm_sched_entity *entity)
>> >   {
>> >       rmb();
>> >
>> > -     if (!entity->rq || spsc_queue_peek(&entity->job_queue) == NULL)
>> > +     if (list_empty(&entity->list) ||
>> > +         spsc_queue_peek(&entity->job_queue) == NULL)
>> >               return true;
>> >
>> >       return false;
>> > @@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct drm_sched_entity
>> *entity, long timeout)
>> >       long ret = timeout;
>> >
>> >       sched = entity->rq->sched;
>> > -     if (!drm_sched_entity_is_initialized(sched, entity))
>> > -             return ret;
>> >       /**
>> >        * The client will not queue more IBs during this fini, consume
>> existing
>> >        * queued IBs or discard them on SIGKILL
>> > @@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct drm_sched_entity
>> *entity, long timeout)
>> >       last_user = cmpxchg(&entity->last_user, current->group_leader,
>> NULL);
>> >       if ((!last_user || last_user == current->group_leader) &&
>> >           (current->flags & PF_EXITING) && (current->exit_code ==
>> SIGKILL))
>> > -             drm_sched_entity_set_rq(entity, NULL);
>> > +             drm_sched_rq_remove_entity(entity->rq, entity);
>> >
>> >       return ret;
>> >   }
>> > @@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct drm_sched_entity
>> *entity)
>> >       struct drm_gpu_scheduler *sched;
>> >
>> >       sched = entity->rq->sched;
>> > -     drm_sched_entity_set_rq(entity, NULL);
>> > +     drm_sched_rq_remove_entity(entity->rq, entity);
>> >
>> >       /* Consumption of existing IBs wasn't completed. Forcefully
>> >        * remove them here.
>> > @@ -416,15 +400,12 @@ void drm_sched_entity_set_rq(struct
>> drm_sched_entity *entity,
>> >       if (entity->rq == rq)
>> >               return;
>> >
>> > -     spin_lock(&entity->rq_lock);
>> > -
>> > -     if (entity->rq)
>> > -             drm_sched_rq_remove_entity(entity->rq, entity);
>> > +     BUG_ON(!rq);
>> >
>> > +     spin_lock(&entity->rq_lock);
>> > +     drm_sched_rq_remove_entity(entity->rq, entity);
>> >       entity->rq = rq;
>> > -     if (rq)
>> > -             drm_sched_rq_add_entity(rq, entity);
>> > -
>> > +     drm_sched_rq_add_entity(rq, entity);
>> >       spin_unlock(&entity->rq_lock);
>> >   }
>> >   EXPORT_SYMBOL(drm_sched_entity_set_rq);
>>
>>
>


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-08-03 12:12         ` Nayan Deshmukh
@ 2018-08-03 13:47           ` Andrey Grodzovsky
  2018-08-03 13:58             ` Nayan Deshmukh
  0 siblings, 1 reply; 26+ messages in thread
From: Andrey Grodzovsky @ 2018-08-03 13:47 UTC (permalink / raw)
  To: Nayan Deshmukh; +Cc: ckoenig.leichtzumerken, Maling list - DRI developers




On 08/03/2018 08:12 AM, Nayan Deshmukh wrote:
>
>
> On Thu, Aug 2, 2018, 8:09 PM Andrey Grodzovsky 
> <Andrey.Grodzovsky@amd.com <mailto:Andrey.Grodzovsky@amd.com>> wrote:
>
>
>
>     On 08/02/2018 02:43 AM, Nayan Deshmukh wrote:
>>     Hi Andrey,
>>
>>     Good catch. I guess since both Christian and I were working on it
>>     in parallel we didn't catch this problem.
>>
>>     If it is only causing a problem in push_job then we can handle it
>>     with something like this:
>>
>>     ----------------------------------------------------------------------------------------------------------------------------
>>     diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>     b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>     index 05dc6ecd4003..f344ce32f128 100644
>>     --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>     +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>     @@ -563,6 +563,11 @@ void drm_sched_entity_push_job(struct
>>     drm_sched_job *sched_job,
>>             first = spsc_queue_count(&entity->job_queue) == 0;
>>             reschedule = idle && first && (entity->num_rq_list > 1);
>>
>>     +       if (first && list_empty(&entity->list)) {
>
>     first might be false if the other process was interrupted by SIGKILL
>     in the middle of wait_event_killable in drm_sched_entity_flush when
>     there were still items in the queue.
>     I don't see a good way besides adding a 'stopped' flag to
>     drm_sched_entity.
>
> Sorry I missed this mail. This case might happen but this was also not 
> being handled previously.
>
> Nayan

The original code before 'drm/scheduler: stop setting rq to NULL' handled
this, because you didn't use the queue's emptiness as a criterion for
aborting your next push to the queue but rather checked for
entity->rq != NULL.

Andrey

>
>     Andrey
>
>>     +               DRM_ERROR("Trying to push to a killed entity\n");
>>     +               return;
>>     +       }
>>     +
>>             if (reschedule) {
>>                     rq = drm_sched_entity_get_free_sched(entity);
>>     spin_lock(&entity->rq_lock);
>>     @@ -580,11 +585,6 @@ void drm_sched_entity_push_job(struct
>>     drm_sched_job *sched_job,
>>             if (first) {
>>                     /* Add the entity to the run queue */
>>     spin_lock(&entity->rq_lock);
>>     -               if (!entity->rq) {
>>     -                       DRM_ERROR("Trying to push to a killed
>>     entity\n");
>>     - spin_unlock(&entity->rq_lock);
>>     -                       return;
>>     -               }
>>     drm_sched_rq_add_entity(entity->rq, entity);
>>     spin_unlock(&entity->rq_lock);
>>     drm_sched_wakeup(entity->rq->sched);
>>     -----------------------------------------------------------------------------------------------------------------------------
>>
>>
>>     On Thu, Aug 2, 2018 at 3:55 AM Andrey Grodzovsky
>>     <Andrey.Grodzovsky@amd.com <mailto:Andrey.Grodzovsky@amd.com>> wrote:
>>
>>         Thinking again about this change and 53d5f1e drm/scheduler:
>>         move idle
>>         entities to scheduler with less load v2
>>
>>         Looks to me like use case for which fc9a539 drm/scheduler:
>>         add NULL
>>         pointer check for run queue (v2) was done
>>
>>         will not work anymore.
>>
>>         First of all in drm_sched_entity_push_job, 'if (!entity->rq)'
>>         will never
>>         be true any more since we stopped setting entity->rq to NULL in
>>
>>         drm_sched_entity_flush.
>>
>>         Second of all, even after we removed the entity from rq in
>>         drm_sched_entity_flush to terminate any subsequent submissions
>>
>>         to the queue the other thread doing push job can just acquire
>>         again a
>>         run queue
>>
>>         from drm_sched_entity_get_free_sched and continue submission
>>         so you
>>         can't replace ' if (!entity->rq)' with 'if
>>         (list_empty(&entity->list))'.
>>
>>         What about adding a 'stopped' flag to drm_sched_entity to be
>>         set in
>>         drm_sched_entity_flush and in
>>
>>     But it might be worth adding a stopped flag field if it is
>>     required at other places as well.
>>
>>     Thanks,
>>     Nayan
>>
>>         drm_sched_entity_push_job check for  'if (!entity->stopped)'
>>         instead of
>>         ' if (!entity->rq)' ?
>>
>>         Andrey
>>
>>
>>         On 07/30/2018 07:03 AM, Christian König wrote:
>>         > We removed the redundancy of having an extra scheduler
>>         field, so we
>>         > can't set the rq to NULL any more or otherwise won't know which
>>         > scheduler to use for the cleanup.
>>         >
>>         > Just remove the entity from the scheduling list instead.
>>         >
>>         > Signed-off-by: Christian König <christian.koenig@amd.com
>>         <mailto:christian.koenig@amd.com>>
>>         > ---
>>         >   drivers/gpu/drm/scheduler/gpu_scheduler.c | 35
>>         +++++++------------------------
>>         >   1 file changed, 8 insertions(+), 27 deletions(-)
>>         >
>>         > diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>         b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>         > index f563e4fbb4b6..1b733229201e 100644
>>         > --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>         > +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>         > @@ -198,21 +198,6 @@ int drm_sched_entity_init(struct
>>         drm_sched_entity *entity,
>>         >   }
>>         >   EXPORT_SYMBOL(drm_sched_entity_init);
>>         >
>>         > -/**
>>         > - * drm_sched_entity_is_initialized - Query if entity is
>>         initialized
>>         > - *
>>         > - * @sched: Pointer to scheduler instance
>>         > - * @entity: The pointer to a valid scheduler entity
>>         > - *
>>         > - * return true if entity is initialized, false otherwise
>>         > -*/
>>         > -static bool drm_sched_entity_is_initialized(struct
>>         drm_gpu_scheduler *sched,
>>         > -  struct drm_sched_entity *entity)
>>         > -{
>>         > -     return entity->rq != NULL &&
>>         > -             entity->rq->sched == sched;
>>         > -}
>>         > -
>>         >   /**
>>         >    * drm_sched_entity_is_idle - Check if entity is idle
>>         >    *
>>         > @@ -224,7 +209,8 @@ static bool
>>         drm_sched_entity_is_idle(struct drm_sched_entity *entity)
>>         >   {
>>         >       rmb();
>>         >
>>         > -     if (!entity->rq ||
>>         spsc_queue_peek(&entity->job_queue) == NULL)
>>         > +     if (list_empty(&entity->list) ||
>>         > +  spsc_queue_peek(&entity->job_queue) == NULL)
>>         >               return true;
>>         >
>>         >       return false;
>>         > @@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct
>>         drm_sched_entity *entity, long timeout)
>>         >       long ret = timeout;
>>         >
>>         >       sched = entity->rq->sched;
>>         > -     if (!drm_sched_entity_is_initialized(sched, entity))
>>         > -             return ret;
>>         >       /**
>>         >        * The client will not queue more IBs during this
>>         fini, consume existing
>>         >        * queued IBs or discard them on SIGKILL
>>         > @@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct
>>         drm_sched_entity *entity, long timeout)
>>         >       last_user = cmpxchg(&entity->last_user,
>>         current->group_leader, NULL);
>>         >       if ((!last_user || last_user == current->group_leader) &&
>>         >           (current->flags & PF_EXITING) &&
>>         (current->exit_code == SIGKILL))
>>         > -  drm_sched_entity_set_rq(entity, NULL);
>>         > +  drm_sched_rq_remove_entity(entity->rq, entity);
>>         >
>>         >       return ret;
>>         >   }
>>         > @@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct
>>         drm_sched_entity *entity)
>>         >       struct drm_gpu_scheduler *sched;
>>         >
>>         >       sched = entity->rq->sched;
>>         > -     drm_sched_entity_set_rq(entity, NULL);
>>         > +  drm_sched_rq_remove_entity(entity->rq, entity);
>>         >
>>         >       /* Consumption of existing IBs wasn't completed.
>>         Forcefully
>>         >        * remove them here.
>>         > @@ -416,15 +400,12 @@ void drm_sched_entity_set_rq(struct
>>         drm_sched_entity *entity,
>>         >       if (entity->rq == rq)
>>         >               return;
>>         >
>>         > -     spin_lock(&entity->rq_lock);
>>         > -
>>         > -     if (entity->rq)
>>         > -  drm_sched_rq_remove_entity(entity->rq, entity);
>>         > +     BUG_ON(!rq);
>>         >
>>         > +     spin_lock(&entity->rq_lock);
>>         > +  drm_sched_rq_remove_entity(entity->rq, entity);
>>         >       entity->rq = rq;
>>         > -     if (rq)
>>         > -             drm_sched_rq_add_entity(rq, entity);
>>         > -
>>         > +     drm_sched_rq_add_entity(rq, entity);
>>         >       spin_unlock(&entity->rq_lock);
>>         >   }
>>         >   EXPORT_SYMBOL(drm_sched_entity_set_rq);
>>
>



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-08-03 13:47           ` Andrey Grodzovsky
@ 2018-08-03 13:58             ` Nayan Deshmukh
  0 siblings, 0 replies; 26+ messages in thread
From: Nayan Deshmukh @ 2018-08-03 13:58 UTC (permalink / raw)
  To: Andrey Grodzovsky; +Cc: ckoenig.leichtzumerken, Maling list - DRI developers


On Fri, Aug 3, 2018 at 7:17 PM Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>
wrote:

>
>
> On 08/03/2018 08:12 AM, Nayan Deshmukh wrote:
>
>
>
> On Thu, Aug 2, 2018, 8:09 PM Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>
> wrote:
>
>>
>>
>> On 08/02/2018 02:43 AM, Nayan Deshmukh wrote:
>>
>> Hi Andrey,
>>
>> Good catch. I guess since both Christian and I were working on it
>> in parallel we didn't catch this problem.
>>
>> If it is only causing a problem in push_job then we can handle it
>> with something like this:
>>
>>
>> ----------------------------------------------------------------------------------------------------------------------------
>> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> index 05dc6ecd4003..f344ce32f128 100644
>> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> @@ -563,6 +563,11 @@ void drm_sched_entity_push_job(struct drm_sched_job
>> *sched_job,
>>         first = spsc_queue_count(&entity->job_queue) == 0;
>>         reschedule = idle && first && (entity->num_rq_list > 1);
>>
>> +       if (first && list_empty(&entity->list)) {
>>
>>
>> first might be false if the other process was interrupted by SIGKILL in
>> the middle of wait_event_killable in drm_sched_entity_flush when there
>> were still items in the queue.
>> I don't see a good way besides adding a 'stopped' flag to
>> drm_sched_entity.
>>
> Sorry I missed this mail. This case might happen but this was also not
> being handled previously.
>
> Nayan
>
>
> The original code before 'drm/scheduler: stop setting rq to NULL' handled
> this, because you didn't use the queue's emptiness as a criterion for
> aborting your next push to the queue but rather checked for
> entity->rq != NULL.
>
But the (entity->rq != NULL) check was inside the body of if (first), so
it was also dependent on the queue's emptiness earlier. Well, in any case
it doesn't matter now, as with Christian's patch we can remove the if
condition altogether.
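
With the guard gone, the tail of drm_sched_entity_push_job from
Christian's patch reduces to something like:

	if (first) {
		/* Add the entity to the run queue */
		spin_lock(&entity->rq_lock);
		drm_sched_rq_add_entity(entity->rq, entity);
		spin_unlock(&entity->rq_lock);
		drm_sched_wakeup(entity->rq->sched);
	}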

Nayan

>
> Andrey
>
>
>> Andrey
>>
>> +               DRM_ERROR("Trying to push to a killed entity\n");
>> +               return;
>> +       }
>> +
>>         if (reschedule) {
>>                 rq = drm_sched_entity_get_free_sched(entity);
>>                 spin_lock(&entity->rq_lock);
>> @@ -580,11 +585,6 @@ void drm_sched_entity_push_job(struct drm_sched_job
>> *sched_job,
>>         if (first) {
>>                 /* Add the entity to the run queue */
>>                 spin_lock(&entity->rq_lock);
>> -               if (!entity->rq) {
>> -                       DRM_ERROR("Trying to push to a killed entity\n");
>> -                       spin_unlock(&entity->rq_lock);
>> -                       return;
>> -               }
>>                 drm_sched_rq_add_entity(entity->rq, entity);
>>                 spin_unlock(&entity->rq_lock);
>>                 drm_sched_wakeup(entity->rq->sched);
>>
>> -----------------------------------------------------------------------------------------------------------------------------
>>
>>
>> On Thu, Aug 2, 2018 at 3:55 AM Andrey Grodzovsky <
>> Andrey.Grodzovsky@amd.com> wrote:
>>
>>> Thinking again about this change and 53d5f1e drm/scheduler: move idle
>>> entities to scheduler with less load v2, it looks to me like the use
>>> case for which fc9a539 drm/scheduler: add NULL pointer check for run
>>> queue (v2) was done will not work anymore.
>>>
>>> First of all, in drm_sched_entity_push_job, 'if (!entity->rq)' will never
>>> be true any more since we stopped setting entity->rq to NULL in
>>> drm_sched_entity_flush.
>>>
>>> Second of all, even after we removed the entity from the rq in
>>> drm_sched_entity_flush to terminate any subsequent submissions to the
>>> queue, the other thread doing push_job can just acquire a run queue
>>> again from drm_sched_entity_get_free_sched and continue submission, so
>>> you can't substitute 'if (!entity->rq)' with
>>> 'if (list_empty(&entity->list))'.
>>>
>>> What about adding a 'stopped' flag to drm_sched_entity to be set in
>>> drm_sched_entity_flush and in drm_sched_entity_push_job check for
>>> 'if (!entity->stopped)' instead of 'if (!entity->rq)' ?
>>>
>>> Andrey
>>
>> But it might be worth adding a stopped flag field if it is required at
>> other places as well.
>>
>> Thanks,
>> Nayan
>>>
>>>
>>> On 07/30/2018 07:03 AM, Christian König wrote:
>>> > We removed the redundancy of having an extra scheduler field, so we
>>> > can't set the rq to NULL any more or otherwise won't know which
>>> > scheduler to use for the cleanup.
>>> >
>>> > Just remove the entity from the scheduling list instead.
>>> >
>>> > Signed-off-by: Christian König <christian.koenig@amd.com>
>>> > ---
>>> >  drivers/gpu/drm/scheduler/gpu_scheduler.c | 35 +++++++++++------------------------
>>> >  1 file changed, 8 insertions(+), 27 deletions(-)
>>> >
>>> > diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>> > index f563e4fbb4b6..1b733229201e 100644
>>> > --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>> > +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>>> > @@ -198,21 +198,6 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
>>> >  }
>>> >  EXPORT_SYMBOL(drm_sched_entity_init);
>>> >
>>> > -/**
>>> > - * drm_sched_entity_is_initialized - Query if entity is initialized
>>> > - *
>>> > - * @sched: Pointer to scheduler instance
>>> > - * @entity: The pointer to a valid scheduler entity
>>> > - *
>>> > - * return true if entity is initialized, false otherwise
>>> > -*/
>>> > -static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler *sched,
>>> > -                                            struct drm_sched_entity *entity)
>>> > -{
>>> > -     return entity->rq != NULL &&
>>> > -             entity->rq->sched == sched;
>>> > -}
>>> > -
>>> >  /**
>>> >   * drm_sched_entity_is_idle - Check if entity is idle
>>> >   *
>>> > @@ -224,7 +209,8 @@ static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
>>> >  {
>>> >      rmb();
>>> >
>>> > -    if (!entity->rq || spsc_queue_peek(&entity->job_queue) == NULL)
>>> > +    if (list_empty(&entity->list) ||
>>> > +        spsc_queue_peek(&entity->job_queue) == NULL)
>>> >          return true;
>>> >
>>> >      return false;
>>> > @@ -279,8 +265,6 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
>>> >      long ret = timeout;
>>> >
>>> >      sched = entity->rq->sched;
>>> > -    if (!drm_sched_entity_is_initialized(sched, entity))
>>> > -        return ret;
>>> >      /**
>>> >       * The client will not queue more IBs during this fini, consume existing
>>> >       * queued IBs or discard them on SIGKILL
>>> > @@ -299,7 +283,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
>>> >      last_user = cmpxchg(&entity->last_user, current->group_leader, NULL);
>>> >      if ((!last_user || last_user == current->group_leader) &&
>>> >          (current->flags & PF_EXITING) && (current->exit_code == SIGKILL))
>>> > -        drm_sched_entity_set_rq(entity, NULL);
>>> > +        drm_sched_rq_remove_entity(entity->rq, entity);
>>> >
>>> >      return ret;
>>> >  }
>>> > @@ -320,7 +304,7 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity)
>>> >      struct drm_gpu_scheduler *sched;
>>> >
>>> >      sched = entity->rq->sched;
>>> > -    drm_sched_entity_set_rq(entity, NULL);
>>> > +    drm_sched_rq_remove_entity(entity->rq, entity);
>>> >
>>> >      /* Consumption of existing IBs wasn't completed. Forcefully
>>> >       * remove them here.
>>> > @@ -416,15 +400,12 @@ void drm_sched_entity_set_rq(struct drm_sched_entity *entity,
>>> >      if (entity->rq == rq)
>>> >          return;
>>> >
>>> > -    spin_lock(&entity->rq_lock);
>>> > -
>>> > -    if (entity->rq)
>>> > -        drm_sched_rq_remove_entity(entity->rq, entity);
>>> > +    BUG_ON(!rq);
>>> >
>>> > +    spin_lock(&entity->rq_lock);
>>> > +    drm_sched_rq_remove_entity(entity->rq, entity);
>>> >      entity->rq = rq;
>>> > -    if (rq)
>>> > -        drm_sched_rq_add_entity(rq, entity);
>>> > -
>>> > +    drm_sched_rq_add_entity(rq, entity);
>>> >      spin_unlock(&entity->rq_lock);
>>> >  }
>>> >  EXPORT_SYMBOL(drm_sched_entity_set_rq);
>>>
>>>
>>
>
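
A minimal sketch of the 'stopped' flag idea discussed above, assuming the
rq_lock protection used in the quoted patches; this is an illustrative
sketch, not an actual patch:

        /* in struct drm_sched_entity: mark the entity as permanently dead */
        bool    stopped;

        /* drm_sched_entity_flush(), on SIGKILL: stop the entity for good */
        spin_lock(&entity->rq_lock);
        entity->stopped = true;
        drm_sched_rq_remove_entity(entity->rq, entity);
        spin_unlock(&entity->rq_lock);

        /* drm_sched_entity_push_job(): refuse new jobs after that point */
        spin_lock(&entity->rq_lock);
        if (entity->stopped) {
                spin_unlock(&entity->rq_lock);
                DRM_ERROR("Trying to push to a killed entity\n");
                return;
        }
        drm_sched_rq_add_entity(entity->rq, entity);
        spin_unlock(&entity->rq_lock);

Because the flag is both set and tested under rq_lock, a concurrent
push_job can no longer re-acquire a run queue for an entity that flush
has already stopped, which also covers the list_empty() race pointed out
above.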


* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-08-02 14:11         ` Andrey Grodzovsky
@ 2018-08-03 14:06           ` Christian König
  2018-08-03 14:54             ` Andrey Grodzovsky
  0 siblings, 1 reply; 26+ messages in thread
From: Christian König @ 2018-08-03 14:06 UTC (permalink / raw)
  To: Andrey Grodzovsky, Nayan Deshmukh; +Cc: Maling list - DRI developers


Am 02.08.2018 um 16:11 schrieb Andrey Grodzovsky:
>
>
>
> On 08/02/2018 02:47 AM, Nayan Deshmukh wrote:
>>
>>
>> On Thu, Aug 2, 2018 at 12:12 PM Christian König 
>> <ckoenig.leichtzumerken@gmail.com 
>> <mailto:ckoenig.leichtzumerken@gmail.com>> wrote:
>>
>>     Am 02.08.2018 um 00:25 schrieb Andrey Grodzovsky:
>>     > Thinking again about this change and 53d5f1e drm/scheduler:
>>     move idle
>>     > entities to scheduler with less load v2
>>     >
>>     > Looks to me like use case for which fc9a539 drm/scheduler: add
>>     NULL
>>     > pointer check for run queue (v2) was done
>>     >
>>     > will not work anymore.
>>     >
>>     > First of all in drm_sched_entity_push_job, 'if (!entity->rq)' will
>>     > never be true any more since we stopped entity->rq to NULL in
>>     >
>>     > drm_sched_entity_flush.
>>
>>     Good point, going to remove that.
>>
>
> So basically we don't need that if (...) { return; } in
> drm_sched_entity_push_job any more?

Yes, exactly.

>
>>
>>     >
>>     > Second of all, even after we removed the entity from rq in
>>     > drm_sched_entity_flush to terminate any subsequent submissions
>>     >
>>     > to the queue the other thread doing push job can just acquire
>>     again a
>>     > run queue
>>     >
>>     > from drm_sched_entity_get_free_sched and continue submission
>>
>> Hi Christian
>>
>>     That is actually desired.
>>
>>     When another process is now using the entity to submit jobs, adding
>>     it back to the rq is actually the right thing to do because the
>>     entity is still in use.
>>
>
> Yes, no problem if it's another process. But what about another thread
> from the same process? Is it a possible use case that 2 threads from the
> same process submit to the same entity concurrently? If so, and we
> specifically kill one, the other will not stop even if we want it to,
> because the current code lets it just acquire an rq for itself.

Well you can't kill a single thread of a process (you can only interrupt 
it), a SIGKILL will always kill the whole process.

>
>> I am not aware of the use case, so I might just be rambling, but if the
>> thread/process that created the entity has called
>> drm_sched_entity_flush then we shouldn't allow other processes to
>> push jobs to that entity.
>>
>> Nayan
>>
>>
>>     Christian.
>>
> We don't really know who created the entity; the creation could be by
> X itself, with the entity then passed to another process for use. Check
> 'drm/scheduler: only kill entity if last user is killed v2': the idea
> is that if, by the time you want to kill this entity, another process
> (not another thread from your process - i.e. current->group_leader !=
> last_user in drm_sched_entity_flush) has already started to use this
> entity, just let it be.

Yes, exactly that's the idea.

Christian.

>
> Andrey
>>
>>
>>     > so you can't substitute ' if (!entity->rq)' to 'if
>>     > (list_empty(&entity->list))'.
>>     >
>>     > What about adding a 'stopped' flag to drm_sched_entity to be
>>     set in
>>     > drm_sched_entity_flush and in
>>     >
>>     > drm_sched_entity_push_job check for  'if (!entity->stopped)'
>>     instead
>>     > of  ' if (!entity->rq)' ?
>>     >
>>     > Andrey
>>     >
>>     >
>>     > On 07/30/2018 07:03 AM, Christian König wrote:
>>     >> [SNIP]
>>     >
>>
>
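
As a sketch of the last_user handshake being discussed, reconstructed
from the hunks quoted above (simplified, not the exact code):

        struct task_struct *last_user;

        /* push side, drm_sched_entity_push_job(): remember the group
         * leader of whoever pushed last (simplified) */
        entity->last_user = current->group_leader;

        /* teardown side, drm_sched_entity_flush(): atomically take the
         * slot back; the cmpxchg only clears it if nobody pushed since */
        last_user = cmpxchg(&entity->last_user, current->group_leader, NULL);
        if ((!last_user || last_user == current->group_leader) &&
            (current->flags & PF_EXITING) && (current->exit_code == SIGKILL))
                drm_sched_rq_remove_entity(entity->rq, entity);

If another process pushed a job in the meantime, last_user holds that
process' group leader, the condition fails, and the entity stays alive
for the new user; that is the "just let it be" behaviour described above.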



* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-08-03 14:06           ` Christian König
@ 2018-08-03 14:54             ` Andrey Grodzovsky
  2018-08-03 16:42               ` Christian König
  0 siblings, 1 reply; 26+ messages in thread
From: Andrey Grodzovsky @ 2018-08-03 14:54 UTC (permalink / raw)
  To: Christian König, Nayan Deshmukh; +Cc: Maling list - DRI developers




On 08/03/2018 10:06 AM, Christian König wrote:
> Am 02.08.2018 um 16:11 schrieb Andrey Grodzovsky:
>>
>>
>>
>> On 08/02/2018 02:47 AM, Nayan Deshmukh wrote:
>>>
>>>
>>> On Thu, Aug 2, 2018 at 12:12 PM Christian König 
>>> <ckoenig.leichtzumerken@gmail.com 
>>> <mailto:ckoenig.leichtzumerken@gmail.com>> wrote:
>>>
>>>     Am 02.08.2018 um 00:25 schrieb Andrey Grodzovsky:
>>>     > Thinking again about this change and 53d5f1e drm/scheduler:
>>>     move idle
>>>     > entities to scheduler with less load v2
>>>     >
>>>     > Looks to me like use case for which fc9a539 drm/scheduler: add
>>>     NULL
>>>     > pointer check for run queue (v2) was done
>>>     >
>>>     > will not work anymore.
>>>     >
>>>     > First of all in drm_sched_entity_push_job, 'if (!entity->rq)'
>>>     will
>>>     > never be true any more since we stopped entity->rq to NULL in
>>>     >
>>>     > drm_sched_entity_flush.
>>>
>>>     Good point, going to remove that.
>>>
>>
>> So basically we don't need that if (...){ return; } in 
>> drm_sched_entity_push_job any more ?
>
> Yes, exactly.
>
>>
>>>
>>>     >
>>>     > Second of all, even after we removed the entity from rq in
>>>     > drm_sched_entity_flush to terminate any subsequent submissions
>>>     >
>>>     > to the queue the other thread doing push job can just acquire
>>>     again a
>>>     > run queue
>>>     >
>>>     > from drm_sched_entity_get_free_sched and continue submission
>>>
>>> Hi Christian
>>>
>>>     That is actually desired.
>>>
>>>     When another process is now using the entity to submit jobs
>>>     adding it
>>>     back to the rq is actually the right thing to do cause the
>>>     entity is
>>>     still in use.
>>>
>>
>> Yes, no problem if it's another process. But what about another 
>> thread from same process ? Is it a possible use case that 2 threads 
>> from same process submit to same entity concurrently ? If so and we 
>> specifically kill one, the other will not stop event if we want him 
>> to because current code makes him just require a rq for him self.
>
> Well you can't kill a single thread of a process (you can only 
> interrupt it), a SIGKILL will always kill the whole process.

Is the following scenario possible and acceptable?
Two threads from the same process work on the same queue. Thread A is
currently in drm_sched_entity_flush->wait_event_timeout (the process is
getting shut down because of a SIGKILL sent by the user) while thread B
is still inside drm_sched_entity_push_job, before 'if (reschedule)'.
'A' stops waiting because the queue became empty and then removes the
entity from the scheduler's run queue, while B goes inside 'reschedule'
because it evaluates to true ('first' is true, and so are all the rest
of the conditions), acquires a new rq, later adds the entity back to a
scheduler (maybe a different one) and keeps submitting jobs as much as
it likes, and can then be stuck for up to 'timeout' time in its
drm_sched_entity_flush waiting for them.

My understanding was that entity->last_user was introduced to force
immediate termination of job submissions by any thread from the
terminating process.

Andrey

>
>>
>>> I am not aware of the usecase so I might just be rambling. but if 
>>> the thread/process that created the entity has called 
>>> drm_sched_entity_flush then we shouldn't allow other processes to 
>>> push jobs to that entity.
>>>
>>> Nayan
>>>
>>>
>>>     Christian.
>>>
>> We don't really know who created the entity, the creation could be by 
>> X itself and then it's passed to other process for use. Check 
>> 'drm/scheduler: only kill entity if last user is killed v2', the idea 
>> is that if by the time you want to
>> kill this entity another process (not another thread from your 
>> process - i.e. current->group_leader != last_user in 
>> drm_sched_entity_flush) already started to use this entity just let 
>> it be.
>
> Yes, exactly that's the idea.
>
> Christian.
>
>>
>> Andrey
>>>
>>>
>>>     > so you can't substitute ' if (!entity->rq)' to 'if
>>>     > (list_empty(&entity->list))'.
>>>     >
>>>     > What about adding a 'stopped' flag to drm_sched_entity to be
>>>     set in
>>>     > drm_sched_entity_flush and in
>>>     >
>>>     > drm_sched_entity_push_job check for  'if (!entity->stopped)'
>>>     instead
>>>     > of  ' if (!entity->rq)' ?
>>>     >
>>>     > Andrey
>>>     >
>>>     >
>>>     > On 07/30/2018 07:03 AM, Christian König wrote:
>>>     >> [SNIP]
>>>     >
>>>
>>
>
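
For reference, the push_job path being described, as it appears in the
hunks Nayan quoted earlier in the thread (a simplified sketch; only the
first three lines appear verbatim in the quoted diff, the body of the if
block is an assumption based on the surrounding discussion):

        first = spsc_queue_count(&entity->job_queue) == 0;
        reschedule = idle && first && (entity->num_rq_list > 1);

        if (reschedule) {
                /* thread B can reach this point after thread A's flush has
                 * removed the entity from its run queue: it simply picks a
                 * new (possibly different) scheduler and re-adds itself */
                rq = drm_sched_entity_get_free_sched(entity);
                spin_lock(&entity->rq_lock);
                drm_sched_rq_remove_entity(entity->rq, entity);
                entity->rq = rq;
                spin_unlock(&entity->rq_lock);
        }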



* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-08-03 14:54             ` Andrey Grodzovsky
@ 2018-08-03 16:42               ` Christian König
  2018-08-03 18:41                 ` Andrey Grodzovsky
  0 siblings, 1 reply; 26+ messages in thread
From: Christian König @ 2018-08-03 16:42 UTC (permalink / raw)
  To: Andrey Grodzovsky, Nayan Deshmukh; +Cc: Maling list - DRI developers


Am 03.08.2018 um 16:54 schrieb Andrey Grodzovsky:
>
> [SNIP]
>
>>>
>>>>
>>>>     >
>>>>     > Second of all, even after we removed the entity from rq in
>>>>     > drm_sched_entity_flush to terminate any subsequent submissions
>>>>     >
>>>>     > to the queue the other thread doing push job can just acquire
>>>>     again a
>>>>     > run queue
>>>>     >
>>>>     > from drm_sched_entity_get_free_sched and continue submission
>>>>
>>>> Hi Christian
>>>>
>>>>     That is actually desired.
>>>>
>>>>     When another process is now using the entity to submit jobs
>>>>     adding it
>>>>     back to the rq is actually the right thing to do cause the
>>>>     entity is
>>>>     still in use.
>>>>
>>>
>>> Yes, no problem if it's another process. But what about another 
>>> thread from same process ? Is it a possible use case that 2 threads 
>>> from same process submit to same entity concurrently ? If so and we 
>>> specifically kill one, the other will not stop event if we want him 
>>> to because current code makes him just require a rq for him self.
>>
>> Well you can't kill a single thread of a process (you can only 
>> interrupt it), a SIGKILL will always kill the whole process.
>
> Is the following scenario possible and acceptable ?
> 2 threads from same process working on same queue where thread A 
> currently in drm_sched_entity_flush->wait_event_timeout (the process 
> getting shut down because of SIGKILL sent by user)
> while thread B still inside drm_sched_entity_push_job before 'if 
> (reschedule)'. 'A' stopped waiting because queue became empty and then 
> removes the entity queue from scheduler's run queue while
> B goes inside 'reschedule' because it evaluates to true ('first' is 
> true and all the rest of the conditions), acquires new rq, and later 
> adds it back to scheduler (different one maybe) and keeps submitting 
> jobs as much as he likes and then can be stack for up to 'timeout' 
> time  in his drm_sched_entity_flush waiting for them.

I'm not 100% sure but I don't think that can happen.

See, flushing the fd is done while dropping the fd, which happens only
after all threads of the process in question are killed.

Otherwise the flushing wouldn't make much sense. In other words, imagine
an application that is killed while one of its threads is doing a
write() on an fd.

The idea of the flush is to preserve the data and that won't work if 
that isn't correctly ordered.

> My understanding was that introduction of entity->last is to force 
> immediate termination job submissions by any thread from the 
> terminating process.

We could consider reordering that once more. Going to play out all 
scenarios in my head over the weekend :)

Christian.

>
> Andrey



* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-08-03 16:42               ` Christian König
@ 2018-08-03 18:41                 ` Andrey Grodzovsky
  2018-08-06  8:14                   ` Christian König
  0 siblings, 1 reply; 26+ messages in thread
From: Andrey Grodzovsky @ 2018-08-03 18:41 UTC (permalink / raw)
  To: Christian König, Nayan Deshmukh; +Cc: Maling list - DRI developers




On 08/06/2018 08:44 AM, Christian König wrote:
> Am 03.08.2018 um 16:54 schrieb Andrey Grodzovsky:
>>
>> [SNIP]
>>
>>>>
>>>>>
>>>>>     >
>>>>>     > Second of all, even after we removed the entity from rq in
>>>>>     > drm_sched_entity_flush to terminate any subsequent submissions
>>>>>     >
>>>>>     > to the queue the other thread doing push job can just
>>>>>     acquire again a
>>>>>     > run queue
>>>>>     >
>>>>>     > from drm_sched_entity_get_free_sched and continue submission
>>>>>
>>>>> Hi Christian
>>>>>
>>>>>     That is actually desired.
>>>>>
>>>>>     When another process is now using the entity to submit jobs
>>>>>     adding it
>>>>>     back to the rq is actually the right thing to do cause the
>>>>>     entity is
>>>>>     still in use.
>>>>>
>>>>
>>>> Yes, no problem if it's another process. But what about another 
>>>> thread from same process ? Is it a possible use case that 2 threads 
>>>> from same process submit to same entity concurrently ? If so and we 
>>>> specifically kill one, the other will not stop event if we want him 
>>>> to because current code makes him just require a rq for him self.
>>>
>>> Well you can't kill a single thread of a process (you can only 
>>> interrupt it), a SIGKILL will always kill the whole process.
>>
>> Is the following scenario possible and acceptable ?
>> 2 threads from same process working on same queue where thread A 
>> currently in drm_sched_entity_flush->wait_event_timeout (the process 
>> getting shut down because of SIGKILL sent by user)
>> while thread B still inside drm_sched_entity_push_job before 'if 
>> (reschedule)'. 'A' stopped waiting because queue became empty and 
>> then removes the entity queue from scheduler's run queue while
>> B goes inside 'reschedule' because it evaluates to true ('first' is 
>> true and all the rest of the conditions), acquires new rq, and later 
>> adds it back to scheduler (different one maybe) and keeps submitting 
>> jobs as much as he likes and then can be stack for up to 'timeout' 
>> time  in his drm_sched_entity_flush waiting for them.
>
> I'm not 100% sure but I don't think that can happen.
>
> See flushing the fd is done while dropping the fd, which happens only 
> after all threads of the process in question are killed.

Yeah, this fd handling is indeed a gray area for me, but as far as I
remember flushing is done per thread when it exits (possibly due to a
signal).
Signal interception and processing (as a result of which .flush will get
called if SIGKILL was received) happen at certain points, among them
when returning from an ioctl.
So if the first thread was at the very end of the CS ioctl when SIGKILL
was received while the other one was at the beginning, then I think we
might see something like the scenario above.

Andrey

>
> Otherwise the flushing wouldn't make to much sense. In other words 
> imagine an application where a thread does a write() on a fd which is 
> killed.
>
> The idea of the flush is to preserve the data and that won't work if 
> that isn't correctly ordered.
>
>> My understanding was that introduction of entity->last is to force 
>> immediate termination job submissions by any thread from the 
>> terminating process.
>
> We could consider reordering that once more. Going to play out all 
> scenarios in my head over the weekend :)
>
> Christian.
>
>>
>> Andrey
>
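
A rough sketch of the exit path described here, using names from
kernel/signal.c, kernel/exit.c and fs/file.c; every detail below is an
assumption to be checked against the actual sources:

        /* get_signal(), for a fatal signal such as SIGKILL */
        do_group_exit(signr);
                /* -> zap_other_threads(): queues SIGKILL for the sibling
                 *    threads and wakes them up */
                /* -> do_exit() for the current thread */

        /* do_exit(), among many other things */
        exit_files(tsk);
                /* drops this thread's reference on the shared files_struct;
                 * when the last thread drops it, close_files() runs
                 * filp_close() on each open file, invoking f_op->flush() */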



* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-08-03 18:41                 ` Andrey Grodzovsky
@ 2018-08-06  8:14                   ` Christian König
  2018-08-09 17:39                     ` Andrey Grodzovsky
  0 siblings, 1 reply; 26+ messages in thread
From: Christian König @ 2018-08-06  8:14 UTC (permalink / raw)
  To: Andrey Grodzovsky, Christian König, Nayan Deshmukh
  Cc: Maling list - DRI developers


Am 03.08.2018 um 20:41 schrieb Andrey Grodzovsky:
>
>
>
> On 08/06/2018 08:44 AM, Christian König wrote:
>> Am 03.08.2018 um 16:54 schrieb Andrey Grodzovsky:
>>>
>>> [SNIP]
>>>
>>>>>
>>>>>>
>>>>>>     >
>>>>>>     > Second of all, even after we removed the entity from rq in
>>>>>>     > drm_sched_entity_flush to terminate any subsequent submissions
>>>>>>     >
>>>>>>     > to the queue the other thread doing push job can just
>>>>>>     acquire again a
>>>>>>     > run queue
>>>>>>     >
>>>>>>     > from drm_sched_entity_get_free_sched and continue submission
>>>>>>
>>>>>> Hi Christian
>>>>>>
>>>>>>     That is actually desired.
>>>>>>
>>>>>>     When another process is now using the entity to submit jobs
>>>>>>     adding it
>>>>>>     back to the rq is actually the right thing to do cause the
>>>>>>     entity is
>>>>>>     still in use.
>>>>>>
>>>>>
>>>>> Yes, no problem if it's another process. But what about another 
>>>>> thread from same process ? Is it a possible use case that 2 
>>>>> threads from same process submit to same entity concurrently ? If 
>>>>> so and we specifically kill one, the other will not stop event if 
>>>>> we want him to because current code makes him just require a rq 
>>>>> for him self.
>>>>
>>>> Well you can't kill a single thread of a process (you can only 
>>>> interrupt it), a SIGKILL will always kill the whole process.
>>>
>>> Is the following scenario possible and acceptable ?
>>> 2 threads from same process working on same queue where thread A 
>>> currently in drm_sched_entity_flush->wait_event_timeout (the process 
>>> getting shut down because of SIGKILL sent by user)
>>> while thread B still inside drm_sched_entity_push_job before 'if 
>>> (reschedule)'. 'A' stopped waiting because queue became empty and 
>>> then removes the entity queue from scheduler's run queue while
>>> B goes inside 'reschedule' because it evaluates to true ('first' is 
>>> true and all the rest of the conditions), acquires new rq, and later 
>>> adds it back to scheduler (different one maybe) and keeps submitting 
>>> jobs as much as he likes and then can be stack for up to 'timeout' 
>>> time  in his drm_sched_entity_flush waiting for them.
>>
>> I'm not 100% sure but I don't think that can happen.
>>
>> See flushing the fd is done while dropping the fd, which happens only 
>> after all threads of the process in question are killed.
>
> Yea, this FDs handling is indeed a lot of gray area for me but as far 
> as I remember flushing is done per each thread when exits (possibly 
> due to a signal).
> Now signals interception and processing (as a result of which .flush 
> will get called if SIGKILL received) is done in some points amongst 
> which is when returning from IOCTL.
> So if first thread was at the very end of the CS ioctl when SIGKILL 
> was received while the other one at the beginning  then I think we 
> might see something like the scenario above.

SIGKILL isn't processed as long as any thread of the application is 
still inside the kernel. That's why we have wait_event_killable().

So I don't think that the scenario above is possible, but I'm really not 
100% sure either.

Christian.

>
> Andrey
>
>>
>> Otherwise the flushing wouldn't make to much sense. In other words 
>> imagine an application where a thread does a write() on a fd which is 
>> killed.
>>
>> The idea of the flush is to preserve the data and that won't work if 
>> that isn't correctly ordered.
>>
>>> My understanding was that introduction of entity->last is to force 
>>> immediate termination job submissions by any thread from the 
>>> terminating process.
>>
>> We could consider reordering that once more. Going to play out all 
>> scenarios in my head over the weekend :)
>>
>> Christian.
>>
>>>
>>> Andrey
>>
>
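
For context, the wait Christian is referring to in drm_sched_entity_flush,
reconstructed from the quoted hunks and this discussion (a sketch; the
exact code may differ):

        /* The client will not queue more IBs during this fini, consume
         * existing queued IBs or discard them on SIGKILL */
        if (current->flags & PF_EXITING) {
                if (timeout)
                        ret = wait_event_timeout(sched->job_scheduled,
                                        drm_sched_entity_is_idle(entity),
                                        timeout);
        } else
                wait_event_killable(sched->job_scheduled,
                                drm_sched_entity_is_idle(entity));

wait_event_killable() only aborts the sleep when a fatal signal is
pending, so a task that received SIGKILL does not stay stuck in the flush.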



* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-08-06  8:14                   ` Christian König
@ 2018-08-09 17:39                     ` Andrey Grodzovsky
  2018-08-10 13:16                       ` Christian König
  0 siblings, 1 reply; 26+ messages in thread
From: Andrey Grodzovsky @ 2018-08-09 17:39 UTC (permalink / raw)
  To: christian.koenig, Nayan Deshmukh; +Cc: Maling list - DRI developers




On 08/06/2018 04:14 AM, Christian König wrote:
> Am 03.08.2018 um 20:41 schrieb Andrey Grodzovsky:
>>
>>
>>
>> On 08/06/2018 08:44 AM, Christian König wrote:
>>> Am 03.08.2018 um 16:54 schrieb Andrey Grodzovsky:
>>>>
>>>> [SNIP]
>>>>
>>>>>>
>>>>>>>
>>>>>>>     >
>>>>>>>     > Second of all, even after we removed the entity from rq in
>>>>>>>     > drm_sched_entity_flush to terminate any subsequent submissions
>>>>>>>     >
>>>>>>>     > to the queue the other thread doing push job can just
>>>>>>>     acquire again a
>>>>>>>     > run queue
>>>>>>>     >
>>>>>>>     > from drm_sched_entity_get_free_sched and continue submission
>>>>>>>
>>>>>>> Hi Christian
>>>>>>>
>>>>>>>     That is actually desired.
>>>>>>>
>>>>>>>     When another process is now using the entity to submit jobs
>>>>>>>     adding it
>>>>>>>     back to the rq is actually the right thing to do cause the
>>>>>>>     entity is
>>>>>>>     still in use.
>>>>>>>
>>>>>>
>>>>>> Yes, no problem if it's another process. But what about another 
>>>>>> thread from same process ? Is it a possible use case that 2 
>>>>>> threads from same process submit to same entity concurrently ? If 
>>>>>> so and we specifically kill one, the other will not stop event if 
>>>>>> we want him to because current code makes him just require a rq 
>>>>>> for him self.
>>>>>
>>>>> Well you can't kill a single thread of a process (you can only 
>>>>> interrupt it), a SIGKILL will always kill the whole process.
>>>>
>>>> Is the following scenario possible and acceptable ?
>>>> 2 threads from same process working on same queue where thread A 
>>>> currently in drm_sched_entity_flush->wait_event_timeout (the 
>>>> process getting shut down because of SIGKILL sent by user)
>>>> while thread B still inside drm_sched_entity_push_job before 'if 
>>>> (reschedule)'. 'A' stopped waiting because queue became empty and 
>>>> then removes the entity queue from scheduler's run queue while
>>>> B goes inside 'reschedule' because it evaluates to true ('first' is 
>>>> true and all the rest of the conditions), acquires new rq, and 
>>>> later adds it back to scheduler (different one maybe) and keeps 
>>>> submitting jobs as much as he likes and then can be stack for up to 
>>>> 'timeout' time  in his drm_sched_entity_flush waiting for them.
>>>
>>> I'm not 100% sure but I don't think that can happen.
>>>
>>> See flushing the fd is done while dropping the fd, which happens 
>>> only after all threads of the process in question are killed.
>>
>> Yea, this FDs handling is indeed a lot of gray area for me but as far 
>> as I remember flushing is done per each thread when exits (possibly 
>> due to a signal).
>> Now signals interception and processing (as a result of which .flush 
>> will get called if SIGKILL received) is done in some points amongst 
>> which is when returning from IOCTL.
>> So if first thread was at the very end of the CS ioctl when SIGKILL 
>> was received while the other one at the beginning  then I think we 
>> might see something like the scenario above.
>
> SIGKILL isn't processed as long as any thread of the application is 
> still inside the kernel. That's why we have wait_event_killable().

Can you tell me where this is happening? What I see in the code is that
do_group_exit calls zap_other_threads, which just adds SIGKILL to the
signal sets of the other threads in the group and sends a wake-up.
Then do_exit will close all fds for the current thread, so .flush will
be called; when the last thread drops its refcount for the fd, .release
will be called.

Andrey

>
> So I don't think that the scenario above is possible, but I'm really 
> not 100% sure either.
>
> Christian.
>
>>
>> Andrey
>>
>>>
>>> Otherwise the flushing wouldn't make to much sense. In other words 
>>> imagine an application where a thread does a write() on a fd which 
>>> is killed.
>>>
>>> The idea of the flush is to preserve the data and that won't work if 
>>> that isn't correctly ordered.
>>>
>>>> My understanding was that introduction of entity->last is to force 
>>>> immediate termination job submissions by any thread from the 
>>>> terminating process.
>>>
>>> We could consider reordering that once more. Going to play out all 
>>> scenarios in my head over the weekend :)
>>>
>>> Christian.
>>>
>>>>
>>>> Andrey
>>>
>>
>
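
The VFS side of the .flush vs .release distinction, paraphrased from
fs/open.c (simplified; refcount sanity checks and locking details omitted):

        int filp_close(struct file *filp, fl_owner_t id)
        {
                int retval = 0;

                /* ->flush() runs on every close of this file */
                if (filp->f_op->flush)
                        retval = filp->f_op->flush(filp, id);

                /* ->release() runs inside fput() only when the last
                 * reference to the file is dropped */
                fput(filp);
                return retval;
        }

So ->flush() can indeed run once per closing task while ->release() is
deferred to the final reference drop, which is what makes the ordering
question above non-trivial.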



* Re: [PATCH 2/2] drm/scheduler: stop setting rq to NULL
  2018-08-09 17:39                     ` Andrey Grodzovsky
@ 2018-08-10 13:16                       ` Christian König
  0 siblings, 0 replies; 26+ messages in thread
From: Christian König @ 2018-08-10 13:16 UTC (permalink / raw)
  To: Andrey Grodzovsky, christian.koenig, Nayan Deshmukh
  Cc: Maling list - DRI developers

Am 09.08.2018 um 19:39 schrieb Andrey Grodzovsky:
> [SNIP]
>> SIGKILL isn't processed as long as any thread of the application is 
>> still inside the kernel. That's why we have wait_event_killable().
>
> Can you tell me where is this happening ? What i see is in the code is 
> that
> do_group_exit calls zap_other_threads which just adds SIGKILL to 
> signal sets of other threads in group and sends a wake up.
> Then do_exit will close all FDs for current thread and so .flush will 
> be called, when last thread drops it's refcount for the FD .release 
> will be called.

Good question, I have not the slightest idea.

Killed processes certainly don't die until all threads return from
kernel space, but I'm not 100% sure if that happens before or after the
flush.

Christian.

>
> Andrey


Thread overview: 26+ messages
2018-07-30 11:03 [PATCH 1/2] drm/scheduler: only kill entity if last user is killed v2 Christian König
2018-07-30 11:03 ` [PATCH 2/2] drm/scheduler: stop setting rq to NULL Christian König
2018-07-30 13:30   ` Nayan Deshmukh
2018-07-30 20:51     ` Andrey Grodzovsky
2018-07-31  6:50       ` Christian König
2018-07-31 14:16         ` Andrey Grodzovsky
2018-08-01 22:25   ` Andrey Grodzovsky
2018-08-02  6:42     ` Christian König
2018-08-02  6:47       ` Nayan Deshmukh
2018-08-02  7:26         ` Christian König
2018-08-02 14:11         ` Andrey Grodzovsky
2018-08-03 14:06           ` Christian König
2018-08-03 14:54             ` Andrey Grodzovsky
2018-08-03 16:42               ` Christian König
2018-08-03 18:41                 ` Andrey Grodzovsky
2018-08-06  8:14                   ` Christian König
2018-08-09 17:39                     ` Andrey Grodzovsky
2018-08-10 13:16                       ` Christian König
2018-08-02  6:43     ` Nayan Deshmukh
2018-08-02 14:38       ` Andrey Grodzovsky
2018-08-03 12:12         ` Nayan Deshmukh
2018-08-03 13:47           ` Andrey Grodzovsky
2018-08-03 13:58             ` Nayan Deshmukh
2018-07-30 13:34 ` [PATCH 1/2] drm/scheduler: only kill entity if last user is killed v2 Nayan Deshmukh
2018-07-30 20:42   ` Andrey Grodzovsky
2018-07-31  9:11     ` Nayan Deshmukh
