All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/scheduler: add a current job field to scheduler
@ 2018-09-18 16:17 Nayan Deshmukh
  2018-09-18 16:17 ` [PATCH 2/2] drm/scheduler: remove timeout work_struct from drm_sched_job Nayan Deshmukh
  2018-09-18 17:09 ` [PATCH 1/2] drm/scheduler: add a current job field to scheduler Koenig, Christian
  0 siblings, 2 replies; 9+ messages in thread
From: Nayan Deshmukh @ 2018-09-18 16:17 UTC (permalink / raw)
  To: dri-devel; +Cc: Nayan Deshmukh, christian.koenig

Which points to the job running on the hardware. This is
useful when we need to access the currently executing job
from the scheduler.

Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 17 +++++++++++------
 include/drm/gpu_scheduler.h            |  2 ++
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 9ca741f3a0bc..0e6ccc8243db 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -189,6 +189,7 @@ static void drm_sched_job_finish(struct work_struct *work)
 	struct drm_sched_job *s_job = container_of(work, struct drm_sched_job,
 						   finish_work);
 	struct drm_gpu_scheduler *sched = s_job->sched;
+	struct drm_sched_job *next;
 
 	/*
 	 * Canceling the timeout without removing our job from the ring mirror
@@ -201,10 +202,10 @@ static void drm_sched_job_finish(struct work_struct *work)
 
 	spin_lock(&sched->job_list_lock);
 	/* queue TDR for next job */
+	next = list_next_entry(s_job, node);
+	sched->curr_job = next;
 	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
 	    !list_is_last(&s_job->node, &sched->ring_mirror_list)) {
-		struct drm_sched_job *next = list_next_entry(s_job, node);
-
 		if (!dma_fence_is_signaled(&next->s_fence->finished))
 			schedule_delayed_work(&next->work_tdr, sched->timeout);
 	}
@@ -233,10 +234,12 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
 
 	spin_lock(&sched->job_list_lock);
 	list_add_tail(&s_job->node, &sched->ring_mirror_list);
-	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
-	    list_first_entry_or_null(&sched->ring_mirror_list,
-				     struct drm_sched_job, node) == s_job)
-		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+	if (list_first_entry_or_null(&sched->ring_mirror_list,
+				struct drm_sched_job, node) == s_job) {
+		if (sched->timeout != MAX_SCHEDULE_TIMEOUT)
+			schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+		sched->curr_job = s_job;
+	}
 	spin_unlock(&sched->job_list_lock);
 }
 
@@ -316,6 +319,8 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
 					 struct drm_sched_job, node);
 	if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
 		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+	if (s_job)
+		sched->curr_job = s_job;
 
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
 		struct drm_sched_fence *s_fence = s_job->s_fence;
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index daec50f887b3..07e776b1ca42 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -252,6 +252,7 @@ struct drm_sched_backend_ops {
  * @timeout: the time after which a job is removed from the scheduler.
  * @name: name of the ring for which this scheduler is being used.
  * @sched_rq: priority wise array of run queues.
+ * @curr_job: points to the job currently running on the hardware
  * @wake_up_worker: the wait queue on which the scheduler sleeps until a job
  *                  is ready to be scheduled.
  * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler
@@ -274,6 +275,7 @@ struct drm_gpu_scheduler {
 	long				timeout;
 	const char			*name;
 	struct drm_sched_rq		sched_rq[DRM_SCHED_PRIORITY_MAX];
+	struct drm_sched_job            *curr_job;
 	wait_queue_head_t		wake_up_worker;
 	wait_queue_head_t		job_scheduled;
 	atomic_t			hw_rq_count;
-- 
2.14.3

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] drm/scheduler: remove timeout work_struct from drm_sched_job
  2018-09-18 16:17 [PATCH 1/2] drm/scheduler: add a current job field to scheduler Nayan Deshmukh
@ 2018-09-18 16:17 ` Nayan Deshmukh
  2018-09-19 12:30   ` Christian König
  2018-09-18 17:09 ` [PATCH 1/2] drm/scheduler: add a current job field to scheduler Koenig, Christian
  1 sibling, 1 reply; 9+ messages in thread
From: Nayan Deshmukh @ 2018-09-18 16:17 UTC (permalink / raw)
  To: dri-devel; +Cc: Nayan Deshmukh, christian.koenig

having a delayed work item per job is redundant as we only need one
per scheduler to track the time out the currently executing job.

Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Suggested-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 16 +++++++++-------
 include/drm/gpu_scheduler.h            |  6 +++---
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 0e6ccc8243db..f213b5c7f718 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -198,7 +198,7 @@ static void drm_sched_job_finish(struct work_struct *work)
 	 * manages to find this job as the next job in the list, the fence
 	 * signaled check below will prevent the timeout to be restarted.
 	 */
-	cancel_delayed_work_sync(&s_job->work_tdr);
+	cancel_delayed_work_sync(&sched->work_tdr);
 
 	spin_lock(&sched->job_list_lock);
 	/* queue TDR for next job */
@@ -207,7 +207,7 @@ static void drm_sched_job_finish(struct work_struct *work)
 	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
 	    !list_is_last(&s_job->node, &sched->ring_mirror_list)) {
 		if (!dma_fence_is_signaled(&next->s_fence->finished))
-			schedule_delayed_work(&next->work_tdr, sched->timeout);
+			schedule_delayed_work(&sched->work_tdr, sched->timeout);
 	}
 	/* remove job from ring_mirror_list */
 	list_del(&s_job->node);
@@ -237,7 +237,7 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
 	if (list_first_entry_or_null(&sched->ring_mirror_list,
 				struct drm_sched_job, node) == s_job) {
 		if (sched->timeout != MAX_SCHEDULE_TIMEOUT)
-			schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+			schedule_delayed_work(&sched->work_tdr, sched->timeout);
 		sched->curr_job = s_job;
 	}
 	spin_unlock(&sched->job_list_lock);
@@ -245,8 +245,10 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
 
 static void drm_sched_job_timedout(struct work_struct *work)
 {
-	struct drm_sched_job *job = container_of(work, struct drm_sched_job,
-						 work_tdr.work);
+	struct drm_gpu_scheduler *sched = container_of(work,
+						struct drm_gpu_scheduler,
+						work_tdr.work);
+	struct drm_sched_job *job = sched->curr_job;
 
 	job->sched->ops->timedout_job(job);
 }
@@ -318,7 +320,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
 	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
 					 struct drm_sched_job, node);
 	if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
-		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+		schedule_delayed_work(&sched->work_tdr, sched->timeout);
 	if (s_job)
 		sched->curr_job = s_job;
 
@@ -389,7 +391,6 @@ int drm_sched_job_init(struct drm_sched_job *job,
 
 	INIT_WORK(&job->finish_work, drm_sched_job_finish);
 	INIT_LIST_HEAD(&job->node);
-	INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);
 
 	return 0;
 }
@@ -580,6 +581,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 	INIT_LIST_HEAD(&sched->ring_mirror_list);
 	spin_lock_init(&sched->job_list_lock);
 	atomic_set(&sched->hw_rq_count, 0);
+	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
 	atomic_set(&sched->num_jobs, 0);
 	atomic64_set(&sched->job_id_count, 0);
 
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 07e776b1ca42..9d50d7f3eaa4 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
  *               finished to remove the job from the
  *               @drm_gpu_scheduler.ring_mirror_list.
  * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list.
- * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout
- *            interval is over.
  * @id: a unique id assigned to each job scheduled on the scheduler.
  * @karma: increment on every hang caused by this job. If this exceeds the hang
  *         limit of the scheduler then the job is marked guilty and will not
@@ -195,7 +193,6 @@ struct drm_sched_job {
 	struct dma_fence_cb		finish_cb;
 	struct work_struct		finish_work;
 	struct list_head		node;
-	struct delayed_work		work_tdr;
 	uint64_t			id;
 	atomic_t			karma;
 	enum drm_sched_priority		s_priority;
@@ -260,6 +257,8 @@ struct drm_sched_backend_ops {
  *                 finished.
  * @hw_rq_count: the number of jobs currently in the hardware queue.
  * @job_id_count: used to assign unique id to the each job.
+ * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
+ *            timeout interval is over.
  * @thread: the kthread on which the scheduler which run.
  * @ring_mirror_list: the list of jobs which are currently in the job queue.
  * @job_list_lock: lock to protect the ring_mirror_list.
@@ -280,6 +279,7 @@ struct drm_gpu_scheduler {
 	wait_queue_head_t		job_scheduled;
 	atomic_t			hw_rq_count;
 	atomic64_t			job_id_count;
+	struct delayed_work		work_tdr;
 	struct task_struct		*thread;
 	struct list_head		ring_mirror_list;
 	spinlock_t			job_list_lock;
-- 
2.14.3

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/scheduler: add a current job field to scheduler
  2018-09-18 16:17 [PATCH 1/2] drm/scheduler: add a current job field to scheduler Nayan Deshmukh
  2018-09-18 16:17 ` [PATCH 2/2] drm/scheduler: remove timeout work_struct from drm_sched_job Nayan Deshmukh
@ 2018-09-18 17:09 ` Koenig, Christian
  2018-09-18 23:13   ` Nayan Deshmukh
  1 sibling, 1 reply; 9+ messages in thread
From: Koenig, Christian @ 2018-09-18 17:09 UTC (permalink / raw)
  To: Nayan Deshmukh, dri-devel

Am 18.09.2018 um 18:17 schrieb Nayan Deshmukh:
> Which points to the job running on the hardware. This is
> useful when we need to access the currently executing job
> from the scheduler.

That should be identical to 
list_first_entry_or_null(&sched->ring_mirror_list), doesn't it?

Regards,
Christian.

>
> Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
> ---list_first_entry_or_null(&sched->ring_mirror_list
>
>   drivers/gpu/drm/scheduler/sched_main.c | 17 +++++++++++------
>   include/drm/gpu_scheduler.h            |  2 ++
>   2 files changed, 13 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 9ca741f3a0bc..0e6ccc8243db 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -189,6 +189,7 @@ static void drm_sched_job_finish(struct work_struct *work)
>   	struct drm_sched_job *s_job = container_of(work, struct drm_sched_job,
>   						   finish_work);
>   	struct drm_gpu_scheduler *sched = s_job->sched;
> +	struct drm_sched_job *next;
>   
>   	/*
>   	 * Canceling the timeout without removing our job from the ring mirror
> @@ -201,10 +202,10 @@ static void drm_sched_job_finish(struct work_struct *work)
>   
>   	spin_lock(&sched->job_list_lock);
>   	/* queue TDR for next job */
> +	next = list_next_entry(s_job, node);
> +	sched->curr_job = next;
>   	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
>   	    !list_is_last(&s_job->node, &sched->ring_mirror_list)) {
> -		struct drm_sched_job *next = list_next_entry(s_job, node);
> -
>   		if (!dma_fence_is_signaled(&next->s_fence->finished))
>   			schedule_delayed_work(&next->work_tdr, sched->timeout);
>   	}
> @@ -233,10 +234,12 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
>   
>   	spin_lock(&sched->job_list_lock);
>   	list_add_tail(&s_job->node, &sched->ring_mirror_list);
> -	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
> -	    list_first_entry_or_null(&sched->ring_mirror_list,
> -				     struct drm_sched_job, node) == s_job)
> -		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
> +	if (list_first_entry_or_null(&sched->ring_mirror_list,
> +				struct drm_sched_job, node) == s_job) {
> +		if (sched->timeout != MAX_SCHEDULE_TIMEOUT)
> +			schedule_delayed_work(&s_job->work_tdr, sched->timeout);
> +		sched->curr_job = s_job;
> +	}
>   	spin_unlock(&sched->job_list_lock);
>   }
>   
> @@ -316,6 +319,8 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
>   					 struct drm_sched_job, node);
>   	if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
>   		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
> +	if (s_job)
> +		sched->curr_job = s_job;
>   
>   	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
>   		struct drm_sched_fence *s_fence = s_job->s_fence;
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index daec50f887b3..07e776b1ca42 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -252,6 +252,7 @@ struct drm_sched_backend_ops {
>    * @timeout: the time after which a job is removed from the scheduler.
>    * @name: name of the ring for which this scheduler is being used.
>    * @sched_rq: priority wise array of run queues.
> + * @curr_job: points to the job currently running on the hardware
>    * @wake_up_worker: the wait queue on which the scheduler sleeps until a job
>    *                  is ready to be scheduled.
>    * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler
> @@ -274,6 +275,7 @@ struct drm_gpu_scheduler {
>   	long				timeout;
>   	const char			*name;
>   	struct drm_sched_rq		sched_rq[DRM_SCHED_PRIORITY_MAX];
> +	struct drm_sched_job            *curr_job;
>   	wait_queue_head_t		wake_up_worker;
>   	wait_queue_head_t		job_scheduled;
>   	atomic_t			hw_rq_count;

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/scheduler: add a current job field to scheduler
  2018-09-18 17:09 ` [PATCH 1/2] drm/scheduler: add a current job field to scheduler Koenig, Christian
@ 2018-09-18 23:13   ` Nayan Deshmukh
  0 siblings, 0 replies; 9+ messages in thread
From: Nayan Deshmukh @ 2018-09-18 23:13 UTC (permalink / raw)
  To: Maling list - DRI developers


[-- Attachment #1.1: Type: text/plain, Size: 4769 bytes --]

Hi Christian,

Yes you are correct. My bad.

Do you have any comments on the second patch? I will drop this patch and
rebase the second one.

Regards,
Nayan


On Wed, Sep 19, 2018, 2:09 AM Koenig, Christian <Christian.Koenig@amd.com>
wrote:

> Am 18.09.2018 um 18:17 schrieb Nayan Deshmukh:
> > Which points to the job running on the hardware. This is
> > useful when we need to access the currently executing job
> > from the scheduler.
>
> That should be identical to
> list_first_entry_or_null(&sched->ring_mirror_list), doesn't it?
>
> Regards,
> Christian.
>
> >
> > Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
> > ---list_first_entry_or_null(&sched->ring_mirror_list
> >
> >   drivers/gpu/drm/scheduler/sched_main.c | 17 +++++++++++------
> >   include/drm/gpu_scheduler.h            |  2 ++
> >   2 files changed, 13 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/scheduler/sched_main.c
> b/drivers/gpu/drm/scheduler/sched_main.c
> > index 9ca741f3a0bc..0e6ccc8243db 100644
> > --- a/drivers/gpu/drm/scheduler/sched_main.c
> > +++ b/drivers/gpu/drm/scheduler/sched_main.c
> > @@ -189,6 +189,7 @@ static void drm_sched_job_finish(struct work_struct
> *work)
> >       struct drm_sched_job *s_job = container_of(work, struct
> drm_sched_job,
> >                                                  finish_work);
> >       struct drm_gpu_scheduler *sched = s_job->sched;
> > +     struct drm_sched_job *next;
> >
> >       /*
> >        * Canceling the timeout without removing our job from the ring
> mirror
> > @@ -201,10 +202,10 @@ static void drm_sched_job_finish(struct
> work_struct *work)
> >
> >       spin_lock(&sched->job_list_lock);
> >       /* queue TDR for next job */
> > +     next = list_next_entry(s_job, node);
> > +     sched->curr_job = next;
> >       if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
> >           !list_is_last(&s_job->node, &sched->ring_mirror_list)) {
> > -             struct drm_sched_job *next = list_next_entry(s_job, node);
> > -
> >               if (!dma_fence_is_signaled(&next->s_fence->finished))
> >                       schedule_delayed_work(&next->work_tdr,
> sched->timeout);
> >       }
> > @@ -233,10 +234,12 @@ static void drm_sched_job_begin(struct
> drm_sched_job *s_job)
> >
> >       spin_lock(&sched->job_list_lock);
> >       list_add_tail(&s_job->node, &sched->ring_mirror_list);
> > -     if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
> > -         list_first_entry_or_null(&sched->ring_mirror_list,
> > -                                  struct drm_sched_job, node) == s_job)
> > -             schedule_delayed_work(&s_job->work_tdr, sched->timeout);
> > +     if (list_first_entry_or_null(&sched->ring_mirror_list,
> > +                             struct drm_sched_job, node) == s_job) {
> > +             if (sched->timeout != MAX_SCHEDULE_TIMEOUT)
> > +                     schedule_delayed_work(&s_job->work_tdr,
> sched->timeout);
> > +             sched->curr_job = s_job;
> > +     }
> >       spin_unlock(&sched->job_list_lock);
> >   }
> >
> > @@ -316,6 +319,8 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler
> *sched)
> >                                        struct drm_sched_job, node);
> >       if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
> >               schedule_delayed_work(&s_job->work_tdr, sched->timeout);
> > +     if (s_job)
> > +             sched->curr_job = s_job;
> >
> >       list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list,
> node) {
> >               struct drm_sched_fence *s_fence = s_job->s_fence;
> > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> > index daec50f887b3..07e776b1ca42 100644
> > --- a/include/drm/gpu_scheduler.h
> > +++ b/include/drm/gpu_scheduler.h
> > @@ -252,6 +252,7 @@ struct drm_sched_backend_ops {
> >    * @timeout: the time after which a job is removed from the scheduler.
> >    * @name: name of the ring for which this scheduler is being used.
> >    * @sched_rq: priority wise array of run queues.
> > + * @curr_job: points to the job currently running on the hardware
> >    * @wake_up_worker: the wait queue on which the scheduler sleeps until
> a job
> >    *                  is ready to be scheduled.
> >    * @job_scheduled: once @drm_sched_entity_do_release is called the
> scheduler
> > @@ -274,6 +275,7 @@ struct drm_gpu_scheduler {
> >       long                            timeout;
> >       const char                      *name;
> >       struct drm_sched_rq             sched_rq[DRM_SCHED_PRIORITY_MAX];
> > +     struct drm_sched_job            *curr_job;
> >       wait_queue_head_t               wake_up_worker;
> >       wait_queue_head_t               job_scheduled;
> >       atomic_t                        hw_rq_count;
>
>

[-- Attachment #1.2: Type: text/html, Size: 6470 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] drm/scheduler: remove timeout work_struct from drm_sched_job
  2018-09-18 16:17 ` [PATCH 2/2] drm/scheduler: remove timeout work_struct from drm_sched_job Nayan Deshmukh
@ 2018-09-19 12:30   ` Christian König
  2018-09-19 15:39     ` Michel Dänzer
  2018-09-20 11:25     ` Nayan Deshmukh
  0 siblings, 2 replies; 9+ messages in thread
From: Christian König @ 2018-09-19 12:30 UTC (permalink / raw)
  To: Nayan Deshmukh, dri-devel

Am 18.09.2018 um 18:17 schrieb Nayan Deshmukh:
> having a delayed work item per job is redundant as we only need one
> per scheduler to track the time out the currently executing job.

Well that looks simpler than I thought it would be.

But it shows the next problem that the timeout and the completion could 
race.

As far as I can see that can be fixed by moving the 
dma_fence_remove_callback()/dma_fence_add_callback() dance from 
drm_sched_hw_job_reset() to drm_sched_job_timedout().

Anyway, I would say drop patch #1 and fix the one comment below and we 
can use this.

>
> Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
> Suggested-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/scheduler/sched_main.c | 16 +++++++++-------
>   include/drm/gpu_scheduler.h            |  6 +++---
>   2 files changed, 12 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 0e6ccc8243db..f213b5c7f718 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -198,7 +198,7 @@ static void drm_sched_job_finish(struct work_struct *work)
>   	 * manages to find this job as the next job in the list, the fence
>   	 * signaled check below will prevent the timeout to be restarted.
>   	 */
> -	cancel_delayed_work_sync(&s_job->work_tdr);
> +	cancel_delayed_work_sync(&sched->work_tdr);
>   
>   	spin_lock(&sched->job_list_lock);
>   	/* queue TDR for next job */
> @@ -207,7 +207,7 @@ static void drm_sched_job_finish(struct work_struct *work)
>   	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
>   	    !list_is_last(&s_job->node, &sched->ring_mirror_list)) {
>   		if (!dma_fence_is_signaled(&next->s_fence->finished))

Since we now have only one delayed work item we can just drop the test 
if next is already signaled.

Regards,
Christian.

> -			schedule_delayed_work(&next->work_tdr, sched->timeout);
> +			schedule_delayed_work(&sched->work_tdr, sched->timeout);
>   	}
>   	/* remove job from ring_mirror_list */
>   	list_del(&s_job->node);
> @@ -237,7 +237,7 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
>   	if (list_first_entry_or_null(&sched->ring_mirror_list,
>   				struct drm_sched_job, node) == s_job) {
>   		if (sched->timeout != MAX_SCHEDULE_TIMEOUT)
> -			schedule_delayed_work(&s_job->work_tdr, sched->timeout);
> +			schedule_delayed_work(&sched->work_tdr, sched->timeout);
>   		sched->curr_job = s_job;
>   	}
>   	spin_unlock(&sched->job_list_lock);
> @@ -245,8 +245,10 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
>   
>   static void drm_sched_job_timedout(struct work_struct *work)
>   {
> -	struct drm_sched_job *job = container_of(work, struct drm_sched_job,
> -						 work_tdr.work);
> +	struct drm_gpu_scheduler *sched = container_of(work,
> +						struct drm_gpu_scheduler,
> +						work_tdr.work);
> +	struct drm_sched_job *job = sched->curr_job;
>   
>   	job->sched->ops->timedout_job(job);
>   }
> @@ -318,7 +320,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
>   	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
>   					 struct drm_sched_job, node);
>   	if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
> -		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
> +		schedule_delayed_work(&sched->work_tdr, sched->timeout);
>   	if (s_job)
>   		sched->curr_job = s_job;
>   
> @@ -389,7 +391,6 @@ int drm_sched_job_init(struct drm_sched_job *job,
>   
>   	INIT_WORK(&job->finish_work, drm_sched_job_finish);
>   	INIT_LIST_HEAD(&job->node);
> -	INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);
>   
>   	return 0;
>   }
> @@ -580,6 +581,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>   	INIT_LIST_HEAD(&sched->ring_mirror_list);
>   	spin_lock_init(&sched->job_list_lock);
>   	atomic_set(&sched->hw_rq_count, 0);
> +	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
>   	atomic_set(&sched->num_jobs, 0);
>   	atomic64_set(&sched->job_id_count, 0);
>   
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 07e776b1ca42..9d50d7f3eaa4 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
>    *               finished to remove the job from the
>    *               @drm_gpu_scheduler.ring_mirror_list.
>    * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list.
> - * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout
> - *            interval is over.
>    * @id: a unique id assigned to each job scheduled on the scheduler.
>    * @karma: increment on every hang caused by this job. If this exceeds the hang
>    *         limit of the scheduler then the job is marked guilty and will not
> @@ -195,7 +193,6 @@ struct drm_sched_job {
>   	struct dma_fence_cb		finish_cb;
>   	struct work_struct		finish_work;
>   	struct list_head		node;
> -	struct delayed_work		work_tdr;
>   	uint64_t			id;
>   	atomic_t			karma;
>   	enum drm_sched_priority		s_priority;
> @@ -260,6 +257,8 @@ struct drm_sched_backend_ops {
>    *                 finished.
>    * @hw_rq_count: the number of jobs currently in the hardware queue.
>    * @job_id_count: used to assign unique id to the each job.
> + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
> + *            timeout interval is over.
>    * @thread: the kthread on which the scheduler which run.
>    * @ring_mirror_list: the list of jobs which are currently in the job queue.
>    * @job_list_lock: lock to protect the ring_mirror_list.
> @@ -280,6 +279,7 @@ struct drm_gpu_scheduler {
>   	wait_queue_head_t		job_scheduled;
>   	atomic_t			hw_rq_count;
>   	atomic64_t			job_id_count;
> +	struct delayed_work		work_tdr;
>   	struct task_struct		*thread;
>   	struct list_head		ring_mirror_list;
>   	spinlock_t			job_list_lock;

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] drm/scheduler: remove timeout work_struct from drm_sched_job
  2018-09-19 12:30   ` Christian König
@ 2018-09-19 15:39     ` Michel Dänzer
  2018-09-20  6:41       ` Christian König
  2018-09-20 11:25     ` Nayan Deshmukh
  1 sibling, 1 reply; 9+ messages in thread
From: Michel Dänzer @ 2018-09-19 15:39 UTC (permalink / raw)
  To: Christian König, Nayan Deshmukh; +Cc: dri-devel

On 2018-09-19 2:30 p.m., Christian König wrote:
> Am 18.09.2018 um 18:17 schrieb Nayan Deshmukh:
>> having a delayed work item per job is redundant as we only need one
>> per scheduler to track the time out the currently executing job.
> 
> Well that looks simpler than I thought it would be.
> 
> But it shows the next problem that the timeout and the completion could
> race.
> 
> As far as I can see that can be fixed by moving the
> dma_fence_remove_callback()/dma_fence_add_callback() dance from
> drm_sched_hw_job_reset() to drm_sched_job_timedout().

BTW, while you guys are looking into this code, please keep an eye open
for things that could explain https://bugs.freedesktop.org/107762 .


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] drm/scheduler: remove timeout work_struct from drm_sched_job
  2018-09-19 15:39     ` Michel Dänzer
@ 2018-09-20  6:41       ` Christian König
  0 siblings, 0 replies; 9+ messages in thread
From: Christian König @ 2018-09-20  6:41 UTC (permalink / raw)
  To: Michel Dänzer, Christian König, Nayan Deshmukh; +Cc: dri-devel

Am 19.09.2018 um 17:39 schrieb Michel Dänzer:
> On 2018-09-19 2:30 p.m., Christian König wrote:
>> Am 18.09.2018 um 18:17 schrieb Nayan Deshmukh:
>>> having a delayed work item per job is redundant as we only need one
>>> per scheduler to track the time out the currently executing job.
>> Well that looks simpler than I thought it would be.
>>
>> But it shows the next problem that the timeout and the completion could
>> race.
>>
>> As far as I can see that can be fixed by moving the
>> dma_fence_remove_callback()/dma_fence_add_callback() dance from
>> drm_sched_hw_job_reset() to drm_sched_job_timedout().
> BTW, while you guys are looking into this code, please keep an eye open
> for things that could explain https://bugs.freedesktop.org/107762 .

Yeah, since we now have only one timer that should be fixed by this as well.

Christian.
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] drm/scheduler: remove timeout work_struct from drm_sched_job
  2018-09-19 12:30   ` Christian König
  2018-09-19 15:39     ` Michel Dänzer
@ 2018-09-20 11:25     ` Nayan Deshmukh
  2018-09-20 11:30       ` Christian König
  1 sibling, 1 reply; 9+ messages in thread
From: Nayan Deshmukh @ 2018-09-20 11:25 UTC (permalink / raw)
  To: Christian König; +Cc: Maling list - DRI developers


[-- Attachment #1.1: Type: text/plain, Size: 7287 bytes --]

On Wed, Sep 19, 2018, 9:31 PM Christian König <christian.koenig@amd.com>
wrote:

> Am 18.09.2018 um 18:17 schrieb Nayan Deshmukh:
> > having a delayed work item per job is redundant as we only need one
> > per scheduler to track the time out the currently executing job.
>
> Well that looks simpler than I thought it would be.
>
> But it shows the next problem that the timeout and the completion could
> race.
>
> As far as I can see that can be fixed by moving the
> dma_fence_remove_callback()/dma_fence_add_callback() dance from
> drm_sched_hw_job_reset() to drm_sched_job_timedout().
>
> Anyway, I would say drop patch #1 and fix the one comment below and we
> can use this.
>
> >
> > Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
> > Suggested-by: Christian König <christian.koenig@amd.com>
> > ---
> >   drivers/gpu/drm/scheduler/sched_main.c | 16 +++++++++-------
> >   include/drm/gpu_scheduler.h            |  6 +++---
> >   2 files changed, 12 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/scheduler/sched_main.c
> b/drivers/gpu/drm/scheduler/sched_main.c
> > index 0e6ccc8243db..f213b5c7f718 100644
> > --- a/drivers/gpu/drm/scheduler/sched_main.c
> > +++ b/drivers/gpu/drm/scheduler/sched_main.c
> > @@ -198,7 +198,7 @@ static void drm_sched_job_finish(struct work_struct
> *work)
> >        * manages to find this job as the next job in the list, the fence
> >        * signaled check below will prevent the timeout to be restarted.
> >        */
> > -     cancel_delayed_work_sync(&s_job->work_tdr);
> > +     cancel_delayed_work_sync(&sched->work_tdr);
> >
> >       spin_lock(&sched->job_list_lock);
> >       /* queue TDR for next job */
> > @@ -207,7 +207,7 @@ static void drm_sched_job_finish(struct work_struct
> *work)
> >       if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
> >           !list_is_last(&s_job->node, &sched->ring_mirror_list)) {
> >               if (!dma_fence_is_signaled(&next->s_fence->finished))
>
> Since we now have only one delayed work item we can just drop the test
> if next is already signaled.
>
Can you elaborate more on this. Which test are you talking about?

Regards,
Nayan

>
>
> Regards,
> Christian.
>
> > -                     schedule_delayed_work(&next->work_tdr,
> sched->timeout);
> > +                     schedule_delayed_work(&sched->work_tdr,
> sched->timeout);
> >       }
> >       /* remove job from ring_mirror_list */
> >       list_del(&s_job->node);
> > @@ -237,7 +237,7 @@ static void drm_sched_job_begin(struct drm_sched_job
> *s_job)
> >       if (list_first_entry_or_null(&sched->ring_mirror_list,
> >                               struct drm_sched_job, node) == s_job) {
> >               if (sched->timeout != MAX_SCHEDULE_TIMEOUT)
> > -                     schedule_delayed_work(&s_job->work_tdr,
> sched->timeout);
> > +                     schedule_delayed_work(&sched->work_tdr,
> sched->timeout);
> >               sched->curr_job = s_job;
> >       }
> >       spin_unlock(&sched->job_list_lock);
> > @@ -245,8 +245,10 @@ static void drm_sched_job_begin(struct
> drm_sched_job *s_job)
> >
> >   static void drm_sched_job_timedout(struct work_struct *work)
> >   {
> > -     struct drm_sched_job *job = container_of(work, struct
> drm_sched_job,
> > -                                              work_tdr.work);
> > +     struct drm_gpu_scheduler *sched = container_of(work,
> > +                                             struct drm_gpu_scheduler,
> > +                                             work_tdr.work);
> > +     struct drm_sched_job *job = sched->curr_job;
> >
> >       job->sched->ops->timedout_job(job);
> >   }
> > @@ -318,7 +320,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler
> *sched)
> >       s_job = list_first_entry_or_null(&sched->ring_mirror_list,
> >                                        struct drm_sched_job, node);
> >       if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
> > -             schedule_delayed_work(&s_job->work_tdr, sched->timeout);
> > +             schedule_delayed_work(&sched->work_tdr, sched->timeout);
> >       if (s_job)
> >               sched->curr_job = s_job;
> >
> > @@ -389,7 +391,6 @@ int drm_sched_job_init(struct drm_sched_job *job,
> >
> >       INIT_WORK(&job->finish_work, drm_sched_job_finish);
> >       INIT_LIST_HEAD(&job->node);
> > -     INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);
> >
> >       return 0;
> >   }
> > @@ -580,6 +581,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
> >       INIT_LIST_HEAD(&sched->ring_mirror_list);
> >       spin_lock_init(&sched->job_list_lock);
> >       atomic_set(&sched->hw_rq_count, 0);
> > +     INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
> >       atomic_set(&sched->num_jobs, 0);
> >       atomic64_set(&sched->job_id_count, 0);
> >
> > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> > index 07e776b1ca42..9d50d7f3eaa4 100644
> > --- a/include/drm/gpu_scheduler.h
> > +++ b/include/drm/gpu_scheduler.h
> > @@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct
> dma_fence *f);
> >    *               finished to remove the job from the
> >    *               @drm_gpu_scheduler.ring_mirror_list.
> >    * @node: used to append this struct to the
> @drm_gpu_scheduler.ring_mirror_list.
> > - * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after
> the timeout
> > - *            interval is over.
> >    * @id: a unique id assigned to each job scheduled on the scheduler.
> >    * @karma: increment on every hang caused by this job. If this exceeds
> the hang
> >    *         limit of the scheduler then the job is marked guilty and
> will not
> > @@ -195,7 +193,6 @@ struct drm_sched_job {
> >       struct dma_fence_cb             finish_cb;
> >       struct work_struct              finish_work;
> >       struct list_head                node;
> > -     struct delayed_work             work_tdr;
> >       uint64_t                        id;
> >       atomic_t                        karma;
> >       enum drm_sched_priority         s_priority;
> > @@ -260,6 +257,8 @@ struct drm_sched_backend_ops {
> >    *                 finished.
> >    * @hw_rq_count: the number of jobs currently in the hardware queue.
> >    * @job_id_count: used to assign unique id to the each job.
> > + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after
> the
> > + *            timeout interval is over.
> >    * @thread: the kthread on which the scheduler which run.
> >    * @ring_mirror_list: the list of jobs which are currently in the job
> queue.
> >    * @job_list_lock: lock to protect the ring_mirror_list.
> > @@ -280,6 +279,7 @@ struct drm_gpu_scheduler {
> >       wait_queue_head_t               job_scheduled;
> >       atomic_t                        hw_rq_count;
> >       atomic64_t                      job_id_count;
> > +     struct delayed_work             work_tdr;
> >       struct task_struct              *thread;
> >       struct list_head                ring_mirror_list;
> >       spinlock_t                      job_list_lock;
>
>

[-- Attachment #1.2: Type: text/html, Size: 9839 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] drm/scheduler: remove timeout work_struct from drm_sched_job
  2018-09-20 11:25     ` Nayan Deshmukh
@ 2018-09-20 11:30       ` Christian König
  0 siblings, 0 replies; 9+ messages in thread
From: Christian König @ 2018-09-20 11:30 UTC (permalink / raw)
  To: Nayan Deshmukh; +Cc: Maling list - DRI developers


[-- Attachment #1.1: Type: text/plain, Size: 8411 bytes --]

Am 20.09.2018 um 13:25 schrieb Nayan Deshmukh:
>
>
> On Wed, Sep 19, 2018, 9:31 PM Christian König 
> <christian.koenig@amd.com <mailto:christian.koenig@amd.com>> wrote:
>
>     Am 18.09.2018 um 18:17 schrieb Nayan Deshmukh:
>     > having a delayed work item per job is redundant as we only need one
>     > per scheduler to track the time out the currently executing job.
>
>     Well that looks simpler than I thought it would be.
>
>     But it shows the next problem that the timeout and the completion
>     could
>     race.
>
>     As far as I can see that can be fixed by moving the
>     dma_fence_remove_callback()/dma_fence_add_callback() dance from
>     drm_sched_hw_job_reset() to drm_sched_job_timedout().
>
>     Anyway, I would say drop patch #1 and fix the one comment below
>     and we
>     can use this.
>
>     >
>     > Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com
>     <mailto:nayan26deshmukh@gmail.com>>
>     > Suggested-by: Christian König <christian.koenig@amd.com
>     <mailto:christian.koenig@amd.com>>
>     > ---
>     >   drivers/gpu/drm/scheduler/sched_main.c | 16 +++++++++-------
>     >   include/drm/gpu_scheduler.h            |  6 +++---
>     >   2 files changed, 12 insertions(+), 10 deletions(-)
>     >
>     > diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>     b/drivers/gpu/drm/scheduler/sched_main.c
>     > index 0e6ccc8243db..f213b5c7f718 100644
>     > --- a/drivers/gpu/drm/scheduler/sched_main.c
>     > +++ b/drivers/gpu/drm/scheduler/sched_main.c
>     > @@ -198,7 +198,7 @@ static void drm_sched_job_finish(struct
>     work_struct *work)
>     >        * manages to find this job as the next job in the list,
>     the fence
>     >        * signaled check below will prevent the timeout to be
>     restarted.
>     >        */
>     > -  cancel_delayed_work_sync(&s_job->work_tdr);
>     > +  cancel_delayed_work_sync(&sched->work_tdr);
>     >
>     >       spin_lock(&sched->job_list_lock);
>     >       /* queue TDR for next job */
>     > @@ -207,7 +207,7 @@ static void drm_sched_job_finish(struct
>     work_struct *work)
>     >       if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
>     >           !list_is_last(&s_job->node, &sched->ring_mirror_list)) {
>     >               if (!dma_fence_is_signaled(&next->s_fence->finished))
>
>     Since we now have only one delayed work item we can just drop the
>     test
>     if next is already signaled.
>
> Can you elaborate more on this. Which test are you talking about?

I was talking about the "!dma_fence_is_signaled()" test here.

>
> Regards,
> Nayan
>
>
>
>     Regards,
>     Christian.
>
>     > -  schedule_delayed_work(&next->work_tdr, sched->timeout);
>     > +  schedule_delayed_work(&sched->work_tdr, sched->timeout);
>     >       }
>     >       /* remove job from ring_mirror_list */
>     >       list_del(&s_job->node);
>

Basically you could do this first and then you need to only test if 
sched->ring_mirror_list is empty.

Regards,
Christian.

>     > @@ -237,7 +237,7 @@ static void drm_sched_job_begin(struct
>     drm_sched_job *s_job)
>     >       if (list_first_entry_or_null(&sched->ring_mirror_list,
>     >                               struct drm_sched_job, node) ==
>     s_job) {
>     >               if (sched->timeout != MAX_SCHEDULE_TIMEOUT)
>     > -  schedule_delayed_work(&s_job->work_tdr, sched->timeout);
>     > +  schedule_delayed_work(&sched->work_tdr, sched->timeout);
>     >               sched->curr_job = s_job;
>     >       }
>     >       spin_unlock(&sched->job_list_lock);
>     > @@ -245,8 +245,10 @@ static void drm_sched_job_begin(struct
>     drm_sched_job *s_job)
>     >
>     >   static void drm_sched_job_timedout(struct work_struct *work)
>     >   {
>     > -     struct drm_sched_job *job = container_of(work, struct
>     drm_sched_job,
>     > - work_tdr.work <http://work_tdr.work>);
>     > +     struct drm_gpu_scheduler *sched = container_of(work,
>     > +                                             struct
>     drm_gpu_scheduler,
>     > + work_tdr.work <http://work_tdr.work>);
>     > +     struct drm_sched_job *job = sched->curr_job;
>     >
>     >       job->sched->ops->timedout_job(job);
>     >   }
>     > @@ -318,7 +320,7 @@ void drm_sched_job_recovery(struct
>     drm_gpu_scheduler *sched)
>     >       s_job = list_first_entry_or_null(&sched->ring_mirror_list,
>     >                                        struct drm_sched_job, node);
>     >       if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
>     > -  schedule_delayed_work(&s_job->work_tdr, sched->timeout);
>     > +  schedule_delayed_work(&sched->work_tdr, sched->timeout);
>     >       if (s_job)
>     >               sched->curr_job = s_job;
>     >
>     > @@ -389,7 +391,6 @@ int drm_sched_job_init(struct drm_sched_job
>     *job,
>     >
>     >       INIT_WORK(&job->finish_work, drm_sched_job_finish);
>     >       INIT_LIST_HEAD(&job->node);
>     > -     INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);
>     >
>     >       return 0;
>     >   }
>     > @@ -580,6 +581,7 @@ int drm_sched_init(struct drm_gpu_scheduler
>     *sched,
>     >  INIT_LIST_HEAD(&sched->ring_mirror_list);
>     >       spin_lock_init(&sched->job_list_lock);
>     >       atomic_set(&sched->hw_rq_count, 0);
>     > +     INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
>     >       atomic_set(&sched->num_jobs, 0);
>     >       atomic64_set(&sched->job_id_count, 0);
>     >
>     > diff --git a/include/drm/gpu_scheduler.h
>     b/include/drm/gpu_scheduler.h
>     > index 07e776b1ca42..9d50d7f3eaa4 100644
>     > --- a/include/drm/gpu_scheduler.h
>     > +++ b/include/drm/gpu_scheduler.h
>     > @@ -175,8 +175,6 @@ struct drm_sched_fence
>     *to_drm_sched_fence(struct dma_fence *f);
>     >    *               finished to remove the job from the
>     >    *  @drm_gpu_scheduler.ring_mirror_list.
>     >    * @node: used to append this struct to the
>     @drm_gpu_scheduler.ring_mirror_list.
>     > - * @work_tdr: schedules a delayed call to
>     @drm_sched_job_timedout after the timeout
>     > - *            interval is over.
>     >    * @id: a unique id assigned to each job scheduled on the
>     scheduler.
>     >    * @karma: increment on every hang caused by this job. If this
>     exceeds the hang
>     >    *         limit of the scheduler then the job is marked
>     guilty and will not
>     > @@ -195,7 +193,6 @@ struct drm_sched_job {
>     >       struct dma_fence_cb             finish_cb;
>     >       struct work_struct              finish_work;
>     >       struct list_head                node;
>     > -     struct delayed_work             work_tdr;
>     >       uint64_t                        id;
>     >       atomic_t                        karma;
>     >       enum drm_sched_priority         s_priority;
>     > @@ -260,6 +257,8 @@ struct drm_sched_backend_ops {
>     >    *                 finished.
>     >    * @hw_rq_count: the number of jobs currently in the hardware
>     queue.
>     >    * @job_id_count: used to assign unique id to the each job.
>     > + * @work_tdr: schedules a delayed call to
>     @drm_sched_job_timedout after the
>     > + *            timeout interval is over.
>     >    * @thread: the kthread on which the scheduler which run.
>     >    * @ring_mirror_list: the list of jobs which are currently in
>     the job queue.
>     >    * @job_list_lock: lock to protect the ring_mirror_list.
>     > @@ -280,6 +279,7 @@ struct drm_gpu_scheduler {
>     >       wait_queue_head_t               job_scheduled;
>     >       atomic_t                        hw_rq_count;
>     >       atomic64_t                      job_id_count;
>     > +     struct delayed_work             work_tdr;
>     >       struct task_struct              *thread;
>     >       struct list_head ring_mirror_list;
>     >       spinlock_t                      job_list_lock;
>


[-- Attachment #1.2: Type: text/html, Size: 14497 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2018-09-20 11:30 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-18 16:17 [PATCH 1/2] drm/scheduler: add a current job field to scheduler Nayan Deshmukh
2018-09-18 16:17 ` [PATCH 2/2] drm/scheduler: remove timeout work_struct from drm_sched_job Nayan Deshmukh
2018-09-19 12:30   ` Christian König
2018-09-19 15:39     ` Michel Dänzer
2018-09-20  6:41       ` Christian König
2018-09-20 11:25     ` Nayan Deshmukh
2018-09-20 11:30       ` Christian König
2018-09-18 17:09 ` [PATCH 1/2] drm/scheduler: add a current job field to scheduler Koenig, Christian
2018-09-18 23:13   ` Nayan Deshmukh

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.