* [PATCH] drm/sched: fix the bug of time out calculation(v2)
@ 2021-08-25  4:14 Monk Liu
  2021-08-25  6:31 ` Christian König
  2021-08-25 12:51 ` Alex Deucher
  0 siblings, 2 replies; 12+ messages in thread
From: Monk Liu @ 2021-08-25  4:14 UTC (permalink / raw)
  To: amd-gfx; +Cc: Monk Liu

the original logic is wrong in that the timeout is not re-triggered
after the previous job signals, which leads to all jobs in the same
scheduler sharing the timeout timer started for the very first job in
that scheduler.

we should modify the timer every time a previous job signals.

v2:
further clean up the logic, and cancel the TDR timer if the signaled job
is the last one in its scheduler.

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index a2a9536..8c102ac 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -305,8 +305,17 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
 	struct drm_gpu_scheduler *sched = s_job->sched;
 
 	spin_lock(&sched->job_list_lock);
-	list_add_tail(&s_job->list, &sched->pending_list);
-	drm_sched_start_timeout(sched);
+	if (list_empty(&sched->pending_list)) {
+		list_add_tail(&s_job->list, &sched->pending_list);
+		drm_sched_start_timeout(sched);
+	} else {
+		/* the old jobs in pending list are not finished yet
+		 * no need to restart TDR timer here, it is already
+		 * handled by drm_sched_get_cleanup_job
+		 */
+		list_add_tail(&s_job->list, &sched->pending_list);
+	}
+
 	spin_unlock(&sched->job_list_lock);
 }
 
@@ -693,17 +702,22 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
 		/* remove job from pending_list */
 		list_del_init(&job->list);
+
 		/* make the scheduled timestamp more accurate */
 		next = list_first_entry_or_null(&sched->pending_list,
 						typeof(*next), list);
-		if (next)
+		if (next) {
+			/* if we still have job in pending list we need modify the TDR timer */
+			mod_delayed_work(system_wq, &sched->work_tdr, sched->timeout);
 			next->s_fence->scheduled.timestamp =
 				job->s_fence->finished.timestamp;
+		} else {
+			/* cancel the TDR timer if no job in pending list */
+			cancel_delayed_work(&sched->work_tdr);
+		}
 
 	} else {
 		job = NULL;
-		/* queue timeout for next job */
-		drm_sched_start_timeout(sched);
 	}
 
 	spin_unlock(&sched->job_list_lock);
@@ -791,11 +805,8 @@ static int drm_sched_main(void *param)
 					  (entity = drm_sched_select_entity(sched))) ||
 					 kthread_should_stop());
 
-		if (cleanup_job) {
+		if (cleanup_job)
 			sched->ops->free_job(cleanup_job);
-			/* queue timeout for next job */
-			drm_sched_start_timeout(sched);
-		}
 
 		if (!entity)
 			continue;
-- 
2.7.4



* Re: [PATCH] drm/sched: fix the bug of time out calculation(v2)
  2021-08-25  4:14 [PATCH] drm/sched: fix the bug of time out calculation(v2) Monk Liu
@ 2021-08-25  6:31 ` Christian König
  2021-08-25 11:55   ` Liu, Monk
  2021-08-25 12:51 ` Alex Deucher
  1 sibling, 1 reply; 12+ messages in thread
From: Christian König @ 2021-08-25  6:31 UTC (permalink / raw)
  To: Monk Liu, amd-gfx

Well NAK to that approach. First of all, your bug analysis is incorrect.

The timeout started by queue_delayed_work() in drm_sched_start_timeout() 
is paired with the cancel_delayed_work() in drm_sched_get_cleanup_job().

So you must have something else going on here.

Then please don't use mod_delayed_work(), instead always cancel it and 
restart it.

Regards,
Christian.
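
The two re-arm patterns being debated can be written out as a short,
illustrative sketch against the stock workqueue API. Only sched->work_tdr
and sched->timeout are real drm_gpu_scheduler fields; the helper names and
the use of system_wq are assumptions for illustration, not code from
sched_main.c or from the posted patch.

/* Illustrative sketch only -- not part of the patch or of sched_main.c. */

/* what the posted patch does: unconditionally (re)arm the TDR timer;
 * mod_delayed_work() re-arms even if the work is already pending */
static void rearm_tdr_with_mod(struct drm_gpu_scheduler *sched)
{
	mod_delayed_work(system_wq, &sched->work_tdr, sched->timeout);
}

/* what Christian asks for: explicit cancel followed by a fresh queue;
 * queue_delayed_work() by itself is a no-op while the work is pending */
static void rearm_tdr_with_cancel_queue(struct drm_gpu_scheduler *sched)
{
	cancel_delayed_work(&sched->work_tdr);
	queue_delayed_work(system_wq, &sched->work_tdr, sched->timeout);
}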

Am 25.08.21 um 06:14 schrieb Monk Liu:
> the original logic is wrong that the timeout will not be retriggerd
> after the previous job siganled, and that lead to the scenario that all
> jobs in the same scheduler shares the same timeout timer from the very
> begining job in this scheduler which is wrong.
>
> we should modify the timer everytime a previous job signaled.
>
> v2:
> further cleanup the logic, and do the TDR timer cancelling if the signaled job
> is the last one in its scheduler.
>
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>   drivers/gpu/drm/scheduler/sched_main.c | 29 ++++++++++++++++++++---------
>   1 file changed, 20 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index a2a9536..8c102ac 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -305,8 +305,17 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
>   	struct drm_gpu_scheduler *sched = s_job->sched;
>   
>   	spin_lock(&sched->job_list_lock);
> -	list_add_tail(&s_job->list, &sched->pending_list);
> -	drm_sched_start_timeout(sched);
> +	if (list_empty(&sched->pending_list)) {
> +		list_add_tail(&s_job->list, &sched->pending_list);
> +		drm_sched_start_timeout(sched);
> +	} else {
> +		/* the old jobs in pending list are not finished yet
> +		 * no need to restart TDR timer here, it is already
> +		 * handled by drm_sched_get_cleanup_job
> +		 */
> +		list_add_tail(&s_job->list, &sched->pending_list);
> +	}
> +
>   	spin_unlock(&sched->job_list_lock);
>   }
>   
> @@ -693,17 +702,22 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
>   	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
>   		/* remove job from pending_list */
>   		list_del_init(&job->list);
> +
>   		/* make the scheduled timestamp more accurate */
>   		next = list_first_entry_or_null(&sched->pending_list,
>   						typeof(*next), list);
> -		if (next)
> +		if (next) {
> +			/* if we still have job in pending list we need modify the TDR timer */
> +			mod_delayed_work(system_wq, &sched->work_tdr, sched->timeout);
>   			next->s_fence->scheduled.timestamp =
>   				job->s_fence->finished.timestamp;
> +		} else {
> +			/* cancel the TDR timer if no job in pending list */
> +			cancel_delayed_work(&sched->work_tdr);
> +		}
>   
>   	} else {
>   		job = NULL;
> -		/* queue timeout for next job */
> -		drm_sched_start_timeout(sched);
>   	}
>   
>   	spin_unlock(&sched->job_list_lock);
> @@ -791,11 +805,8 @@ static int drm_sched_main(void *param)
>   					  (entity = drm_sched_select_entity(sched))) ||
>   					 kthread_should_stop());
>   
> -		if (cleanup_job) {
> +		if (cleanup_job)
>   			sched->ops->free_job(cleanup_job);
> -			/* queue timeout for next job */
> -			drm_sched_start_timeout(sched);
> -		}
>   
>   		if (!entity)
>   			continue;



* RE: [PATCH] drm/sched: fix the bug of time out calculation(v2)
  2021-08-25  6:31 ` Christian König
@ 2021-08-25 11:55   ` Liu, Monk
  2021-08-25 12:01     ` Liu, Monk
  0 siblings, 1 reply; 12+ messages in thread
From: Liu, Monk @ 2021-08-25 11:55 UTC (permalink / raw)
  To: Christian König, amd-gfx

[AMD Official Use Only]

>>The timeout started by queue_delayed_work() in drm_sched_start_timeout() is paired with the cancel_delayed_work() in drm_sched_get_cleanup_job().

No, that's wrong. Consider the case where we enter cleanup_job() without a timeout on this sched (we just keep submitting new jobs to it):
the work_tdr is cancelled at the top, then we fetch the heading job; assume that job is not signaled, so we fall through to the "queue timeout for next job" path and drm_sched_start_timeout() is called. The heading job's TO timer is therefore re-triggered and starts counting from zero again, which is totally wrong.

With my patch the timer is only re-triggered after the previous job has really signaled.

Can you be more specific about which part is incorrect?

Thanks 
------------------------------------------
Monk Liu | Cloud-GPU Core team
------------------------------------------

-----Original Message-----
From: Christian König <ckoenig.leichtzumerken@gmail.com> 
Sent: Wednesday, August 25, 2021 2:32 PM
To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/sched: fix the bug of time out calculation(v2)

Well NAK to that approach. First of all your bug analyses is incorrect.

The timeout started by queue_delayed_work() in drm_sched_start_timeout() is paired with the cancel_delayed_work() in drm_sched_get_cleanup_job().

So you must have something else going on here.

Then please don't use mod_delayed_work(), instead always cancel it and restart it.

Regards,
Christian.

Am 25.08.21 um 06:14 schrieb Monk Liu:
> the original logic is wrong that the timeout will not be retriggerd 
> after the previous job siganled, and that lead to the scenario that 
> all jobs in the same scheduler shares the same timeout timer from the 
> very begining job in this scheduler which is wrong.
>
> we should modify the timer everytime a previous job signaled.
>
> v2:
> further cleanup the logic, and do the TDR timer cancelling if the 
> signaled job is the last one in its scheduler.
>
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>   drivers/gpu/drm/scheduler/sched_main.c | 29 ++++++++++++++++++++---------
>   1 file changed, 20 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
> b/drivers/gpu/drm/scheduler/sched_main.c
> index a2a9536..8c102ac 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -305,8 +305,17 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
>   	struct drm_gpu_scheduler *sched = s_job->sched;
>   
>   	spin_lock(&sched->job_list_lock);
> -	list_add_tail(&s_job->list, &sched->pending_list);
> -	drm_sched_start_timeout(sched);
> +	if (list_empty(&sched->pending_list)) {
> +		list_add_tail(&s_job->list, &sched->pending_list);
> +		drm_sched_start_timeout(sched);
> +	} else {
> +		/* the old jobs in pending list are not finished yet
> +		 * no need to restart TDR timer here, it is already
> +		 * handled by drm_sched_get_cleanup_job
> +		 */
> +		list_add_tail(&s_job->list, &sched->pending_list);
> +	}
> +
>   	spin_unlock(&sched->job_list_lock);
>   }
>   
> @@ -693,17 +702,22 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
>   	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
>   		/* remove job from pending_list */
>   		list_del_init(&job->list);
> +
>   		/* make the scheduled timestamp more accurate */
>   		next = list_first_entry_or_null(&sched->pending_list,
>   						typeof(*next), list);
> -		if (next)
> +		if (next) {
> +			/* if we still have job in pending list we need modify the TDR timer */
> +			mod_delayed_work(system_wq, &sched->work_tdr, sched->timeout);
>   			next->s_fence->scheduled.timestamp =
>   				job->s_fence->finished.timestamp;
> +		} else {
> +			/* cancel the TDR timer if no job in pending list */
> +			cancel_delayed_work(&sched->work_tdr);
> +		}
>   
>   	} else {
>   		job = NULL;
> -		/* queue timeout for next job */
> -		drm_sched_start_timeout(sched);
>   	}
>   
>   	spin_unlock(&sched->job_list_lock);
> @@ -791,11 +805,8 @@ static int drm_sched_main(void *param)
>   					  (entity = drm_sched_select_entity(sched))) ||
>   					 kthread_should_stop());
>   
> -		if (cleanup_job) {
> +		if (cleanup_job)
>   			sched->ops->free_job(cleanup_job);
> -			/* queue timeout for next job */
> -			drm_sched_start_timeout(sched);
> -		}
>   
>   		if (!entity)
>   			continue;


* RE: [PATCH] drm/sched: fix the bug of time out calculation(v2)
  2021-08-25 11:55   ` Liu, Monk
@ 2021-08-25 12:01     ` Liu, Monk
  2021-08-25 12:11       ` Christian König
  0 siblings, 1 reply; 12+ messages in thread
From: Liu, Monk @ 2021-08-25 12:01 UTC (permalink / raw)
  To: Christian König, amd-gfx

[AMD Official Use Only]

I think we should remove the cancel_delayed_work() at the beginning of cleanup_job().

Because with my patch the mod_delayed_work() in cleanup_job() already does its duty to re-trigger the TO timer accordingly.

Thanks 

------------------------------------------
Monk Liu | Cloud-GPU Core team
------------------------------------------

-----Original Message-----
From: Liu, Monk 
Sent: Wednesday, August 25, 2021 7:55 PM
To: 'Christian König' <ckoenig.leichtzumerken@gmail.com>; amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/sched: fix the bug of time out calculation(v2)

[AMD Official Use Only]

>>The timeout started by queue_delayed_work() in drm_sched_start_timeout() is paired with the cancel_delayed_work() in drm_sched_get_cleanup_job().

No that's wrong, see that when we are in cleanup_job(), assume we do not have timeout on this sched (we are just keep submitting new jobs to this sched), Then the work_tdr is cancelled, and then we get the heading job, and let's assume the job is not signaled, then we run to the "queue timeout for next job" thus drm_sched_start_timeout() is called, so this heading job's TO timer is actually retriggered ... which is totally wrong.

With my patch the timer is already retriggered after previous JOB really signaled.

Can you be more specific on the incorrect part ?

Thanks
------------------------------------------
Monk Liu | Cloud-GPU Core team
------------------------------------------

-----Original Message-----
From: Christian König <ckoenig.leichtzumerken@gmail.com>
Sent: Wednesday, August 25, 2021 2:32 PM
To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/sched: fix the bug of time out calculation(v2)

Well NAK to that approach. First of all your bug analyses is incorrect.

The timeout started by queue_delayed_work() in drm_sched_start_timeout() is paired with the cancel_delayed_work() in drm_sched_get_cleanup_job().

So you must have something else going on here.

Then please don't use mod_delayed_work(), instead always cancel it and restart it.

Regards,
Christian.

Am 25.08.21 um 06:14 schrieb Monk Liu:
> the original logic is wrong that the timeout will not be retriggerd 
> after the previous job siganled, and that lead to the scenario that 
> all jobs in the same scheduler shares the same timeout timer from the 
> very begining job in this scheduler which is wrong.
>
> we should modify the timer everytime a previous job signaled.
>
> v2:
> further cleanup the logic, and do the TDR timer cancelling if the 
> signaled job is the last one in its scheduler.
>
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>   drivers/gpu/drm/scheduler/sched_main.c | 29 ++++++++++++++++++++---------
>   1 file changed, 20 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
> b/drivers/gpu/drm/scheduler/sched_main.c
> index a2a9536..8c102ac 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -305,8 +305,17 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
>   	struct drm_gpu_scheduler *sched = s_job->sched;
>   
>   	spin_lock(&sched->job_list_lock);
> -	list_add_tail(&s_job->list, &sched->pending_list);
> -	drm_sched_start_timeout(sched);
> +	if (list_empty(&sched->pending_list)) {
> +		list_add_tail(&s_job->list, &sched->pending_list);
> +		drm_sched_start_timeout(sched);
> +	} else {
> +		/* the old jobs in pending list are not finished yet
> +		 * no need to restart TDR timer here, it is already
> +		 * handled by drm_sched_get_cleanup_job
> +		 */
> +		list_add_tail(&s_job->list, &sched->pending_list);
> +	}
> +
>   	spin_unlock(&sched->job_list_lock);
>   }
>   
> @@ -693,17 +702,22 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
>   	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
>   		/* remove job from pending_list */
>   		list_del_init(&job->list);
> +
>   		/* make the scheduled timestamp more accurate */
>   		next = list_first_entry_or_null(&sched->pending_list,
>   						typeof(*next), list);
> -		if (next)
> +		if (next) {
> +			/* if we still have job in pending list we need modify the TDR timer */
> +			mod_delayed_work(system_wq, &sched->work_tdr, sched->timeout);
>   			next->s_fence->scheduled.timestamp =
>   				job->s_fence->finished.timestamp;
> +		} else {
> +			/* cancel the TDR timer if no job in pending list */
> +			cancel_delayed_work(&sched->work_tdr);
> +		}
>   
>   	} else {
>   		job = NULL;
> -		/* queue timeout for next job */
> -		drm_sched_start_timeout(sched);
>   	}
>   
>   	spin_unlock(&sched->job_list_lock);
> @@ -791,11 +805,8 @@ static int drm_sched_main(void *param)
>   					  (entity = drm_sched_select_entity(sched))) ||
>   					 kthread_should_stop());
>   
> -		if (cleanup_job) {
> +		if (cleanup_job)
>   			sched->ops->free_job(cleanup_job);
> -			/* queue timeout for next job */
> -			drm_sched_start_timeout(sched);
> -		}
>   
>   		if (!entity)
>   			continue;


* Re: [PATCH] drm/sched: fix the bug of time out calculation(v2)
  2021-08-25 12:01     ` Liu, Monk
@ 2021-08-25 12:11       ` Christian König
  2021-08-25 18:20         ` Andrey Grodzovsky
  2021-08-26  1:53         ` Liu, Monk
  0 siblings, 2 replies; 12+ messages in thread
From: Christian König @ 2021-08-25 12:11 UTC (permalink / raw)
  To: Liu, Monk, amd-gfx

No, this would break that logic here.

See, drm_sched_start_timeout() can be called multiple times; this is 
intentional and very important!

The logic in queue_delayed_work() makes sure that the timer is only 
started once and then never again.

All we need to take care of is to cancel_delayed_work() when we know 
that the job is completed.

This here works as intended as far as I can see and if you start to use 
mod_delayed_work() you actually break it.

Regards,
Christian.
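
To make the "only started once" point concrete: drm_sched_start_timeout()
boils down to roughly the sketch below (paraphrased, not the exact upstream
code; the workqueue it actually queues on may differ). Because
queue_delayed_work() does nothing when the work is already pending, calling
the helper repeatedly never re-arms a running timer.

static void drm_sched_start_timeout_sketch(struct drm_gpu_scheduler *sched)
{
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    !list_empty(&sched->pending_list))
		/* no-op if work_tdr is already pending */
		queue_delayed_work(system_wq, &sched->work_tdr, sched->timeout);
}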

Am 25.08.21 um 14:01 schrieb Liu, Monk:
> [AMD Official Use Only]
>
> I think we should remove the cancel_delayed_work() in the beginning of the cleanup_job().
>
> Because by my patch the "mode_delayed_work" in cleanup_job is already doing its duty to retrigger the TO timer accordingly
>
> Thanks
>
> ------------------------------------------
> Monk Liu | Cloud-GPU Core team
> ------------------------------------------
>
> -----Original Message-----
> From: Liu, Monk
> Sent: Wednesday, August 25, 2021 7:55 PM
> To: 'Christian König' <ckoenig.leichtzumerken@gmail.com>; amd-gfx@lists.freedesktop.org
> Subject: RE: [PATCH] drm/sched: fix the bug of time out calculation(v2)
>
> [AMD Official Use Only]
>
>>> The timeout started by queue_delayed_work() in drm_sched_start_timeout() is paired with the cancel_delayed_work() in drm_sched_get_cleanup_job().
> No that's wrong, see that when we are in cleanup_job(), assume we do not have timeout on this sched (we are just keep submitting new jobs to this sched), Then the work_tdr is cancelled, and then we get the heading job, and let's assume the job is not signaled, then we run to the "queue timeout for next job" thus drm_sched_start_timeout() is called, so this heading job's TO timer is actually retriggered ... which is totally wrong.
>
> With my patch the timer is already retriggered after previous JOB really signaled.
>
> Can you be more specific on the incorrect part ?
>
> Thanks
> ------------------------------------------
> Monk Liu | Cloud-GPU Core team
> ------------------------------------------
>
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Wednesday, August 25, 2021 2:32 PM
> To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/sched: fix the bug of time out calculation(v2)
>
> Well NAK to that approach. First of all your bug analyses is incorrect.
>
> The timeout started by queue_delayed_work() in drm_sched_start_timeout() is paired with the cancel_delayed_work() in drm_sched_get_cleanup_job().
>
> So you must have something else going on here.
>
> Then please don't use mod_delayed_work(), instead always cancel it and restart it.
>
> Regards,
> Christian.
>
> Am 25.08.21 um 06:14 schrieb Monk Liu:
>> the original logic is wrong that the timeout will not be retriggerd
>> after the previous job siganled, and that lead to the scenario that
>> all jobs in the same scheduler shares the same timeout timer from the
>> very begining job in this scheduler which is wrong.
>>
>> we should modify the timer everytime a previous job signaled.
>>
>> v2:
>> further cleanup the logic, and do the TDR timer cancelling if the
>> signaled job is the last one in its scheduler.
>>
>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>> ---
>>    drivers/gpu/drm/scheduler/sched_main.c | 29 ++++++++++++++++++++---------
>>    1 file changed, 20 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>> b/drivers/gpu/drm/scheduler/sched_main.c
>> index a2a9536..8c102ac 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -305,8 +305,17 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
>>    	struct drm_gpu_scheduler *sched = s_job->sched;
>>    
>>    	spin_lock(&sched->job_list_lock);
>> -	list_add_tail(&s_job->list, &sched->pending_list);
>> -	drm_sched_start_timeout(sched);
>> +	if (list_empty(&sched->pending_list)) {
>> +		list_add_tail(&s_job->list, &sched->pending_list);
>> +		drm_sched_start_timeout(sched);
>> +	} else {
>> +		/* the old jobs in pending list are not finished yet
>> +		 * no need to restart TDR timer here, it is already
>> +		 * handled by drm_sched_get_cleanup_job
>> +		 */
>> +		list_add_tail(&s_job->list, &sched->pending_list);
>> +	}
>> +
>>    	spin_unlock(&sched->job_list_lock);
>>    }
>>    
>> @@ -693,17 +702,22 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
>>    	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
>>    		/* remove job from pending_list */
>>    		list_del_init(&job->list);
>> +
>>    		/* make the scheduled timestamp more accurate */
>>    		next = list_first_entry_or_null(&sched->pending_list,
>>    						typeof(*next), list);
>> -		if (next)
>> +		if (next) {
>> +			/* if we still have job in pending list we need modify the TDR timer */
>> +			mod_delayed_work(system_wq, &sched->work_tdr, sched->timeout);
>>    			next->s_fence->scheduled.timestamp =
>>    				job->s_fence->finished.timestamp;
>> +		} else {
>> +			/* cancel the TDR timer if no job in pending list */
>> +			cancel_delayed_work(&sched->work_tdr);
>> +		}
>>    
>>    	} else {
>>    		job = NULL;
>> -		/* queue timeout for next job */
>> -		drm_sched_start_timeout(sched);
>>    	}
>>    
>>    	spin_unlock(&sched->job_list_lock);
>> @@ -791,11 +805,8 @@ static int drm_sched_main(void *param)
>>    					  (entity = drm_sched_select_entity(sched))) ||
>>    					 kthread_should_stop());
>>    
>> -		if (cleanup_job) {
>> +		if (cleanup_job)
>>    			sched->ops->free_job(cleanup_job);
>> -			/* queue timeout for next job */
>> -			drm_sched_start_timeout(sched);
>> -		}
>>    
>>    		if (!entity)
>>    			continue;



* Re: [PATCH] drm/sched: fix the bug of time out calculation(v2)
  2021-08-25  4:14 [PATCH] drm/sched: fix the bug of time out calculation(v2) Monk Liu
  2021-08-25  6:31 ` Christian König
@ 2021-08-25 12:51 ` Alex Deucher
  1 sibling, 0 replies; 12+ messages in thread
From: Alex Deucher @ 2021-08-25 12:51 UTC (permalink / raw)
  To: Monk Liu; +Cc: amd-gfx list

Please cc dri-devel on all scheduler patches.  It's core functionality.

Alex

On Wed, Aug 25, 2021 at 12:14 AM Monk Liu <Monk.Liu@amd.com> wrote:
>
> the original logic is wrong that the timeout will not be retriggerd
> after the previous job siganled, and that lead to the scenario that all
> jobs in the same scheduler shares the same timeout timer from the very
> begining job in this scheduler which is wrong.
>
> we should modify the timer everytime a previous job signaled.
>
> v2:
> further cleanup the logic, and do the TDR timer cancelling if the signaled job
> is the last one in its scheduler.
>
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>  drivers/gpu/drm/scheduler/sched_main.c | 29 ++++++++++++++++++++---------
>  1 file changed, 20 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index a2a9536..8c102ac 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -305,8 +305,17 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
>         struct drm_gpu_scheduler *sched = s_job->sched;
>
>         spin_lock(&sched->job_list_lock);
> -       list_add_tail(&s_job->list, &sched->pending_list);
> -       drm_sched_start_timeout(sched);
> +       if (list_empty(&sched->pending_list)) {
> +               list_add_tail(&s_job->list, &sched->pending_list);
> +               drm_sched_start_timeout(sched);
> +       } else {
> +               /* the old jobs in pending list are not finished yet
> +                * no need to restart TDR timer here, it is already
> +                * handled by drm_sched_get_cleanup_job
> +                */
> +               list_add_tail(&s_job->list, &sched->pending_list);
> +       }
> +
>         spin_unlock(&sched->job_list_lock);
>  }
>
> @@ -693,17 +702,22 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
>         if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
>                 /* remove job from pending_list */
>                 list_del_init(&job->list);
> +
>                 /* make the scheduled timestamp more accurate */
>                 next = list_first_entry_or_null(&sched->pending_list,
>                                                 typeof(*next), list);
> -               if (next)
> +               if (next) {
> +                       /* if we still have job in pending list we need modify the TDR timer */
> +                       mod_delayed_work(system_wq, &sched->work_tdr, sched->timeout);
>                         next->s_fence->scheduled.timestamp =
>                                 job->s_fence->finished.timestamp;
> +               } else {
> +                       /* cancel the TDR timer if no job in pending list */
> +                       cancel_delayed_work(&sched->work_tdr);
> +               }
>
>         } else {
>                 job = NULL;
> -               /* queue timeout for next job */
> -               drm_sched_start_timeout(sched);
>         }
>
>         spin_unlock(&sched->job_list_lock);
> @@ -791,11 +805,8 @@ static int drm_sched_main(void *param)
>                                           (entity = drm_sched_select_entity(sched))) ||
>                                          kthread_should_stop());
>
> -               if (cleanup_job) {
> +               if (cleanup_job)
>                         sched->ops->free_job(cleanup_job);
> -                       /* queue timeout for next job */
> -                       drm_sched_start_timeout(sched);
> -               }
>
>                 if (!entity)
>                         continue;
> --
> 2.7.4
>


* Re: [PATCH] drm/sched: fix the bug of time out calculation(v2)
  2021-08-25 12:11       ` Christian König
@ 2021-08-25 18:20         ` Andrey Grodzovsky
  2021-08-26  2:31           ` Liu, Monk
  2021-08-26  1:53         ` Liu, Monk
  1 sibling, 1 reply; 12+ messages in thread
From: Andrey Grodzovsky @ 2021-08-25 18:20 UTC (permalink / raw)
  To: Christian König, Liu, Monk, amd-gfx, dri-devel


On 2021-08-25 8:11 a.m., Christian König wrote:
> No, this would break that logic here.
>
> See drm_sched_start_timeout() can be called multiple times, this is 
> intentional and very important!
>
> The logic in queue_delayed_work() makes sure that the timer is only 
> started once and then never again.
>
> All we need to take care of is to cancel_delayed_work() when we know 
> that the job is completed.


Seems to me you can only do that when the pending list is empty; otherwise
you risk cancelling a legitimate new timer that was started by the next job,
or not restarting the timer at all because your timer was still pending when
the next job tried to start it again (the common case).
For a non-empty pending list you have to adjust the currently active TDR
timer from your job's TTL to the TTL of the job after you, or just restart
it as Monk does here, which prolongs the timeout more than required but is
still OK I guess.

What about returning to the old scheme of one timer/sched_work per job, so
each job has its own timer, we don't share it, and everything is precise for
each job? Using the locking scheme we already have today, the actual TDR
handler will execute only once while all the others arising from the guilty
job's hang will be rejected (for amdgpu; for other drivers it probably
requires the same locking, or we can move this to the scheduler layer).

Andrey
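
A very rough sketch of what such a per-job timer could look like is below.
It is purely hypothetical: the wrapper struct, helper names and use of
system_wq are invented for illustration, and the shared TDR handler plus the
serialization Andrey mentions are not shown.

struct per_job_tdr_example {
	struct drm_sched_job base;
	struct delayed_work  tdr_work;	/* one timer per job; must be
					 * INIT_DELAYED_WORK()'d with a
					 * driver TDR handler */
};

/* armed when the job is pushed to the hardware */
static void example_arm_tdr(struct per_job_tdr_example *job,
			    struct drm_gpu_scheduler *sched)
{
	queue_delayed_work(system_wq, &job->tdr_work, sched->timeout);
}

/* disarmed when the job's finished fence signals */
static void example_disarm_tdr(struct per_job_tdr_example *job)
{
	cancel_delayed_work(&job->tdr_work);
}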


>
> This here works as intended as far as I can see and if you start to 
> use mod_delayed_work() you actually break it.
>
> Regards,
> Christian.
>
> Am 25.08.21 um 14:01 schrieb Liu, Monk:
>> [AMD Official Use Only]
>>
>> I think we should remove the cancel_delayed_work() in the beginning 
>> of the cleanup_job().
>>
>> Because by my patch the "mode_delayed_work" in cleanup_job is already 
>> doing its duty to retrigger the TO timer accordingly
>>
>> Thanks
>>
>> ------------------------------------------
>> Monk Liu | Cloud-GPU Core team
>> ------------------------------------------
>>
>> -----Original Message-----
>> From: Liu, Monk
>> Sent: Wednesday, August 25, 2021 7:55 PM
>> To: 'Christian König' <ckoenig.leichtzumerken@gmail.com>; 
>> amd-gfx@lists.freedesktop.org
>> Subject: RE: [PATCH] drm/sched: fix the bug of time out calculation(v2)
>>
>> [AMD Official Use Only]
>>
>>>> The timeout started by queue_delayed_work() in 
>>>> drm_sched_start_timeout() is paired with the cancel_delayed_work() 
>>>> in drm_sched_get_cleanup_job().
>> No that's wrong, see that when we are in cleanup_job(), assume we do 
>> not have timeout on this sched (we are just keep submitting new jobs 
>> to this sched), Then the work_tdr is cancelled, and then we get the 
>> heading job, and let's assume the job is not signaled, then we run to 
>> the "queue timeout for next job" thus drm_sched_start_timeout() is 
>> called, so this heading job's TO timer is actually retriggered ... 
>> which is totally wrong.
>>
>> With my patch the timer is already retriggered after previous JOB 
>> really signaled.
>>
>> Can you be more specific on the incorrect part ?
>>
>> Thanks
>> ------------------------------------------
>> Monk Liu | Cloud-GPU Core team
>> ------------------------------------------
>>
>> -----Original Message-----
>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>> Sent: Wednesday, August 25, 2021 2:32 PM
>> To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH] drm/sched: fix the bug of time out calculation(v2)
>>
>> Well NAK to that approach. First of all your bug analyses is incorrect.
>>
>> The timeout started by queue_delayed_work() in 
>> drm_sched_start_timeout() is paired with the cancel_delayed_work() in 
>> drm_sched_get_cleanup_job().
>>
>> So you must have something else going on here.
>>
>> Then please don't use mod_delayed_work(), instead always cancel it 
>> and restart it.
>>
>> Regards,
>> Christian.
>>
>> Am 25.08.21 um 06:14 schrieb Monk Liu:
>>> the original logic is wrong that the timeout will not be retriggerd
>>> after the previous job siganled, and that lead to the scenario that
>>> all jobs in the same scheduler shares the same timeout timer from the
>>> very begining job in this scheduler which is wrong.
>>>
>>> we should modify the timer everytime a previous job signaled.
>>>
>>> v2:
>>> further cleanup the logic, and do the TDR timer cancelling if the
>>> signaled job is the last one in its scheduler.
>>>
>>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>>> ---
>>>    drivers/gpu/drm/scheduler/sched_main.c | 29 
>>> ++++++++++++++++++++---------
>>>    1 file changed, 20 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>>> b/drivers/gpu/drm/scheduler/sched_main.c
>>> index a2a9536..8c102ac 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>> @@ -305,8 +305,17 @@ static void drm_sched_job_begin(struct 
>>> drm_sched_job *s_job)
>>>        struct drm_gpu_scheduler *sched = s_job->sched;
>>>           spin_lock(&sched->job_list_lock);
>>> -    list_add_tail(&s_job->list, &sched->pending_list);
>>> -    drm_sched_start_timeout(sched);
>>> +    if (list_empty(&sched->pending_list)) {
>>> +        list_add_tail(&s_job->list, &sched->pending_list);
>>> +        drm_sched_start_timeout(sched);
>>> +    } else {
>>> +        /* the old jobs in pending list are not finished yet
>>> +         * no need to restart TDR timer here, it is already
>>> +         * handled by drm_sched_get_cleanup_job
>>> +         */
>>> +        list_add_tail(&s_job->list, &sched->pending_list);
>>> +    }
>>> +
>>>        spin_unlock(&sched->job_list_lock);
>>>    }
>>>    @@ -693,17 +702,22 @@ drm_sched_get_cleanup_job(struct 
>>> drm_gpu_scheduler *sched)
>>>        if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
>>>            /* remove job from pending_list */
>>>            list_del_init(&job->list);
>>> +
>>>            /* make the scheduled timestamp more accurate */
>>>            next = list_first_entry_or_null(&sched->pending_list,
>>>                            typeof(*next), list);
>>> -        if (next)
>>> +        if (next) {
>>> +            /* if we still have job in pending list we need modify 
>>> the TDR timer */
>>> +            mod_delayed_work(system_wq, &sched->work_tdr, 
>>> sched->timeout);
>>>                next->s_fence->scheduled.timestamp =
>>>                    job->s_fence->finished.timestamp;
>>> +        } else {
>>> +            /* cancel the TDR timer if no job in pending list */
>>> +            cancel_delayed_work(&sched->work_tdr);
>>> +        }
>>>           } else {
>>>            job = NULL;
>>> -        /* queue timeout for next job */
>>> -        drm_sched_start_timeout(sched);
>>>        }
>>>           spin_unlock(&sched->job_list_lock);
>>> @@ -791,11 +805,8 @@ static int drm_sched_main(void *param)
>>>                          (entity = drm_sched_select_entity(sched))) ||
>>>                         kthread_should_stop());
>>>    -        if (cleanup_job) {
>>> +        if (cleanup_job)
>>>                sched->ops->free_job(cleanup_job);
>>> -            /* queue timeout for next job */
>>> -            drm_sched_start_timeout(sched);
>>> -        }
>>>               if (!entity)
>>>                continue;
>


* RE: [PATCH] drm/sched: fix the bug of time out calculation(v2)
  2021-08-25 12:11       ` Christian König
  2021-08-25 18:20         ` Andrey Grodzovsky
@ 2021-08-26  1:53         ` Liu, Monk
  1 sibling, 0 replies; 12+ messages in thread
From: Liu, Monk @ 2021-08-26  1:53 UTC (permalink / raw)
  To: Christian König, amd-gfx; +Cc: DRI Development

[AMD Official Use Only]

>> All we need to take care of is to cancel_delayed_work() when we know that the job is completed.

That's why I want to remove the cancel_delayed_work() at the beginning of cleanup_job(): at that moment we don't know yet whether
a job has completed (the sched could have been woken up by a new submission instead of a signaled job) until we fetch the job and confirm that it has signaled.



static struct drm_sched_job *
drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *job, *next;

	/*
	 * Don't destroy jobs while the timeout worker is running  OR thread
	 * is being parked and hence assumed to not touch pending_list
	 */
	if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    !cancel_delayed_work(&sched->work_tdr)) || // if the job has not timed out, cancelling here is wrong while the job is still running
	    kthread_should_park())
		return NULL;

	spin_lock(&sched->job_list_lock);

	job = list_first_entry_or_null(&sched->pending_list,
				       struct drm_sched_job, list);

	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
		/* remove job from pending_list */
		list_del_init(&job->list);
		/* make the scheduled timestamp more accurate */
		next = list_first_entry_or_null(&sched->pending_list,
						typeof(*next), list);
		if (next)
			next->s_fence->scheduled.timestamp =
				job->s_fence->finished.timestamp;

	} else {
		job = NULL;
		/* queue timeout for next job */
		drm_sched_start_timeout(sched); // if the job is not signaled, the timer is re-armed here and counting restarts from zero, which is wrong
	}

	spin_unlock(&sched->job_list_lock);

	return job;
}



>> This here works as intended as far as I can see and if you start to use mod_delayed_work() you actually break it.
Only the point where we find the heading job signaled and there is a next job is the moment we should cancel the work_tdr for this scheduler, and of course
queue a new work_tdr since the "next" job has already started on the HW... that's why I use mod_delayed_work(). But I can change it to a "cancel and queue" approach if you have concerns.
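
For concreteness, the "cancel and queue" variant mentioned above would
replace the mod_delayed_work() call in the hunk with something like the
sketch below (untested illustration, reusing the same system_wq / work_tdr /
timeout used by the posted patch):

		if (next) {
			/* explicit re-arm: drop the pending TDR work and
			 * queue a fresh one for the next job */
			cancel_delayed_work(&sched->work_tdr);
			queue_delayed_work(system_wq, &sched->work_tdr,
					   sched->timeout);
			next->s_fence->scheduled.timestamp =
				job->s_fence->finished.timestamp;
		} else {
			/* no job left on the pending list */
			cancel_delayed_work(&sched->work_tdr);
		}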


Thanks 

------------------------------------------
Monk Liu | Cloud-GPU Core team
------------------------------------------

-----Original Message-----
From: Christian König <ckoenig.leichtzumerken@gmail.com> 
Sent: Wednesday, August 25, 2021 8:11 PM
To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/sched: fix the bug of time out calculation(v2)

No, this would break that logic here.

See drm_sched_start_timeout() can be called multiple times, this is intentional and very important!

The logic in queue_delayed_work() makes sure that the timer is only started once and then never again.

All we need to take care of is to cancel_delayed_work() when we know that the job is completed.

This here works as intended as far as I can see and if you start to use
mod_delayed_work() you actually break it.

Regards,
Christian.

Am 25.08.21 um 14:01 schrieb Liu, Monk:
> [AMD Official Use Only]
>
> I think we should remove the cancel_delayed_work() in the beginning of the cleanup_job().
>
> Because by my patch the "mode_delayed_work" in cleanup_job is already 
> doing its duty to retrigger the TO timer accordingly
>
> Thanks
>
> ------------------------------------------
> Monk Liu | Cloud-GPU Core team
> ------------------------------------------
>
> -----Original Message-----
> From: Liu, Monk
> Sent: Wednesday, August 25, 2021 7:55 PM
> To: 'Christian König' <ckoenig.leichtzumerken@gmail.com>; 
> amd-gfx@lists.freedesktop.org
> Subject: RE: [PATCH] drm/sched: fix the bug of time out 
> calculation(v2)
>
> [AMD Official Use Only]
>
>>> The timeout started by queue_delayed_work() in drm_sched_start_timeout() is paired with the cancel_delayed_work() in drm_sched_get_cleanup_job().
> No that's wrong, see that when we are in cleanup_job(), assume we do not have timeout on this sched (we are just keep submitting new jobs to this sched), Then the work_tdr is cancelled, and then we get the heading job, and let's assume the job is not signaled, then we run to the "queue timeout for next job" thus drm_sched_start_timeout() is called, so this heading job's TO timer is actually retriggered ... which is totally wrong.
>
> With my patch the timer is already retriggered after previous JOB really signaled.
>
> Can you be more specific on the incorrect part ?
>
> Thanks
> ------------------------------------------
> Monk Liu | Cloud-GPU Core team
> ------------------------------------------
>
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Wednesday, August 25, 2021 2:32 PM
> To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/sched: fix the bug of time out 
> calculation(v2)
>
> Well NAK to that approach. First of all your bug analyses is incorrect.
>
> The timeout started by queue_delayed_work() in drm_sched_start_timeout() is paired with the cancel_delayed_work() in drm_sched_get_cleanup_job().
>
> So you must have something else going on here.
>
> Then please don't use mod_delayed_work(), instead always cancel it and restart it.
>
> Regards,
> Christian.
>
> Am 25.08.21 um 06:14 schrieb Monk Liu:
>> the original logic is wrong that the timeout will not be retriggerd 
>> after the previous job siganled, and that lead to the scenario that 
>> all jobs in the same scheduler shares the same timeout timer from the 
>> very begining job in this scheduler which is wrong.
>>
>> we should modify the timer everytime a previous job signaled.
>>
>> v2:
>> further cleanup the logic, and do the TDR timer cancelling if the 
>> signaled job is the last one in its scheduler.
>>
>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>> ---
>>    drivers/gpu/drm/scheduler/sched_main.c | 29 ++++++++++++++++++++---------
>>    1 file changed, 20 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>> b/drivers/gpu/drm/scheduler/sched_main.c
>> index a2a9536..8c102ac 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -305,8 +305,17 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
>>    	struct drm_gpu_scheduler *sched = s_job->sched;
>>    
>>    	spin_lock(&sched->job_list_lock);
>> -	list_add_tail(&s_job->list, &sched->pending_list);
>> -	drm_sched_start_timeout(sched);
>> +	if (list_empty(&sched->pending_list)) {
>> +		list_add_tail(&s_job->list, &sched->pending_list);
>> +		drm_sched_start_timeout(sched);
>> +	} else {
>> +		/* the old jobs in pending list are not finished yet
>> +		 * no need to restart TDR timer here, it is already
>> +		 * handled by drm_sched_get_cleanup_job
>> +		 */
>> +		list_add_tail(&s_job->list, &sched->pending_list);
>> +	}
>> +
>>    	spin_unlock(&sched->job_list_lock);
>>    }
>>    
>> @@ -693,17 +702,22 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
>>    	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
>>    		/* remove job from pending_list */
>>    		list_del_init(&job->list);
>> +
>>    		/* make the scheduled timestamp more accurate */
>>    		next = list_first_entry_or_null(&sched->pending_list,
>>    						typeof(*next), list);
>> -		if (next)
>> +		if (next) {
>> +			/* if we still have job in pending list we need modify the TDR timer */
>> +			mod_delayed_work(system_wq, &sched->work_tdr, sched->timeout);
>>    			next->s_fence->scheduled.timestamp =
>>    				job->s_fence->finished.timestamp;
>> +		} else {
>> +			/* cancel the TDR timer if no job in pending list */
>> +			cancel_delayed_work(&sched->work_tdr);
>> +		}
>>    
>>    	} else {
>>    		job = NULL;
>> -		/* queue timeout for next job */
>> -		drm_sched_start_timeout(sched);
>>    	}
>>    
>>    	spin_unlock(&sched->job_list_lock);
>> @@ -791,11 +805,8 @@ static int drm_sched_main(void *param)
>>    					  (entity = drm_sched_select_entity(sched))) ||
>>    					 kthread_should_stop());
>>    
>> -		if (cleanup_job) {
>> +		if (cleanup_job)
>>    			sched->ops->free_job(cleanup_job);
>> -			/* queue timeout for next job */
>> -			drm_sched_start_timeout(sched);
>> -		}
>>    
>>    		if (!entity)
>>    			continue;


* RE: [PATCH] drm/sched: fix the bug of time out calculation(v2)
  2021-08-25 18:20         ` Andrey Grodzovsky
@ 2021-08-26  2:31           ` Liu, Monk
  2021-08-26  3:05             ` Andrey Grodzovsky
  0 siblings, 1 reply; 12+ messages in thread
From: Liu, Monk @ 2021-08-26  2:31 UTC (permalink / raw)
  To: Grodzovsky, Andrey, Christian König, amd-gfx, dri-devel

[AMD Official Use Only]

Hi Andrey

I'm not quite sure if I read you correctly

>>Seems to me you can only do it for empty pending list otherwise you risk cancelling a legit new timer that was started by the next job or not restarting timer at all since your timer was still pending when next job tried to start it again (the common case).

I don't understand the sentence above. From my understanding, in the common case, if the timer is pending then the cancel_delayed_work() at the beginning will cancel it, and we then get to the "queue timeout for next job" line since the heading job is not signaled (consistent with the timer being pending), so the timer will be restarted (for the next job).

And that sequence is wrong to me, because we cancelled a pending timer and restarted it for a scheduler whose heading job is still running, so the whole count is repeated from zero and not accurate at all.

Thanks 

------------------------------------------
Monk Liu | Cloud-GPU Core team
------------------------------------------

-----Original Message-----
From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com> 
Sent: Thursday, August 26, 2021 2:20 AM
To: Christian König <ckoenig.leichtzumerken@gmail.com>; Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org; dri-devel <dri-devel@lists.freedesktop.org>
Subject: Re: [PATCH] drm/sched: fix the bug of time out calculation(v2)


On 2021-08-25 8:11 a.m., Christian König wrote:
> No, this would break that logic here.
>
> See drm_sched_start_timeout() can be called multiple times, this is 
> intentional and very important!
>
> The logic in queue_delayed_work() makes sure that the timer is only 
> started once and then never again.
>
> All we need to take care of is to cancel_delayed_work() when we know 
> that the job is completed.


Seems to me you can only do it for empty pending list otherwise you risk cancelling a legit new timer that was started by the next job or not restarting timer at all since your timer was still pending when next job tried to start it again (the common case).
For non empty pending list you have to adjust the currently active TDR's timer from your's job TTL to TTL to the next job after you or just restart it as Monk does it here which prolongs the timeout more then required but still ok i guess.

What about returning to the old scheme of timer sched_work per job so each job has it's own timer and we don't share it and everything is precise for each job, using the locking scheme we already have today the actual TDR handler will execute only once while all the other arising from the guilty job hang will be rejected (for amdgpu, for other drivers it probably requires same locking or we can move this to the scheduler layer)

Andrey


>
> This here works as intended as far as I can see and if you start to 
> use mod_delayed_work() you actually break it.
>
> Regards,
> Christian.
>
> Am 25.08.21 um 14:01 schrieb Liu, Monk:
>> [AMD Official Use Only]
>>
>> I think we should remove the cancel_delayed_work() in the beginning 
>> of the cleanup_job().
>>
>> Because by my patch the "mode_delayed_work" in cleanup_job is already 
>> doing its duty to retrigger the TO timer accordingly
>>
>> Thanks
>>
>> ------------------------------------------
>> Monk Liu | Cloud-GPU Core team
>> ------------------------------------------
>>
>> -----Original Message-----
>> From: Liu, Monk
>> Sent: Wednesday, August 25, 2021 7:55 PM
>> To: 'Christian König' <ckoenig.leichtzumerken@gmail.com>;
>> amd-gfx@lists.freedesktop.org
>> Subject: RE: [PATCH] drm/sched: fix the bug of time out 
>> calculation(v2)
>>
>> [AMD Official Use Only]
>>
>>>> The timeout started by queue_delayed_work() in
>>>> drm_sched_start_timeout() is paired with the cancel_delayed_work() 
>>>> in drm_sched_get_cleanup_job().
>> No that's wrong, see that when we are in cleanup_job(), assume we do 
>> not have timeout on this sched (we are just keep submitting new jobs 
>> to this sched), Then the work_tdr is cancelled, and then we get the 
>> heading job, and let's assume the job is not signaled, then we run to 
>> the "queue timeout for next job" thus drm_sched_start_timeout() is 
>> called, so this heading job's TO timer is actually retriggered ...
>> which is totally wrong.
>>
>> With my patch the timer is already retriggered after previous JOB 
>> really signaled.
>>
>> Can you be more specific on the incorrect part ?
>>
>> Thanks
>> ------------------------------------------
>> Monk Liu | Cloud-GPU Core team
>> ------------------------------------------
>>
>> -----Original Message-----
>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>> Sent: Wednesday, August 25, 2021 2:32 PM
>> To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH] drm/sched: fix the bug of time out 
>> calculation(v2)
>>
>> Well NAK to that approach. First of all your bug analyses is incorrect.
>>
>> The timeout started by queue_delayed_work() in
>> drm_sched_start_timeout() is paired with the cancel_delayed_work() in 
>> drm_sched_get_cleanup_job().
>>
>> So you must have something else going on here.
>>
>> Then please don't use mod_delayed_work(), instead always cancel it 
>> and restart it.
>>
>> Regards,
>> Christian.
>>
>> Am 25.08.21 um 06:14 schrieb Monk Liu:
>>> the original logic is wrong that the timeout will not be retriggerd 
>>> after the previous job siganled, and that lead to the scenario that 
>>> all jobs in the same scheduler shares the same timeout timer from 
>>> the very begining job in this scheduler which is wrong.
>>>
>>> we should modify the timer everytime a previous job signaled.
>>>
>>> v2:
>>> further cleanup the logic, and do the TDR timer cancelling if the 
>>> signaled job is the last one in its scheduler.
>>>
>>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>>> ---
>>>    drivers/gpu/drm/scheduler/sched_main.c | 29
>>> ++++++++++++++++++++---------
>>>    1 file changed, 20 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>>> b/drivers/gpu/drm/scheduler/sched_main.c
>>> index a2a9536..8c102ac 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>> @@ -305,8 +305,17 @@ static void drm_sched_job_begin(struct 
>>> drm_sched_job *s_job)
>>>        struct drm_gpu_scheduler *sched = s_job->sched;
>>>           spin_lock(&sched->job_list_lock);
>>> -    list_add_tail(&s_job->list, &sched->pending_list);
>>> -    drm_sched_start_timeout(sched);
>>> +    if (list_empty(&sched->pending_list)) {
>>> +        list_add_tail(&s_job->list, &sched->pending_list);
>>> +        drm_sched_start_timeout(sched);
>>> +    } else {
>>> +        /* the old jobs in pending list are not finished yet
>>> +         * no need to restart TDR timer here, it is already
>>> +         * handled by drm_sched_get_cleanup_job
>>> +         */
>>> +        list_add_tail(&s_job->list, &sched->pending_list);
>>> +    }
>>> +
>>>        spin_unlock(&sched->job_list_lock);
>>>    }
>>>    @@ -693,17 +702,22 @@ drm_sched_get_cleanup_job(struct 
>>> drm_gpu_scheduler *sched)
>>>        if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
>>>            /* remove job from pending_list */
>>>            list_del_init(&job->list);
>>> +
>>>            /* make the scheduled timestamp more accurate */
>>>            next = list_first_entry_or_null(&sched->pending_list,
>>>                            typeof(*next), list);
>>> -        if (next)
>>> +        if (next) {
>>> +            /* if we still have job in pending list we need modify
>>> the TDR timer */
>>> +            mod_delayed_work(system_wq, &sched->work_tdr,
>>> sched->timeout);
>>>                next->s_fence->scheduled.timestamp =
>>>                    job->s_fence->finished.timestamp;
>>> +        } else {
>>> +            /* cancel the TDR timer if no job in pending list */
>>> +            cancel_delayed_work(&sched->work_tdr);
>>> +        }
>>>           } else {
>>>            job = NULL;
>>> -        /* queue timeout for next job */
>>> -        drm_sched_start_timeout(sched);
>>>        }
>>>           spin_unlock(&sched->job_list_lock);
>>> @@ -791,11 +805,8 @@ static int drm_sched_main(void *param)
>>>                          (entity = drm_sched_select_entity(sched))) 
>>> ||
>>>                         kthread_should_stop());
>>>    -        if (cleanup_job) {
>>> +        if (cleanup_job)
>>>                sched->ops->free_job(cleanup_job);
>>> -            /* queue timeout for next job */
>>> -            drm_sched_start_timeout(sched);
>>> -        }
>>>               if (!entity)
>>>                continue;
>


* Re: [PATCH] drm/sched: fix the bug of time out calculation(v2)
  2021-08-26  2:31           ` Liu, Monk
@ 2021-08-26  3:05             ` Andrey Grodzovsky
  2021-08-26  4:55               ` Liu, Monk
  0 siblings, 1 reply; 12+ messages in thread
From: Andrey Grodzovsky @ 2021-08-26  3:05 UTC (permalink / raw)
  To: Liu, Monk, Christian König, amd-gfx, dri-devel


On 2021-08-25 10:31 p.m., Liu, Monk wrote:
> [AMD Official Use Only]
>
> Hi Andrey
>
> I'm not quite sure if I read you correctly
>
>>> Seems to me you can only do it for empty pending list otherwise you risk cancelling a legit new timer that was started by the next job or not restarting timer at all since your timer was still pending when next job tried to start it again (the common case).
> I don't understand above sentence, from my understanding for the common case,  if the timer is pending, the cancel_delay_work in beginning will cancel it and then we will get to the line of "queue timeout for next job" since the heading job is not signaled (align with the timer is pending), then the timer will be restarted (for the next job)


Ignore it, I realized from looking that I missed the timer restart at the
end of drm_sched_get_cleanup_job, or the alternative one in drm_sched_main.


>
> And above sequence is actually wrong to me, because we cancelled a pending timer and restart the timer for the scheduler that its heading job is still running there, the whole counting is repeated from zero and inaccurate at all


But for the timer-pending case (the common case) your mod_delayed_work will
effectively do exactly the same thing if you don't use per-job TTLs: you mod
it to the sched->timeout value, which resets the pending timer to count from
0 again.

I just wonder why we stopped using per-job TDR timers in the first place?
Isn't the simplest way to get an accurate timeout for each job to actually
measure the timeout for each job separately?

Andrey


>   
>
> Thanks

>
> ------------------------------------------
> Monk Liu | Cloud-GPU Core team
> ------------------------------------------
>
> -----Original Message-----
> From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
> Sent: Thursday, August 26, 2021 2:20 AM
> To: Christian König <ckoenig.leichtzumerken@gmail.com>; Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org; dri-devel <dri-devel@lists.freedesktop.org>
> Subject: Re: [PATCH] drm/sched: fix the bug of time out calculation(v2)
>
>
> On 2021-08-25 8:11 a.m., Christian König wrote:
>> No, this would break that logic here.
>>
>> See drm_sched_start_timeout() can be called multiple times, this is
>> intentional and very important!
>>
>> The logic in queue_delayed_work() makes sure that the timer is only
>> started once and then never again.
>>
>> All we need to take care of is to cancel_delayed_work() when we know
>> that the job is completed.
>
> Seems to me you can only do it for empty pending list otherwise you risk cancelling a legit new timer that was started by the next job or not restarting timer at all since your timer was still pending when next job tried to start it again (the common case).
> For non empty pending list you have to adjust the currently active TDR's timer from your's job TTL to TTL to the next job after you or just restart it as Monk does it here which prolongs the timeout more then required but still ok i guess.
>
> What about returning to the old scheme of timer sched_work per job so each job has it's own timer and we don't share it and everything is precise for each job, using the locking scheme we already have today the actual TDR handler will execute only once while all the other arising from the guilty job hang will be rejected (for amdgpu, for other drivers it probably requires same locking or we can move this to the scheduler layer)
>
> Andrey
>
>
>> This here works as intended as far as I can see and if you start to
>> use mod_delayed_work() you actually break it.
>>
>> Regards,
>> Christian.
>>
>> Am 25.08.21 um 14:01 schrieb Liu, Monk:
>>> [AMD Official Use Only]
>>>
>>> I think we should remove the cancel_delayed_work() in the beginning
>>> of the cleanup_job().
>>>
>>> Because by my patch the "mode_delayed_work" in cleanup_job is already
>>> doing its duty to retrigger the TO timer accordingly
>>>
>>> Thanks
>>>
>>> ------------------------------------------
>>> Monk Liu | Cloud-GPU Core team
>>> ------------------------------------------
>>>
>>> -----Original Message-----
>>> From: Liu, Monk
>>> Sent: Wednesday, August 25, 2021 7:55 PM
>>> To: 'Christian König' <ckoenig.leichtzumerken@gmail.com>;
>>> amd-gfx@lists.freedesktop.org
>>> Subject: RE: [PATCH] drm/sched: fix the bug of time out
>>> calculation(v2)
>>>
>>> [AMD Official Use Only]
>>>
>>>>> The timeout started by queue_delayed_work() in
>>>>> drm_sched_start_timeout() is paired with the cancel_delayed_work()
>>>>> in drm_sched_get_cleanup_job().
>>> No that's wrong, see that when we are in cleanup_job(), assume we do
>>> not have timeout on this sched (we are just keep submitting new jobs
>>> to this sched), Then the work_tdr is cancelled, and then we get the
>>> heading job, and let's assume the job is not signaled, then we run to
>>> the "queue timeout for next job" thus drm_sched_start_timeout() is
>>> called, so this heading job's TO timer is actually retriggered ...
>>> which is totally wrong.
>>>
>>> With my patch the timer is already retriggered after previous JOB
>>> really signaled.
>>>
>>> Can you be more specific on the incorrect part ?
>>>
>>> Thanks
>>> ------------------------------------------
>>> Monk Liu | Cloud-GPU Core team
>>> ------------------------------------------
>>>
>>> -----Original Message-----
>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>> Sent: Wednesday, August 25, 2021 2:32 PM
>>> To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
>>> Subject: Re: [PATCH] drm/sched: fix the bug of time out
>>> calculation(v2)
>>>
>>> Well NAK to that approach. First of all your bug analyses is incorrect.
>>>
>>> The timeout started by queue_delayed_work() in
>>> drm_sched_start_timeout() is paired with the cancel_delayed_work() in
>>> drm_sched_get_cleanup_job().
>>>
>>> So you must have something else going on here.
>>>
>>> Then please don't use mod_delayed_work(), instead always cancel it
>>> and restart it.
>>>
>>> Regards,
>>> Christian.
>>>
>>> Am 25.08.21 um 06:14 schrieb Monk Liu:
>>>> the original logic is wrong that the timeout will not be retriggerd
>>>> after the previous job siganled, and that lead to the scenario that
>>>> all jobs in the same scheduler shares the same timeout timer from
>>>> the very begining job in this scheduler which is wrong.
>>>>
>>>> we should modify the timer everytime a previous job signaled.
>>>>
>>>> v2:
>>>> further cleanup the logic, and do the TDR timer cancelling if the
>>>> signaled job is the last one in its scheduler.
>>>>
>>>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>>>> ---
>>>>     drivers/gpu/drm/scheduler/sched_main.c | 29
>>>> ++++++++++++++++++++---------
>>>>     1 file changed, 20 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>>>> b/drivers/gpu/drm/scheduler/sched_main.c
>>>> index a2a9536..8c102ac 100644
>>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>>> @@ -305,8 +305,17 @@ static void drm_sched_job_begin(struct
>>>> drm_sched_job *s_job)
>>>>         struct drm_gpu_scheduler *sched = s_job->sched;
>>>>            spin_lock(&sched->job_list_lock);
>>>> -    list_add_tail(&s_job->list, &sched->pending_list);
>>>> -    drm_sched_start_timeout(sched);
>>>> +    if (list_empty(&sched->pending_list)) {
>>>> +        list_add_tail(&s_job->list, &sched->pending_list);
>>>> +        drm_sched_start_timeout(sched);
>>>> +    } else {
>>>> +        /* the old jobs in pending list are not finished yet
>>>> +         * no need to restart TDR timer here, it is already
>>>> +         * handled by drm_sched_get_cleanup_job
>>>> +         */
>>>> +        list_add_tail(&s_job->list, &sched->pending_list);
>>>> +    }
>>>> +
>>>>         spin_unlock(&sched->job_list_lock);
>>>>     }
>>>>     @@ -693,17 +702,22 @@ drm_sched_get_cleanup_job(struct
>>>> drm_gpu_scheduler *sched)
>>>>         if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
>>>>             /* remove job from pending_list */
>>>>             list_del_init(&job->list);
>>>> +
>>>>             /* make the scheduled timestamp more accurate */
>>>>             next = list_first_entry_or_null(&sched->pending_list,
>>>>                             typeof(*next), list);
>>>> -        if (next)
>>>> +        if (next) {
>>>> +            /* if we still have job in pending list we need modify
>>>> the TDR timer */
>>>> +            mod_delayed_work(system_wq, &sched->work_tdr,
>>>> sched->timeout);
>>>>                 next->s_fence->scheduled.timestamp =
>>>>                     job->s_fence->finished.timestamp;
>>>> +        } else {
>>>> +            /* cancel the TDR timer if no job in pending list */
>>>> +            cancel_delayed_work(&sched->work_tdr);
>>>> +        }
>>>>            } else {
>>>>             job = NULL;
>>>> -        /* queue timeout for next job */
>>>> -        drm_sched_start_timeout(sched);
>>>>         }
>>>>            spin_unlock(&sched->job_list_lock);
>>>> @@ -791,11 +805,8 @@ static int drm_sched_main(void *param)
>>>>                           (entity = drm_sched_select_entity(sched)))
>>>> ||
>>>>                          kthread_should_stop());
>>>>     -        if (cleanup_job) {
>>>> +        if (cleanup_job)
>>>>                 sched->ops->free_job(cleanup_job);
>>>> -            /* queue timeout for next job */
>>>> -            drm_sched_start_timeout(sched);
>>>> -        }
>>>>                if (!entity)
>>>>                 continue;

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH] drm/sched: fix the bug of time out calculation(v2)
  2021-08-26  3:05             ` Andrey Grodzovsky
@ 2021-08-26  4:55               ` Liu, Monk
  2021-08-26  5:44                 ` Andrey Grodzovsky
  0 siblings, 1 reply; 12+ messages in thread
From: Liu, Monk @ 2021-08-26  4:55 UTC (permalink / raw)
  To: Grodzovsky, Andrey, Christian König, amd-gfx, dri-devel

[AMD Official Use Only]

>> But for the timer-pending case (the common case) your mod_delayed_work will effectively do exactly the same thing if you don't use per-job TTLs - you mod it to the sched->timeout value, which resets the pending timer to count from 0 again.

My patch will only modify the timer (restart it, actually) when the heading job is signaled, which means on the HW ring the next job is just about to start processing.
If the job is not signaled (your common case) the timer is still not touched at all ...

>> I just wonder why we stopped using per-job TDR timers in the first place? Isn't the simplest way to get accurate timeouts to actually measure the timeout for each job separately?

I'm not sure if Christian can recall something, but I believe it was due to some limitations we found (or some race issue like two jobs on the same scheduler timing out at the same time, which is probable if they are scheduled to the ring in almost the same timeframe).
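
If the per-job scheme were ever brought back, that "two jobs time out at
the same time" race could in principle be handled by letting the handlers
race for a single reset token, along the lines of the hypothetical sketch
below (my_dev, in_recovery and reset_lock are illustrative names, not
existing amdgpu or scheduler code):

#include <linux/mutex.h>
#include <linux/atomic.h>

struct my_dev {
	/* assumed to be initialized with mutex_init()/atomic_set() at device init */
	struct mutex reset_lock;
	atomic_t in_recovery;
};

static void my_job_timedout_serialized(struct my_dev *dev)
{
	/* whichever handler flips the flag first performs the recovery,
	 * the other concurrent timeout handlers simply bail out */
	if (atomic_cmpxchg(&dev->in_recovery, 0, 1) != 0)
		return;

	mutex_lock(&dev->reset_lock);
	/* driver-specific hang detection and reset would go here */
	mutex_unlock(&dev->reset_lock);

	atomic_set(&dev->in_recovery, 0);
}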

Anyway, I have a v3 of the patch; please take a look, it seems to work for me.

Thanks 

------------------------------------------
Monk Liu | Cloud-GPU Core team
------------------------------------------

-----Original Message-----
From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com> 
Sent: Thursday, August 26, 2021 11:05 AM
To: Liu, Monk <Monk.Liu@amd.com>; Christian König <ckoenig.leichtzumerken@gmail.com>; amd-gfx@lists.freedesktop.org; dri-devel <dri-devel@lists.freedesktop.org>
Subject: Re: [PATCH] drm/sched: fix the bug of time out calculation(v2)


On 2021-08-25 10:31 p.m., Liu, Monk wrote:
> [AMD Official Use Only]
>
> Hi Andrey
>
> I'm not quite sure if I read you correctly
>
>>> Seems to me you can only do it for empty pending list otherwise you risk cancelling a legit new timer that was started by the next job or not restarting timer at all since your timer was still pending when next job tried to start it again (the common case).
> I don't understand above sentence, from my understanding for the 
> common case,  if the timer is pending, the cancel_delay_work in 
> beginning will cancel it and then we will get to the line of "queue 
> timeout for next job" since the heading job is not signaled (align 
> with the timer is pending), then the timer will be restarted (for the 
> next job)


Ignore it, i realized from looking that i missed the timer restart in then end of drm_sched_get_cleanup_job or the alternative one in drm_sched_main


>
> And above sequence is actually wrong to me, because we cancelled a 
> pending timer and restart the timer for the scheduler that its heading 
> job is still running there, the whole counting is repeated from zero 
> and inaccurate at all


But  for timer pending case (common case) your mod_delayed_work will effectively do exactly the same if you don't use per job TTLs - you mod it to  sched->timeout value which resets the pending timer to again count from 0.

I just wonder why we stopped using per job TDR timers in the first place ? Isn't the simplest way to count accurate timeouts for each job is to actually measure the timeouts for each job separately ?

Andrey


>   
>
> Thanks

>
> ------------------------------------------
> Monk Liu | Cloud-GPU Core team
> ------------------------------------------
>
> -----Original Message-----
> From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
> Sent: Thursday, August 26, 2021 2:20 AM
> To: Christian König <ckoenig.leichtzumerken@gmail.com>; Liu, Monk 
> <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org; dri-devel 
> <dri-devel@lists.freedesktop.org>
> Subject: Re: [PATCH] drm/sched: fix the bug of time out 
> calculation(v2)
>
>
> On 2021-08-25 8:11 a.m., Christian König wrote:
>> No, this would break that logic here.
>>
>> See drm_sched_start_timeout() can be called multiple times, this is 
>> intentional and very important!
>>
>> The logic in queue_delayed_work() makes sure that the timer is only 
>> started once and then never again.
>>
>> All we need to take care of is to cancel_delayed_work() when we know 
>> that the job is completed.
>
> Seems to me you can only do it for empty pending list otherwise you risk cancelling a legit new timer that was started by the next job or not restarting timer at all since your timer was still pending when next job tried to start it again (the common case).
> For non empty pending list you have to adjust the currently active TDR's timer from your's job TTL to TTL to the next job after you or just restart it as Monk does it here which prolongs the timeout more then required but still ok i guess.
>
> What about returning to the old scheme of timer sched_work per job so 
> each job has it's own timer and we don't share it and everything is 
> precise for each job, using the locking scheme we already have today 
> the actual TDR handler will execute only once while all the other 
> arising from the guilty job hang will be rejected (for amdgpu, for 
> other drivers it probably requires same locking or we can move this to 
> the scheduler layer)
>
> Andrey
>
>
>> This here works as intended as far as I can see and if you start to 
>> use mod_delayed_work() you actually break it.
>>
>> Regards,
>> Christian.
>>
>> Am 25.08.21 um 14:01 schrieb Liu, Monk:
>>> [AMD Official Use Only]
>>>
>>> I think we should remove the cancel_delayed_work() in the beginning 
>>> of the cleanup_job().
>>>
>>> Because by my patch the "mode_delayed_work" in cleanup_job is 
>>> already doing its duty to retrigger the TO timer accordingly
>>>
>>> Thanks
>>>
>>> ------------------------------------------
>>> Monk Liu | Cloud-GPU Core team
>>> ------------------------------------------
>>>
>>> -----Original Message-----
>>> From: Liu, Monk
>>> Sent: Wednesday, August 25, 2021 7:55 PM
>>> To: 'Christian König' <ckoenig.leichtzumerken@gmail.com>;
>>> amd-gfx@lists.freedesktop.org
>>> Subject: RE: [PATCH] drm/sched: fix the bug of time out
>>> calculation(v2)
>>>
>>> [AMD Official Use Only]
>>>
>>>>> The timeout started by queue_delayed_work() in
>>>>> drm_sched_start_timeout() is paired with the cancel_delayed_work() 
>>>>> in drm_sched_get_cleanup_job().
>>> No that's wrong, see that when we are in cleanup_job(), assume we do 
>>> not have timeout on this sched (we are just keep submitting new jobs 
>>> to this sched), Then the work_tdr is cancelled, and then we get the 
>>> heading job, and let's assume the job is not signaled, then we run 
>>> to the "queue timeout for next job" thus drm_sched_start_timeout() 
>>> is called, so this heading job's TO timer is actually retriggered ...
>>> which is totally wrong.
>>>
>>> With my patch the timer is already retriggered after previous JOB 
>>> really signaled.
>>>
>>> Can you be more specific on the incorrect part ?
>>>
>>> Thanks
>>> ------------------------------------------
>>> Monk Liu | Cloud-GPU Core team
>>> ------------------------------------------
>>>
>>> -----Original Message-----
>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>> Sent: Wednesday, August 25, 2021 2:32 PM
>>> To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
>>> Subject: Re: [PATCH] drm/sched: fix the bug of time out
>>> calculation(v2)
>>>
>>> Well NAK to that approach. First of all your bug analyses is incorrect.
>>>
>>> The timeout started by queue_delayed_work() in
>>> drm_sched_start_timeout() is paired with the cancel_delayed_work() 
>>> in drm_sched_get_cleanup_job().
>>>
>>> So you must have something else going on here.
>>>
>>> Then please don't use mod_delayed_work(), instead always cancel it 
>>> and restart it.
>>>
>>> Regards,
>>> Christian.
>>>
>>> Am 25.08.21 um 06:14 schrieb Monk Liu:
>>>> the original logic is wrong that the timeout will not be retriggerd 
>>>> after the previous job siganled, and that lead to the scenario that 
>>>> all jobs in the same scheduler shares the same timeout timer from 
>>>> the very begining job in this scheduler which is wrong.
>>>>
>>>> we should modify the timer everytime a previous job signaled.
>>>>
>>>> v2:
>>>> further cleanup the logic, and do the TDR timer cancelling if the 
>>>> signaled job is the last one in its scheduler.
>>>>
>>>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>>>> ---
>>>>     drivers/gpu/drm/scheduler/sched_main.c | 29
>>>> ++++++++++++++++++++---------
>>>>     1 file changed, 20 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>>>> b/drivers/gpu/drm/scheduler/sched_main.c
>>>> index a2a9536..8c102ac 100644
>>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>>> @@ -305,8 +305,17 @@ static void drm_sched_job_begin(struct 
>>>> drm_sched_job *s_job)
>>>>         struct drm_gpu_scheduler *sched = s_job->sched;
>>>>            spin_lock(&sched->job_list_lock);
>>>> -    list_add_tail(&s_job->list, &sched->pending_list);
>>>> -    drm_sched_start_timeout(sched);
>>>> +    if (list_empty(&sched->pending_list)) {
>>>> +        list_add_tail(&s_job->list, &sched->pending_list);
>>>> +        drm_sched_start_timeout(sched);
>>>> +    } else {
>>>> +        /* the old jobs in pending list are not finished yet
>>>> +         * no need to restart TDR timer here, it is already
>>>> +         * handled by drm_sched_get_cleanup_job
>>>> +         */
>>>> +        list_add_tail(&s_job->list, &sched->pending_list);
>>>> +    }
>>>> +
>>>>         spin_unlock(&sched->job_list_lock);
>>>>     }
>>>>     @@ -693,17 +702,22 @@ drm_sched_get_cleanup_job(struct 
>>>> drm_gpu_scheduler *sched)
>>>>         if (job && dma_fence_is_signaled(&job->s_fence->finished)) 
>>>> {
>>>>             /* remove job from pending_list */
>>>>             list_del_init(&job->list);
>>>> +
>>>>             /* make the scheduled timestamp more accurate */
>>>>             next = list_first_entry_or_null(&sched->pending_list,
>>>>                             typeof(*next), list);
>>>> -        if (next)
>>>> +        if (next) {
>>>> +            /* if we still have job in pending list we need modify
>>>> the TDR timer */
>>>> +            mod_delayed_work(system_wq, &sched->work_tdr,
>>>> sched->timeout);
>>>>                 next->s_fence->scheduled.timestamp =
>>>>                     job->s_fence->finished.timestamp;
>>>> +        } else {
>>>> +            /* cancel the TDR timer if no job in pending list */
>>>> +            cancel_delayed_work(&sched->work_tdr);
>>>> +        }
>>>>            } else {
>>>>             job = NULL;
>>>> -        /* queue timeout for next job */
>>>> -        drm_sched_start_timeout(sched);
>>>>         }
>>>>            spin_unlock(&sched->job_list_lock);
>>>> @@ -791,11 +805,8 @@ static int drm_sched_main(void *param)
>>>>                           (entity = 
>>>> drm_sched_select_entity(sched)))
>>>> ||
>>>>                          kthread_should_stop());
>>>>     -        if (cleanup_job) {
>>>> +        if (cleanup_job)
>>>>                 sched->ops->free_job(cleanup_job);
>>>> -            /* queue timeout for next job */
>>>> -            drm_sched_start_timeout(sched);
>>>> -        }
>>>>                if (!entity)
>>>>                 continue;

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/sched: fix the bug of time out calculation(v2)
  2021-08-26  4:55               ` Liu, Monk
@ 2021-08-26  5:44                 ` Andrey Grodzovsky
  0 siblings, 0 replies; 12+ messages in thread
From: Andrey Grodzovsky @ 2021-08-26  5:44 UTC (permalink / raw)
  To: Liu, Monk, Christian König, amd-gfx, dri-devel


On 2021-08-26 12:55 a.m., Liu, Monk wrote:
> [AMD Official Use Only]
>
>>> But for the timer-pending case (the common case) your mod_delayed_work will effectively do exactly the same thing if you don't use per-job TTLs - you mod it to the sched->timeout value, which resets the pending timer to count from 0 again.
> My patch will only modify the timer (restart it, actually) when the heading job is signaled, which means on the HW ring the next job is just about to start processing.

Not sure this is always true; see this specific test we added long ago:
https://gitlab.freedesktop.org/mesa/drm/-/commit/bc21168fa924d3fc4a000492e861f50a1a135b25
AFAIK a ring doesn't have strict serialization of processing jobs one
after another, especially when 2 jobs are scheduled from different
contexts like in the example above, which means that in this case the
second job might be well into execution for some time when the first
finishes and restarts the TDR timer from scratch.
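
One way to avoid granting the next job a full fresh timeout when the head
job signals would be to re-arm the shared timer only for that job's
remaining budget, using its scheduled-fence timestamp as an approximation
of when it started executing. A hypothetical helper (not part of the patch
or of the scheduler API) could look like this:

#include <linux/ktime.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>

static void my_rearm_for_next(struct delayed_work *work_tdr,
			      long timeout_jiffies, ktime_t next_scheduled)
{
	s64 elapsed_ms = ktime_ms_delta(ktime_get(), next_scheduled);
	long remaining = timeout_jiffies - (long)msecs_to_jiffies(elapsed_ms);

	if (remaining < 0)
		remaining = 0;          /* the next job already used up its budget */

	mod_delayed_work(system_wq, work_tdr, remaining);
}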


> If the job is not signaled (your common case) the timer is still not touched at all ...
>
>>> I just wonder why we stopped using per-job TDR timers in the first place? Isn't the simplest way to get accurate timeouts to actually measure the timeout for each job separately?
> I'm not sure if Christian can recall something, but I believe it was due to some limitations we found (or some race issue like two jobs on the same scheduler timing out at the same time, which is probable if they are scheduled to the ring in almost the same timeframe).
>
> Anyway, I have a v3 of the patch; please take a look, it seems to work for me.


Will take a look tomorrow

Andrey


>   
>
> Thanks
>
> ------------------------------------------
> Monk Liu | Cloud-GPU Core team
> ------------------------------------------
>
> -----Original Message-----
> From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
> Sent: Thursday, August 26, 2021 11:05 AM
> To: Liu, Monk <Monk.Liu@amd.com>; Christian König <ckoenig.leichtzumerken@gmail.com>; amd-gfx@lists.freedesktop.org; dri-devel <dri-devel@lists.freedesktop.org>
> Subject: Re: [PATCH] drm/sched: fix the bug of time out calculation(v2)
>
>
> On 2021-08-25 10:31 p.m., Liu, Monk wrote:
>> [AMD Official Use Only]
>>
>> Hi Andrey
>>
>> I'm not quite sure if I read you correctly
>>
>>>> Seems to me you can only do it for empty pending list otherwise you risk cancelling a legit new timer that was started by the next job or not restarting timer at all since your timer was still pending when next job tried to start it again (the common case).
>> I don't understand above sentence, from my understanding for the
>> common case,  if the timer is pending, the cancel_delay_work in
>> beginning will cancel it and then we will get to the line of "queue
>> timeout for next job" since the heading job is not signaled (align
>> with the timer is pending), then the timer will be restarted (for the
>> next job)
>
> Ignore it, i realized from looking that i missed the timer restart in then end of drm_sched_get_cleanup_job or the alternative one in drm_sched_main
>
>
>> And above sequence is actually wrong to me, because we cancelled a
>> pending timer and restart the timer for the scheduler that its heading
>> job is still running there, the whole counting is repeated from zero
>> and inaccurate at all
>
> But  for timer pending case (common case) your mod_delayed_work will effectively do exactly the same if you don't use per job TTLs - you mod it to  sched->timeout value which resets the pending timer to again count from 0.
>
> I just wonder why we stopped using per job TDR timers in the first place ? Isn't the simplest way to count accurate timeouts for each job is to actually measure the timeouts for each job separately ?
>
> Andrey
>
>
>>    
>>
>> Thanks
>> ------------------------------------------
>> Monk Liu | Cloud-GPU Core team
>> ------------------------------------------
>>
>> -----Original Message-----
>> From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
>> Sent: Thursday, August 26, 2021 2:20 AM
>> To: Christian König <ckoenig.leichtzumerken@gmail.com>; Liu, Monk
>> <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org; dri-devel
>> <dri-devel@lists.freedesktop.org>
>> Subject: Re: [PATCH] drm/sched: fix the bug of time out
>> calculation(v2)
>>
>>
>> On 2021-08-25 8:11 a.m., Christian König wrote:
>>> No, this would break that logic here.
>>>
>>> See drm_sched_start_timeout() can be called multiple times, this is
>>> intentional and very important!
>>>
>>> The logic in queue_delayed_work() makes sure that the timer is only
>>> started once and then never again.
>>>
>>> All we need to take care of is to cancel_delayed_work() when we know
>>> that the job is completed.
>> Seems to me you can only do it for empty pending list otherwise you risk cancelling a legit new timer that was started by the next job or not restarting timer at all since your timer was still pending when next job tried to start it again (the common case).
>> For non empty pending list you have to adjust the currently active TDR's timer from your's job TTL to TTL to the next job after you or just restart it as Monk does it here which prolongs the timeout more then required but still ok i guess.
>>
>> What about returning to the old scheme of timer sched_work per job so
>> each job has it's own timer and we don't share it and everything is
>> precise for each job, using the locking scheme we already have today
>> the actual TDR handler will execute only once while all the other
>> arising from the guilty job hang will be rejected (for amdgpu, for
>> other drivers it probably requires same locking or we can move this to
>> the scheduler layer)
>>
>> Andrey
>>
>>
>>> This here works as intended as far as I can see and if you start to
>>> use mod_delayed_work() you actually break it.
>>>
>>> Regards,
>>> Christian.
>>>
>>> Am 25.08.21 um 14:01 schrieb Liu, Monk:
>>>> [AMD Official Use Only]
>>>>
>>>> I think we should remove the cancel_delayed_work() in the beginning
>>>> of the cleanup_job().
>>>>
>>>> Because by my patch the "mode_delayed_work" in cleanup_job is
>>>> already doing its duty to retrigger the TO timer accordingly
>>>>
>>>> Thanks
>>>>
>>>> ------------------------------------------
>>>> Monk Liu | Cloud-GPU Core team
>>>> ------------------------------------------
>>>>
>>>> -----Original Message-----
>>>> From: Liu, Monk
>>>> Sent: Wednesday, August 25, 2021 7:55 PM
>>>> To: 'Christian König' <ckoenig.leichtzumerken@gmail.com>;
>>>> amd-gfx@lists.freedesktop.org
>>>> Subject: RE: [PATCH] drm/sched: fix the bug of time out
>>>> calculation(v2)
>>>>
>>>> [AMD Official Use Only]
>>>>
>>>>>> The timeout started by queue_delayed_work() in
>>>>>> drm_sched_start_timeout() is paired with the cancel_delayed_work()
>>>>>> in drm_sched_get_cleanup_job().
>>>> No that's wrong, see that when we are in cleanup_job(), assume we do
>>>> not have timeout on this sched (we are just keep submitting new jobs
>>>> to this sched), Then the work_tdr is cancelled, and then we get the
>>>> heading job, and let's assume the job is not signaled, then we run
>>>> to the "queue timeout for next job" thus drm_sched_start_timeout()
>>>> is called, so this heading job's TO timer is actually retriggered ...
>>>> which is totally wrong.
>>>>
>>>> With my patch the timer is already retriggered after previous JOB
>>>> really signaled.
>>>>
>>>> Can you be more specific on the incorrect part ?
>>>>
>>>> Thanks
>>>> ------------------------------------------
>>>> Monk Liu | Cloud-GPU Core team
>>>> ------------------------------------------
>>>>
>>>> -----Original Message-----
>>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>>> Sent: Wednesday, August 25, 2021 2:32 PM
>>>> To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
>>>> Subject: Re: [PATCH] drm/sched: fix the bug of time out
>>>> calculation(v2)
>>>>
>>>> Well NAK to that approach. First of all your bug analyses is incorrect.
>>>>
>>>> The timeout started by queue_delayed_work() in
>>>> drm_sched_start_timeout() is paired with the cancel_delayed_work()
>>>> in drm_sched_get_cleanup_job().
>>>>
>>>> So you must have something else going on here.
>>>>
>>>> Then please don't use mod_delayed_work(), instead always cancel it
>>>> and restart it.
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>> Am 25.08.21 um 06:14 schrieb Monk Liu:
>>>>> the original logic is wrong that the timeout will not be retriggerd
>>>>> after the previous job siganled, and that lead to the scenario that
>>>>> all jobs in the same scheduler shares the same timeout timer from
>>>>> the very begining job in this scheduler which is wrong.
>>>>>
>>>>> we should modify the timer everytime a previous job signaled.
>>>>>
>>>>> v2:
>>>>> further cleanup the logic, and do the TDR timer cancelling if the
>>>>> signaled job is the last one in its scheduler.
>>>>>
>>>>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>>>>> ---
>>>>>      drivers/gpu/drm/scheduler/sched_main.c | 29
>>>>> ++++++++++++++++++++---------
>>>>>      1 file changed, 20 insertions(+), 9 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>>>>> b/drivers/gpu/drm/scheduler/sched_main.c
>>>>> index a2a9536..8c102ac 100644
>>>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>>>> @@ -305,8 +305,17 @@ static void drm_sched_job_begin(struct
>>>>> drm_sched_job *s_job)
>>>>>          struct drm_gpu_scheduler *sched = s_job->sched;
>>>>>             spin_lock(&sched->job_list_lock);
>>>>> -    list_add_tail(&s_job->list, &sched->pending_list);
>>>>> -    drm_sched_start_timeout(sched);
>>>>> +    if (list_empty(&sched->pending_list)) {
>>>>> +        list_add_tail(&s_job->list, &sched->pending_list);
>>>>> +        drm_sched_start_timeout(sched);
>>>>> +    } else {
>>>>> +        /* the old jobs in pending list are not finished yet
>>>>> +         * no need to restart TDR timer here, it is already
>>>>> +         * handled by drm_sched_get_cleanup_job
>>>>> +         */
>>>>> +        list_add_tail(&s_job->list, &sched->pending_list);
>>>>> +    }
>>>>> +
>>>>>          spin_unlock(&sched->job_list_lock);
>>>>>      }
>>>>>      @@ -693,17 +702,22 @@ drm_sched_get_cleanup_job(struct
>>>>> drm_gpu_scheduler *sched)
>>>>>          if (job && dma_fence_is_signaled(&job->s_fence->finished))
>>>>> {
>>>>>              /* remove job from pending_list */
>>>>>              list_del_init(&job->list);
>>>>> +
>>>>>              /* make the scheduled timestamp more accurate */
>>>>>              next = list_first_entry_or_null(&sched->pending_list,
>>>>>                              typeof(*next), list);
>>>>> -        if (next)
>>>>> +        if (next) {
>>>>> +            /* if we still have job in pending list we need modify
>>>>> the TDR timer */
>>>>> +            mod_delayed_work(system_wq, &sched->work_tdr,
>>>>> sched->timeout);
>>>>>                  next->s_fence->scheduled.timestamp =
>>>>>                      job->s_fence->finished.timestamp;
>>>>> +        } else {
>>>>> +            /* cancel the TDR timer if no job in pending list */
>>>>> +            cancel_delayed_work(&sched->work_tdr);
>>>>> +        }
>>>>>             } else {
>>>>>              job = NULL;
>>>>> -        /* queue timeout for next job */
>>>>> -        drm_sched_start_timeout(sched);
>>>>>          }
>>>>>             spin_unlock(&sched->job_list_lock);
>>>>> @@ -791,11 +805,8 @@ static int drm_sched_main(void *param)
>>>>>                            (entity =
>>>>> drm_sched_select_entity(sched)))
>>>>> ||
>>>>>                           kthread_should_stop());
>>>>>      -        if (cleanup_job) {
>>>>> +        if (cleanup_job)
>>>>>                  sched->ops->free_job(cleanup_job);
>>>>> -            /* queue timeout for next job */
>>>>> -            drm_sched_start_timeout(sched);
>>>>> -        }
>>>>>                 if (!entity)
>>>>>                  continue;

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2021-08-26  5:44 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-25  4:14 [PATCH] drm/sched: fix the bug of time out calculation(v2) Monk Liu
2021-08-25  6:31 ` Christian König
2021-08-25 11:55   ` Liu, Monk
2021-08-25 12:01     ` Liu, Monk
2021-08-25 12:11       ` Christian König
2021-08-25 18:20         ` Andrey Grodzovsky
2021-08-26  2:31           ` Liu, Monk
2021-08-26  3:05             ` Andrey Grodzovsky
2021-08-26  4:55               ` Liu, Monk
2021-08-26  5:44                 ` Andrey Grodzovsky
2021-08-26  1:53         ` Liu, Monk
2021-08-25 12:51 ` Alex Deucher
