linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] sched/core: Avoid obvious double update_rq_clock warning
@ 2022-04-18  9:09 Hao Jia
  2022-04-19 10:48 ` Peter Zijlstra
  2022-04-21 12:30 ` Dietmar Eggemann
  0 siblings, 2 replies; 8+ messages in thread
From: Hao Jia @ 2022-04-18  9:09 UTC (permalink / raw)
  To: mingo, peterz, juri.lelli, vincent.guittot, dietmar.eggemann,
	rostedt, bsegall, mgorman, bristot
  Cc: linux-kernel, Hao Jia

When we use raw_spin_rq_lock to acquire the rq lock and have to
update the rq clock while holding the lock, the kernel may issue
a WARN_DOUBLE_CLOCK warning.

Since we directly use raw_spin_rq_lock to acquire the rq lock instead of
rq_lock, there is no corresponding change to rq->clock_update_flags.
In particular, after we have obtained the rq lock of another core, that
core's rq->clock_update_flags may still be RQCF_UPDATED, and calling
update_rq_clock then triggers the WARN_DOUBLE_CLOCK warning.

Some call trace reports:
Call Trace 1:
 <IRQ>
 sched_rt_period_timer+0x10f/0x3a0
 ? enqueue_top_rt_rq+0x110/0x110
 __hrtimer_run_queues+0x1a9/0x490
 hrtimer_interrupt+0x10b/0x240
 __sysvec_apic_timer_interrupt+0x8a/0x250
 sysvec_apic_timer_interrupt+0x9a/0xd0
 </IRQ>
 <TASK>
 asm_sysvec_apic_timer_interrupt+0x12/0x20

Call Trace 2:
 <TASK>
 activate_task+0x8b/0x110
 push_rt_task.part.108+0x241/0x2c0
 push_rt_tasks+0x15/0x30
 finish_task_switch+0xaa/0x2e0
 ? __switch_to+0x134/0x420
 __schedule+0x343/0x8e0
 ? hrtimer_start_range_ns+0x101/0x340
 schedule+0x4e/0xb0
 do_nanosleep+0x8e/0x160
 hrtimer_nanosleep+0x89/0x120
 ? hrtimer_init_sleeper+0x90/0x90
 __x64_sys_nanosleep+0x96/0xd0
 do_syscall_64+0x34/0x90
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Call Trace 3:
 <TASK>
 deactivate_task+0x93/0xe0
 pull_rt_task+0x33e/0x400
 balance_rt+0x7e/0x90
 __schedule+0x62f/0x8e0
 do_task_dead+0x3f/0x50
 do_exit+0x7b8/0xbb0
 do_group_exit+0x2d/0x90
 get_signal+0x9df/0x9e0
 ? preempt_count_add+0x56/0xa0
 ? __remove_hrtimer+0x35/0x70
 arch_do_signal_or_restart+0x36/0x720
 ? nanosleep_copyout+0x39/0x50
 ? do_nanosleep+0x131/0x160
 ? audit_filter_inodes+0xf5/0x120
 exit_to_user_mode_prepare+0x10f/0x1e0
 syscall_exit_to_user_mode+0x17/0x30
 do_syscall_64+0x40/0x90
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Steps to reproduce:
1. Enable CONFIG_SCHED_DEBUG when compiling the kernel
2. echo 1 > /sys/kernel/debug/clear_warn_once
   echo "WARN_DOUBLE_CLOCK" > /sys/kernel/debug/sched_features
   echo "NO_RT_PUSH_IPI" > /sys/kernel/debug/sched_features
3. Run some rt tasks that periodically change the priority and sleep, e.g.:

void *ThreadFun(void *arg)
{
	int cnt = *(int*)arg;
	struct sched_param param;

	while (1) {
		sqrt(MAGIC_NUM);
		cnt = cnt % 10 + 1;
		param.sched_priority = cnt;
		pthread_setschedparam(pthread_self(), SCHED_RR, &param);
		sqrt(MAGIC_NUM);
		sqrt(MAGIC_NUM);
		sleep(cnt);
	}
	return NULL;
}

Signed-off-by: Hao Jia <jiahao.os@bytedance.com>
---
 kernel/sched/deadline.c | 18 +++++++++++-------
 kernel/sched/rt.c       | 20 ++++++++++++++++++--
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index fb4255ae0b2c..9207b978cc43 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2258,6 +2258,7 @@ static int push_dl_task(struct rq *rq)
 {
 	struct task_struct *next_task;
 	struct rq *later_rq;
+	struct rq_flags srf, drf;
 	int ret = 0;
 
 	if (!rq->dl.overloaded)
@@ -2317,16 +2318,14 @@ static int push_dl_task(struct rq *rq)
 		goto retry;
 	}
 
+	rq_pin_lock(rq, &srf);
+	rq_pin_lock(later_rq, &drf);
 	deactivate_task(rq, next_task, 0);
 	set_task_cpu(next_task, later_rq->cpu);
-
-	/*
-	 * Update the later_rq clock here, because the clock is used
-	 * by the cpufreq_update_util() inside __add_running_bw().
-	 */
-	update_rq_clock(later_rq);
-	activate_task(later_rq, next_task, ENQUEUE_NOCLOCK);
+	activate_task(later_rq, next_task, 0);
 	ret = 1;
+	rq_unpin_lock(rq, &srf);
+	rq_unpin_lock(later_rq, &drf);
 
 	resched_curr(later_rq);
 
@@ -2351,6 +2350,7 @@ static void pull_dl_task(struct rq *this_rq)
 	struct task_struct *p, *push_task;
 	bool resched = false;
 	struct rq *src_rq;
+	struct rq_flags this_rf, src_rf;
 	u64 dmin = LONG_MAX;
 
 	if (likely(!dl_overloaded(this_rq)))
@@ -2413,11 +2413,15 @@ static void pull_dl_task(struct rq *this_rq)
 			if (is_migration_disabled(p)) {
 				push_task = get_push_task(src_rq);
 			} else {
+				rq_pin_lock(this_rq, &this_rf);
+				rq_pin_lock(src_rq, &src_rf);
 				deactivate_task(src_rq, p, 0);
 				set_task_cpu(p, this_cpu);
 				activate_task(this_rq, p, 0);
 				dmin = p->dl.deadline;
 				resched = true;
+				rq_unpin_lock(this_rq, &this_rf);
+				rq_unpin_lock(src_rq, &src_rf);
 			}
 
 			/* Is there any other task even earlier? */
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index a32c46889af8..9305ad87fef0 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -871,6 +871,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 		int enqueue = 0;
 		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
 		struct rq *rq = rq_of_rt_rq(rt_rq);
+		struct rq_flags rf;
 		int skip;
 
 		/*
@@ -885,7 +886,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 		if (skip)
 			continue;
 
-		raw_spin_rq_lock(rq);
+		rq_lock(rq, &rf);
 		update_rq_clock(rq);
 
 		if (rt_rq->rt_time) {
@@ -923,7 +924,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 
 		if (enqueue)
 			sched_rt_rq_enqueue(rt_rq);
-		raw_spin_rq_unlock(rq);
+		rq_unlock(rq, &rf);
 	}
 
 	if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
@@ -2001,6 +2002,7 @@ static int push_rt_task(struct rq *rq, bool pull)
 {
 	struct task_struct *next_task;
 	struct rq *lowest_rq;
+	struct rq_flags srf, drf;
 	int ret = 0;
 
 	if (!rq->rt.overloaded)
@@ -2102,9 +2104,18 @@ static int push_rt_task(struct rq *rq, bool pull)
 		goto retry;
 	}
 
+	/*
+	 * We may drop rq's lock in double_lock_balance,
+	 * so we still need to clean up the RQCF_UPDATED flag
+	 * to avoid the WARN_DOUBLE_CLOCK warning.
+	 */
+	rq_pin_lock(rq, &srf);
+	rq_pin_lock(lowest_rq, &drf);
 	deactivate_task(rq, next_task, 0);
 	set_task_cpu(next_task, lowest_rq->cpu);
 	activate_task(lowest_rq, next_task, 0);
+	rq_unpin_lock(rq, &srf);
+	rq_unpin_lock(lowest_rq, &drf);
 	resched_curr(lowest_rq);
 	ret = 1;
 
@@ -2299,6 +2310,7 @@ static void pull_rt_task(struct rq *this_rq)
 	bool resched = false;
 	struct task_struct *p, *push_task;
 	struct rq *src_rq;
+	struct rq_flags src_rf, this_rf;
 	int rt_overload_count = rt_overloaded(this_rq);
 
 	if (likely(!rt_overload_count))
@@ -2375,10 +2387,14 @@ static void pull_rt_task(struct rq *this_rq)
 			if (is_migration_disabled(p)) {
 				push_task = get_push_task(src_rq);
 			} else {
+				rq_pin_lock(this_rq, &this_rf);
+				rq_pin_lock(src_rq, &src_rf);
 				deactivate_task(src_rq, p, 0);
 				set_task_cpu(p, this_cpu);
 				activate_task(this_rq, p, 0);
 				resched = true;
+				rq_unpin_lock(this_rq, &this_rf);
+				rq_unpin_lock(src_rq, &src_rf);
 			}
 			/*
 			 * We continue with the search, just in
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] sched/core: Avoid obvious double update_rq_clock warning
  2022-04-18  9:09 [PATCH] sched/core: Avoid obvious double update_rq_clock warning Hao Jia
@ 2022-04-19 10:48 ` Peter Zijlstra
  2022-04-20  8:29   ` [External] " Hao Jia
  2022-04-21 12:30 ` Dietmar Eggemann
  1 sibling, 1 reply; 8+ messages in thread
From: Peter Zijlstra @ 2022-04-19 10:48 UTC (permalink / raw)
  To: Hao Jia
  Cc: mingo, juri.lelli, vincent.guittot, dietmar.eggemann, rostedt,
	bsegall, mgorman, bristot, linux-kernel

On Mon, Apr 18, 2022 at 05:09:29PM +0800, Hao Jia wrote:
> When we use raw_spin_rq_lock to acquire the rq lock and have to
> update the rq clock while holding the lock, the kernel may issue
> a WARN_DOUBLE_CLOCK warning.
> 
> Since we directly use raw_spin_rq_lock to acquire rq lock instead of
> rq_lock, there is no corresponding change to rq->clock_update_flags.
> In particular, we have obtained the rq lock of other cores,
> the core rq->clock_update_flags may be RQCF_UPDATED at this time, and
> then calling update_rq_clock will trigger the WARN_DOUBLE_CLOCK warning.

> Signed-off-by: Hao Jia <jiahao.os@bytedance.com>
> ---
>  kernel/sched/deadline.c | 18 +++++++++++-------
>  kernel/sched/rt.c       | 20 ++++++++++++++++++--

Very good for keeping them in sync.

>  2 files changed, 29 insertions(+), 9 deletions(-)
> 
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index fb4255ae0b2c..9207b978cc43 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c

> @@ -2317,16 +2318,14 @@ static int push_dl_task(struct rq *rq)
>  		goto retry;
>  	}
>  
> +	rq_pin_lock(rq, &srf);
> +	rq_pin_lock(later_rq, &drf);
>  	deactivate_task(rq, next_task, 0);
>  	set_task_cpu(next_task, later_rq->cpu);
> -
> -	/*
> -	 * Update the later_rq clock here, because the clock is used
> -	 * by the cpufreq_update_util() inside __add_running_bw().
> -	 */
> -	update_rq_clock(later_rq);
> -	activate_task(later_rq, next_task, ENQUEUE_NOCLOCK);
> +	activate_task(later_rq, next_task, 0);
>  	ret = 1;
> +	rq_unpin_lock(rq, &srf);
> +	rq_unpin_lock(later_rq, &drf);
>  
>  	resched_curr(later_rq);
>  

> @@ -2413,11 +2413,15 @@ static void pull_dl_task(struct rq *this_rq)
>  			if (is_migration_disabled(p)) {
>  				push_task = get_push_task(src_rq);
>  			} else {
> +				rq_pin_lock(this_rq, &this_rf);
> +				rq_pin_lock(src_rq, &src_rf);
>  				deactivate_task(src_rq, p, 0);
>  				set_task_cpu(p, this_cpu);
>  				activate_task(this_rq, p, 0);
>  				dmin = p->dl.deadline;
>  				resched = true;
> +				rq_unpin_lock(this_rq, &this_rf);
> +				rq_unpin_lock(src_rq, &src_rf);
>  			}
>  
>  			/* Is there any other task even earlier? */

I'm really not sure about this part though. This is a bit of a mess. The
balancer doesn't really need the pinning stuff. I realize you did that
because we got the clock annotation mixed up with that, but urgh.

Basically we want double_rq_lock() / double_lock_balance() to clear
RQCF_UPDATED, right? Perhaps do that directly?

(maybe with an inline helper and a wee comment?)

The only immediate problem with this would appear to be that
_double_rq_lock() behaves differently when it returns 0. Not sure that
matters.

Hmm?


diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f259621f4c93..be4baec84430 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -610,10 +610,13 @@ void double_rq_lock(struct rq *rq1, struct rq *rq2)
 		swap(rq1, rq2);
 
 	raw_spin_rq_lock(rq1);
-	if (__rq_lockp(rq1) == __rq_lockp(rq2))
-		return;
+	if (__rq_lockp(rq1) != __rq_lockp(rq2))
+		raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
 
-	raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
+#ifdef CONFIG_SCHED_DEBUG
+	rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+	rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+#endif
 }
 #endif
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 8dccb34eb190..3ca8dd5ca17c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2644,6 +2644,10 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
 	BUG_ON(rq1 != rq2);
 	raw_spin_rq_lock(rq1);
 	__acquire(rq2->lock);	/* Fake it out ;) */
+#ifdef CONFIG_SCHED_DEBUG
+	rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+	rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+#endif
 }
 
 /*

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [External] Re: [PATCH] sched/core: Avoid obvious double update_rq_clock warning
  2022-04-19 10:48 ` Peter Zijlstra
@ 2022-04-20  8:29   ` Hao Jia
  2022-04-20 19:11     ` Dietmar Eggemann
  0 siblings, 1 reply; 8+ messages in thread
From: Hao Jia @ 2022-04-20  8:29 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, juri.lelli, vincent.guittot, dietmar.eggemann, rostedt,
	bsegall, mgorman, bristot, linux-kernel



On 4/19/22 6:48 PM, Peter Zijlstra wrote:
> On Mon, Apr 18, 2022 at 05:09:29PM +0800, Hao Jia wrote:
>> When we use raw_spin_rq_lock to acquire the rq lock and have to
>> update the rq clock while holding the lock, the kernel may issue
>> a WARN_DOUBLE_CLOCK warning.
>>
>> Since we directly use raw_spin_rq_lock to acquire rq lock instead of
>> rq_lock, there is no corresponding change to rq->clock_update_flags.
>> In particular, we have obtained the rq lock of other cores,
>> the core rq->clock_update_flags may be RQCF_UPDATED at this time, and
>> then calling update_rq_clock will trigger the WARN_DOUBLE_CLOCK warning.
> 
>> Signed-off-by: Hao Jia <jiahao.os@bytedance.com>
>> ---
>>   kernel/sched/deadline.c | 18 +++++++++++-------
>>   kernel/sched/rt.c       | 20 ++++++++++++++++++--
> 
> Very good for keeping them in sync.
> 
>>   2 files changed, 29 insertions(+), 9 deletions(-)
>>
>> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
>> index fb4255ae0b2c..9207b978cc43 100644
>> --- a/kernel/sched/deadline.c
>> +++ b/kernel/sched/deadline.c
> 
>> @@ -2317,16 +2318,14 @@ static int push_dl_task(struct rq *rq)
>>   		goto retry;
>>   	}
>>   
>> +	rq_pin_lock(rq, &srf);
>> +	rq_pin_lock(later_rq, &drf);
>>   	deactivate_task(rq, next_task, 0);
>>   	set_task_cpu(next_task, later_rq->cpu);
>> -
>> -	/*
>> -	 * Update the later_rq clock here, because the clock is used
>> -	 * by the cpufreq_update_util() inside __add_running_bw().
>> -	 */
>> -	update_rq_clock(later_rq);
>> -	activate_task(later_rq, next_task, ENQUEUE_NOCLOCK);
>> +	activate_task(later_rq, next_task, 0);
>>   	ret = 1;
>> +	rq_unpin_lock(rq, &srf);
>> +	rq_unpin_lock(later_rq, &drf);
>>   
>>   	resched_curr(later_rq);
>>   
> 
>> @@ -2413,11 +2413,15 @@ static void pull_dl_task(struct rq *this_rq)
>>   			if (is_migration_disabled(p)) {
>>   				push_task = get_push_task(src_rq);
>>   			} else {
>> +				rq_pin_lock(this_rq, &this_rf);
>> +				rq_pin_lock(src_rq, &src_rf);
>>   				deactivate_task(src_rq, p, 0);
>>   				set_task_cpu(p, this_cpu);
>>   				activate_task(this_rq, p, 0);
>>   				dmin = p->dl.deadline;
>>   				resched = true;
>> +				rq_unpin_lock(this_rq, &this_rf);
>> +				rq_unpin_lock(src_rq, &src_rf);
>>   			}
>>   
>>   			/* Is there any other task even earlier? */
> 
> I'm really not sure about this part though. This is a bit of a mess. The
> balancer doesn't really need the pinning stuff. I realize you did that
> because we got the clock annotation mixed up with that, but urgh.
> 
> Basically we want double_rq_lock() / double_lock_balance() to clear
> RQCF_UPDATED, right? Perhaps do that directly?
> 
> (maybe with an inline helper and a wee comment?)
> 
> The only immediate problem with this would appear to be that
> _double_rq_lock() behaves differently when it returns 0. Not sure that
> matters.
> 
> Hmm?

Thanks for your review comments.
As you pointed out, the WARN_DOUBLE_CLOCK warning is still triggered 
when _double_rq_lock() returns 0.
Please review the solution below, and based on your review, I will 
submit the v2 patch as soon as possible.
Thanks.


diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 51efaabac3e4..b73fe46cd6c7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -610,10 +610,13 @@ void double_rq_lock(struct rq *rq1, struct rq *rq2)
         swap(rq1, rq2);

     raw_spin_rq_lock(rq1);
-   if (__rq_lockp(rq1) == __rq_lockp(rq2))
-       return;
+   if (__rq_lockp(rq1) != __rq_lockp(rq2))
+       raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);

-   raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
+#ifdef CONFIG_SCHED_DEBUG
+   rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+   rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+#endif
  }
  #endif

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 8dccb34eb190..9fe506a6b7b4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2544,20 +2544,25 @@ static inline int _double_lock_balance(struct rq 
*this_rq, struct rq *busiest)
     __acquires(this_rq->lock)
  {
     if (__rq_lockp(this_rq) == __rq_lockp(busiest))
-       return 0;
+       goto out;

     if (likely(raw_spin_rq_trylock(busiest)))
-       return 0;
+       goto out;

     if (rq_order_less(this_rq, busiest)) {
         raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING);
-       return 0;
+       goto out;
+   } else {
+       raw_spin_rq_unlock(this_rq);
+       double_rq_lock(this_rq, busiest);
+       return 1;
     }
-
-   raw_spin_rq_unlock(this_rq);
-   double_rq_lock(this_rq, busiest);
-
-   return 1;
+out:
+#ifdef CONFIG_SCHED_DEBUG
+   this_rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+   busiest->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+#endif
+   return 0;
  }

  #endif /* CONFIG_PREEMPTION */
@@ -2644,6 +2649,9 @@ static inline void double_rq_lock(struct rq *rq1, 
struct rq *rq2)
     BUG_ON(rq1 != rq2);
     raw_spin_rq_lock(rq1);
     __acquire(rq2->lock);   /* Fake it out ;) */
+#ifdef CONFIG_SCHED_DEBUG
+   rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+#endif
  }
> 
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index f259621f4c93..be4baec84430 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -610,10 +610,13 @@ void double_rq_lock(struct rq *rq1, struct rq *rq2)
>   		swap(rq1, rq2);
>   
>   	raw_spin_rq_lock(rq1);
> -	if (__rq_lockp(rq1) == __rq_lockp(rq2))
> -		return;
> +	if (__rq_lockp(rq1) != __rq_lockp(rq2))
> +		raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
>   
> -	raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
> +#ifdef CONFIG_SCHED_DEBUG
> +	rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
> +	rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
> +#endif
>   }
>   #endif
>   
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 8dccb34eb190..3ca8dd5ca17c 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2644,6 +2644,10 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
>   	BUG_ON(rq1 != rq2);
>   	raw_spin_rq_lock(rq1);
>   	__acquire(rq2->lock);	/* Fake it out ;) */
> +#ifdef CONFIG_SCHED_DEBUG
> +	rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
> +	rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
> +#endif
>   }
>   
>   /*

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [External] Re: [PATCH] sched/core: Avoid obvious double update_rq_clock warning
  2022-04-20  8:29   ` [External] " Hao Jia
@ 2022-04-20 19:11     ` Dietmar Eggemann
  2022-04-21  7:24       ` Hao Jia
  0 siblings, 1 reply; 8+ messages in thread
From: Dietmar Eggemann @ 2022-04-20 19:11 UTC (permalink / raw)
  To: Hao Jia, Peter Zijlstra
  Cc: mingo, juri.lelli, vincent.guittot, rostedt, bsegall, mgorman,
	bristot, linux-kernel

On 20/04/2022 10:29, Hao Jia wrote:
> On 4/19/22 6:48 PM, Peter Zijlstra wrote:
>> On Mon, Apr 18, 2022 at 05:09:29PM +0800, Hao Jia wrote:

[...]

>> I'm really not sure about this part though. This is a bit of a mess. The
>> balancer doesn't really need the pinning stuff. I realize you did that
>> because we got the clock annotation mixed up with that, but urgh.
>>
>> Basically we want double_rq_lock() / double_lock_balance() to clear
>> RQCF_UPDATED, right? Perhaps do that directly?
>>
>> (maybe with an inline helper and a wee comment?)
>>
>> The only immediate problem with this would appear to be that
>> _double_rq_lock() behaves differently when it returns 0. Not sure that
>> matters.
>>
>> Hmm?
> 
> Thanks for your review comments.
> As you have prompted, the WARN_DOUBLE_CLOCK warning is still triggered
> when _double_rq_lock() returns 0.
> Please review the solution below, and based on your review, I will
> submit the v2 patch as soon as possible.
> Thanks.


[...]

Maybe something like this:

-->8--

From: Dietmar Eggemann <dietmar.eggemann@arm.com>
Date: Wed, 20 Apr 2022 11:12:10 +0200
Subject: [PATCH] sched/core: Clear RQCF_UPDATED in _double_lock_balance() &
 double_rq_lock()

Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
---
 kernel/sched/core.c  |  6 +++---
 kernel/sched/sched.h | 20 ++++++++++++++++----
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 068c088e9584..f4cfe7eea861 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -610,10 +610,10 @@ void double_rq_lock(struct rq *rq1, struct rq *rq2)
 		swap(rq1, rq2);
 
 	raw_spin_rq_lock(rq1);
-	if (__rq_lockp(rq1) == __rq_lockp(rq2))
-		return;
+	if (__rq_lockp(rq1) != __rq_lockp(rq2))
+		raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
 
-	raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
+	rq_clock_clear_update(rq1, rq2);
 }
 #endif
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 58263f90c559..3a77b10d7cc4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2515,6 +2515,16 @@ static inline bool rq_order_less(struct rq *rq1, struct rq *rq2)
 
 extern void double_rq_lock(struct rq *rq1, struct rq *rq2);
 
+#ifdef CONFIG_SCHED_DEBUG
+static inline void rq_clock_clear_update(struct rq *rq1, struct rq *rq2)
+{
+	rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+	rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+}
+#else
+static inline void rq_clock_clear_update(struct rq *rq1, struct rq *rq2) {}
+#endif
+
 #ifdef CONFIG_PREEMPTION
 
 /*
@@ -2549,14 +2559,15 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	__acquires(busiest->lock)
 	__acquires(this_rq->lock)
 {
-	if (__rq_lockp(this_rq) == __rq_lockp(busiest))
-		return 0;
-
-	if (likely(raw_spin_rq_trylock(busiest)))
+	if (__rq_lockp(this_rq) == __rq_lockp(busiest) ||
+	    likely(raw_spin_rq_trylock(busiest))) {
+		rq_clock_clear_update(this_rq, busiest);
 		return 0;
+	}
 
 	if (rq_order_less(this_rq, busiest)) {
 		raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING);
+		rq_clock_clear_update(this_rq, busiest);
 		return 0;
 	}
 
@@ -2650,6 +2661,7 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
 	BUG_ON(rq1 != rq2);
 	raw_spin_rq_lock(rq1);
 	__acquire(rq2->lock);	/* Fake it out ;) */
+	rq_clock_clear_update(rq1, rq2);
 }
 
 /*
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [External] Re: [PATCH] sched/core: Avoid obvious double update_rq_clock warning
  2022-04-20 19:11     ` Dietmar Eggemann
@ 2022-04-21  7:24       ` Hao Jia
  2022-04-21 10:32         ` Dietmar Eggemann
  0 siblings, 1 reply; 8+ messages in thread
From: Hao Jia @ 2022-04-21  7:24 UTC (permalink / raw)
  To: Dietmar Eggemann, Peter Zijlstra
  Cc: mingo, juri.lelli, vincent.guittot, rostedt, bsegall, mgorman,
	bristot, linux-kernel



On 2022/4/21 Dietmar Eggemann wrote:
> On 20/04/2022 10:29, Hao Jia wrote:
>> On 4/19/22 6:48 PM, Peter Zijlstra wrote:
>>> On Mon, Apr 18, 2022 at 05:09:29PM +0800, Hao Jia wrote:
> 
> [...]
> 
>>> I'm really not sure about this part though. This is a bit of a mess. The
>>> balancer doesn't really need the pinning stuff. I realize you did that
>>> because we got the clock annotation mixed up with that, but urgh.
>>>
>>> Basically we want double_rq_lock() / double_lock_balance() to clear
>>> RQCF_UPDATED, right? Perhaps do that directly?
>>>
>>> (maybe with an inline helper and a wee comment?)
>>>
>>> The only immediate problem with this would appear to be that
>>> _double_rq_lock() behaves differently when it returns 0. Not sure that
>>> matters.
>>>
>>> Hmm?
>>
>> Thanks for your review comments.
>> As you have prompted, the WARN_DOUBLE_CLOCK warning is still triggered
>> when _double_rq_lock() returns 0.
>> Please review the solution below, and based on your review, I will
>> submit the v2 patch as soon as possible.
>> Thanks.
> 
> 
> [...]
> 
> Maybe something like this:
> 
> -->8--
> 
> From: Dietmar Eggemann <dietmar.eggemann@arm.com>
> Date: Wed, 20 Apr 2022 11:12:10 +0200
> Subject: [PATCH] sched/core: Clear RQCF_UPDATED in _double_lock_balance() &
>   double_rq_lock()
> 
> Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
> ---
>   kernel/sched/core.c  |  6 +++---
>   kernel/sched/sched.h | 20 ++++++++++++++++----
>   2 files changed, 19 insertions(+), 7 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 068c088e9584..f4cfe7eea861 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -610,10 +610,10 @@ void double_rq_lock(struct rq *rq1, struct rq *rq2)
>   		swap(rq1, rq2);
>   
>   	raw_spin_rq_lock(rq1);
> -	if (__rq_lockp(rq1) == __rq_lockp(rq2))
> -		return;
> +	if (__rq_lockp(rq1) != __rq_lockp(rq2))
> +		raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
>   
> -	raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
> +	rq_clock_clear_update(rq1, rq2);
>   }
>   #endif
>   
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 58263f90c559..3a77b10d7cc4 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2515,6 +2515,16 @@ static inline bool rq_order_less(struct rq *rq1, struct rq *rq2)
>   
>   extern void double_rq_lock(struct rq *rq1, struct rq *rq2);
>   
> +#ifdef CONFIG_SCHED_DEBUG
> +static inline void rq_clock_clear_update(struct rq *rq1, struct rq *rq2)
> +{
> +	rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
> +	rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
> +}
> +#else
> +static inline void rq_clock_clear_update(struct rq *rq1, struct rq *rq2) {}
> +#endif
> +
>   #ifdef CONFIG_PREEMPTION
>   
>   /*
> @@ -2549,14 +2559,15 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
>   	__acquires(busiest->lock)
>   	__acquires(this_rq->lock)
>   {
> -	if (__rq_lockp(this_rq) == __rq_lockp(busiest))
> -		return 0;
> -
> -	if (likely(raw_spin_rq_trylock(busiest)))
> +	if (__rq_lockp(this_rq) == __rq_lockp(busiest) ||
> +	    likely(raw_spin_rq_trylock(busiest))) {
> +		rq_clock_clear_update(this_rq, busiest);
>   		return 0;
> +	}
>   
>   	if (rq_order_less(this_rq, busiest)) {
>   		raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING);
> +		rq_clock_clear_update(this_rq, busiest);
>   		return 0;
>   	}
>   
> @@ -2650,6 +2661,7 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
>   	BUG_ON(rq1 != rq2);
>   	raw_spin_rq_lock(rq1);
>   	__acquire(rq2->lock);	/* Fake it out ;) */
> +	rq_clock_clear_update(rq1, rq2);

Thanks for your review.
This is very helpful to me.
If CONFIG_SMP is not enabled, should we just clear the RQCF_UPDATED of 
one of rq1 and rq2?

like this:
rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);

Thanks.

>   }
>   
>   /*

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [External] Re: [PATCH] sched/core: Avoid obvious double update_rq_clock warning
  2022-04-21  7:24       ` Hao Jia
@ 2022-04-21 10:32         ` Dietmar Eggemann
  0 siblings, 0 replies; 8+ messages in thread
From: Dietmar Eggemann @ 2022-04-21 10:32 UTC (permalink / raw)
  To: Hao Jia, Peter Zijlstra
  Cc: mingo, juri.lelli, vincent.guittot, rostedt, bsegall, mgorman,
	bristot, linux-kernel

On 21/04/2022 09:24, Hao Jia wrote:
> On 2022/4/21 Dietmar Eggemann wrote:
>> On 20/04/2022 10:29, Hao Jia wrote:
>>> On 4/19/22 6:48 PM, Peter Zijlstra wrote:
>>>> On Mon, Apr 18, 2022 at 05:09:29PM +0800, Hao Jia wrote:

[...]

> Thanks for your review.
> This is very helpful to me.
> If CONFIG_SMP is not enabled, should we just clear the RQCF_UPDATED of
> one of rq1 and q2?
> 
> like this:
> rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);

[...]

We could take care of that within rq_clock_clear_update() if really
needed?
Anyway, for !CONFIG_SMP builds rq_clock_clear_update() has to be defined
outside #ifdef CONFIG_SMP.

-->8--

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3a77b10d7cc4..614b822c667c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2484,6 +2484,17 @@ unsigned long arch_scale_freq_capacity(int cpu)
 }
 #endif
 
+#ifdef CONFIG_SCHED_DEBUG
+static inline void rq_clock_clear_update(struct rq *rq1, struct rq *rq2)
+{
+	rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+#ifdef CONFIG_SMP
+	rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+#endif
+}
+#else
+static inline void rq_clock_clear_update(struct rq *rq1, struct rq *rq2) {}
+#endif
 
 #ifdef CONFIG_SMP
 
@@ -2515,16 +2526,6 @@ static inline bool rq_order_less(struct rq *rq1, struct rq *rq2)
 
 extern void double_rq_lock(struct rq *rq1, struct rq *rq2);
 
-#ifdef CONFIG_SCHED_DEBUG
-static inline void rq_clock_clear_update(struct rq *rq1, struct rq *rq2)
-{
-	rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
-	rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
-}
-#else
-static inline void rq_clock_clear_update(struct rq *rq1, struct rq *rq2) {}
-#endif
-
 #ifdef CONFIG_PREEMPTION
 
 /*

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] sched/core: Avoid obvious double update_rq_clock warning
  2022-04-18  9:09 [PATCH] sched/core: Avoid obvious double update_rq_clock warning Hao Jia
  2022-04-19 10:48 ` Peter Zijlstra
@ 2022-04-21 12:30 ` Dietmar Eggemann
  2022-04-21 13:15   ` [External] " Hao Jia
  1 sibling, 1 reply; 8+ messages in thread
From: Dietmar Eggemann @ 2022-04-21 12:30 UTC (permalink / raw)
  To: Hao Jia, mingo, peterz, juri.lelli, vincent.guittot, rostedt,
	bsegall, mgorman, bristot
  Cc: linux-kernel

On 18/04/2022 11:09, Hao Jia wrote:

[...]

> -	/*
> -	 * Update the later_rq clock here, because the clock is used
> -	 * by the cpufreq_update_util() inside __add_running_bw().
> -	 */
> -	update_rq_clock(later_rq);
> -	activate_task(later_rq, next_task, ENQUEUE_NOCLOCK);
> +	activate_task(later_rq, next_task, 0);

IMHO, this change should go in a separate deadline patch.

The change to call update_rq_clock() before activate_task()
(840d719604b09) is no longer needed since f4904815f97a removed the
add_running_bw() before the activate_task().

[...]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [External] Re: [PATCH] sched/core: Avoid obvious double update_rq_clock warning
  2022-04-21 12:30 ` Dietmar Eggemann
@ 2022-04-21 13:15   ` Hao Jia
  0 siblings, 0 replies; 8+ messages in thread
From: Hao Jia @ 2022-04-21 13:15 UTC (permalink / raw)
  To: Dietmar Eggemann, mingo, peterz, juri.lelli, vincent.guittot,
	rostedt, bsegall, mgorman, bristot
  Cc: linux-kernel



On 2022/4/21 Dietmar Eggemann wrote:
> On 18/04/2022 11:09, Hao Jia wrote:
> 
> [...]
> 
>> -	/*
>> -	 * Update the later_rq clock here, because the clock is used
>> -	 * by the cpufreq_update_util() inside __add_running_bw().
>> -	 */
>> -	update_rq_clock(later_rq);
>> -	activate_task(later_rq, next_task, ENQUEUE_NOCLOCK);
>> +	activate_task(later_rq, next_task, 0);
> 
> IMHO, this change should go in a separate deadline patch.
> 

Thanks again for your review.
I will do it in patch v2.
Thanks.

> The change to call update_rq_clock() before activate_task()
> (840d719604b09) is no longer needed since f4904815f97a removed the
> add_running_bw() before the activate_task().
> 
> [...]

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-04-21 13:16 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-18  9:09 [PATCH] sched/core: Avoid obvious double update_rq_clock warning Hao Jia
2022-04-19 10:48 ` Peter Zijlstra
2022-04-20  8:29   ` [External] " Hao Jia
2022-04-20 19:11     ` Dietmar Eggemann
2022-04-21  7:24       ` Hao Jia
2022-04-21 10:32         ` Dietmar Eggemann
2022-04-21 12:30 ` Dietmar Eggemann
2022-04-21 13:15   ` [External] " Hao Jia

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).