linux-kernel.vger.kernel.org archive mirror
* [PATCH v2 0/1] wait_task_inactive() spends too much time on system startup
@ 2020-03-05  9:59 cl
  2020-03-05  9:59 ` [PATCH v2 1/1] sched/fair: do not preempt current task if it is going to call schedule() cl
  0 siblings, 1 reply; 3+ messages in thread
From: cl @ 2020-03-05  9:59 UTC (permalink / raw)
  To: heiko
  Cc: mingo, peterz, juri.lelli, vincent.guittot, dietmar.eggemann,
	rostedt, bsegall, mgorman, akpm, tglx, mpe, surenb, ben.dooks,
	anshuman.khandual, catalin.marinas, will, keescook, luto, wad,
	mark.rutland, geert+renesas, george_davis, sudeep.holla, linux,
	gregkh, info, kstewart, allison, linux-arm-kernel, linux-kernel,
	huangtao, Liang Chen

From: Liang Chen <cl@rock-chips.com>

Changelog:
v1: wait_task_inactive() frequently calls schedule_hrtimeout() and spends
a lot of time doing so; I am trying to optimize this on the Rockchip
platform.
v2: Use an atomic flag (PFA) instead of a TIF flag, and add some comments
(see the expansion sketch below).
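
For readers skimming the diff: the TASK_PFA_* macros in include/linux/sched.h
generate inline accessors over p->atomic_flags, so the three declarations
added by this patch expand to roughly the following (a simplified sketch
derived from the macro bodies, not code copied from the tree):

	static inline bool task_going_to_sched(struct task_struct *p)
	{
		return test_bit(PFA_GOING_TO_SCHED, &p->atomic_flags);
	}

	static inline void task_set_going_to_sched(struct task_struct *p)
	{
		set_bit(PFA_GOING_TO_SCHED, &p->atomic_flags);
	}

	static inline void task_clear_going_to_sched(struct task_struct *p)
	{
		clear_bit(PFA_GOING_TO_SCHED, &p->atomic_flags);
	}

The accessors use atomic bitops on the separate atomic_flags word, so they
do not race with updates to the non-atomic p->flags.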

Liang Chen (1):
  sched/fair: do not preempt current task if it is going to call
    schedule()

 include/linux/sched.h |  5 +++++
 kernel/kthread.c      |  4 ++++
 kernel/sched/fair.c   | 13 +++++++++++++
 3 files changed, 22 insertions(+)

-- 
2.17.1





* [PATCH v2 1/1] sched/fair: do not preempt current task if it is going to call schedule()
  2020-03-05  9:59 [PATCH v2 0/1] wait_task_inactive() spends too much time on system startup cl
@ 2020-03-05  9:59 ` cl
  2020-03-05 17:24   ` Kees Cook
  0 siblings, 1 reply; 3+ messages in thread
From: cl @ 2020-03-05  9:59 UTC (permalink / raw)
  To: heiko
  Cc: mingo, peterz, juri.lelli, vincent.guittot, dietmar.eggemann,
	rostedt, bsegall, mgorman, akpm, tglx, mpe, surenb, ben.dooks,
	anshuman.khandual, catalin.marinas, will, keescook, luto, wad,
	mark.rutland, geert+renesas, george_davis, sudeep.holla, linux,
	gregkh, info, kstewart, allison, linux-arm-kernel, linux-kernel,
	huangtao, Liang Chen

From: Liang Chen <cl@rock-chips.com>

When we create a kthread with kthread_create_on_cpu(), the child thread
entry is kthread.c:kthread(), which is preempted by the parent right after
it calls complete(done) but before it calls schedule(). The parent then
calls wait_task_inactive(child), but the child is still on the runqueue,
so the parent falls back to schedule_hrtimeout() for one jiffy. This
wastes a lot of time, especially on startup.

  parent                             child
kthread_create_on_cpu()
  wait_for_completion(&done) ----> kthread.c:kthread()
                             |----- complete(done); --woken up, then preempted by parent
 kthread_bind() <------------|  |-> schedule(); --dequeued here
  wait_task_inactive(child)     |
   schedule_hrtimeout(1 jiffy) -|

So we want the child to wake up the parent without being preempted by it:
since the child is about to call schedule() anyway, the parent then finds
the child already dequeued and never needs to call
schedule_hrtimeout(1 jiffy).

The same issue exists for kthread_park() / kthread_parkme().
This patch saves 120ms on rk312x startup with CONFIG_HZ=300.
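
For reference, the delay comes from the polling loop in wait_task_inactive()
(kernel/sched/core.c). Heavily simplified, it behaves roughly like this
(a sketch only: locking, the match_state checks, and the running recheck
are elided):

	unsigned long wait_task_inactive(struct task_struct *p, long match_state)
	{
		unsigned long ncsw = 0;	/* filled in under the rq lock; elided */

		for (;;) {
			/* Busy-wait while the child is actually on a CPU. */
			while (task_running(task_rq(p), p))
				cpu_relax();

			/* ... re-check p's state under the runqueue lock ... */

			if (task_on_rq_queued(p)) {
				/*
				 * Runnable but not running: back off for
				 * about one tick before polling again. This
				 * is the 1-jiffy sleep the parent keeps
				 * hitting in the scenario above.
				 */
				ktime_t to = NSEC_PER_SEC / HZ;

				set_current_state(TASK_UNINTERRUPTIBLE);
				schedule_hrtimeout(&to, HRTIMER_MODE_REL);
				continue;
			}
			break;
		}
		return ncsw;
	}

With the flag set, the child is no longer preempted between complete(done)
and schedule(), so by the time the parent polls, the child has usually been
dequeued and the schedule_hrtimeout() branch is never taken.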

Signed-off-by: Liang Chen <cl@rock-chips.com>
---
 include/linux/sched.h |  5 +++++
 kernel/kthread.c      |  4 ++++
 kernel/sched/fair.c   | 13 +++++++++++++
 3 files changed, 22 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 04278493bf15..54bf336f5790 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1533,6 +1533,7 @@ static inline bool is_percpu_thread(void)
 #define PFA_SPEC_IB_DISABLE		5	/* Indirect branch speculation restricted */
 #define PFA_SPEC_IB_FORCE_DISABLE	6	/* Indirect branch speculation permanently restricted */
 #define PFA_SPEC_SSB_NOEXEC		7	/* Speculative Store Bypass clear on execve() */
+#define PFA_GOING_TO_SCHED		8	/* task is going to call schedule() */
 
 #define TASK_PFA_TEST(name, func)					\
 	static inline bool task_##func(struct task_struct *p)		\
@@ -1575,6 +1576,10 @@ TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)
 TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
 TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
 
+TASK_PFA_TEST(GOING_TO_SCHED, going_to_sched)
+TASK_PFA_SET(GOING_TO_SCHED, going_to_sched)
+TASK_PFA_CLEAR(GOING_TO_SCHED, going_to_sched)
+
 static inline void
 current_restore_flags(unsigned long orig_flags, unsigned long flags)
 {
diff --git a/kernel/kthread.c b/kernel/kthread.c
index b262f47046ca..bc96de2648f6 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -199,8 +199,10 @@ static void __kthread_parkme(struct kthread *self)
 		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
 			break;
 
+		task_set_going_to_sched(current);
 		complete(&self->parked);
 		schedule();
+		task_clear_going_to_sched(current);
 	}
 	__set_current_state(TASK_RUNNING);
 }
@@ -245,8 +247,10 @@ static int kthread(void *_create)
 	/* OK, tell user we're spawned, wait for stop or wakeup */
 	__set_current_state(TASK_UNINTERRUPTIBLE);
 	create->result = current;
+	task_set_going_to_sched(current);
 	complete(done);
 	schedule();
+	task_clear_going_to_sched(current);
 
 	ret = -EINTR;
 	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3c8a379c357e..78666cec794a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4330,6 +4330,12 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 			hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
 		return;
 #endif
+	/*
+	 * The current task is going to call schedule(), do not preempt it
+	 * or it will cause more useless context switches.
+	 */
+	if (task_going_to_sched(rq_of(cfs_rq)->curr))
+		return;
 
 	if (cfs_rq->nr_running > 1)
 		check_preempt_tick(cfs_rq, curr);
@@ -6634,6 +6640,13 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	if (test_tsk_need_resched(curr))
 		return;
 
+	/*
+	 * The current task is going to call schedule(), do not preempt it
+	 * or it will cause more useless context switches.
+	 */
+	if (task_going_to_sched(curr))
+		return;
+
 	/* Idle tasks are by definition preempted by non-idle tasks. */
 	if (unlikely(task_has_idle_policy(curr)) &&
 	    likely(!task_has_idle_policy(p)))
-- 
2.17.1





* Re: [PATCH v2 1/1] sched/fair: do not preempt current task if it is going to call schedule()
  2020-03-05  9:59 ` [PATCH v2 1/1] sched/fair: do not preempt current task if it is going to call schedule() cl
@ 2020-03-05 17:24   ` Kees Cook
  0 siblings, 0 replies; 3+ messages in thread
From: Kees Cook @ 2020-03-05 17:24 UTC (permalink / raw)
  To: cl
  Cc: heiko, mingo, peterz, juri.lelli, vincent.guittot,
	dietmar.eggemann, rostedt, bsegall, mgorman, akpm, tglx, mpe,
	surenb, ben.dooks, anshuman.khandual, catalin.marinas, will,
	luto, wad, mark.rutland, geert+renesas, george_davis,
	sudeep.holla, linux, gregkh, info, kstewart, allison,
	linux-arm-kernel, linux-kernel, huangtao

On Thu, Mar 05, 2020 at 05:59:48PM +0800, cl@rock-chips.com wrote:
> From: Liang Chen <cl@rock-chips.com>
> 
> When we create a kthread with kthread_create_on_cpu(), the child thread
> entry is kthread.c:kthread(), which is preempted by the parent right after
> it calls complete(done) but before it calls schedule(). The parent then
> calls wait_task_inactive(child), but the child is still on the runqueue,
> so the parent falls back to schedule_hrtimeout() for one jiffy. This
> wastes a lot of time, especially on startup.
> 
>   parent                             child
> kthread_create_on_cpu()
>   wait_for_completion(&done) ----> kthread.c:kthread()
>                              |----- complete(done); --woken up, then preempted by parent
>  kthread_bind() <------------|  |-> schedule(); --dequeued here
>   wait_task_inactive(child)     |
>    schedule_hrtimeout(1 jiffy) -|
> 
> So we want the child to wake up the parent without being preempted by it:
> since the child is about to call schedule() anyway, the parent then finds
> the child already dequeued and never needs to call
> schedule_hrtimeout(1 jiffy).
> 
> The same issue exists for kthread_park() / kthread_parkme().
> This patch saves 120ms on rk312x startup with CONFIG_HZ=300.
> 
> Signed-off-by: Liang Chen <cl@rock-chips.com>

I'm not familiar with the subtleties of scheduler internals
(e.g. is there a race between the end of "schedule();" and calling
"task_clear_going_to_sched();" that effects the preemption test logic?),
so I'll leave that review to the others. But speaking to the PFA change,
it looks sane to me:

Reviewed-by: Kees Cook <keescook@chromium.org>

-Kees

-- 
Kees Cook


end of thread, other threads:[~2020-03-05 17:24 UTC | newest]

Thread overview: 3+ messages
2020-03-05  9:59 [PATCH v2 0/1] wait_task_inactive() spends too much time on system startup cl
2020-03-05  9:59 ` [PATCH v2 1/1] sched/fair: do not preempt current task if it is going to call schedule() cl
2020-03-05 17:24   ` Kees Cook
