From: Peter Zijlstra <peterz@infradead.org>
To: Benjamin Segall <bsegall@google.com>
Cc: mingo@kernel.org, vincent.guittot@linaro.org,
linux-kernel@vger.kernel.org, juri.lelli@redhat.com,
dietmar.eggemann@arm.com, rostedt@goodmis.org, mgorman@suse.de,
bristot@redhat.com, corbet@lwn.net, qyousef@layalina.io,
chris.hyser@oracle.com, patrick.bellasi@matbug.net,
pjt@google.com, pavel@ucw.cz, qperret@google.com,
tim.c.chen@linux.intel.com, joshdon@google.com, timj@gnu.org,
kprateek.nayak@amd.com, yu.c.chen@intel.com,
youssefesmat@chromium.org, joel@joelfernandes.org, efault@gmx.de,
tglx@linutronix.de
Subject: Re: [PATCH 03/15] sched/fair: Add lag based placement
Date: Fri, 13 Oct 2023 18:35:24 +0200 [thread overview]
Message-ID: <20231013163524.GA5294@noisy.programming.kicks-ass.net> (raw)
In-Reply-To: <20231012223428.GP6307@noisy.programming.kicks-ass.net>
On Fri, Oct 13, 2023 at 12:34:28AM +0200, Peter Zijlstra wrote:
> Right, so I do have this:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/commit/?h=sched/eevdf&id=344944e06f11da25b49328825ed15fedd63036d3
>
> That allows tasks to sleep away the lag -- with all the gnarly bits that
> sleep time has. And it reliably fixes the above. However, it also
> depresses a bunch of other stuff. Never a free lunch etc.
>
> It is so far the least horrible of the things I've tried.
So the below is one I conceptually like more -- except I hate the code,
and it doesn't work as well as the one linked above.
(Mike, this isn't the same one you saw before -- it's been 'improved')
---
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 29daece54a74..7f17295931de 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -895,6 +895,7 @@ struct task_struct {
unsigned sched_reset_on_fork:1;
unsigned sched_contributes_to_load:1;
unsigned sched_migrated:1;
+ unsigned sched_delayed:1;
/* Force alignment to the next boundary: */
unsigned :0;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7771a4d68280..38b2e0488a38 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3833,12 +3833,21 @@ static int ttwu_runnable(struct task_struct *p, int wake_flags)
rq = __task_rq_lock(p, &rf);
if (task_on_rq_queued(p)) {
+ update_rq_clock(rq);
+ if (unlikely(p->sched_delayed)) {
+ p->sched_delayed = 0;
+ /* mustn't run a delayed task */
+ WARN_ON_ONCE(task_on_cpu(rq, p));
+ dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
+ if (p->se.vlag > 0)
+ p->se.vlag = 0;
+ enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
+ }
if (!task_on_cpu(rq, p)) {
/*
* When on_rq && !on_cpu the task is preempted, see if
* it should preempt the task that is current now.
*/
- update_rq_clock(rq);
wakeup_preempt(rq, p, wake_flags);
}
ttwu_do_wakeup(p);
@@ -6520,6 +6529,16 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
# define SM_MASK_PREEMPT SM_PREEMPT
#endif
+static void __deschedule_task(struct rq *rq, struct task_struct *p)
+{
+ deactivate_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
+
+ if (p->in_iowait) {
+ atomic_inc(&rq->nr_iowait);
+ delayacct_blkio_start();
+ }
+}
+
/*
* __schedule() is the main scheduler function.
*
@@ -6604,6 +6623,8 @@ static void __sched notrace __schedule(unsigned int sched_mode)
switch_count = &prev->nivcsw;
+ WARN_ON_ONCE(prev->sched_delayed);
+
/*
* We must load prev->state once (task_struct::state is volatile), such
* that we form a control dependency vs deactivate_task() below.
@@ -6632,17 +6653,39 @@ static void __sched notrace __schedule(unsigned int sched_mode)
*
* After this, schedule() must not care about p->state any more.
*/
- deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
-
- if (prev->in_iowait) {
- atomic_inc(&rq->nr_iowait);
- delayacct_blkio_start();
- }
+ if (sched_feat(DELAY_DEQUEUE) &&
+ prev->sched_class->eligible_task &&
+ !prev->sched_class->eligible_task(rq, prev))
+ prev->sched_delayed = 1;
+ else
+ __deschedule_task(rq, prev);
}
switch_count = &prev->nvcsw;
}
- next = pick_next_task(rq, prev, &rf);
+ for (struct task_struct *tmp = prev;;) {
+
+ next = pick_next_task(rq, tmp, &rf);
+ if (unlikely(tmp != prev))
+ finish_task(tmp);
+
+ if (likely(!next->sched_delayed))
+ break;
+
+ next->sched_delayed = 0;
+
+ /* ttwu_runnable() */
+ if (WARN_ON_ONCE(!next->__state))
+ break;
+
+ prepare_task(next);
+ smp_wmb();
+ __deschedule_task(rq, next);
+ if (next->se.vlag > 0)
+ next->se.vlag = 0;
+ tmp = next;
+ }
+
clear_tsk_need_resched(prev);
clear_preempt_need_resched();
#ifdef CONFIG_SCHED_DEBUG
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b2210e7cc057..3084e21abfe7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8410,6 +8410,16 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq)
return pick_next_task_fair(rq, NULL, NULL);
}
+static bool eligible_task_fair(struct rq *rq, struct task_struct *p)
+{
+ struct sched_entity *se = &p->se;
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+ update_curr(cfs_rq);
+
+ return entity_eligible(cfs_rq, se);
+}
+
/*
* Account for a descheduled task:
*/
@@ -13006,6 +13016,7 @@ DEFINE_SCHED_CLASS(fair) = {
.wakeup_preempt = check_preempt_wakeup_fair,
+ .eligible_task = eligible_task_fair,
.pick_next_task = __pick_next_task_fair,
.put_prev_task = put_prev_task_fair,
.set_next_task = set_next_task_fair,
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index a133b46efedd..0546905f1f8f 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -11,6 +11,7 @@ SCHED_FEAT(PREEMPT_SHORT, true)
SCHED_FEAT(PLACE_SLEEPER, false)
SCHED_FEAT(GENTLE_SLEEPER, true)
SCHED_FEAT(EVDF, false)
+SCHED_FEAT(DELAY_DEQUEUE, true)
/*
* Prefer to schedule the task we woke last (assuming it failed
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 245df0c6d344..35d297e1d91b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2222,6 +2222,7 @@ struct sched_class {
void (*wakeup_preempt)(struct rq *rq, struct task_struct *p, int flags);
+ bool (*eligible_task)(struct rq *rq, struct task_struct *p);
struct task_struct *(*pick_next_task)(struct rq *rq);
void (*put_prev_task)(struct rq *rq, struct task_struct *p);
@@ -2275,7 +2276,7 @@ struct sched_class {
static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
{
- WARN_ON_ONCE(rq->curr != prev);
+// WARN_ON_ONCE(rq->curr != prev);
prev->sched_class->put_prev_task(rq, prev);
}
next prev parent reply other threads:[~2023-10-13 16:36 UTC|newest]
Thread overview: 104+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-05-31 11:58 [PATCH 00/15] sched: EEVDF and latency-nice and/or slice-attr Peter Zijlstra
2023-05-31 11:58 ` [PATCH 01/15] sched/fair: Add avg_vruntime Peter Zijlstra
2023-06-02 13:51 ` Vincent Guittot
2023-06-02 14:27 ` Peter Zijlstra
2023-06-05 7:18 ` Vincent Guittot
2023-08-10 7:10 ` [tip: sched/core] sched/fair: Add cfs_rq::avg_vruntime tip-bot2 for Peter Zijlstra
2023-10-11 4:15 ` [PATCH 01/15] sched/fair: Add avg_vruntime Abel Wu
2023-10-11 7:30 ` Peter Zijlstra
2023-10-11 8:30 ` Abel Wu
2023-10-11 9:45 ` Peter Zijlstra
2023-10-11 10:05 ` Peter Zijlstra
2023-10-11 13:08 ` Peter Zijlstra
2023-05-31 11:58 ` [PATCH 02/15] sched/fair: Remove START_DEBIT Peter Zijlstra
2023-08-10 7:10 ` [tip: sched/core] sched/fair: Remove sched_feat(START_DEBIT) tip-bot2 for Peter Zijlstra
2023-05-31 11:58 ` [PATCH 03/15] sched/fair: Add lag based placement Peter Zijlstra
2023-08-10 7:10 ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2023-10-11 12:00 ` [PATCH 03/15] " Abel Wu
2023-10-11 13:24 ` Peter Zijlstra
2023-10-12 7:04 ` Abel Wu
2023-10-13 7:37 ` Peter Zijlstra
2023-10-13 8:14 ` Abel Wu
2023-10-12 19:15 ` Benjamin Segall
2023-10-12 22:34 ` Peter Zijlstra
2023-10-13 16:35 ` Peter Zijlstra [this message]
2023-10-14 8:08 ` Mike Galbraith
2023-10-13 14:34 ` Peter Zijlstra
2023-05-31 11:58 ` [PATCH 04/15] rbtree: Add rb_add_augmented_cached() helper Peter Zijlstra
2023-08-10 7:10 ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2023-05-31 11:58 ` [PATCH 05/15] sched/fair: Implement an EEVDF like policy Peter Zijlstra
2023-08-10 7:10 ` [tip: sched/core] sched/fair: Implement an EEVDF-like scheduling policy tip-bot2 for Peter Zijlstra
2023-09-29 21:40 ` [PATCH 05/15] sched/fair: Implement an EEVDF like policy Benjamin Segall
2023-10-02 17:39 ` Peter Zijlstra
2023-10-11 4:14 ` Abel Wu
2023-10-11 7:33 ` Peter Zijlstra
2023-10-11 11:49 ` Abel Wu
2023-09-30 0:09 ` [PATCH] sched/fair: fix pick_eevdf to always find the correct se Benjamin Segall
2023-10-03 10:42 ` [tip: sched/urgent] sched/fair: Fix pick_eevdf() tip-bot2 for Benjamin Segall
[not found] ` <CGME20231004203940eucas1p2f73b017497d1f4239a6e236fdb6019e2@eucas1p2.samsung.com>
2023-10-04 20:39 ` [PATCH] sched/fair: fix pick_eevdf to always find the correct se Marek Szyprowski
2023-10-09 7:53 ` [tip: sched/urgent] sched/eevdf: Fix pick_eevdf() tip-bot2 for Benjamin Segall
2023-10-11 12:12 ` [PATCH] sched/fair: fix pick_eevdf to always find the correct se Abel Wu
2023-10-11 13:14 ` Peter Zijlstra
2023-10-12 10:04 ` Abel Wu
2023-10-11 21:01 ` Benjamin Segall
2023-10-12 10:25 ` Abel Wu
2023-10-12 17:51 ` Benjamin Segall
2023-10-13 3:46 ` Abel Wu
2023-10-13 16:51 ` Benjamin Segall
2023-05-31 11:58 ` [PATCH 06/15] sched: Commit to lag based placement Peter Zijlstra
2023-08-10 7:10 ` [tip: sched/core] sched/fair: " tip-bot2 for Peter Zijlstra
2023-05-31 11:58 ` [PATCH 07/15] sched/smp: Use lag to simplify cross-runqueue placement Peter Zijlstra
2023-08-10 7:10 ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2023-09-12 15:32 ` [PATCH 07/15] " Sebastian Andrzej Siewior
2023-09-13 9:03 ` Peter Zijlstra
2023-10-04 1:17 ` [PATCH] sched/fair: Preserve PLACE_DEADLINE_INITIAL deadline Daniel Jordan
2023-10-04 13:09 ` [PATCH v2] " Daniel Jordan
2023-10-04 15:46 ` Chen Yu
2023-10-06 16:31 ` Daniel Jordan
2023-10-12 4:48 ` K Prateek Nayak
2023-10-05 5:56 ` [PATCH] " K Prateek Nayak
2023-10-06 16:35 ` Daniel Jordan
2023-10-06 16:48 ` [PATCH] sched/fair: Always update_curr() before placing at enqueue Daniel Jordan
2023-10-06 19:58 ` Peter Zijlstra
2023-10-18 0:43 ` Daniel Jordan
2023-10-16 5:39 ` K Prateek Nayak
2023-05-31 11:58 ` [PATCH 08/15] sched: Commit to EEVDF Peter Zijlstra
2023-06-16 21:23 ` Joel Fernandes
2023-06-22 12:01 ` Ingo Molnar
2023-06-22 13:11 ` Joel Fernandes
2023-08-10 7:10 ` [tip: sched/core] sched/fair: " tip-bot2 for Peter Zijlstra
2023-05-31 11:58 ` [PATCH 09/15] sched/debug: Rename min_granularity to base_slice Peter Zijlstra
2023-08-10 7:10 ` [tip: sched/core] sched/debug: Rename sysctl_sched_min_granularity to sysctl_sched_base_slice tip-bot2 for Peter Zijlstra
2023-05-31 11:58 ` [PATCH 10/15] sched/fair: Propagate enqueue flags into place_entity() Peter Zijlstra
2023-08-10 7:10 ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2023-05-31 11:58 ` [PATCH 11/15] sched/eevdf: Better handle mixed slice length Peter Zijlstra
2023-06-02 13:45 ` Vincent Guittot
2023-06-02 15:06 ` Peter Zijlstra
2023-06-10 6:34 ` Chen Yu
2023-06-10 11:22 ` Peter Zijlstra
2023-05-31 11:58 ` [RFC][PATCH 12/15] sched: Introduce latency-nice as a per-task attribute Peter Zijlstra
2023-05-31 11:58 ` [RFC][PATCH 13/15] sched/fair: Implement latency-nice Peter Zijlstra
2023-06-06 14:54 ` Vincent Guittot
2023-06-08 10:34 ` Peter Zijlstra
2023-06-08 12:44 ` Peter Zijlstra
2023-10-11 23:24 ` Benjamin Segall
2023-05-31 11:58 ` [RFC][PATCH 14/15] sched/fair: Add sched group latency support Peter Zijlstra
2023-05-31 11:58 ` [RFC][PATCH 15/15] sched/eevdf: Use sched_attr::sched_runtime to set request/slice Peter Zijlstra
2023-06-01 13:55 ` Vincent Guittot
2023-06-08 11:52 ` Peter Zijlstra
2023-08-24 0:52 ` [PATCH 00/15] sched: EEVDF and latency-nice and/or slice-attr Daniel Jordan
2023-09-06 13:13 ` Peter Zijlstra
2023-09-29 16:54 ` Youssef Esmat
2023-10-02 15:55 ` Youssef Esmat
2023-10-02 18:41 ` Peter Zijlstra
2023-10-05 12:05 ` Peter Zijlstra
2023-10-05 14:14 ` Peter Zijlstra
2023-10-05 14:42 ` Peter Zijlstra
2023-10-05 18:23 ` Youssef Esmat
2023-10-06 0:36 ` Youssef Esmat
2023-10-10 8:08 ` Peter Zijlstra
2023-10-07 22:04 ` Peter Zijlstra
2023-10-09 14:41 ` Peter Zijlstra
2023-10-10 0:51 ` Youssef Esmat
2023-10-10 8:01 ` Peter Zijlstra
2023-10-16 16:50 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231013163524.GA5294@noisy.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=bristot@redhat.com \
--cc=bsegall@google.com \
--cc=chris.hyser@oracle.com \
--cc=corbet@lwn.net \
--cc=dietmar.eggemann@arm.com \
--cc=efault@gmx.de \
--cc=joel@joelfernandes.org \
--cc=joshdon@google.com \
--cc=juri.lelli@redhat.com \
--cc=kprateek.nayak@amd.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mgorman@suse.de \
--cc=mingo@kernel.org \
--cc=patrick.bellasi@matbug.net \
--cc=pavel@ucw.cz \
--cc=pjt@google.com \
--cc=qperret@google.com \
--cc=qyousef@layalina.io \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
--cc=tim.c.chen@linux.intel.com \
--cc=timj@gnu.org \
--cc=vincent.guittot@linaro.org \
--cc=youssefesmat@chromium.org \
--cc=yu.c.chen@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).