linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Vincent Guittot <vincent.guittot@linaro.org>
To: mingo@redhat.com, peterz@infradead.org, juri.lelli@redhat.com,
	dietmar.eggemann@arm.com, rostedt@goodmis.org,
	bsegall@google.com, mgorman@suse.de, bristot@redhat.com,
	vschneid@redhat.com, linux-kernel@vger.kernel.org,
	parth@linux.ibm.com
Cc: qais.yousef@arm.com, chris.hyser@oracle.com,
	valentin.schneider@arm.com, patrick.bellasi@matbug.net,
	David.Laight@aculab.com, pjt@google.com, pavel@ucw.cz,
	tj@kernel.org, qperret@google.com, tim.c.chen@linux.intel.com,
	joshdon@google.com, timj@gnu.org,
	Vincent Guittot <vincent.guittot@linaro.org>
Subject: [PATCH v5 7/7] sched/fair: Add latency list
Date: Sun, 25 Sep 2022 16:39:08 +0200	[thread overview]
Message-ID: <20220925143908.10846-8-vincent.guittot@linaro.org> (raw)
In-Reply-To: <20220925143908.10846-1-vincent.guittot@linaro.org>

Add a rb tree for latency sensitive entities so we can schedule the most
sensitive one first even when it failed to preempt current at wakeup or
when it got quickly preempted by another entity of higher priority.

In order to keep fairness, the latency is used once at wakeup to get a
minimum slice and not during the following scheduling slice to prevent
long running entity to got more running time than allocated to his nice
priority.

The rb tree nebales to cover the last corner case where latency
sensitive entity can't got schedule quickly after the wakeup.

hackbench -l 10000 -g $group &
cyclictest --policy other -D 5 -q -n
        latency 0           latency -20
group   min  avg    max     min  avg    max
0       17    19     29      17   18     30
1       65   306   7149      64   83    208
4       50   395  15731      56   80    271
8       56   781  41548      54   80    301
16      60  1392  87237      59   86    490

group = 0 means that hackbench is not running.

Both avg and max are significantly improved with nice latency -20. If we
add the histogram parameters to get details of latency, we have :

hackbench -l 10000 -g 16 &
cyclictest --policy other -D 5 -q -n  -H 20000 --histfile data.txt
              latency 0    latency -20
Min Latencies:    60           61
Avg Latencies:  1077           86
Max Latencies: 87311          444
50% latencies:    92           85
75% latencies:   554           90
85% latencies:  1019           93
90% latencies:  1346           96
95% latencies:  5400          100
99% latencies: 19044          110

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
 include/linux/sched.h |  2 +
 kernel/sched/fair.c   | 96 +++++++++++++++++++++++++++++++++++++++++--
 kernel/sched/sched.h  |  1 +
 3 files changed, 96 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a74cad08e91e..0b92674e3664 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -547,6 +547,8 @@ struct sched_entity {
 	/* For load-balancing: */
 	struct load_weight		load;
 	struct rb_node			run_node;
+	struct rb_node			latency_node;
+	unsigned int			on_latency;
 	struct list_head		group_node;
 	unsigned int			on_rq;
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e524e892d118..1a72f34136d8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -664,7 +664,77 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 
 	return __node_2_se(last);
 }
+#endif
 
+/**************************************************************
+ * Scheduling class tree data structure manipulation methods:
+ * for latency
+ */
+
+static inline bool latency_before(struct sched_entity *a,
+				struct sched_entity *b)
+{
+	return (s64)(a->vruntime + a->latency_offset - b->vruntime - b->latency_offset) < 0;
+}
+
+#define __latency_node_2_se(node) \
+	rb_entry((node), struct sched_entity, latency_node)
+
+static inline bool __latency_less(struct rb_node *a, const struct rb_node *b)
+{
+	return latency_before(__latency_node_2_se(a), __latency_node_2_se(b));
+}
+
+/*
+ * Enqueue an entity into the latency rb-tree:
+ */
+static void __enqueue_latency(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+{
+
+	/* Only latency sensitive entity can be added to the list */
+	if (se->latency_offset >= 0)
+		return;
+
+	if (se->on_latency)
+		return;
+
+	/*
+	 * An execution time less than sysctl_sched_min_granularity means that
+	 * the entity has been preempted by a higher sched class or an entity
+	 * with higher latency constraint.
+	 * Put it back in the list so it gets a chance to run 1st during the
+	 * next slice.
+	 */
+	if (!(flags & ENQUEUE_WAKEUP)) {
+		u64 delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
+
+		if (delta_exec >= sysctl_sched_min_granularity)
+			return;
+	}
+
+	rb_add_cached(&se->latency_node, &cfs_rq->latency_timeline, __latency_less);
+	se->on_latency = 1;
+}
+
+static void __dequeue_latency(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	if (se->on_latency) {
+		rb_erase_cached(&se->latency_node, &cfs_rq->latency_timeline);
+		se->on_latency = 0;
+	}
+}
+
+static struct sched_entity *__pick_first_latency(struct cfs_rq *cfs_rq)
+{
+	struct rb_node *left = rb_first_cached(&cfs_rq->latency_timeline);
+
+	if (!left)
+		return NULL;
+
+	return __latency_node_2_se(left);
+}
+
+#ifdef CONFIG_SCHED_DEBUG
 /**************************************************************
  * Scheduling class statistics methods:
  */
@@ -4455,8 +4525,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	check_schedstat_required();
 	update_stats_enqueue_fair(cfs_rq, se, flags);
 	check_spread(cfs_rq, se);
-	if (!curr)
+	if (!curr) {
 		__enqueue_entity(cfs_rq, se);
+		__enqueue_latency(cfs_rq, se, flags);
+	}
 	se->on_rq = 1;
 
 	if (cfs_rq->nr_running == 1) {
@@ -4542,8 +4614,10 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	clear_buddies(cfs_rq, se);
 
-	if (se != cfs_rq->curr)
+	if (se != cfs_rq->curr) {
 		__dequeue_entity(cfs_rq, se);
+		__dequeue_latency(cfs_rq, se);
+	}
 	se->on_rq = 0;
 	account_entity_dequeue(cfs_rq, se);
 
@@ -4631,6 +4705,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		 */
 		update_stats_wait_end_fair(cfs_rq, se);
 		__dequeue_entity(cfs_rq, se);
+		__dequeue_latency(cfs_rq, se);
 		update_load_avg(cfs_rq, se, UPDATE_TG);
 	}
 
@@ -4669,7 +4744,7 @@ static struct sched_entity *
 pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 {
 	struct sched_entity *left = __pick_first_entity(cfs_rq);
-	struct sched_entity *se;
+	struct sched_entity *latency, *se;
 
 	/*
 	 * If curr is set we have to see if its left of the leftmost entity
@@ -4711,6 +4786,12 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 		se = cfs_rq->last;
 	}
 
+	/* Check for latency sensitive entity waiting for running */
+	latency = __pick_first_latency(cfs_rq);
+	if (latency && (latency != se) &&
+	    wakeup_preempt_entity(latency, se) < 1)
+		se = latency;
+
 	return se;
 }
 
@@ -4734,6 +4815,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
 		update_stats_wait_start_fair(cfs_rq, prev);
 		/* Put 'current' back into the tree. */
 		__enqueue_entity(cfs_rq, prev);
+		__enqueue_latency(cfs_rq, prev, 0);
 		/* in !on_rq case, update occurred at dequeue */
 		update_load_avg(cfs_rq, prev, 0);
 	}
@@ -11717,6 +11799,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
 void init_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	cfs_rq->tasks_timeline = RB_ROOT_CACHED;
+	cfs_rq->latency_timeline = RB_ROOT_CACHED;
 	u64_u32_store(cfs_rq->min_vruntime, (u64)(-(1LL << 20)));
 #ifdef CONFIG_SMP
 	raw_spin_lock_init(&cfs_rq->removed.lock);
@@ -12025,8 +12108,15 @@ int sched_group_set_latency(struct task_group *tg, s64 latency)
 
 	for_each_possible_cpu(i) {
 		struct sched_entity *se = tg->se[i];
+		struct rq *rq = cpu_rq(i);
+		struct rq_flags rf;
+
+		rq_lock_irqsave(rq, &rf);
 
+		__dequeue_latency(se->cfs_rq, se);
 		WRITE_ONCE(se->latency_offset, latency);
+
+		rq_unlock_irqrestore(rq, &rf);
 	}
 
 	mutex_unlock(&shares_mutex);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a15fb955092c..76bca172585c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -599,6 +599,7 @@ struct cfs_rq {
 #endif
 
 	struct rb_root_cached	tasks_timeline;
+	struct rb_root_cached	latency_timeline;
 
 	/*
 	 * 'curr' points to currently running entity on this cfs_rq.
-- 
2.17.1


  parent reply	other threads:[~2022-09-25 14:42 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-25 14:39 [PATCH v5 0/7] Add latency priority for CFS class Vincent Guittot
2022-09-25 14:39 ` [PATCH v5 1/7] sched: Introduce latency-nice as a per-task attribute Vincent Guittot
2022-09-25 14:39 ` [PATCH v5 2/7] sched/core: Propagate parent task's latency requirements to the child task Vincent Guittot
2022-09-25 14:39 ` [PATCH v5 3/7] sched: Allow sched_{get,set}attr to change latency_nice of the task Vincent Guittot
2022-10-12 15:07   ` K Prateek Nayak
2022-10-12 15:44     ` Vincent Guittot
2022-09-25 14:39 ` [PATCH v5 4/7] sched/fair: Take into account latency priority at wakeup Vincent Guittot
2022-10-22 15:08   ` Chen Yu
2022-10-24 22:36     ` Vincent Guittot
2022-09-25 14:39 ` [PATCH v5 5/7] sched/fair: Add sched group latency support Vincent Guittot
2022-10-12 14:22   ` Qais Yousef
2022-10-12 15:42     ` Vincent Guittot
2022-10-12 16:07       ` Qais Yousef
2022-09-25 14:39 ` [PATCH v5 6/7] sched/core: Support latency priority with sched core Vincent Guittot
2022-09-25 14:39 ` Vincent Guittot [this message]
2022-10-08  1:04   ` [PATCH v5 7/7] sched/fair: Add latency list Youssef Esmat
2022-10-08 21:14     ` David Laight
2022-10-08 21:59       ` Steven Rostedt
2022-10-11 17:10     ` Vincent Guittot
2022-10-11 23:54       ` Youssef Esmat
2022-10-12 15:21         ` Vincent Guittot
2022-10-13 17:19           ` Youssef Esmat
2022-10-14 15:22             ` Vincent Guittot
2022-10-19 16:53               ` Vincent Guittot
2022-10-20 15:20                 ` Vincent Guittot
2022-10-26 10:44           ` Dietmar Eggemann
2022-10-26 13:55             ` Vincent Guittot
2022-10-12 14:53 ` [PATCH v5 0/7] Add latency priority for CFS class K Prateek Nayak
2022-10-13 15:24   ` Vincent Guittot
2022-10-17  6:47     ` K Prateek Nayak
2022-10-25  6:36     ` K Prateek Nayak
2022-10-27 16:34       ` Vincent Guittot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220925143908.10846-8-vincent.guittot@linaro.org \
    --to=vincent.guittot@linaro.org \
    --cc=David.Laight@aculab.com \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=chris.hyser@oracle.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=joshdon@google.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=parth@linux.ibm.com \
    --cc=patrick.bellasi@matbug.net \
    --cc=pavel@ucw.cz \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=qais.yousef@arm.com \
    --cc=qperret@google.com \
    --cc=rostedt@goodmis.org \
    --cc=tim.c.chen@linux.intel.com \
    --cc=timj@gnu.org \
    --cc=tj@kernel.org \
    --cc=valentin.schneider@arm.com \
    --cc=vschneid@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).