From: Vineeth Remanan Pillai <vpillai@digitalocean.com>
To: Nishanth Aravamudan <naravamudan@digitalocean.com>,
	Julien Desfossez <jdesfossez@digitalocean.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Tim Chen <tim.c.chen@linux.intel.com>,
	mingo@kernel.org, tglx@linutronix.de, pjt@google.com,
	torvalds@linux-foundation.org
Cc: linux-kernel@vger.kernel.org, Dario Faggioli <dfaggioli@suse.com>,
	fweisbec@gmail.com, keescook@chromium.org, kerrnel@google.com,
	Phil Auld <pauld@redhat.com>, Aaron Lu <aaron.lwe@gmail.com>,
	Aubrey Li <aubrey.intel@gmail.com>,
	Valentin Schneider <valentin.schneider@arm.com>,
	Mel Gorman <mgorman@techsingularity.net>,
	Pawan Gupta <pawan.kumar.gupta@linux.intel.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Vineeth Remanan Pillai <vpillai@digitalocean.com>
Subject: [RFC PATCH v4 11/19] sched: Basic tracking of matching tasks
Date: Wed, 30 Oct 2019 18:33:24 +0000
Message-ID: <ca8ef62d1eadc51a8d1056db5e1c38ecac09379e.1572437285.git.vpillai@digitalocean.com>
In-Reply-To: <cover.1572437285.git.vpillai@digitalocean.com>

From: Peter Zijlstra <peterz@infradead.org>

Introduce task_struct::core_cookie as an opaque identifier for core
scheduling. When enabled, core scheduling will only allow matching
tasks to be on the core, where idle matches everything.
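As an illustration only, the matching rule can be sketched as a
stand-alone predicate; the struct and helper names here are
hypothetical simplifications, not part of this patch:

	#include <stdbool.h>

	struct core_task {
		bool		idle;
		unsigned long	cookie;
		int		prio;	/* kernel convention: lower value = higher priority */
	};

	/* Idle matches everything; otherwise cookies must be equal. */
	static bool cookies_match(struct core_task a, struct core_task b)
	{
		return a.idle || b.idle || a.cookie == b.cookie;
	}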

When task_struct::core_cookie is set (and core scheduling is enabled),
these tasks are indexed in a second RB-tree, first on cookie value,
then on scheduling function, such that matching task selection always
finds the most eligible match.
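The resulting tree order can be modelled as a two-level comparison,
cookie first and priority second (a simplified user-space sketch
reusing the hypothetical struct core_task above; __sched_core_less()
in the patch is the real comparator, which additionally breaks
within-class ties, e.g. on fair vruntime):

	/*
	 * Tree order: ascending cookie, then descending priority, so the
	 * left-most node of any cookie range is the most eligible task.
	 */
	static bool core_less(struct core_task a, struct core_task b)
	{
		if (a.cookie != b.cookie)
			return a.cookie < b.cookie;
		return a.prio < b.prio;	/* higher priority sorts left */
	}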

NOTE: *shudder* at the overhead...

NOTE: *sigh*, a 3rd copy of the scheduling function; the alternative
is per class tracking of cookies and that just duplicates a lot of
stuff for no raisin (the 2nd copy lives in the rt-mutex PI code).
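For reference, prio_less() below compares on the single "kernel prio"
scale built by __task_prio(): stop class is -2, deadline -1, RT maps
to [0, 99], every fair task squashes to one value (120 per the
comment) and idle to 140, with lower values more important; deadline
ties then fall back to the earlier deadline and fair ties to vruntime,
normalized across CPUs.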

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineeth Remanan Pillai <vpillai@digitalocean.com>
Signed-off-by: Julien Desfossez <jdesfossez@digitalocean.com>
---
 include/linux/sched.h |   8 ++-
 kernel/sched/core.c   | 146 ++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/fair.c   |  46 -------------
 kernel/sched/sched.h  |  55 ++++++++++++++++
 4 files changed, 208 insertions(+), 47 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9f51932bd543..d319219ea58f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -688,10 +688,16 @@ struct task_struct {
 	const struct sched_class	*sched_class;
 	struct sched_entity		se;
 	struct sched_rt_entity		rt;
+	struct sched_dl_entity		dl;
+
+#ifdef CONFIG_SCHED_CORE
+	struct rb_node			core_node;
+	unsigned long			core_cookie;
+#endif
+
 #ifdef CONFIG_CGROUP_SCHED
 	struct task_group		*sched_task_group;
 #endif
-	struct sched_dl_entity		dl;
 
 #ifdef CONFIG_UCLAMP_TASK
 	/* Clamp values requested for a scheduling entity */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0427cf610d6a..44dadbe9a2d0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -76,6 +76,141 @@ int sysctl_sched_rt_runtime = 950000;
 
 DEFINE_STATIC_KEY_FALSE(__sched_core_enabled);
 
+/* kernel prio, less is more */
+static inline int __task_prio(struct task_struct *p)
+{
+	if (p->sched_class == &stop_sched_class) /* trumps deadline */
+		return -2;
+
+	if (rt_prio(p->prio)) /* includes deadline */
+		return p->prio; /* [-1, 99] */
+
+	if (p->sched_class == &idle_sched_class)
+		return MAX_RT_PRIO + NICE_WIDTH; /* 140 */
+
+	return MAX_RT_PRIO + MAX_NICE; /* 120, squash fair */
+}
+
+/*
+ * l(a,b)
+ * le(a,b) := !l(b,a)
+ * g(a,b)  := l(b,a)
+ * ge(a,b) := !l(a,b)
+ */
+
+/* real prio, less is less */
+static inline bool prio_less(struct task_struct *a, struct task_struct *b)
+{
+
+	int pa = __task_prio(a), pb = __task_prio(b);
+
+	if (-pa < -pb)
+		return true;
+
+	if (-pb < -pa)
+		return false;
+
+	if (pa == -1) /* dl_prio() doesn't work because of stop_class above */
+		return !dl_time_before(a->dl.deadline, b->dl.deadline);
+
+	if (pa == MAX_RT_PRIO + MAX_NICE)  { /* fair */
+		u64 vruntime = b->se.vruntime;
+
+		/*
+		 * Normalize the vruntime if tasks are in different cpus.
+		 */
+		if (task_cpu(a) != task_cpu(b)) {
+			vruntime -= task_cfs_rq(b)->min_vruntime;
+			vruntime += task_cfs_rq(a)->min_vruntime;
+		}
+
+		return !((s64)(a->se.vruntime - vruntime) <= 0);
+	}
+
+	return false;
+}
+
+static inline bool __sched_core_less(struct task_struct *a, struct task_struct *b)
+{
+	if (a->core_cookie < b->core_cookie)
+		return true;
+
+	if (a->core_cookie > b->core_cookie)
+		return false;
+
+	/* flip prio, so high prio is leftmost */
+	if (prio_less(b, a))
+		return true;
+
+	return false;
+}
+
+static void sched_core_enqueue(struct rq *rq, struct task_struct *p)
+{
+	struct rb_node *parent, **node;
+	struct task_struct *node_task;
+
+	rq->core->core_task_seq++;
+
+	if (!p->core_cookie)
+		return;
+
+	node = &rq->core_tree.rb_node;
+	parent = *node;
+
+	while (*node) {
+		node_task = container_of(*node, struct task_struct, core_node);
+		parent = *node;
+
+		if (__sched_core_less(p, node_task))
+			node = &parent->rb_left;
+		else
+			node = &parent->rb_right;
+	}
+
+	rb_link_node(&p->core_node, parent, node);
+	rb_insert_color(&p->core_node, &rq->core_tree);
+}
+
+static void sched_core_dequeue(struct rq *rq, struct task_struct *p)
+{
+	rq->core->core_task_seq++;
+
+	if (!p->core_cookie)
+		return;
+
+	rb_erase(&p->core_node, &rq->core_tree);
+}
+
+/*
+ * Find left-most (aka, highest priority) task matching @cookie.
+ */
+static struct task_struct *sched_core_find(struct rq *rq, unsigned long cookie)
+{
+	struct rb_node *node = rq->core_tree.rb_node;
+	struct task_struct *node_task, *match;
+
+	/*
+	 * The idle task always matches any cookie!
+	 */
+	match = idle_sched_class.pick_task(rq);
+
+	while (node) {
+		node_task = container_of(node, struct task_struct, core_node);
+
+		if (cookie < node_task->core_cookie) {
+			node = node->rb_left;
+		} else if (cookie > node_task->core_cookie) {
+			node = node->rb_right;
+		} else {
+			match = node_task;
+			node = node->rb_left;
+		}
+	}
+
+	return match;
+}
+
 /*
  * The static-key + stop-machine variable are needed such that:
  *
@@ -134,6 +269,11 @@ void sched_core_put(void)
 	mutex_unlock(&sched_core_mutex);
 }
 
+#else /* !CONFIG_SCHED_CORE */
+
+static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { }
+static inline void sched_core_dequeue(struct rq *rq, struct task_struct *p) { }
+
 #endif /* CONFIG_SCHED_CORE */
 
 /*
@@ -1233,6 +1373,9 @@ static inline void init_uclamp(void) { }
 
 static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 {
+	if (sched_core_enabled(rq))
+		sched_core_enqueue(rq, p);
+
 	if (!(flags & ENQUEUE_NOCLOCK))
 		update_rq_clock(rq);
 
@@ -1247,6 +1390,9 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 
 static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 {
+	if (sched_core_enabled(rq))
+		sched_core_dequeue(rq, p);
+
 	if (!(flags & DEQUEUE_NOCLOCK))
 		update_rq_clock(rq);
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index cc78b334e1c3..e3b7e8ba1420 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -248,33 +248,11 @@ const struct sched_class fair_sched_class;
  */
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static inline struct task_struct *task_of(struct sched_entity *se)
-{
-	SCHED_WARN_ON(!entity_is_task(se));
-	return container_of(se, struct task_struct, se);
-}
 
 /* Walk up scheduling entities hierarchy */
 #define for_each_sched_entity(se) \
 		for (; se; se = se->parent)
 
-static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
-{
-	return p->se.cfs_rq;
-}
-
-/* runqueue on which this entity is (to be) queued */
-static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
-{
-	return se->cfs_rq;
-}
-
-/* runqueue "owned" by this group */
-static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
-{
-	return grp->my_q;
-}
-
 static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len)
 {
 	if (!path)
@@ -435,33 +413,9 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 
 #else	/* !CONFIG_FAIR_GROUP_SCHED */
 
-static inline struct task_struct *task_of(struct sched_entity *se)
-{
-	return container_of(se, struct task_struct, se);
-}
-
 #define for_each_sched_entity(se) \
 		for (; se; se = NULL)
 
-static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
-{
-	return &task_rq(p)->cfs;
-}
-
-static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
-{
-	struct task_struct *p = task_of(se);
-	struct rq *rq = task_rq(p);
-
-	return &rq->cfs;
-}
-
-/* runqueue "owned" by this group */
-static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
-{
-	return NULL;
-}
-
 static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len)
 {
 	if (path)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 2ba68ab093ee..666b3bc2cfc2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -996,6 +996,10 @@ struct rq {
 	/* per rq */
 	struct rq		*core;
 	unsigned int		core_enabled;
+	struct rb_root		core_tree;
+
+	/* shared state */
+	unsigned int		core_task_seq;
 #endif
 };
 
@@ -1075,6 +1079,57 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 #define raw_rq()		raw_cpu_ptr(&runqueues)
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static inline struct task_struct *task_of(struct sched_entity *se)
+{
+	SCHED_WARN_ON(!entity_is_task(se));
+	return container_of(se, struct task_struct, se);
+}
+
+static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
+{
+	return p->se.cfs_rq;
+}
+
+/* runqueue on which this entity is (to be) queued */
+static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
+{
+	return se->cfs_rq;
+}
+
+/* runqueue "owned" by this group */
+static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
+{
+	return grp->my_q;
+}
+
+#else
+
+static inline struct task_struct *task_of(struct sched_entity *se)
+{
+	return container_of(se, struct task_struct, se);
+}
+
+static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
+{
+	return &task_rq(p)->cfs;
+}
+
+static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
+{
+	struct task_struct *p = task_of(se);
+	struct rq *rq = task_rq(p);
+
+	return &rq->cfs;
+}
+
+/* runqueue "owned" by this group */
+static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
+{
+	return NULL;
+}
+#endif
+
 extern void update_rq_clock(struct rq *rq);
 
 static inline u64 __rq_clock_broken(struct rq *rq)
-- 
2.17.1

