From: Patrick Bellasi <patrick.bellasi@arm.com>
To: linux-kernel@vger.kernel.org, linux-pm@vger.kernel.org
Cc: Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>, Tejun Heo <tj@kernel.org>
Subject: [RFC v3 2/5] sched/core: track CPU's capacity_{min,max}
Date: Tue, 28 Feb 2017 14:38:39 +0000
Message-ID: <1488292722-19410-3-git-send-email-patrick.bellasi@arm.com>
In-Reply-To: <1488292722-19410-1-git-send-email-patrick.bellasi@arm.com>

When CAPACITY_CLAMPING is enabled, each task is subject to a capacity
constraint defined by the capacity_{min,max} attributes of the task
group it belongs to.
At run-time, the capacity constraints of the RUNNABLE tasks on a CPU
must be aggregated to determine the actual capacity constraints to
enforce on that CPU.

This aggregation must meet two main goals:
  1) grant the minimum capacity required by the most boosted
     RUNNABLE task on that CPU
  2) avoid penalizing the least capped RUNNABLE tasks on that CPU

Thus, the aggregation for both capacity constraints reduces to a MAX
function over the min/max capacities of the RUNNABLE tasks:

  cpu_capacity_min := MAX(capacity_min_i), for each RUNNABLE task_i
  cpu_capacity_max := MAX(capacity_max_i), for each RUNNABLE task_i
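
For example (illustrative values only): with three RUNNABLE tasks whose
(capacity_min, capacity_max) clamps are (200, 1024), (512, 700) and
(0, 300), the CPU ends up with cpu_capacity_min = 512 and
cpu_capacity_max = 1024. A minimal user-space sketch of this rule,
using hypothetical names:

  #include <stdio.h>

  /* Per-task clamps, in SCHED_CAPACITY_SCALE units (0..1024) */
  struct task_clamps {
          unsigned int capacity_min;
          unsigned int capacity_max;
  };

  int main(void)
  {
          /* Three RUNNABLE tasks on the same CPU (made-up values) */
          struct task_clamps runnable[] = {
                  { 200, 1024 }, { 512, 700 }, { 0, 300 },
          };
          unsigned int cpu_min = 0, cpu_max = 0;
          unsigned int i;

          /* MAX aggregation for both constraints */
          for (i = 0; i < sizeof(runnable) / sizeof(runnable[0]); i++) {
                  if (runnable[i].capacity_min > cpu_min)
                          cpu_min = runnable[i].capacity_min;
                  if (runnable[i].capacity_max > cpu_max)
                          cpu_max = runnable[i].capacity_max;
          }

          /* cpu_min = 512: the most boosted task is satisfied;
           * cpu_max = 1024: the least capped task is not penalized. */
          printf("cpu_capacity_min=%u cpu_capacity_max=%u\n",
                 cpu_min, cpu_max);
          return 0;
  }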

The aggregation at CPU level is done by exploiting the task_struct.
Tasks are already enqueued, via fields embedded in their task_struct,
in many different lists and trees. This patch uses the same approach
to keep track of the capacity constraints enforced by every task on a
CPU. To this purpose:
  - each CPU's RQ has two RBTrees, which are used to track the minimum
    and maximum capacity constraints of all the tasks enqueued on that
    CPU
  - task_struct gains two rb_node fields, which allow the task to be
    positioned in the minimum/maximum capacity tracking RBTree of the
    CPU on which the task is enqueued
Both structures are sketched right after this list.
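
Abridged data layout, consolidated from the hunks below for reference
(the authoritative definitions are in the diff):

  struct task_struct {
          ...
          /* links into the per-CPU min/max capacity RBTrees */
          struct rb_node cap_clamp_node[2];
          ...
  };

  struct rq {
          ...
          /* [CAP_CLAMP_MIN, CAP_CLAMP_MAX] trees plus cached MAX */
          struct cap_clamp_cpu cap_clamp_cpu[2];
          ...
  };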

This patch provides the RBTree support code; for the sake of clarity,
the synchronization between the
   fast path: {enqueue,dequeue}_task
and the
   slow path: cpu_capacity_{min,max}_write_u64
is provided in a dedicated patch.
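
For context, a consumer of the aggregated values (e.g. the schedutil
integration added later in this series) would conceptually clamp a
CPU's requested capacity as sketched below; this is an illustration
only, not code from this series:

  static inline unsigned int cap_clamp_cpu_util(struct rq *rq,
                                                unsigned int util)
  {
          unsigned int min = rq->cap_clamp_cpu[CAP_CLAMP_MIN].value;
          unsigned int max = rq->cap_clamp_cpu[CAP_CLAMP_MAX].value;

          return clamp(util, min, max);
  }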

Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-kernel@vger.kernel.org
---
 include/linux/sched.h |   3 ++
 kernel/sched/core.c   | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h  |  23 +++++++++
 3 files changed, 155 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e2ed46d..5838570 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1531,6 +1531,9 @@ struct task_struct {
 	struct sched_rt_entity rt;
 #ifdef CONFIG_CGROUP_SCHED
 	struct task_group *sched_task_group;
+#ifdef CONFIG_CAPACITY_CLAMPING
+	struct rb_node cap_clamp_node[2];
+#endif
 #endif
 	struct sched_dl_entity dl;
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a171d49..8f509be 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -752,11 +752,128 @@ static void set_load_weight(struct task_struct *p)
 	load->inv_weight = sched_prio_to_wmult[prio];
 }
 
+#ifdef CONFIG_CAPACITY_CLAMPING
+
+static inline void
+cap_clamp_insert_capacity(struct rq *rq, struct task_struct *p,
+			  unsigned int cap_idx)
+{
+	struct cap_clamp_cpu *cgc = &rq->cap_clamp_cpu[cap_idx];
+	struct task_group *tg = task_group(p);
+	struct rb_node *parent = NULL;
+	struct task_struct *entry;
+	struct rb_node **link;
+	struct rb_root *root;
+	struct rb_node *node;
+	int update_cache = 1;
+	u64 capacity_new;
+	u64 capacity_cur;
+
+	node = &p->cap_clamp_node[cap_idx];
+	if (!RB_EMPTY_NODE(node)) {
+		WARN(1, "cap_clamp_insert_capacity() on non-empty node\n");
+		return;
+	}
+
+	/*
+	 * The capacity_{min,max} the task is subject to is defined by the
+	 * current TG the task belongs to. The TG's capacity constraints are
+	 * thus used to place the task within the rbtree used to track
+	 * the capacity_{min,max} for the CPU.
+	 */
+	capacity_new = tg->cap_clamp[cap_idx];
+	root = &cgc->tree;
+	link = &root->rb_node;
+	while (*link) {
+		parent = *link;
+		entry = rb_entry(parent, struct task_struct,
+				 cap_clamp_node[cap_idx]);
+		capacity_cur = task_group(entry)->cap_clamp[cap_idx];
+		if (capacity_new <= capacity_cur) {
+			link = &parent->rb_left;
+			update_cache = 0;
+		} else {
+			link = &parent->rb_right;
+		}
+	}
+
+	/* Add task's capacity_{min,max} and rebalance the rbtree */
+	rb_link_node(node, parent, link);
+	rb_insert_color(node, root);
+
+	if (!update_cache)
+		return;
+
+	/* New rightmost node: update the CPU's cached MAX capacity */
+	cgc->value = capacity_new;
+	cgc->node = node;
+}
+
+static inline void
+cap_clamp_remove_capacity(struct rq *rq, struct task_struct *p,
+			  unsigned int cap_idx)
+{
+	struct cap_clamp_cpu *cgc = &rq->cap_clamp_cpu[cap_idx];
+	struct rb_node *node = &p->cap_clamp_node[cap_idx];
+	struct rb_root *root = &cgc->tree;
+
+	if (RB_EMPTY_NODE(node)) {
+		WARN(1, "cap_clamp_remove_capacity() on empty node\n");
+		return;
+	}
+
+	/* Update CPU's capacity_{min,max} cache pointer */
+	if (node == cgc->node) {
+		struct rb_node *prev_node = rb_prev(node);
+
+		/* Reset value in case this was the last task */
+		cgc->value = (cap_idx == CAP_CLAMP_MIN)
+			? 0 : SCHED_CAPACITY_SCALE;
+
+		/* Update node and value, if there is another task */
+		cgc->node = prev_node;
+		if (cgc->node) {
+			struct task_struct *entry;
+
+			entry = rb_entry(cgc->node, struct task_struct,
+					 cap_clamp_node[cap_idx]);
+			cgc->value = task_group(entry)->cap_clamp[cap_idx];
+		}
+	}
+
+	/* Remove task's capacity_{min,max} */
+	rb_erase(node, root);
+	RB_CLEAR_NODE(node);
+}
+
+static inline void
+cap_clamp_enqueue_task(struct rq *rq, struct task_struct *p, int flags)
+{
+	/* Track task's min/max capacities */
+	cap_clamp_insert_capacity(rq, p, CAP_CLAMP_MIN);
+	cap_clamp_insert_capacity(rq, p, CAP_CLAMP_MAX);
+}
+
+static inline void
+cap_clamp_dequeue_task(struct rq *rq, struct task_struct *p, int flags)
+{
+	/* Stop tracking the task's min/max capacities */
+	cap_clamp_remove_capacity(rq, p, CAP_CLAMP_MIN);
+	cap_clamp_remove_capacity(rq, p, CAP_CLAMP_MAX);
+}
+#else
+static inline void
+cap_clamp_enqueue_task(struct rq *rq, struct task_struct *p, int flags) { }
+static inline void
+cap_clamp_dequeue_task(struct rq *rq, struct task_struct *p, int flags) { }
+#endif /* CONFIG_CAPACITY_CLAMPING */
+
 static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 {
 	update_rq_clock(rq);
 	if (!(flags & ENQUEUE_RESTORE))
 		sched_info_queued(rq, p);
+	cap_clamp_enqueue_task(rq, p, flags);
 	p->sched_class->enqueue_task(rq, p, flags);
 }
 
@@ -765,6 +882,7 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 	update_rq_clock(rq);
 	if (!(flags & DEQUEUE_SAVE))
 		sched_info_dequeued(rq, p);
+	cap_clamp_dequeue_task(rq, p, flags);
 	p->sched_class->dequeue_task(rq, p, flags);
 }
 
@@ -2412,6 +2530,10 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 	plist_node_init(&p->pushable_tasks, MAX_PRIO);
 	RB_CLEAR_NODE(&p->pushable_dl_tasks);
 #endif
+#ifdef CONFIG_CAPACITY_CLAMPING
+	RB_CLEAR_NODE(&p->cap_clamp_node[CAP_CLAMP_MIN]);
+	RB_CLEAR_NODE(&p->cap_clamp_node[CAP_CLAMP_MAX]);
+#endif
 
 	put_cpu();
 	return 0;
@@ -6058,6 +6180,13 @@ void __init sched_init(void)
 		init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
+#ifdef CONFIG_CAPACITY_CLAMPING
+		rq->cap_clamp_cpu[CAP_CLAMP_MIN].tree = RB_ROOT;
+		rq->cap_clamp_cpu[CAP_CLAMP_MIN].node = NULL;
+		rq->cap_clamp_cpu[CAP_CLAMP_MAX].tree = RB_ROOT;
+		rq->cap_clamp_cpu[CAP_CLAMP_MAX].node = NULL;
+#endif
+
 		rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
 #ifdef CONFIG_RT_GROUP_SCHED
 		init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 05dae4a..4a7d224 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -461,6 +461,24 @@ struct cfs_rq {
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 };
 
+/* Capacity capping related fields in a runqueue */
+struct cap_clamp_cpu {
+	/*
+	 * RBTree to keep sorted capacity constraints
+	 * of currently RUNNABLE tasks on a CPU.
+	 */
+	struct rb_root tree;
+
+	/*
+	 * Pointer to the RUNNABLE task defining the current
+	 * capacity constraint for a CPU.
+	 */
+	struct rb_node *node;
+
+	/* The CPU's current capacity constraint */
+	unsigned int value;
+};
+
 static inline int rt_bandwidth_enabled(void)
 {
 	return sysctl_sched_rt_runtime >= 0;
@@ -648,6 +666,11 @@ struct rq {
 	struct list_head *tmp_alone_branch;
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
+#ifdef CONFIG_CAPACITY_CLAMPING
+	/* Min and Max capacity constraints */
+	struct cap_clamp_cpu cap_clamp_cpu[2];
+#endif /* CONFIG_CAPACITY_CLAMPING */
+
 	/*
 	 * This is part of a global counter where only the total sum
 	 * over all CPUs matters. A task can increase this counter on
-- 
2.7.4
