* [PATCH 1/5] sched/deadline: Modify cpudl::free_cpus to reflect rd->online
@ 2015-01-19  4:49 Xunlei Pang
  2015-01-19  4:49 ` [PATCH 2/5] sched/deadline: Remove cpu_active_mask from cpudl_find() Xunlei Pang
                   ` (5 more replies)
  0 siblings, 6 replies; 24+ messages in thread
From: Xunlei Pang @ 2015-01-19  4:49 UTC (permalink / raw)
  To: linux-kernel; +Cc: Peter Zijlstra, Juri Lelli, Xunlei Pang

Currently, cpudl::free_cpus contains all cpus during init, see
cpudl_init(). When calling cpudl_find(), we have to add rd->span
to avoid selecting a cpu outside the current root domain, because
cpus_allowed cannot be depended on when performing clustered
scheduling using the cpuset, see find_later_rq().

This patch adds cpudl_set_freecpu() and cpudl_clear_freecpu() for
changing cpudl::free_cpus when doing rq_online_dl()/rq_offline_dl(),
so we can avoid the rd->span operation when calling cpudl_find()
in find_later_rq().
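
As a side note, the rewritten condition leans on cpumask_and()
returning whether the destination mask ended up non-empty, so the
"intersect and test" folds into a single if. A minimal userspace model
of that idiom (illustrative only; plain bitmasks stand in for
struct cpumask):

	#include <stdio.h>

	/* Model of cpumask_and(): like bitmap_and(), report whether
	 * the intersection came out non-empty. */
	static int mask_and(unsigned int *dst, unsigned int a, unsigned int b)
	{
		*dst = a & b;
		return *dst != 0;
	}

	int main(void)
	{
		unsigned int later_mask;
		unsigned int free_cpus = 0x6;	/* CPU1, CPU2 free of DL tasks */
		unsigned int allowed   = 0x1;	/* task pinned to CPU0 */

		if (mask_and(&later_mask, free_cpus, allowed))
			printf("pick from the free set: %#x\n", later_mask);
		else	/* taken here: intersection is empty */
			printf("fall back to comparing the heap maximum\n");
		return 0;
	}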

Signed-off-by: Xunlei Pang <pang.xunlei@linaro.org>
---
 kernel/sched/cpudeadline.c | 28 ++++++++++++++++++++++++----
 kernel/sched/cpudeadline.h |  2 ++
 kernel/sched/deadline.c    |  5 ++---
 3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 539ca3c..fd9d3fb 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -107,7 +107,9 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	int best_cpu = -1;
 	const struct sched_dl_entity *dl_se = &p->dl;
 
-	if (later_mask && cpumask_and(later_mask, later_mask, cp->free_cpus)) {
+	if (later_mask &&
+	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed) &&
+	    cpumask_and(later_mask, later_mask, cpu_active_mask)) {
 		best_cpu = cpumask_any(later_mask);
 		goto out;
 	} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
@@ -186,6 +188,26 @@ out:
 }
 
 /*
+ * cpudl_set_freecpu - Set the cpudl.free_cpus
+ * @cp: the cpudl max-heap context
+ * @cpu: rd attached cpu
+ */
+void cpudl_set_freecpu(struct cpudl *cp, int cpu)
+{
+	cpumask_set_cpu(cpu, cp->free_cpus);
+}
+
+/*
+ * cpudl_clear_freecpu - Clear the cpudl.free_cpus
+ * @cp: the cpudl max-heap context
+ * @cpu: rd attached cpu
+ */
+void cpudl_clear_freecpu(struct cpudl *cp, int cpu)
+{
+	cpumask_clear_cpu(cpu, cp->free_cpus);
+}
+
+/*
  * cpudl_init - initialize the cpudl structure
  * @cp: the cpudl max-heap context
  */
@@ -203,7 +225,7 @@ int cpudl_init(struct cpudl *cp)
 	if (!cp->elements)
 		return -ENOMEM;
 
-	if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
 		kfree(cp->elements);
 		return -ENOMEM;
 	}
@@ -211,8 +233,6 @@ int cpudl_init(struct cpudl *cp)
 	for_each_possible_cpu(i)
 		cp->elements[i].idx = IDX_INVALID;
 
-	cpumask_setall(cp->free_cpus);
-
 	return 0;
 }
 
diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h
index 020039b..1a0a6ef 100644
--- a/kernel/sched/cpudeadline.h
+++ b/kernel/sched/cpudeadline.h
@@ -24,6 +24,8 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	       struct cpumask *later_mask);
 void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid);
 int cpudl_init(struct cpudl *cp);
+void cpudl_set_freecpu(struct cpudl *cp, int cpu);
+void cpudl_clear_freecpu(struct cpudl *cp, int cpu);
 void cpudl_cleanup(struct cpudl *cp);
 #endif /* CONFIG_SMP */
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index b52092f..e7b2722 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1165,9 +1165,6 @@ static int find_later_rq(struct task_struct *task)
 	 * We have to consider system topology and task affinity
 	 * first, then we can look for a suitable cpu.
 	 */
-	cpumask_copy(later_mask, task_rq(task)->rd->span);
-	cpumask_and(later_mask, later_mask, cpu_active_mask);
-	cpumask_and(later_mask, later_mask, &task->cpus_allowed);
 	best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
 			task, later_mask);
 	if (best_cpu == -1)
@@ -1562,6 +1559,7 @@ static void rq_online_dl(struct rq *rq)
 	if (rq->dl.overloaded)
 		dl_set_overload(rq);
 
+	cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
 	if (rq->dl.dl_nr_running > 0)
 		cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);
 }
@@ -1573,6 +1571,7 @@ static void rq_offline_dl(struct rq *rq)
 		dl_clear_overload(rq);
 
 	cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
+	cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
 }
 
 void init_sched_dl_class(void)
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 2/5] sched/deadline: Remove cpu_active_mask from cpudl_find()
  2015-01-19  4:49 [PATCH 1/5] sched/deadline: Modify cpudl::free_cpus to reflect rd->online Xunlei Pang
@ 2015-01-19  4:49 ` Xunlei Pang
  2015-01-27 15:04   ` Peter Zijlstra
  2015-02-04 14:36   ` [tip:sched/core] " tip-bot for Xunlei Pang
  2015-01-19  4:49 ` [PATCH 3/5] sched/deadline: Fix wrong cpudl_find() in check_preempt_equal_dl() Xunlei Pang
                   ` (4 subsequent siblings)
  5 siblings, 2 replies; 24+ messages in thread
From: Xunlei Pang @ 2015-01-19  4:49 UTC (permalink / raw)
  To: linux-kernel; +Cc: Peter Zijlstra, Juri Lelli, Xunlei Pang

cpu_active_mask is rarely changed (only on hotplug), so remove this
operation to gain a little performance.

If there is a change in cpu_active_mask, rq_online_dl() and
rq_offline_dl() should take care of it normally, so
cpudl::free_cpus carries enough information for us.

For the rare case when a task is put onto a dying cpu (which
rq_offline_dl() can't handle in a timely fashion), it will be handled
through _cpu_down()->...->multi_cpu_stop()->migration_call()
->migrate_tasks(), preventing the task from hanging on the
dead cpu.

Signed-off-by: Xunlei Pang <pang.xunlei@linaro.org>
---
 kernel/sched/cpudeadline.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index fd9d3fb..c6acb07 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -108,8 +108,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	const struct sched_dl_entity *dl_se = &p->dl;
 
 	if (later_mask &&
-	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed) &&
-	    cpumask_and(later_mask, later_mask, cpu_active_mask)) {
+	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
 		best_cpu = cpumask_any(later_mask);
 		goto out;
 	} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 3/5] sched/deadline: Fix wrong cpudl_find() in check_preempt_equal_dl()
  2015-01-19  4:49 [PATCH 1/5] sched/deadline: Modify cpudl::free_cpus to reflect rd->online Xunlei Pang
  2015-01-19  4:49 ` [PATCH 2/5] sched/deadline: Remove cpu_active_mask from cpudl_find() Xunlei Pang
@ 2015-01-19  4:49 ` Xunlei Pang
  2015-01-27 12:48   ` Peter Zijlstra
  2015-01-27 16:47   ` Peter Zijlstra
  2015-01-19  4:49 ` [PATCH 4/5] sched/rt: Consider deadline tasks in cpupri_find() Xunlei Pang
                   ` (3 subsequent siblings)
  5 siblings, 2 replies; 24+ messages in thread
From: Xunlei Pang @ 2015-01-19  4:49 UTC (permalink / raw)
  To: linux-kernel; +Cc: Peter Zijlstra, Juri Lelli, Xunlei Pang

In check_preempt_equal_dl(), cpudl_find() is called with a NULL
later_mask, so cpudl_find() doesn't check cpudl::free_cpus at all.

This patch tackles the issue by always passing a non-NULL later_mask
to cpudl_find().
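
For reference, with CONFIG_CPUMASK_OFFSTACK such a per-CPU scratch
mask also has to be allocated at class-init time (hence the "make
sure the mask is initialized first" check in the diff below). A
sketch of that allocation pattern, mirroring how init_sched_rt_class()
sets up local_cpu_mask (an assumption for illustration, not part of
this diff):

	void init_sched_dl_class(void)
	{
		unsigned int i;

		/* One scratch mask per possible CPU; before this runs,
		 * this_cpu_cpumask_var_ptr() can observe NULL. */
		for_each_possible_cpu(i)
			zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
						GFP_KERNEL, cpu_to_node(i));
	}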

Signed-off-by: Xunlei Pang <pang.xunlei@linaro.org>
---
 kernel/sched/cpudeadline.c |  8 +++-----
 kernel/sched/deadline.c    | 15 +++++++++++----
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index c6acb07..f331fcf 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -97,7 +97,7 @@ static inline int cpudl_maximum(struct cpudl *cp)
  * cpudl_find - find the best (later-dl) CPU in the system
  * @cp: the cpudl max-heap context
  * @p: the task
- * @later_mask: a mask to fill in with the selected CPUs (or NULL)
+ * @later_mask: a mask to fill in with the selected CPUs (not NULL)
  *
  * Returns: int - best CPU (heap maximum if suitable)
  */
@@ -107,15 +107,13 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	int best_cpu = -1;
 	const struct sched_dl_entity *dl_se = &p->dl;
 
-	if (later_mask &&
-	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
+	if (cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
 		best_cpu = cpumask_any(later_mask);
 		goto out;
 	} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
 			dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
 		best_cpu = cpudl_maximum(cp);
-		if (later_mask)
-			cpumask_set_cpu(best_cpu, later_mask);
+		cpumask_set_cpu(best_cpu, later_mask);
 	}
 
 out:
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index e7b2722..82d900f 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -943,14 +943,23 @@ out:
 	return cpu;
 }
 
+static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
+
 static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
 {
+	struct cpumask *later_mask =
+			this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
+
+	/* Make sure the mask is initialized first */
+	if (unlikely(!later_mask))
+		return;
+
 	/*
 	 * Current can't be migrated, useless to reschedule,
 	 * let's hope p can move out.
 	 */
 	if (rq->curr->nr_cpus_allowed == 1 ||
-	    cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
+	    cpudl_find(&rq->rd->cpudl, rq->curr, later_mask) == -1)
 		return;
 
 	/*
@@ -958,7 +967,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
 	 * see if it is pushed or pulled somewhere else.
 	 */
 	if (p->nr_cpus_allowed != 1 &&
-	    cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
+	    cpudl_find(&rq->rd->cpudl, p, later_mask) != -1)
 		return;
 
 	resched_curr(rq);
@@ -1145,8 +1154,6 @@ next_node:
 	return NULL;
 }
 
-static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
-
 static int find_later_rq(struct task_struct *task)
 {
 	struct sched_domain *sd;
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 4/5] sched/rt: Consider deadline tasks in cpupri_find()
  2015-01-19  4:49 [PATCH 1/5] sched/deadline: Modify cpudl::free_cpus to reflect rd->online Xunlei Pang
  2015-01-19  4:49 ` [PATCH 2/5] sched/deadline: Remove cpu_active_mask from cpudl_find() Xunlei Pang
  2015-01-19  4:49 ` [PATCH 3/5] sched/deadline: Fix wrong cpudl_find() in check_preempt_equal_dl() Xunlei Pang
@ 2015-01-19  4:49 ` Xunlei Pang
  2015-01-27 12:58   ` Peter Zijlstra
  2015-01-19  4:49 ` [PATCH 5/5] sched/rt: Optimize find_lowest_rq() to select a cache hot cpu Xunlei Pang
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 24+ messages in thread
From: Xunlei Pang @ 2015-01-19  4:49 UTC (permalink / raw)
  To: linux-kernel; +Cc: Peter Zijlstra, Juri Lelli, Xunlei Pang

Currently, RT global scheduling doesn't factor in deadline
tasks, which may cause some problems.

Consider the case below:
On a 3 CPU system, CPU0 has one running deadline task,
CPU1 has one running low priority RT task or is idle, and
CPU2 has one running high priority RT task. When another mid
priority RT task is woken on CPU2, it will be pushed to
CPU0 (this also disturbs the deadline task on CPU0), while
it would be reasonable to put it on CPU1.

This patch eliminates the issue by filtering out CPUs that
have runnable deadline tasks, using cpudl->free_cpus in
cpupri_find().

NOTE: We want to make the most of the percpu local_cpu_mask
to save an extra mask allocation, so we now always pass a
non-NULL lowest_mask to cpupri_find().
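
To make the filtering concrete, here is the scenario above as plain
mask arithmetic in a runnable userspace sketch (illustrative only; it
assumes CPU0 shows up at a low cpupri level because deadline tasks are
not tracked by cpupri):

	#include <stdio.h>

	int main(void)
	{
		/* Bit N = CPU N, for the 3 CPU scenario above. */
		unsigned int vec_mask  = 0x3; /* cpupri's lowest level: CPU0, CPU1 */
		unsigned int free_cpus = 0x6; /* no runnable DL task: CPU1, CPU2   */
		unsigned int allowed   = 0x7; /* p->cpus_allowed: all three CPUs   */

		/* Unfiltered, the push lands on CPU0 (first set bit). */
		unsigned int before = allowed & vec_mask;
		/* With cpudl->free_cpus folded in, only CPU1 remains. */
		unsigned int after  = allowed & vec_mask & free_cpus;

		printf("lowest_mask before: %#x, after: %#x\n", before, after);
		return 0;
	}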

Signed-off-by: Xunlei Pang <pang.xunlei@linaro.org>
---
 kernel/sched/core.c   |  2 ++
 kernel/sched/cpupri.c | 22 +++++-----------------
 kernel/sched/cpupri.h |  1 +
 kernel/sched/rt.c     |  9 +++++----
 4 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ade2958..48c9576 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5652,6 +5652,8 @@ static int init_rootdomain(struct root_domain *rd)
 
 	if (cpupri_init(&rd->cpupri) != 0)
 		goto free_rto_mask;
+
+	rd->cpupri.cpudl = &rd->cpudl;
 	return 0;
 
 free_rto_mask:
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 981fcd7..40b8e81 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -32,6 +32,7 @@
 #include <linux/sched/rt.h>
 #include <linux/slab.h>
 #include "cpupri.h"
+#include "cpudeadline.h"
 
 /* Convert between a 140 based task->prio, and our 102 based cpupri */
 static int convert_prio(int prio)
@@ -54,7 +55,7 @@ static int convert_prio(int prio)
  * cpupri_find - find the best (lowest-pri) CPU in the system
  * @cp: The cpupri context
  * @p: The task
- * @lowest_mask: A mask to fill in with selected CPUs (or NULL)
+ * @lowest_mask: A mask to fill in with selected CPUs (not NULL)
  *
  * Note: This function returns the recommended CPUs as calculated during the
  * current invocation.  By the time the call returns, the CPUs may have in
@@ -103,24 +104,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
 		if (skip)
 			continue;
 
-		if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
+		cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+		cpumask_and(lowest_mask, lowest_mask, cp->cpudl->free_cpus);
+		if (cpumask_any(lowest_mask) >= nr_cpu_ids)
 			continue;
 
-		if (lowest_mask) {
-			cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
-
-			/*
-			 * We have to ensure that we have at least one bit
-			 * still set in the array, since the map could have
-			 * been concurrently emptied between the first and
-			 * second reads of vec->mask.  If we hit this
-			 * condition, simply act as though we never hit this
-			 * priority level and continue on.
-			 */
-			if (cpumask_any(lowest_mask) >= nr_cpu_ids)
-				continue;
-		}
-
 		return 1;
 	}
 
diff --git a/kernel/sched/cpupri.h b/kernel/sched/cpupri.h
index 63cbb9c..acd7ccf 100644
--- a/kernel/sched/cpupri.h
+++ b/kernel/sched/cpupri.h
@@ -18,6 +18,7 @@ struct cpupri_vec {
 struct cpupri {
 	struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES];
 	int *cpu_to_pri;
+	struct cpudl *cpudl;
 };
 
 #ifdef CONFIG_SMP
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 6725e3c..d28cfa4 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1349,14 +1349,17 @@ out:
 	return cpu;
 }
 
+static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 {
+	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
+
 	/*
 	 * Current can't be migrated, useless to reschedule,
 	 * let's hope p can move out.
 	 */
 	if (rq->curr->nr_cpus_allowed == 1 ||
-	    !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
+	    !cpupri_find(&rq->rd->cpupri, rq->curr, lowest_mask))
 		return;
 
 	/*
@@ -1364,7 +1367,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 	 * see if it is pushed or pulled somewhere else.
 	 */
 	if (p->nr_cpus_allowed != 1
-	    && cpupri_find(&rq->rd->cpupri, p, NULL))
+	    && cpupri_find(&rq->rd->cpupri, p, lowest_mask))
 		return;
 
 	/*
@@ -1526,8 +1529,6 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
 	return NULL;
 }
 
-static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
-
 static int find_lowest_rq(struct task_struct *task)
 {
 	struct sched_domain *sd;
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 5/5] sched/rt: Optimize find_lowest_rq() to select a cache hot cpu
  2015-01-19  4:49 [PATCH 1/5] sched/deadline: Modify cpudl::free_cpus to reflect rd->online Xunlei Pang
                   ` (2 preceding siblings ...)
  2015-01-19  4:49 ` [PATCH 4/5] sched/rt: Consider deadline tasks in cpupri_find() Xunlei Pang
@ 2015-01-19  4:49 ` Xunlei Pang
  2015-01-27 14:21   ` Peter Zijlstra
  2015-01-23 18:09 ` [PATCH 1/5] sched/deadline: Modify cpudl::free_cpus to reflect rd->online Xunlei Pang
  2015-02-01 17:53 ` [tip:sched/core] sched/deadline: Modify cpudl:: free_cpus " tip-bot for Xunlei Pang
  5 siblings, 1 reply; 24+ messages in thread
From: Xunlei Pang @ 2015-01-19  4:49 UTC (permalink / raw)
  To: linux-kernel; +Cc: Peter Zijlstra, Juri Lelli, Xunlei Pang

In find_lowest_rq(), if we can't find a wake_affine cpu in the
sched_domains, we can still determine a cache-hot cpu instead of
simply calling "cpumask_any(lowest_mask)", which always returns the
first cpu in the mask.

So, we can determine the cache-hot cpu in passing during the
sched_domain iteration.
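
A runnable userspace model of the difference (illustrative only; the
domain spans are made up, ordered smallest to largest just as
for_each_domain() walks them):

	#include <stdio.h>

	int main(void)
	{
		/* Task runs on CPU2; its domains, smallest to largest. */
		unsigned int span[]      = { 0xc, 0xf }; /* {2,3}, {0,1,2,3}    */
		unsigned int lowest_mask = 0x9;          /* CPU0, CPU3 eligible */
		int cachehot_cpu = -1;

		for (int d = 0; d < 2 && cachehot_cpu < 0; d++) {
			unsigned int cand = lowest_mask & span[d];
			if (cand)	/* first lowest_mask CPU in this domain */
				cachehot_cpu = __builtin_ctz(cand);
		}

		printf("cache-hot fallback: CPU%d\n", cachehot_cpu);   /* CPU3 */
		printf("cpumask_any() analog: CPU%d\n",
		       __builtin_ctz(lowest_mask));                    /* CPU0 */
		return 0;
	}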

Signed-off-by: Xunlei Pang <pang.xunlei@linaro.org>
---
 kernel/sched/rt.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d28cfa4..e6a42e6 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1535,6 +1535,7 @@ static int find_lowest_rq(struct task_struct *task)
 	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
 	int this_cpu = smp_processor_id();
 	int cpu      = task_cpu(task);
+	int cachehot_cpu = nr_cpu_ids;
 
 	/* Make sure the mask is initialized first */
 	if (unlikely(!lowest_mask))
@@ -1566,8 +1567,12 @@ static int find_lowest_rq(struct task_struct *task)
 
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
+		if (cachehot_cpu >= nr_cpu_ids)
+			cachehot_cpu = cpumask_first_and(lowest_mask,
+						   sched_domain_span(sd));
+
 		if (sd->flags & SD_WAKE_AFFINE) {
-			int best_cpu;
+			int wakeaffine_cpu;
 
 			/*
 			 * "this_cpu" is cheaper to preempt than a
@@ -1579,16 +1584,20 @@ static int find_lowest_rq(struct task_struct *task)
 				return this_cpu;
 			}
 
-			best_cpu = cpumask_first_and(lowest_mask,
+			wakeaffine_cpu = cpumask_first_and(lowest_mask,
 						     sched_domain_span(sd));
-			if (best_cpu < nr_cpu_ids) {
+			if (wakeaffine_cpu < nr_cpu_ids) {
 				rcu_read_unlock();
-				return best_cpu;
+				return wakeaffine_cpu;
 			}
 		}
 	}
 	rcu_read_unlock();
 
+	/* most likely cache-hot */
+	if (cachehot_cpu < nr_cpu_ids)
+		return cachehot_cpu;
+
 	/*
 	 * And finally, if there were no matches within the domains
 	 * just give the caller *something* to work with from the compatible
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/5] sched/deadline: Modify cpudl::free_cpus to reflect rd->online
  2015-01-19  4:49 [PATCH 1/5] sched/deadline: Modify cpudl::free_cpus to reflect rd->online Xunlei Pang
                   ` (3 preceding siblings ...)
  2015-01-19  4:49 ` [PATCH 5/5] sched/rt: Optimize find_lowest_rq() to select a cache hot cpu Xunlei Pang
@ 2015-01-23 18:09 ` Xunlei Pang
  2015-02-01 17:53 ` [tip:sched/core] sched/deadline: Modify cpudl:: free_cpus " tip-bot for Xunlei Pang
  5 siblings, 0 replies; 24+ messages in thread
From: Xunlei Pang @ 2015-01-23 18:09 UTC (permalink / raw)
  To: lkml; +Cc: Peter Zijlstra, Juri Lelli, Xunlei Pang

Hi Peter, Juri,

Could you please give some comments on these 5 patches?
Thanks for your time.

Regards,
Xunlei

On 19 January 2015 at 12:49, Xunlei Pang <pang.xunlei@linaro.org> wrote:
> Currently, cpudl::free_cpus contains all cpus during init, see
> cpudl_init(). When calling cpudl_find(), we have to add rd->span
> to avoid selecting a cpu outside the current root domain, because
> cpus_allowed cannot be depended on when performing clustered
> scheduling using the cpuset, see find_later_rq().
>
> This patch adds cpudl_set_freecpu() and cpudl_clear_freecpu() for
> changing cpudl::free_cpus when doing rq_online_dl()/rq_offline_dl(),
> so we can avoid the rd->span operation when calling cpudl_find()
> in find_later_rq().
>
> Signed-off-by: Xunlei Pang <pang.xunlei@linaro.org>
> ---
>  kernel/sched/cpudeadline.c | 28 ++++++++++++++++++++++++----
>  kernel/sched/cpudeadline.h |  2 ++
>  kernel/sched/deadline.c    |  5 ++---
>  3 files changed, 28 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
> index 539ca3c..fd9d3fb 100644
> --- a/kernel/sched/cpudeadline.c
> +++ b/kernel/sched/cpudeadline.c
> @@ -107,7 +107,9 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
>         int best_cpu = -1;
>         const struct sched_dl_entity *dl_se = &p->dl;
>
> -       if (later_mask && cpumask_and(later_mask, later_mask, cp->free_cpus)) {
> +       if (later_mask &&
> +           cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed) &&
> +           cpumask_and(later_mask, later_mask, cpu_active_mask)) {
>                 best_cpu = cpumask_any(later_mask);
>                 goto out;
>         } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
> @@ -186,6 +188,26 @@ out:
>  }
>
>  /*
> + * cpudl_set_freecpu - Set the cpudl.free_cpus
> + * @cp: the cpudl max-heap context
> + * @cpu: rd attached cpu
> + */
> +void cpudl_set_freecpu(struct cpudl *cp, int cpu)
> +{
> +       cpumask_set_cpu(cpu, cp->free_cpus);
> +}
> +
> +/*
> + * cpudl_clear_freecpu - Clear the cpudl.free_cpus
> + * @cp: the cpudl max-heap context
> + * @cpu: rd attached cpu
> + */
> +void cpudl_clear_freecpu(struct cpudl *cp, int cpu)
> +{
> +       cpumask_clear_cpu(cpu, cp->free_cpus);
> +}
> +
> +/*
>   * cpudl_init - initialize the cpudl structure
>   * @cp: the cpudl max-heap context
>   */
> @@ -203,7 +225,7 @@ int cpudl_init(struct cpudl *cp)
>         if (!cp->elements)
>                 return -ENOMEM;
>
> -       if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
> +       if (!zalloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
>                 kfree(cp->elements);
>                 return -ENOMEM;
>         }
> @@ -211,8 +233,6 @@ int cpudl_init(struct cpudl *cp)
>         for_each_possible_cpu(i)
>                 cp->elements[i].idx = IDX_INVALID;
>
> -       cpumask_setall(cp->free_cpus);
> -
>         return 0;
>  }
>
> diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h
> index 020039b..1a0a6ef 100644
> --- a/kernel/sched/cpudeadline.h
> +++ b/kernel/sched/cpudeadline.h
> @@ -24,6 +24,8 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
>                struct cpumask *later_mask);
>  void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid);
>  int cpudl_init(struct cpudl *cp);
> +void cpudl_set_freecpu(struct cpudl *cp, int cpu);
> +void cpudl_clear_freecpu(struct cpudl *cp, int cpu);
>  void cpudl_cleanup(struct cpudl *cp);
>  #endif /* CONFIG_SMP */
>
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index b52092f..e7b2722 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -1165,9 +1165,6 @@ static int find_later_rq(struct task_struct *task)
>          * We have to consider system topology and task affinity
>          * first, then we can look for a suitable cpu.
>          */
> -       cpumask_copy(later_mask, task_rq(task)->rd->span);
> -       cpumask_and(later_mask, later_mask, cpu_active_mask);
> -       cpumask_and(later_mask, later_mask, &task->cpus_allowed);
>         best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
>                         task, later_mask);
>         if (best_cpu == -1)
> @@ -1562,6 +1559,7 @@ static void rq_online_dl(struct rq *rq)
>         if (rq->dl.overloaded)
>                 dl_set_overload(rq);
>
> +       cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
>         if (rq->dl.dl_nr_running > 0)
>                 cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);
>  }
> @@ -1573,6 +1571,7 @@ static void rq_offline_dl(struct rq *rq)
>                 dl_clear_overload(rq);
>
>         cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
> +       cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
>  }
>
>  void init_sched_dl_class(void)
> --
> 1.9.1
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/5] sched/deadline: Fix wrong cpudl_find() in check_preempt_equal_dl()
  2015-01-19  4:49 ` [PATCH 3/5] sched/deadline: Fix wrong cpudl_find() in check_preempt_equal_dl() Xunlei Pang
@ 2015-01-27 12:48   ` Peter Zijlstra
  2015-01-27 14:15     ` Peter Zijlstra
  2015-01-27 16:47   ` Peter Zijlstra
  1 sibling, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2015-01-27 12:48 UTC (permalink / raw)
  To: Xunlei Pang; +Cc: linux-kernel, Juri Lelli

On Mon, Jan 19, 2015 at 04:49:38AM +0000, Xunlei Pang wrote:
> +++ b/kernel/sched/deadline.c
> @@ -943,14 +943,23 @@ out:
>  	return cpu;
>  }
>  
> +static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
> +
>  static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
>  {
> +	struct cpumask *later_mask =
> +			this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
> +
> +	/* Make sure the mask is initialized first */
> +	if (unlikely(!later_mask))
> +		return;
> +
>  	/*
>  	 * Current can't be migrated, useless to reschedule,
>  	 * let's hope p can move out.
>  	 */
>  	if (rq->curr->nr_cpus_allowed == 1 ||
> -	    cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
> +	    cpudl_find(&rq->rd->cpudl, rq->curr, later_mask) == -1)
>  		return;
>  
>  	/*

But but, should you not put something in that mask before you use it?

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 4/5] sched/rt: Consider deadline tasks in cpupri_find()
  2015-01-19  4:49 ` [PATCH 4/5] sched/rt: Consider deadline tasks in cpupri_find() Xunlei Pang
@ 2015-01-27 12:58   ` Peter Zijlstra
  2015-01-27 14:18     ` Peter Zijlstra
  2015-01-27 23:04     ` Steven Rostedt
  0 siblings, 2 replies; 24+ messages in thread
From: Peter Zijlstra @ 2015-01-27 12:58 UTC (permalink / raw)
  To: Xunlei Pang; +Cc: linux-kernel, Juri Lelli, Steven Rostedt

On Mon, Jan 19, 2015 at 04:49:39AM +0000, Xunlei Pang wrote:
> Currently, RT global scheduling doesn't factor in deadline
> tasks, which may cause some problems.
> 
> Consider the case below:
> On a 3 CPU system, CPU0 has one running deadline task,
> CPU1 has one running low priority RT task or is idle, and
> CPU2 has one running high priority RT task. When another mid
> priority RT task is woken on CPU2, it will be pushed to
> CPU0 (this also disturbs the deadline task on CPU0), while
> it would be reasonable to put it on CPU1.
> 
> This patch eliminates the issue by filtering out CPUs that
> have runnable deadline tasks, using cpudl->free_cpus in
> cpupri_find().

Not a bad idea, Cc'ed Steve who likes to look after the RT bits,
excessive quoting for him.

> NOTE: We want to make the most of the percpu local_cpu_mask
> to save an extra mask allocation, so we now always pass a
> non-NULL lowest_mask to cpupri_find().
> 
> Signed-off-by: Xunlei Pang <pang.xunlei@linaro.org>
> ---
>  kernel/sched/core.c   |  2 ++
>  kernel/sched/cpupri.c | 22 +++++-----------------
>  kernel/sched/cpupri.h |  1 +
>  kernel/sched/rt.c     |  9 +++++----
>  4 files changed, 13 insertions(+), 21 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index ade2958..48c9576 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -5652,6 +5652,8 @@ static int init_rootdomain(struct root_domain *rd)
>  
>  	if (cpupri_init(&rd->cpupri) != 0)
>  		goto free_rto_mask;
> +
> +	rd->cpupri.cpudl = &rd->cpudl;

This is disgusting though; it breaks the cpuri abstraction. Why not pass
in the mask in the one place you actually need it?

>  	return 0;
>  
>  free_rto_mask:
> diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
> index 981fcd7..40b8e81 100644
> --- a/kernel/sched/cpupri.c
> +++ b/kernel/sched/cpupri.c
> @@ -32,6 +32,7 @@
>  #include <linux/sched/rt.h>
>  #include <linux/slab.h>
>  #include "cpupri.h"
> +#include "cpudeadline.h"
>  
>  /* Convert between a 140 based task->prio, and our 102 based cpupri */
>  static int convert_prio(int prio)
> @@ -54,7 +55,7 @@ static int convert_prio(int prio)
>   * cpupri_find - find the best (lowest-pri) CPU in the system
>   * @cp: The cpupri context
>   * @p: The task
> - * @lowest_mask: A mask to fill in with selected CPUs (or NULL)
> + * @lowest_mask: A mask to fill in with selected CPUs (not NULL)
>   *
>   * Note: This function returns the recommended CPUs as calculated during the
>   * current invocation.  By the time the call returns, the CPUs may have in
> @@ -103,24 +104,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
>  		if (skip)
>  			continue;
>  
> -		if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
> +		cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
> +		cpumask_and(lowest_mask, lowest_mask, cp->cpudl->free_cpus);
> +		if (cpumask_any(lowest_mask) >= nr_cpu_ids)
>  			continue;
>  
> -		if (lowest_mask) {
> -			cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
> -
> -			/*
> -			 * We have to ensure that we have at least one bit
> -			 * still set in the array, since the map could have
> -			 * been concurrently emptied between the first and
> -			 * second reads of vec->mask.  If we hit this
> -			 * condition, simply act as though we never hit this
> -			 * priority level and continue on.
> -			 */
> -			if (cpumask_any(lowest_mask) >= nr_cpu_ids)
> -				continue;
> -		}
> -
>  		return 1;
>  	}
>  
> diff --git a/kernel/sched/cpupri.h b/kernel/sched/cpupri.h
> index 63cbb9c..acd7ccf 100644
> --- a/kernel/sched/cpupri.h
> +++ b/kernel/sched/cpupri.h
> @@ -18,6 +18,7 @@ struct cpupri_vec {
>  struct cpupri {
>  	struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES];
>  	int *cpu_to_pri;
> +	struct cpudl *cpudl;
>  };
>  
>  #ifdef CONFIG_SMP
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index 6725e3c..d28cfa4 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -1349,14 +1349,17 @@ out:
>  	return cpu;
>  }
>  
> +static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
>  static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
>  {
> +	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
> +
>  	/*
>  	 * Current can't be migrated, useless to reschedule,
>  	 * let's hope p can move out.
>  	 */
>  	if (rq->curr->nr_cpus_allowed == 1 ||
> -	    !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
> +	    !cpupri_find(&rq->rd->cpupri, rq->curr, lowest_mask))
>  		return;
>  
>  	/*


Again; should you not put something useful in the mask before you pass
it to cpupri_find()?

> @@ -1364,7 +1367,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
>  	 * see if it is pushed or pulled somewhere else.
>  	 */
>  	if (p->nr_cpus_allowed != 1
> -	    && cpupri_find(&rq->rd->cpupri, p, NULL))
> +	    && cpupri_find(&rq->rd->cpupri, p, lowest_mask))
>  		return;
>  
>  	/*
> @@ -1526,8 +1529,6 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
>  	return NULL;
>  }
>  
> -static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
> -
>  static int find_lowest_rq(struct task_struct *task)
>  {
>  	struct sched_domain *sd;
> -- 
> 1.9.1
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/5] sched/deadline: Fix wrong cpudl_find() in check_preempt_equal_dl()
  2015-01-27 12:48   ` Peter Zijlstra
@ 2015-01-27 14:15     ` Peter Zijlstra
  0 siblings, 0 replies; 24+ messages in thread
From: Peter Zijlstra @ 2015-01-27 14:15 UTC (permalink / raw)
  To: Xunlei Pang; +Cc: linux-kernel, Juri Lelli

On Tue, Jan 27, 2015 at 01:48:15PM +0100, Peter Zijlstra wrote:
> On Mon, Jan 19, 2015 at 04:49:38AM +0000, Xunlei Pang wrote:
> > +++ b/kernel/sched/deadline.c
> > @@ -943,14 +943,23 @@ out:
> >  	return cpu;
> >  }
> >  
> > +static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
> > +
> >  static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
> >  {
> > +	struct cpumask *later_mask =
> > +			this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
> > +
> > +	/* Make sure the mask is initialized first */
> > +	if (unlikely(!later_mask))
> > +		return;
> > +
> >  	/*
> >  	 * Current can't be migrated, useless to reschedule,
> >  	 * let's hope p can move out.
> >  	 */
> >  	if (rq->curr->nr_cpus_allowed == 1 ||
> > -	    cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
> > +	    cpudl_find(&rq->rd->cpudl, rq->curr, later_mask) == -1)
> >  		return;
> >  
> >  	/*
> 
> But but, should you not put something in that mask before you use it?

N/M I can't read.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 4/5] sched/rt: Consider deadline tasks in cpupri_find()
  2015-01-27 12:58   ` Peter Zijlstra
@ 2015-01-27 14:18     ` Peter Zijlstra
  2015-01-27 23:04     ` Steven Rostedt
  1 sibling, 0 replies; 24+ messages in thread
From: Peter Zijlstra @ 2015-01-27 14:18 UTC (permalink / raw)
  To: Xunlei Pang; +Cc: linux-kernel, Juri Lelli, Steven Rostedt

On Tue, Jan 27, 2015 at 01:58:59PM +0100, Peter Zijlstra wrote:
> > @@ -103,24 +104,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
> >  		if (skip)
> >  			continue;
> >  
> > -		if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
> > +		cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
> > +		cpumask_and(lowest_mask, lowest_mask, cp->cpudl->free_cpus);
> > +		if (cpumask_any(lowest_mask) >= nr_cpu_ids)
> >  			continue;
> >  


> > +++ b/kernel/sched/rt.c
> > @@ -1349,14 +1349,17 @@ out:
> >  	return cpu;
> >  }
> >  
> > +static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
> >  static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
> >  {
> > +	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
> > +
> >  	/*
> >  	 * Current can't be migrated, useless to reschedule,
> >  	 * let's hope p can move out.
> >  	 */
> >  	if (rq->curr->nr_cpus_allowed == 1 ||
> > -	    !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
> > +	    !cpupri_find(&rq->rd->cpupri, rq->curr, lowest_mask))
> >  		return;
> >  
> >  	/*
> 
> 
> Again; should you not put something useful in the mask before you pass
> it to cpupri_find()?

Similar to the other case; I can't read today. I only saw the
_and(lowest_mask, lowest_mask, ...) and figured it ought to be populated
before that.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 5/5] sched/rt: Optimize find_lowest_rq() to select a cache hot cpu
  2015-01-19  4:49 ` [PATCH 5/5] sched/rt: Optimize find_lowest_rq() to select a cache hot cpu Xunlei Pang
@ 2015-01-27 14:21   ` Peter Zijlstra
  2015-01-27 14:56     ` Steven Rostedt
  0 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2015-01-27 14:21 UTC (permalink / raw)
  To: Xunlei Pang; +Cc: linux-kernel, Juri Lelli, Steven Rostedt

On Mon, Jan 19, 2015 at 04:49:40AM +0000, Xunlei Pang wrote:
> In find_lowest_rq(), if we can't find a wake_affine cpu in the
> sched_domains, we can still determine a cache-hot cpu instead of
> simply calling "cpumask_any(lowest_mask)", which always returns the
> first cpu in the mask.
> 
> So, we can determine the cache-hot cpu in passing during the
> sched_domain iteration.

Steve, I'm not getting this. Why are we using WAKE_AFFINE here?



> Signed-off-by: Xunlei Pang <pang.xunlei@linaro.org>
> ---
>  kernel/sched/rt.c | 17 +++++++++++++----
>  1 file changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index d28cfa4..e6a42e6 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -1535,6 +1535,7 @@ static int find_lowest_rq(struct task_struct *task)
>  	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
>  	int this_cpu = smp_processor_id();
>  	int cpu      = task_cpu(task);
> +	int cachehot_cpu = nr_cpu_ids;
>  
>  	/* Make sure the mask is initialized first */
>  	if (unlikely(!lowest_mask))
> @@ -1566,8 +1567,12 @@ static int find_lowest_rq(struct task_struct *task)
>  
>  	rcu_read_lock();
>  	for_each_domain(cpu, sd) {
> +		if (cachehot_cpu >= nr_cpu_ids)
> +			cachehot_cpu = cpumask_first_and(lowest_mask,
> +						   sched_domain_span(sd));
> +
>  		if (sd->flags & SD_WAKE_AFFINE) {
> -			int best_cpu;
> +			int wakeaffine_cpu;
>  
>  			/*
>  			 * "this_cpu" is cheaper to preempt than a
> @@ -1579,16 +1584,20 @@ static int find_lowest_rq(struct task_struct *task)
>  				return this_cpu;
>  			}
>  
> -			best_cpu = cpumask_first_and(lowest_mask,
> +			wakeaffine_cpu = cpumask_first_and(lowest_mask,
>  						     sched_domain_span(sd));
> -			if (best_cpu < nr_cpu_ids) {
> +			if (wakeaffine_cpu < nr_cpu_ids) {
>  				rcu_read_unlock();
> -				return best_cpu;
> +				return wakeaffine_cpu;
>  			}
>  		}
>  	}
>  	rcu_read_unlock();
>  
> +	/* most likely cache-hot */
> +	if (cachehot_cpu < nr_cpu_ids)
> +		return cachehot_cpu;
> +
>  	/*
>  	 * And finally, if there were no matches within the domains
>  	 * just give the caller *something* to work with from the compatible
> -- 
> 1.9.1
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 5/5] sched/rt: Optimize find_lowest_rq() to select a cache hot cpu
  2015-01-27 14:21   ` Peter Zijlstra
@ 2015-01-27 14:56     ` Steven Rostedt
  2015-01-27 16:28       ` Peter Zijlstra
  2015-01-29 16:42       ` Xunlei Pang
  0 siblings, 2 replies; 24+ messages in thread
From: Steven Rostedt @ 2015-01-27 14:56 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Xunlei Pang, linux-kernel, Juri Lelli

On Tue, 27 Jan 2015 15:21:36 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> On Mon, Jan 19, 2015 at 04:49:40AM +0000, Xunlei Pang wrote:
> > In find_lowest_rq(), if we can't find a wake_affine cpu in the
> > sched_domains, we can still determine a cache-hot cpu instead of
> > simply calling "cpumask_any(lowest_mask)", which always returns the
> > first cpu in the mask.
> > 
> > So, we can determine the cache-hot cpu in passing during the
> > sched_domain iteration.
> 
> Steve, I'm not getting this. Why are we using WAKE_AFFINE here?
> 

It originated from Gregory Haskins topology patches. See 
 6e1254d2c41215da27025add8900ed187bca121d

-- Steve

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/5] sched/deadline: Remove cpu_active_mask from cpudl_find()
  2015-01-19  4:49 ` [PATCH 2/5] sched/deadline: Remove cpu_active_mask from cpudl_find() Xunlei Pang
@ 2015-01-27 15:04   ` Peter Zijlstra
  2015-02-04 14:36   ` [tip:sched/core] " tip-bot for Xunlei Pang
  1 sibling, 0 replies; 24+ messages in thread
From: Peter Zijlstra @ 2015-01-27 15:04 UTC (permalink / raw)
  To: Xunlei Pang; +Cc: linux-kernel, Juri Lelli

On Mon, Jan 19, 2015 at 04:49:37AM +0000, Xunlei Pang wrote:
> cpu_active_mask is rarely changed (only on hotplug), so remove this
> operation to gain a little performance.
> 
> If there is a change in cpu_active_mask, rq_online_dl() and
> rq_offline_dl() should take care of it normally, so
> cpudl::free_cpus carries enough information for us.
> 
> For the rare case when a task is put onto a dying cpu (which
> rq_offline_dl() can't handle in a timely fashion), it will be handled
> through _cpu_down()->...->multi_cpu_stop()->migration_call()
> ->migrate_tasks(), preventing the task from hanging on the
> dead cpu.

Maybe; I'll need to go crawl through the long and twisted history of
active_mask here :/

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 5/5] sched/rt: Optimize find_lowest_rq() to select a cache hot cpu
  2015-01-27 14:56     ` Steven Rostedt
@ 2015-01-27 16:28       ` Peter Zijlstra
  2015-01-29 16:42       ` Xunlei Pang
  1 sibling, 0 replies; 24+ messages in thread
From: Peter Zijlstra @ 2015-01-27 16:28 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Xunlei Pang, linux-kernel, Juri Lelli

On Tue, Jan 27, 2015 at 09:56:26AM -0500, Steven Rostedt wrote:
> On Tue, 27 Jan 2015 15:21:36 +0100
> Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > On Mon, Jan 19, 2015 at 04:49:40AM +0000, Xunlei Pang wrote:
> > > In find_lowest_rq(), if we can't find a wake_affine cpu in the
> > > sched_domains, we can still determine a cache-hot cpu instead of
> > > simply calling "cpumask_any(lowest_mask)", which always returns the
> > > first cpu in the mask.
> > > 
> > > So, we can determine the cache-hot cpu in passing during the
> > > sched_domain iteration.
> > 
> > Steve, I'm not getting this. Why are we using WAKE_AFFINE here?
> > 
> 
> It originated from Gregory Haskins topology patches. See 
>  6e1254d2c41215da27025add8900ed187bca121d

Indeed so; it seems an arbitrary choice.

And the proposed patch seems like a convoluted way to simply remove the
->flags & SD_WAKE_AFFINE test.

Of course, the entire domain loop there assumes a lower domain is
better; yay for SMT being such a good counter example ;-)

Of course, if we remove it here; we should do too for deadline.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/5] sched/deadline: Fix wrong cpudl_find() in check_preempt_equal_dl()
  2015-01-19  4:49 ` [PATCH 3/5] sched/deadline: Fix wrong cpudl_find() in check_preempt_equal_dl() Xunlei Pang
  2015-01-27 12:48   ` Peter Zijlstra
@ 2015-01-27 16:47   ` Peter Zijlstra
  2015-01-28 15:18     ` Xunlei Pang
  1 sibling, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2015-01-27 16:47 UTC (permalink / raw)
  To: Xunlei Pang; +Cc: linux-kernel, Juri Lelli

On Mon, Jan 19, 2015 at 04:49:38AM +0000, Xunlei Pang wrote:
> In check_preempt_equal_dl(), cpudl_find() is called with a NULL
> later_mask, so cpudl_find() doesn't check cpudl::free_cpus at all.
> 
> This patch tackles the issue by always passing a non-NULL later_mask
> to cpudl_find().

Fix what issue? Afaict this is an optimization not a fix.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 4/5] sched/rt: Consider deadline tasks in cpupri_find()
  2015-01-27 12:58   ` Peter Zijlstra
  2015-01-27 14:18     ` Peter Zijlstra
@ 2015-01-27 23:04     ` Steven Rostedt
  2015-01-28 15:21       ` Xunlei Pang
  1 sibling, 1 reply; 24+ messages in thread
From: Steven Rostedt @ 2015-01-27 23:04 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Xunlei Pang, linux-kernel, Juri Lelli

On Tue, 27 Jan 2015 13:58:59 +0100
Peter Zijlstra <peterz@infradead.org> wrote:


> Not a bad idea, Cc'ed Steve who likes to look after the RT bits,
> excessive quoting for him.

Yep I do.


> > index ade2958..48c9576 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -5652,6 +5652,8 @@ static int init_rootdomain(struct root_domain *rd)
> >  
> >  	if (cpupri_init(&rd->cpupri) != 0)
> >  		goto free_rto_mask;
> > +
> > +	rd->cpupri.cpudl = &rd->cpudl;
> 
> This is disgusting though; it breaks the cpuri abstraction. Why not pass
> in the mask in the one place you actually need it?

Yeah, probably should change cpupri_init() to take the rd->cpudl as a
parameter.

Rest looks good (ignoring Peter's other comment that he realized wasn't
an issue).

-- Steve


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/5] sched/deadline: Fix wrong cpudl_find() in check_preempt_equal_dl()
  2015-01-27 16:47   ` Peter Zijlstra
@ 2015-01-28 15:18     ` Xunlei Pang
  0 siblings, 0 replies; 24+ messages in thread
From: Xunlei Pang @ 2015-01-28 15:18 UTC (permalink / raw)
  To: Peter Zijlstra, Steven Rostedt; +Cc: lkml, Juri Lelli

Hi Peter,

On 28 January 2015 at 00:47, Peter Zijlstra <peterz@infradead.org> wrote:
> On Mon, Jan 19, 2015 at 04:49:38AM +0000, Xunlei Pang wrote:
>> In check_preempt_equal_dl(), cpudl_find() is called with a NULL
>> later_mask, so cpudl_find() doesn't check cpudl::free_cpus at all.
>>
>> This patch tackles the issue by always passing a non-NULL later_mask
>> to cpudl_find().
>
> Fix what issue? Afaict this is an optimization not a fix.

Currently, check_preempt_equal_dl() invokes cpudl_find() with a NULL
mask, so cpudl_find() won't check cpudl::free_cpus. For example, on a
4-core system where CPU0~CPU2 are all idle (free of deadline tasks), a
deadline task is woken on CPU3, which already has one running deadline
task with the same deadline value; cpudl_find() will then fail, leaving
CPU3 with two deadline tasks while the other cpus are idle, although
the woken task should obviously be placed on an idle cpu.

Thanks,
Xunlei

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 4/5] sched/rt: Consider deadline tasks in cpupri_find()
  2015-01-27 23:04     ` Steven Rostedt
@ 2015-01-28 15:21       ` Xunlei Pang
  0 siblings, 0 replies; 24+ messages in thread
From: Xunlei Pang @ 2015-01-28 15:21 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Peter Zijlstra, lkml, Juri Lelli

Hi Steven,

On 28 January 2015 at 07:04, Steven Rostedt <rostedt@goodmis.org> wrote:
> On Tue, 27 Jan 2015 13:58:59 +0100
> Peter Zijlstra <peterz@infradead.org> wrote:
>
>
>> Not a bad idea, Cc'ed Steve who likes to look after the RT bits,
>> excessive quoting for him.
>
> Yep I do.
>
>
>> > index ade2958..48c9576 100644
>> > --- a/kernel/sched/core.c
>> > +++ b/kernel/sched/core.c
>> > @@ -5652,6 +5652,8 @@ static int init_rootdomain(struct root_domain *rd)
>> >
>> >     if (cpupri_init(&rd->cpupri) != 0)
>> >             goto free_rto_mask;
>> > +
>> > +   rd->cpupri.cpudl = &rd->cpudl;
>>
>> This is disgusting though; it breaks the cpuri abstraction. Why not pass
>> in the mask in the one place you actually need it?
>
> Yeah, probably should change cpupri_init() to take the rd->cpudl as a
> parameter.

Ok, thanks for your advice.
I'll refine this patch and send it out separately soon.

Thanks,
Xunlei

>
> Rest looks good (ignoring Peter's other comment that he realized wasn't
> an issue).
>
> -- Steve
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 5/5] sched/rt: Optimize find_lowest_rq() to select a cache hot cpu
  2015-01-27 14:56     ` Steven Rostedt
  2015-01-27 16:28       ` Peter Zijlstra
@ 2015-01-29 16:42       ` Xunlei Pang
  2015-01-29 17:17         ` Steven Rostedt
  2015-01-29 19:23         ` Peter Zijlstra
  1 sibling, 2 replies; 24+ messages in thread
From: Xunlei Pang @ 2015-01-29 16:42 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Peter Zijlstra, lkml, Juri Lelli

On 27 January 2015 at 22:56, Steven Rostedt <rostedt@goodmis.org> wrote:
> On Tue, 27 Jan 2015 15:21:36 +0100
> Peter Zijlstra <peterz@infradead.org> wrote:
>
>> On Mon, Jan 19, 2015 at 04:49:40AM +0000, Xunlei Pang wrote:
>> > In find_lowest_rq(), if we can't find a wake_affine cpu in the
>> > sched_domains, we can still determine a cache-hot cpu instead of
>> > simply calling "cpumask_any(lowest_mask)", which always returns the
>> > first cpu in the mask.
>> >
>> > So, we can determine the cache-hot cpu in passing during the
>> > sched_domain iteration.
>>
>> Steve, I'm not getting this. Why are we using WAKE_AFFINE here?
>>
>
> It originated from Gregory Haskins topology patches. See
>  6e1254d2c41215da27025add8900ed187bca121d

Hi Peter, Steve,

I think responsiveness is the most important feature for RT tasks,
so in significance: response latency > cache > SMT.

I was wondering if we can take the cpuidle state into account, like
the current find_idlest_cpu() for CFS does? cpupri_find() can easily
be modified to indicate the CPUPRI_IDLE case, then we can select an
optimal idle cpu to improve RT tasks' responsiveness. For the other
cases (mostly a non-idle cpu), I think we can rely on the existing
sched_domain iteration to select a cache-hot cpu without caring too
much about SMT.

Any comments on this?

Thanks,
Xunlei

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 5/5] sched/rt: Optimize find_lowest_rq() to select a cache hot cpu
  2015-01-29 16:42       ` Xunlei Pang
@ 2015-01-29 17:17         ` Steven Rostedt
  2015-01-29 19:23         ` Peter Zijlstra
  1 sibling, 0 replies; 24+ messages in thread
From: Steven Rostedt @ 2015-01-29 17:17 UTC (permalink / raw)
  To: Xunlei Pang; +Cc: Peter Zijlstra, lkml, Juri Lelli

On Fri, 30 Jan 2015 00:42:47 +0800
Xunlei Pang <pang.xunlei@linaro.org> wrote:

 
> I think responsiveness is the most important feature for RT tasks,
> so in significance: response latency > cache > SMT.

Unfortunately, sometimes cache affects response latency.

> 
> I was wondering if we can take the cpuidle state into account, like
> the current find_idlest_cpu() for CFS does? cpupri_find() can easily
> be modified to indicate the CPUPRI_IDLE case, then we can select an
> optimal idle cpu to improve RT tasks' responsiveness. For the other
> cases (mostly a non-idle cpu),

Even if that idle cpu happens to be on another NUMA node?

-- Steve

> I think we can rely on the existing sched_domain iteration to select
> a cache-hot cpu without caring too much about SMT.



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 5/5] sched/rt: Optimize find_lowest_rq() to select a cache hot cpu
  2015-01-29 16:42       ` Xunlei Pang
  2015-01-29 17:17         ` Steven Rostedt
@ 2015-01-29 19:23         ` Peter Zijlstra
  2015-02-04 13:07           ` Xunlei Pang
  1 sibling, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2015-01-29 19:23 UTC (permalink / raw)
  To: Xunlei Pang; +Cc: Steven Rostedt, lkml, Juri Lelli

On Fri, Jan 30, 2015 at 12:42:47AM +0800, Xunlei Pang wrote:
> On 27 January 2015 at 22:56, Steven Rostedt <rostedt@goodmis.org> wrote:
> > On Tue, 27 Jan 2015 15:21:36 +0100
> > Peter Zijlstra <peterz@infradead.org> wrote:
> >
> >> On Mon, Jan 19, 2015 at 04:49:40AM +0000, Xunlei Pang wrote:
> >> > In find_lowest_rq(), if we can't find a wake_affine cpu in the
> >> > sched_domains, we can still determine a cache-hot cpu instead of
> >> > simply calling "cpumask_any(lowest_mask)", which always returns the
> >> > first cpu in the mask.
> >> >
> >> > So, we can determine the cache-hot cpu in passing during the
> >> > sched_domain iteration.
> >>
> >> Steve, I'm not getting this. Why are we using WAKE_AFFINE here?
> >>
> >
> > It originated from Gregory Haskins topology patches. See
> >  6e1254d2c41215da27025add8900ed187bca121d
> 
> Hi Peter, Steve,
> 
> I think responsiveness is the most important feature for RT tasks,
> so in significance: response latency > cache > SMT.

No, deterministic execution time is the most important feature. And
for that SMT utterly blows. So much so, in fact, that rule #1 for -rt
work is to disable SMT on your hardware.

The same argument can be made for shared caches. If your !rt workload
blows away the cache of the rt workload, you lose.

> I was wondering if we can take the cpuidle state into account, like
> the current find_idlest_cpu() for CFS does? cpupri_find() can easily
> be modified to indicate the CPUPRI_IDLE case, then we can select an
> optimal idle cpu to improve RT tasks' responsiveness. For the other
> cases (mostly a non-idle cpu), I think we can rely on the existing
> sched_domain iteration to select a cache-hot cpu without caring too
> much about SMT.

your patch calls something 'cache-hot' when crossing large numa
domains; don't you think that's somewhat stretching the definition of
hot?

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [tip:sched/core] sched/deadline: Modify cpudl:: free_cpus to reflect rd->online
  2015-01-19  4:49 [PATCH 1/5] sched/deadline: Modify cpudl::free_cpus to reflect rd->online Xunlei Pang
                   ` (4 preceding siblings ...)
  2015-01-23 18:09 ` [PATCH 1/5] sched/deadline: Modify cpudl::free_cpus to reflect rd->online Xunlei Pang
@ 2015-02-01 17:53 ` tip-bot for Xunlei Pang
  5 siblings, 0 replies; 24+ messages in thread
From: tip-bot for Xunlei Pang @ 2015-02-01 17:53 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: juri.lelli, linux-kernel, pang.xunlei, mingo, tglx, hpa,
	torvalds, peterz

Commit-ID:  16b269436b7213ebc01dcfcc9dafa8535b676ccb
Gitweb:     http://git.kernel.org/tip/16b269436b7213ebc01dcfcc9dafa8535b676ccb
Author:     Xunlei Pang <pang.xunlei@linaro.org>
AuthorDate: Mon, 19 Jan 2015 04:49:36 +0000
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Fri, 30 Jan 2015 19:39:16 +0100

sched/deadline: Modify cpudl::free_cpus to reflect rd->online

Currently, cpudl::free_cpus contains all CPUs during init, see
cpudl_init(). When calling cpudl_find(), we have to add rd->span
to avoid selecting the cpu outside the current root domain, because
cpus_allowed cannot be depended on when performing clustered
scheduling using the cpuset, see find_later_rq().

This patch adds cpudl_set_freecpu() and cpudl_clear_freecpu() for
changing cpudl::free_cpus when doing rq_online_dl()/rq_offline_dl(),
so we can avoid the rd->span operation when calling cpudl_find()
in find_later_rq().

Signed-off-by: Xunlei Pang <pang.xunlei@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1421642980-10045-1-git-send-email-pang.xunlei@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/cpudeadline.c | 28 ++++++++++++++++++++++++----
 kernel/sched/cpudeadline.h |  2 ++
 kernel/sched/deadline.c    |  5 ++---
 3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 539ca3c..fd9d3fb 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -107,7 +107,9 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	int best_cpu = -1;
 	const struct sched_dl_entity *dl_se = &p->dl;
 
-	if (later_mask && cpumask_and(later_mask, later_mask, cp->free_cpus)) {
+	if (later_mask &&
+	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed) &&
+	    cpumask_and(later_mask, later_mask, cpu_active_mask)) {
 		best_cpu = cpumask_any(later_mask);
 		goto out;
 	} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
@@ -186,6 +188,26 @@ out:
 }
 
 /*
+ * cpudl_set_freecpu - Set the cpudl.free_cpus
+ * @cp: the cpudl max-heap context
+ * @cpu: rd attached cpu
+ */
+void cpudl_set_freecpu(struct cpudl *cp, int cpu)
+{
+	cpumask_set_cpu(cpu, cp->free_cpus);
+}
+
+/*
+ * cpudl_clear_freecpu - Clear the cpudl.free_cpus
+ * @cp: the cpudl max-heap context
+ * @cpu: rd attached cpu
+ */
+void cpudl_clear_freecpu(struct cpudl *cp, int cpu)
+{
+	cpumask_clear_cpu(cpu, cp->free_cpus);
+}
+
+/*
  * cpudl_init - initialize the cpudl structure
  * @cp: the cpudl max-heap context
  */
@@ -203,7 +225,7 @@ int cpudl_init(struct cpudl *cp)
 	if (!cp->elements)
 		return -ENOMEM;
 
-	if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
 		kfree(cp->elements);
 		return -ENOMEM;
 	}
@@ -211,8 +233,6 @@ int cpudl_init(struct cpudl *cp)
 	for_each_possible_cpu(i)
 		cp->elements[i].idx = IDX_INVALID;
 
-	cpumask_setall(cp->free_cpus);
-
 	return 0;
 }
 
diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h
index 020039b..1a0a6ef 100644
--- a/kernel/sched/cpudeadline.h
+++ b/kernel/sched/cpudeadline.h
@@ -24,6 +24,8 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	       struct cpumask *later_mask);
 void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid);
 int cpudl_init(struct cpudl *cp);
+void cpudl_set_freecpu(struct cpudl *cp, int cpu);
+void cpudl_clear_freecpu(struct cpudl *cp, int cpu);
 void cpudl_cleanup(struct cpudl *cp);
 #endif /* CONFIG_SMP */
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index b52092f..e7b2722 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1165,9 +1165,6 @@ static int find_later_rq(struct task_struct *task)
 	 * We have to consider system topology and task affinity
 	 * first, then we can look for a suitable cpu.
 	 */
-	cpumask_copy(later_mask, task_rq(task)->rd->span);
-	cpumask_and(later_mask, later_mask, cpu_active_mask);
-	cpumask_and(later_mask, later_mask, &task->cpus_allowed);
 	best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
 			task, later_mask);
 	if (best_cpu == -1)
@@ -1562,6 +1559,7 @@ static void rq_online_dl(struct rq *rq)
 	if (rq->dl.overloaded)
 		dl_set_overload(rq);
 
+	cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
 	if (rq->dl.dl_nr_running > 0)
 		cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);
 }
@@ -1573,6 +1571,7 @@ static void rq_offline_dl(struct rq *rq)
 		dl_clear_overload(rq);
 
 	cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
+	cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
 }
 
 void init_sched_dl_class(void)

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 5/5] sched/rt: Optimize find_lowest_rq() to select a cache hot cpu
  2015-01-29 19:23         ` Peter Zijlstra
@ 2015-02-04 13:07           ` Xunlei Pang
  0 siblings, 0 replies; 24+ messages in thread
From: Xunlei Pang @ 2015-02-04 13:07 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Steven Rostedt, lkml, Juri Lelli

Hi Peter, Steve,

Thanks for all your valuable sharing.
I'll keep them in mind.

Regards,
Xunlei

On 30 January 2015 at 03:23, Peter Zijlstra <peterz@infradead.org> wrote:
> On Fri, Jan 30, 2015 at 12:42:47AM +0800, Xunlei Pang wrote:
>> On 27 January 2015 at 22:56, Steven Rostedt <rostedt@goodmis.org> wrote:
>> > On Tue, 27 Jan 2015 15:21:36 +0100
>> > Peter Zijlstra <peterz@infradead.org> wrote:
>> >
>> >> On Mon, Jan 19, 2015 at 04:49:40AM +0000, Xunlei Pang wrote:
>> >> > In find_lowest_rq(), if we can't find a wake_affine cpu in the
>> >> > sched_domains, we can still determine a cache-hot cpu instead of
>> >> > simply calling "cpumask_any(lowest_mask)", which always returns the
>> >> > first cpu in the mask.
>> >> >
>> >> > So, we can determine the cache-hot cpu in passing during the
>> >> > sched_domain iteration.
>> >>
>> >> Steve, I'm not getting this. Why are we using WAKE_AFFINE here?
>> >>
>> >
>> > It originated from Gregory Haskins topology patches. See
>> >  6e1254d2c41215da27025add8900ed187bca121d
>>
>> Hi Peter, Steve,
>>
>> I think responsiveness is the most important feature for RT tasks,
>> so in significance: response latency > cache > SMT.
>
> No, deterministic execution time is the most important feature. And
> for that SMT utterly blows. So much so, in fact, that rule #1 for -rt
> work is to disable SMT on your hardware.
>
> The same argument can be made for shared caches. If your !rt workload
> blows away the cache of the rt workload, you lose.
>
>> I was wondering if we can take the cpuidle state into account, like
>> the current find_idlest_cpu() for CFS does? cpupri_find() can easily
>> be modified to indicate the CPUPRI_IDLE case, then we can select an
>> optimal idle cpu to improve RT tasks' responsiveness. For the other
>> cases (mostly a non-idle cpu), I think we can rely on the existing
>> sched_domain iteration to select a cache-hot cpu without caring too
>> much about SMT.
>
> your patch calls something 'cache-hot' when crossing large numa
> domains; don't you think that's somewhat stretching the definition of
> hot?

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [tip:sched/core] sched/deadline: Remove cpu_active_mask from cpudl_find()
  2015-01-19  4:49 ` [PATCH 2/5] sched/deadline: Remove cpu_active_mask from cpudl_find() Xunlei Pang
  2015-01-27 15:04   ` Peter Zijlstra
@ 2015-02-04 14:36   ` tip-bot for Xunlei Pang
  1 sibling, 0 replies; 24+ messages in thread
From: tip-bot for Xunlei Pang @ 2015-02-04 14:36 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: peterz, tglx, torvalds, mingo, pang.xunlei, juri.lelli, hpa,
	linux-kernel

Commit-ID:  9659e1eeee28f7025b6545934d644d19e9c6e603
Gitweb:     http://git.kernel.org/tip/9659e1eeee28f7025b6545934d644d19e9c6e603
Author:     Xunlei Pang <pang.xunlei@linaro.org>
AuthorDate: Mon, 19 Jan 2015 04:49:37 +0000
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 4 Feb 2015 07:52:29 +0100

sched/deadline: Remove cpu_active_mask from cpudl_find()

cpu_active_mask is rarely changed (only on hotplug), so remove this
operation to gain a little performance.

If there is a change in cpu_active_mask, rq_online_dl() and
rq_offline_dl() should take care of it normally, so cpudl::free_cpus
carries enough information for us.

For the rare case when a task is put onto a dying cpu (which
rq_offline_dl() can't handle in a timely fashion), it will be
handled through _cpu_down()->...->multi_cpu_stop()->migration_call()
->migrate_tasks(), preventing the task from hanging on the
dead cpu.

Cc: Juri Lelli <juri.lelli@gmail.com>
Signed-off-by: Xunlei Pang <pang.xunlei@linaro.org>
[peterz: changelog]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1421642980-10045-2-git-send-email-pang.xunlei@linaro.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/cpudeadline.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index fd9d3fb..c6acb07 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -108,8 +108,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	const struct sched_dl_entity *dl_se = &p->dl;
 
 	if (later_mask &&
-	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed) &&
-	    cpumask_and(later_mask, later_mask, cpu_active_mask)) {
+	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
 		best_cpu = cpumask_any(later_mask);
 		goto out;
 	} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&

^ permalink raw reply related	[flat|nested] 24+ messages in thread

Thread overview: 24+ messages
2015-01-19  4:49 [PATCH 1/5] sched/deadline: Modify cpudl::free_cpus to reflect rd->online Xunlei Pang
2015-01-19  4:49 ` [PATCH 2/5] sched/deadline: Remove cpu_active_mask from cpudl_find() Xunlei Pang
2015-01-27 15:04   ` Peter Zijlstra
2015-02-04 14:36   ` [tip:sched/core] " tip-bot for Xunlei Pang
2015-01-19  4:49 ` [PATCH 3/5] sched/deadline: Fix wrong cpudl_find() in check_preempt_equal_dl() Xunlei Pang
2015-01-27 12:48   ` Peter Zijlstra
2015-01-27 14:15     ` Peter Zijlstra
2015-01-27 16:47   ` Peter Zijlstra
2015-01-28 15:18     ` Xunlei Pang
2015-01-19  4:49 ` [PATCH 4/5] sched/rt: Consider deadline tasks in cpupri_find() Xunlei Pang
2015-01-27 12:58   ` Peter Zijlstra
2015-01-27 14:18     ` Peter Zijlstra
2015-01-27 23:04     ` Steven Rostedt
2015-01-28 15:21       ` Xunlei Pang
2015-01-19  4:49 ` [PATCH 5/5] sched/rt: Optimize find_lowest_rq() to select a cache hot cpu Xunlei Pang
2015-01-27 14:21   ` Peter Zijlstra
2015-01-27 14:56     ` Steven Rostedt
2015-01-27 16:28       ` Peter Zijlstra
2015-01-29 16:42       ` Xunlei Pang
2015-01-29 17:17         ` Steven Rostedt
2015-01-29 19:23         ` Peter Zijlstra
2015-02-04 13:07           ` Xunlei Pang
2015-01-23 18:09 ` [PATCH 1/5] sched/deadline: Modify cpudl::free_cpus to reflect rd->online Xunlei Pang
2015-02-01 17:53 ` [tip:sched/core] sched/deadline: Modify cpudl:: free_cpus " tip-bot for Xunlei Pang
