All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] sched/deadline: Modify cpudl.free_cpus to reflect rd->span
@ 2014-11-17 16:11 pang.xunlei
  2014-11-17 16:11 ` [PATCH 2/2] sched/deadline: Fix wrong cpudl_find() in check_preempt_equal_dl() pang.xunlei
  2014-11-17 19:39 ` [PATCH 1/2] sched/deadline: Modify cpudl.free_cpus to reflect rd->span Steven Rostedt
  0 siblings, 2 replies; 6+ messages in thread
From: pang.xunlei @ 2014-11-17 16:11 UTC (permalink / raw)
  To: linux-kernel; +Cc: Peter Zijlstra, Steven Rostedt, Juri Lelli, pang.xunlei

Currently, cpudl.free_cpus contains all cpus during init(see cpudl_init()),
so when calling cpudl_find() we have to add rd->span cpumask(cpus_allowed is
undependable when performing clustered scheduling using the cpuset) to avoid
selecting the cpu outside current root domain, see find_later_rq().

This patch adds cpudl_set_freecpu() to initialize cpudl.free_cpus when doing
rq_attach_root(), so we can avoid the extra rd->span operation when calling
cpudl_find().

Signed-off-by: pang.xunlei <pang.xunlei@linaro.org>
---
 kernel/sched/core.c        |  2 ++
 kernel/sched/cpudeadline.c | 19 +++++++++++++++----
 kernel/sched/cpudeadline.h |  1 +
 kernel/sched/deadline.c    |  3 ---
 4 files changed, 18 insertions(+), 7 deletions(-)
 mode change 100644 => 100755 kernel/sched/cpudeadline.c
 mode change 100644 => 100755 kernel/sched/deadline.c

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 240157c..1b417de 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5584,6 +5584,8 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
 	rq->rd = rd;
 
 	cpumask_set_cpu(rq->cpu, rd->span);
+	cpudl_set_freecpu(&rd->cpudl, rq->cpu);
+
 	if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
 		set_rq_online(rq);
 
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
old mode 100644
new mode 100755
index 539ca3c..9a69353
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -107,7 +107,9 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	int best_cpu = -1;
 	const struct sched_dl_entity *dl_se = &p->dl;
 
-	if (later_mask && cpumask_and(later_mask, later_mask, cp->free_cpus)) {
+	if (later_mask && cpumask_and(later_mask, cp->free_cpus,
+			&p->cpus_allowed) && cpumask_and(later_mask,
+			later_mask, cpu_active_mask)) {
 		best_cpu = cpumask_any(later_mask);
 		goto out;
 	} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
@@ -186,6 +188,17 @@ out:
 }
 
 /*
+ * cpudl_set_freecpu - Set the cpudl.free_cpus
+ * @cp: the cpudl max-heap context
+ * @cpu: rd attached cpu
+ */
+void cpudl_set_freecpu(struct cpudl *cp, int cpu)
+{
+	cpumask_set_cpu(cpu, cp->free_cpus);
+}
+
+
+/*
  * cpudl_init - initialize the cpudl structure
  * @cp: the cpudl max-heap context
  */
@@ -203,7 +216,7 @@ int cpudl_init(struct cpudl *cp)
 	if (!cp->elements)
 		return -ENOMEM;
 
-	if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
 		kfree(cp->elements);
 		return -ENOMEM;
 	}
@@ -211,8 +224,6 @@ int cpudl_init(struct cpudl *cp)
 	for_each_possible_cpu(i)
 		cp->elements[i].idx = IDX_INVALID;
 
-	cpumask_setall(cp->free_cpus);
-
 	return 0;
 }
 
diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h
index 020039b..4a10a65 100755
--- a/kernel/sched/cpudeadline.h
+++ b/kernel/sched/cpudeadline.h
@@ -24,6 +24,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	       struct cpumask *later_mask);
 void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid);
 int cpudl_init(struct cpudl *cp);
+void cpudl_set_freecpu(struct cpudl *cp, int cpu);
 void cpudl_cleanup(struct cpudl *cp);
 #endif /* CONFIG_SMP */
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
old mode 100644
new mode 100755
index 5285332..bd83272
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1187,9 +1187,6 @@ static int find_later_rq(struct task_struct *task)
 	 * We have to consider system topology and task affinity
 	 * first, then we can look for a suitable cpu.
 	 */
-	cpumask_copy(later_mask, task_rq(task)->rd->span);
-	cpumask_and(later_mask, later_mask, cpu_active_mask);
-	cpumask_and(later_mask, later_mask, &task->cpus_allowed);
 	best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
 			task, later_mask);
 	if (best_cpu == -1)
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] sched/deadline: Fix wrong cpudl_find() in check_preempt_equal_dl()
  2014-11-17 16:11 [PATCH 1/2] sched/deadline: Modify cpudl.free_cpus to reflect rd->span pang.xunlei
@ 2014-11-17 16:11 ` pang.xunlei
  2014-11-17 20:15   ` Steven Rostedt
  2014-11-17 19:39 ` [PATCH 1/2] sched/deadline: Modify cpudl.free_cpus to reflect rd->span Steven Rostedt
  1 sibling, 1 reply; 6+ messages in thread
From: pang.xunlei @ 2014-11-17 16:11 UTC (permalink / raw)
  To: linux-kernel; +Cc: Peter Zijlstra, Steven Rostedt, Juri Lelli, pang.xunlei

In check_preempt_equal_dl(), cpudl_find() is called with a NULL later_mask,
thus cpudl_find() here doesn't check cpudl.free_cpus at all.

This patch takles this issue by always passing a non-NULL cpumask to cpudl_find(),
and assigns later_mask in this function.

Signed-off-by: pang.xunlei <pang.xunlei@linaro.org>
---
 kernel/sched/cpudeadline.c | 14 ++++++--------
 kernel/sched/deadline.c    | 10 ++++++----
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 9a69353..623f7b5 100755
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -97,7 +97,7 @@ static inline int cpudl_maximum(struct cpudl *cp)
  * cpudl_find - find the best (later-dl) CPU in the system
  * @cp: the cpudl max-heap context
  * @p: the task
- * @later_mask: a mask to fill in with the selected CPUs (or NULL)
+ * @later_mask: a mask to fill in with the selected CPUs (not NULL)
  *
  * Returns: int - best CPU (heap maximum if suitable)
  */
@@ -107,16 +107,14 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	int best_cpu = -1;
 	const struct sched_dl_entity *dl_se = &p->dl;
 
-	if (later_mask && cpumask_and(later_mask, cp->free_cpus,
-			&p->cpus_allowed) && cpumask_and(later_mask,
-			later_mask, cpu_active_mask)) {
+	cpumask_and(later_mask, cpu_active_mask, &p->cpus_allowed);
+	if (cpumask_and(later_mask, later_mask, cp->free_cpus)) {
 		best_cpu = cpumask_any(later_mask);
 		goto out;
-	} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
-			dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
+	} else if (cpumask_and(later_mask, cpumask_of(cpudl_maximum(cp)),
+			&p->cpus_allowed) && dl_time_before(dl_se->deadline,
+			cp->elements[0].dl)) {
 		best_cpu = cpudl_maximum(cp);
-		if (later_mask)
-			cpumask_set_cpu(best_cpu, later_mask);
 	}
 
 out:
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index bd83272..3ecf838 100755
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -965,14 +965,18 @@ out:
 	return cpu;
 }
 
+static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
+
 static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
 {
+	struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
+
 	/*
 	 * Current can't be migrated, useless to reschedule,
 	 * let's hope p can move out.
 	 */
 	if (rq->curr->nr_cpus_allowed == 1 ||
-	    cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
+	    cpudl_find(&rq->rd->cpudl, rq->curr, later_mask) == -1)
 		return;
 
 	/*
@@ -980,7 +984,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
 	 * see if it is pushed or pulled somewhere else.
 	 */
 	if (p->nr_cpus_allowed != 1 &&
-	    cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
+	    cpudl_find(&rq->rd->cpudl, p, later_mask) != -1)
 		return;
 
 	resched_curr(rq);
@@ -1167,8 +1171,6 @@ next_node:
 	return NULL;
 }
 
-static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
-
 static int find_later_rq(struct task_struct *task)
 {
 	struct sched_domain *sd;
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] sched/deadline: Modify cpudl.free_cpus to reflect rd->span
  2014-11-17 16:11 [PATCH 1/2] sched/deadline: Modify cpudl.free_cpus to reflect rd->span pang.xunlei
  2014-11-17 16:11 ` [PATCH 2/2] sched/deadline: Fix wrong cpudl_find() in check_preempt_equal_dl() pang.xunlei
@ 2014-11-17 19:39 ` Steven Rostedt
  2014-11-18 13:32   ` pang.xunlei
  1 sibling, 1 reply; 6+ messages in thread
From: Steven Rostedt @ 2014-11-17 19:39 UTC (permalink / raw)
  To: pang.xunlei; +Cc: linux-kernel, Peter Zijlstra, Juri Lelli

On Tue, 18 Nov 2014 00:11:03 +0800

> index 539ca3c..9a69353
> --- a/kernel/sched/cpudeadline.c
> +++ b/kernel/sched/cpudeadline.c
> @@ -107,7 +107,9 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
>  	int best_cpu = -1;
>  	const struct sched_dl_entity *dl_se = &p->dl;
>  
> -	if (later_mask && cpumask_and(later_mask, later_mask, cp->free_cpus)) {
> +	if (later_mask && cpumask_and(later_mask, cp->free_cpus,
> +			&p->cpus_allowed) && cpumask_and(later_mask,
> +			later_mask, cpu_active_mask)) {

I did a quick review of this patch, and it looks fine to me. But the
above looks ugly. Please change it to something more readable like:

	if (later_mask &&
	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed) &&
	    cpumask_and(later_mask, later_mask, cpu_active_mask)) {

Thanks,

-- Steve


>  		best_cpu = cpumask_any(later_mask);
>  		goto out;
>  	} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
> @@ -186,6 +188,17 @@ out:
>  }
>  


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] sched/deadline: Fix wrong cpudl_find() in check_preempt_equal_dl()
  2014-11-17 16:11 ` [PATCH 2/2] sched/deadline: Fix wrong cpudl_find() in check_preempt_equal_dl() pang.xunlei
@ 2014-11-17 20:15   ` Steven Rostedt
  2014-11-18 14:02     ` pang.xunlei
  0 siblings, 1 reply; 6+ messages in thread
From: Steven Rostedt @ 2014-11-17 20:15 UTC (permalink / raw)
  To: pang.xunlei; +Cc: linux-kernel, Peter Zijlstra, Juri Lelli

On Tue, 18 Nov 2014 00:11:04 +0800
"pang.xunlei" <pang.xunlei@linaro.org> wrote:

> In check_preempt_equal_dl(), cpudl_find() is called with a NULL later_mask,
> thus cpudl_find() here doesn't check cpudl.free_cpus at all.
> 
> This patch takles this issue by always passing a non-NULL cpumask to cpudl_find(),
> and assigns later_mask in this function.
> 
> Signed-off-by: pang.xunlei <pang.xunlei@linaro.org>
> ---
>  kernel/sched/cpudeadline.c | 14 ++++++--------
>  kernel/sched/deadline.c    | 10 ++++++----
>  2 files changed, 12 insertions(+), 12 deletions(-)
> 
> diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
> index 9a69353..623f7b5 100755
> --- a/kernel/sched/cpudeadline.c
> +++ b/kernel/sched/cpudeadline.c
> @@ -97,7 +97,7 @@ static inline int cpudl_maximum(struct cpudl *cp)
>   * cpudl_find - find the best (later-dl) CPU in the system
>   * @cp: the cpudl max-heap context
>   * @p: the task
> - * @later_mask: a mask to fill in with the selected CPUs (or NULL)
> + * @later_mask: a mask to fill in with the selected CPUs (not NULL)
>   *
>   * Returns: int - best CPU (heap maximum if suitable)
>   */
> @@ -107,16 +107,14 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
>  	int best_cpu = -1;
>  	const struct sched_dl_entity *dl_se = &p->dl;
>  
> -	if (later_mask && cpumask_and(later_mask, cp->free_cpus,
> -			&p->cpus_allowed) && cpumask_and(later_mask,
> -			later_mask, cpu_active_mask)) {
> +	cpumask_and(later_mask, cpu_active_mask, &p->cpus_allowed);
> +	if (cpumask_and(later_mask, later_mask, cp->free_cpus)) {
>  		best_cpu = cpumask_any(later_mask);
>  		goto out;
> -	} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
> -			dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
> +	} else if (cpumask_and(later_mask, cpumask_of(cpudl_maximum(cp)),
> +			&p->cpus_allowed) && dl_time_before(dl_se->deadline,
> +			cp->elements[0].dl)) {
>  		best_cpu = cpudl_maximum(cp);
> -		if (later_mask)
> -			cpumask_set_cpu(best_cpu, later_mask);
>  	}
>  

OK, I need to make this a before/after, as patch format sucks in this
case.

before:

	if (later_mask &&
	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed) &&
	    cpumask_and(later_mask, later_mask, cpu_active_mask)) {
		best_cpu = cpumask_any(later_mask);
		goto out;
	} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
		dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
		best_cpu = cpudl_maximum(cp);
		if (later_mask)
			cpumask_set_cpu(best_cpu, later_mask);
	}

after:

	cpumask_and(later_mask, cpu_active_mask, &p->cpus_allowed);
	if (cpumask_and(later_mask, later_mask, cp->free_cpus)) {
		best_cpu = cpumask_any(later_mask);
		goto out;
	} else if (cpumask_and(later_mask, cpumask_of(cpudl_maximum(cp)),
			&p->cpus_allowed) && dl_time_before(dl_se->deadline,
			cp->elements[0].dl)) {
		best_cpu = cpudl_maximum(cp);
	}

Again, the above else if is rather ugly. Note, if the first
cpumask_and() is zero, there's no reason to call cpumask_and() (which
could be expensive) the second time. Keep the two together with &&.

Also, I wouldn't convert the cpumask_test_cpu() to a cpumask_and(), as
I would think that would be more expensive to copy an entire mask
(think what happens when we have a thousand CPUs). Just doing a test,
and then if the test succeeds (remember, there's another test that
might fail after it), then just set the one bit.

--- Steve



>  out:
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index bd83272..3ecf838 100755
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -965,14 +965,18 @@ out:
>  	return cpu;
>  }
>  
> +static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
> +
>  static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
>  {
> +	struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
> +
>  	/*
>  	 * Current can't be migrated, useless to reschedule,
>  	 * let's hope p can move out.
>  	 */
>  	if (rq->curr->nr_cpus_allowed == 1 ||
> -	    cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
> +	    cpudl_find(&rq->rd->cpudl, rq->curr, later_mask) == -1)
>  		return;
>  
>  	/*
> @@ -980,7 +984,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
>  	 * see if it is pushed or pulled somewhere else.
>  	 */
>  	if (p->nr_cpus_allowed != 1 &&
> -	    cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
> +	    cpudl_find(&rq->rd->cpudl, p, later_mask) != -1)
>  		return;
>  
>  	resched_curr(rq);
> @@ -1167,8 +1171,6 @@ next_node:
>  	return NULL;
>  }
>  
> -static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
> -
>  static int find_later_rq(struct task_struct *task)
>  {
>  	struct sched_domain *sd;


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] sched/deadline: Modify cpudl.free_cpus to reflect rd->span
  2014-11-17 19:39 ` [PATCH 1/2] sched/deadline: Modify cpudl.free_cpus to reflect rd->span Steven Rostedt
@ 2014-11-18 13:32   ` pang.xunlei
  0 siblings, 0 replies; 6+ messages in thread
From: pang.xunlei @ 2014-11-18 13:32 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: lkml, Peter Zijlstra, Juri Lelli

On 18 November 2014 03:39, Steven Rostedt <rostedt@goodmis.org> wrote:
> On Tue, 18 Nov 2014 00:11:03 +0800
>
>> index 539ca3c..9a69353
>> --- a/kernel/sched/cpudeadline.c
>> +++ b/kernel/sched/cpudeadline.c
>> @@ -107,7 +107,9 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
>>       int best_cpu = -1;
>>       const struct sched_dl_entity *dl_se = &p->dl;
>>
>> -     if (later_mask && cpumask_and(later_mask, later_mask, cp->free_cpus)) {
>> +     if (later_mask && cpumask_and(later_mask, cp->free_cpus,
>> +                     &p->cpus_allowed) && cpumask_and(later_mask,
>> +                     later_mask, cpu_active_mask)) {
>
> I did a quick review of this patch, and it looks fine to me. But the
> above looks ugly. Please change it to something more readable like:

Yes, the following looks beautiful. I'll adjust it, thanks!
>
>         if (later_mask &&
>             cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed) &&
>             cpumask_and(later_mask, later_mask, cpu_active_mask)) {
>
> Thanks,
>
> -- Steve
>
>
>>               best_cpu = cpumask_any(later_mask);
>>               goto out;
>>       } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
>> @@ -186,6 +188,17 @@ out:
>>  }
>>
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] sched/deadline: Fix wrong cpudl_find() in check_preempt_equal_dl()
  2014-11-17 20:15   ` Steven Rostedt
@ 2014-11-18 14:02     ` pang.xunlei
  0 siblings, 0 replies; 6+ messages in thread
From: pang.xunlei @ 2014-11-18 14:02 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: lkml, Peter Zijlstra, Juri Lelli

On 18 November 2014 04:15, Steven Rostedt <rostedt@goodmis.org> wrote:
> On Tue, 18 Nov 2014 00:11:04 +0800
> "pang.xunlei" <pang.xunlei@linaro.org> wrote:
>
>> In check_preempt_equal_dl(), cpudl_find() is called with a NULL later_mask,
>> thus cpudl_find() here doesn't check cpudl.free_cpus at all.
>>
>> This patch takles this issue by always passing a non-NULL cpumask to cpudl_find(),
>> and assigns later_mask in this function.
>>
>> Signed-off-by: pang.xunlei <pang.xunlei@linaro.org>
>> ---
>>  kernel/sched/cpudeadline.c | 14 ++++++--------
>>  kernel/sched/deadline.c    | 10 ++++++----
>>  2 files changed, 12 insertions(+), 12 deletions(-)
>>
>> diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
>> index 9a69353..623f7b5 100755
>> --- a/kernel/sched/cpudeadline.c
>> +++ b/kernel/sched/cpudeadline.c
>> @@ -97,7 +97,7 @@ static inline int cpudl_maximum(struct cpudl *cp)
>>   * cpudl_find - find the best (later-dl) CPU in the system
>>   * @cp: the cpudl max-heap context
>>   * @p: the task
>> - * @later_mask: a mask to fill in with the selected CPUs (or NULL)
>> + * @later_mask: a mask to fill in with the selected CPUs (not NULL)
>>   *
>>   * Returns: int - best CPU (heap maximum if suitable)
>>   */
>> @@ -107,16 +107,14 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
>>       int best_cpu = -1;
>>       const struct sched_dl_entity *dl_se = &p->dl;
>>
>> -     if (later_mask && cpumask_and(later_mask, cp->free_cpus,
>> -                     &p->cpus_allowed) && cpumask_and(later_mask,
>> -                     later_mask, cpu_active_mask)) {
>> +     cpumask_and(later_mask, cpu_active_mask, &p->cpus_allowed);
>> +     if (cpumask_and(later_mask, later_mask, cp->free_cpus)) {
>>               best_cpu = cpumask_any(later_mask);
>>               goto out;
>> -     } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
>> -                     dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
>> +     } else if (cpumask_and(later_mask, cpumask_of(cpudl_maximum(cp)),
>> +                     &p->cpus_allowed) && dl_time_before(dl_se->deadline,
>> +                     cp->elements[0].dl)) {
>>               best_cpu = cpudl_maximum(cp);
>> -             if (later_mask)
>> -                     cpumask_set_cpu(best_cpu, later_mask);
>>       }
>>
>
> OK, I need to make this a before/after, as patch format sucks in this
> case.
>
> before:
>
>         if (later_mask &&
>             cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed) &&
>             cpumask_and(later_mask, later_mask, cpu_active_mask)) {
>                 best_cpu = cpumask_any(later_mask);
>                 goto out;
>         } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
>                 dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
>                 best_cpu = cpudl_maximum(cp);
>                 if (later_mask)
>                         cpumask_set_cpu(best_cpu, later_mask);
>         }
>
> after:
>
>         cpumask_and(later_mask, cpu_active_mask, &p->cpus_allowed);
>         if (cpumask_and(later_mask, later_mask, cp->free_cpus)) {
>                 best_cpu = cpumask_any(later_mask);
>                 goto out;
>         } else if (cpumask_and(later_mask, cpumask_of(cpudl_maximum(cp)),
>                         &p->cpus_allowed) && dl_time_before(dl_se->deadline,
>                         cp->elements[0].dl)) {
>                 best_cpu = cpudl_maximum(cp);
>         }
>
> Again, the above else if is rather ugly. Note, if the first
> cpumask_and() is zero, there's no reason to call cpumask_and() (which
> could be expensive) the second time. Keep the two together with &&.
>
> Also, I wouldn't convert the cpumask_test_cpu() to a cpumask_and(), as
> I would think that would be more expensive to copy an entire mask
> (think what happens when we have a thousand CPUs). Just doing a test,
> and then if the test succeeds (remember, there's another test that
> might fail after it), then just set the one bit.

Hi Steve,

Thanks for pointing me out, it does make sense. I'll fix it and send
out another version.

But one more thing:
I made this change just want to avoid setting later_mask
conditionally, since the call site of check_preempt_equal_dl() doesn't
need the result of later_mask.

So, I wonder if it's reasonable to deal with this function further:
1) to introduce the 4th parameter to cpudl_find() like "int
set_flag"(in the hopes that it can gain some performance?), then the
code will be like this:
 int cpudl_find(struct cpudl *cp, struct task_struct *p,
            struct cpumask *later_mask, int set_flag)
 {
        int best_cpu = -1;
        const struct sched_dl_entity *dl_se = &p->dl;

         cpumask_and(later_mask, cpu_active_mask, &p->cpus_allowed);
         if (cpumask_and(later_mask, later_mask, cp->free_cpus)) {
                 best_cpu = cpumask_any(later_mask);
                 goto out;
         } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
                     dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
             best_cpu = cpudl_maximum(cp);
             if (set_flag)
                  cpumask_set_cpu(best_cpu, later_mask); // does this
have some performance gain?
        }

out:
    WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));

    return best_cpu;
}

2) also, may put the best_cpu selecting back into find_later_rq()
where we can select additionally according to the sd topology,
obviously the cpumask_any() in cpudl_find() isn't the best one. Then
the code will be like this (change its return type to bool):

int cpudl_find(struct cpudl *cp, struct task_struct *p,
            struct cpumask *later_mask, int set_flag)
 {
        const struct sched_dl_entity *dl_se = &p->dl;

         cpumask_and(later_mask, cpu_active_mask, &p->cpus_allowed);
         if (cpumask_and(later_mask, later_mask, cp->free_cpus)) {
                return 1;
         } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
                     dl_time_before(dl_se->deadline,
cp->elements[0].dl)) {
             if (set_flag)
                  cpumask_set_cpu(cpudl_maximum(cp), later_mask);

              return 1;
        }

        return 0;
}

Are these two changes acceptable? Thanks!

Regards,
Xunlei

>
> --- Steve
>
>
>
>>  out:
>> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
>> index bd83272..3ecf838 100755
>> --- a/kernel/sched/deadline.c
>> +++ b/kernel/sched/deadline.c
>> @@ -965,14 +965,18 @@ out:
>>       return cpu;
>>  }
>>
>> +static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
>> +
>>  static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
>>  {
>> +     struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
>> +
>>       /*
>>        * Current can't be migrated, useless to reschedule,
>>        * let's hope p can move out.
>>        */
>>       if (rq->curr->nr_cpus_allowed == 1 ||
>> -         cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
>> +         cpudl_find(&rq->rd->cpudl, rq->curr, later_mask) == -1)
>>               return;
>>
>>       /*
>> @@ -980,7 +984,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
>>        * see if it is pushed or pulled somewhere else.
>>        */
>>       if (p->nr_cpus_allowed != 1 &&
>> -         cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
>> +         cpudl_find(&rq->rd->cpudl, p, later_mask) != -1)
>>               return;
>>
>>       resched_curr(rq);
>> @@ -1167,8 +1171,6 @@ next_node:
>>       return NULL;
>>  }
>>
>> -static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
>> -
>>  static int find_later_rq(struct task_struct *task)
>>  {
>>       struct sched_domain *sd;
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2014-11-18 14:02 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-11-17 16:11 [PATCH 1/2] sched/deadline: Modify cpudl.free_cpus to reflect rd->span pang.xunlei
2014-11-17 16:11 ` [PATCH 2/2] sched/deadline: Fix wrong cpudl_find() in check_preempt_equal_dl() pang.xunlei
2014-11-17 20:15   ` Steven Rostedt
2014-11-18 14:02     ` pang.xunlei
2014-11-17 19:39 ` [PATCH 1/2] sched/deadline: Modify cpudl.free_cpus to reflect rd->span Steven Rostedt
2014-11-18 13:32   ` pang.xunlei

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.