From: Mike Galbraith <bitbucket@online.de>
To: Michael Wang <wangyun@linux.vnet.ibm.com>
Cc: linux-kernel@vger.kernel.org, mingo@redhat.com,
peterz@infradead.org, mingo@kernel.org, a.p.zijlstra@chello.nl
Subject: Re: [RFC PATCH 0/2] sched: simplify the select_task_rq_fair()
Date: Wed, 23 Jan 2013 10:32:35 +0100 [thread overview]
Message-ID: <1358933555.5752.132.camel@marge.simpson.net> (raw)
In-Reply-To: <1358932694.5752.126.camel@marge.simpson.net>
On Wed, 2013-01-23 at 10:18 +0100, Mike Galbraith wrote:
> On Wed, 2013-01-23 at 17:00 +0800, Michael Wang wrote:
> > On 01/23/2013 04:49 PM, Mike Galbraith wrote:
> > > On Wed, 2013-01-23 at 16:30 +0800, Michael Wang wrote:
> > >> On 01/23/2013 04:20 PM, Mike Galbraith wrote:
> > >>> On Wed, 2013-01-23 at 15:10 +0800, Michael Wang wrote:
> > >>>> On 01/23/2013 02:28 PM, Mike Galbraith wrote:
> > >>>
> > >>>>> Abbreviated test run:
> > >>>>> Tasks jobs/min jti jobs/min/task real cpu
> > >>>>> 640 158044.01 81 246.9438 24.54 577.66 Wed Jan 23 07:14:33 2013
> > >>>>> 1280 50434.33 39 39.4018 153.80 5737.57 Wed Jan 23 07:17:07 2013
> > >>>>> 2560 47214.07 34 18.4430 328.58 12715.56 Wed Jan 23 07:22:36 2013
> > >>>>
> > >>>> So it still doesn't work... and not going into the balance path while
> > >>>> waking up will fix it; that looks like the only choice if no error in
> > >>>> the balance path can be found... benchmark wins again, I'm feeling bad...
> > >>>>
> > >>>> I will conclude the info we collected and make a v3 later.
> > >>>
> > >>> FWIW, I hacked virgin to do full balance if an idle CPU was not found,
> > >>> leaving the preference to wake cache affine intact though, turned on
> > >>> WAKE_BALANCE in all domains, and it did not collapse. In fact, the high
> > >>> load end, where the idle search will frequently be a waste of cycles,
> > >>> actually improved a bit. Things that make ya go hmmm.
> > >>
> > >> Oh, does that mean the old balance path is good while the new one is really
> > >> broken? I mean, comparing this with the previous results, could we say
> > >> that all the collapse was just caused by the change of balance path?
> > >
> > > That's a good supposition. I'll see if it holds.
> >
> > I just noticed that there is no sd supporting the WAKE flag at all according
> > to your debug info, is there?
>
> There is, I turned it on in all domains.
For your patches, I had to turn it on at birth, but doing that, and
restoring the full balance path to original form killed the collapse.
Tasks jobs/min jti jobs/min/task real cpu
640 152452.83 97 238.2075 25.44 613.48 Wed Jan 23 10:22:12 2013
1280 190491.16 97 148.8212 40.72 1223.74 Wed Jan 23 10:22:53 2013
2560 219397.54 95 85.7022 70.71 2422.46 Wed Jan 23 10:24:04 2013
---
include/linux/topology.h | 6 ++---
kernel/sched/core.c | 41 ++++++++++++++++++++++++++++++-------
kernel/sched/fair.c | 52 +++++++++++++++++++++++++++++------------------
3 files changed, 70 insertions(+), 29 deletions(-)
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -95,7 +95,7 @@ int arch_update_cpu_topology(void);
| 1*SD_BALANCE_NEWIDLE \
| 1*SD_BALANCE_EXEC \
| 1*SD_BALANCE_FORK \
- | 0*SD_BALANCE_WAKE \
+ | 1*SD_BALANCE_WAKE \
| 1*SD_WAKE_AFFINE \
| 1*SD_SHARE_CPUPOWER \
| 1*SD_SHARE_PKG_RESOURCES \
@@ -126,7 +126,7 @@ int arch_update_cpu_topology(void);
| 1*SD_BALANCE_NEWIDLE \
| 1*SD_BALANCE_EXEC \
| 1*SD_BALANCE_FORK \
- | 0*SD_BALANCE_WAKE \
+ | 1*SD_BALANCE_WAKE \
| 1*SD_WAKE_AFFINE \
| 0*SD_SHARE_CPUPOWER \
| 1*SD_SHARE_PKG_RESOURCES \
@@ -156,7 +156,7 @@ int arch_update_cpu_topology(void);
| 1*SD_BALANCE_NEWIDLE \
| 1*SD_BALANCE_EXEC \
| 1*SD_BALANCE_FORK \
- | 0*SD_BALANCE_WAKE \
+ | 1*SD_BALANCE_WAKE \
| 1*SD_WAKE_AFFINE \
| 0*SD_SHARE_CPUPOWER \
| 0*SD_SHARE_PKG_RESOURCES \
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5609,11 +5609,39 @@ static void update_top_cache_domain(int
static int sbm_max_level;
DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_balance_map, sbm_array);
+static void debug_sched_balance_map(int cpu)
+{
+ int i, type, level = 0;
+ struct sched_balance_map *sbm = &per_cpu(sbm_array, cpu);
+
+ printk("WYT: sbm of cpu %d\n", cpu);
+
+ for (type = 0; type < SBM_MAX_TYPE; type++) {
+ if (type == SBM_EXEC_TYPE)
+ printk("WYT: \t exec map\n");
+ else if (type == SBM_FORK_TYPE)
+ printk("WYT: \t fork map\n");
+ else if (type == SBM_WAKE_TYPE)
+ printk("WYT: \t wake map\n");
+
+ for (level = 0; level < sbm_max_level; level++) {
+ if (sbm->sd[type][level])
+ printk("WYT: \t\t sd %x, idx %d, level %d, weight %d\n", sbm->sd[type][level], level, sbm->sd[type][level]->level, sbm->sd[type][level]->span_weight);
+ }
+ }
+
+ printk("WYT: \t affine map\n");
+
+ for_each_possible_cpu(i) {
+ if (sbm->affine_map[i])
+ printk("WYT: \t\t affine with cpu %x in sd %x, weight %d\n", i, sbm->affine_map[i], sbm->affine_map[i]->span_weight);
+ }
+}
+
static void build_sched_balance_map(int cpu)
{
struct sched_balance_map *sbm = &per_cpu(sbm_array, cpu);
struct sched_domain *sd = cpu_rq(cpu)->sd;
- struct sched_domain *top_sd = NULL;
int i, type, level = 0;
memset(sbm->top_level, 0, sizeof((*sbm).top_level));
@@ -5656,11 +5684,9 @@ static void build_sched_balance_map(int
* fill the hole to get lower level sd easily.
*/
for (type = 0; type < SBM_MAX_TYPE; type++) {
- level = sbm->top_level[type];
- top_sd = sbm->sd[type][level];
- if ((++level != sbm_max_level) && top_sd) {
- for (; level < sbm_max_level; level++)
- sbm->sd[type][level] = top_sd;
+ for (level = 1; level < sbm_max_level; level++) {
+ if (!sbm->sd[type][level])
+ sbm->sd[type][level] = sbm->sd[type][level - 1];
}
}
}
@@ -5719,6 +5745,7 @@ cpu_attach_domain(struct sched_domain *s
* destroy_sched_domains() already do the work.
*/
build_sched_balance_map(cpu);
+//MIKE debug_sched_balance_map(cpu);
rcu_assign_pointer(rq->sbm, sbm);
}
@@ -6220,7 +6247,7 @@ sd_numa_init(struct sched_domain_topolog
| 1*SD_BALANCE_NEWIDLE
| 0*SD_BALANCE_EXEC
| 0*SD_BALANCE_FORK
- | 0*SD_BALANCE_WAKE
+ | 1*SD_BALANCE_WAKE
| 0*SD_WAKE_AFFINE
| 0*SD_SHARE_CPUPOWER
| 0*SD_SHARE_PKG_RESOURCES
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3312,7 +3312,7 @@ static int select_idle_sibling(struct ta
static int
select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
{
- struct sched_domain *sd = NULL;
+ struct sched_domain *sd = NULL, *tmp;
int cpu = smp_processor_id();
int prev_cpu = task_cpu(p);
int new_cpu = cpu;
@@ -3376,31 +3376,45 @@ select_task_rq_fair(struct task_struct *
balance_path:
new_cpu = (sd_flag & SD_BALANCE_WAKE) ? prev_cpu : cpu;
- sd = sbm->sd[type][sbm->top_level[type]];
+ sd = tmp = sbm->sd[type][sbm->top_level[type]];
while (sd) {
int load_idx = sd->forkexec_idx;
- struct sched_group *sg = NULL;
+ struct sched_group *group;
+ int weight;
+
+ if (!(sd->flags & sd_flag)) {
+ sd = sd->child;
+ continue;
+ }
if (sd_flag & SD_BALANCE_WAKE)
load_idx = sd->wake_idx;
- sg = find_idlest_group(sd, p, cpu, load_idx);
- if (!sg)
- goto next_sd;
-
- new_cpu = find_idlest_cpu(sg, p, cpu);
- if (new_cpu != -1)
- cpu = new_cpu;
-next_sd:
- if (!sd->level)
- break;
-
- sbm = cpu_rq(cpu)->sbm;
- if (!sbm)
- break;
-
- sd = sbm->sd[type][sd->level - 1];
+ group = find_idlest_group(sd, p, cpu, load_idx);
+ if (!group) {
+ sd = sd->child;
+ continue;
+ }
+
+ new_cpu = find_idlest_cpu(group, p, cpu);
+ if (new_cpu == -1 || new_cpu == cpu) {
+ /* Now try balancing at a lower domain level of cpu */
+ sd = sd->child;
+ continue;
+ }
+
+ /* Now try balancing at a lower domain level of new_cpu */
+ cpu = new_cpu;
+ weight = sd->span_weight;
+ sd = NULL;
+ for_each_domain(cpu, tmp) {
+ if (weight <= tmp->span_weight)
+ break;
+ if (tmp->flags & sd_flag)
+ sd = tmp;
+ }
+ /* while loop will break here if sd == NULL */
}
unlock:
next prev parent reply other threads:[~2013-01-23 9:32 UTC|newest]
Thread overview: 57+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <1356588535-23251-1-git-send-email-wangyun@linux.vnet.ibm.com>
2013-01-09 9:28 ` [RFC PATCH 0/2] sched: simplify the select_task_rq_fair() Michael Wang
2013-01-12 8:01 ` Mike Galbraith
2013-01-12 10:19 ` Mike Galbraith
2013-01-14 9:21 ` Mike Galbraith
2013-01-15 3:10 ` Michael Wang
2013-01-15 4:52 ` Mike Galbraith
2013-01-15 8:26 ` Michael Wang
2013-01-17 5:55 ` Michael Wang
2013-01-20 4:09 ` Mike Galbraith
2013-01-21 2:50 ` Michael Wang
2013-01-21 4:38 ` Mike Galbraith
2013-01-21 5:07 ` Michael Wang
2013-01-21 6:42 ` Mike Galbraith
2013-01-21 7:09 ` Mike Galbraith
2013-01-21 7:45 ` Michael Wang
2013-01-21 9:09 ` Mike Galbraith
2013-01-21 9:22 ` Michael Wang
2013-01-21 9:44 ` Mike Galbraith
2013-01-21 10:30 ` Mike Galbraith
2013-01-22 3:43 ` Michael Wang
2013-01-22 8:03 ` Mike Galbraith
2013-01-22 8:56 ` Michael Wang
2013-01-22 11:34 ` Mike Galbraith
2013-01-23 3:01 ` Michael Wang
2013-01-23 5:02 ` Mike Galbraith
2013-01-22 14:41 ` Mike Galbraith
2013-01-23 2:44 ` Michael Wang
2013-01-23 4:31 ` Mike Galbraith
2013-01-23 5:09 ` Michael Wang
2013-01-23 6:28 ` Mike Galbraith
2013-01-23 7:10 ` Michael Wang
2013-01-23 8:20 ` Mike Galbraith
2013-01-23 8:30 ` Michael Wang
2013-01-23 8:49 ` Mike Galbraith
2013-01-23 9:00 ` Michael Wang
2013-01-23 9:18 ` Mike Galbraith
2013-01-23 9:26 ` Michael Wang
2013-01-23 9:37 ` Mike Galbraith
2013-01-23 9:32 ` Mike Galbraith [this message]
2013-01-24 6:01 ` Michael Wang
2013-01-24 6:51 ` Mike Galbraith
2013-01-24 7:15 ` Michael Wang
2013-01-24 7:47 ` Mike Galbraith
2013-01-24 8:14 ` Michael Wang
2013-01-24 9:07 ` Mike Galbraith
2013-01-24 9:26 ` Michael Wang
2013-01-24 10:34 ` Mike Galbraith
2013-01-25 2:14 ` Michael Wang
2013-01-24 7:00 ` Michael Wang
2013-01-21 7:34 ` Michael Wang
2013-01-21 8:26 ` Mike Galbraith
2013-01-21 8:46 ` Michael Wang
2013-01-21 9:11 ` Mike Galbraith
2013-01-15 2:46 ` Michael Wang
2013-01-11 8:15 Michael Wang
2013-01-11 10:13 ` Nikunj A Dadhania
2013-01-15 2:20 ` Michael Wang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1358933555.5752.132.camel@marge.simpson.net \
--to=bitbucket@online.de \
--cc=a.p.zijlstra@chello.nl \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=wangyun@linux.vnet.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.