linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Chris Mason <clm@fb.com>
To: Mike Galbraith <mgalbraith@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@kernel.org>,
	Matt Fleming <matt@codeblueprint.co.uk>,
	<linux-kernel@vger.kernel.org>
Subject: Re: sched: tweak select_idle_sibling to look for idle threads
Date: Mon, 11 Apr 2016 20:30:44 -0400	[thread overview]
Message-ID: <20160412003044.smr24xzuom3locvo@floor.thefacebook.com> (raw)
In-Reply-To: <1460350461.3870.36.camel@suse.de>

On Mon, Apr 11, 2016 at 06:54:21AM +0200, Mike Galbraith wrote:
> On Sun, 2016-04-10 at 15:55 -0400, Chris Mason wrote:
> > On Sun, Apr 10, 2016 at 12:04:21PM +0200, Mike Galbraith wrote:
> > > On Sat, 2016-04-09 at 15:05 -0400, Chris Mason wrote:
> > > 
> > > > This does preserve the existing logic to prefer idle cores over idle
> > > > CPU threads, and includes some tests to try and avoid the idle scan when we're
> > > > actually better off sharing a non-idle CPU with someone else.
> > > 
> > > My box says the "oh nevermind" checks aren't selective enough, tbench
> > > dropped 4% at clients=cores, and 2% at clients=threads.
> > 
> > Ok, I was able to reproduce this by stuffing tbench_srv and tbench onto
> > just socket 0.  Version 2 below fixes things for me, but I'm hoping
> > someone can suggest a way to get task_hot() buddy checks without the rq
> > lock.
> > 
> > I haven't run this on production loads yet, but our 4.0 patch for this
> > uses task_hot(), so I'd expect it to be on par.  If this doesn't fix it
> > for you, I'll dig up a similar machine on Monday.
> 
> My box stopped caring.  I personally would be reluctant to apply it
> without a "you asked for it" button or a large pile of benchmark
> results.  Lock banging or not, full scan existing makes me nervous.


We can use a bitmap at the socket level to keep track of which CPUs are
idle.  I'm sure there are better places for the array and better ways to
allocate it; this is just a rough cut to make sure the idle tracking works.

-chris

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a10494a..1c3b5e4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1055,6 +1055,8 @@ struct sched_domain {
 	unsigned int balance_interval;	/* initialise to 1. units in ms. */
 	unsigned int nr_balance_failed; /* initialise to 0 */
 
+	cpumask_var_t idle_cpus_mask;
+
 	/* idle_balance() stats */
 	u64 max_newidle_lb_cost;
 	unsigned long next_decay_max_lb_cost;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 41f6b22..237d645 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3204,6 +3204,7 @@ again:
 static void __sched notrace __schedule(bool preempt)
 {
 	struct task_struct *prev, *next;
+	struct sched_domain *package_sd;
 	unsigned long *switch_count;
 	struct rq *rq;
 	int cpu;
@@ -3270,11 +3270,19 @@ static void __sched notrace __schedule(bool preempt)
 		update_rq_clock(rq);
 
 	next = pick_next_task(rq, prev);
+
 	clear_tsk_need_resched(prev);
 	clear_preempt_need_resched();
 	rq->clock_skip_update = 0;
 
 	if (likely(prev != next)) {
+		package_sd = rcu_dereference(per_cpu(sd_llc, cpu));
+		if (package_sd) {
+			if (prev->policy == SCHED_IDLE && next->policy != SCHED_IDLE)
+				cpumask_clear_cpu(cpu, package_sd->idle_cpus_mask);
+			else if (next->policy == SCHED_IDLE)
+				cpumask_set_cpu(cpu, package_sd->idle_cpus_mask);
+		}
 		rq->nr_switches++;
 		rq->curr = next;
 		++*switch_count;
@@ -6599,7 +6607,6 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
 		sd->imbalance_pct = 117;
 		sd->cache_nice_tries = 1;
 		sd->busy_idx = 2;
-
 #ifdef CONFIG_NUMA
 	} else if (sd->flags & SD_NUMA) {
 		sd->cache_nice_tries = 2;
@@ -7041,6 +7048,8 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
 		return child;
 
 	cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
+	zalloc_cpumask_var(&sd->idle_cpus_mask, GFP_NOWAIT);
+	cpumask_and(sd->idle_cpus_mask, cpu_map, tl->mask(cpu));
 	if (child) {
 		sd->level = child->level + 1;
 		sched_domain_level_max = max(sched_domain_level_max, sd->level);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0c76505..cae6bd7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5026,7 +5026,7 @@ next:
 	 * the package.
 	 */
 	if (package_sd && should_scan_idle(p, target)) {
-		for_each_cpu_and(i, sched_domain_span(package_sd),
+		for_each_cpu_and(i, package_sd->idle_cpus_mask,
 				 tsk_cpus_allowed(p)) {
 			if (idle_cpu(i)) {
 				target = i;
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 544a713..7e34b42 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -202,6 +202,9 @@ DEFINE_PER_CPU(bool, cpu_dead_idle);
  */
 static void cpu_idle_loop(void)
 {
+	int cpu;
+	struct sched_domain *package_sd;
+
 	while (1) {
 		/*
 		 * If the arch has a polling bit, we maintain an invariant:
@@ -212,10 +215,19 @@ static void cpu_idle_loop(void)
 		 * guaranteed to cause the cpu to reschedule.
 		 */
 
+
 		__current_set_polling();
 		quiet_vmstat();
 		tick_nohz_idle_enter();
 
+		preempt_disable();
+		cpu = smp_processor_id();
+		package_sd = rcu_dereference(per_cpu(sd_llc, cpu));
+		if (package_sd) {
+			cpumask_set_cpu(cpu, package_sd->idle_cpus_mask);
+		}
+		preempt_enable();
+
 		while (!need_resched()) {
 			check_pgt_cache();
 			rmb();
-- 
2.8.0.rc2

  reply	other threads:[~2016-04-12  0:31 UTC|newest]

Thread overview: 80+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-05 18:08 [PATCH RFC] select_idle_sibling experiments Chris Mason
2016-04-05 18:43 ` Bastien Philbert
2016-04-05 19:28   ` Chris Mason
2016-04-05 20:03 ` Matt Fleming
2016-04-05 21:05   ` Bastien Philbert
2016-04-06  0:44   ` Chris Mason
2016-04-06  7:27 ` Mike Galbraith
2016-04-06 13:36   ` Chris Mason
2016-04-09 17:30   ` Chris Mason
2016-04-12 21:45     ` Matt Fleming
2016-04-13  3:40       ` Mike Galbraith
2016-04-13 15:54         ` Chris Mason
2016-04-28 12:00   ` Peter Zijlstra
2016-04-28 13:17     ` Mike Galbraith
2016-05-02  5:35     ` Mike Galbraith
2016-04-07 15:17 ` Chris Mason
2016-04-09 19:05 ` sched: tweak select_idle_sibling to look for idle threads Chris Mason
2016-04-10 10:04   ` Mike Galbraith
2016-04-10 12:35     ` Chris Mason
2016-04-10 12:46       ` Mike Galbraith
2016-04-10 19:55     ` Chris Mason
2016-04-11  4:54       ` Mike Galbraith
2016-04-12  0:30         ` Chris Mason [this message]
2016-04-12  4:44           ` Mike Galbraith
2016-04-12 13:27             ` Chris Mason
2016-04-12 18:16               ` Mike Galbraith
2016-04-12 20:07                 ` Chris Mason
2016-04-13  3:18                   ` Mike Galbraith
2016-04-13 13:44                     ` Chris Mason
2016-04-13 14:22                       ` Mike Galbraith
2016-04-13 14:36                         ` Chris Mason
2016-04-13 15:05                           ` Mike Galbraith
2016-04-13 15:34                             ` Mike Galbraith
2016-04-30 12:47   ` Peter Zijlstra
2016-05-01  7:12     ` Mike Galbraith
2016-05-01  8:53       ` Peter Zijlstra
2016-05-01  9:20         ` Mike Galbraith
2016-05-07  1:24           ` Yuyang Du
2016-05-08  8:08             ` Mike Galbraith
2016-05-08 18:57               ` Yuyang Du
2016-05-09  3:45                 ` Mike Galbraith
2016-05-08 20:22                   ` Yuyang Du
2016-05-09  7:44                     ` Mike Galbraith
2016-05-09  1:13                       ` Yuyang Du
2016-05-09  9:39                         ` Mike Galbraith
2016-05-09 23:26                           ` Yuyang Du
2016-05-10  7:49                             ` Mike Galbraith
2016-05-10 15:26                               ` Mike Galbraith
2016-05-10 19:16                                 ` Yuyang Du
2016-05-11  4:17                                   ` Mike Galbraith
2016-05-11  1:23                                     ` Yuyang Du
2016-05-11  9:56                                       ` Mike Galbraith
2016-05-18  6:41                                   ` Mike Galbraith
2016-05-09  3:52                 ` Mike Galbraith
2016-05-08 20:31                   ` Yuyang Du
2016-05-02  8:46       ` Peter Zijlstra
2016-05-02 14:50         ` Mike Galbraith
2016-05-02 14:58           ` Peter Zijlstra
2016-05-02 15:47             ` Chris Mason
2016-05-03 14:32               ` Peter Zijlstra
2016-05-03 15:11                 ` Chris Mason
2016-05-04 10:37                   ` Peter Zijlstra
2016-05-04 15:31                     ` Peter Zijlstra
2016-05-05 22:03                     ` Matt Fleming
2016-05-06 18:54                       ` Mike Galbraith
2016-05-09  8:33                         ` Peter Zijlstra
2016-05-09  8:56                           ` Mike Galbraith
2016-05-04 15:45                   ` Peter Zijlstra
2016-05-04 17:46                     ` Chris Mason
2016-05-05  9:33                       ` Peter Zijlstra
2016-05-05 13:58                         ` Chris Mason
2016-05-06  7:12                           ` Peter Zijlstra
2016-05-06 17:27                             ` Chris Mason
2016-05-06  7:25                   ` Peter Zijlstra
2016-05-02 17:30             ` Mike Galbraith
2016-05-02 15:01           ` Peter Zijlstra
2016-05-02 16:04             ` Ingo Molnar
2016-05-03 11:31               ` Peter Zijlstra
2016-05-03 18:22                 ` Peter Zijlstra
2016-05-02 15:10           ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160412003044.smr24xzuom3locvo@floor.thefacebook.com \
    --to=clm@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matt@codeblueprint.co.uk \
    --cc=mgalbraith@suse.de \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).