linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: "Ofer Levi(SW)" <oferle@mellanox.com>
Cc: "rusty@rustcorp.com.au" <rusty@rustcorp.com.au>,
	"mingo@redhat.com" <mingo@redhat.com>,
	"Vineet.Gupta1@synopsys.com" <Vineet.Gupta1@synopsys.com>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	Tejun Heo <tj@kernel.org>
Subject: Re: hotplug support for arch/arc/plat-eznps platform
Date: Thu, 10 Aug 2017 17:45:18 +0200	[thread overview]
Message-ID: <20170810154518.gl2w3llfabnszusr@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20170810091905.4lekz7gtjprbgiyn@hirez.programming.kicks-ass.net>

On Thu, Aug 10, 2017 at 11:19:05AM +0200, Peter Zijlstra wrote:
> On Thu, Aug 10, 2017 at 07:40:16AM +0000, Ofer Levi(SW) wrote:
> > Well, this definitely have pleased the little toy :)
> > Thank you. I really appreciate your time and effort.
> > 
> > If I may, one more newbie question. What do I need to do for the two patches to find 
> > their way into formal kernel code?
> 
> I'll split the first patch into two separate patches and line them up.
> 
> I'm not sure about this last patch, I'll speak with Ingo once he's back
> to see what would be the thing to do here.
> 
> I suspect we can make it work, that sysctl stuff is only debug crud
> after all and that should never get in the way of getting work done.

Can you test this instead of the second patch? It should have the same
effect.

BTW, what physical size does your toy have? I'm thinking its less than
multiple racks worth like the SGI systems were.

---
Subject: sched/debug: Optimize sched_domain sysctl generation
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu Aug 10 17:10:26 CEST 2017

Currently we unconditionally destroy all sysctl bits and regenerate
them after we've rebuild the domains (even if that rebuild is a
no-op).

And since we unconditionally (re)build the sysctl for all possible
CPUs, onlining all CPUs gets us O(n^2) time. Instead change this to
only rebuild the bits for CPUs we've actually installed new domains
on.

Reported-by: "Ofer Levi(SW)" <oferle@mellanox.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/sched/debug.c    |   68 ++++++++++++++++++++++++++++++++++++++----------
 kernel/sched/sched.h    |    4 ++
 kernel/sched/topology.c |    1 
 3 files changed, 59 insertions(+), 14 deletions(-)

--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -327,38 +327,78 @@ static struct ctl_table *sd_alloc_ctl_cp
 	return table;
 }
 
+static cpumask_var_t sd_sysctl_cpus;
 static struct ctl_table_header *sd_sysctl_header;
+
 void register_sched_domain_sysctl(void)
 {
-	int i, cpu_num = num_possible_cpus();
-	struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
+	static struct ctl_table *cpu_entries;
+	static struct ctl_table **cpu_idx;
 	char buf[32];
+	int i;
+
+	if (!cpu_entries) {
+		cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1);
+		if (!cpu_entries)
+			return;
+
+		WARN_ON(sd_ctl_dir[0].child);
+		sd_ctl_dir[0].child = cpu_entries;
+	}
+
+	if (!cpu_idx) {
+		struct ctl_table *e = cpu_entries;
+
+		cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL);
+		if (!cpu_idx)
+			return;
+
+		/* deal with sparse possible map */
+		for_each_possible_cpu(i) {
+			cpu_idx[i] = e;
+			e++;
+		}
+	}
 
-	WARN_ON(sd_ctl_dir[0].child);
-	sd_ctl_dir[0].child = entry;
+	if (!cpumask_available(sd_sysctl_cpus)) {
+		if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
+			return;
 
-	if (entry == NULL)
-		return;
+		/* init to possible to not have holes in @cpu_entries */
+		cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
+	}
+
+	for_each_cpu(i, sd_sysctl_cpus) {
+		struct ctl_table *e = cpu_idx[i];
+
+		if (e->child)
+			sd_free_ctl_entry(&e->child);
+
+		if (!e->procname) {
+			snprintf(buf, 32, "cpu%d", i);
+			e->procname = kstrdup(buf, GFP_KERNEL);
+		}
+		e->mode = 0555;
+		e->child = sd_alloc_ctl_cpu_table(i);
 
-	for_each_possible_cpu(i) {
-		snprintf(buf, 32, "cpu%d", i);
-		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0555;
-		entry->child = sd_alloc_ctl_cpu_table(i);
-		entry++;
+		__cpumask_clear_cpu(i, sd_sysctl_cpus);
 	}
 
 	WARN_ON(sd_sysctl_header);
 	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
 }
 
+void dirty_sched_domain_sysctl(int cpu)
+{
+	if (cpumask_available(sd_sysctl_cpus))
+		__cpumask_set_cpu(cpu, sd_sysctl_cpus);
+}
+
 /* may be called multiple times per register */
 void unregister_sched_domain_sysctl(void)
 {
 	unregister_sysctl_table(sd_sysctl_header);
 	sd_sysctl_header = NULL;
-	if (sd_ctl_dir[0].child)
-		sd_free_ctl_entry(&sd_ctl_dir[0].child);
 }
 #endif /* CONFIG_SYSCTL */
 #endif /* CONFIG_SMP */
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1120,11 +1120,15 @@ extern int group_balance_cpu(struct sche
 
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
 void register_sched_domain_sysctl(void);
+void dirty_sched_domain_sysctl(int cpu);
 void unregister_sched_domain_sysctl(void);
 #else
 static inline void register_sched_domain_sysctl(void)
 {
 }
+static inline void dirty_sched_domain_sysctl(int cpu)
+{
+}
 static inline void unregister_sched_domain_sysctl(void)
 {
 }
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -461,6 +461,7 @@ cpu_attach_domain(struct sched_domain *s
 	rq_attach_root(rq, rd);
 	tmp = rq->sd;
 	rcu_assign_pointer(rq->sd, sd);
+	dirty_sched_domain_sysctl(cpu);
 	destroy_sched_domains(tmp);
 
 	update_top_cache_domain(cpu);

  parent reply	other threads:[~2017-08-10 15:45 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-06  5:53 hotplug support for arch/arc/plat-eznps platform Ofer Levi(SW)
2017-08-07  8:33 ` Peter Zijlstra
2017-08-07 13:41   ` Ofer Levi(SW)
2017-08-07 15:10     ` Peter Zijlstra
2017-08-08  6:49       ` Ofer Levi(SW)
2017-08-08 10:16         ` Peter Zijlstra
2017-08-09 15:19           ` Ofer Levi(SW)
2017-08-09 15:34             ` Peter Zijlstra
2017-08-10  7:40               ` Ofer Levi(SW)
2017-08-10  9:19                 ` Peter Zijlstra
2017-08-10 11:51                   ` Ofer Levi(SW)
2017-08-10 15:45                   ` Peter Zijlstra [this message]
2017-08-14  7:54                     ` Ofer Levi(SW)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170810154518.gl2w3llfabnszusr@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=Vineet.Gupta1@synopsys.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=oferle@mellanox.com \
    --cc=rusty@rustcorp.com.au \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).