All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>,
	linux-kernel@vger.kernel.org,
	Mel Gorman <mgorman@techsingularity.net>,
	Matt Fleming <matt@codeblueprint.co.uk>,
	Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Subject: Re: [PATCH 0/5] sched/debug: decouple sched_stat tracepoints from CONFIG_SCHEDSTATS
Date: Tue, 28 Jun 2016 14:43:36 +0200	[thread overview]
Message-ID: <20160628124336.GG30909@twins.programming.kicks-ass.net> (raw)
In-Reply-To: <cover.1466184592.git.jpoimboe@redhat.com>

On Fri, Jun 17, 2016 at 12:43:22PM -0500, Josh Poimboeuf wrote:
> NOTE: I didn't include any performance numbers because I wasn't able to
> get consistent results.  I tried the following on a Xeon E5-2420 v2 CPU:
> 
>   $ for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do echo -n performance > $i; done
>   $ echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
>   $ echo 100 > /sys/devices/system/cpu/intel_pstate/min_perf_pct
>   $ echo 0 > /proc/sys/kernel/nmi_watchdog
>   $ taskset 0x10 perf stat -n -r10 perf bench sched pipe -l 1000000
> 
> I was going to post the numbers from that, both with and without
> SCHEDSTATS, but then when I tried to repeat the test on a different day,
> the results were surprisingly different, with different conclusions.
> 
> So any advice on measuring scheduler performance would be appreciated...

Yeah, it's a bit of a pain in general...

A) perf stat --null --repeat 50 -- perf bench sched messaging -g 50 -l 5000 | grep "seconds time elapsed"
B) perf stat --null --repeat 50 -- taskset 1 perf bench sched pipe | grep "seconds time elapsed"

1) tip/master + 1-4
2) tip/master + 1-5
3) tip/master + 1-5 + below

	1		2		3

A)	4.627767855	4.650429917	4.646208062
	4.633921933	4.641424424	4.612021058
	4.649536375	4.663144144	4.636815948
	4.630165619	4.649053552	4.613022902

B)	1.770732957	1.789534273	1.773334291
	1.761740716	1.795618428	1.773338681
	1.763761666	1.822316496	1.774385589


From this it looks like patch 5 does hurt a wee bit, but we can get most
of that back by reordering the structure a bit. The results seem
'stable' across rebuilds and reboots (I've popped all patches,
rebuilt, rebooted and re-benched 1 at the end and obtained similar
results).

Although it's possible that if we reorder first and then do 5, we'll just
see a bigger regression. I've not bothered to check that.


---
 include/linux/sched.h |   33 +++++++++++++++------------------
 kernel/sched/core.c   |    4 ++--
 kernel/sched/debug.c  |    6 +++---
 3 files changed, 20 insertions(+), 23 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1220,7 +1220,7 @@ struct uts_namespace;
 struct load_weight {
 	unsigned long weight;
 	u32 inv_weight;
-};
+} __packed;
 
 /*
  * The load_avg/util_avg accumulates an infinite geometric series
@@ -1315,44 +1315,40 @@ struct sched_statistics {
 
 struct sched_entity {
 	struct load_weight	load;		/* for load-balancing */
+	unsigned int		on_rq;
 	struct rb_node		run_node;
 	struct list_head	group_node;
-	unsigned int		on_rq;
 
-	u64			exec_start;
+	u64			exec_start ____cacheline_aligned_in_smp;
 	u64			sum_exec_runtime;
 	u64			vruntime;
 	u64			prev_sum_exec_runtime;
-
-	u64			nr_migrations;
-
 	u64			wait_start;
 	u64			sleep_start;
 	u64			block_start;
 
+#ifdef CONFIG_SMP
+	/*
+	 * Per entity load average tracking.
+	 */
+	struct sched_avg	avg ____cacheline_aligned_in_smp;
+#endif
 #ifdef CONFIG_SCHEDSTATS
 	struct sched_statistics statistics;
 #endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	int			depth;
+	/*
+	 * mostly constant values, separate from modifications above
+	 */
+	int			depth ____cacheline_aligned_in_smp;
 	struct sched_entity	*parent;
 	/* rq on which this entity is (to be) queued: */
 	struct cfs_rq		*cfs_rq;
 	/* rq "owned" by this entity/group: */
 	struct cfs_rq		*my_q;
 #endif
-
-#ifdef CONFIG_SMP
-	/*
-	 * Per entity load average tracking.
-	 *
-	 * Put into separate cache line so it does not
-	 * collide with read-mostly values above.
-	 */
-	struct sched_avg	avg ____cacheline_aligned_in_smp;
-#endif
-};
+} ____cacheline_aligned_in_smp;
 
 struct sched_rt_entity {
 	struct list_head run_list;
@@ -1475,6 +1471,7 @@ struct task_struct {
 	int prio, static_prio, normal_prio;
 	unsigned int rt_priority;
 	const struct sched_class *sched_class;
+	u64 nr_migrations;
 	struct sched_entity se;
 	struct sched_rt_entity rt;
 #ifdef CONFIG_CGROUP_SCHED
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1239,7 +1239,7 @@ void set_task_cpu(struct task_struct *p,
 	if (task_cpu(p) != new_cpu) {
 		if (p->sched_class->migrate_task_rq)
 			p->sched_class->migrate_task_rq(p);
-		p->se.nr_migrations++;
+		p->nr_migrations++;
 		perf_event_task_migrate(p);
 	}
 
@@ -2167,7 +2167,7 @@ static void __sched_fork(unsigned long c
 	p->se.exec_start		= 0;
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
-	p->se.nr_migrations		= 0;
+	p->nr_migrations		= 0;
 	p->se.vruntime			= 0;
 	INIT_LIST_HEAD(&p->se.group_node);
 
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -885,7 +885,7 @@ void proc_sched_show_task(struct task_st
 
 	nr_switches = p->nvcsw + p->nivcsw;
 
-	P(se.nr_migrations);
+	P(nr_migrations);
 
 	PN(se.wait_start);
 	PN(se.sleep_start);
@@ -926,9 +926,9 @@ void proc_sched_show_task(struct task_st
 			avg_atom = -1LL;
 
 		avg_per_cpu = p->se.sum_exec_runtime;
-		if (p->se.nr_migrations) {
+		if (p->nr_migrations) {
 			avg_per_cpu = div64_u64(avg_per_cpu,
-						p->se.nr_migrations);
+						p->nr_migrations);
 		} else {
 			avg_per_cpu = -1LL;
 		}

  parent reply	other threads:[~2016-06-28 12:43 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-17 17:43 [PATCH 0/5] sched/debug: decouple sched_stat tracepoints from CONFIG_SCHEDSTATS Josh Poimboeuf
2016-06-17 17:43 ` [PATCH 1/5] sched/debug: rename and move enqueue_sleeper() Josh Poimboeuf
2016-09-05 11:56   ` [tip:sched/core] sched/debug: Rename " tip-bot for Josh Poimboeuf
2016-06-17 17:43 ` [PATCH 2/5] sched/debug: schedstat macro cleanup Josh Poimboeuf
2016-09-05 11:57   ` [tip:sched/core] sched/debug: Clean up schedstat macros tip-bot for Josh Poimboeuf
2016-06-17 17:43 ` [PATCH 3/5] sched/debug: 'schedstat_val()' -> 'schedstat_val_or_zero()' Josh Poimboeuf
2016-09-05 11:57   ` [tip:sched/core] sched/debug: Rename " tip-bot for Josh Poimboeuf
2016-06-17 17:43 ` [PATCH 4/5] sched/debug: remove several CONFIG_SCHEDSTATS guards Josh Poimboeuf
2016-06-27 16:21   ` Peter Zijlstra
2016-06-27 16:32     ` Josh Poimboeuf
2016-09-05 11:57   ` [tip:sched/core] sched/debug: Remove " tip-bot for Josh Poimboeuf
2016-06-17 17:43 ` [PATCH 5/5] sched/debug: decouple 'sched_stat_*' tracepoints' from CONFIG_SCHEDSTATS Josh Poimboeuf
2016-06-21  8:27 ` [PATCH 0/5] sched/debug: decouple sched_stat tracepoints " Srikar Dronamraju
2016-06-28 12:43 ` Peter Zijlstra [this message]
2016-06-29  2:32   ` Josh Poimboeuf
2016-06-29 10:29   ` Peter Zijlstra
2016-07-08 14:57     ` Josh Poimboeuf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160628124336.GG30909@twins.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=jpoimboe@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matt@codeblueprint.co.uk \
    --cc=mgorman@techsingularity.net \
    --cc=mingo@redhat.com \
    --cc=srikar@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.