* [PATCH 0/5] sched/debug: decouple sched_stat tracepoints from CONFIG_SCHEDSTATS
@ 2016-06-17 17:43 Josh Poimboeuf
  2016-06-17 17:43 ` [PATCH 1/5] sched/debug: rename and move enqueue_sleeper() Josh Poimboeuf
                   ` (6 more replies)
  0 siblings, 7 replies; 17+ messages in thread
From: Josh Poimboeuf @ 2016-06-17 17:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra
  Cc: linux-kernel, Mel Gorman, Matt Fleming, Srikar Dronamraju

NOTE: I didn't include any performance numbers because I wasn't able to
get consistent results.  I tried the following on a Xeon E5-2420 v2 CPU:

  $ for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do echo -n performance > $i; done
  $ echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
  $ echo 100 > /sys/devices/system/cpu/intel_pstate/min_perf_pct
  $ echo 0 > /proc/sys/kernel/nmi_watchdog
  $ taskset 0x10 perf stat -n -r10 perf bench sched pipe -l 1000000

I was going to post the numbers from that, both with and without
SCHEDSTATS, but then when I tried to repeat the test on a different day,
the results were surprisingly different, with different conclusions.

So any advice on measuring scheduler performance would be appreciated...

Josh Poimboeuf (5):
  sched/debug: rename and move enqueue_sleeper()
  sched/debug: schedstat macro cleanup
  sched/debug: 'schedstat_val()' -> 'schedstat_val_or_zero()'
  sched/debug: remove several CONFIG_SCHEDSTATS guards
  sched/debug: decouple 'sched_stat_*' tracepoints from
    CONFIG_SCHEDSTATS

 include/linux/sched.h    |  11 +-
 kernel/latencytop.c      |   2 -
 kernel/profile.c         |   5 -
 kernel/sched/core.c      |  59 ++++------
 kernel/sched/debug.c     | 104 +++++++++--------
 kernel/sched/fair.c      | 290 ++++++++++++++++++++---------------------------
 kernel/sched/idle_task.c |   2 +-
 kernel/sched/stats.h     |  24 ++--
 lib/Kconfig.debug        |   1 -
 9 files changed, 220 insertions(+), 278 deletions(-)

-- 
2.4.11

* [PATCH 1/5] sched/debug: rename and move enqueue_sleeper()
  2016-06-17 17:43 [PATCH 0/5] sched/debug: decouple sched_stat tracepoints from CONFIG_SCHEDSTATS Josh Poimboeuf
@ 2016-06-17 17:43 ` Josh Poimboeuf
  2016-09-05 11:56   ` [tip:sched/core] sched/debug: Rename " tip-bot for Josh Poimboeuf
  2016-06-17 17:43 ` [PATCH 2/5] sched/debug: schedstat macro cleanup Josh Poimboeuf
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 17+ messages in thread
From: Josh Poimboeuf @ 2016-06-17 17:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra
  Cc: linux-kernel, Mel Gorman, Matt Fleming, Srikar Dronamraju

enqueue_sleeper() doesn't actually enqueue anything; it just updates
some statistics and fires some tracepoints.  Rename it to
update_stats_enqueue_sleeper() and call it from update_stats_enqueue().
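
For readers skimming, the resulting call flow, condensed from the diff
below:

  static inline void
  update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
  {
  	if (se != cfs_rq->curr)
  		update_stats_wait_start(cfs_rq, se);

  	if (flags & ENQUEUE_WAKEUP)
  		update_stats_enqueue_sleeper(cfs_rq, se);
  }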

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 kernel/sched/fair.c | 142 +++++++++++++++++++++++++++-------------------------
 1 file changed, 73 insertions(+), 69 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f75930b..71be269 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -830,11 +830,72 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	se->statistics.wait_start = 0;
 }
 
+static void
+update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	struct task_struct *tsk = NULL;
+
+	if (entity_is_task(se))
+		tsk = task_of(se);
+
+	if (se->statistics.sleep_start) {
+		u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.sleep_start;
+
+		if ((s64)delta < 0)
+			delta = 0;
+
+		if (unlikely(delta > se->statistics.sleep_max))
+			se->statistics.sleep_max = delta;
+
+		se->statistics.sleep_start = 0;
+		se->statistics.sum_sleep_runtime += delta;
+
+		if (tsk) {
+			account_scheduler_latency(tsk, delta >> 10, 1);
+			trace_sched_stat_sleep(tsk, delta);
+		}
+	}
+	if (se->statistics.block_start) {
+		u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.block_start;
+
+		if ((s64)delta < 0)
+			delta = 0;
+
+		if (unlikely(delta > se->statistics.block_max))
+			se->statistics.block_max = delta;
+
+		se->statistics.block_start = 0;
+		se->statistics.sum_sleep_runtime += delta;
+
+		if (tsk) {
+			if (tsk->in_iowait) {
+				se->statistics.iowait_sum += delta;
+				se->statistics.iowait_count++;
+				trace_sched_stat_iowait(tsk, delta);
+			}
+
+			trace_sched_stat_blocked(tsk, delta);
+
+			/*
+			 * Blocking time is in units of nanosecs, so shift by
+			 * 20 to get a milliseconds-range estimation of the
+			 * amount of time that the task spent sleeping:
+			 */
+			if (unlikely(prof_on == SLEEP_PROFILING)) {
+				profile_hits(SLEEP_PROFILING,
+						(void *)get_wchan(tsk),
+						delta >> 20);
+			}
+			account_scheduler_latency(tsk, delta >> 10, 0);
+		}
+	}
+}
+
 /*
  * Task is being enqueued - update stats:
  */
 static inline void
-update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
 	/*
 	 * Are we enqueueing a waiting task? (for current tasks
@@ -842,6 +903,9 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 */
 	if (se != cfs_rq->curr)
 		update_stats_wait_start(cfs_rq, se);
+
+	if (flags & ENQUEUE_WAKEUP)
+		update_stats_enqueue_sleeper(cfs_rq, se);
 }
 
 static inline void
@@ -878,7 +942,12 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 }
 
 static inline void
-update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline void
+update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
 }
 
@@ -3099,68 +3168,6 @@ static inline int idle_balance(struct rq *rq)
 
 #endif /* CONFIG_SMP */
 
-static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-#ifdef CONFIG_SCHEDSTATS
-	struct task_struct *tsk = NULL;
-
-	if (entity_is_task(se))
-		tsk = task_of(se);
-
-	if (se->statistics.sleep_start) {
-		u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.sleep_start;
-
-		if ((s64)delta < 0)
-			delta = 0;
-
-		if (unlikely(delta > se->statistics.sleep_max))
-			se->statistics.sleep_max = delta;
-
-		se->statistics.sleep_start = 0;
-		se->statistics.sum_sleep_runtime += delta;
-
-		if (tsk) {
-			account_scheduler_latency(tsk, delta >> 10, 1);
-			trace_sched_stat_sleep(tsk, delta);
-		}
-	}
-	if (se->statistics.block_start) {
-		u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.block_start;
-
-		if ((s64)delta < 0)
-			delta = 0;
-
-		if (unlikely(delta > se->statistics.block_max))
-			se->statistics.block_max = delta;
-
-		se->statistics.block_start = 0;
-		se->statistics.sum_sleep_runtime += delta;
-
-		if (tsk) {
-			if (tsk->in_iowait) {
-				se->statistics.iowait_sum += delta;
-				se->statistics.iowait_count++;
-				trace_sched_stat_iowait(tsk, delta);
-			}
-
-			trace_sched_stat_blocked(tsk, delta);
-
-			/*
-			 * Blocking time is in units of nanosecs, so shift by
-			 * 20 to get a milliseconds-range estimation of the
-			 * amount of time that the task spent sleeping:
-			 */
-			if (unlikely(prof_on == SLEEP_PROFILING)) {
-				profile_hits(SLEEP_PROFILING,
-						(void *)get_wchan(tsk),
-						delta >> 20);
-			}
-			account_scheduler_latency(tsk, delta >> 10, 0);
-		}
-	}
-#endif
-}
-
 static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 #ifdef CONFIG_SCHED_DEBUG
@@ -3287,15 +3294,12 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
 
-	if (flags & ENQUEUE_WAKEUP) {
+	if (flags & ENQUEUE_WAKEUP)
 		place_entity(cfs_rq, se, 0);
-		if (schedstat_enabled())
-			enqueue_sleeper(cfs_rq, se);
-	}
 
 	check_schedstat_required();
 	if (schedstat_enabled()) {
-		update_stats_enqueue(cfs_rq, se);
+		update_stats_enqueue(cfs_rq, se, flags);
 		check_spread(cfs_rq, se);
 	}
 	if (!curr)
-- 
2.4.11

* [PATCH 2/5] sched/debug: schedstat macro cleanup
  2016-06-17 17:43 [PATCH 0/5] sched/debug: decouple sched_stat tracepoints from CONFIG_SCHEDSTATS Josh Poimboeuf
  2016-06-17 17:43 ` [PATCH 1/5] sched/debug: rename and move enqueue_sleeper() Josh Poimboeuf
@ 2016-06-17 17:43 ` Josh Poimboeuf
  2016-09-05 11:57   ` [tip:sched/core] sched/debug: Clean up schedstat macros tip-bot for Josh Poimboeuf
  2016-06-17 17:43 ` [PATCH 3/5] sched/debug: 'schedstat_val()' -> 'schedstat_val_or_zero()' Josh Poimboeuf
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 17+ messages in thread
From: Josh Poimboeuf @ 2016-06-17 17:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra
  Cc: linux-kernel, Mel Gorman, Matt Fleming, Srikar Dronamraju

The schedstat_*() macros are inconsistent: most of them take a pointer
and a field, which the macro combines, whereas schedstat_set() takes
the already combined ptr->field.

The already combined ptr->field argument is actually more intuitive and
easier to use, and there's no reason to require the user to split the
expression up, so convert the macros to use the combined argument.
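
For illustration, a before/after sketch of one typical call site
(yld_count, as converted in the diff below):

  schedstat_inc(rq, yld_count);		/* before: pointer and field */
  schedstat_inc(rq->yld_count);		/* after: combined expression */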

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 kernel/sched/core.c      | 22 +++++++++++-----------
 kernel/sched/debug.c     |  4 ++--
 kernel/sched/fair.c      | 42 +++++++++++++++++++++---------------------
 kernel/sched/idle_task.c |  2 +-
 kernel/sched/stats.h     | 22 +++++++++++-----------
 5 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 31c30e5..017a4f5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1633,16 +1633,16 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 	int this_cpu = smp_processor_id();
 
 	if (cpu == this_cpu) {
-		schedstat_inc(rq, ttwu_local);
-		schedstat_inc(p, se.statistics.nr_wakeups_local);
+		schedstat_inc(rq->ttwu_local);
+		schedstat_inc(p->se.statistics.nr_wakeups_local);
 	} else {
 		struct sched_domain *sd;
 
-		schedstat_inc(p, se.statistics.nr_wakeups_remote);
+		schedstat_inc(p->se.statistics.nr_wakeups_remote);
 		rcu_read_lock();
 		for_each_domain(this_cpu, sd) {
 			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
-				schedstat_inc(sd, ttwu_wake_remote);
+				schedstat_inc(sd->ttwu_wake_remote);
 				break;
 			}
 		}
@@ -1650,15 +1650,15 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 	}
 
 	if (wake_flags & WF_MIGRATED)
-		schedstat_inc(p, se.statistics.nr_wakeups_migrate);
+		schedstat_inc(p->se.statistics.nr_wakeups_migrate);
 
 #endif /* CONFIG_SMP */
 
-	schedstat_inc(rq, ttwu_count);
-	schedstat_inc(p, se.statistics.nr_wakeups);
+	schedstat_inc(rq->ttwu_count);
+	schedstat_inc(p->se.statistics.nr_wakeups);
 
 	if (wake_flags & WF_SYNC)
-		schedstat_inc(p, se.statistics.nr_wakeups_sync);
+		schedstat_inc(p->se.statistics.nr_wakeups_sync);
 
 #endif /* CONFIG_SCHEDSTATS */
 }
@@ -3181,7 +3181,7 @@ static inline void schedule_debug(struct task_struct *prev)
 
 	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
 
-	schedstat_inc(this_rq(), sched_count);
+	schedstat_inc(this_rq()->sched_count);
 }
 
 /*
@@ -4792,7 +4792,7 @@ SYSCALL_DEFINE0(sched_yield)
 {
 	struct rq *rq = this_rq_lock();
 
-	schedstat_inc(rq, yld_count);
+	schedstat_inc(rq->yld_count);
 	current->sched_class->yield_task(rq);
 
 	/*
@@ -4943,7 +4943,7 @@ again:
 
 	yielded = curr->sched_class->yield_to_task(rq, p, preempt);
 	if (yielded) {
-		schedstat_inc(rq, yld_count);
+		schedstat_inc(rq->yld_count);
 		/*
 		 * Make p's CPU reschedule; pick_next_entity takes care of
 		 * fairness.
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 2a0a999..92fa534 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -429,9 +429,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 		p->prio);
 
 	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-		SPLIT_NS(schedstat_val(p, se.statistics.wait_sum)),
+		SPLIT_NS(schedstat_val(p->se.statistics.wait_sum)),
 		SPLIT_NS(p->se.sum_exec_runtime),
-		SPLIT_NS(schedstat_val(p, se.statistics.sum_sleep_runtime)));
+		SPLIT_NS(schedstat_val(p->se.statistics.sum_sleep_runtime)));
 
 #ifdef CONFIG_NUMA_BALANCING
 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 71be269..7257066 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -768,7 +768,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 		      max(delta_exec, curr->statistics.exec_max));
 
 	curr->sum_exec_runtime += delta_exec;
-	schedstat_add(cfs_rq, exec_clock, delta_exec);
+	schedstat_add(cfs_rq->exec_clock, delta_exec);
 
 	curr->vruntime += calc_delta_fair(delta_exec, curr);
 	update_min_vruntime(cfs_rq);
@@ -3177,7 +3177,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		d = -d;
 
 	if (d > 3*sysctl_sched_latency)
-		schedstat_inc(cfs_rq, nr_spread_over);
+		schedstat_inc(cfs_rq->nr_spread_over);
 #endif
 }
 
@@ -5044,13 +5044,13 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 
 	balanced = this_eff_load <= prev_eff_load;
 
-	schedstat_inc(p, se.statistics.nr_wakeups_affine_attempts);
+	schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
 
 	if (!balanced)
 		return 0;
 
-	schedstat_inc(sd, ttwu_move_affine);
-	schedstat_inc(p, se.statistics.nr_wakeups_affine);
+	schedstat_inc(sd->ttwu_move_affine);
+	schedstat_inc(p->se.statistics.nr_wakeups_affine);
 
 	return 1;
 }
@@ -6031,7 +6031,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) {
 		int cpu;
 
-		schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
+		schedstat_inc(p->se.statistics.nr_failed_migrations_affine);
 
 		env->flags |= LBF_SOME_PINNED;
 
@@ -6062,7 +6062,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	env->flags &= ~LBF_ALL_PINNED;
 
 	if (task_running(env->src_rq, p)) {
-		schedstat_inc(p, se.statistics.nr_failed_migrations_running);
+		schedstat_inc(p->se.statistics.nr_failed_migrations_running);
 		return 0;
 	}
 
@@ -6079,13 +6079,13 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	if (tsk_cache_hot <= 0 ||
 	    env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
 		if (tsk_cache_hot == 1) {
-			schedstat_inc(env->sd, lb_hot_gained[env->idle]);
-			schedstat_inc(p, se.statistics.nr_forced_migrations);
+			schedstat_inc(env->sd->lb_hot_gained[env->idle]);
+			schedstat_inc(p->se.statistics.nr_forced_migrations);
 		}
 		return 1;
 	}
 
-	schedstat_inc(p, se.statistics.nr_failed_migrations_hot);
+	schedstat_inc(p->se.statistics.nr_failed_migrations_hot);
 	return 0;
 }
 
@@ -6125,7 +6125,7 @@ static struct task_struct *detach_one_task(struct lb_env *env)
 		 * so we can safely collect stats here rather than
 		 * inside detach_tasks().
 		 */
-		schedstat_inc(env->sd, lb_gained[env->idle]);
+		schedstat_inc(env->sd->lb_gained[env->idle]);
 		return p;
 	}
 	return NULL;
@@ -6217,7 +6217,7 @@ next:
 	 * so we can safely collect detach_one_task() stats here rather
 	 * than inside detach_one_task().
 	 */
-	schedstat_add(env->sd, lb_gained[env->idle], detached);
+	schedstat_add(env->sd->lb_gained[env->idle], detached);
 
 	return detached;
 }
@@ -7358,7 +7358,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 
 	cpumask_copy(cpus, cpu_active_mask);
 
-	schedstat_inc(sd, lb_count[idle]);
+	schedstat_inc(sd->lb_count[idle]);
 
 redo:
 	if (!should_we_balance(&env)) {
@@ -7368,19 +7368,19 @@ redo:
 
 	group = find_busiest_group(&env);
 	if (!group) {
-		schedstat_inc(sd, lb_nobusyg[idle]);
+		schedstat_inc(sd->lb_nobusyg[idle]);
 		goto out_balanced;
 	}
 
 	busiest = find_busiest_queue(&env, group);
 	if (!busiest) {
-		schedstat_inc(sd, lb_nobusyq[idle]);
+		schedstat_inc(sd->lb_nobusyq[idle]);
 		goto out_balanced;
 	}
 
 	BUG_ON(busiest == env.dst_rq);
 
-	schedstat_add(sd, lb_imbalance[idle], env.imbalance);
+	schedstat_add(sd->lb_imbalance[idle], env.imbalance);
 
 	env.src_cpu = busiest->cpu;
 	env.src_rq = busiest;
@@ -7487,7 +7487,7 @@ more_balance:
 	}
 
 	if (!ld_moved) {
-		schedstat_inc(sd, lb_failed[idle]);
+		schedstat_inc(sd->lb_failed[idle]);
 		/*
 		 * Increment the failure counter only on periodic balance.
 		 * We do not want newidle balance, which can be very
@@ -7570,7 +7570,7 @@ out_all_pinned:
 	 * we can't migrate them. Let the imbalance flag set so parent level
 	 * can try to migrate them.
 	 */
-	schedstat_inc(sd, lb_balanced[idle]);
+	schedstat_inc(sd->lb_balanced[idle]);
 
 	sd->nr_balance_failed = 0;
 
@@ -7762,15 +7762,15 @@ static int active_load_balance_cpu_stop(void *data)
 			.idle		= CPU_IDLE,
 		};
 
-		schedstat_inc(sd, alb_count);
+		schedstat_inc(sd->alb_count);
 
 		p = detach_one_task(&env);
 		if (p) {
-			schedstat_inc(sd, alb_pushed);
+			schedstat_inc(sd->alb_pushed);
 			/* Active balancing done, reset the failure counter. */
 			sd->nr_balance_failed = 0;
 		} else {
-			schedstat_inc(sd, alb_failed);
+			schedstat_inc(sd->alb_failed);
 		}
 	}
 	rcu_read_unlock();
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index 2ce5458..dedc81ec 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -28,7 +28,7 @@ pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct pin_cookie c
 {
 	put_prev_task(rq, prev);
 
-	schedstat_inc(rq, sched_goidle);
+	schedstat_inc(rq->sched_goidle);
 	return rq->idle;
 }
 
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 78955cb..fc05425 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -29,11 +29,11 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
 	if (rq)
 		rq->rq_sched_info.run_delay += delta;
 }
-# define schedstat_enabled()		static_branch_unlikely(&sched_schedstats)
-# define schedstat_inc(rq, field)	do { if (schedstat_enabled()) { (rq)->field++; } } while (0)
-# define schedstat_add(rq, field, amt)	do { if (schedstat_enabled()) { (rq)->field += (amt); } } while (0)
-# define schedstat_set(var, val)	do { if (schedstat_enabled()) { var = (val); } } while (0)
-# define schedstat_val(rq, field)	((schedstat_enabled()) ? (rq)->field : 0)
+#define schedstat_enabled()		static_branch_unlikely(&sched_schedstats)
+#define schedstat_inc(var)		do { if (schedstat_enabled()) { var++; } } while (0)
+#define schedstat_add(var, amt)		do { if (schedstat_enabled()) { var += (amt); } } while (0)
+#define schedstat_set(var, val)		do { if (schedstat_enabled()) { var = (val); } } while (0)
+#define schedstat_val(var)		((schedstat_enabled()) ? (var) : 0)
 
 #else /* !CONFIG_SCHEDSTATS */
 static inline void
@@ -45,12 +45,12 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
 static inline void
 rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 {}
-# define schedstat_enabled()		0
-# define schedstat_inc(rq, field)	do { } while (0)
-# define schedstat_add(rq, field, amt)	do { } while (0)
-# define schedstat_set(var, val)	do { } while (0)
-# define schedstat_val(rq, field)	0
-#endif
+#define schedstat_enabled()		0
+#define schedstat_inc(var)		do { } while (0)
+#define schedstat_add(var, amt)		do { } while (0)
+#define schedstat_set(var, val)		do { } while (0)
+#define schedstat_val(var)		0
+#endif /* CONFIG_SCHEDSTATS */
 
 #ifdef CONFIG_SCHED_INFO
 static inline void sched_info_reset_dequeued(struct task_struct *t)
-- 
2.4.11

* [PATCH 3/5] sched/debug: 'schedstat_val()' -> 'schedstat_val_or_zero()'
  2016-06-17 17:43 [PATCH 0/5] sched/debug: decouple sched_stat tracepoints from CONFIG_SCHEDSTATS Josh Poimboeuf
  2016-06-17 17:43 ` [PATCH 1/5] sched/debug: rename and move enqueue_sleeper() Josh Poimboeuf
  2016-06-17 17:43 ` [PATCH 2/5] sched/debug: schedstat macro cleanup Josh Poimboeuf
@ 2016-06-17 17:43 ` Josh Poimboeuf
  2016-09-05 11:57   ` [tip:sched/core] sched/debug: Rename " tip-bot for Josh Poimboeuf
  2016-06-17 17:43 ` [PATCH 4/5] sched/debug: remove several CONFIG_SCHEDSTATS guards Josh Poimboeuf
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 17+ messages in thread
From: Josh Poimboeuf @ 2016-06-17 17:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra
  Cc: linux-kernel, Mel Gorman, Matt Fleming, Srikar Dronamraju

The schedstat_val() macro's behavior is somewhat surprising: when
schedstats are disabled at runtime, it returns zero.  Rename it to
schedstat_val_or_zero() to make that behavior explicit.

There's also a need for a similar macro which doesn't have the 'if
(schedstat_enabled())' check, to avoid doing the check twice.  Create a
new 'schedstat_val()' macro for that.
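
A sketch of the redundant check that the raw accessor avoids (this
wait_start usage mirrors a later patch in this series):

  	if (!schedstat_enabled())
  		return;

  	/*
  	 * schedstat_val_or_zero() would test the static branch a second
  	 * time here; the raw schedstat_val() reads the field directly:
  	 */
  	delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(se->statistics.wait_start);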

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 kernel/sched/debug.c | 4 ++--
 kernel/sched/stats.h | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 92fa534..63ffcaa 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -429,9 +429,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 		p->prio);
 
 	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-		SPLIT_NS(schedstat_val(p->se.statistics.wait_sum)),
+		SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)),
 		SPLIT_NS(p->se.sum_exec_runtime),
-		SPLIT_NS(schedstat_val(p->se.statistics.sum_sleep_runtime)));
+		SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime)));
 
 #ifdef CONFIG_NUMA_BALANCING
 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index fc05425..34659a8 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -33,7 +33,8 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
 #define schedstat_inc(var)		do { if (schedstat_enabled()) { var++; } } while (0)
 #define schedstat_add(var, amt)		do { if (schedstat_enabled()) { var += (amt); } } while (0)
 #define schedstat_set(var, val)		do { if (schedstat_enabled()) { var = (val); } } while (0)
-#define schedstat_val(var)		((schedstat_enabled()) ? (var) : 0)
+#define schedstat_val(var)		(var)
+#define schedstat_val_or_zero(var)	((schedstat_enabled()) ? (var) : 0)
 
 #else /* !CONFIG_SCHEDSTATS */
 static inline void
@@ -50,6 +51,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 #define schedstat_add(var, amt)		do { } while (0)
 #define schedstat_set(var, val)		do { } while (0)
 #define schedstat_val(var)		0
+#define schedstat_val_or_zero(var)	0
 #endif /* CONFIG_SCHEDSTATS */
 
 #ifdef CONFIG_SCHED_INFO
-- 
2.4.11

* [PATCH 4/5] sched/debug: remove several CONFIG_SCHEDSTATS guards
  2016-06-17 17:43 [PATCH 0/5] sched/debug: decouple sched_stat tracepoints from CONFIG_SCHEDSTATS Josh Poimboeuf
                   ` (2 preceding siblings ...)
  2016-06-17 17:43 ` [PATCH 3/5] sched/debug: 'schedstat_val()' -> 'schedstat_val_or_zero()' Josh Poimboeuf
@ 2016-06-17 17:43 ` Josh Poimboeuf
  2016-06-27 16:21   ` Peter Zijlstra
  2016-09-05 11:57   ` [tip:sched/core] sched/debug: Remove " tip-bot for Josh Poimboeuf
  2016-06-17 17:43 ` [PATCH 5/5] sched/debug: decouple 'sched_stat_*' tracepoints from CONFIG_SCHEDSTATS Josh Poimboeuf
                   ` (2 subsequent siblings)
  6 siblings, 2 replies; 17+ messages in thread
From: Josh Poimboeuf @ 2016-06-17 17:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra
  Cc: linux-kernel, Mel Gorman, Matt Fleming, Srikar Dronamraju

Clean up the sched code by removing several of the CONFIG_SCHEDSTATS
guards, using schedstat_*() macros where needed.
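
The recurring transformation, sketched (a condensed illustration of the
pattern rather than a verbatim hunk):

  	/* before: statistics code compiled out entirely */
  #ifdef CONFIG_SCHEDSTATS
  	se->statistics.wait_count++;
  #endif

  	/* after: always compiled, gated by the schedstats static branch */
  	if (!schedstat_enabled())
  		return;
  	schedstat_inc(se->statistics.wait_count);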

Code size:

  !CONFIG_SCHEDSTATS defconfig:

      text	   data	    bss	     dec	    hex	filename
  10209818	4368184	1105920	15683922	 ef5152	vmlinux.before.nostats
  10209818	4368184	1105920	15683922	 ef5152	vmlinux.after.nostats

  CONFIG_SCHEDSTATS defconfig:

      text	   data	    bss	    dec	    hex	filename
  10214210	4370040	1105920	15690170	 ef69ba	vmlinux.before.stats
  10214210	4370680	1105920	15690810	 ef6c3a	vmlinux.after.stats

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 kernel/sched/core.c  |  29 +++++-----
 kernel/sched/debug.c |  99 ++++++++++++++++++----------------
 kernel/sched/fair.c  | 148 ++++++++++++++++++++++++---------------------------
 3 files changed, 136 insertions(+), 140 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 017a4f5..0aee5dd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1626,10 +1626,15 @@ static inline int __set_cpus_allowed_ptr(struct task_struct *p,
 static void
 ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 {
-#ifdef CONFIG_SCHEDSTATS
-	struct rq *rq = this_rq();
+	struct rq *rq;
+
+	if (!schedstat_enabled())
+		return;
+
+	rq = this_rq();
 
 #ifdef CONFIG_SMP
+{
 	int this_cpu = smp_processor_id();
 
 	if (cpu == this_cpu) {
@@ -1651,7 +1656,7 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 
 	if (wake_flags & WF_MIGRATED)
 		schedstat_inc(p->se.statistics.nr_wakeups_migrate);
-
+}
 #endif /* CONFIG_SMP */
 
 	schedstat_inc(rq->ttwu_count);
@@ -1659,8 +1664,6 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 
 	if (wake_flags & WF_SYNC)
 		schedstat_inc(p->se.statistics.nr_wakeups_sync);
-
-#endif /* CONFIG_SCHEDSTATS */
 }
 
 static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
@@ -2059,8 +2062,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 
 	ttwu_queue(p, cpu, wake_flags);
 stat:
-	if (schedstat_enabled())
-		ttwu_stat(p, cpu, wake_flags);
+	ttwu_stat(p, cpu, wake_flags);
 out:
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
@@ -2108,8 +2110,7 @@ static void try_to_wake_up_local(struct task_struct *p, struct pin_cookie cookie
 		ttwu_activate(rq, p, ENQUEUE_WAKEUP);
 
 	ttwu_do_wakeup(rq, p, 0, cookie);
-	if (schedstat_enabled())
-		ttwu_stat(p, smp_processor_id(), 0);
+	ttwu_stat(p, smp_processor_id(), 0);
 out:
 	raw_spin_unlock(&p->pi_lock);
 }
@@ -7588,12 +7589,10 @@ void normalize_rt_tasks(void)
 		if (p->flags & PF_KTHREAD)
 			continue;
 
-		p->se.exec_start		= 0;
-#ifdef CONFIG_SCHEDSTATS
-		p->se.statistics.wait_start	= 0;
-		p->se.statistics.sleep_start	= 0;
-		p->se.statistics.block_start	= 0;
-#endif
+		p->se.exec_start = 0;
+		schedstat_set(p->se.statistics.wait_start,  0);
+		schedstat_set(p->se.statistics.sleep_start, 0);
+		schedstat_set(p->se.statistics.block_start, 0);
 
 		if (!dl_task(p) && !rt_task(p)) {
 			/*
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 63ffcaa..1393588 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -369,8 +369,12 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 
 #define P(F) \
 	SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)F)
+#define P_SCHEDSTAT(F) \
+	SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)schedstat_val(F))
 #define PN(F) \
 	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
+#define PN_SCHEDSTAT(F) \
+	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
 
 	if (!se)
 		return;
@@ -378,26 +382,27 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 	PN(se->exec_start);
 	PN(se->vruntime);
 	PN(se->sum_exec_runtime);
-#ifdef CONFIG_SCHEDSTATS
 	if (schedstat_enabled()) {
-		PN(se->statistics.wait_start);
-		PN(se->statistics.sleep_start);
-		PN(se->statistics.block_start);
-		PN(se->statistics.sleep_max);
-		PN(se->statistics.block_max);
-		PN(se->statistics.exec_max);
-		PN(se->statistics.slice_max);
-		PN(se->statistics.wait_max);
-		PN(se->statistics.wait_sum);
-		P(se->statistics.wait_count);
+		PN_SCHEDSTAT(se->statistics.wait_start);
+		PN_SCHEDSTAT(se->statistics.sleep_start);
+		PN_SCHEDSTAT(se->statistics.block_start);
+		PN_SCHEDSTAT(se->statistics.sleep_max);
+		PN_SCHEDSTAT(se->statistics.block_max);
+		PN_SCHEDSTAT(se->statistics.exec_max);
+		PN_SCHEDSTAT(se->statistics.slice_max);
+		PN_SCHEDSTAT(se->statistics.wait_max);
+		PN_SCHEDSTAT(se->statistics.wait_sum);
+		P_SCHEDSTAT(se->statistics.wait_count);
 	}
-#endif
 	P(se->load.weight);
 #ifdef CONFIG_SMP
 	P(se->avg.load_avg);
 	P(se->avg.util_avg);
 #endif
+
+#undef PN_SCHEDSTAT
 #undef PN
+#undef P_SCHEDSTAT
 #undef P
 }
 #endif
@@ -626,9 +631,7 @@ do {									\
 #undef P64
 #endif
 
-#ifdef CONFIG_SCHEDSTATS
-#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
-
+#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, schedstat_val(rq->n));
 	if (schedstat_enabled()) {
 		P(yld_count);
 		P(sched_count);
@@ -636,9 +639,8 @@ do {									\
 		P(ttwu_count);
 		P(ttwu_local);
 	}
-
 #undef P
-#endif
+
 	spin_lock_irqsave(&sched_debug_lock, flags);
 	print_cfs_stats(m, cpu);
 	print_rt_stats(m, cpu);
@@ -868,10 +870,14 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
 #define P(F) \
 	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
+#define P_SCHEDSTAT(F) \
+	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)schedstat_val(p->F))
 #define __PN(F) \
 	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
 #define PN(F) \
 	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
+#define PN_SCHEDSTAT(F) \
+	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(p->F)))
 
 	PN(se.exec_start);
 	PN(se.vruntime);
@@ -881,37 +887,36 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 
 	P(se.nr_migrations);
 
-#ifdef CONFIG_SCHEDSTATS
 	if (schedstat_enabled()) {
 		u64 avg_atom, avg_per_cpu;
 
-		PN(se.statistics.sum_sleep_runtime);
-		PN(se.statistics.wait_start);
-		PN(se.statistics.sleep_start);
-		PN(se.statistics.block_start);
-		PN(se.statistics.sleep_max);
-		PN(se.statistics.block_max);
-		PN(se.statistics.exec_max);
-		PN(se.statistics.slice_max);
-		PN(se.statistics.wait_max);
-		PN(se.statistics.wait_sum);
-		P(se.statistics.wait_count);
-		PN(se.statistics.iowait_sum);
-		P(se.statistics.iowait_count);
-		P(se.statistics.nr_migrations_cold);
-		P(se.statistics.nr_failed_migrations_affine);
-		P(se.statistics.nr_failed_migrations_running);
-		P(se.statistics.nr_failed_migrations_hot);
-		P(se.statistics.nr_forced_migrations);
-		P(se.statistics.nr_wakeups);
-		P(se.statistics.nr_wakeups_sync);
-		P(se.statistics.nr_wakeups_migrate);
-		P(se.statistics.nr_wakeups_local);
-		P(se.statistics.nr_wakeups_remote);
-		P(se.statistics.nr_wakeups_affine);
-		P(se.statistics.nr_wakeups_affine_attempts);
-		P(se.statistics.nr_wakeups_passive);
-		P(se.statistics.nr_wakeups_idle);
+		PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
+		PN_SCHEDSTAT(se.statistics.wait_start);
+		PN_SCHEDSTAT(se.statistics.sleep_start);
+		PN_SCHEDSTAT(se.statistics.block_start);
+		PN_SCHEDSTAT(se.statistics.sleep_max);
+		PN_SCHEDSTAT(se.statistics.block_max);
+		PN_SCHEDSTAT(se.statistics.exec_max);
+		PN_SCHEDSTAT(se.statistics.slice_max);
+		PN_SCHEDSTAT(se.statistics.wait_max);
+		PN_SCHEDSTAT(se.statistics.wait_sum);
+		P_SCHEDSTAT(se.statistics.wait_count);
+		PN_SCHEDSTAT(se.statistics.iowait_sum);
+		P_SCHEDSTAT(se.statistics.iowait_count);
+		P_SCHEDSTAT(se.statistics.nr_migrations_cold);
+		P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
+		P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
+		P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
+		P_SCHEDSTAT(se.statistics.nr_forced_migrations);
+		P_SCHEDSTAT(se.statistics.nr_wakeups);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_sync);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_migrate);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_local);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_remote);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_affine);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
 
 		avg_atom = p->se.sum_exec_runtime;
 		if (nr_switches)
@@ -930,7 +935,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 		__PN(avg_atom);
 		__PN(avg_per_cpu);
 	}
-#endif
+
 	__P(nr_switches);
 	SEQ_printf(m, "%-45s:%21Ld\n",
 		   "nr_voluntary_switches", (long long)p->nvcsw);
@@ -947,8 +952,10 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 #endif
 	P(policy);
 	P(prio);
+#undef PN_SCHEDSTAT
 #undef PN
 #undef __PN
+#undef P_SCHEDSTAT
 #undef P
 #undef __P
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7257066..ae70600 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -789,26 +789,34 @@ static void update_curr_fair(struct rq *rq)
 	update_curr(cfs_rq_of(&rq->curr->se));
 }
 
-#ifdef CONFIG_SCHEDSTATS
 static inline void
 update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	u64 wait_start = rq_clock(rq_of(cfs_rq));
+	u64 wait_start, prev_wait_start;
+
+	if (!schedstat_enabled())
+		return;
+
+	wait_start = rq_clock(rq_of(cfs_rq));
+	prev_wait_start = schedstat_val(se->statistics.wait_start);
 
 	if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) &&
-	    likely(wait_start > se->statistics.wait_start))
-		wait_start -= se->statistics.wait_start;
+	    likely(wait_start > prev_wait_start))
+		wait_start -= prev_wait_start;
 
-	se->statistics.wait_start = wait_start;
+	schedstat_set(se->statistics.wait_start, wait_start);
 }
 
-static void
+static inline void
 update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	struct task_struct *p;
 	u64 delta;
 
-	delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
+	if (!schedstat_enabled())
+		return;
+
+	delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(se->statistics.wait_start);
 
 	if (entity_is_task(se)) {
 		p = task_of(se);
@@ -818,59 +826,67 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			 * time stamp can be adjusted to accumulate wait time
 			 * prior to migration.
 			 */
-			se->statistics.wait_start = delta;
+			schedstat_set(se->statistics.wait_start, delta);
 			return;
 		}
 		trace_sched_stat_wait(p, delta);
 	}
 
-	se->statistics.wait_max = max(se->statistics.wait_max, delta);
-	se->statistics.wait_count++;
-	se->statistics.wait_sum += delta;
-	se->statistics.wait_start = 0;
+	schedstat_set(se->statistics.wait_max,
+		      max(schedstat_val(se->statistics.wait_max), delta));
+	schedstat_inc(se->statistics.wait_count);
+	schedstat_add(se->statistics.wait_sum, delta);
+	schedstat_set(se->statistics.wait_start, 0);
 }
 
-static void
+static inline void
 update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	struct task_struct *tsk = NULL;
+	u64 sleep_start, block_start;
+
+	if (!schedstat_enabled())
+		return;
+
+	sleep_start = schedstat_val(se->statistics.sleep_start);
+	block_start = schedstat_val(se->statistics.block_start);
 
 	if (entity_is_task(se))
 		tsk = task_of(se);
 
-	if (se->statistics.sleep_start) {
-		u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.sleep_start;
+	if (sleep_start) {
+		u64 delta = rq_clock(rq_of(cfs_rq)) - sleep_start;
 
 		if ((s64)delta < 0)
 			delta = 0;
 
-		if (unlikely(delta > se->statistics.sleep_max))
-			se->statistics.sleep_max = delta;
+		if (unlikely(delta > schedstat_val(se->statistics.sleep_max)))
+			schedstat_set(se->statistics.sleep_max, delta);
 
-		se->statistics.sleep_start = 0;
-		se->statistics.sum_sleep_runtime += delta;
+		schedstat_set(se->statistics.sleep_start, 0);
+		schedstat_add(se->statistics.sum_sleep_runtime, delta);
 
 		if (tsk) {
 			account_scheduler_latency(tsk, delta >> 10, 1);
 			trace_sched_stat_sleep(tsk, delta);
 		}
 	}
-	if (se->statistics.block_start) {
-		u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.block_start;
+	if (block_start) {
+		u64 delta = rq_clock(rq_of(cfs_rq)) - block_start;
 
 		if ((s64)delta < 0)
 			delta = 0;
 
-		if (unlikely(delta > se->statistics.block_max))
-			se->statistics.block_max = delta;
+		if (unlikely(delta > schedstat_val(se->statistics.block_max)))
+			schedstat_set(se->statistics.block_max, delta);
 
-		se->statistics.block_start = 0;
-		se->statistics.sum_sleep_runtime += delta;
+		schedstat_set(se->statistics.block_start, 0);
+		schedstat_add(se->statistics.sum_sleep_runtime, delta);
 
 		if (tsk) {
 			if (tsk->in_iowait) {
-				se->statistics.iowait_sum += delta;
-				se->statistics.iowait_count++;
+				schedstat_add(se->statistics.iowait_sum, delta);
+				schedstat_inc(se->statistics.iowait_count);
 				trace_sched_stat_iowait(tsk, delta);
 			}
 
@@ -897,6 +913,9 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 static inline void
 update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
+	if (!schedstat_enabled())
+		return;
+
 	/*
 	 * Are we enqueueing a waiting task? (for current tasks
 	 * a dequeue/enqueue event is a NOP)
@@ -911,6 +930,10 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 static inline void
 update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
+
+	if (!schedstat_enabled())
+		return;
+
 	/*
 	 * Mark the end of the wait period if dequeueing a
 	 * waiting task:
@@ -918,45 +941,18 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	if (se != cfs_rq->curr)
 		update_stats_wait_end(cfs_rq, se);
 
-	if (flags & DEQUEUE_SLEEP) {
-		if (entity_is_task(se)) {
-			struct task_struct *tsk = task_of(se);
+	if ((flags & DEQUEUE_SLEEP) && entity_is_task(se)) {
+		struct task_struct *tsk = task_of(se);
 
-			if (tsk->state & TASK_INTERRUPTIBLE)
-				se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
-			if (tsk->state & TASK_UNINTERRUPTIBLE)
-				se->statistics.block_start = rq_clock(rq_of(cfs_rq));
-		}
+		if (tsk->state & TASK_INTERRUPTIBLE)
+			schedstat_set(se->statistics.sleep_start,
+				      rq_clock(rq_of(cfs_rq)));
+		if (tsk->state & TASK_UNINTERRUPTIBLE)
+			schedstat_set(se->statistics.block_start,
+				      rq_clock(rq_of(cfs_rq)));
 	}
-
-}
-#else
-static inline void
-update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-
-static inline void
-update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-
-static inline void
-update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
 }
 
-static inline void
-update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
-{
-}
-
-static inline void
-update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
-{
-}
-#endif
-
 /*
  * We are picking a new current task - update its stats:
  */
@@ -3298,10 +3294,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		place_entity(cfs_rq, se, 0);
 
 	check_schedstat_required();
-	if (schedstat_enabled()) {
-		update_stats_enqueue(cfs_rq, se, flags);
-		check_spread(cfs_rq, se);
-	}
+	update_stats_enqueue(cfs_rq, se, flags);
+	check_spread(cfs_rq, se);
 	if (!curr)
 		__enqueue_entity(cfs_rq, se);
 	se->on_rq = 1;
@@ -3368,8 +3362,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	update_curr(cfs_rq);
 	dequeue_entity_load_avg(cfs_rq, se);
 
-	if (schedstat_enabled())
-		update_stats_dequeue(cfs_rq, se, flags);
+	update_stats_dequeue(cfs_rq, se, flags);
 
 	clear_buddies(cfs_rq, se);
 
@@ -3443,25 +3436,25 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		 * a CPU. So account for the time it spent waiting on the
 		 * runqueue.
 		 */
-		if (schedstat_enabled())
-			update_stats_wait_end(cfs_rq, se);
+		update_stats_wait_end(cfs_rq, se);
 		__dequeue_entity(cfs_rq, se);
 		update_load_avg(se, 1);
 	}
 
 	update_stats_curr_start(cfs_rq, se);
 	cfs_rq->curr = se;
-#ifdef CONFIG_SCHEDSTATS
+
 	/*
 	 * Track our maximum slice length, if the CPU's load is at
 	 * least twice that of our own weight (i.e. dont track it
 	 * when there are only lesser-weight tasks around):
 	 */
 	if (schedstat_enabled() && rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
-		se->statistics.slice_max = max(se->statistics.slice_max,
-			se->sum_exec_runtime - se->prev_sum_exec_runtime);
+		schedstat_set(se->statistics.slice_max,
+			max((u64)schedstat_val(se->statistics.slice_max),
+			    se->sum_exec_runtime - se->prev_sum_exec_runtime));
 	}
-#endif
+
 	se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 
@@ -3540,13 +3533,10 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
 	/* throttle cfs_rqs exceeding runtime */
 	check_cfs_rq_runtime(cfs_rq);
 
-	if (schedstat_enabled()) {
-		check_spread(cfs_rq, prev);
-		if (prev->on_rq)
-			update_stats_wait_start(cfs_rq, prev);
-	}
+	check_spread(cfs_rq, prev);
 
 	if (prev->on_rq) {
+		update_stats_wait_start(cfs_rq, prev);
 		/* Put 'current' back into the tree. */
 		__enqueue_entity(cfs_rq, prev);
 		/* in !on_rq case, update occurred at dequeue */
-- 
2.4.11

* [PATCH 5/5] sched/debug: decouple 'sched_stat_*' tracepoints from CONFIG_SCHEDSTATS
  2016-06-17 17:43 [PATCH 0/5] sched/debug: decouple sched_stat tracepoints from CONFIG_SCHEDSTATS Josh Poimboeuf
                   ` (3 preceding siblings ...)
  2016-06-17 17:43 ` [PATCH 4/5] sched/debug: remove several CONFIG_SCHEDSTATS guards Josh Poimboeuf
@ 2016-06-17 17:43 ` Josh Poimboeuf
  2016-06-21  8:27 ` [PATCH 0/5] sched/debug: decouple sched_stat tracepoints " Srikar Dronamraju
  2016-06-28 12:43 ` Peter Zijlstra
  6 siblings, 0 replies; 17+ messages in thread
From: Josh Poimboeuf @ 2016-06-17 17:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra
  Cc: linux-kernel, Mel Gorman, Matt Fleming, Srikar Dronamraju

The 'sched_stat_*' tracepoints have a dependency on schedstats being
enabled at both compile-time (CONFIG_SCHEDSTATS) and runtime
(sched_schedstats).

In addition to increasing code complexity, this dependency also causes
confusion:

- When schedstats are disabled, the tracepoints don't fire at all.

- When schedstats are enabled at runtime, existing tasks will have stale
  or missing statistics, which can cause the tracepoints to either not
  fire, or to fire with corrupt data.

Decouple them so that the tracepoints will always fire correctly,
independent of schedstats.  A side benefit is that latencytop and
profiling no longer have a dependency on schedstats.
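
A condensed sketch of the resulting sleep-accounting flow (field names
as in the diff below).  The timestamp now lives in sched_entity itself,
so the tracepoint fires even when schedstats are disabled; only the
statistics accounting stays behind the schedstat_*() macros:

  	if (se->sleep_start) {
  		u64 delta = rq_clock(rq_of(cfs_rq)) - se->sleep_start;

  		se->sleep_start = 0;
  		schedstat_add(se->statistics.sum_sleep_runtime, delta);

  		if (tsk)
  			trace_sched_stat_sleep(tsk, delta);
  	}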

Code size:

  !CONFIG_SCHEDSTATS defconfig:

        text	   data	    bss	     dec	    hex	filename
    10209818	4368184	1105920	15683922	 ef5152	vmlinux.before.nostats
    10209818	4368312	1105920	15684050	 ef51d2	vmlinux.after.nostats

  CONFIG_SCHEDSTATS defconfig:

        text	   data	    bss	     dec	    hex	filename
    10214210	4370680	1105920	15690810	 ef6c3a	vmlinux.before.stats
    10214261	4370104	1105920	15690285	 ef6a2d	vmlinux.after.stats

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 include/linux/sched.h | 11 +++----
 kernel/latencytop.c   |  2 --
 kernel/profile.c      |  5 ----
 kernel/sched/core.c   | 16 +++--------
 kernel/sched/debug.c  | 13 +++++----
 kernel/sched/fair.c   | 80 ++++++++++++---------------------------------------
 lib/Kconfig.debug     |  1 -
 7 files changed, 34 insertions(+), 94 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e31758b..0ee8f04 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -940,10 +940,6 @@ static inline int sched_info_on(void)
 #endif
 }
 
-#ifdef CONFIG_SCHEDSTATS
-void force_schedstat_enabled(void);
-#endif
-
 enum cpu_idle_type {
 	CPU_IDLE,
 	CPU_NOT_IDLE,
@@ -1285,18 +1281,15 @@ struct sched_avg {
 
 #ifdef CONFIG_SCHEDSTATS
 struct sched_statistics {
-	u64			wait_start;
 	u64			wait_max;
 	u64			wait_count;
 	u64			wait_sum;
 	u64			iowait_count;
 	u64			iowait_sum;
 
-	u64			sleep_start;
 	u64			sleep_max;
 	s64			sum_sleep_runtime;
 
-	u64			block_start;
 	u64			block_max;
 	u64			exec_max;
 	u64			slice_max;
@@ -1332,6 +1325,10 @@ struct sched_entity {
 
 	u64			nr_migrations;
 
+	u64			wait_start;
+	u64			sleep_start;
+	u64			block_start;
+
 #ifdef CONFIG_SCHEDSTATS
 	struct sched_statistics statistics;
 #endif
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index b5c30d9..5ff6c54 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -296,8 +296,6 @@ int sysctl_latencytop(struct ctl_table *table, int write,
 	int err;
 
 	err = proc_dointvec(table, write, buffer, lenp, ppos);
-	if (latencytop_enabled)
-		force_schedstat_enabled();
 
 	return err;
 }
diff --git a/kernel/profile.c b/kernel/profile.c
index c2199e9..f033d79 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -58,8 +58,6 @@ int profile_setup(char *str)
 	int par;
 
 	if (!strncmp(str, sleepstr, strlen(sleepstr))) {
-#ifdef CONFIG_SCHEDSTATS
-		force_schedstat_enabled();
 		prof_on = SLEEP_PROFILING;
 		if (str[strlen(sleepstr)] == ',')
 			str += strlen(sleepstr) + 1;
@@ -67,9 +65,6 @@ int profile_setup(char *str)
 			prof_shift = par;
 		pr_info("kernel sleep profiling enabled (shift: %ld)\n",
 			prof_shift);
-#else
-		pr_warn("kernel sleep profiling requires CONFIG_SCHEDSTATS\n");
-#endif /* CONFIG_SCHEDSTATS */
 	} else if (!strncmp(str, schedstr, strlen(schedstr))) {
 		prof_on = SCHED_PROFILING;
 		if (str[strlen(schedstr)] == ',')
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0aee5dd..9befffb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2267,14 +2267,6 @@ static void set_schedstats(bool enabled)
 		static_branch_disable(&sched_schedstats);
 }
 
-void force_schedstat_enabled(void)
-{
-	if (!schedstat_enabled()) {
-		pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n");
-		static_branch_enable(&sched_schedstats);
-	}
-}
-
 static int __init setup_schedstats(char *str)
 {
 	int ret = 0;
@@ -7589,10 +7581,10 @@ void normalize_rt_tasks(void)
 		if (p->flags & PF_KTHREAD)
 			continue;
 
-		p->se.exec_start = 0;
-		schedstat_set(p->se.statistics.wait_start,  0);
-		schedstat_set(p->se.statistics.sleep_start, 0);
-		schedstat_set(p->se.statistics.block_start, 0);
+		p->se.exec_start  = 0;
+		p->se.wait_start  = 0;
+		p->se.sleep_start = 0;
+		p->se.block_start = 0;
 
 		if (!dl_task(p) && !rt_task(p)) {
 			/*
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 1393588..06be44a 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -382,10 +382,10 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 	PN(se->exec_start);
 	PN(se->vruntime);
 	PN(se->sum_exec_runtime);
+	PN(se->wait_start);
+	PN(se->sleep_start);
+	PN(se->block_start);
 	if (schedstat_enabled()) {
-		PN_SCHEDSTAT(se->statistics.wait_start);
-		PN_SCHEDSTAT(se->statistics.sleep_start);
-		PN_SCHEDSTAT(se->statistics.block_start);
 		PN_SCHEDSTAT(se->statistics.sleep_max);
 		PN_SCHEDSTAT(se->statistics.block_max);
 		PN_SCHEDSTAT(se->statistics.exec_max);
@@ -887,13 +887,14 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 
 	P(se.nr_migrations);
 
+	PN(se.wait_start);
+	PN(se.sleep_start);
+	PN(se.block_start);
+
 	if (schedstat_enabled()) {
 		u64 avg_atom, avg_per_cpu;
 
 		PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
-		PN_SCHEDSTAT(se.statistics.wait_start);
-		PN_SCHEDSTAT(se.statistics.sleep_start);
-		PN_SCHEDSTAT(se.statistics.block_start);
 		PN_SCHEDSTAT(se.statistics.sleep_max);
 		PN_SCHEDSTAT(se.statistics.block_max);
 		PN_SCHEDSTAT(se.statistics.exec_max);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ae70600..3c85949 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -792,19 +792,15 @@ static void update_curr_fair(struct rq *rq)
 static inline void
 update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	u64 wait_start, prev_wait_start;
-
-	if (!schedstat_enabled())
-		return;
+	u64 wait_start;
 
 	wait_start = rq_clock(rq_of(cfs_rq));
-	prev_wait_start = schedstat_val(se->statistics.wait_start);
 
 	if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) &&
-	    likely(wait_start > prev_wait_start))
-		wait_start -= prev_wait_start;
+	    likely(wait_start > se->wait_start))
+		wait_start -= se->wait_start;
 
-	schedstat_set(se->statistics.wait_start, wait_start);
+	se->wait_start = wait_start;
 }
 
 static inline void
@@ -813,10 +809,7 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	struct task_struct *p;
 	u64 delta;
 
-	if (!schedstat_enabled())
-		return;
-
-	delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(se->statistics.wait_start);
+	delta = rq_clock(rq_of(cfs_rq)) - se->wait_start;
 
 	if (entity_is_task(se)) {
 		p = task_of(se);
@@ -826,44 +819,38 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			 * time stamp can be adjusted to accumulate wait time
 			 * prior to migration.
 			 */
-			schedstat_set(se->statistics.wait_start, delta);
+			se->wait_start = delta;
 			return;
 		}
 		trace_sched_stat_wait(p, delta);
 	}
 
+	se->wait_start = 0;
 	schedstat_set(se->statistics.wait_max,
 		      max(schedstat_val(se->statistics.wait_max), delta));
 	schedstat_inc(se->statistics.wait_count);
 	schedstat_add(se->statistics.wait_sum, delta);
-	schedstat_set(se->statistics.wait_start, 0);
 }
 
 static inline void
 update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	struct task_struct *tsk = NULL;
-	u64 sleep_start, block_start;
-
-	if (!schedstat_enabled())
-		return;
-
-	sleep_start = schedstat_val(se->statistics.sleep_start);
-	block_start = schedstat_val(se->statistics.block_start);
 
 	if (entity_is_task(se))
 		tsk = task_of(se);
 
-	if (sleep_start) {
-		u64 delta = rq_clock(rq_of(cfs_rq)) - sleep_start;
+	if (se->sleep_start) {
+		u64 delta = rq_clock(rq_of(cfs_rq)) - se->sleep_start;
 
 		if ((s64)delta < 0)
 			delta = 0;
 
-		if (unlikely(delta > schedstat_val(se->statistics.sleep_max)))
+		if (schedstat_enabled() &&
+		    unlikely(delta > schedstat_val(se->statistics.sleep_max)))
 			schedstat_set(se->statistics.sleep_max, delta);
 
-		schedstat_set(se->statistics.sleep_start, 0);
+		se->sleep_start = 0;
 		schedstat_add(se->statistics.sum_sleep_runtime, delta);
 
 		if (tsk) {
@@ -871,16 +858,17 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			trace_sched_stat_sleep(tsk, delta);
 		}
 	}
-	if (block_start) {
-		u64 delta = rq_clock(rq_of(cfs_rq)) - block_start;
+	if (se->block_start) {
+		u64 delta = rq_clock(rq_of(cfs_rq)) - se->block_start;
 
 		if ((s64)delta < 0)
 			delta = 0;
 
-		if (unlikely(delta > schedstat_val(se->statistics.block_max)))
+		if (schedstat_enabled() &&
+		    unlikely(delta > schedstat_val(se->statistics.block_max)))
 			schedstat_set(se->statistics.block_max, delta);
 
-		schedstat_set(se->statistics.block_start, 0);
+		se->block_start = 0;
 		schedstat_add(se->statistics.sum_sleep_runtime, delta);
 
 		if (tsk) {
@@ -913,9 +901,6 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 static inline void
 update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
-	if (!schedstat_enabled())
-		return;
-
 	/*
 	 * Are we enqueueing a waiting task? (for current tasks
 	 * a dequeue/enqueue event is a NOP)
@@ -931,9 +916,6 @@ static inline void
 update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
 
-	if (!schedstat_enabled())
-		return;
-
 	/*
 	 * Mark the end of the wait period if dequeueing a
 	 * waiting task:
@@ -945,11 +927,9 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		struct task_struct *tsk = task_of(se);
 
 		if (tsk->state & TASK_INTERRUPTIBLE)
-			schedstat_set(se->statistics.sleep_start,
-				      rq_clock(rq_of(cfs_rq)));
+			se->sleep_start = rq_clock(rq_of(cfs_rq));
 		if (tsk->state & TASK_UNINTERRUPTIBLE)
-			schedstat_set(se->statistics.block_start,
-				      rq_clock(rq_of(cfs_rq)));
+			se->block_start = rq_clock(rq_of(cfs_rq));
 	}
 }
 
@@ -3211,27 +3191,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
 
-static inline void check_schedstat_required(void)
-{
-#ifdef CONFIG_SCHEDSTATS
-	if (schedstat_enabled())
-		return;
-
-	/* Force schedstat enabled if a dependent tracepoint is active */
-	if (trace_sched_stat_wait_enabled()    ||
-			trace_sched_stat_sleep_enabled()   ||
-			trace_sched_stat_iowait_enabled()  ||
-			trace_sched_stat_blocked_enabled() ||
-			trace_sched_stat_runtime_enabled())  {
-		printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
-			     "stat_blocked and stat_runtime require the "
-			     "kernel parameter schedstats=enabled or "
-			     "kernel.sched_schedstats=1\n");
-	}
-#endif
-}
-
-
 /*
  * MIGRATION
  *
@@ -3293,7 +3252,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	if (flags & ENQUEUE_WAKEUP)
 		place_entity(cfs_rq, se, 0);
 
-	check_schedstat_required();
 	update_stats_enqueue(cfs_rq, se, flags);
 	check_spread(cfs_rq, se);
 	if (!curr)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b9cfdbf..e8adfac 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1699,7 +1699,6 @@ config LATENCYTOP
 	select KALLSYMS
 	select KALLSYMS_ALL
 	select STACKTRACE
-	select SCHEDSTATS
 	select SCHED_DEBUG
 	help
 	  Enable this option if you want to use the LatencyTOP tool
-- 
2.4.11

* Re: [PATCH 0/5] sched/debug: decouple sched_stat tracepoints from CONFIG_SCHEDSTATS
  2016-06-17 17:43 [PATCH 0/5] sched/debug: decouple sched_stat tracepoints from CONFIG_SCHEDSTATS Josh Poimboeuf
                   ` (4 preceding siblings ...)
  2016-06-17 17:43 ` [PATCH 5/5] sched/debug: decouple 'sched_stat_*' tracepoints from CONFIG_SCHEDSTATS Josh Poimboeuf
@ 2016-06-21  8:27 ` Srikar Dronamraju
  2016-06-28 12:43 ` Peter Zijlstra
  6 siblings, 0 replies; 17+ messages in thread
From: Srikar Dronamraju @ 2016-06-21  8:27 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: Ingo Molnar, Peter Zijlstra, linux-kernel, Mel Gorman, Matt Fleming

* Josh Poimboeuf <jpoimboe@redhat.com> [2016-06-17 12:43:22]:

> NOTE: I didn't include any performance numbers because I wasn't able to
> get consistent results.  I tried the following on a Xeon E5-2420 v2 CPU:
> 
>   $ for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do echo -n performance > $i; done
>   $ echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
>   $ echo 100 > /sys/devices/system/cpu/intel_pstate/min_perf_pct
>   $ echo 0 > /proc/sys/kernel/nmi_watchdog
>   $ taskset 0x10 perf stat -n -r10 perf bench sched pipe -l 1000000
> 
> I was going to post the numbers from that, both with and without
> SCHEDSTATS, but then when I tried to repeat the test on a different day,
> the results were surprisingly different, with different conclusions.
> 
> So any advice on measuring scheduler performance would be appreciated...
> 
> Josh Poimboeuf (5):
>   sched/debug: rename and move enqueue_sleeper()
>   sched/debug: schedstat macro cleanup
>   sched/debug: 'schedstat_val()' -> 'schedstat_val_or_zero()'
>   sched/debug: remove several CONFIG_SCHEDSTATS guards
>   sched/debug: decouple 'sched_stat_*' tracepoints from
>     CONFIG_SCHEDSTATS
> 

This patchset looks good to me.

Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>

-- 
Thanks and Regards
Srikar Dronamraju

* Re: [PATCH 4/5] sched/debug: remove several CONFIG_SCHEDSTATS guards
  2016-06-17 17:43 ` [PATCH 4/5] sched/debug: remove several CONFIG_SCHEDSTATS guards Josh Poimboeuf
@ 2016-06-27 16:21   ` Peter Zijlstra
  2016-06-27 16:32     ` Josh Poimboeuf
  2016-09-05 11:57   ` [tip:sched/core] sched/debug: Remove " tip-bot for Josh Poimboeuf
  1 sibling, 1 reply; 17+ messages in thread
From: Peter Zijlstra @ 2016-06-27 16:21 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: Ingo Molnar, linux-kernel, Mel Gorman, Matt Fleming, Srikar Dronamraju

On Fri, Jun 17, 2016 at 12:43:26PM -0500, Josh Poimboeuf wrote:
> index 017a4f5..0aee5dd 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1626,10 +1626,15 @@ static inline int __set_cpus_allowed_ptr(struct task_struct *p,
>  static void
>  ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
>  {
> -#ifdef CONFIG_SCHEDSTATS
> -	struct rq *rq = this_rq();
> +	struct rq *rq;
> +
> +	if (!schedstat_enabled())
> +		return;
> +
> +	rq = this_rq();
>  
>  #ifdef CONFIG_SMP
> +{
>  	int this_cpu = smp_processor_id();
>  
>  	if (cpu == this_cpu) {
> @@ -1651,7 +1656,7 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
>  
>  	if (wake_flags & WF_MIGRATED)
>  		schedstat_inc(p->se.statistics.nr_wakeups_migrate);
> -
> +}
>  #endif /* CONFIG_SMP */
>  
>  	schedstat_inc(rq->ttwu_count);

I did:

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1636,10 +1636,7 @@ ttwu_stat(struct task_struct *p, int cpu
 	rq = this_rq();
 
 #ifdef CONFIG_SMP
-{
-	int this_cpu = smp_processor_id();
-
-	if (cpu == this_cpu) {
+	if (cpu == rq->cpu) {
 		schedstat_inc(rq->ttwu_local);
 		schedstat_inc(p->se.statistics.nr_wakeups_local);
 	} else {
@@ -1647,7 +1644,7 @@ ttwu_stat(struct task_struct *p, int cpu
 
 		schedstat_inc(p->se.statistics.nr_wakeups_remote);
 		rcu_read_lock();
-		for_each_domain(this_cpu, sd) {
+		for_each_domain(rq->cpu, sd) {
 			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
 				schedstat_inc(sd->ttwu_wake_remote);
 				break;
@@ -1658,7 +1655,6 @@ ttwu_stat(struct task_struct *p, int cpu
 
 	if (wake_flags & WF_MIGRATED)
 		schedstat_inc(p->se.statistics.nr_wakeups_migrate);
-}
 #endif /* CONFIG_SMP */
 
 	schedstat_inc(rq->ttwu_count);

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 4/5] sched/debug: remove several CONFIG_SCHEDSTATS guards
  2016-06-27 16:21   ` Peter Zijlstra
@ 2016-06-27 16:32     ` Josh Poimboeuf
  0 siblings, 0 replies; 17+ messages in thread
From: Josh Poimboeuf @ 2016-06-27 16:32 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, linux-kernel, Mel Gorman, Matt Fleming, Srikar Dronamraju

On Mon, Jun 27, 2016 at 06:21:11PM +0200, Peter Zijlstra wrote:
> On Fri, Jun 17, 2016 at 12:43:26PM -0500, Josh Poimboeuf wrote:
> > index 017a4f5..0aee5dd 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -1626,10 +1626,15 @@ static inline int __set_cpus_allowed_ptr(struct task_struct *p,
> >  static void
> >  ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
> >  {
> > -#ifdef CONFIG_SCHEDSTATS
> > -	struct rq *rq = this_rq();
> > +	struct rq *rq;
> > +
> > +	if (!schedstat_enabled())
> > +		return;
> > +
> > +	rq = this_rq();
> >  
> >  #ifdef CONFIG_SMP
> > +{
> >  	int this_cpu = smp_processor_id();
> >  
> >  	if (cpu == this_cpu) {
> > @@ -1651,7 +1656,7 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
> >  
> >  	if (wake_flags & WF_MIGRATED)
> >  		schedstat_inc(p->se.statistics.nr_wakeups_migrate);
> > -
> > +}
> >  #endif /* CONFIG_SMP */
> >  
> >  	schedstat_inc(rq->ttwu_count);
> 
> I did:
> 
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1636,10 +1636,7 @@ ttwu_stat(struct task_struct *p, int cpu
>  	rq = this_rq();
>  
>  #ifdef CONFIG_SMP
> -{
> -	int this_cpu = smp_processor_id();
> -
> -	if (cpu == this_cpu) {
> +	if (cpu == rq->cpu) {
>  		schedstat_inc(rq->ttwu_local);
>  		schedstat_inc(p->se.statistics.nr_wakeups_local);
>  	} else {
> @@ -1647,7 +1644,7 @@ ttwu_stat(struct task_struct *p, int cpu
>  
>  		schedstat_inc(p->se.statistics.nr_wakeups_remote);
>  		rcu_read_lock();
> -		for_each_domain(this_cpu, sd) {
> +		for_each_domain(rq->cpu, sd) {
>  			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
>  				schedstat_inc(sd->ttwu_wake_remote);
>  				break;
> @@ -1658,7 +1655,6 @@ ttwu_stat(struct task_struct *p, int cpu
>  
>  	if (wake_flags & WF_MIGRATED)
>  		schedstat_inc(p->se.statistics.nr_wakeups_migrate);
> -}
>  #endif /* CONFIG_SMP */
>  
>  	schedstat_inc(rq->ttwu_count);

Much better, thanks.

-- 
Josh

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 0/5] sched/debug: decouple sched_stat tracepoints from CONFIG_SCHEDSTATS
  2016-06-17 17:43 [PATCH 0/5] sched/debug: decouple sched_stat tracepoints from CONFIG_SCHEDSTATS Josh Poimboeuf
                   ` (5 preceding siblings ...)
  2016-06-21  8:27 ` [PATCH 0/5] sched/debug: decouple sched_stat tracepoints " Srikar Dronamraju
@ 2016-06-28 12:43 ` Peter Zijlstra
  2016-06-29  2:32   ` Josh Poimboeuf
  2016-06-29 10:29   ` Peter Zijlstra
  6 siblings, 2 replies; 17+ messages in thread
From: Peter Zijlstra @ 2016-06-28 12:43 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: Ingo Molnar, linux-kernel, Mel Gorman, Matt Fleming, Srikar Dronamraju

On Fri, Jun 17, 2016 at 12:43:22PM -0500, Josh Poimboeuf wrote:
> NOTE: I didn't include any performance numbers because I wasn't able to
> get consistent results.  I tried the following on a Xeon E5-2420 v2 CPU:
> 
>   $ for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do echo -n performance > $i; done
>   $ echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
>   $ echo 100 > /sys/devices/system/cpu/intel_pstate/min_perf_pct
>   $ echo 0 > /proc/sys/kernel/nmi_watchdog
>   $ taskset 0x10 perf stat -n -r10 perf bench sched pipe -l 1000000
> 
> I was going to post the numbers from that, both with and without
> SCHEDSTATS, but then when I tried to repeat the test on a different day,
> the results were surprisingly different, with different conclusions.
> 
> So any advice on measuring scheduler performance would be appreciated...

Yeah, it's a bit of a pain in general...

A) perf stat --null --repeat 50 -- perf bench sched messaging -g 50 -l 5000 | grep "seconds time elapsed"
B) perf stat --null --repeat 50 -- taskset 1 perf bench sched pipe | grep "seconds time elapsed"

1) tip/master + 1-4
2) tip/master + 1-5
3) tip/master + 1-5 + below

	1		2		3

A)	4.627767855	4.650429917	4.646208062
	4.633921933	4.641424424	4.612021058
	4.649536375	4.663144144	4.636815948
	4.630165619	4.649053552	4.613022902

B)	1.770732957	1.789534273	1.773334291
	1.761740716	1.795618428	1.773338681
	1.763761666	1.822316496	1.774385589


From this it looks like patch 5 does hurt a wee bit, but we can get most
of that back by reordering the structure a bit. The results seem
'stable' across rebuilds and reboots (I popped all patches, rebuilt,
rebooted and re-benched configuration 1 at the end and obtained similar
results).

Although it's possible that if we reorder first and then do 5, we'll just
see a bigger regression. I've not bothered.


---
 include/linux/sched.h |   33 +++++++++++++++------------------
 kernel/sched/core.c   |    4 ++--
 kernel/sched/debug.c  |    6 +++---
 3 files changed, 20 insertions(+), 23 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1220,7 +1220,7 @@ struct uts_namespace;
 struct load_weight {
 	unsigned long weight;
 	u32 inv_weight;
-};
+} __packed;
 
 /*
  * The load_avg/util_avg accumulates an infinite geometric series
@@ -1315,44 +1315,40 @@ struct sched_statistics {
 
 struct sched_entity {
 	struct load_weight	load;		/* for load-balancing */
+	unsigned int		on_rq;
 	struct rb_node		run_node;
 	struct list_head	group_node;
-	unsigned int		on_rq;
 
-	u64			exec_start;
+	u64			exec_start ____cacheline_aligned_in_smp;
 	u64			sum_exec_runtime;
 	u64			vruntime;
 	u64			prev_sum_exec_runtime;
-
-	u64			nr_migrations;
-
 	u64			wait_start;
 	u64			sleep_start;
 	u64			block_start;
 
+#ifdef CONFIG_SMP
+	/*
+	 * Per entity load average tracking.
+	 */
+	struct sched_avg	avg ____cacheline_aligned_in_smp;
+#endif
 #ifdef CONFIG_SCHEDSTATS
 	struct sched_statistics statistics;
 #endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	int			depth;
+	/*
+	 * mostly constant values, separate from modifications above
+	 */
+	int			depth ____cacheline_aligned_in_smp;
 	struct sched_entity	*parent;
 	/* rq on which this entity is (to be) queued: */
 	struct cfs_rq		*cfs_rq;
 	/* rq "owned" by this entity/group: */
 	struct cfs_rq		*my_q;
 #endif
-
-#ifdef CONFIG_SMP
-	/*
-	 * Per entity load average tracking.
-	 *
-	 * Put into separate cache line so it does not
-	 * collide with read-mostly values above.
-	 */
-	struct sched_avg	avg ____cacheline_aligned_in_smp;
-#endif
-};
+} ____cacheline_aligned_in_smp;
 
 struct sched_rt_entity {
 	struct list_head run_list;
@@ -1475,6 +1471,7 @@ struct task_struct {
 	int prio, static_prio, normal_prio;
 	unsigned int rt_priority;
 	const struct sched_class *sched_class;
+	u64 nr_migrations;
 	struct sched_entity se;
 	struct sched_rt_entity rt;
 #ifdef CONFIG_CGROUP_SCHED
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1239,7 +1239,7 @@ void set_task_cpu(struct task_struct *p,
 	if (task_cpu(p) != new_cpu) {
 		if (p->sched_class->migrate_task_rq)
 			p->sched_class->migrate_task_rq(p);
-		p->se.nr_migrations++;
+		p->nr_migrations++;
 		perf_event_task_migrate(p);
 	}
 
@@ -2167,7 +2167,7 @@ static void __sched_fork(unsigned long c
 	p->se.exec_start		= 0;
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
-	p->se.nr_migrations		= 0;
+	p->nr_migrations		= 0;
 	p->se.vruntime			= 0;
 	INIT_LIST_HEAD(&p->se.group_node);
 
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -885,7 +885,7 @@ void proc_sched_show_task(struct task_st
 
 	nr_switches = p->nvcsw + p->nivcsw;
 
-	P(se.nr_migrations);
+	P(nr_migrations);
 
 	PN(se.wait_start);
 	PN(se.sleep_start);
@@ -926,9 +926,9 @@ void proc_sched_show_task(struct task_st
 			avg_atom = -1LL;
 
 		avg_per_cpu = p->se.sum_exec_runtime;
-		if (p->se.nr_migrations) {
+		if (p->nr_migrations) {
 			avg_per_cpu = div64_u64(avg_per_cpu,
-						p->se.nr_migrations);
+						p->nr_migrations);
 		} else {
 			avg_per_cpu = -1LL;
 		}

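FWIW, a minimal sketch of the trick being applied above (hypothetical
struct, not from the patch): ____cacheline_aligned_in_smp, from
<linux/cache.h>, pads a member out to its own cache line, so stores to
the hot fields don't keep invalidating the read-mostly line on other
CPUs:

  struct stats_example {
  	/* read-mostly: written at init, read on every operation */
  	unsigned long cfg_a;
  	unsigned long cfg_b;

  	/*
  	 * Hot counter, bumped on every operation.  Giving it its own
  	 * cache line keeps the writers from bouncing the read-mostly
  	 * line above between CPUs (false sharing).
  	 */
  	unsigned long nr_ops ____cacheline_aligned_in_smp;
  };
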
^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 0/5] sched/debug: decouple sched_stat tracepoints from CONFIG_SCHEDSTATS
  2016-06-28 12:43 ` Peter Zijlstra
@ 2016-06-29  2:32   ` Josh Poimboeuf
  2016-06-29 10:29   ` Peter Zijlstra
  1 sibling, 0 replies; 17+ messages in thread
From: Josh Poimboeuf @ 2016-06-29  2:32 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, linux-kernel, Mel Gorman, Matt Fleming, Srikar Dronamraju

On Tue, Jun 28, 2016 at 02:43:36PM +0200, Peter Zijlstra wrote:
> On Fri, Jun 17, 2016 at 12:43:22PM -0500, Josh Poimboeuf wrote:
> > NOTE: I didn't include any performance numbers because I wasn't able to
> > get consistent results.  I tried the following on a Xeon E5-2420 v2 CPU:
> > 
> >   $ for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do echo -n performance > $i; done
> >   $ echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
> >   $ echo 100 > /sys/devices/system/cpu/intel_pstate/min_perf_pct
> >   $ echo 0 > /proc/sys/kernel/nmi_watchdog
> >   $ taskset 0x10 perf stat -n -r10 perf bench sched pipe -l 1000000
> > 
> > I was going to post the numbers from that, both with and without
> > SCHEDSTATS, but then when I tried to repeat the test on a different day,
> > the results were surprisingly different, with different conclusions.
> > 
> > So any advice on measuring scheduler performance would be appreciated...
> 
> Yeah, it's a bit of a pain in general...
> 
> A) perf stat --null --repeat 50 -- perf bench sched messaging -g 50 -l 5000 | grep "seconds time elapsed"
> B) perf stat --null --repeat 50 -- taskset 1 perf bench sched pipe | grep "seconds time elapsed"
> 
> 1) tip/master + 1-4
> 2) tip/master + 1-5
> 3) tip/master + 1-5 + below
> 
> 	1		2		3
> 
> A)	4.627767855	4.650429917	4.646208062
> 	4.633921933	4.641424424	4.612021058
> 	4.649536375	4.663144144	4.636815948
> 	4.630165619	4.649053552	4.613022902
> 
> B)	1.770732957	1.789534273	1.773334291
> 	1.761740716	1.795618428	1.773338681
> 	1.763761666	1.822316496	1.774385589
> 
> 
> From this it looks like patch 5 does hurt a wee bit, but we can get most
> of that back by reordering the structure a bit. The results seem
> 'stable' across rebuilds and reboots (I popped all patches, rebuilt,
> rebooted and re-benched configuration 1 at the end and obtained similar
> results).
> 
> Although it's possible that if we reorder first and then do 5, we'll just
> see a bigger regression. I've not bothered.

Thanks a lot for benchmarking this!  And also for improving the cache
alignments.  Your changes look good to me.

-- 
Josh

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 0/5] sched/debug: decouple sched_stat tracepoints from CONFIG_SCHEDSTATS
  2016-06-28 12:43 ` Peter Zijlstra
  2016-06-29  2:32   ` Josh Poimboeuf
@ 2016-06-29 10:29   ` Peter Zijlstra
  2016-07-08 14:57     ` Josh Poimboeuf
  1 sibling, 1 reply; 17+ messages in thread
From: Peter Zijlstra @ 2016-06-29 10:29 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: Ingo Molnar, linux-kernel, Mel Gorman, Matt Fleming, Srikar Dronamraju

On Tue, Jun 28, 2016 at 02:43:36PM +0200, Peter Zijlstra wrote:

> Yeah, it's a bit of a pain in general...
> 
> A) perf stat --null --repeat 50 -- perf bench sched messaging -g 50 -l 5000 | grep "seconds time elapsed"
> B) perf stat --null --repeat 50 -- taskset 1 perf bench sched pipe | grep "seconds time elapsed"
> 
> 1) tip/master + 1-4
> 2) tip/master + 1-5
> 3) tip/master + 1-5 + below
> 
> 	1		2		3
> 
> A)	4.627767855	4.650429917	4.646208062
> 	4.633921933	4.641424424	4.612021058
> 	4.649536375	4.663144144	4.636815948
> 	4.630165619	4.649053552	4.613022902
> 
> B)	1.770732957	1.789534273	1.773334291
> 	1.761740716	1.795618428	1.773338681
> 	1.763761666	1.822316496	1.774385589
> 
> 
> From this it looks like patch 5 does hurt a wee bit, but we can get most
> of that back by reordering the structure a bit. The results seem
> 'stable' across rebuilds and reboots (I popped all patches, rebuilt,
> rebooted and re-benched configuration 1 at the end and obtained similar
> results).

Ha! So those numbers were with CONFIG_SCHEDSTATS=n :-/

1) above 1 (4 patches, CONFIG_SCHEDSTATS=n, sysctl=0)
2) 1 + CONFIG_SCHEDSTATS=y (sysctl=0)
3) 2 + sysctl=1
4) above 3 (6 patches) + CONFIG_SCHEDSTATS=y (sysctl=0)


	1		2		3		4

A)	4.620495664	4.788352823	4.862036428	4.623480512
	4.628800053	4.792622881	4.855325525	4.613553872
	4.611909507	4.794282178	4.850959761	4.613323142
	4.608379522	4.787300153	4.822439864	4.597903070

B)	1.765668026	1.788374847	1.877803100	1.827213170
	1.769379968	1.779881911	1.870091005	1.825335322
	1.765822150	1.786251610	1.885874745	1.828218761


Which looks good for hackbench, but still stinks for pipetest :/

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 0/5] sched/debug: decouple sched_stat tracepoints from CONFIG_SCHEDSTATS
  2016-06-29 10:29   ` Peter Zijlstra
@ 2016-07-08 14:57     ` Josh Poimboeuf
  0 siblings, 0 replies; 17+ messages in thread
From: Josh Poimboeuf @ 2016-07-08 14:57 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, linux-kernel, Mel Gorman, Matt Fleming, Srikar Dronamraju

On Wed, Jun 29, 2016 at 12:29:58PM +0200, Peter Zijlstra wrote:
> On Tue, Jun 28, 2016 at 02:43:36PM +0200, Peter Zijlstra wrote:
> 
> > Yeah, it's a bit of a pain in general...
> > 
> > A) perf stat --null --repeat 50 -- perf bench sched messaging -g 50 -l 5000 | grep "seconds time elapsed"
> > B) perf stat --null --repeat 50 -- taskset 1 perf bench sched pipe | grep "seconds time elapsed"
> > 
> > 1) tip/master + 1-4
> > 2) tip/master + 1-5
> > 3) tip/master + 1-5 + below
> > 
> > 	1		2		3
> > 
> > A)	4.627767855	4.650429917	4.646208062
> > 	4.633921933	4.641424424	4.612021058
> > 	4.649536375	4.663144144	4.636815948
> > 	4.630165619	4.649053552	4.613022902
> > 
> > B)	1.770732957	1.789534273	1.773334291
> > 	1.761740716	1.795618428	1.773338681
> > 	1.763761666	1.822316496	1.774385589
> > 
> > 
> > From this it looks like patch 5 does hurt a wee bit, but we can get most
> > of that back by reordering the structure a bit. The results seem
> > 'stable' across rebuilds and reboots (I popped all patches, rebuilt,
> > rebooted and re-benched configuration 1 at the end and obtained similar
> > results).
> 
> Ha! So those numbers were with CONFIG_SCHEDSTATS=n :-/
> 
> 1) above 1 (4 patches, CONFIG_SCHEDSTATS=n, sysctl=0)
> 2) 1 + CONFIG_SCHEDSTATS=y (sysctl=0)
> 3) 2 + sysctl=1
> 4) above 3 (6 patches) + CONFIG_SCHEDSTATS=y (sysctl=0)
> 
> 
> 	1		2		3		4
> 
> A)	4.620495664	4.788352823	4.862036428	4.623480512
> 	4.628800053	4.792622881	4.855325525	4.613553872
> 	4.611909507	4.794282178	4.850959761	4.613323142
> 	4.608379522	4.787300153	4.822439864	4.597903070
> 
> B)	1.765668026	1.788374847	1.877803100	1.827213170
> 	1.769379968	1.779881911	1.870091005	1.825335322
> 	1.765822150	1.786251610	1.885874745	1.828218761
> 
> 
> Which looks good for hackbench, but still stinks for pipetest :/

I tried again on another system (Broadwell 2*10*2) and seemingly got
more consistent results, but the conclusions are a bit different from
yours.

I tested only with CONFIG_SCHEDSTATS=y, sysctl=0, because I think that
should be the most common configuration by far.

1) linus/master
2) linus/master + 1-4
3) linus/master + 1-5
4) linus/master + 1-5 + smp cacheline patch

A) perf stat --null --repeat 50 -- perf bench sched messaging -g 50 -l 5000
B) perf stat --null --repeat 50 -- taskset 1 perf bench sched pipe

	1		2		3		4

A)	6.335625627	6.299825679	6.317633969	6.305548464
	6.334188492	6.331391159	6.345195048	6.334608006
	6.345243359	6.329650737	6.328263309	6.304355127
	6.333154970	6.313859694	6.336338820	6.342374680

B)	2.310476138	2.324716175	2.355990033	2.350834083
	2.307231831	2.327946052	2.349816680	2.335581939
	2.303859470	2.317300965	2.347193526	2.333758084
	2.317224538	2.331390610	2.326164933	2.334235895

With patches 1-4, hackbench was slightly better and pipetest was
slightly worse.

With patches 1-5, hackbench was about the same or even slightly better
than baseline, and pipetest was 1-2% worse than baseline.

With your smp cacheline patch added, I didn't see a clear improvement.

It would be nice for the schedstat tracepoints to always be functional,
but I suppose it's up to you and Ingo as to whether it's worth the
performance tradeoff.

Another option would be to only merge patches 1-4.

-- 
Josh

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [tip:sched/core] sched/debug: Rename and move enqueue_sleeper()
  2016-06-17 17:43 ` [PATCH 1/5] sched/debug: rename and move enqueue_sleeper() Josh Poimboeuf
@ 2016-09-05 11:56   ` tip-bot for Josh Poimboeuf
  0 siblings, 0 replies; 17+ messages in thread
From: tip-bot for Josh Poimboeuf @ 2016-09-05 11:56 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: hpa, jpoimboe, tglx, mingo, matt, linux-kernel, peterz, srikar,
	torvalds, mgorman

Commit-ID:  1a3d027c5a6847e5d349c8527f99aada47e5467a
Gitweb:     http://git.kernel.org/tip/1a3d027c5a6847e5d349c8527f99aada47e5467a
Author:     Josh Poimboeuf <jpoimboe@redhat.com>
AuthorDate: Fri, 17 Jun 2016 12:43:23 -0500
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Mon, 5 Sep 2016 13:29:45 +0200

sched/debug: Rename and move enqueue_sleeper()

enqueue_sleeper() doesn't actually enqueue, it just handles some
statistics and tracepoints.  Rename it to update_stats_enqueue_sleeper()
and call it from update_stats_enqueue().

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/fb20b7159dc4d028c406c0e8d5f8c439b741615b.1466184592.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c | 142 +++++++++++++++++++++++++++-------------------------
 1 file changed, 73 insertions(+), 69 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6011bfe..479639f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -862,11 +862,72 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	se->statistics.wait_start = 0;
 }
 
+static void
+update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	struct task_struct *tsk = NULL;
+
+	if (entity_is_task(se))
+		tsk = task_of(se);
+
+	if (se->statistics.sleep_start) {
+		u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.sleep_start;
+
+		if ((s64)delta < 0)
+			delta = 0;
+
+		if (unlikely(delta > se->statistics.sleep_max))
+			se->statistics.sleep_max = delta;
+
+		se->statistics.sleep_start = 0;
+		se->statistics.sum_sleep_runtime += delta;
+
+		if (tsk) {
+			account_scheduler_latency(tsk, delta >> 10, 1);
+			trace_sched_stat_sleep(tsk, delta);
+		}
+	}
+	if (se->statistics.block_start) {
+		u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.block_start;
+
+		if ((s64)delta < 0)
+			delta = 0;
+
+		if (unlikely(delta > se->statistics.block_max))
+			se->statistics.block_max = delta;
+
+		se->statistics.block_start = 0;
+		se->statistics.sum_sleep_runtime += delta;
+
+		if (tsk) {
+			if (tsk->in_iowait) {
+				se->statistics.iowait_sum += delta;
+				se->statistics.iowait_count++;
+				trace_sched_stat_iowait(tsk, delta);
+			}
+
+			trace_sched_stat_blocked(tsk, delta);
+
+			/*
+			 * Blocking time is in units of nanosecs, so shift by
+			 * 20 to get a milliseconds-range estimation of the
+			 * amount of time that the task spent sleeping:
+			 */
+			if (unlikely(prof_on == SLEEP_PROFILING)) {
+				profile_hits(SLEEP_PROFILING,
+						(void *)get_wchan(tsk),
+						delta >> 20);
+			}
+			account_scheduler_latency(tsk, delta >> 10, 0);
+		}
+	}
+}
+
 /*
  * Task is being enqueued - update stats:
  */
 static inline void
-update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
 	/*
 	 * Are we enqueueing a waiting task? (for current tasks
@@ -874,6 +935,9 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 */
 	if (se != cfs_rq->curr)
 		update_stats_wait_start(cfs_rq, se);
+
+	if (flags & ENQUEUE_WAKEUP)
+		update_stats_enqueue_sleeper(cfs_rq, se);
 }
 
 static inline void
@@ -910,7 +974,12 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 }
 
 static inline void
-update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline void
+update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
 }
 
@@ -3197,68 +3266,6 @@ static inline int idle_balance(struct rq *rq)
 
 #endif /* CONFIG_SMP */
 
-static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-#ifdef CONFIG_SCHEDSTATS
-	struct task_struct *tsk = NULL;
-
-	if (entity_is_task(se))
-		tsk = task_of(se);
-
-	if (se->statistics.sleep_start) {
-		u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.sleep_start;
-
-		if ((s64)delta < 0)
-			delta = 0;
-
-		if (unlikely(delta > se->statistics.sleep_max))
-			se->statistics.sleep_max = delta;
-
-		se->statistics.sleep_start = 0;
-		se->statistics.sum_sleep_runtime += delta;
-
-		if (tsk) {
-			account_scheduler_latency(tsk, delta >> 10, 1);
-			trace_sched_stat_sleep(tsk, delta);
-		}
-	}
-	if (se->statistics.block_start) {
-		u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.block_start;
-
-		if ((s64)delta < 0)
-			delta = 0;
-
-		if (unlikely(delta > se->statistics.block_max))
-			se->statistics.block_max = delta;
-
-		se->statistics.block_start = 0;
-		se->statistics.sum_sleep_runtime += delta;
-
-		if (tsk) {
-			if (tsk->in_iowait) {
-				se->statistics.iowait_sum += delta;
-				se->statistics.iowait_count++;
-				trace_sched_stat_iowait(tsk, delta);
-			}
-
-			trace_sched_stat_blocked(tsk, delta);
-
-			/*
-			 * Blocking time is in units of nanosecs, so shift by
-			 * 20 to get a milliseconds-range estimation of the
-			 * amount of time that the task spent sleeping:
-			 */
-			if (unlikely(prof_on == SLEEP_PROFILING)) {
-				profile_hits(SLEEP_PROFILING,
-						(void *)get_wchan(tsk),
-						delta >> 20);
-			}
-			account_scheduler_latency(tsk, delta >> 10, 0);
-		}
-	}
-#endif
-}
-
 static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 #ifdef CONFIG_SCHED_DEBUG
@@ -3385,15 +3392,12 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
 
-	if (flags & ENQUEUE_WAKEUP) {
+	if (flags & ENQUEUE_WAKEUP)
 		place_entity(cfs_rq, se, 0);
-		if (schedstat_enabled())
-			enqueue_sleeper(cfs_rq, se);
-	}
 
 	check_schedstat_required();
 	if (schedstat_enabled()) {
-		update_stats_enqueue(cfs_rq, se);
+		update_stats_enqueue(cfs_rq, se, flags);
 		check_spread(cfs_rq, se);
 	}
 	if (!curr)

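The resulting call path, pieced together from the hunks above:

  /*
   * enqueue_entity(cfs_rq, se, flags)
   *   update_stats_enqueue(cfs_rq, se, flags)
   *     if (flags & ENQUEUE_WAKEUP)
   *       update_stats_enqueue_sleeper(cfs_rq, se)  (was enqueue_sleeper())
   */
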
^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [tip:sched/core] sched/debug: Clean up schedstat macros
  2016-06-17 17:43 ` [PATCH 2/5] sched/debug: schedstat macro cleanup Josh Poimboeuf
@ 2016-09-05 11:57   ` tip-bot for Josh Poimboeuf
  0 siblings, 0 replies; 17+ messages in thread
From: tip-bot for Josh Poimboeuf @ 2016-09-05 11:57 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: mingo, mgorman, hpa, linux-kernel, matt, torvalds, jpoimboe,
	peterz, tglx, srikar

Commit-ID:  ae92882e5646d8661a3ca182ba988752fe4b773f
Gitweb:     http://git.kernel.org/tip/ae92882e5646d8661a3ca182ba988752fe4b773f
Author:     Josh Poimboeuf <jpoimboe@redhat.com>
AuthorDate: Fri, 17 Jun 2016 12:43:24 -0500
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Mon, 5 Sep 2016 13:29:46 +0200

sched/debug: Clean up schedstat macros

The schedstat_*() macros are inconsistent: most of them take a pointer
and a field which the macro combines, whereas schedstat_set() takes the
already combined ptr->field.

The already combined ptr->field argument is actually more intuitive and
easier to use, and there's no reason to require the user to split the
variable up, so convert the macros to use the combined argument.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/54953ca25bb579f3a5946432dee409b0e05222c6.1466184592.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c      | 22 +++++++++++-----------
 kernel/sched/debug.c     |  4 ++--
 kernel/sched/fair.c      | 42 +++++++++++++++++++++---------------------
 kernel/sched/idle_task.c |  2 +-
 kernel/sched/stats.h     | 22 +++++++++++-----------
 5 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 90b1961..8506770 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1636,16 +1636,16 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 	int this_cpu = smp_processor_id();
 
 	if (cpu == this_cpu) {
-		schedstat_inc(rq, ttwu_local);
-		schedstat_inc(p, se.statistics.nr_wakeups_local);
+		schedstat_inc(rq->ttwu_local);
+		schedstat_inc(p->se.statistics.nr_wakeups_local);
 	} else {
 		struct sched_domain *sd;
 
-		schedstat_inc(p, se.statistics.nr_wakeups_remote);
+		schedstat_inc(p->se.statistics.nr_wakeups_remote);
 		rcu_read_lock();
 		for_each_domain(this_cpu, sd) {
 			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
-				schedstat_inc(sd, ttwu_wake_remote);
+				schedstat_inc(sd->ttwu_wake_remote);
 				break;
 			}
 		}
@@ -1653,15 +1653,15 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 	}
 
 	if (wake_flags & WF_MIGRATED)
-		schedstat_inc(p, se.statistics.nr_wakeups_migrate);
+		schedstat_inc(p->se.statistics.nr_wakeups_migrate);
 
 #endif /* CONFIG_SMP */
 
-	schedstat_inc(rq, ttwu_count);
-	schedstat_inc(p, se.statistics.nr_wakeups);
+	schedstat_inc(rq->ttwu_count);
+	schedstat_inc(p->se.statistics.nr_wakeups);
 
 	if (wake_flags & WF_SYNC)
-		schedstat_inc(p, se.statistics.nr_wakeups_sync);
+		schedstat_inc(p->se.statistics.nr_wakeups_sync);
 
 #endif /* CONFIG_SCHEDSTATS */
 }
@@ -3237,7 +3237,7 @@ static inline void schedule_debug(struct task_struct *prev)
 
 	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
 
-	schedstat_inc(this_rq(), sched_count);
+	schedstat_inc(this_rq()->sched_count);
 }
 
 /*
@@ -4849,7 +4849,7 @@ SYSCALL_DEFINE0(sched_yield)
 {
 	struct rq *rq = this_rq_lock();
 
-	schedstat_inc(rq, yld_count);
+	schedstat_inc(rq->yld_count);
 	current->sched_class->yield_task(rq);
 
 	/*
@@ -5000,7 +5000,7 @@ again:
 
 	yielded = curr->sched_class->yield_to_task(rq, p, preempt);
 	if (yielded) {
-		schedstat_inc(rq, yld_count);
+		schedstat_inc(rq->yld_count);
 		/*
 		 * Make p's CPU reschedule; pick_next_entity takes care of
 		 * fairness.
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 2a0a999..92fa534 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -429,9 +429,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 		p->prio);
 
 	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-		SPLIT_NS(schedstat_val(p, se.statistics.wait_sum)),
+		SPLIT_NS(schedstat_val(p->se.statistics.wait_sum)),
 		SPLIT_NS(p->se.sum_exec_runtime),
-		SPLIT_NS(schedstat_val(p, se.statistics.sum_sleep_runtime)));
+		SPLIT_NS(schedstat_val(p->se.statistics.sum_sleep_runtime)));
 
 #ifdef CONFIG_NUMA_BALANCING
 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 479639f..157d741 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -800,7 +800,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 		      max(delta_exec, curr->statistics.exec_max));
 
 	curr->sum_exec_runtime += delta_exec;
-	schedstat_add(cfs_rq, exec_clock, delta_exec);
+	schedstat_add(cfs_rq->exec_clock, delta_exec);
 
 	curr->vruntime += calc_delta_fair(delta_exec, curr);
 	update_min_vruntime(cfs_rq);
@@ -3275,7 +3275,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		d = -d;
 
 	if (d > 3*sysctl_sched_latency)
-		schedstat_inc(cfs_rq, nr_spread_over);
+		schedstat_inc(cfs_rq->nr_spread_over);
 #endif
 }
 
@@ -5164,13 +5164,13 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 
 	balanced = this_eff_load <= prev_eff_load;
 
-	schedstat_inc(p, se.statistics.nr_wakeups_affine_attempts);
+	schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
 
 	if (!balanced)
 		return 0;
 
-	schedstat_inc(sd, ttwu_move_affine);
-	schedstat_inc(p, se.statistics.nr_wakeups_affine);
+	schedstat_inc(sd->ttwu_move_affine);
+	schedstat_inc(p->se.statistics.nr_wakeups_affine);
 
 	return 1;
 }
@@ -6183,7 +6183,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) {
 		int cpu;
 
-		schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
+		schedstat_inc(p->se.statistics.nr_failed_migrations_affine);
 
 		env->flags |= LBF_SOME_PINNED;
 
@@ -6214,7 +6214,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	env->flags &= ~LBF_ALL_PINNED;
 
 	if (task_running(env->src_rq, p)) {
-		schedstat_inc(p, se.statistics.nr_failed_migrations_running);
+		schedstat_inc(p->se.statistics.nr_failed_migrations_running);
 		return 0;
 	}
 
@@ -6231,13 +6231,13 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	if (tsk_cache_hot <= 0 ||
 	    env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
 		if (tsk_cache_hot == 1) {
-			schedstat_inc(env->sd, lb_hot_gained[env->idle]);
-			schedstat_inc(p, se.statistics.nr_forced_migrations);
+			schedstat_inc(env->sd->lb_hot_gained[env->idle]);
+			schedstat_inc(p->se.statistics.nr_forced_migrations);
 		}
 		return 1;
 	}
 
-	schedstat_inc(p, se.statistics.nr_failed_migrations_hot);
+	schedstat_inc(p->se.statistics.nr_failed_migrations_hot);
 	return 0;
 }
 
@@ -6277,7 +6277,7 @@ static struct task_struct *detach_one_task(struct lb_env *env)
 		 * so we can safely collect stats here rather than
 		 * inside detach_tasks().
 		 */
-		schedstat_inc(env->sd, lb_gained[env->idle]);
+		schedstat_inc(env->sd->lb_gained[env->idle]);
 		return p;
 	}
 	return NULL;
@@ -6369,7 +6369,7 @@ next:
 	 * so we can safely collect detach_one_task() stats here rather
 	 * than inside detach_one_task().
 	 */
-	schedstat_add(env->sd, lb_gained[env->idle], detached);
+	schedstat_add(env->sd->lb_gained[env->idle], detached);
 
 	return detached;
 }
@@ -7510,7 +7510,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 
 	cpumask_copy(cpus, cpu_active_mask);
 
-	schedstat_inc(sd, lb_count[idle]);
+	schedstat_inc(sd->lb_count[idle]);
 
 redo:
 	if (!should_we_balance(&env)) {
@@ -7520,19 +7520,19 @@ redo:
 
 	group = find_busiest_group(&env);
 	if (!group) {
-		schedstat_inc(sd, lb_nobusyg[idle]);
+		schedstat_inc(sd->lb_nobusyg[idle]);
 		goto out_balanced;
 	}
 
 	busiest = find_busiest_queue(&env, group);
 	if (!busiest) {
-		schedstat_inc(sd, lb_nobusyq[idle]);
+		schedstat_inc(sd->lb_nobusyq[idle]);
 		goto out_balanced;
 	}
 
 	BUG_ON(busiest == env.dst_rq);
 
-	schedstat_add(sd, lb_imbalance[idle], env.imbalance);
+	schedstat_add(sd->lb_imbalance[idle], env.imbalance);
 
 	env.src_cpu = busiest->cpu;
 	env.src_rq = busiest;
@@ -7639,7 +7639,7 @@ more_balance:
 	}
 
 	if (!ld_moved) {
-		schedstat_inc(sd, lb_failed[idle]);
+		schedstat_inc(sd->lb_failed[idle]);
 		/*
 		 * Increment the failure counter only on periodic balance.
 		 * We do not want newidle balance, which can be very
@@ -7722,7 +7722,7 @@ out_all_pinned:
 	 * we can't migrate them. Let the imbalance flag set so parent level
 	 * can try to migrate them.
 	 */
-	schedstat_inc(sd, lb_balanced[idle]);
+	schedstat_inc(sd->lb_balanced[idle]);
 
 	sd->nr_balance_failed = 0;
 
@@ -7915,15 +7915,15 @@ static int active_load_balance_cpu_stop(void *data)
 			.idle		= CPU_IDLE,
 		};
 
-		schedstat_inc(sd, alb_count);
+		schedstat_inc(sd->alb_count);
 
 		p = detach_one_task(&env);
 		if (p) {
-			schedstat_inc(sd, alb_pushed);
+			schedstat_inc(sd->alb_pushed);
 			/* Active balancing done, reset the failure counter. */
 			sd->nr_balance_failed = 0;
 		} else {
-			schedstat_inc(sd, alb_failed);
+			schedstat_inc(sd->alb_failed);
 		}
 	}
 	rcu_read_unlock();
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index 2ce5458..dedc81ec 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -28,7 +28,7 @@ pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct pin_cookie c
 {
 	put_prev_task(rq, prev);
 
-	schedstat_inc(rq, sched_goidle);
+	schedstat_inc(rq->sched_goidle);
 	return rq->idle;
 }
 
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 78955cb..fc05425 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -29,11 +29,11 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
 	if (rq)
 		rq->rq_sched_info.run_delay += delta;
 }
-# define schedstat_enabled()		static_branch_unlikely(&sched_schedstats)
-# define schedstat_inc(rq, field)	do { if (schedstat_enabled()) { (rq)->field++; } } while (0)
-# define schedstat_add(rq, field, amt)	do { if (schedstat_enabled()) { (rq)->field += (amt); } } while (0)
-# define schedstat_set(var, val)	do { if (schedstat_enabled()) { var = (val); } } while (0)
-# define schedstat_val(rq, field)	((schedstat_enabled()) ? (rq)->field : 0)
+#define schedstat_enabled()		static_branch_unlikely(&sched_schedstats)
+#define schedstat_inc(var)		do { if (schedstat_enabled()) { var++; } } while (0)
+#define schedstat_add(var, amt)		do { if (schedstat_enabled()) { var += (amt); } } while (0)
+#define schedstat_set(var, val)		do { if (schedstat_enabled()) { var = (val); } } while (0)
+#define schedstat_val(var)		((schedstat_enabled()) ? (var) : 0)
 
 #else /* !CONFIG_SCHEDSTATS */
 static inline void
@@ -45,12 +45,12 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
 static inline void
 rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 {}
-# define schedstat_enabled()		0
-# define schedstat_inc(rq, field)	do { } while (0)
-# define schedstat_add(rq, field, amt)	do { } while (0)
-# define schedstat_set(var, val)	do { } while (0)
-# define schedstat_val(rq, field)	0
-#endif
+#define schedstat_enabled()		0
+#define schedstat_inc(var)		do { } while (0)
+#define schedstat_add(var, amt)		do { } while (0)
+#define schedstat_set(var, val)		do { } while (0)
+#define schedstat_val(var)		0
+#endif /* CONFIG_SCHEDSTATS */
 
 #ifdef CONFIG_SCHED_INFO
 static inline void sched_info_reset_dequeued(struct task_struct *t)

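To make the new calling convention concrete, here is a rough hand
expansion of one call site using the CONFIG_SCHEDSTATS=y definitions
above (sketch only):

  /* schedstat_inc(rq->ttwu_count) becomes, roughly: */
  do {
  	if (static_branch_unlikely(&sched_schedstats))
  		rq->ttwu_count++;
  } while (0);

  /* and with CONFIG_SCHEDSTATS=n the statement compiles away: */
  do { } while (0);
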
^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [tip:sched/core] sched/debug: Rename 'schedstat_val()' -> 'schedstat_val_or_zero()'
  2016-06-17 17:43 ` [PATCH 3/5] sched/debug: 'schedstat_val()' -> 'schedstat_val_or_zero()' Josh Poimboeuf
@ 2016-09-05 11:57   ` tip-bot for Josh Poimboeuf
  0 siblings, 0 replies; 17+ messages in thread
From: tip-bot for Josh Poimboeuf @ 2016-09-05 11:57 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: mgorman, mingo, peterz, srikar, tglx, matt, linux-kernel,
	jpoimboe, hpa, torvalds

Commit-ID:  20e1d4863bfa7152e98f94e5bcdda3e7db41d899
Gitweb:     http://git.kernel.org/tip/20e1d4863bfa7152e98f94e5bcdda3e7db41d899
Author:     Josh Poimboeuf <jpoimboe@redhat.com>
AuthorDate: Fri, 17 Jun 2016 12:43:25 -0500
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Mon, 5 Sep 2016 13:29:46 +0200

sched/debug: Rename 'schedstat_val()' -> 'schedstat_val_or_zero()'

The schedstat_val() macro's behavior is kind of surprising: when
schedstat is runtime disabled, it returns zero.  Rename it to
schedstat_val_or_zero().

There's also a need for a similar macro which doesn't have the 'if
(schedstat_enabled())' check, to avoid doing the check twice.  Create a
new 'schedstat_val()' macro for that.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/3bb1d2367d041fee333b0dde17171e709395b675.1466184592.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/debug.c | 4 ++--
 kernel/sched/stats.h | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 92fa534..63ffcaa 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -429,9 +429,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 		p->prio);
 
 	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-		SPLIT_NS(schedstat_val(p->se.statistics.wait_sum)),
+		SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)),
 		SPLIT_NS(p->se.sum_exec_runtime),
-		SPLIT_NS(schedstat_val(p->se.statistics.sum_sleep_runtime)));
+		SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime)));
 
 #ifdef CONFIG_NUMA_BALANCING
 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index fc05425..34659a8 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -33,7 +33,8 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
 #define schedstat_inc(var)		do { if (schedstat_enabled()) { var++; } } while (0)
 #define schedstat_add(var, amt)		do { if (schedstat_enabled()) { var += (amt); } } while (0)
 #define schedstat_set(var, val)		do { if (schedstat_enabled()) { var = (val); } } while (0)
-#define schedstat_val(var)		((schedstat_enabled()) ? (var) : 0)
+#define schedstat_val(var)		(var)
+#define schedstat_val_or_zero(var)	((schedstat_enabled()) ? (var) : 0)
 
 #else /* !CONFIG_SCHEDSTATS */
 static inline void
@@ -50,6 +51,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 #define schedstat_add(var, amt)		do { } while (0)
 #define schedstat_set(var, val)		do { } while (0)
 #define schedstat_val(var)		0
+#define schedstat_val_or_zero(var)	0
 #endif /* CONFIG_SCHEDSTATS */
 
 #ifdef CONFIG_SCHED_INFO

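A sketch of the intended division of labour between the two macros
(hypothetical caller, not from the patch):

  /* inside an existing schedstat_enabled() section, read directly: */
  if (schedstat_enabled()) {
  	u64 wait_sum = schedstat_val(p->se.statistics.wait_sum);
  	/* ... report wait_sum without a second static-branch check ... */
  }

  /* standalone read where a zero fallback is acceptable: */
  u64 wait_sum_or_zero = schedstat_val_or_zero(p->se.statistics.wait_sum);
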
^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [tip:sched/core] sched/debug: Remove several CONFIG_SCHEDSTATS guards
  2016-06-17 17:43 ` [PATCH 4/5] sched/debug: remove several CONFIG_SCHEDSTATS guards Josh Poimboeuf
  2016-06-27 16:21   ` Peter Zijlstra
@ 2016-09-05 11:57   ` tip-bot for Josh Poimboeuf
  1 sibling, 0 replies; 17+ messages in thread
From: tip-bot for Josh Poimboeuf @ 2016-09-05 11:57 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: jpoimboe, torvalds, tglx, linux-kernel, srikar, peterz, mgorman,
	mingo, hpa, matt

Commit-ID:  4fa8d299b43a91f871f6d5b00dd5ab33d43bbc2c
Gitweb:     http://git.kernel.org/tip/4fa8d299b43a91f871f6d5b00dd5ab33d43bbc2c
Author:     Josh Poimboeuf <jpoimboe@redhat.com>
AuthorDate: Fri, 17 Jun 2016 12:43:26 -0500
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Mon, 5 Sep 2016 13:29:47 +0200

sched/debug: Remove several CONFIG_SCHEDSTATS guards

Clean up the sched code by removing several of the CONFIG_SCHEDSTATS
guards, using schedstat_*() macros where needed.

Code size:

  !CONFIG_SCHEDSTATS defconfig:

      text	   data	    bss	     dec	    hex	filename
  10209818	4368184	1105920	15683922	 ef5152	vmlinux.before.nostats
  10209818	4368184	1105920	15683922	 ef5152	vmlinux.after.nostats

  CONFIG_SCHEDSTATS defconfig:

      text	   data	    bss	    dec	    hex	filename
  10214210	4370040	1105920	15690170	 ef69ba	vmlinux.before.stats
  10214210	4370680	1105920	15690810	 ef6c3a	vmlinux.after.stats

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/e51e0ebe5af95ac295de720dd252e7c0d2142e4a.1466184592.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c  |  33 +++++-------
 kernel/sched/debug.c |  99 ++++++++++++++++++----------------
 kernel/sched/fair.c  | 148 ++++++++++++++++++++++++---------------------------
 3 files changed, 136 insertions(+), 144 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8506770..860070f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1629,13 +1629,15 @@ static inline int __set_cpus_allowed_ptr(struct task_struct *p,
 static void
 ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 {
-#ifdef CONFIG_SCHEDSTATS
-	struct rq *rq = this_rq();
+	struct rq *rq;
 
-#ifdef CONFIG_SMP
-	int this_cpu = smp_processor_id();
+	if (!schedstat_enabled())
+		return;
+
+	rq = this_rq();
 
-	if (cpu == this_cpu) {
+#ifdef CONFIG_SMP
+	if (cpu == rq->cpu) {
 		schedstat_inc(rq->ttwu_local);
 		schedstat_inc(p->se.statistics.nr_wakeups_local);
 	} else {
@@ -1643,7 +1645,7 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 
 		schedstat_inc(p->se.statistics.nr_wakeups_remote);
 		rcu_read_lock();
-		for_each_domain(this_cpu, sd) {
+		for_each_domain(rq->cpu, sd) {
 			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
 				schedstat_inc(sd->ttwu_wake_remote);
 				break;
@@ -1654,7 +1656,6 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 
 	if (wake_flags & WF_MIGRATED)
 		schedstat_inc(p->se.statistics.nr_wakeups_migrate);
-
 #endif /* CONFIG_SMP */
 
 	schedstat_inc(rq->ttwu_count);
@@ -1662,8 +1663,6 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 
 	if (wake_flags & WF_SYNC)
 		schedstat_inc(p->se.statistics.nr_wakeups_sync);
-
-#endif /* CONFIG_SCHEDSTATS */
 }
 
 static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
@@ -2084,8 +2083,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 
 	ttwu_queue(p, cpu, wake_flags);
 stat:
-	if (schedstat_enabled())
-		ttwu_stat(p, cpu, wake_flags);
+	ttwu_stat(p, cpu, wake_flags);
 out:
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
@@ -2134,8 +2132,7 @@ static void try_to_wake_up_local(struct task_struct *p, struct pin_cookie cookie
 		ttwu_activate(rq, p, ENQUEUE_WAKEUP);
 
 	ttwu_do_wakeup(rq, p, 0, cookie);
-	if (schedstat_enabled())
-		ttwu_stat(p, smp_processor_id(), 0);
+	ttwu_stat(p, smp_processor_id(), 0);
 out:
 	raw_spin_unlock(&p->pi_lock);
 }
@@ -7675,12 +7672,10 @@ void normalize_rt_tasks(void)
 		if (p->flags & PF_KTHREAD)
 			continue;
 
-		p->se.exec_start		= 0;
-#ifdef CONFIG_SCHEDSTATS
-		p->se.statistics.wait_start	= 0;
-		p->se.statistics.sleep_start	= 0;
-		p->se.statistics.block_start	= 0;
-#endif
+		p->se.exec_start = 0;
+		schedstat_set(p->se.statistics.wait_start,  0);
+		schedstat_set(p->se.statistics.sleep_start, 0);
+		schedstat_set(p->se.statistics.block_start, 0);
 
 		if (!dl_task(p) && !rt_task(p)) {
 			/*
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 63ffcaa..1393588 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -369,8 +369,12 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 
 #define P(F) \
 	SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)F)
+#define P_SCHEDSTAT(F) \
+	SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)schedstat_val(F))
 #define PN(F) \
 	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
+#define PN_SCHEDSTAT(F) \
+	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
 
 	if (!se)
 		return;
@@ -378,26 +382,27 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 	PN(se->exec_start);
 	PN(se->vruntime);
 	PN(se->sum_exec_runtime);
-#ifdef CONFIG_SCHEDSTATS
 	if (schedstat_enabled()) {
-		PN(se->statistics.wait_start);
-		PN(se->statistics.sleep_start);
-		PN(se->statistics.block_start);
-		PN(se->statistics.sleep_max);
-		PN(se->statistics.block_max);
-		PN(se->statistics.exec_max);
-		PN(se->statistics.slice_max);
-		PN(se->statistics.wait_max);
-		PN(se->statistics.wait_sum);
-		P(se->statistics.wait_count);
+		PN_SCHEDSTAT(se->statistics.wait_start);
+		PN_SCHEDSTAT(se->statistics.sleep_start);
+		PN_SCHEDSTAT(se->statistics.block_start);
+		PN_SCHEDSTAT(se->statistics.sleep_max);
+		PN_SCHEDSTAT(se->statistics.block_max);
+		PN_SCHEDSTAT(se->statistics.exec_max);
+		PN_SCHEDSTAT(se->statistics.slice_max);
+		PN_SCHEDSTAT(se->statistics.wait_max);
+		PN_SCHEDSTAT(se->statistics.wait_sum);
+		P_SCHEDSTAT(se->statistics.wait_count);
 	}
-#endif
 	P(se->load.weight);
 #ifdef CONFIG_SMP
 	P(se->avg.load_avg);
 	P(se->avg.util_avg);
 #endif
+
+#undef PN_SCHEDSTAT
 #undef PN
+#undef P_SCHEDSTAT
 #undef P
 }
 #endif
@@ -626,9 +631,7 @@ do {									\
 #undef P64
 #endif
 
-#ifdef CONFIG_SCHEDSTATS
-#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
-
+#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, schedstat_val(rq->n));
 	if (schedstat_enabled()) {
 		P(yld_count);
 		P(sched_count);
@@ -636,9 +639,8 @@ do {									\
 		P(ttwu_count);
 		P(ttwu_local);
 	}
-
 #undef P
-#endif
+
 	spin_lock_irqsave(&sched_debug_lock, flags);
 	print_cfs_stats(m, cpu);
 	print_rt_stats(m, cpu);
@@ -868,10 +870,14 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
 #define P(F) \
 	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
+#define P_SCHEDSTAT(F) \
+	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)schedstat_val(p->F))
 #define __PN(F) \
 	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
 #define PN(F) \
 	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
+#define PN_SCHEDSTAT(F) \
+	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(p->F)))
 
 	PN(se.exec_start);
 	PN(se.vruntime);
@@ -881,37 +887,36 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 
 	P(se.nr_migrations);
 
-#ifdef CONFIG_SCHEDSTATS
 	if (schedstat_enabled()) {
 		u64 avg_atom, avg_per_cpu;
 
-		PN(se.statistics.sum_sleep_runtime);
-		PN(se.statistics.wait_start);
-		PN(se.statistics.sleep_start);
-		PN(se.statistics.block_start);
-		PN(se.statistics.sleep_max);
-		PN(se.statistics.block_max);
-		PN(se.statistics.exec_max);
-		PN(se.statistics.slice_max);
-		PN(se.statistics.wait_max);
-		PN(se.statistics.wait_sum);
-		P(se.statistics.wait_count);
-		PN(se.statistics.iowait_sum);
-		P(se.statistics.iowait_count);
-		P(se.statistics.nr_migrations_cold);
-		P(se.statistics.nr_failed_migrations_affine);
-		P(se.statistics.nr_failed_migrations_running);
-		P(se.statistics.nr_failed_migrations_hot);
-		P(se.statistics.nr_forced_migrations);
-		P(se.statistics.nr_wakeups);
-		P(se.statistics.nr_wakeups_sync);
-		P(se.statistics.nr_wakeups_migrate);
-		P(se.statistics.nr_wakeups_local);
-		P(se.statistics.nr_wakeups_remote);
-		P(se.statistics.nr_wakeups_affine);
-		P(se.statistics.nr_wakeups_affine_attempts);
-		P(se.statistics.nr_wakeups_passive);
-		P(se.statistics.nr_wakeups_idle);
+		PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
+		PN_SCHEDSTAT(se.statistics.wait_start);
+		PN_SCHEDSTAT(se.statistics.sleep_start);
+		PN_SCHEDSTAT(se.statistics.block_start);
+		PN_SCHEDSTAT(se.statistics.sleep_max);
+		PN_SCHEDSTAT(se.statistics.block_max);
+		PN_SCHEDSTAT(se.statistics.exec_max);
+		PN_SCHEDSTAT(se.statistics.slice_max);
+		PN_SCHEDSTAT(se.statistics.wait_max);
+		PN_SCHEDSTAT(se.statistics.wait_sum);
+		P_SCHEDSTAT(se.statistics.wait_count);
+		PN_SCHEDSTAT(se.statistics.iowait_sum);
+		P_SCHEDSTAT(se.statistics.iowait_count);
+		P_SCHEDSTAT(se.statistics.nr_migrations_cold);
+		P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
+		P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
+		P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
+		P_SCHEDSTAT(se.statistics.nr_forced_migrations);
+		P_SCHEDSTAT(se.statistics.nr_wakeups);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_sync);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_migrate);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_local);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_remote);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_affine);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
 
 		avg_atom = p->se.sum_exec_runtime;
 		if (nr_switches)
@@ -930,7 +935,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 		__PN(avg_atom);
 		__PN(avg_per_cpu);
 	}
-#endif
+
 	__P(nr_switches);
 	SEQ_printf(m, "%-45s:%21Ld\n",
 		   "nr_voluntary_switches", (long long)p->nvcsw);
@@ -947,8 +952,10 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 #endif
 	P(policy);
 	P(prio);
+#undef PN_SCHEDSTAT
 #undef PN
 #undef __PN
+#undef P_SCHEDSTAT
 #undef P
 #undef __P
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 157d741..a6820b3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -821,26 +821,34 @@ static void update_curr_fair(struct rq *rq)
 	update_curr(cfs_rq_of(&rq->curr->se));
 }
 
-#ifdef CONFIG_SCHEDSTATS
 static inline void
 update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	u64 wait_start = rq_clock(rq_of(cfs_rq));
+	u64 wait_start, prev_wait_start;
+
+	if (!schedstat_enabled())
+		return;
+
+	wait_start = rq_clock(rq_of(cfs_rq));
+	prev_wait_start = schedstat_val(se->statistics.wait_start);
 
 	if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) &&
-	    likely(wait_start > se->statistics.wait_start))
-		wait_start -= se->statistics.wait_start;
+	    likely(wait_start > prev_wait_start))
+		wait_start -= prev_wait_start;
 
-	se->statistics.wait_start = wait_start;
+	schedstat_set(se->statistics.wait_start, wait_start);
 }
 
-static void
+static inline void
 update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	struct task_struct *p;
 	u64 delta;
 
-	delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
+	if (!schedstat_enabled())
+		return;
+
+	delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(se->statistics.wait_start);
 
 	if (entity_is_task(se)) {
 		p = task_of(se);
@@ -850,59 +858,67 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			 * time stamp can be adjusted to accumulate wait time
 			 * prior to migration.
 			 */
-			se->statistics.wait_start = delta;
+			schedstat_set(se->statistics.wait_start, delta);
 			return;
 		}
 		trace_sched_stat_wait(p, delta);
 	}
 
-	se->statistics.wait_max = max(se->statistics.wait_max, delta);
-	se->statistics.wait_count++;
-	se->statistics.wait_sum += delta;
-	se->statistics.wait_start = 0;
+	schedstat_set(se->statistics.wait_max,
+		      max(schedstat_val(se->statistics.wait_max), delta));
+	schedstat_inc(se->statistics.wait_count);
+	schedstat_add(se->statistics.wait_sum, delta);
+	schedstat_set(se->statistics.wait_start, 0);
 }
 
-static void
+static inline void
 update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	struct task_struct *tsk = NULL;
+	u64 sleep_start, block_start;
+
+	if (!schedstat_enabled())
+		return;
+
+	sleep_start = schedstat_val(se->statistics.sleep_start);
+	block_start = schedstat_val(se->statistics.block_start);
 
 	if (entity_is_task(se))
 		tsk = task_of(se);
 
-	if (se->statistics.sleep_start) {
-		u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.sleep_start;
+	if (sleep_start) {
+		u64 delta = rq_clock(rq_of(cfs_rq)) - sleep_start;
 
 		if ((s64)delta < 0)
 			delta = 0;
 
-		if (unlikely(delta > se->statistics.sleep_max))
-			se->statistics.sleep_max = delta;
+		if (unlikely(delta > schedstat_val(se->statistics.sleep_max)))
+			schedstat_set(se->statistics.sleep_max, delta);
 
-		se->statistics.sleep_start = 0;
-		se->statistics.sum_sleep_runtime += delta;
+		schedstat_set(se->statistics.sleep_start, 0);
+		schedstat_add(se->statistics.sum_sleep_runtime, delta);
 
 		if (tsk) {
 			account_scheduler_latency(tsk, delta >> 10, 1);
 			trace_sched_stat_sleep(tsk, delta);
 		}
 	}
-	if (se->statistics.block_start) {
-		u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.block_start;
+	if (block_start) {
+		u64 delta = rq_clock(rq_of(cfs_rq)) - block_start;
 
 		if ((s64)delta < 0)
 			delta = 0;
 
-		if (unlikely(delta > se->statistics.block_max))
-			se->statistics.block_max = delta;
+		if (unlikely(delta > schedstat_val(se->statistics.block_max)))
+			schedstat_set(se->statistics.block_max, delta);
 
-		se->statistics.block_start = 0;
-		se->statistics.sum_sleep_runtime += delta;
+		schedstat_set(se->statistics.block_start, 0);
+		schedstat_add(se->statistics.sum_sleep_runtime, delta);
 
 		if (tsk) {
 			if (tsk->in_iowait) {
-				se->statistics.iowait_sum += delta;
-				se->statistics.iowait_count++;
+				schedstat_add(se->statistics.iowait_sum, delta);
+				schedstat_inc(se->statistics.iowait_count);
 				trace_sched_stat_iowait(tsk, delta);
 			}
 
@@ -929,6 +945,9 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 static inline void
 update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
+	if (!schedstat_enabled())
+		return;
+
 	/*
 	 * Are we enqueueing a waiting task? (for current tasks
 	 * a dequeue/enqueue event is a NOP)
@@ -943,6 +962,10 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 static inline void
 update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
+
+	if (!schedstat_enabled())
+		return;
+
 	/*
 	 * Mark the end of the wait period if dequeueing a
 	 * waiting task:
@@ -950,45 +973,18 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	if (se != cfs_rq->curr)
 		update_stats_wait_end(cfs_rq, se);
 
-	if (flags & DEQUEUE_SLEEP) {
-		if (entity_is_task(se)) {
-			struct task_struct *tsk = task_of(se);
+	if ((flags & DEQUEUE_SLEEP) && entity_is_task(se)) {
+		struct task_struct *tsk = task_of(se);
 
-			if (tsk->state & TASK_INTERRUPTIBLE)
-				se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
-			if (tsk->state & TASK_UNINTERRUPTIBLE)
-				se->statistics.block_start = rq_clock(rq_of(cfs_rq));
-		}
+		if (tsk->state & TASK_INTERRUPTIBLE)
+			schedstat_set(se->statistics.sleep_start,
+				      rq_clock(rq_of(cfs_rq)));
+		if (tsk->state & TASK_UNINTERRUPTIBLE)
+			schedstat_set(se->statistics.block_start,
+				      rq_clock(rq_of(cfs_rq)));
 	}
-
-}
-#else
-static inline void
-update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-
-static inline void
-update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-
-static inline void
-update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
 }
 
-static inline void
-update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
-{
-}
-
-static inline void
-update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
-{
-}
-#endif
-
 /*
  * We are picking a new current task - update its stats:
  */
@@ -3396,10 +3391,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		place_entity(cfs_rq, se, 0);
 
 	check_schedstat_required();
-	if (schedstat_enabled()) {
-		update_stats_enqueue(cfs_rq, se, flags);
-		check_spread(cfs_rq, se);
-	}
+	update_stats_enqueue(cfs_rq, se, flags);
+	check_spread(cfs_rq, se);
 	if (!curr)
 		__enqueue_entity(cfs_rq, se);
 	se->on_rq = 1;
@@ -3466,8 +3459,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	update_curr(cfs_rq);
 	dequeue_entity_load_avg(cfs_rq, se);
 
-	if (schedstat_enabled())
-		update_stats_dequeue(cfs_rq, se, flags);
+	update_stats_dequeue(cfs_rq, se, flags);
 
 	clear_buddies(cfs_rq, se);
 
@@ -3541,25 +3533,25 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		 * a CPU. So account for the time it spent waiting on the
 		 * runqueue.
 		 */
-		if (schedstat_enabled())
-			update_stats_wait_end(cfs_rq, se);
+		update_stats_wait_end(cfs_rq, se);
 		__dequeue_entity(cfs_rq, se);
 		update_load_avg(se, 1);
 	}
 
 	update_stats_curr_start(cfs_rq, se);
 	cfs_rq->curr = se;
-#ifdef CONFIG_SCHEDSTATS
+
 	/*
 	 * Track our maximum slice length, if the CPU's load is at
 	 * least twice that of our own weight (i.e. dont track it
 	 * when there are only lesser-weight tasks around):
 	 */
 	if (schedstat_enabled() && rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
-		se->statistics.slice_max = max(se->statistics.slice_max,
-			se->sum_exec_runtime - se->prev_sum_exec_runtime);
+		schedstat_set(se->statistics.slice_max,
+			max((u64)schedstat_val(se->statistics.slice_max),
+			    se->sum_exec_runtime - se->prev_sum_exec_runtime));
 	}
-#endif
+
 	se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 
@@ -3638,13 +3630,10 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
 	/* throttle cfs_rqs exceeding runtime */
 	check_cfs_rq_runtime(cfs_rq);
 
-	if (schedstat_enabled()) {
-		check_spread(cfs_rq, prev);
-		if (prev->on_rq)
-			update_stats_wait_start(cfs_rq, prev);
-	}
+	check_spread(cfs_rq, prev);
 
 	if (prev->on_rq) {
+		update_stats_wait_start(cfs_rq, prev);
 		/* Put 'current' back into the tree. */
 		__enqueue_entity(cfs_rq, prev);
 		/* in !on_rq case, update occurred at dequeue */

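For reference, the cleanups above lean on the schedstat accessor macros
(schedstat_enabled(), schedstat_inc(), schedstat_add(), schedstat_set(),
schedstat_val(), schedstat_val_or_zero()) compiling away when
CONFIG_SCHEDSTATS is disabled.  Below is a minimal sketch of such
definitions, assuming a static key named 'sched_schedstats' as the
runtime switch; the authoritative versions live in kernel/sched/stats.h,
so treat the bodies here as an illustration rather than the exact
kernel code:

  #ifdef CONFIG_SCHEDSTATS

  /* Runtime switch: a patched-in branch, near-zero cost when disabled. */
  #define schedstat_enabled()            static_branch_unlikely(&sched_schedstats)

  /* Update a stats field only when schedstats is runtime-enabled. */
  #define schedstat_inc(var)             do { if (schedstat_enabled()) { var++; } } while (0)
  #define schedstat_add(var, amt)        do { if (schedstat_enabled()) { var += (amt); } } while (0)
  #define schedstat_set(var, val)        do { if (schedstat_enabled()) { var = (val); } } while (0)

  /* Read a stats field; the _or_zero variant falls back to 0 when off. */
  #define schedstat_val(var)             (var)
  #define schedstat_val_or_zero(var)     ((schedstat_enabled()) ? (var) : 0)

  #else /* !CONFIG_SCHEDSTATS */

  /* With schedstats compiled out, every accessor becomes a no-op. */
  #define schedstat_enabled()            0
  #define schedstat_inc(var)             do { } while (0)
  #define schedstat_add(var, amt)        do { } while (0)
  #define schedstat_set(var, val)        do { } while (0)
  #define schedstat_val(var)             0
  #define schedstat_val_or_zero(var)     0

  #endif /* CONFIG_SCHEDSTATS */

With definitions along these lines, a caller such as
schedstat_set(se->statistics.sleep_start, 0) costs a single
static-branch test on CONFIG_SCHEDSTATS=y kernels and compiles to
nothing on CONFIG_SCHEDSTATS=n kernels, which is what allows the
#ifdef guards and stub functions removed above to go away.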

Thread overview: 17+ messages
2016-06-17 17:43 [PATCH 0/5] sched/debug: decouple sched_stat tracepoints from CONFIG_SCHEDSTATS Josh Poimboeuf
2016-06-17 17:43 ` [PATCH 1/5] sched/debug: rename and move enqueue_sleeper() Josh Poimboeuf
2016-09-05 11:56   ` [tip:sched/core] sched/debug: Rename " tip-bot for Josh Poimboeuf
2016-06-17 17:43 ` [PATCH 2/5] sched/debug: schedstat macro cleanup Josh Poimboeuf
2016-09-05 11:57   ` [tip:sched/core] sched/debug: Clean up schedstat macros tip-bot for Josh Poimboeuf
2016-06-17 17:43 ` [PATCH 3/5] sched/debug: 'schedstat_val()' -> 'schedstat_val_or_zero()' Josh Poimboeuf
2016-09-05 11:57   ` [tip:sched/core] sched/debug: Rename " tip-bot for Josh Poimboeuf
2016-06-17 17:43 ` [PATCH 4/5] sched/debug: remove several CONFIG_SCHEDSTATS guards Josh Poimboeuf
2016-06-27 16:21   ` Peter Zijlstra
2016-06-27 16:32     ` Josh Poimboeuf
2016-09-05 11:57   ` [tip:sched/core] sched/debug: Remove " tip-bot for Josh Poimboeuf
2016-06-17 17:43 ` [PATCH 5/5] sched/debug: decouple 'sched_stat_*' tracepoints' from CONFIG_SCHEDSTATS Josh Poimboeuf
2016-06-21  8:27 ` [PATCH 0/5] sched/debug: decouple sched_stat tracepoints " Srikar Dronamraju
2016-06-28 12:43 ` Peter Zijlstra
2016-06-29  2:32   ` Josh Poimboeuf
2016-06-29 10:29   ` Peter Zijlstra
2016-07-08 14:57     ` Josh Poimboeuf
