linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] sched: fair: fix missed CONFIG_SCHEDSTATS
@ 2019-03-06  8:43 Yafang Shao
  2019-03-06 10:09 ` Peter Zijlstra
  0 siblings, 1 reply; 10+ messages in thread
From: Yafang Shao @ 2019-03-06  8:43 UTC (permalink / raw)
  To: mingo, peterz; +Cc: linux-kernel, shaoyafang, Yafang Shao

When I use trace_sched_stat_{iowait, blocked, wait, sleep} to
measure how long processes are stalled, there is never any output in
trace_pipe, even though some tasks really are in uninterruptible sleep
state. That confused me, so I investigated why.
It turned out that the reason is that CONFIG_SCHEDSTATS is not set.

To avoid this kind of confusion, we should not expose these tracepoints
if CONFIG_SCHEDSTATS is not set.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/trace/events/sched.h |  3 ++-
 kernel/sched/fair.c          | 13 ++++++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 9a4bdfa..a261da8 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -336,6 +336,7 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
 		  __entry->pid, __entry->old_pid)
 );
 
+#ifdef CONFIG_SCHEDSTATS
 /*
  * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
  *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
@@ -363,7 +364,6 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
 			(unsigned long long)__entry->delay)
 );
 
-
 /*
  * Tracepoint for accounting wait time (time the task is runnable
  * but not actually running due to scheduler contention).
@@ -394,6 +394,7 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
 DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
 	     TP_PROTO(struct task_struct *tsk, u64 delay),
 	     TP_ARGS(tsk, delay));
+#endif
 
 /*
  * Tracepoint for accounting runtime (time the task is executing
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8213ff6..a8006c9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -855,6 +855,7 @@ static void update_curr_fair(struct rq *rq)
 static inline void
 update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
+#ifdef CONFIG_SCHEDSTATS
 	u64 wait_start, prev_wait_start;
 
 	if (!schedstat_enabled())
@@ -868,11 +869,13 @@ static void update_curr_fair(struct rq *rq)
 		wait_start -= prev_wait_start;
 
 	__schedstat_set(se->statistics.wait_start, wait_start);
+#endif
 }
 
 static inline void
 update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
+#ifdef CONFIG_SCHEDSTATS
 	struct task_struct *p;
 	u64 delta;
 
@@ -900,11 +903,13 @@ static void update_curr_fair(struct rq *rq)
 	__schedstat_inc(se->statistics.wait_count);
 	__schedstat_add(se->statistics.wait_sum, delta);
 	__schedstat_set(se->statistics.wait_start, 0);
+#endif
 }
 
 static inline void
 update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
+#ifdef CONFIG_SCHEDSTATS
 	struct task_struct *tsk = NULL;
 	u64 sleep_start, block_start;
 
@@ -968,6 +973,7 @@ static void update_curr_fair(struct rq *rq)
 			account_scheduler_latency(tsk, delta >> 10, 0);
 		}
 	}
+#endif
 }
 
 /*
@@ -976,6 +982,7 @@ static void update_curr_fair(struct rq *rq)
 static inline void
 update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
+#ifdef CONFIG_SCHEDSTATS
 	if (!schedstat_enabled())
 		return;
 
@@ -988,12 +995,13 @@ static void update_curr_fair(struct rq *rq)
 
 	if (flags & ENQUEUE_WAKEUP)
 		update_stats_enqueue_sleeper(cfs_rq, se);
+#endif
 }
 
 static inline void
 update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
-
+#ifdef CONFIG_SCHEDSTATS
 	if (!schedstat_enabled())
 		return;
 
@@ -1014,6 +1022,7 @@ static void update_curr_fair(struct rq *rq)
 			__schedstat_set(se->statistics.block_start,
 				      rq_clock(rq_of(cfs_rq)));
 	}
+#endif
 }
 
 /*
@@ -4090,6 +4099,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	update_stats_curr_start(cfs_rq, se);
 	cfs_rq->curr = se;
 
+#ifdef CONFIG_SCHEDSTATS
 	/*
 	 * Track our maximum slice length, if the CPU's load is at
 	 * least twice that of our own weight (i.e. dont track it
@@ -4100,6 +4110,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			max((u64)schedstat_val(se->statistics.slice_max),
 			    se->sum_exec_runtime - se->prev_sum_exec_runtime));
 	}
+#endif
 
 	se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] sched: fair: fix missed CONFIG_SCHEDSTATS
  2019-03-06  8:43 [PATCH] sched: fair: fix missed CONFIG_SCHEDSTATS Yafang Shao
@ 2019-03-06 10:09 ` Peter Zijlstra
  2019-03-06 10:15   ` Yafang Shao
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Zijlstra @ 2019-03-06 10:09 UTC (permalink / raw)
  To: Yafang Shao; +Cc: mingo, linux-kernel, shaoyafang

On Wed, Mar 06, 2019 at 04:43:46PM +0800, Yafang Shao wrote:
> When I'm using trace_sched_stat_{iowait, blocked, wait, sleep} to
> measure how long the processes are stalled, there's always no output from
> trace_pipe while there're really some tasks in uninterruptible sleep
> state. That makes me confused, so I try to investigate why.
> Finally I find the reason is that CONFIG_SCHEDSTATS is not set.
> 
> To avoid such kind of confusion, we should not expose these tracepoints
> if CONFIG_SCHEDSTATS is not set.

Yeah, let's not sprinkle #ifdefs. Big fat NAK.

Also, the below seems to indicate your compiler is stupid. Without
CONFIG_SCHEDSTATS, schedstat_enabled() should be a constant 0 and DCE
should delete all code.

> @@ -976,6 +982,7 @@ static void update_curr_fair(struct rq *rq)
>  static inline void
>  update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
>  {
> +#ifdef CONFIG_SCHEDSTATS
>  	if (!schedstat_enabled())
>  		return;
>  
> @@ -988,12 +995,13 @@ static void update_curr_fair(struct rq *rq)
>  
>  	if (flags & ENQUEUE_WAKEUP)
>  		update_stats_enqueue_sleeper(cfs_rq, se);
> +#endif
>  }
>  
>  static inline void
>  update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
>  {
> -
> +#ifdef CONFIG_SCHEDSTATS
>  	if (!schedstat_enabled())
>  		return;
>  
> @@ -1014,6 +1022,7 @@ static void update_curr_fair(struct rq *rq)
>  			__schedstat_set(se->statistics.block_start,
>  				      rq_clock(rq_of(cfs_rq)));
>  	}
> +#endif
>  }
>  
>  /*
> @@ -4090,6 +4099,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
>  	update_stats_curr_start(cfs_rq, se);
>  	cfs_rq->curr = se;
>  
> +#ifdef CONFIG_SCHEDSTATS
>  	/*
>  	 * Track our maximum slice length, if the CPU's load is at
>  	 * least twice that of our own weight (i.e. dont track it
> @@ -4100,6 +4110,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
>  			max((u64)schedstat_val(se->statistics.slice_max),
>  			    se->sum_exec_runtime - se->prev_sum_exec_runtime));
>  	}
> +#endif
>  
>  	se->prev_sum_exec_runtime = se->sum_exec_runtime;
>  }
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] sched: fair: fix missed CONFIG_SCHEDSTATS
  2019-03-06 10:09 ` Peter Zijlstra
@ 2019-03-06 10:15   ` Yafang Shao
  2019-03-06 11:27     ` Peter Zijlstra
  0 siblings, 1 reply; 10+ messages in thread
From: Yafang Shao @ 2019-03-06 10:15 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: mingo, LKML, shaoyafang

On Wed, Mar 6, 2019 at 6:09 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Wed, Mar 06, 2019 at 04:43:46PM +0800, Yafang Shao wrote:
> > When I'm using trace_sched_stat_{iowait, blocked, wait, sleep} to
> > measure how long the processes are stalled, there's always no output from
> > trace_pipe while there're really some tasks in uninterruptible sleep
> > state. That makes me confused, so I try to investigate why.
> > Finally I find the reason is that CONFIG_SCHEDSTATS is not set.
> >
> > To avoid such kind of confusion, we should not expose these tracepoints
> > if CONFIG_SCHEDSTATS is not set.
>
> Yeah, lets not sprinkle #ifdef. Big fat NAK.
>
> Also, the below seem to indicate your compiler is stupid. Without
> CONFIG_SCHEDSTAT, schedstat_enabled() should be a constant 0 and DCE
> should delete all code.
>

My compiler is GCC-7.3.0.
I don't know which compiler could be smart enough to remove the
definition of these tracepoints.
Could you please tell me which compiler you are using?

> > @@ -976,6 +982,7 @@ static void update_curr_fair(struct rq *rq)
> >  static inline void
> >  update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
> >  {
> > +#ifdef CONFIG_SCHEDSTATS
> >       if (!schedstat_enabled())
> >               return;
> >
> > @@ -988,12 +995,13 @@ static void update_curr_fair(struct rq *rq)
> >
> >       if (flags & ENQUEUE_WAKEUP)
> >               update_stats_enqueue_sleeper(cfs_rq, se);
> > +#endif
> >  }
> >
> >  static inline void
> >  update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
> >  {
> > -
> > +#ifdef CONFIG_SCHEDSTATS
> >       if (!schedstat_enabled())
> >               return;
> >
> > @@ -1014,6 +1022,7 @@ static void update_curr_fair(struct rq *rq)
> >                       __schedstat_set(se->statistics.block_start,
> >                                     rq_clock(rq_of(cfs_rq)));
> >       }
> > +#endif
> >  }
> >
> >  /*
> > @@ -4090,6 +4099,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
> >       update_stats_curr_start(cfs_rq, se);
> >       cfs_rq->curr = se;
> >
> > +#ifdef CONFIG_SCHEDSTATS
> >       /*
> >        * Track our maximum slice length, if the CPU's load is at
> >        * least twice that of our own weight (i.e. dont track it
> > @@ -4100,6 +4110,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
> >                       max((u64)schedstat_val(se->statistics.slice_max),
> >                           se->sum_exec_runtime - se->prev_sum_exec_runtime));
> >       }
> > +#endif
> >
> >       se->prev_sum_exec_runtime = se->sum_exec_runtime;
> >  }
> > --
> > 1.8.3.1
> >

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] sched: fair: fix missed CONFIG_SCHEDSTATS
  2019-03-06 10:15   ` Yafang Shao
@ 2019-03-06 11:27     ` Peter Zijlstra
  2019-03-06 11:49       ` Yafang Shao
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Zijlstra @ 2019-03-06 11:27 UTC (permalink / raw)
  To: Yafang Shao; +Cc: mingo, LKML, shaoyafang

On Wed, Mar 06, 2019 at 06:15:39PM +0800, Yafang Shao wrote:
> On Wed, Mar 6, 2019 at 6:09 PM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Wed, Mar 06, 2019 at 04:43:46PM +0800, Yafang Shao wrote:
> > > When I'm using trace_sched_stat_{iowait, blocked, wait, sleep} to
> > > measure how long the processes are stalled, there's always no output from
> > > trace_pipe while there're really some tasks in uninterruptible sleep
> > > state. That makes me confused, so I try to investigate why.
> > > Finally I find the reason is that CONFIG_SCHEDSTATS is not set.
> > >
> > > To avoid such kind of confusion, we should not expose these tracepoints
> > > if CONFIG_SCHEDSTATS is not set.
> >
> > Yeah, lets not sprinkle #ifdef. Big fat NAK.
> >
> > Also, the below seem to indicate your compiler is stupid. Without
> > CONFIG_SCHEDSTAT, schedstat_enabled() should be a constant 0 and DCE
> > should delete all code.
> >
> 
> My compiler is GCC-7.3.0.
> I don't know which comipler could be smart enough to remove the
> definition of these tracepoints.
> Could you pls. tell me what compiler you are using ?

Just look at the generated code...

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] sched: fair: fix missed CONFIG_SCHEDSTATS
  2019-03-06 11:27     ` Peter Zijlstra
@ 2019-03-06 11:49       ` Yafang Shao
  2019-03-06 12:38         ` Peter Zijlstra
  0 siblings, 1 reply; 10+ messages in thread
From: Yafang Shao @ 2019-03-06 11:49 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: mingo, LKML, shaoyafang

On Wed, Mar 6, 2019 at 7:27 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Wed, Mar 06, 2019 at 06:15:39PM +0800, Yafang Shao wrote:
> > On Wed, Mar 6, 2019 at 6:09 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > On Wed, Mar 06, 2019 at 04:43:46PM +0800, Yafang Shao wrote:
> > > > When I'm using trace_sched_stat_{iowait, blocked, wait, sleep} to
> > > > measure how long the processes are stalled, there's always no output from
> > > > trace_pipe while there're really some tasks in uninterruptible sleep
> > > > state. That makes me confused, so I try to investigate why.
> > > > Finally I find the reason is that CONFIG_SCHEDSTATS is not set.
> > > >
> > > > To avoid such kind of confusion, we should not expose these tracepoints
> > > > if CONFIG_SCHEDSTATS is not set.
> > >
> > > Yeah, lets not sprinkle #ifdef. Big fat NAK.
> > >
> > > Also, the below seem to indicate your compiler is stupid. Without
> > > CONFIG_SCHEDSTAT, schedstat_enabled() should be a constant 0 and DCE
> > > should delete all code.
> > >
> >
> > My compiler is GCC-7.3.0.
> > I don't know which comipler could be smart enough to remove the
> > definition of these tracepoints.
> > Could you pls. tell me what compiler you are using ?
>
> Just look at the generated code...

With CONFIG_SCHEDSTATS not set,
I objdumped vmlinux, and perf_trace_sched_stat_template
is still there without my patch.

$ objdump -D vmlinux > vmlinux.asm
$ cat vmlinux.asm | grep trace_sched_stat
ffffffff810b3b40 <perf_trace_sched_stat_template>:
...

So I can't get your point really.
Forgive me if I missed something.

Thanks
Yafang

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] sched: fair: fix missed CONFIG_SCHEDSTATS
  2019-03-06 11:49       ` Yafang Shao
@ 2019-03-06 12:38         ` Peter Zijlstra
  2019-03-06 12:53           ` Yafang Shao
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Zijlstra @ 2019-03-06 12:38 UTC (permalink / raw)
  To: Yafang Shao; +Cc: mingo, LKML, shaoyafang

On Wed, Mar 06, 2019 at 07:49:36PM +0800, Yafang Shao wrote:


$ grep SCHEDSTAT defconfig-build/.config
# CONFIG_SCHEDSTATS is not set
$ objdump -dr defconfig-build/kernel/sched/fair.o | awk '/>:$/ { F=$2 } /sched_stat/ { print F " " $0 }'
<update_curr>:                  24cd: R_X86_64_32S      __tracepoint_sched_stat_runtime+0x28
<update_curr>:                  24d9: R_X86_64_PC32     __tracepoint_sched_stat_runtime+0x24
$ patch -p1 < foo
patching file kernel/sched/fair.c
$ make O=defconfig-build kernel/sched/
make[1]: Entering directory '/usr/src/linux-2.6/defconfig-build'
Using .. as source for kernel
GEN     Makefile
CALL    ../scripts/checksyscalls.sh
CALL    ../scripts/atomic/check-atomics.sh
DESCEND  objtool
CC      kernel/sched/fair.o
AR      kernel/sched/built-in.a
make[1]: Leaving directory '/usr/src/linux-2.6/defconfig-build'
$ objdump -dr defconfig-build/kernel/sched/fair.o | awk '/>:$/ { F=$2 } /sched_stat/ { print F " " $0 }'
$ cat foo
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8213ff6e365d..6e5ceec3b662 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -839,7 +839,8 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	if (entity_is_task(curr)) {
 		struct task_struct *curtask = task_of(curr);
 
-		trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
+		if (schedstat_enabled())
+			trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
 		cgroup_account_cputime(curtask, delta_exec);
 		account_group_exec_runtime(curtask, delta_exec);
 	}


_1_ line, where you wanted to add _6_ ugly #ifdefs

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] sched: fair: fix missed CONFIG_SCHEDSTATS
  2019-03-06 12:38         ` Peter Zijlstra
@ 2019-03-06 12:53           ` Yafang Shao
  2019-03-07  7:49             ` Yafang Shao
  0 siblings, 1 reply; 10+ messages in thread
From: Yafang Shao @ 2019-03-06 12:53 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: mingo, LKML, shaoyafang

On Wed, Mar 6, 2019 at 8:38 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Wed, Mar 06, 2019 at 07:49:36PM +0800, Yafang Shao wrote:
>
>
> $ grep SCHEDSTAT defconfig-build/.config
> # CONFIG_SCHEDSTATS is not set
> $ obbjdump -dr defconfig-build/kernel/sched/fair.o | awk '/>:$/ { F=$2 } /sched_stat/ { print F " " $0 }'
> <update_curr>:                  24cd: R_X86_64_32S      __tracepoint_sched_stat_runtime+0x28
> <update_curr>:                  24d9: R_X86_64_PC32     __tracepoint_sched_stat_runtime+0x24
> $ patch -p1 < foo
> patching file kernel/sched/fair.c
> $ make O=defconfig-build kernel/sched/
> make[1]: Entering directory '/usr/src/linux-2.6/defconfig-build'
> Using .. as source for kernel
> GEN     Makefile
> CALL    ../scripts/checksyscalls.sh
> CALL    ../scripts/atomic/check-atomics.sh
> DESCEND  objtool
> CC      kernel/sched/fair.o
> AR      kernel/sched/built-in.a
> make[1]: Leaving directory '/usr/src/linux-2.6/defconfig-build'
> $ objdump -dr defconfig-build/kernel/sched/fair.o | awk '/>:$/ { F=$2 } /sched_stat/ { print F " " $0 }'
> $ cat foo
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 8213ff6e365d..6e5ceec3b662 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -839,7 +839,8 @@ static void update_curr(struct cfs_rq *cfs_rq)
>         if (entity_is_task(curr)) {
>                 struct task_struct *curtask = task_of(curr);
>
> -               trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
> +               if (schedstat_enabled())
> +                       trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
>                 cgroup_account_cputime(curtask, delta_exec);
>                 account_group_exec_runtime(curtask, delta_exec);
>         }
>
>
> _1_ line, where you wanted to add _6_ ugly #ifdefs

I get your point now.

Yes, this code can be removed from the call sites in kernel/sched/fair.c,
but the definitions of these tracepoints are still there,
so they are still exposed in /sys/kernel/debug/tracing/events/sched/.

You can try objdumping vmlinux.
$ objdump -dr kernel/sched/fair.o | awk '/>:$/ { F=$2 } /sched_stat/ {
print F " " $0 }'    // nothing

$ objdump -dr vmlinux  | awk '/>:$/ { F=$2 } /sched_stat/ { print F " " $0 }'
<perf_trace_sched_stat_runtime>: ffffffff810b3c30
<perf_trace_sched_stat_runtime>:  // it is still defined


My guess is they will be used by perf or bpf,
so they won't be optimized out by the compiler.

Thanks
Yafang

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] sched: fair: fix missed CONFIG_SCHEDSTATS
  2019-03-06 12:53           ` Yafang Shao
@ 2019-03-07  7:49             ` Yafang Shao
  2019-03-07  9:55               ` Peter Zijlstra
  0 siblings, 1 reply; 10+ messages in thread
From: Yafang Shao @ 2019-03-07  7:49 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: mingo, LKML, shaoyafang

On Wed, Mar 6, 2019 at 8:53 PM Yafang Shao <laoar.shao@gmail.com> wrote:
>
> On Wed, Mar 6, 2019 at 8:38 PM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Wed, Mar 06, 2019 at 07:49:36PM +0800, Yafang Shao wrote:
> >
> >
> > $ grep SCHEDSTAT defconfig-build/.config
> > # CONFIG_SCHEDSTATS is not set
> > $ obbjdump -dr defconfig-build/kernel/sched/fair.o | awk '/>:$/ { F=$2 } /sched_stat/ { print F " " $0 }'
> > <update_curr>:                  24cd: R_X86_64_32S      __tracepoint_sched_stat_runtime+0x28
> > <update_curr>:                  24d9: R_X86_64_PC32     __tracepoint_sched_stat_runtime+0x24
> > $ patch -p1 < foo
> > patching file kernel/sched/fair.c
> > $ make O=defconfig-build kernel/sched/
> > make[1]: Entering directory '/usr/src/linux-2.6/defconfig-build'
> > Using .. as source for kernel
> > GEN     Makefile
> > CALL    ../scripts/checksyscalls.sh
> > CALL    ../scripts/atomic/check-atomics.sh
> > DESCEND  objtool
> > CC      kernel/sched/fair.o
> > AR      kernel/sched/built-in.a
> > make[1]: Leaving directory '/usr/src/linux-2.6/defconfig-build'
> > $ objdump -dr defconfig-build/kernel/sched/fair.o | awk '/>:$/ { F=$2 } /sched_stat/ { print F " " $0 }'
> > $ cat foo
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index 8213ff6e365d..6e5ceec3b662 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -839,7 +839,8 @@ static void update_curr(struct cfs_rq *cfs_rq)
> >         if (entity_is_task(curr)) {
> >                 struct task_struct *curtask = task_of(curr);
> >
> > -               trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
> > +               if (schedstat_enabled())
> > +                       trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
> >                 cgroup_account_cputime(curtask, delta_exec);
> >                 account_group_exec_runtime(curtask, delta_exec);
> >         }
> >
> >
> > _1_ line, where you wanted to add _6_ ugly #ifdefs
>
> I get your point now.
>
> Yes, these codes can be removed from the callsites in kernel/sched/fair.c,
> but the definitions of these tracepoints are still there,
> and then they will be exposed in /sys/kernel/debug/tracing/events/sched/.
>
> You can try objdump the vmlinux.
> $ objdump -dr kernel/sched/fair.o | awk '/>:$/ { F=$2 } /sched_stat/ {
> print F " " $0 }'    // nothing
>
> $ objdump -dr vmlinux  | awk '/>:$/ { F=$2 } /sched_stat/ { print F " " $0 }'
> <perf_trace_sched_stat_runtime>: ffffffff810b3c30
> <perf_trace_sched_stat_runtime>:  // it is still defined
>
>
> My guess is they will be used by perf or bpf,
> so they won't be optimized out by the compiler.
>

Hi Peter,

If you do not like sprinkling #ifdefs, we can use something like the code below
to resolve this issue.
I don't really like the code below, but it avoids exposing these
tracepoints to userspace.

What is your opinion?


diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 9a4bdfa..a0291f2 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -336,6 +336,7 @@ static inline long __trace_sched_switch_state(bool
preempt, struct task_struct *
                  __entry->pid, __entry->old_pid)
 );

+#ifdef CONFIG_SCHEDSTATS
 /*
  * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
  *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
@@ -394,6 +395,14 @@ static inline long
__trace_sched_switch_state(bool preempt, struct task_struct *
 DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
             TP_PROTO(struct task_struct *tsk, u64 delay),
             TP_ARGS(tsk, delay));
+#else
+
+#define trace_sched_stat_wait(...) do {} while (0)
+#define trace_sched_stat_sleep(...) do {} while (0)
+#define trace_sched_stat_iowait(...) do {} while (0)
+#define trace_sched_stat_blocked(...) do {} while (0)
+
+#endif

 /*
  * Tracepoint for accounting runtime (time the task is executing


Thanks
Yafang

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] sched: fair: fix missed CONFIG_SCHEDSTATS
  2019-03-07  7:49             ` Yafang Shao
@ 2019-03-07  9:55               ` Peter Zijlstra
  2019-03-07 10:41                 ` Yafang Shao
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Zijlstra @ 2019-03-07  9:55 UTC (permalink / raw)
  To: Yafang Shao; +Cc: mingo, LKML, shaoyafang

On Thu, Mar 07, 2019 at 03:49:58PM +0800, Yafang Shao wrote:
> What about your opinon ?
> 
> 
> diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
> index 9a4bdfa..a0291f2 100644
> --- a/include/trace/events/sched.h
> +++ b/include/trace/events/sched.h
> @@ -336,6 +336,7 @@ static inline long __trace_sched_switch_state(bool
> preempt, struct task_struct *
>                   __entry->pid, __entry->old_pid)
>  );
> 
> +#ifdef CONFIG_SCHEDSTATS
>  /*
>   * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
>   *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
> @@ -394,6 +395,14 @@ static inline long
> __trace_sched_switch_state(bool preempt, struct task_struct *
>  DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
>              TP_PROTO(struct task_struct *tsk, u64 delay),
>              TP_ARGS(tsk, delay));
> +#else
> +
> +#define trace_sched_stat_wait(...) do {} while (0)
> +#define trace_sched_stat_sleep(...) do {} while (0)
> +#define trace_sched_stat_iowait(...) do {} while (0)
> +#define trace_sched_stat_blocked(...) do {} while (0)

Make those inline functions that take proper arguments.

> +#endif

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] sched: fair: fix missed CONFIG_SCHEDSTATS
  2019-03-07  9:55               ` Peter Zijlstra
@ 2019-03-07 10:41                 ` Yafang Shao
  0 siblings, 0 replies; 10+ messages in thread
From: Yafang Shao @ 2019-03-07 10:41 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: mingo, LKML, shaoyafang

On Thu, Mar 7, 2019 at 5:55 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Thu, Mar 07, 2019 at 03:49:58PM +0800, Yafang Shao wrote:
> > What about your opinon ?
> >
> >
> > diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
> > index 9a4bdfa..a0291f2 100644
> > --- a/include/trace/events/sched.h
> > +++ b/include/trace/events/sched.h
> > @@ -336,6 +336,7 @@ static inline long __trace_sched_switch_state(bool
> > preempt, struct task_struct *
> >                   __entry->pid, __entry->old_pid)
> >  );
> >
> > +#ifdef CONFIG_SCHEDSTATS
> >  /*
> >   * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
> >   *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
> > @@ -394,6 +395,14 @@ static inline long
> > __trace_sched_switch_state(bool preempt, struct task_struct *
> >  DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
> >              TP_PROTO(struct task_struct *tsk, u64 delay),
> >              TP_ARGS(tsk, delay));
> > +#else
> > +
> > +#define trace_sched_stat_wait(...) do {} while (0)
> > +#define trace_sched_stat_sleep(...) do {} while (0)
> > +#define trace_sched_stat_iowait(...) do {} while (0)
> > +#define trace_sched_stat_blocked(...) do {} while (0)
>
> Make those inline functions that take proper arguments.
>

Sure.
Will change it.

Thanks
Yafang

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2019-03-07 10:42 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-06  8:43 [PATCH] sched: fair: fix missed CONFIG_SCHEDSTATS Yafang Shao
2019-03-06 10:09 ` Peter Zijlstra
2019-03-06 10:15   ` Yafang Shao
2019-03-06 11:27     ` Peter Zijlstra
2019-03-06 11:49       ` Yafang Shao
2019-03-06 12:38         ` Peter Zijlstra
2019-03-06 12:53           ` Yafang Shao
2019-03-07  7:49             ` Yafang Shao
2019-03-07  9:55               ` Peter Zijlstra
2019-03-07 10:41                 ` Yafang Shao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).