All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perf/x86/intel: Add proper condition to run sched_task callbacks
@ 2017-07-17 15:01 Jiri Olsa
  2017-07-18  9:14 ` Peter Zijlstra
  0 siblings, 1 reply; 7+ messages in thread
From: Jiri Olsa @ 2017-07-17 15:01 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: lkml, Ingo Molnar, Andi Kleen, Alexander Shishkin, Kan Liang

The x86 pmu currently uses the sched_task callback for 2 functions:
  - PEBS drain
  - save/restore LBR data

They are both triggered once the x86 pmu is registered with
perf_sched_cb_inc call (within pmu::add	callback), regardless
if there's actually any PEBS or LBR event configured on the cpu.

This can lead to extra cycles in some perf monitoring, like
when we monitor PEBS event without LBR data. We need PEBS,
non freq/timestamp event to enable the sched_task callback:

  # perf record --no-timestamp -c 10000 -e cycles:p ./perf bench sched pipe -l 1000000

The perf stat of cycles and msr:write_msr for the above command before the change:
  ...
  Performance counter stats for './perf record --no-timestamp -c 10000 -e cycles:p \
                                 ./perf bench sched pipe -l 1000000' (5 runs):

    18,519,557,441      cycles:k
        91,195,527      msr:write_msr

      29.334476406 seconds time elapsed

And after the change:
  ...
  Performance counter stats for './perf record --no-timestamp -c 10000 -e cycles:p \
                                 ./perf bench sched pipe -l 1000000' (5 runs):

    18,565,757,840      cycles:k
        27,103,160      msr:write_msr

      16.253026030 seconds time elapsed

There's no effect on cycles:k because the sched_task happens
with events switched off, however the msr:write_msr tracepoint
counter and almost 50% of time speedup show the improvement.

Monitoring LBR event and having extra PEBS drain processing
in sched_task callback showed just a little speedup, because
the drain function does not do much extra work in case there
is no PEBS data.

Fixing this by adding PEBS and LBR conditions for relevant
event data being configured on cpu into intel_pmu_sched_task
callback.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 arch/x86/events/intel/core.c | 6 ++++--
 arch/x86/events/intel/ds.c   | 8 ++++----
 arch/x86/events/perf_event.h | 2 ++
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index aa62437d1aa1..1f66356d8122 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3265,9 +3265,11 @@ static void intel_pmu_cpu_dying(int cpu)
 static void intel_pmu_sched_task(struct perf_event_context *ctx,
 				 bool sched_in)
 {
-	if (x86_pmu.pebs_active)
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	if (intel_pmu_pebs_needs_sched_cb(cpuc))
 		intel_pmu_pebs_sched_task(ctx, sched_in);
-	if (x86_pmu.lbr_nr)
+	if (cpuc->lbr_users)
 		intel_pmu_lbr_sched_task(ctx, sched_in);
 }
 
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index c6d23ffe422d..c42e68efd6ec 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -811,7 +811,7 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
  * the large interrupt threshold, such that we can provide PID and TID
  * to PEBS samples.
  */
-static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
+inline bool intel_pmu_pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
 {
 	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
 }
@@ -841,7 +841,7 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
 	 */
 	bool update = cpuc->n_pebs == 1;
 
-	if (needed_cb != pebs_needs_sched_cb(cpuc)) {
+	if (needed_cb != intel_pmu_pebs_needs_sched_cb(cpuc)) {
 		if (!needed_cb)
 			perf_sched_cb_inc(pmu);
 		else
@@ -858,7 +858,7 @@ void intel_pmu_pebs_add(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	bool needed_cb = pebs_needs_sched_cb(cpuc);
+	bool needed_cb = intel_pmu_pebs_needs_sched_cb(cpuc);
 
 	cpuc->n_pebs++;
 	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
@@ -896,7 +896,7 @@ void intel_pmu_pebs_del(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	bool needed_cb = pebs_needs_sched_cb(cpuc);
+	bool needed_cb = intel_pmu_pebs_needs_sched_cb(cpuc);
 
 	cpuc->n_pebs--;
 	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 53728eea1bed..3a1acc40bfee 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -909,6 +909,8 @@ void intel_pmu_pebs_disable_all(void);
 
 void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
 
+bool intel_pmu_pebs_needs_sched_cb(struct cpu_hw_events *cpuc);
+
 void intel_ds_init(void);
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
-- 
2.9.4

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] perf/x86/intel: Add proper condition to run sched_task callbacks
  2017-07-17 15:01 [PATCH] perf/x86/intel: Add proper condition to run sched_task callbacks Jiri Olsa
@ 2017-07-18  9:14 ` Peter Zijlstra
  2017-07-18  9:29   ` Jiri Olsa
  0 siblings, 1 reply; 7+ messages in thread
From: Peter Zijlstra @ 2017-07-18  9:14 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: lkml, Ingo Molnar, Andi Kleen, Alexander Shishkin, Kan Liang

On Mon, Jul 17, 2017 at 05:01:56PM +0200, Jiri Olsa wrote:
> The x86 pmu currently uses the sched_task callback for 2 functions:
>   - PEBS drain
>   - save/restore LBR data
> 
> They are both triggered once the x86 pmu is registered with
> perf_sched_cb_inc call (within pmu::add	callback), regardless
> if there's actually any PEBS or LBR event configured on the cpu.

I don't understand. If we do pmu::add() we _are_ on the CPU.

So you're saying intel_pmu_pebs_{add,del}() are doing it wrong? So why
not fix those?

> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index aa62437d1aa1..1f66356d8122 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -3265,9 +3265,11 @@ static void intel_pmu_cpu_dying(int cpu)
>  static void intel_pmu_sched_task(struct perf_event_context *ctx,
>  				 bool sched_in)
>  {
> -	if (x86_pmu.pebs_active)
> +	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> +
> +	if (intel_pmu_pebs_needs_sched_cb(cpuc))
>  		intel_pmu_pebs_sched_task(ctx, sched_in);

So I'm confused, if we'd not need this, how come we're here in the first
place?

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] perf/x86/intel: Add proper condition to run sched_task callbacks
  2017-07-18  9:14 ` Peter Zijlstra
@ 2017-07-18  9:29   ` Jiri Olsa
  2017-07-18 12:37     ` Peter Zijlstra
  0 siblings, 1 reply; 7+ messages in thread
From: Jiri Olsa @ 2017-07-18  9:29 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Jiri Olsa, lkml, Ingo Molnar, Andi Kleen, Alexander Shishkin, Kan Liang

On Tue, Jul 18, 2017 at 11:14:44AM +0200, Peter Zijlstra wrote:
> On Mon, Jul 17, 2017 at 05:01:56PM +0200, Jiri Olsa wrote:
> > The x86 pmu currently uses the sched_task callback for 2 functions:
> >   - PEBS drain
> >   - save/restore LBR data
> > 
> > They are both triggered once the x86 pmu is registered with
> > perf_sched_cb_inc call (within pmu::add	callback), regardless
> > if there's actually any PEBS or LBR event configured on the cpu.
> 
> I don't understand. If we do pmu::add() we _are_ on the CPU.
> 
> So you're saying intel_pmu_pebs_{add,del}() are doing it wrong? So why
> not fix those?
> 
> > diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> > index aa62437d1aa1..1f66356d8122 100644
> > --- a/arch/x86/events/intel/core.c
> > +++ b/arch/x86/events/intel/core.c
> > @@ -3265,9 +3265,11 @@ static void intel_pmu_cpu_dying(int cpu)
> >  static void intel_pmu_sched_task(struct perf_event_context *ctx,
> >  				 bool sched_in)
> >  {
> > -	if (x86_pmu.pebs_active)
> > +	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> > +
> > +	if (intel_pmu_pebs_needs_sched_cb(cpuc))
> >  		intel_pmu_pebs_sched_task(ctx, sched_in);
> 
> So I'm confused, if we'd not need this, how come we're here in the first
> place?
> 

because we have 2 places using the same callback
  - PEBS drain for free running counters
  - LBR save/store

both of them called from intel_pmu_sched_task

so let's say PEBS drain setup the callback for the event,
but in the callback itself (intel_pmu_sched_task) we will
also run the code for LBR save/restore, which we did not
ask for, but the code in intel_pmu_sched_task does not
check for that

I'm adding conditions to recognize the work that needs
to be done in the callback

another option might be to add support for more x86_pmu::sched_task
callbacks, which might be cleaner

jirka

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] perf/x86/intel: Add proper condition to run sched_task callbacks
  2017-07-18  9:29   ` Jiri Olsa
@ 2017-07-18 12:37     ` Peter Zijlstra
  2017-07-18 22:11       ` Jiri Olsa
  2017-07-19  7:52       ` Jiri Olsa
  0 siblings, 2 replies; 7+ messages in thread
From: Peter Zijlstra @ 2017-07-18 12:37 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Jiri Olsa, lkml, Ingo Molnar, Andi Kleen, Alexander Shishkin, Kan Liang

On Tue, Jul 18, 2017 at 11:29:32AM +0200, Jiri Olsa wrote:

> because we have 2 places using the same callback
>   - PEBS drain for free running counters
>   - LBR save/store
> 
> both of them called from intel_pmu_sched_task
> 
> so let's say PEBS drain setup the callback for the event,
> but in the callback itself (intel_pmu_sched_task) we will
> also run the code for LBR save/restore, which we did not
> ask for, but the code in intel_pmu_sched_task does not
> check for that

Ah fair enough; Changelog confused me.

> I'm adding conditions to recognize the work that needs
> to be done in the callback
> 
> another option might be to add support for more x86_pmu::sched_task
> callbacks, which might be cleaner

Right; either that or pull the condition into the functions themselves
to create less churn. Something like so I suppose...


diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index aa62437d1aa1..2d533d4c0e2c 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3265,10 +3265,8 @@ static void intel_pmu_cpu_dying(int cpu)
 static void intel_pmu_sched_task(struct perf_event_context *ctx,
 				 bool sched_in)
 {
-	if (x86_pmu.pebs_active)
-		intel_pmu_pebs_sched_task(ctx, sched_in);
-	if (x86_pmu.lbr_nr)
-		intel_pmu_lbr_sched_task(ctx, sched_in);
+	intel_pmu_pebs_sched_task(ctx, sched_in);
+	intel_pmu_lbr_sched_task(ctx, sched_in);
 }
 
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index c6d23ffe422d..6ee7ebdc8555 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -606,12 +606,6 @@ static inline void intel_pmu_drain_pebs_buffer(void)
 	x86_pmu.drain_pebs(&regs);
 }
 
-void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
-{
-	if (!sched_in)
-		intel_pmu_drain_pebs_buffer();
-}
-
 /*
  * PEBS
  */
@@ -816,6 +810,14 @@ static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
 	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
 }
 
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	if (!sched_in && pebs_needs_sched_cb(cpuc))
+		intel_pmu_drain_pebs_buffer();
+}
+
 static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
 {
 	struct debug_store *ds = cpuc->ds;
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index eb261656a320..955457a30197 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -380,8 +380,12 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
 
+	if (!cpuc->lbr_users)
+		return;
+
 	/*
 	 * If LBR callstack feature is enabled and the stack was saved when
 	 * the task was scheduled out, restore the stack. Otherwise flush

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] perf/x86/intel: Add proper condition to run sched_task callbacks
  2017-07-18 12:37     ` Peter Zijlstra
@ 2017-07-18 22:11       ` Jiri Olsa
  2017-07-19  7:52       ` Jiri Olsa
  1 sibling, 0 replies; 7+ messages in thread
From: Jiri Olsa @ 2017-07-18 22:11 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Jiri Olsa, lkml, Ingo Molnar, Andi Kleen, Alexander Shishkin, Kan Liang

On Tue, Jul 18, 2017 at 02:37:34PM +0200, Peter Zijlstra wrote:
> On Tue, Jul 18, 2017 at 11:29:32AM +0200, Jiri Olsa wrote:
> 
> > because we have 2 places using the same callback
> >   - PEBS drain for free running counters
> >   - LBR save/store
> > 
> > both of them called from intel_pmu_sched_task
> > 
> > so let's say PEBS drain setup the callback for the event,
> > but in the callback itself (intel_pmu_sched_task) we will
> > also run the code for LBR save/restore, which we did not
> > ask for, but the code in intel_pmu_sched_task does not
> > check for that
> 
> Ah fair enough; Changelog confused me.
> 
> > I'm adding conditions to recognize the work that needs
> > to be done in the callback
> > 
> > another option might be to add support for more x86_pmu::sched_task
> > callbacks, which might be cleaner
> 
> Right; either that or pull the condition into the functions themselves
> to create less churn. Something like so I suppose...

ok, will send new version

jirka

> 
> 
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index aa62437d1aa1..2d533d4c0e2c 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -3265,10 +3265,8 @@ static void intel_pmu_cpu_dying(int cpu)
>  static void intel_pmu_sched_task(struct perf_event_context *ctx,
>  				 bool sched_in)
>  {
> -	if (x86_pmu.pebs_active)
> -		intel_pmu_pebs_sched_task(ctx, sched_in);
> -	if (x86_pmu.lbr_nr)
> -		intel_pmu_lbr_sched_task(ctx, sched_in);
> +	intel_pmu_pebs_sched_task(ctx, sched_in);
> +	intel_pmu_lbr_sched_task(ctx, sched_in);
>  }
>  
>  PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index c6d23ffe422d..6ee7ebdc8555 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -606,12 +606,6 @@ static inline void intel_pmu_drain_pebs_buffer(void)
>  	x86_pmu.drain_pebs(&regs);
>  }
>  
> -void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
> -{
> -	if (!sched_in)
> -		intel_pmu_drain_pebs_buffer();
> -}
> -
>  /*
>   * PEBS
>   */
> @@ -816,6 +810,14 @@ static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
>  	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
>  }
>  
> +void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
> +{
> +	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> +
> +	if (!sched_in && pebs_needs_sched_cb(cpuc))
> +		intel_pmu_drain_pebs_buffer();
> +}
> +
>  static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
>  {
>  	struct debug_store *ds = cpuc->ds;
> diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
> index eb261656a320..955457a30197 100644
> --- a/arch/x86/events/intel/lbr.c
> +++ b/arch/x86/events/intel/lbr.c
> @@ -380,8 +380,12 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
>  
>  void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
>  {
> +	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>  	struct x86_perf_task_context *task_ctx;
>  
> +	if (!cpuc->lbr_users)
> +		return;
> +
>  	/*
>  	 * If LBR callstack feature is enabled and the stack was saved when
>  	 * the task was scheduled out, restore the stack. Otherwise flush

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] perf/x86/intel: Add proper condition to run sched_task callbacks
  2017-07-18 12:37     ` Peter Zijlstra
  2017-07-18 22:11       ` Jiri Olsa
@ 2017-07-19  7:52       ` Jiri Olsa
  2017-07-21  9:38         ` [tip:perf/urgent] " tip-bot for Jiri Olsa
  1 sibling, 1 reply; 7+ messages in thread
From: Jiri Olsa @ 2017-07-19  7:52 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Jiri Olsa, lkml, Ingo Molnar, Andi Kleen, Alexander Shishkin, Kan Liang

On Tue, Jul 18, 2017 at 02:37:34PM +0200, Peter Zijlstra wrote:
> On Tue, Jul 18, 2017 at 11:29:32AM +0200, Jiri Olsa wrote:
> 
> > because we have 2 places using the same callback
> >   - PEBS drain for free running counters
> >   - LBR save/store
> > 
> > both of them called from intel_pmu_sched_task
> > 
> > so let's say PEBS drain setup the callback for the event,
> > but in the callback itself (intel_pmu_sched_task) we will
> > also run the code for LBR save/restore, which we did not
> > ask for, but the code in intel_pmu_sched_task does not
> > check for that
> 
> Ah fair enough; Changelog confused me.

just when I thought that my english is getting bedder ;-)

v2 attached with new changelog 

jirka


---
We have 2 functions using the same sched_task callback:
  - PEBS drain for free running counters
  - LBR save/store

Both of them are called from intel_pmu_sched_task and
either of them can be unwillingly triggered when the
other one is configured to run.

Let's say there's PEBS drain configured in sched_task
callback for the event, but in the callback itself
(intel_pmu_sched_task) we will also run the code for
LBR save/restore, which we did not ask for, but the
code in intel_pmu_sched_task does not check for that.

This can lead to extra cycles in some perf monitoring,
like when we monitor PEBS event without LBR data.

  # perf record --no-timestamp -c 10000 -e cycles:p ./perf bench sched pipe -l 1000000

  (We need PEBS, non freq/non timestamp event to enable
   the sched_task callback)

The perf stat of cycles and msr:write_msr for above
command before the change:
  ...
  Performance counter stats for './perf record --no-timestamp -c 10000 -e cycles:p \
                                 ./perf bench sched pipe -l 1000000' (5 runs):

    18,519,557,441      cycles:k
        91,195,527      msr:write_msr

      29.334476406 seconds time elapsed

And after the change:
  ...
  Performance counter stats for './perf record --no-timestamp -c 10000 -e cycles:p \
                                 ./perf bench sched pipe -l 1000000' (5 runs):

    18,704,973,540      cycles:k
        27,184,720      msr:write_msr

      16.977875900 seconds time elapsed

There's no effect on cycles:k because the sched_task happens
with events switched off, however the msr:write_msr tracepoint
counter together with almost 50% of time speedup show the
improvement.

Monitoring LBR event and having extra PEBS drain processing
in sched_task callback showed just a little speedup, because
the drain function does not do much extra work in case there
is no PEBS data.

Adding conditions to recognize the configured work that needs
to be done in the x86_pmu's sched_task callback.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 arch/x86/events/intel/core.c |  6 ++----
 arch/x86/events/intel/ds.c   | 14 ++++++++------
 arch/x86/events/intel/lbr.c  |  4 ++++
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index ede97710c2f4..98b0f0729527 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3397,10 +3397,8 @@ static void intel_pmu_cpu_dying(int cpu)
 static void intel_pmu_sched_task(struct perf_event_context *ctx,
 				 bool sched_in)
 {
-	if (x86_pmu.pebs_active)
-		intel_pmu_pebs_sched_task(ctx, sched_in);
-	if (x86_pmu.lbr_nr)
-		intel_pmu_lbr_sched_task(ctx, sched_in);
+	intel_pmu_pebs_sched_task(ctx, sched_in);
+	intel_pmu_lbr_sched_task(ctx, sched_in);
 }
 
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 6dc8a59e1bfb..a322fed5f8ed 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -606,12 +606,6 @@ static inline void intel_pmu_drain_pebs_buffer(void)
 	x86_pmu.drain_pebs(&regs);
 }
 
-void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
-{
-	if (!sched_in)
-		intel_pmu_drain_pebs_buffer();
-}
-
 /*
  * PEBS
  */
@@ -822,6 +816,14 @@ static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
 	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
 }
 
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	if (!sched_in && pebs_needs_sched_cb(cpuc))
+		intel_pmu_drain_pebs_buffer();
+}
+
 static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
 {
 	struct debug_store *ds = cpuc->ds;
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index eb261656a320..955457a30197 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -380,8 +380,12 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
 
+	if (!cpuc->lbr_users)
+		return;
+
 	/*
 	 * If LBR callstack feature is enabled and the stack was saved when
 	 * the task was scheduled out, restore the stack. Otherwise flush
-- 
2.9.4

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [tip:perf/urgent] perf/x86/intel: Add proper condition to run sched_task callbacks
  2017-07-19  7:52       ` Jiri Olsa
@ 2017-07-21  9:38         ` tip-bot for Jiri Olsa
  0 siblings, 0 replies; 7+ messages in thread
From: tip-bot for Jiri Olsa @ 2017-07-21  9:38 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: peterz, linux-kernel, tglx, hpa, alexander.shishkin, kan.liang,
	jolsa, mingo, jolsa

Commit-ID:  df6c3db8d30fb1699ccbc403196b86324f4257af
Gitweb:     http://git.kernel.org/tip/df6c3db8d30fb1699ccbc403196b86324f4257af
Author:     Jiri Olsa <jolsa@redhat.com>
AuthorDate: Wed, 19 Jul 2017 09:52:47 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Fri, 21 Jul 2017 09:58:39 +0200

perf/x86/intel: Add proper condition to run sched_task callbacks

We have 2 functions using the same sched_task callback:

  - PEBS drain for free running counters
  - LBR save/store

Both of them are called from intel_pmu_sched_task() and
either of them can be unwillingly triggered when the
other one is configured to run.

Let's say there's PEBS drain configured in sched_task
callback for the event, but in the callback itself
(intel_pmu_sched_task()) we will also run the code for
LBR save/restore, which we did not ask for, but the
code in intel_pmu_sched_task() does not check for that.

This can lead to extra cycles in some perf monitoring,
like when we monitor PEBS event without LBR data.

  # perf record --no-timestamp -c 10000 -e cycles:p ./perf bench sched pipe -l 1000000

  (We need PEBS, non freq/non timestamp event to enable
   the sched_task callback)

The perf stat of cycles and msr:write_msr for above
command before the change:
  ...
  Performance counter stats for './perf record --no-timestamp -c 10000 -e cycles:p \
                                 ./perf bench sched pipe -l 1000000' (5 runs):

    18,519,557,441      cycles:k
        91,195,527      msr:write_msr

      29.334476406 seconds time elapsed

And after the change:
  ...
  Performance counter stats for './perf record --no-timestamp -c 10000 -e cycles:p \
                                 ./perf bench sched pipe -l 1000000' (5 runs):

    18,704,973,540      cycles:k
        27,184,720      msr:write_msr

      16.977875900 seconds time elapsed

There's no effect on cycles:k because the sched_task happens
with events switched off, however the msr:write_msr tracepoint
counter together with almost 50% of time speedup show the
improvement.

Monitoring LBR event and having extra PEBS drain processing
in sched_task callback showed just a little speedup, because
the drain function does not do much extra work in case there
is no PEBS data.

Adding conditions to recognize the configured work that needs
to be done in the x86_pmu's sched_task callback.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20170719075247.GA27506@krava
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/core.c |  6 ++----
 arch/x86/events/intel/ds.c   | 14 ++++++++------
 arch/x86/events/intel/lbr.c  |  4 ++++
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index ede9771..98b0f07 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3397,10 +3397,8 @@ static void intel_pmu_cpu_dying(int cpu)
 static void intel_pmu_sched_task(struct perf_event_context *ctx,
 				 bool sched_in)
 {
-	if (x86_pmu.pebs_active)
-		intel_pmu_pebs_sched_task(ctx, sched_in);
-	if (x86_pmu.lbr_nr)
-		intel_pmu_lbr_sched_task(ctx, sched_in);
+	intel_pmu_pebs_sched_task(ctx, sched_in);
+	intel_pmu_lbr_sched_task(ctx, sched_in);
 }
 
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 6dc8a59..a322fed 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -606,12 +606,6 @@ static inline void intel_pmu_drain_pebs_buffer(void)
 	x86_pmu.drain_pebs(&regs);
 }
 
-void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
-{
-	if (!sched_in)
-		intel_pmu_drain_pebs_buffer();
-}
-
 /*
  * PEBS
  */
@@ -822,6 +816,14 @@ static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
 	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
 }
 
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	if (!sched_in && pebs_needs_sched_cb(cpuc))
+		intel_pmu_drain_pebs_buffer();
+}
+
 static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
 {
 	struct debug_store *ds = cpuc->ds;
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index eb26165..955457a 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -380,8 +380,12 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
 
+	if (!cpuc->lbr_users)
+		return;
+
 	/*
 	 * If LBR callstack feature is enabled and the stack was saved when
 	 * the task was scheduled out, restore the stack. Otherwise flush

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2017-07-21  9:41 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-17 15:01 [PATCH] perf/x86/intel: Add proper condition to run sched_task callbacks Jiri Olsa
2017-07-18  9:14 ` Peter Zijlstra
2017-07-18  9:29   ` Jiri Olsa
2017-07-18 12:37     ` Peter Zijlstra
2017-07-18 22:11       ` Jiri Olsa
2017-07-19  7:52       ` Jiri Olsa
2017-07-21  9:38         ` [tip:perf/urgent] " tip-bot for Jiri Olsa

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.