* [RFC] perf/x86/intel: Account interrupts for PEBS errors
@ 2016-12-14 16:50 Jiri Olsa
  2016-12-14 18:07 ` Peter Zijlstra
  0 siblings, 1 reply; 9+ messages in thread
From: Jiri Olsa @ 2016-12-14 16:50 UTC (permalink / raw)
  To: Peter Zijlstra, Andi Kleen
  Cc: lkml, Alexander Shishkin, Vince Weaver, Ingo Molnar

hi,
I'm hitting a soft lockup generated by the fuzzer, where
perf hangs in the remote_install path like:

 NMI watchdog: BUG: soft lockup - CPU#22 stuck for 22s! [perf_fuzzer:5816]

 task: ffff880273148000 task.stack: ffffc90002d58000
 RIP: 0010:[<ffffffff81159232>]  [<ffffffff81159232>] smp_call_function_single+0xe2/0x140
 RSP: 0018:ffffc90002d5bd60  EFLAGS: 00000202
 ...
 Call Trace:
  [<ffffffff81114ce5>] ? trace_hardirqs_on_caller+0xf5/0x1b0
  [<ffffffff811e20e0>] ? perf_cgroup_attach+0x70/0x70
  [<ffffffff811e1049>] perf_install_in_context+0x199/0x1b0
  [<ffffffff811e74e0>] ? ctx_resched+0x90/0x90
  [<ffffffff811ed9d1>] SYSC_perf_event_open+0x641/0xf90
  [<ffffffff811f1069>] SyS_perf_event_open+0x9/0x10
  [<ffffffff81003edc>] do_syscall_64+0x6c/0x1f0
  [<ffffffff818defc9>] entry_SYSCALL64_slow_path+0x25/0x25


I found out that I could reproduce this with the following
2 perf commands running simultaneously:

  taskset -c 1 ./perf record -c 4 -e branches:pp -j any -C 10

this forces cpu 10 into an endless loop, causing the soft lockup

AFAICS the reason for this is that intel_pmu_drain_pebs_nhm does
not account event->hw.interrupts for error PEBS interrupts, so in
case you're getting ONLY errors you don't have a way to stop the
event when it's over the max_samples_per_tick limit
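
for reference, the throttle accounting that never runs on this error
path looks roughly like this (a simplified sketch of the logic in
__perf_event_overflow in kernel/events/core.c, abbreviated):

	seq = __this_cpu_read(perf_throttled_seq);
	if (seq != hwc->interrupts_seq) {
		/* first interrupt in this tick period, restart counting */
		hwc->interrupts_seq = seq;
		hwc->interrupts = 1;
	} else {
		hwc->interrupts++;
		if (unlikely(throttle &&
			     hwc->interrupts >= max_samples_per_tick)) {
			/* too many interrupts within one tick, throttle */
			__this_cpu_inc(perf_throttled_count);
			hwc->interrupts = MAX_INTERRUPTS;
			perf_log_throttle(event, 0);
			ret = 1;	/* tells the caller to stop the event */
		}
	}

since the error path never bumps hwc->interrupts, the throttle never
kicks in and the cpu keeps looping in the PMI handler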

I added extra accounting for error PEBS interrupts and it seems to
work now; the fuzzer has been running for several hours ;-)

also, I could not reproduce this with any other event, just branches
plus the additional branch stack sample type

I also fail to reproduce this on anything other than a snb_x (model 45) server

thoughts?

thanks,
jirka


---
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index be202390bbd3..f2010dbe75d6 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			continue;
 
 		/* log dropped samples number */
-		if (error[bit])
+		if (error[bit]) {
 			perf_log_lost_samples(event, error[bit]);
 
+			if (perf_event_account_interrupt(event, 1))
+				x86_pmu_stop(event, 0);
+		}
+
 		if (counts[bit]) {
 			__intel_pmu_pebs_event(event, iregs, base,
 					       top, bit, counts[bit]);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4741ecdb9817..7225396228ce 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1259,6 +1259,7 @@ extern void perf_event_disable(struct perf_event *event);
 extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_disable_inatomic(struct perf_event *event);
 extern void perf_event_task_tick(void);
+extern int perf_event_account_interrupt(struct perf_event *event, int throttle);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 02c8421f8c01..93b46cc2c977 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7034,25 +7034,11 @@ static void perf_log_itrace_start(struct perf_event *event)
 	perf_output_end(&handle);
 }
 
-/*
- * Generic event overflow handling, sampling.
- */
-
-static int __perf_event_overflow(struct perf_event *event,
-				   int throttle, struct perf_sample_data *data,
-				   struct pt_regs *regs)
+int perf_event_account_interrupt(struct perf_event *event, int throttle)
 {
-	int events = atomic_read(&event->event_limit);
 	struct hw_perf_event *hwc = &event->hw;
-	u64 seq;
 	int ret = 0;
-
-	/*
-	 * Non-sampling counters might still use the PMI to fold short
-	 * hardware counters, ignore those.
-	 */
-	if (unlikely(!is_sampling_event(event)))
-		return 0;
+	u64 seq;
 
 	seq = __this_cpu_read(perf_throttled_seq);
 	if (seq != hwc->interrupts_seq) {
@@ -7070,6 +7056,30 @@ static int __perf_event_overflow(struct perf_event *event,
 		}
 	}
 
+	return ret;
+}
+
+/*
+ * Generic event overflow handling, sampling.
+ */
+
+static int __perf_event_overflow(struct perf_event *event,
+				   int throttle, struct perf_sample_data *data,
+				   struct pt_regs *regs)
+{
+	int events = atomic_read(&event->event_limit);
+	struct hw_perf_event *hwc = &event->hw;
+	int ret = 0;
+
+	/*
+	 * Non-sampling counters might still use the PMI to fold short
+	 * hardware counters, ignore those.
+	 */
+	if (unlikely(!is_sampling_event(event)))
+		return 0;
+
+	ret = perf_event_account_interrupt(event, throttle);
+
 	if (event->attr.freq) {
 		u64 now = perf_clock();
 		s64 delta = now - hwc->freq_time_stamp;


* Re: [RFC] perf/x86/intel: Account interrupts for PEBS errors
  2016-12-14 16:50 [RFC] perf/x86/intel: Account interrupts for PEBS errors Jiri Olsa
@ 2016-12-14 18:07 ` Peter Zijlstra
  2016-12-14 18:16   ` Jiri Olsa
  0 siblings, 1 reply; 9+ messages in thread
From: Peter Zijlstra @ 2016-12-14 18:07 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: Andi Kleen, lkml, Alexander Shishkin, Vince Weaver, Ingo Molnar

On Wed, Dec 14, 2016 at 05:50:36PM +0100, Jiri Olsa wrote:
> 
> I also fail to reproduce this on anything other than a snb_x (model 45) server

reproduces on my ivb-ep as well, model 62.

> thoughts?

cute find :-)

> +++ b/arch/x86/events/intel/ds.c
> @@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
>  			continue;
>  
>  		/* log dropped samples number */
> -		if (error[bit])
> +		if (error[bit]) {
>  			perf_log_lost_samples(event, error[bit]);
>  
> +			if (perf_event_account_interrupt(event, 1))

Seems a bit daft to expose the .throttle argument, since that would be
the only point of calling this.




> +static int __perf_event_overflow(struct perf_event *event,
> +				   int throttle, struct perf_sample_data *data,
> +				   struct pt_regs *regs)
> +{
> +	int events = atomic_read(&event->event_limit);
> +	struct hw_perf_event *hwc = &event->hw;
> +	int ret = 0;
> +
> +	/*
> +	 * Non-sampling counters might still use the PMI to fold short
> +	 * hardware counters, ignore those.
> +	 */
> +	if (unlikely(!is_sampling_event(event)))
> +		return 0;
> +
> +	ret = perf_event_account_interrupt(event, throttle);
> +
>  	if (event->attr.freq) {
>  		u64 now = perf_clock();
>  		s64 delta = now - hwc->freq_time_stamp;

Arguably, everything in __perf_event_overflow() except for the call to
->overflow_handler() should be done, I think.


* Re: [RFC] perf/x86/intel: Account interrupts for PEBS errors
  2016-12-14 18:07 ` Peter Zijlstra
@ 2016-12-14 18:16   ` Jiri Olsa
  2016-12-14 19:32     ` Peter Zijlstra
  0 siblings, 1 reply; 9+ messages in thread
From: Jiri Olsa @ 2016-12-14 18:16 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, lkml, Alexander Shishkin, Vince Weaver, Ingo Molnar

On Wed, Dec 14, 2016 at 07:07:30PM +0100, Peter Zijlstra wrote:
> On Wed, Dec 14, 2016 at 05:50:36PM +0100, Jiri Olsa wrote:
> > 
> > I also fail to reproduce this on anything other than a snb_x (model 45) server
> 
> reproduces on my ivb-ep as well model 62.
> 
> > thoughts?
> 
> cute find :-)
> 
> > +++ b/arch/x86/events/intel/ds.c
> > @@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
> >  			continue;
> >  
> >  		/* log dropped samples number */
> > -		if (error[bit])
> > +		if (error[bit]) {
> >  			perf_log_lost_samples(event, error[bit]);
> >  
> > +			if (perf_event_account_interrupt(event, 1))
> 
> Seems a bit daft to expose the .throttle argument, since that would be
> the only point of calling this.

there's also the other caller from __perf_event_overflow

> > +static int __perf_event_overflow(struct perf_event *event,
> > +				   int throttle, struct perf_sample_data *data,
> > +				   struct pt_regs *regs)
> > +{
> > +	int events = atomic_read(&event->event_limit);
> > +	struct hw_perf_event *hwc = &event->hw;
> > +	int ret = 0;
> > +
> > +	/*
> > +	 * Non-sampling counters might still use the PMI to fold short
> > +	 * hardware counters, ignore those.
> > +	 */
> > +	if (unlikely(!is_sampling_event(event)))
> > +		return 0;
> > +
> > +	ret = perf_event_account_interrupt(event, throttle);
> > +
> >  	if (event->attr.freq) {
> >  		u64 now = perf_clock();
> >  		s64 delta = now - hwc->freq_time_stamp;
> 
> Arguably, everything in __perf_event_overflow() except for the call to
> ->overflow_handler() should be done, I think.

well, I was wondering about that period adjustment bit

but I wasn't sure about those pending_kill/pending_wakeup bits,
they make sense to me only if we have some data to deliver
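
for reference, the delivery bits I mean are roughly this tail of
__perf_event_overflow (slightly abbreviated):

	event->pending_kill = POLL_IN;
	if (events && atomic_dec_and_test(&event->event_limit)) {
		/* user-set event limit reached, disable the event */
		ret = 1;
		event->pending_kill = POLL_HUP;
		perf_event_disable_inatomic(event);
	}

	event->overflow_handler(event, data, regs);

	if (*perf_event_fasync(event) && event->pending_kill) {
		/* signal userspace that data is ready */
		event->pending_wakeup = 1;
		irq_work_queue(&event->pending);
	}

none of that seems needed when the PEBS record was dropped and
there's no sample to deliver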


jirka


* Re: [RFC] perf/x86/intel: Account interrupts for PEBS errors
  2016-12-14 18:16   ` Jiri Olsa
@ 2016-12-14 19:32     ` Peter Zijlstra
  2016-12-15  7:14       ` Jiri Olsa
  2016-12-15 15:43       ` [PATCHv2] " Jiri Olsa
  0 siblings, 2 replies; 9+ messages in thread
From: Peter Zijlstra @ 2016-12-14 19:32 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: Andi Kleen, lkml, Alexander Shishkin, Vince Weaver, Ingo Molnar

On Wed, Dec 14, 2016 at 07:16:36PM +0100, Jiri Olsa wrote:

> > > +++ b/arch/x86/events/intel/ds.c
> > > @@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
> > >  			continue;
> > >  
> > >  		/* log dropped samples number */
> > > -		if (error[bit])
> > > +		if (error[bit]) {
> > >  			perf_log_lost_samples(event, error[bit]);
> > >  
> > > +			if (perf_event_account_interrupt(event, 1))
> > 
> > Seems a bit daft to expose the .throttle argument, since that would be
> > the only point of calling this.
> 
> there's also the other caller from __perf_event_overflow

See the below patchlet ;-)

> > > +static int __perf_event_overflow(struct perf_event *event,
> > > +				   int throttle, struct perf_sample_data *data,
> > > +				   struct pt_regs *regs)
> > > +{
> > > +	int events = atomic_read(&event->event_limit);
> > > +	struct hw_perf_event *hwc = &event->hw;
> > > +	int ret = 0;
> > > +
> > > +	/*
> > > +	 * Non-sampling counters might still use the PMI to fold short
> > > +	 * hardware counters, ignore those.
> > > +	 */
> > > +	if (unlikely(!is_sampling_event(event)))
> > > +		return 0;
> > > +
> > > +	ret = perf_event_account_interrupt(event, throttle);
> > > +
> > >  	if (event->attr.freq) {
> > >  		u64 now = perf_clock();
> > >  		s64 delta = now - hwc->freq_time_stamp;
> > 
> > Arguably, everything in __perf_event_overflow() except for the call to
> > ->overflow_handler() should be done, I think.
> 
> well, I was wondering about that period adjustment bit
> 
> but I wasn't sure about those pending_kill/pending_wakeup bits,
> they make sense to me only if we have some data to deliver

Hmm, maybe. Please add a comment, that way we can at least rediscover we
thought about this.


--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1392,7 +1392,7 @@ static void intel_pmu_drain_pebs_nhm(str
 		if (error[bit]) {
 			perf_log_lost_samples(event, error[bit]);
 
-			if (perf_event_account_interrupt(event, 1))
+			if (perf_event_account_interrupt(event))
 				x86_pmu_stop(event, 0);
 		}
 
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1259,7 +1259,7 @@ extern void perf_event_disable(struct pe
 extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_disable_inatomic(struct perf_event *event);
 extern void perf_event_task_tick(void);
-extern int perf_event_account_interrupt(struct perf_event *event, int throttle);
+extern int perf_event_account_interrupt(struct perf_event *event);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7034,7 +7034,7 @@ static void perf_log_itrace_start(struct
 	perf_output_end(&handle);
 }
 
-int perf_event_account_interrupt(struct perf_event *event, int throttle)
+static int __perf_event_account_interrupt(struct perf_event *event, int throttle)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	int ret = 0;
@@ -7059,6 +7059,11 @@ int perf_event_account_interrupt(struct
 	return ret;
 }
 
+int perf_event_account_interrupt(struct perf_event *event)
+{
+	return __perf_event_account_interrupt(event, 1);
+}
+
 /*
  * Generic event overflow handling, sampling.
  */
@@ -7078,7 +7083,7 @@ static int __perf_event_overflow(struct
 	if (unlikely(!is_sampling_event(event)))
 		return 0;
 
-	ret = perf_event_account_interrupt(event, throttle);
+	ret = __perf_event_account_interrupt(event, throttle);
 
 	if (event->attr.freq) {
 		u64 now = perf_clock();


* Re: [RFC] perf/x86/intel: Account interrupts for PEBS errors
  2016-12-14 19:32     ` Peter Zijlstra
@ 2016-12-15  7:14       ` Jiri Olsa
  2016-12-15 15:43       ` [PATCHv2] " Jiri Olsa
  1 sibling, 0 replies; 9+ messages in thread
From: Jiri Olsa @ 2016-12-15  7:14 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, lkml, Alexander Shishkin, Vince Weaver, Ingo Molnar

On Wed, Dec 14, 2016 at 08:32:39PM +0100, Peter Zijlstra wrote:
> On Wed, Dec 14, 2016 at 07:16:36PM +0100, Jiri Olsa wrote:
> 
> > > > +++ b/arch/x86/events/intel/ds.c
> > > > @@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
> > > >  			continue;
> > > >  
> > > >  		/* log dropped samples number */
> > > > -		if (error[bit])
> > > > +		if (error[bit]) {
> > > >  			perf_log_lost_samples(event, error[bit]);
> > > >  
> > > > +			if (perf_event_account_interrupt(event, 1))
> > > 
> > > Seems a bit daft to expose the .throttle argument, since that would be
> > > the only point of calling this.
> > 
> > there's also the other caller from __perf_event_overflow
> 
> See the below patchlet ;-)

ok, np ;-)

> 
> > > > +static int __perf_event_overflow(struct perf_event *event,
> > > > +				   int throttle, struct perf_sample_data *data,
> > > > +				   struct pt_regs *regs)
> > > > +{
> > > > +	int events = atomic_read(&event->event_limit);
> > > > +	struct hw_perf_event *hwc = &event->hw;
> > > > +	int ret = 0;
> > > > +
> > > > +	/*
> > > > +	 * Non-sampling counters might still use the PMI to fold short
> > > > +	 * hardware counters, ignore those.
> > > > +	 */
> > > > +	if (unlikely(!is_sampling_event(event)))
> > > > +		return 0;
> > > > +
> > > > +	ret = perf_event_account_interrupt(event, throttle);
> > > > +
> > > >  	if (event->attr.freq) {
> > > >  		u64 now = perf_clock();
> > > >  		s64 delta = now - hwc->freq_time_stamp;
> > > 
> > > Arguably, everything in __perf_event_overflow() except for the call to
> > > ->overflow_handler() should be done, I think.
> > 
> > well, I was wondering about that period adjustment bit
> > 
> > but I wasn't sure about those pending_kill/pending_wakeup bits,
> > they make sense to me only if we have some data to deliver
> 
> Hmm, maybe. Please add a comment, that way we can at least rediscover we
> thought about this.

ook

jirka


* [PATCHv2] perf/x86/intel: Account interrupts for PEBS errors
  2016-12-14 19:32     ` Peter Zijlstra
  2016-12-15  7:14       ` Jiri Olsa
@ 2016-12-15 15:43       ` Jiri Olsa
  2016-12-15 23:41         ` kbuild test robot
  2016-12-16  1:37         ` [PATCHv2] " Vince Weaver
  1 sibling, 2 replies; 9+ messages in thread
From: Jiri Olsa @ 2016-12-15 15:43 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, lkml, Alexander Shishkin, Vince Weaver, Ingo Molnar

On Wed, Dec 14, 2016 at 08:32:39PM +0100, Peter Zijlstra wrote:
> On Wed, Dec 14, 2016 at 07:16:36PM +0100, Jiri Olsa wrote:
> 
> > > > +++ b/arch/x86/events/intel/ds.c
> > > > @@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
> > > >  			continue;
> > > >  
> > > >  		/* log dropped samples number */
> > > > -		if (error[bit])
> > > > +		if (error[bit]) {
> > > >  			perf_log_lost_samples(event, error[bit]);
> > > >  
> > > > +			if (perf_event_account_interrupt(event, 1))
> > > 
> > > Seems a bit daft to expose the .throttle argument, since that would be
> > > the only point of calling this.
> > 
> > there's also the other caller from __perf_event_overflow
> 
> See the below patchlet ;-)
> 
> > > > +static int __perf_event_overflow(struct perf_event *event,
> > > > +				   int throttle, struct perf_sample_data *data,
> > > > +				   struct pt_regs *regs)
> > > > +{
> > > > +	int events = atomic_read(&event->event_limit);
> > > > +	struct hw_perf_event *hwc = &event->hw;
> > > > +	int ret = 0;
> > > > +
> > > > +	/*
> > > > +	 * Non-sampling counters might still use the PMI to fold short
> > > > +	 * hardware counters, ignore those.
> > > > +	 */
> > > > +	if (unlikely(!is_sampling_event(event)))
> > > > +		return 0;
> > > > +
> > > > +	ret = perf_event_account_interrupt(event, throttle);
> > > > +
> > > >  	if (event->attr.freq) {
> > > >  		u64 now = perf_clock();
> > > >  		s64 delta = now - hwc->freq_time_stamp;
> > > 
> > > Arguably, everything in __perf_event_overflow() except for the call to
> > > ->overflow_handler() should be done, I think.
> > 
> > well, I was wondering about that period adjustment bit
> > 
> > but I wasn't sure about those pending_kill/pending_wakeup bits,
> > they make sense to me only if we have some data to deliver
> 
> Hmm, maybe. Please add a comment, that way we can at least rediscover we
> thought about this.
> 

new version with full changelog

jirka


---
It's possible to set up PEBS events and get only errors and no
data at all, like on SNB-X (model 45) and IVB-EP (model 62)
via 2 perf commands running simultaneously:

    taskset -c 1 ./perf record -c 4 -e branches:pp -j any -C 10

This leads to a soft lockup, because the error path of
intel_pmu_drain_pebs_nhm does not account event->hw.interrupts
for error PEBS interrupts, so the event is never stopped when
it gets over the max_samples_per_tick limit.

  NMI watchdog: BUG: soft lockup - CPU#22 stuck for 22s! [perf_fuzzer:5816]
  ...
  task: ffff880273148000 task.stack: ffffc90002d58000
  RIP: 0010:[<ffffffff81159232>]  [<ffffffff81159232>] smp_call_function_single+0xe2/0x140
  RSP: 0018:ffffc90002d5bd60  EFLAGS: 00000202
  ...
  Call Trace:
   ? trace_hardirqs_on_caller+0xf5/0x1b0
   ? perf_cgroup_attach+0x70/0x70
   perf_install_in_context+0x199/0x1b0
   ? ctx_resched+0x90/0x90
   SYSC_perf_event_open+0x641/0xf90
   SyS_perf_event_open+0x9/0x10
   do_syscall_64+0x6c/0x1f0
   entry_SYSCALL64_slow_path+0x25/0x25

Add perf_event_account_interrupt(), which does the event's
interrupt and frequency checks, and call it from drain_pebs's
error path.

Keep the pending_kill and pending_wakeup logic only in the
__perf_event_overflow() path, because they make sense only
if there's any data to deliver.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 arch/x86/events/intel/ds.c |  6 +++++-
 include/linux/perf_event.h |  1 +
 kernel/events/core.c       | 47 ++++++++++++++++++++++++++++++----------------
 3 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index be202390bbd3..9dfeeeca0ea8 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			continue;
 
 		/* log dropped samples number */
-		if (error[bit])
+		if (error[bit]) {
 			perf_log_lost_samples(event, error[bit]);
 
+			if (perf_event_account_interrupt(event))
+				x86_pmu_stop(event, 0);
+		}
+
 		if (counts[bit]) {
 			__intel_pmu_pebs_event(event, iregs, base,
 					       top, bit, counts[bit]);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4741ecdb9817..78ed8105e64d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1259,6 +1259,7 @@ extern void perf_event_disable(struct perf_event *event);
 extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_disable_inatomic(struct perf_event *event);
 extern void perf_event_task_tick(void);
+extern int perf_event_account_interrupt(struct perf_event *event);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 02c8421f8c01..7c6264f5deb7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7034,25 +7034,11 @@ static void perf_log_itrace_start(struct perf_event *event)
 	perf_output_end(&handle);
 }
 
-/*
- * Generic event overflow handling, sampling.
- */
-
-static int __perf_event_overflow(struct perf_event *event,
-				   int throttle, struct perf_sample_data *data,
-				   struct pt_regs *regs)
+static int __perf_event_account_interrupt(struct perf_event *event, int throttle)
 {
-	int events = atomic_read(&event->event_limit);
 	struct hw_perf_event *hwc = &event->hw;
-	u64 seq;
 	int ret = 0;
-
-	/*
-	 * Non-sampling counters might still use the PMI to fold short
-	 * hardware counters, ignore those.
-	 */
-	if (unlikely(!is_sampling_event(event)))
-		return 0;
+	u64 seq;
 
 	seq = __this_cpu_read(perf_throttled_seq);
 	if (seq != hwc->interrupts_seq) {
@@ -7080,6 +7066,35 @@ static int __perf_event_overflow(struct perf_event *event,
 			perf_adjust_period(event, delta, hwc->last_period, true);
 	}
 
+	return ret;
+}
+
+int perf_event_account_interrupt(struct perf_event *event)
+{
+	return __perf_event_account_interrupt(event, 1);
+}
+
+/*
+ * Generic event overflow handling, sampling.
+ */
+
+static int __perf_event_overflow(struct perf_event *event,
+				   int throttle, struct perf_sample_data *data,
+				   struct pt_regs *regs)
+{
+	int events = atomic_read(&event->event_limit);
+	struct hw_perf_event *hwc = &event->hw;
+	int ret = 0;
+
+	/*
+	 * Non-sampling counters might still use the PMI to fold short
+	 * hardware counters, ignore those.
+	 */
+	if (unlikely(!is_sampling_event(event)))
+		return 0;
+
+	ret = __perf_event_account_interrupt(event, throttle);
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events
-- 
2.7.4


* Re: [PATCHv2] perf/x86/intel: Account interrupts for PEBS errors
  2016-12-15 15:43       ` [PATCHv2] " Jiri Olsa
@ 2016-12-15 23:41         ` kbuild test robot
  2016-12-16  8:07           ` [PATCHv3] " Jiri Olsa
  2016-12-16  1:37         ` [PATCHv2] " Vince Weaver
  1 sibling, 1 reply; 9+ messages in thread
From: kbuild test robot @ 2016-12-15 23:41 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: kbuild-all, Peter Zijlstra, Andi Kleen, lkml, Alexander Shishkin,
	Vince Weaver, Ingo Molnar


Hi Jiri,

[auto build test WARNING on tip/perf/core]
[also build test WARNING on v4.9 next-20161215]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Jiri-Olsa/perf-x86-intel-Account-interrupts-for-PEBS-errors/20161216-042644
config: x86_64-randconfig-a0-12160640 (attached as .config)
compiler: gcc-4.4 (Debian 4.4.7-8) 4.4.7
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   kernel/events/core.c: In function '__perf_event_overflow':
>> kernel/events/core.c:7086: warning: unused variable 'hwc'
   kernel/events/core.o: warning: objtool: perf_try_init_event()+0x43: function has unreachable instruction

vim +/hwc +7086 kernel/events/core.c

  7070	}
  7071	
  7072	int perf_event_account_interrupt(struct perf_event *event)
  7073	{
  7074		return __perf_event_account_interrupt(event, 1);
  7075	}
  7076	
  7077	/*
  7078	 * Generic event overflow handling, sampling.
  7079	 */
  7080	
  7081	static int __perf_event_overflow(struct perf_event *event,
  7082					   int throttle, struct perf_sample_data *data,
  7083					   struct pt_regs *regs)
  7084	{
  7085		int events = atomic_read(&event->event_limit);
> 7086		struct hw_perf_event *hwc = &event->hw;
  7087		int ret = 0;
  7088	
  7089		/*
  7090		 * Non-sampling counters might still use the PMI to fold short
  7091		 * hardware counters, ignore those.
  7092		 */
  7093		if (unlikely(!is_sampling_event(event)))
  7094			return 0;

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation



* Re: [PATCHv2] perf/x86/intel: Account interrupts for PEBS errors
  2016-12-15 15:43       ` [PATCHv2] " Jiri Olsa
  2016-12-15 23:41         ` kbuild test robot
@ 2016-12-16  1:37         ` Vince Weaver
  1 sibling, 0 replies; 9+ messages in thread
From: Vince Weaver @ 2016-12-16  1:37 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Peter Zijlstra, Andi Kleen, lkml, Alexander Shishkin, Ingo Molnar

On Thu, 15 Dec 2016, Jiri Olsa wrote:

> It's possible to set up PEBS events and get only errors and no
> data at all, like on SNB-X (model 45) and IVB-EP (model 62)
> via 2 perf commands running simultaneously:
> 
>     taskset -c 1 ./perf record -c 4 -e branches:pp -j any -C 10
> 
> This leads to a soft lockup, because the error path of
> intel_pmu_drain_pebs_nhm does not account event->hw.interrupts
> for error PEBS interrupts, so the event is never stopped when
> it gets over the max_samples_per_tick limit.
> 
>   NMI watchdog: BUG: soft lockup - CPU#22 stuck for 22s! [perf_fuzzer:5816]
>   ...
>   task: ffff880273148000 task.stack: ffffc90002d58000
>   RIP: 0010:[<ffffffff81159232>]  [<ffffffff81159232>] smp_call_function_single+0xe2/0x140
>   RSP: 0018:ffffc90002d5bd60  EFLAGS: 00000202
>   ...
>   Call Trace:
>    ? trace_hardirqs_on_caller+0xf5/0x1b0
>    ? perf_cgroup_attach+0x70/0x70
>    perf_install_in_context+0x199/0x1b0
>    ? ctx_resched+0x90/0x90
>    SYSC_perf_event_open+0x641/0xf90
>    SyS_perf_event_open+0x9/0x10
>    do_syscall_64+0x6c/0x1f0
>    entry_SYSCALL64_slow_path+0x25/0x25

I'll have to try this; with all the recent fixes I am down to NMI lockups
like this being the major cause of fuzzer issues on my Intel machines.

My AMD and ARM machines have now been fuzzing for weeks w/o problems.

I also finally got a power8 machine and it crashes really quickly when
fuzzing, but I haven't had a chance to track things down yet because it
sounds like a jet plane taking off and I can't really leave it fuzzing
like that when students are sitting nearby.  Maybe over break.

Vince


* [PATCHv3] perf/x86/intel: Account interrupts for PEBS errors
  2016-12-15 23:41         ` kbuild test robot
@ 2016-12-16  8:07           ` Jiri Olsa
  0 siblings, 0 replies; 9+ messages in thread
From: Jiri Olsa @ 2016-12-16  8:07 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: kbuild-all, Peter Zijlstra, kbuild test robot, Andi Kleen, lkml,
	Alexander Shishkin, Vince Weaver, Ingo Molnar

On Fri, Dec 16, 2016 at 07:41:04AM +0800, kbuild test robot wrote:
> Hi Jiri,
> 
> [auto build test WARNING on tip/perf/core]
> [also build test WARNING on v4.9 next-20161215]
> [if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
> 
> url:    https://github.com/0day-ci/linux/commits/Jiri-Olsa/perf-x86-intel-Account-interrupts-for-PEBS-errors/20161216-042644
> config: x86_64-randconfig-a0-12160640 (attached as .config)
> compiler: gcc-4.4 (Debian 4.4.7-8) 4.4.7
> reproduce:
>         # save the attached .config to linux build tree
>         make ARCH=x86_64 
> 
> All warnings (new ones prefixed by >>):
> 
>    kernel/events/core.c: In function '__perf_event_overflow':
> >> kernel/events/core.c:7086: warning: unused variable 'hwc'
>    kernel/events/core.o: warning: objtool: perf_try_init_event()+0x43: function has unreachable instruction
> 
> vim +/hwc +7086 kernel/events/core.c
> 
>   7070	}
>   7071	
>   7072	int perf_event_account_interrupt(struct perf_event *event)
>   7073	{
>   7074		return __perf_event_account_interrupt(event, 1);
>   7075	}
>   7076	
>   7077	/*
>   7078	 * Generic event overflow handling, sampling.
>   7079	 */
>   7080	
>   7081	static int __perf_event_overflow(struct perf_event *event,
>   7082					   int throttle, struct perf_sample_data *data,
>   7083					   struct pt_regs *regs)
>   7084	{
>   7085		int events = atomic_read(&event->event_limit);
> > 7086		struct hw_perf_event *hwc = &event->hw;
>   7087		int ret = 0;
>   7088	

ugh, I guess I'm too used to the perf tool, which fails the build on unused variables :-\

v3 attached

thanks,
jirka


---
It's possible to set up PEBS events and get only errors and no
data at all, like on SNB-X (model 45) and IVB-EP (model 62)
via 2 perf commands running simultaneously:

    taskset -c 1 ./perf record -c 4 -e branches:pp -j any -C 10

This leads to a soft lockup, because the error path of
intel_pmu_drain_pebs_nhm does not account event->hw.interrupts
for error PEBS interrupts, so the event is never stopped when
it gets over the max_samples_per_tick limit.

  NMI watchdog: BUG: soft lockup - CPU#22 stuck for 22s! [perf_fuzzer:5816]
  ...
  task: ffff880273148000 task.stack: ffffc90002d58000
  RIP: 0010:[<ffffffff81159232>]  [<ffffffff81159232>] smp_call_function_single+0xe2/0x140
  RSP: 0018:ffffc90002d5bd60  EFLAGS: 00000202
  ...
  Call Trace:
   ? trace_hardirqs_on_caller+0xf5/0x1b0
   ? perf_cgroup_attach+0x70/0x70
   perf_install_in_context+0x199/0x1b0
   ? ctx_resched+0x90/0x90
   SYSC_perf_event_open+0x641/0xf90
   SyS_perf_event_open+0x9/0x10
   do_syscall_64+0x6c/0x1f0
   entry_SYSCALL64_slow_path+0x25/0x25

Add perf_event_account_interrupt(), which does the event's
interrupt and frequency checks, and call it from drain_pebs's
error path.

Keep the pending_kill and pending_wakeup logic only in the
__perf_event_overflow() path, because they make sense only
if there's any data to deliver.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 arch/x86/events/intel/ds.c |  6 +++++-
 include/linux/perf_event.h |  1 +
 kernel/events/core.c       | 46 ++++++++++++++++++++++++++++++----------------
 3 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index be202390bbd3..9dfeeeca0ea8 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			continue;
 
 		/* log dropped samples number */
-		if (error[bit])
+		if (error[bit]) {
 			perf_log_lost_samples(event, error[bit]);
 
+			if (perf_event_account_interrupt(event))
+				x86_pmu_stop(event, 0);
+		}
+
 		if (counts[bit]) {
 			__intel_pmu_pebs_event(event, iregs, base,
 					       top, bit, counts[bit]);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4741ecdb9817..78ed8105e64d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1259,6 +1259,7 @@ extern void perf_event_disable(struct perf_event *event);
 extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_disable_inatomic(struct perf_event *event);
 extern void perf_event_task_tick(void);
+extern int perf_event_account_interrupt(struct perf_event *event);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 02c8421f8c01..bb24d5abc40a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7034,25 +7034,11 @@ static void perf_log_itrace_start(struct perf_event *event)
 	perf_output_end(&handle);
 }
 
-/*
- * Generic event overflow handling, sampling.
- */
-
-static int __perf_event_overflow(struct perf_event *event,
-				   int throttle, struct perf_sample_data *data,
-				   struct pt_regs *regs)
+static int __perf_event_account_interrupt(struct perf_event *event, int throttle)
 {
-	int events = atomic_read(&event->event_limit);
 	struct hw_perf_event *hwc = &event->hw;
-	u64 seq;
 	int ret = 0;
-
-	/*
-	 * Non-sampling counters might still use the PMI to fold short
-	 * hardware counters, ignore those.
-	 */
-	if (unlikely(!is_sampling_event(event)))
-		return 0;
+	u64 seq;
 
 	seq = __this_cpu_read(perf_throttled_seq);
 	if (seq != hwc->interrupts_seq) {
@@ -7080,6 +7066,34 @@ static int __perf_event_overflow(struct perf_event *event,
 			perf_adjust_period(event, delta, hwc->last_period, true);
 	}
 
+	return ret;
+}
+
+int perf_event_account_interrupt(struct perf_event *event)
+{
+	return __perf_event_account_interrupt(event, 1);
+}
+
+/*
+ * Generic event overflow handling, sampling.
+ */
+
+static int __perf_event_overflow(struct perf_event *event,
+				   int throttle, struct perf_sample_data *data,
+				   struct pt_regs *regs)
+{
+	int events = atomic_read(&event->event_limit);
+	int ret = 0;
+
+	/*
+	 * Non-sampling counters might still use the PMI to fold short
+	 * hardware counters, ignore those.
+	 */
+	if (unlikely(!is_sampling_event(event)))
+		return 0;
+
+	ret = __perf_event_account_interrupt(event, throttle);
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events
-- 
2.7.4

