linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] perf: fix assertion failure in x86_pmu_start()
@ 2012-02-07 13:39 Stephane Eranian
  2012-02-07 14:36 ` Eric Dumazet
  2012-02-07 19:44 ` [tip:perf/urgent] perf: Fix double start/stop " tip-bot for Stephane Eranian
  0 siblings, 2 replies; 3+ messages in thread
From: Stephane Eranian @ 2012-02-07 13:39 UTC (permalink / raw)
  To: linux-kernel; +Cc: peterz, mingo, eric.dumazet, markus, paulus


The following patch fixes an issue introduced by the following
commit:
        e050e3f0a71b ("perf: Fix broken interrupt rate throttling")

The patch caused the following warning to pop up depending on
the sampling frequency adjustments:

[89214.962603] ------------[ cut here ]------------
[89214.967441] WARNING: at arch/x86/kernel/cpu/perf_event.c:995 x86_pmu_start+0x79/0xd4()
[89214.975825] Hardware name: X8DTN
[89214.979268] Modules linked in:
[89214.982560] Pid: 0, comm: swapper/6 Not tainted 3.3.0-rc2-tip+ #1
[89214.988865] Call Trace:
[89214.991533]  <IRQ>  [<ffffffff81065cc7>] warn_slowpath_common+0x7e/0x97
[89214.998379]  [<ffffffff81065cf5>] warn_slowpath_null+0x15/0x17
[89215.004428]  [<ffffffff8103f626>] x86_pmu_start+0x79/0xd4
[89215.010042]  [<ffffffff810e30d1>] perf_adjust_freq_unthr_context.part.63+0xef/0x123
[89215.018123]  [<ffffffff810e318c>] perf_event_task_tick+0x87/0x1c1
[89215.024463]  [<ffffffff810a2370>] ? tick_nohz_handler+0xda/0xda
[89215.030595]  [<ffffffff8108b819>] scheduler_tick+0xd1/0xf3
[89215.036296]  [<ffffffff810720b0>] update_process_times+0x5e/0x6f
[89215.042512]  [<ffffffff810a23e0>] tick_sched_timer+0x70/0x99
[89215.048387]  [<ffffffff810823f9>] __run_hrtimer+0x8c/0x148
[89215.054087]  [<ffffffff81082add>] hrtimer_interrupt+0xc1/0x18c

It was caused by the following call sequence:

perf_adjust_freq_unthr_context.part() {
     stop()
     if (delta > 0) {
          perf_adjust_period() {
              if (period > 8*...) {
                  stop()
                  ...
                  start()
              }
          }
      }
      start()
}

Which caused a double start and a double stop, thus triggering the assert
in x86_pmu_start().

The patch fixes the problem by avoiding the double calls. We pass a new
argument to perf_adjust_period() to indicate whether or not the event
is already stopped. We can't just remove the start/stop from that function
because it's called from __perf_event_overflow where the event needs to
be reloaded via a stop/start back-toback call.

The patch reintroduces the assertion in x86_pmu_start() which was removed
by commit:
	84f2b9b perf: Remove deprecated WARN_ON_ONCE()

In this second version, we've added calls to disable/enable PMU during
unthrottling or frequency adjustment based on bug report of spurious
NMI interrupts from Eric Dumazet.

Signed-off-by: Stephane Eranian <eranian@google.com>
---

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3c44b71..f8bddb5 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -988,6 +988,9 @@ static void x86_pmu_start(struct perf_event *event, int flags)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx = event->hw.idx;
 
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+		return;
+
 	if (WARN_ON_ONCE(idx == -1))
 		return;
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7c3b9de..8410773 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2303,7 +2303,9 @@ do {					\
 static DEFINE_PER_CPU(int, perf_throttled_count);
 static DEFINE_PER_CPU(u64, perf_throttled_seq);
 
-static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
+static void perf_adjust_period(struct perf_event *event, u64 nsec,
+			       u64 count,
+			       bool dostop)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	s64 period, sample_period;
@@ -2322,9 +2324,13 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 	hwc->sample_period = sample_period;
 
 	if (local64_read(&hwc->period_left) > 8*sample_period) {
-		event->pmu->stop(event, PERF_EF_UPDATE);
+		if (dostop)
+			event->pmu->stop(event, PERF_EF_UPDATE);
+
 		local64_set(&hwc->period_left, 0);
-		event->pmu->start(event, PERF_EF_RELOAD);
+
+		if (dostop)
+			event->pmu->start(event, PERF_EF_RELOAD);
 	}
 }
 
@@ -2350,6 +2356,7 @@ static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
 		return;
 
 	raw_spin_lock(&ctx->lock);
+	perf_pmu_disable(ctx->pmu);
 
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
 		if (event->state != PERF_EVENT_STATE_ACTIVE)
@@ -2381,13 +2388,17 @@ static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
 		/*
 		 * restart the event
 		 * reload only if value has changed
+		 * we have stopped the event so tell that
+		 * to perf_adjust_period() to avoid stopping it
+		 * twice.
 		 */
 		if (delta > 0)
-			perf_adjust_period(event, period, delta);
+			perf_adjust_period(event, period, delta, false);
 
 		event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
 	}
 
+	perf_pmu_enable(ctx->pmu);
 	raw_spin_unlock(&ctx->lock);
 }
 
@@ -4567,7 +4578,8 @@ static int __perf_event_overflow(struct perf_event *event,
 		hwc->freq_time_stamp = now;
 
 		if (delta > 0 && delta < 2*TICK_NSEC)
-			perf_adjust_period(event, delta, hwc->last_period);
+			perf_adjust_period(event, delta,
+					   hwc->last_period, true);
 	}
 
 	/*

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] perf: fix assertion failure in x86_pmu_start()
  2012-02-07 13:39 [PATCH v2] perf: fix assertion failure in x86_pmu_start() Stephane Eranian
@ 2012-02-07 14:36 ` Eric Dumazet
  2012-02-07 19:44 ` [tip:perf/urgent] perf: Fix double start/stop " tip-bot for Stephane Eranian
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Dumazet @ 2012-02-07 14:36 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: linux-kernel, peterz, mingo, markus, paulus

Le mardi 07 février 2012 à 14:39 +0100, Stephane Eranian a écrit :
> The following patch fixes an issue introduced by the following
> commit:
>         e050e3f0a71b ("perf: Fix broken interrupt rate throttling")
> 
> The patch caused the following warning to pop up depending on
> the sampling frequency adjustments:
> 
...

> 
> In this second version, we've added calls to disable/enable PMU during
> unthrottling or frequency adjustment based on bug report of spurious
> NMI interrupts from Eric Dumazet.
> 
> Signed-off-by: Stephane Eranian <eranian@google.com>
> ---

I tested successfully this patch on my two dev machines, thanks for
fixing this Stephane.

Reported-and-tested-by: Eric Dumazet <eric.dumazet@gmail.com>




^ permalink raw reply	[flat|nested] 3+ messages in thread

* [tip:perf/urgent] perf: Fix double start/stop in x86_pmu_start()
  2012-02-07 13:39 [PATCH v2] perf: fix assertion failure in x86_pmu_start() Stephane Eranian
  2012-02-07 14:36 ` Eric Dumazet
@ 2012-02-07 19:44 ` tip-bot for Stephane Eranian
  1 sibling, 0 replies; 3+ messages in thread
From: tip-bot for Stephane Eranian @ 2012-02-07 19:44 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, eranian, hpa, mingo, eric.dumazet, a.p.zijlstra,
	tglx, mingo

Commit-ID:  f39d47ff819ed52a2afbdbecbe35f23f7755f58d
Gitweb:     http://git.kernel.org/tip/f39d47ff819ed52a2afbdbecbe35f23f7755f58d
Author:     Stephane Eranian <eranian@google.com>
AuthorDate: Tue, 7 Feb 2012 14:39:57 +0100
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Tue, 7 Feb 2012 16:58:56 +0100

perf: Fix double start/stop in x86_pmu_start()

The following patch fixes a bug introduced by the following
commit:

        e050e3f0a71b ("perf: Fix broken interrupt rate throttling")

The patch caused the following warning to pop up depending on
the sampling frequency adjustments:

  ------------[ cut here ]------------
  WARNING: at arch/x86/kernel/cpu/perf_event.c:995 x86_pmu_start+0x79/0xd4()

It was caused by the following call sequence:

perf_adjust_freq_unthr_context.part() {
     stop()
     if (delta > 0) {
          perf_adjust_period() {
              if (period > 8*...) {
                  stop()
                  ...
                  start()
              }
          }
      }
      start()
}

Which caused a double start and a double stop, thus triggering
the assert in x86_pmu_start().

The patch fixes the problem by avoiding the double calls. We
pass a new argument to perf_adjust_period() to indicate whether
or not the event is already stopped. We can't just remove the
start/stop from that function because it's called from
__perf_event_overflow where the event needs to be reloaded via a
stop/start back-toback call.

The patch reintroduces the assertion in x86_pmu_start() which
was removed by commit:

	84f2b9b ("perf: Remove deprecated WARN_ON_ONCE()")

In this second version, we've added calls to disable/enable PMU
during unthrottling or frequency adjustment based on bug report
of spurious NMI interrupts from Eric Dumazet.

Reported-and-tested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: markus@trippelsdorf.de
Cc: paulus@samba.org
Link: http://lkml.kernel.org/r/20120207133956.GA4932@quad
[ Minor edits to the changelog and to the code ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c |    3 +++
 kernel/events/core.c             |   19 ++++++++++++++-----
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2a30e5a..5adce10 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -986,6 +986,9 @@ static void x86_pmu_start(struct perf_event *event, int flags)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx = event->hw.idx;
 
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+		return;
+
 	if (WARN_ON_ONCE(idx == -1))
 		return;
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ba36013..1b5c081 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2303,7 +2303,7 @@ do {					\
 static DEFINE_PER_CPU(int, perf_throttled_count);
 static DEFINE_PER_CPU(u64, perf_throttled_seq);
 
-static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
+static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bool disable)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	s64 period, sample_period;
@@ -2322,9 +2322,13 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 	hwc->sample_period = sample_period;
 
 	if (local64_read(&hwc->period_left) > 8*sample_period) {
-		event->pmu->stop(event, PERF_EF_UPDATE);
+		if (disable)
+			event->pmu->stop(event, PERF_EF_UPDATE);
+
 		local64_set(&hwc->period_left, 0);
-		event->pmu->start(event, PERF_EF_RELOAD);
+
+		if (disable)
+			event->pmu->start(event, PERF_EF_RELOAD);
 	}
 }
 
@@ -2350,6 +2354,7 @@ static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
 		return;
 
 	raw_spin_lock(&ctx->lock);
+	perf_pmu_disable(ctx->pmu);
 
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
 		if (event->state != PERF_EVENT_STATE_ACTIVE)
@@ -2381,13 +2386,17 @@ static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
 		/*
 		 * restart the event
 		 * reload only if value has changed
+		 * we have stopped the event so tell that
+		 * to perf_adjust_period() to avoid stopping it
+		 * twice.
 		 */
 		if (delta > 0)
-			perf_adjust_period(event, period, delta);
+			perf_adjust_period(event, period, delta, false);
 
 		event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
 	}
 
+	perf_pmu_enable(ctx->pmu);
 	raw_spin_unlock(&ctx->lock);
 }
 
@@ -4562,7 +4571,7 @@ static int __perf_event_overflow(struct perf_event *event,
 		hwc->freq_time_stamp = now;
 
 		if (delta > 0 && delta < 2*TICK_NSEC)
-			perf_adjust_period(event, delta, hwc->last_period);
+			perf_adjust_period(event, delta, hwc->last_period, true);
 	}
 
 	/*

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2012-02-07 19:44 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-02-07 13:39 [PATCH v2] perf: fix assertion failure in x86_pmu_start() Stephane Eranian
2012-02-07 14:36 ` Eric Dumazet
2012-02-07 19:44 ` [tip:perf/urgent] perf: Fix double start/stop " tip-bot for Stephane Eranian

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).