linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] perf/x86/intel: Fixes for PT and BTS
@ 2015-06-11 12:13 Alexander Shishkin
  2015-06-11 12:13 ` [PATCH 1/2] perf/x86/intel/bts: Fix DS area sharing with x86_pmu events Alexander Shishkin
  2015-06-11 12:13 ` [PATCH 2/2] perf/x86/intel: Fix PMI handling for Intel PT Alexander Shishkin
  0 siblings, 2 replies; 12+ messages in thread
From: Alexander Shishkin @ 2015-06-11 12:13 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar
  Cc: linux-kernel, adrian.hunter, x86, hpa, acme, Alexander Shishkin

Hi Peter and Ingo,

I have here two fixes for 4.1, both of which are basically the result
of not testing the code properly with the NMI watchdog disabled, so that
is one lesson learned here.

One bug results in trace data loss and unknown NMI warnings and the
other one is a NULL pointer dereference. The latter might need to be
revisited in the future as we're now handling PT PMIs from
intel_pmu_handle_irq() and we might want to change that. On the other
hand, PT and x86_pmu share the PMI status register, so from that angle
it kind of makes sense. But until such time as we decide to sort it
out, the proposed fix should do nicely.

Alexander Shishkin (2):
  perf/x86/intel/bts: Fix DS area sharing with x86_pmu events
  perf/x86/intel: Fix PMI handling for Intel PT

 arch/x86/kernel/cpu/perf_event.c           | 70 +++++++++++++++++++++---------
 arch/x86/kernel/cpu/perf_event.h           |  4 ++
 arch/x86/kernel/cpu/perf_event_intel_bts.c |  9 ++++
 3 files changed, 63 insertions(+), 20 deletions(-)

-- 
2.1.4


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/2] perf/x86/intel/bts: Fix DS area sharing with x86_pmu events
  2015-06-11 12:13 [PATCH 0/2] perf/x86/intel: Fixes for PT and BTS Alexander Shishkin
@ 2015-06-11 12:13 ` Alexander Shishkin
  2015-06-19 17:58   ` [tip:perf/urgent] " tip-bot for Alexander Shishkin
  2015-06-11 12:13 ` [PATCH 2/2] perf/x86/intel: Fix PMI handling for Intel PT Alexander Shishkin
  1 sibling, 1 reply; 12+ messages in thread
From: Alexander Shishkin @ 2015-06-11 12:13 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar
  Cc: linux-kernel, adrian.hunter, x86, hpa, acme, Alexander Shishkin

Currently, the intel_bts driver relies on the DS area allocated by the x86_pmu
code in its event_init() path, which is a bug: creating a BTS event while
no x86_pmu events are present results in a NULL pointer dereference.

The same DS area is also used by PEBS sampling, which makes it quite a bit
trickier to have a separate one for intel_bts' purposes.

This patch makes intel_bts driver use the same DS allocation and reference
counting code as x86_pmu to make sure it is always present when either
intel_bts or x86_pmu need it.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event.c           | 52 +++++++++++++++++++-----------
 arch/x86/kernel/cpu/perf_event.h           |  4 +++
 arch/x86/kernel/cpu/perf_event_intel_bts.c |  9 ++++++
 3 files changed, 46 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index dbe3328f8a..8487714da2 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -270,11 +270,7 @@ msr_fail:
 
 static void hw_perf_event_destroy(struct perf_event *event)
 {
-	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
-		release_pmc_hardware();
-		release_ds_buffers();
-		mutex_unlock(&pmc_reserve_mutex);
-	}
+	x86_release_hardware();
 }
 
 void hw_perf_lbr_event_destroy(struct perf_event *event)
@@ -324,6 +320,35 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
 	return x86_pmu_extra_regs(val, event);
 }
 
+int x86_reserve_hardware(void)
+{
+	int err = 0;
+
+	if (!atomic_inc_not_zero(&active_events)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&active_events) == 0) {
+			if (!reserve_pmc_hardware())
+				err = -EBUSY;
+			else
+				reserve_ds_buffers();
+		}
+		if (!err)
+			atomic_inc(&active_events);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+
+	return err;
+}
+
+void x86_release_hardware(void)
+{
+	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
+		release_pmc_hardware();
+		release_ds_buffers();
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+}
+
 /*
  * Check if we can create event of a certain type (that no conflicting events
  * are present).
@@ -336,9 +361,10 @@ int x86_add_exclusive(unsigned int what)
 		return 0;
 
 	mutex_lock(&pmc_reserve_mutex);
-	for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++)
+	for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
 		if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
 			goto out;
+	}
 
 	atomic_inc(&x86_pmu.lbr_exclusive[what]);
 	ret = 0;
@@ -527,19 +553,7 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	if (!x86_pmu_initialized())
 		return -ENODEV;
 
-	err = 0;
-	if (!atomic_inc_not_zero(&active_events)) {
-		mutex_lock(&pmc_reserve_mutex);
-		if (atomic_read(&active_events) == 0) {
-			if (!reserve_pmc_hardware())
-				err = -EBUSY;
-			else
-				reserve_ds_buffers();
-		}
-		if (!err)
-			atomic_inc(&active_events);
-		mutex_unlock(&pmc_reserve_mutex);
-	}
+	err = x86_reserve_hardware();
 	if (err)
 		return err;
 
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 804b47eb9c..0cd854671b 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -702,6 +702,10 @@ int x86_add_exclusive(unsigned int what);
 
 void x86_del_exclusive(unsigned int what);
 
+int x86_reserve_hardware(void);
+
+void x86_release_hardware(void);
+
 void hw_perf_lbr_event_destroy(struct perf_event *event);
 
 int x86_setup_perfctr(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
index 2ca05b4461..57479ed49e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c
@@ -480,17 +480,26 @@ static int bts_event_add(struct perf_event *event, int mode)
 
 static void bts_event_destroy(struct perf_event *event)
 {
+	x86_release_hardware();
 	x86_del_exclusive(x86_lbr_exclusive_bts);
 }
 
 static int bts_event_init(struct perf_event *event)
 {
+	int ret;
+
 	if (event->attr.type != bts_pmu.type)
 		return -ENOENT;
 
 	if (x86_add_exclusive(x86_lbr_exclusive_bts))
 		return -EBUSY;
 
+	ret = x86_reserve_hardware();
+	if (ret) {
+		x86_del_exclusive(x86_lbr_exclusive_bts);
+		return ret;
+	}
+
 	event->destroy = bts_event_destroy;
 
 	return 0;
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/2] perf/x86/intel: Fix PMI handling for Intel PT
  2015-06-11 12:13 [PATCH 0/2] perf/x86/intel: Fixes for PT and BTS Alexander Shishkin
  2015-06-11 12:13 ` [PATCH 1/2] perf/x86/intel/bts: Fix DS area sharing with x86_pmu events Alexander Shishkin
@ 2015-06-11 12:13 ` Alexander Shishkin
  2015-06-11 14:00   ` Peter Zijlstra
  1 sibling, 1 reply; 12+ messages in thread
From: Alexander Shishkin @ 2015-06-11 12:13 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar
  Cc: linux-kernel, adrian.hunter, x86, hpa, acme, Alexander Shishkin

Intel PT is a separate PMU and is not using any of the x86_pmu
code paths, which means in particular that the active_events counter remains
intact when new PT events are created. However, PT uses the x86_pmu PMI
handler for its PMI handling needs. The problem here is that the latter
checks active_events and in case of it being zero, exits without calling
the actual x86_pmu.handle_nmi(), which results in unknown NMI errors and
massive data loss for PT.

The effect is not visible if there are other perf events in the system
at the same time that keep active_events counter non-zero, for instance
if the NMI watchdog is running, so one needs to disable it to reproduce
the problem.

However, we have the lbr_exclusive[] counter for PT, so we can check it
for the presence of PT events.

This patch adjusts perf_event_nmi_handler() to check for presence of
events in lbr_exclusive counter for PT in addition to checking
active_events counter.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8487714da2..cf066fc38c 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1426,7 +1426,23 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 	u64 finish_clock;
 	int ret;
 
-	if (!atomic_read(&active_events))
+	/*
+	 * Intel PT is a separate PMUs, it doesn't increment active_events
+	 * nor does it use resources associated with it, such as DS area.
+	 * However, it still uses the same PMI handler, so here we need
+	 * to make sure to call it *also* if any PT events are present.
+	 *
+	 * BTS events currently increment active_events implicitly through
+	 * x86_reserve_hardware(), where it acts as DS area reference
+	 * counter, so no need to check x86_lbr_exclusive_bts counter here;
+	 * otherwise we'd have to do the same for BTS.
+	 *
+	 * Theoretically, they could be made into separate PMI handlers, but
+	 * that can create additional challenges as PT uses the same PMI status
+	 * register as x86_pmu.
+	 */
+	if (!atomic_read(&active_events) &&
+	    !atomic_read(&x86_pmu.lbr_exclusive[x86_lbr_exclusive_pt]))
 		return NMI_DONE;
 
 	start_clock = sched_clock();
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] perf/x86/intel: Fix PMI handling for Intel PT
  2015-06-11 12:13 ` [PATCH 2/2] perf/x86/intel: Fix PMI handling for Intel PT Alexander Shishkin
@ 2015-06-11 14:00   ` Peter Zijlstra
  2015-06-12  9:08     ` Alexander Shishkin
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2015-06-11 14:00 UTC (permalink / raw)
  To: Alexander Shishkin
  Cc: Ingo Molnar, linux-kernel, adrian.hunter, x86, hpa, acme

On Thu, Jun 11, 2015 at 03:13:57PM +0300, Alexander Shishkin wrote:
> @@ -1426,7 +1426,23 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
>  	u64 finish_clock;
>  	int ret;
>  
> -	if (!atomic_read(&active_events))
> +	/*
> +	 * Intel PT is a separate PMUs, it doesn't increment active_events
> +	 * nor does it use resources associated with it, such as DS area.
> +	 * However, it still uses the same PMI handler, so here we need
> +	 * to make sure to call it *also* if any PT events are present.
> +	 *
> +	 * BTS events currently increment active_events implicitly through
> +	 * x86_reserve_hardware(), where it acts as DS area reference
> +	 * counter, so no need to check x86_lbr_exclusive_bts counter here;
> +	 * otherwise we'd have to do the same for BTS.
> +	 *
> +	 * Theoretically, they could be made into separate PMI handlers, but
> +	 * that can create additional challenges as PT uses the same PMI status
> +	 * register as x86_pmu.
> +	 */
> +	if (!atomic_read(&active_events) &&
> +	    !atomic_read(&x86_pmu.lbr_exclusive[x86_lbr_exclusive_pt]))
>  		return NMI_DONE;
>  

Urgh, sad. That's two cache misses instead of one. Cant we somehow keep
that a single value to read?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] perf/x86/intel: Fix PMI handling for Intel PT
  2015-06-11 14:00   ` Peter Zijlstra
@ 2015-06-12  9:08     ` Alexander Shishkin
  2015-06-12 13:09       ` Peter Zijlstra
  2015-06-19 17:58       ` [tip:perf/urgent] " tip-bot for Alexander Shishkin
  0 siblings, 2 replies; 12+ messages in thread
From: Alexander Shishkin @ 2015-06-12  9:08 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, linux-kernel, adrian.hunter, x86, hpa, acme

Peter Zijlstra <peterz@infradead.org> writes:

> On Thu, Jun 11, 2015 at 03:13:57PM +0300, Alexander Shishkin wrote:
>> @@ -1426,7 +1426,23 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
>>  	u64 finish_clock;
>>  	int ret;
>>  
>> -	if (!atomic_read(&active_events))
>> +	/*
>> +	 * Intel PT is a separate PMUs, it doesn't increment active_events
>> +	 * nor does it use resources associated with it, such as DS area.
>> +	 * However, it still uses the same PMI handler, so here we need
>> +	 * to make sure to call it *also* if any PT events are present.
>> +	 *
>> +	 * BTS events currently increment active_events implicitly through
>> +	 * x86_reserve_hardware(), where it acts as DS area reference
>> +	 * counter, so no need to check x86_lbr_exclusive_bts counter here;
>> +	 * otherwise we'd have to do the same for BTS.
>> +	 *
>> +	 * Theoretically, they could be made into separate PMI handlers, but
>> +	 * that can create additional challenges as PT uses the same PMI status
>> +	 * register as x86_pmu.
>> +	 */
>> +	if (!atomic_read(&active_events) &&
>> +	    !atomic_read(&x86_pmu.lbr_exclusive[x86_lbr_exclusive_pt]))
>>  		return NMI_DONE;
>>  
>
> Urgh, sad. That's two cache misses instead of one. Cant we somehow keep
> that a single value to read?

Indeed. One thing here is that active_events actually serves dual
purpose and does reference counting for
{reserve,release}_{ds_buffers,pmc_hardware}(). We can split it in two
and then we can add PT events to active_events as well, so we can make
do with just one atomic_read() here. How about the patch below instead
of the previously posted one?

>From 6f0e86cb0bd556cbfaa5ada6a8ef3aa0e2a6b60a Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Tue, 9 Jun 2015 13:03:26 +0300
Subject: [PATCH] perf/x86/intel: Fix PMI handling for Intel PT

Intel PT is a separate PMU and is not using any of the x86_pmu
code paths, which means in particular that the active_events counter remains
intact when new PT events are created. However, PT uses the x86_pmu PMI
handler for its PMI handling needs. The problem here is that the latter
checks active_events and in case of it being zero, exits without calling
the actual x86_pmu.handle_nmi(), which results in unknown NMI errors and
massive data loss for PT.

The effect is not visible if there are other perf events in the system
at the same time that keep active_events counter non-zero, for instance
if the NMI watchdog is running, so one needs to disable it to reproduce
the problem.

At the same time, the active_events counter besides doing what the name
suggests also implicitly serves as a pmc hardware and DS area reference
counter.

This patch adds a separate reference counter for the pmc hardware, leaving
active_events for actually counting the events and makes sure it also
counts PT and BTS events.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event.c | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8487714da2..3b0257e222 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -135,6 +135,7 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 }
 
 static atomic_t active_events;
+static atomic_t pmc_refcount;
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -271,6 +272,7 @@ msr_fail:
 static void hw_perf_event_destroy(struct perf_event *event)
 {
 	x86_release_hardware();
+	atomic_dec(&active_events);
 }
 
 void hw_perf_lbr_event_destroy(struct perf_event *event)
@@ -324,16 +326,16 @@ int x86_reserve_hardware(void)
 {
 	int err = 0;
 
-	if (!atomic_inc_not_zero(&active_events)) {
+	if (!atomic_inc_not_zero(&pmc_refcount)) {
 		mutex_lock(&pmc_reserve_mutex);
-		if (atomic_read(&active_events) == 0) {
+		if (atomic_read(&pmc_refcount) == 0) {
 			if (!reserve_pmc_hardware())
 				err = -EBUSY;
 			else
 				reserve_ds_buffers();
 		}
 		if (!err)
-			atomic_inc(&active_events);
+			atomic_inc(&pmc_refcount);
 		mutex_unlock(&pmc_reserve_mutex);
 	}
 
@@ -342,7 +344,7 @@ int x86_reserve_hardware(void)
 
 void x86_release_hardware(void)
 {
-	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
+	if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
 		release_pmc_hardware();
 		release_ds_buffers();
 		mutex_unlock(&pmc_reserve_mutex);
@@ -371,12 +373,24 @@ int x86_add_exclusive(unsigned int what)
 
 out:
 	mutex_unlock(&pmc_reserve_mutex);
+
+	/*
+	 * Assuming that all exclusive events will share the PMI handler
+	 * (which checks active_events for whether there is work to do),
+	 * we can bump active_events counter right here, except for
+	 * x86_lbr_exclusive_lbr events that go through x86_pmu_event_init()
+	 * path, which already bumps active_events for them.
+	 */
+	if (!ret && what != x86_lbr_exclusive_lbr)
+		atomic_inc(&active_events);
+
 	return ret;
 }
 
 void x86_del_exclusive(unsigned int what)
 {
 	atomic_dec(&x86_pmu.lbr_exclusive[what]);
+	atomic_dec(&active_events);
 }
 
 int x86_setup_perfctr(struct perf_event *event)
@@ -557,6 +571,7 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	if (err)
 		return err;
 
+	atomic_inc(&active_events);
 	event->destroy = hw_perf_event_destroy;
 
 	event->hw.idx = -1;
@@ -1426,6 +1441,10 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 	u64 finish_clock;
 	int ret;
 
+	/*
+	 * All PMUs/events that share this PMI handler should make sure to
+	 * increment active_events for their events.
+	 */
 	if (!atomic_read(&active_events))
 		return NMI_DONE;
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] perf/x86/intel: Fix PMI handling for Intel PT
  2015-06-12  9:08     ` Alexander Shishkin
@ 2015-06-12 13:09       ` Peter Zijlstra
  2015-06-19 17:58       ` [tip:perf/urgent] " tip-bot for Alexander Shishkin
  1 sibling, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2015-06-12 13:09 UTC (permalink / raw)
  To: Alexander Shishkin
  Cc: Ingo Molnar, linux-kernel, adrian.hunter, x86, hpa, acme

On Fri, Jun 12, 2015 at 12:08:35PM +0300, Alexander Shishkin wrote:
> From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
> Date: Tue, 9 Jun 2015 13:03:26 +0300
> Subject: [PATCH] perf/x86/intel: Fix PMI handling for Intel PT
> 
> Since Intel PT is a separate pmu and is not using any of the x86_pmu
> code paths, which means in particular that active_events counter remains
> intact when new PT events are created. However, PT uses x86_pmu PMI
> handler for its PMI handling needs. The problem here is that the latter
> checks active_events and in case of it being zero, exits without calling
> the actual x86_pmu.handle_nmi(), which results in unknown NMI errors and
> massive data loss for PT.
> 
> The effect is not visible if there are other perf events in the system
> at the same time that keep active_events counter non-zero, for instance
> if the NMI watchdog is running, so one needs to disable it to reproduce
> the problem.
> 
> At the same time, the active_events counter besides doing what the name
> suggests also implicitly serves as a pmc hardware and DS area reference
> counter.
> 
> This patch adds a separate reference counter for the pmc hardware, leaving
> active_events for actually counting the events and makes sure it also
> counts PT and BTS events.
> 
> Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>

Thanks.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [tip:perf/urgent] perf/x86/intel/bts: Fix DS area sharing with x86_pmu events
  2015-06-11 12:13 ` [PATCH 1/2] perf/x86/intel/bts: Fix DS area sharing with x86_pmu events Alexander Shishkin
@ 2015-06-19 17:58   ` tip-bot for Alexander Shishkin
  0 siblings, 0 replies; 12+ messages in thread
From: tip-bot for Alexander Shishkin @ 2015-06-19 17:58 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: dvlasenk, luto, peterz, mingo, akpm, oleg, linux-kernel, brgerst,
	hpa, alexander.shishkin, tglx, bp, torvalds

Commit-ID:  6b099d9b040b0f3d0aec05b560d7caf879af5077
Gitweb:     http://git.kernel.org/tip/6b099d9b040b0f3d0aec05b560d7caf879af5077
Author:     Alexander Shishkin <alexander.shishkin@linux.intel.com>
AuthorDate: Thu, 11 Jun 2015 15:13:56 +0300
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Fri, 19 Jun 2015 09:38:47 +0200

perf/x86/intel/bts: Fix DS area sharing with x86_pmu events

Currently, the intel_bts driver relies on the DS area allocated by the x86_pmu
code in its event_init() path, which is a bug: creating a BTS event while
no x86_pmu events are present results in a NULL pointer dereference.

The same DS area is also used by PEBS sampling, which makes it quite a bit
trickier to have a separate one for intel_bts' purposes.

This patch makes intel_bts driver use the same DS allocation and reference
counting code as x86_pmu to make sure it is always present when either
intel_bts or x86_pmu need it.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Link: http://lkml.kernel.org/r/1434024837-9916-2-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c           | 52 +++++++++++++++++++-----------
 arch/x86/kernel/cpu/perf_event.h           |  4 +++
 arch/x86/kernel/cpu/perf_event_intel_bts.c |  9 ++++++
 3 files changed, 46 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4f7001f..aa4e3a7 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -270,11 +270,7 @@ msr_fail:
 
 static void hw_perf_event_destroy(struct perf_event *event)
 {
-	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
-		release_pmc_hardware();
-		release_ds_buffers();
-		mutex_unlock(&pmc_reserve_mutex);
-	}
+	x86_release_hardware();
 }
 
 void hw_perf_lbr_event_destroy(struct perf_event *event)
@@ -324,6 +320,35 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
 	return x86_pmu_extra_regs(val, event);
 }
 
+int x86_reserve_hardware(void)
+{
+	int err = 0;
+
+	if (!atomic_inc_not_zero(&active_events)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&active_events) == 0) {
+			if (!reserve_pmc_hardware())
+				err = -EBUSY;
+			else
+				reserve_ds_buffers();
+		}
+		if (!err)
+			atomic_inc(&active_events);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+
+	return err;
+}
+
+void x86_release_hardware(void)
+{
+	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
+		release_pmc_hardware();
+		release_ds_buffers();
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+}
+
 /*
  * Check if we can create event of a certain type (that no conflicting events
  * are present).
@@ -336,9 +361,10 @@ int x86_add_exclusive(unsigned int what)
 		return 0;
 
 	mutex_lock(&pmc_reserve_mutex);
-	for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++)
+	for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
 		if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
 			goto out;
+	}
 
 	atomic_inc(&x86_pmu.lbr_exclusive[what]);
 	ret = 0;
@@ -527,19 +553,7 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	if (!x86_pmu_initialized())
 		return -ENODEV;
 
-	err = 0;
-	if (!atomic_inc_not_zero(&active_events)) {
-		mutex_lock(&pmc_reserve_mutex);
-		if (atomic_read(&active_events) == 0) {
-			if (!reserve_pmc_hardware())
-				err = -EBUSY;
-			else
-				reserve_ds_buffers();
-		}
-		if (!err)
-			atomic_inc(&active_events);
-		mutex_unlock(&pmc_reserve_mutex);
-	}
+	err = x86_reserve_hardware();
 	if (err)
 		return err;
 
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index ef78516..f068695 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -703,6 +703,10 @@ int x86_add_exclusive(unsigned int what);
 
 void x86_del_exclusive(unsigned int what);
 
+int x86_reserve_hardware(void);
+
+void x86_release_hardware(void);
+
 void hw_perf_lbr_event_destroy(struct perf_event *event);
 
 int x86_setup_perfctr(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
index ac1f0c5..7795f3f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c
@@ -483,17 +483,26 @@ static int bts_event_add(struct perf_event *event, int mode)
 
 static void bts_event_destroy(struct perf_event *event)
 {
+	x86_release_hardware();
 	x86_del_exclusive(x86_lbr_exclusive_bts);
 }
 
 static int bts_event_init(struct perf_event *event)
 {
+	int ret;
+
 	if (event->attr.type != bts_pmu.type)
 		return -ENOENT;
 
 	if (x86_add_exclusive(x86_lbr_exclusive_bts))
 		return -EBUSY;
 
+	ret = x86_reserve_hardware();
+	if (ret) {
+		x86_del_exclusive(x86_lbr_exclusive_bts);
+		return ret;
+	}
+
 	event->destroy = bts_event_destroy;
 
 	return 0;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [tip:perf/urgent] perf/x86/intel: Fix PMI handling for Intel PT
  2015-06-12  9:08     ` Alexander Shishkin
  2015-06-12 13:09       ` Peter Zijlstra
@ 2015-06-19 17:58       ` tip-bot for Alexander Shishkin
  2015-06-24 12:21         ` Peter Zijlstra
  2015-06-24 13:16         ` Peter Zijlstra
  1 sibling, 2 replies; 12+ messages in thread
From: tip-bot for Alexander Shishkin @ 2015-06-19 17:58 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: peterz, oleg, dvlasenk, tglx, akpm, mingo, torvalds,
	linux-kernel, luto, brgerst, bp, hpa, alexander.shishkin

Commit-ID:  1b7b938f181742f899a2f31c280f79dabef6ddb6
Gitweb:     http://git.kernel.org/tip/1b7b938f181742f899a2f31c280f79dabef6ddb6
Author:     Alexander Shishkin <alexander.shishkin@linux.intel.com>
AuthorDate: Tue, 9 Jun 2015 13:03:26 +0300
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Fri, 19 Jun 2015 09:38:47 +0200

perf/x86/intel: Fix PMI handling for Intel PT

Intel PT is a separate PMU and it is not using any of the x86_pmu
code paths, which means in particular that the active_events counter
remains intact when new PT events are created.

However, PT uses the generic x86_pmu PMI handler for its PMI handling needs.

The problem here is that the latter checks active_events and in case of it
being zero, exits without calling the actual x86_pmu.handle_nmi(), which
results in unknown NMI errors and massive data loss for PT.

The effect is not visible if there are other perf events in the system
at the same time that keep active_events counter non-zero, for instance
if the NMI watchdog is running, so one needs to disable it to reproduce
the problem.

At the same time, the active_events counter besides doing what the name
suggests also implicitly serves as a PMC hardware and DS area reference
counter.

This patch adds a separate reference counter for the PMC hardware, leaving
active_events for actually counting the events and makes sure it also
counts PT and BTS events.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Link: http://lkml.kernel.org/r/87k2v92t0s.fsf@ashishki-desk.ger.corp.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index aa4e3a7..6be186c 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -135,6 +135,7 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 }
 
 static atomic_t active_events;
+static atomic_t pmc_refcount;
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -271,6 +272,7 @@ msr_fail:
 static void hw_perf_event_destroy(struct perf_event *event)
 {
 	x86_release_hardware();
+	atomic_dec(&active_events);
 }
 
 void hw_perf_lbr_event_destroy(struct perf_event *event)
@@ -324,16 +326,16 @@ int x86_reserve_hardware(void)
 {
 	int err = 0;
 
-	if (!atomic_inc_not_zero(&active_events)) {
+	if (!atomic_inc_not_zero(&pmc_refcount)) {
 		mutex_lock(&pmc_reserve_mutex);
-		if (atomic_read(&active_events) == 0) {
+		if (atomic_read(&pmc_refcount) == 0) {
 			if (!reserve_pmc_hardware())
 				err = -EBUSY;
 			else
 				reserve_ds_buffers();
 		}
 		if (!err)
-			atomic_inc(&active_events);
+			atomic_inc(&pmc_refcount);
 		mutex_unlock(&pmc_reserve_mutex);
 	}
 
@@ -342,7 +344,7 @@ int x86_reserve_hardware(void)
 
 void x86_release_hardware(void)
 {
-	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
+	if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
 		release_pmc_hardware();
 		release_ds_buffers();
 		mutex_unlock(&pmc_reserve_mutex);
@@ -371,12 +373,24 @@ int x86_add_exclusive(unsigned int what)
 
 out:
 	mutex_unlock(&pmc_reserve_mutex);
+
+	/*
+	 * Assuming that all exclusive events will share the PMI handler
+	 * (which checks active_events for whether there is work to do),
+	 * we can bump active_events counter right here, except for
+	 * x86_lbr_exclusive_lbr events that go through x86_pmu_event_init()
+	 * path, which already bumps active_events for them.
+	 */
+	if (!ret && what != x86_lbr_exclusive_lbr)
+		atomic_inc(&active_events);
+
 	return ret;
 }
 
 void x86_del_exclusive(unsigned int what)
 {
 	atomic_dec(&x86_pmu.lbr_exclusive[what]);
+	atomic_dec(&active_events);
 }
 
 int x86_setup_perfctr(struct perf_event *event)
@@ -557,6 +571,7 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	if (err)
 		return err;
 
+	atomic_inc(&active_events);
 	event->destroy = hw_perf_event_destroy;
 
 	event->hw.idx = -1;
@@ -1429,6 +1444,10 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 	u64 finish_clock;
 	int ret;
 
+	/*
+	 * All PMUs/events that share this PMI handler should make sure to
+	 * increment active_events for their events.
+	 */
 	if (!atomic_read(&active_events))
 		return NMI_DONE;
 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [tip:perf/urgent] perf/x86/intel: Fix PMI handling for Intel PT
  2015-06-19 17:58       ` [tip:perf/urgent] " tip-bot for Alexander Shishkin
@ 2015-06-24 12:21         ` Peter Zijlstra
  2015-06-24 13:16         ` Peter Zijlstra
  1 sibling, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2015-06-24 12:21 UTC (permalink / raw)
  To: linux-kernel, torvalds, mingo, alexander.shishkin, hpa, bp,
	brgerst, luto, oleg, akpm, tglx, dvlasenk
  Cc: linux-tip-commits

On Fri, Jun 19, 2015 at 10:58:36AM -0700, tip-bot for Alexander Shishkin wrote:
> Commit-ID:  1b7b938f181742f899a2f31c280f79dabef6ddb6
> Gitweb:     http://git.kernel.org/tip/1b7b938f181742f899a2f31c280f79dabef6ddb6
> Author:     Alexander Shishkin <alexander.shishkin@linux.intel.com>
> AuthorDate: Tue, 9 Jun 2015 13:03:26 +0300
> Committer:  Ingo Molnar <mingo@kernel.org>
> CommitDate: Fri, 19 Jun 2015 09:38:47 +0200
> 
> perf/x86/intel: Fix PMI handling for Intel PT

And the prize goes to....

The result is very reproducible and the bisection is good, as reverting
this one commit makes it go away.

I've just finished, have not actually looked at the code yet, I need to
eat first.

---

root@ivb-ep:/usr/src/linux-2.6# perf record -a -e cycles:pp -- make
O=defconfig-build/ clean; make O=defconfig-build/ defconfig; make
O=defconfig-build/ -j80 -s

results in fail:

[   45.618402] Uhhuh. NMI received for unknown reason 21 on CPU 0.
[   45.625020] Do you have a strange power saving mode enabled?
[   45.631336] Dazed and confused, but trying to continue
[   49.736011] Uhhuh. NMI received for unknown reason 31 on CPU 3.
[   49.742627] Do you have a strange power saving mode enabled?
[   49.748954] Dazed and confused, but trying to continue
[   50.317877] Uhhuh. NMI received for unknown reason 31 on CPU 26.
[   50.324591] Do you have a strange power saving mode enabled?
[   50.330917] Dazed and confused, but trying to continue
[   50.950088] Uhhuh. NMI received for unknown reason 31 on CPU 27.
[   50.956804] Do you have a strange power saving mode enabled?
[   50.963129] Dazed and confused, but trying to continue



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [tip:perf/urgent] perf/x86/intel: Fix PMI handling for Intel PT
  2015-06-19 17:58       ` [tip:perf/urgent] " tip-bot for Alexander Shishkin
  2015-06-24 12:21         ` Peter Zijlstra
@ 2015-06-24 13:16         ` Peter Zijlstra
  2015-06-24 14:47           ` Peter Zijlstra
  1 sibling, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2015-06-24 13:16 UTC (permalink / raw)
  To: linux-kernel, torvalds, mingo, alexander.shishkin, hpa, bp,
	brgerst, luto, oleg, akpm, tglx, dvlasenk
  Cc: linux-tip-commits

On Fri, Jun 19, 2015 at 10:58:36AM -0700, tip-bot for Alexander Shishkin wrote:
> @@ -371,12 +373,24 @@ int x86_add_exclusive(unsigned int what)
>  
>  out:
>  	mutex_unlock(&pmc_reserve_mutex);
> +
> +	/*
> +	 * Assuming that all exclusive events will share the PMI handler
> +	 * (which checks active_events for whether there is work to do),
> +	 * we can bump active_events counter right here, except for
> +	 * x86_lbr_exclusive_lbr events that go through x86_pmu_event_init()
> +	 * path, which already bumps active_events for them.
> +	 */
> +	if (!ret && what != x86_lbr_exclusive_lbr)
> +		atomic_inc(&active_events);
> +
>  	return ret;
>  }
>  
>  void x86_del_exclusive(unsigned int what)
>  {
>  	atomic_dec(&x86_pmu.lbr_exclusive[what]);
> +	atomic_dec(&active_events);
>  }
>  

This, conditional inc, unconditional dec.

This leads to active_events == 0 even though there's still the NMI
watchdog active generating NMIs.

I would say, drop the condition on inc, double inc isn't a problem as
long as we match with a double dec and that results in simpler code too.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [tip:perf/urgent] perf/x86/intel: Fix PMI handling for Intel PT
  2015-06-24 13:16         ` Peter Zijlstra
@ 2015-06-24 14:47           ` Peter Zijlstra
  2015-07-01  6:57             ` [tip:perf/urgent] perf/x86: Fix 'active_events' imbalance tip-bot for Peter Zijlstra
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2015-06-24 14:47 UTC (permalink / raw)
  To: linux-kernel, torvalds, mingo, alexander.shishkin, hpa, bp,
	brgerst, luto, oleg, akpm, tglx, dvlasenk
  Cc: linux-tip-commits

Subject: perf,x86: Fix active_events imbalance

Commit 1b7b938f1817 ("perf/x86/intel: Fix PMI handling for Intel PT")
conditionally increments active_events in x86_add_exclusive() but
unconditionally decrements in x86_del_exclusive().

These extra decrements can lead to the situation where active_events is
zero and thus the PMI handler is 'disabled' while we have active events
on the PMU generating PMIs.

This leads to a truckload of:

  Uhhuh. NMI received for unknown reason 21 on CPU 28.
  Do you have a strange power saving mode enabled?
  Dazed and confused, but trying to continue

messages and generally messes up perf.

Remove the condition on the increment, double increment balanced by a
double decrement is perfectly fine.

Restructure the code a little bit to make the unconditional inc a bit
more natural.

Fixes: 1b7b938f1817 ("perf/x86/intel: Fix PMI handling for Intel PT")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/kernel/cpu/perf_event.c |   36 +++++++++++++-----------------------
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5801a14..3658de4 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -357,34 +357,24 @@ void x86_release_hardware(void)
  */
 int x86_add_exclusive(unsigned int what)
 {
-	int ret = -EBUSY, i;
-
-	if (atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what]))
-		return 0;
+	int i;
 
-	mutex_lock(&pmc_reserve_mutex);
-	for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
-		if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
-			goto out;
+	if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
+		mutex_lock(&pmc_reserve_mutex);
+		for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
+			if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
+				goto fail_unlock;
+		}
+		atomic_inc(&x86_pmu.lbr_exclusive[what]);
+		mutex_unlock(&pmc_reserve_mutex);
 	}
 
-	atomic_inc(&x86_pmu.lbr_exclusive[what]);
-	ret = 0;
+	atomic_inc(&active_events);
+	return 0;
 
-out:
+fail_unlock:
 	mutex_unlock(&pmc_reserve_mutex);
-
-	/*
-	 * Assuming that all exclusive events will share the PMI handler
-	 * (which checks active_events for whether there is work to do),
-	 * we can bump active_events counter right here, except for
-	 * x86_lbr_exclusive_lbr events that go through x86_pmu_event_init()
-	 * path, which already bumps active_events for them.
-	 */
-	if (!ret && what != x86_lbr_exclusive_lbr)
-		atomic_inc(&active_events);
-
-	return ret;
+	return -EBUSY;
 }
 
 void x86_del_exclusive(unsigned int what)

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [tip:perf/urgent] perf/x86: Fix 'active_events' imbalance
  2015-06-24 14:47           ` Peter Zijlstra
@ 2015-07-01  6:57             ` tip-bot for Peter Zijlstra
  0 siblings, 0 replies; 12+ messages in thread
From: tip-bot for Peter Zijlstra @ 2015-07-01  6:57 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, torvalds, tglx, mingo, bp, akpm, peterz

Commit-ID:  93472aff802fd7b61f2209335207e9bd793012f7
Gitweb:     http://git.kernel.org/tip/93472aff802fd7b61f2209335207e9bd793012f7
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Wed, 24 Jun 2015 16:47:50 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Tue, 30 Jun 2015 13:08:46 +0200

perf/x86: Fix 'active_events' imbalance

Commit 1b7b938f1817 ("perf/x86/intel: Fix PMI handling for Intel PT") conditionally
increments active_events in x86_add_exclusive() but unconditionally decrements in
x86_del_exclusive().

These extra decrements can lead to the situation where
active_events is zero and thus the PMI handler is 'disabled'
while we have active events on the PMU generating PMIs.

This leads to a truckload of:

  Uhhuh. NMI received for unknown reason 21 on CPU 28.
  Do you have a strange power saving mode enabled?
  Dazed and confused, but trying to continue

messages and generally messes up perf.

Remove the condition on the increment, double increment balanced
by a double decrement is perfectly fine.

Restructure the code a little bit to make the unconditional inc
a bit more natural.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: alexander.shishkin@linux.intel.com
Cc: brgerst@gmail.com
Cc: dvlasenk@redhat.com
Cc: luto@amacapital.net
Cc: oleg@redhat.com
Fixes: 1b7b938f1817 ("perf/x86/intel: Fix PMI handling for Intel PT")
Link: http://lkml.kernel.org/r/20150624144750.GJ18673@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c | 36 +++++++++++++-----------------------
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5801a14..3658de4 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -357,34 +357,24 @@ void x86_release_hardware(void)
  */
 int x86_add_exclusive(unsigned int what)
 {
-	int ret = -EBUSY, i;
-
-	if (atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what]))
-		return 0;
+	int i;
 
-	mutex_lock(&pmc_reserve_mutex);
-	for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
-		if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
-			goto out;
+	if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
+		mutex_lock(&pmc_reserve_mutex);
+		for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
+			if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
+				goto fail_unlock;
+		}
+		atomic_inc(&x86_pmu.lbr_exclusive[what]);
+		mutex_unlock(&pmc_reserve_mutex);
 	}
 
-	atomic_inc(&x86_pmu.lbr_exclusive[what]);
-	ret = 0;
+	atomic_inc(&active_events);
+	return 0;
 
-out:
+fail_unlock:
 	mutex_unlock(&pmc_reserve_mutex);
-
-	/*
-	 * Assuming that all exclusive events will share the PMI handler
-	 * (which checks active_events for whether there is work to do),
-	 * we can bump active_events counter right here, except for
-	 * x86_lbr_exclusive_lbr events that go through x86_pmu_event_init()
-	 * path, which already bumps active_events for them.
-	 */
-	if (!ret && what != x86_lbr_exclusive_lbr)
-		atomic_inc(&active_events);
-
-	return ret;
+	return -EBUSY;
 }
 
 void x86_del_exclusive(unsigned int what)

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2015-07-01  6:58 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-11 12:13 [PATCH 0/2] perf/x86/intel: Fixes for PT and BTS Alexander Shishkin
2015-06-11 12:13 ` [PATCH 1/2] perf/x86/intel/bts: Fix DS area sharing with x86_pmu events Alexander Shishkin
2015-06-19 17:58   ` [tip:perf/urgent] " tip-bot for Alexander Shishkin
2015-06-11 12:13 ` [PATCH 2/2] perf/x86/intel: Fix PMI handling for Intel PT Alexander Shishkin
2015-06-11 14:00   ` Peter Zijlstra
2015-06-12  9:08     ` Alexander Shishkin
2015-06-12 13:09       ` Peter Zijlstra
2015-06-19 17:58       ` [tip:perf/urgent] " tip-bot for Alexander Shishkin
2015-06-24 12:21         ` Peter Zijlstra
2015-06-24 13:16         ` Peter Zijlstra
2015-06-24 14:47           ` Peter Zijlstra
2015-07-01  6:57             ` [tip:perf/urgent] perf/x86: Fix 'active_events' imbalance tip-bot for Peter Zijlstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).