* [PATCH]  perf_events: AMD event scheduling (v3)
@ 2010-02-08 15:17 Stephane Eranian
  2010-02-10 11:59 ` Peter Zijlstra
  2010-02-26 10:25 ` [tip:perf/core] perf_events, x86: AMD event scheduling tip-bot for Stephane Eranian
  0 siblings, 2 replies; 12+ messages in thread
From: Stephane Eranian @ 2010-02-08 15:17 UTC (permalink / raw)
  To: linux-kernel
  Cc: peterz, mingo, paulus, davem, fweisbec, robert.richter,
	perfmon2-devel, eranian, eranian

	This patch adds correct AMD Northbridge event scheduling.
	It must be applied on top of tip-x86 + the hw_perf_enable() fix.

	NB events are events measuring L3 cache and Hypertransport
	traffic. They are identified by an event code >= 0xe0.
	They measure events on the Northbridge, which is shared
	by all cores on a package. NB events are counted on a
	shared set of counters. When a NB event is programmed
	in a counter, the data actually comes from a shared
	counter. Thus, access to those counters needs to be
	synchronized.

	We implement the synchronization such that no two cores
	can be measuring NB events using the same counters. Thus,
	we maintain a per-NB allocation table. The available slot
	is propagated using the event_constraint structure.

	The 2nd version takes into account the changes in how
	constraints are stored by the scheduling code.

	The 3rd version fixes formatting issues, code readability
	and one bug in amd_put_event_constraints().

	Signed-off-by: Stephane Eranian <eranian@google.com>

--
 arch/x86/kernel/cpu/perf_event.c |  267 ++++++++++++++++++++++++++++++++++++++-
 kernel/perf_event.c              |    5 
 2 files changed, 269 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index a920f17..29c294c 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -80,6 +80,13 @@ struct event_constraint {
 	int	weight;
 };
 
+struct amd_nb {
+	int nb_id;  /* Northbridge id */
+	int refcnt; /* reference count */
+	struct perf_event *owners[X86_PMC_IDX_MAX];
+	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
+};
+
 struct cpu_hw_events {
 	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
@@ -92,6 +99,7 @@ struct cpu_hw_events {
 	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
 	u64			tags[X86_PMC_IDX_MAX];
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+	struct amd_nb		*amd_nb;
 };
 
 #define __EVENT_CONSTRAINT(c, n, m, w) {\
@@ -153,6 +161,8 @@ struct x86_pmu {
 
 static struct x86_pmu x86_pmu __read_mostly;
 
+static raw_spinlock_t amd_nb_lock;
+
 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.enabled = 1,
 };
@@ -802,7 +812,7 @@ static u64 amd_pmu_event_map(int hw_event)
 
 static u64 amd_pmu_raw_event(u64 hw_event)
 {
-#define K7_EVNTSEL_EVENT_MASK	0x7000000FFULL
+#define K7_EVNTSEL_EVENT_MASK	0xF000000FFULL
 #define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
 #define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
 #define K7_EVNTSEL_INV_MASK	0x000800000ULL
@@ -2196,6 +2206,7 @@ perf_event_nmi_handler(struct notifier_block *self,
 }
 
 static struct event_constraint unconstrained;
+static struct event_constraint emptyconstraint;
 
 static struct event_constraint bts_constraint =
 	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
@@ -2235,10 +2246,148 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
 	return &unconstrained;
 }
 
+/*
+ * AMD64 events are detected based on their event codes.
+ */
+static inline int amd_is_nb_event(struct hw_perf_event *hwc)
+{
+	u64 val = hwc->config & K7_EVNTSEL_EVENT_MASK;
+	/* event code : bits [35-32] | [7-0] */
+	val = (val >> 24) | (val & 0xff);
+	return val >= 0x0e0;
+}
+
+static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
+				      struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct amd_nb *nb = cpuc->amd_nb;
+	int i;
+
+	/*
+	 * only care about NB events
+	 */
+	if (!(nb && amd_is_nb_event(hwc)))
+		return;
+
+	/*
+	 * need to scan whole list because event may not have
+	 * been assigned during scheduling
+	 *
+	 * no race condition possible because event can only
+	 * be removed on one CPU at a time AND PMU is disabled
+	 * when we come here
+	 */
+	for (i = 0; i < x86_pmu.num_events; i++) {
+		if (nb->owners[i] == event) {
+			cmpxchg(nb->owners+i, event, NULL);
+			break;
+		}
+	}
+}
+
+ /*
+  * AMD64 Northbridge events need special treatment because
+  * counter access needs to be synchronized across all cores
+  * of a package. Refer to BKDG section 3.12
+  *
+  * NB events are events measuring L3 cache, Hypertransport
+  * traffic. They are identified by an event code  >= 0xe0.
+  * They measure events on the Northbride which is shared
+  * by all cores on a package. NB events are counted on a
+  * shared set of counters. When a NB event is programmed
+  * in a counter, the data actually comes from a shared
+  * counter. Thus, access to those counters needs to be
+  * synchronized.
+  * We implement the synchronization such that no two cores
+  * can be measuring NB events using the same counters. Thus,
+  * we maintain a per-NB * allocation table. The available slot
+  * is propagated using the event_constraint structure.
+  *
+  * We provide only one choice for each NB event based on
+  * the fact that only NB events have restrictions. Consequently,
+  * if a counter is available, there is a guarantee the NB event
+  * will be assigned to it. If no slot is available, an empty
+  * constraint is returned and scheduling will evnetually fail
+  * for this event.
+  *
+  * Note that all cores attached the same NB compete for the same
+  * counters to host NB events, this is why we use atomic ops. Some
+  * multi-chip CPUs may have more than one NB.
+  *
+  * Given that resources are allocated (cmpxchg), they must be
+  * eventually freed for others to use. This is accomplished by
+  * calling amd_put_event_constraints().
+  *
+  * Non NB events are not impacted by this restriction.
+  */
 static struct event_constraint *
 amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
-	return &unconstrained;
+	struct hw_perf_event *hwc = &event->hw;
+	struct amd_nb *nb = cpuc->amd_nb;
+	struct perf_event *old = NULL;
+	int max = x86_pmu.num_events;
+	int i, j, k = -1;
+
+	/*
+	 * if not NB event or no NB, then no constraints
+	 */
+	if (!(nb && amd_is_nb_event(hwc)))
+		return &unconstrained;
+
+	/*
+	 * detect if already present, if so reuse
+	 *
+	 * cannot merge with actual allocation
+	 * because of possible holes
+	 *
+	 * event can already be present yet not assigned (in hwc->idx)
+	 * because of successive calls to x86_schedule_events() from
+	 * hw_perf_group_sched_in() without hw_perf_enable()
+	 */
+	for (i = 0; i < max; i++) {
+		/*
+		 * keep track of first free slot
+		 */
+		if (k == -1 && !nb->owners[i])
+			k = i;
+
+		/* already present, reuse */
+		if (nb->owners[i] == event)
+			goto done;
+	}
+	/*
+	 * not present, so grab a new slot
+	 * starting either at:
+	 */
+	if (hwc->idx != -1) {
+		/* previous assignment */
+		i = hwc->idx;
+	} else if (k != -1) {
+		/* start from free slot found */
+		i = k;
+	} else {
+		/*
+		 * event not found, no slot found in
+		 * first pass, try again from the
+		 * beginning
+		 */
+		i = 0;
+	}
+	j = i;
+	do {
+		old = cmpxchg(nb->owners+i, NULL, event);
+		if (!old)
+			break;
+		if (++i == max)
+			i = 0;
+	} while (i != j);
+done:
+	if (!old)
+		return &nb->event_constraints[i];
+
+	return &emptyconstraint;
 }
 
 static int x86_event_sched_in(struct perf_event *event,
@@ -2451,7 +2600,8 @@ static __initconst struct x86_pmu amd_pmu = {
 	.apic			= 1,
 	/* use highest bit to detect overflow */
 	.max_period		= (1ULL << 47) - 1,
-	.get_event_constraints	= amd_get_event_constraints
+	.get_event_constraints	= amd_get_event_constraints,
+	.put_event_constraints	= amd_put_event_constraints
 };
 
 static __init int p6_pmu_init(void)
@@ -2575,6 +2725,94 @@ static __init int intel_pmu_init(void)
 	return 0;
 }
 
+static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
+{
+	struct amd_nb *nb;
+	int i;
+
+	nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
+	if (!nb)
+		return NULL;
+
+	memset(nb, 0, sizeof(*nb));
+	nb->nb_id = nb_id;
+
+	/*
+	 * initialize all possible NB constraints
+   */
+	for (i = 0; i < x86_pmu.num_events; i++) {
+		set_bit(i, nb->event_constraints[i].idxmsk);
+		nb->event_constraints[i].weight = 1;
+	}
+	return nb;
+}
+
+static void amd_pmu_cpu_online(int cpu)
+{
+	struct cpu_hw_events *cpu1, *cpu2;
+	struct amd_nb *nb = NULL;
+	int i, nb_id;
+
+	if (boot_cpu_data.x86_max_cores < 2)
+		return;
+
+	/*
+	 * function may be called too early in the
+	 * boot process, in which case nb_id is bogus
+	 *
+	 * for BSP, there is an explicit call from
+	 * amd_pmu_init()
+	 */
+	nb_id = amd_get_nb_id(cpu);
+	if (nb_id == BAD_APICID)
+		return;
+
+	cpu1 = &per_cpu(cpu_hw_events, cpu);
+	cpu1->amd_nb = NULL;
+
+	raw_spin_lock(&amd_nb_lock);
+
+	for_each_online_cpu(i) {
+		cpu2 = &per_cpu(cpu_hw_events, i);
+		nb = cpu2->amd_nb;
+		if (!nb)
+			continue;
+		if (nb->nb_id == nb_id)
+			goto found;
+	}
+
+	nb = amd_alloc_nb(cpu, nb_id);
+	if (!nb) {
+		pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
+		raw_spin_unlock(&amd_nb_lock);
+		return;
+	}
+found:
+	nb->refcnt++;
+	cpu1->amd_nb = nb;
+
+	raw_spin_unlock(&amd_nb_lock);
+}
+
+static void amd_pmu_cpu_offline(int cpu)
+{
+	struct cpu_hw_events *cpuhw;
+
+	if (boot_cpu_data.x86_max_cores < 2)
+		return;
+
+	cpuhw = &per_cpu(cpu_hw_events, cpu);
+
+	raw_spin_lock(&amd_nb_lock);
+
+	if (--cpuhw->amd_nb->refcnt == 0)
+		kfree(cpuhw->amd_nb);
+
+	cpuhw->amd_nb = NULL;
+
+	raw_spin_unlock(&amd_nb_lock);
+}
+
 static __init int amd_pmu_init(void)
 {
 	/* Performance-monitoring supported from K7 and later: */
@@ -2587,6 +2825,8 @@ static __init int amd_pmu_init(void)
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
 	       sizeof(hw_cache_event_ids));
 
+	/* initialize BSP */
+	amd_pmu_cpu_online(smp_processor_id());
 	return 0;
 }
 
@@ -2918,4 +3158,25 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 void hw_perf_event_setup_online(int cpu)
 {
 	init_debug_store_on_cpu(cpu);
+
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		amd_pmu_cpu_online(cpu);
+		break;
+	default:
+		return;
+	}
+}
+
+void hw_perf_event_setup_offline(int cpu)
+{
+	init_debug_store_on_cpu(cpu);
+
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		amd_pmu_cpu_offline(cpu);
+		break;
+	default:
+		return;
+	}
 }
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index ab8a312..0092480 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -98,6 +98,7 @@ void __weak hw_perf_enable(void)		{ barrier(); }
 
 void __weak hw_perf_event_setup(int cpu)	{ barrier(); }
 void __weak hw_perf_event_setup_online(int cpu)	{ barrier(); }
+void __weak hw_perf_event_setup_offline(int cpu)	{ barrier(); }
 
 int __weak
 hw_perf_group_sched_in(struct perf_event *group_leader,
@@ -5446,6 +5447,10 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 		perf_event_exit_cpu(cpu);
 		break;
 
+	case CPU_DEAD:
+		hw_perf_event_setup_offline(cpu);
+		break;
+
 	default:
 		break;
 	}


* Re: [PATCH]  perf_events: AMD event scheduling (v3)
  2010-02-08 15:17 [PATCH] perf_events: AMD event scheduling (v3) Stephane Eranian
@ 2010-02-10 11:59 ` Peter Zijlstra
  2010-02-10 13:04   ` Stephane Eranian
  2010-02-10 16:09   ` Robert Richter
  2010-02-26 10:25 ` [tip:perf/core] perf_events, x86: AMD event scheduling tip-bot for Stephane Eranian
  1 sibling, 2 replies; 12+ messages in thread
From: Peter Zijlstra @ 2010-02-10 11:59 UTC (permalink / raw)
  To: eranian
  Cc: linux-kernel, mingo, paulus, davem, fweisbec, robert.richter,
	perfmon2-devel, eranian

On Mon, 2010-02-08 at 17:17 +0200, Stephane Eranian wrote:
>         This patch adds correct AMD Northbridge event scheduling.
>         It must be applied on top tip-x86 + hw_perf_enable() fix.
> 
>         NB events are events measuring L3 cache, Hypertransport
>         traffic. They are identified by an event code  >= 0xe0.
>         They measure events on the Northbride which is shared
>         by all cores on a package. NB events are counted on a
>         shared set of counters. When a NB event is programmed
>         in a counter, the data actually comes from a shared
>         counter. Thus, access to those counters needs to be
>         synchronized.
> 
>         We implement the synchronization such that no two cores
>         can be measuring NB events using the same counters. Thus,
>         we maintain a per-NB * allocation table. The available slot
>         is propagated using the event_constraint structure.
> 
>         The 2nd version takes into account the changes on how
>         constraints are stored by the scheduling code.
> 
>         The 3rd version fixes formatting issues, code readability
>         and one bug in amd_put_event_constraints().
> 
>         Signed-off-by: Stephane Eranian <eranian@google.com>

OK, took this with the below merged in.

---
Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -81,7 +81,7 @@ struct event_constraint {
 };
 
 struct amd_nb {
-	int nb_id;  /* Northbridge id */
+	int nb_id;  /* NorthBridge id */
 	int refcnt; /* reference count */
 	struct perf_event *owners[X86_PMC_IDX_MAX];
 	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
@@ -2268,7 +2268,7 @@ static inline int amd_is_nb_event(struct
 	u64 val = hwc->config & K7_EVNTSEL_EVENT_MASK;
 	/* event code : bits [35-32] | [7-0] */
 	val = (val >> 24) | (val & 0xff);
-	return val >= 0x0e0;
+	return val >= 0xe00;
 }
 
 static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -2301,28 +2301,29 @@ static void amd_put_event_constraints(st
 }
 
  /*
-  * AMD64 Northbridge events need special treatment because
+  * AMD64 NorthBridge events need special treatment because
   * counter access needs to be synchronized across all cores
   * of a package. Refer to BKDG section 3.12
   *
   * NB events are events measuring L3 cache, Hypertransport
-  * traffic. They are identified by an event code  >= 0xe0.
-  * They measure events on the Northbride which is shared
+  * traffic. They are identified by an event code >= 0xe00.
+  * They measure events on the NorthBride which is shared
   * by all cores on a package. NB events are counted on a
   * shared set of counters. When a NB event is programmed
   * in a counter, the data actually comes from a shared
   * counter. Thus, access to those counters needs to be
   * synchronized.
+  *
   * We implement the synchronization such that no two cores
   * can be measuring NB events using the same counters. Thus,
-  * we maintain a per-NB * allocation table. The available slot
+  * we maintain a per-NB allocation table. The available slot
   * is propagated using the event_constraint structure.
   *
   * We provide only one choice for each NB event based on
   * the fact that only NB events have restrictions. Consequently,
   * if a counter is available, there is a guarantee the NB event
   * will be assigned to it. If no slot is available, an empty
-  * constraint is returned and scheduling will evnetually fail
+  * constraint is returned and scheduling will eventually fail
   * for this event.
   *
   * Note that all cores attached the same NB compete for the same
@@ -2753,7 +2754,7 @@ static struct amd_nb *amd_alloc_nb(int c
 
 	/*
 	 * initialize all possible NB constraints
-   */
+	 */
 	for (i = 0; i < x86_pmu.num_events; i++) {
 		set_bit(i, nb->event_constraints[i].idxmsk);
 		nb->event_constraints[i].weight = 1;
@@ -2773,9 +2774,6 @@ static void amd_pmu_cpu_online(int cpu)
 	/*
 	 * function may be called too early in the
 	 * boot process, in which case nb_id is bogus
-	 *
-	 * for BSP, there is an explicit call from
-	 * amd_pmu_init()
 	 */
 	nb_id = amd_get_nb_id(cpu);
 	if (nb_id == BAD_APICID)
@@ -2839,7 +2837,10 @@ static __init int amd_pmu_init(void)
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
 	       sizeof(hw_cache_event_ids));
 
-	/* initialize BSP */
+	/* 
+	 * explicitly initialize the boot cpu, other cpus will get 
+	 * the cpu hotplug callbacks from smp_init()
+	 */
 	amd_pmu_cpu_online(smp_processor_id());
 	return 0;
 }




* Re: [PATCH] perf_events: AMD event scheduling (v3)
  2010-02-10 11:59 ` Peter Zijlstra
@ 2010-02-10 13:04   ` Stephane Eranian
  2010-02-10 13:17     ` Peter Zijlstra
  2010-02-10 16:09   ` Robert Richter
  1 sibling, 1 reply; 12+ messages in thread
From: Stephane Eranian @ 2010-02-10 13:04 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, mingo, paulus, davem, fweisbec, robert.richter,
	perfmon2-devel, eranian

On Wed, Feb 10, 2010 at 12:59 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Mon, 2010-02-08 at 17:17 +0200, Stephane Eranian wrote:
>>         This patch adds correct AMD Northbridge event scheduling.
>>         It must be applied on top tip-x86 + hw_perf_enable() fix.
>>
>>         NB events are events measuring L3 cache, Hypertransport
>>         traffic. They are identified by an event code  >= 0xe0.
>>         They measure events on the Northbride which is shared
>>         by all cores on a package. NB events are counted on a
>>         shared set of counters. When a NB event is programmed
>>         in a counter, the data actually comes from a shared
>>         counter. Thus, access to those counters needs to be
>>         synchronized.
>>
>>         We implement the synchronization such that no two cores
>>         can be measuring NB events using the same counters. Thus,
>>         we maintain a per-NB * allocation table. The available slot
>>         is propagated using the event_constraint structure.
>>
>>         The 2nd version takes into account the changes on how
>>         constraints are stored by the scheduling code.
>>
>>         The 3rd version fixes formatting issues, code readability
>>         and one bug in amd_put_event_constraints().
>>
>>         Signed-off-by: Stephane Eranian <eranian@google.com>
>
> OK, took this with the below merged in.
>
> ---
> Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
> +++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
> @@ -81,7 +81,7 @@ struct event_constraint {
>  };
>
>  struct amd_nb {
> -       int nb_id;  /* Northbridge id */
> +       int nb_id;  /* NorthBridge id */
>        int refcnt; /* reference count */
>        struct perf_event *owners[X86_PMC_IDX_MAX];
>        struct event_constraint event_constraints[X86_PMC_IDX_MAX];
> @@ -2268,7 +2268,7 @@ static inline int amd_is_nb_event(struct
>        u64 val = hwc->config & K7_EVNTSEL_EVENT_MASK;
>        /* event code : bits [35-32] | [7-0] */
>        val = (val >> 24) | (val & 0xff);
> -       return val >= 0x0e0;
> +       return val >= 0xe00;
>  }
>
I don't understand the change from 0xe0 to 0xe00.
That's not the same thing at all.
Event select is bits 0-7 + 32-35.
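
[Editorial aside, not part of the original thread: a small standalone
sketch of the packing under discussion, using the mask and shift from
the patch above. It shows why, once bits [35:32] are folded down next
to bits [7:0], an NB event code such as 0xe0 satisfies >= 0xe0 but not
>= 0xe00.]

#include <stdint.h>
#include <stdio.h>

/*
 * Editorial sketch: how the patch assembles the 12-bit AMD event code.
 * K7_EVNTSEL_EVENT_MASK (0xF000000FFULL) keeps bits [35:32] and [7:0];
 * shifting right by 24 moves bits [35:32] down to bits [11:8].
 */
int main(void)
{
	uint64_t config = 0xe0;	/* an event from the 0xe0 Northbridge group */
	uint64_t val = config & 0xF000000FFULL;

	val = (val >> 24) | (val & 0xff);	/* val == 0x0e0 */
	printf("code=0x%03llx  >=0xe0:%d  >=0xe00:%d\n",
	       (unsigned long long)val, val >= 0x0e0, val >= 0xe00);
	return 0;
}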


* Re: [PATCH] perf_events: AMD event scheduling (v3)
  2010-02-10 13:04   ` Stephane Eranian
@ 2010-02-10 13:17     ` Peter Zijlstra
  2010-02-10 13:28       ` Peter Zijlstra
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2010-02-10 13:17 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: linux-kernel, mingo, paulus, davem, fweisbec, robert.richter,
	perfmon2-devel, eranian

On Wed, 2010-02-10 at 14:04 +0100, Stephane Eranian wrote:

> > @@ -2268,7 +2268,7 @@ static inline int amd_is_nb_event(struct
> >        u64 val = hwc->config & K7_EVNTSEL_EVENT_MASK;
> >        /* event code : bits [35-32] | [7-0] */
> >        val = (val >> 24) | (val & 0xff);
> > -       return val >= 0x0e0;
> > +       return val >= 0xe00;
> >  }
> >
> I don't understand the change from 0xe0 to 0xe00.
> That's not the same thing at all.
> Event select is bits 0-7 + 32-35.

OK, that appears to be my bad: because you extended K7_EVNTSEL_EVENT_MASK
with bit 35, I thought NB events all had bit 35 set.

But looking at the AMD docs it does indeed appear to start at 0xe0, and
there are no events with bit 35 set, only a few with bit 32.

I'll switch it back to 0xe0.



* Re: [PATCH] perf_events: AMD event scheduling (v3)
  2010-02-10 13:17     ` Peter Zijlstra
@ 2010-02-10 13:28       ` Peter Zijlstra
  2010-02-10 13:58         ` stephane eranian
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2010-02-10 13:28 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: linux-kernel, mingo, paulus, davem, fweisbec, robert.richter,
	perfmon2-devel, eranian

On Wed, 2010-02-10 at 14:17 +0100, Peter Zijlstra wrote:
> On Wed, 2010-02-10 at 14:04 +0100, Stephane Eranian wrote:
> 
> > > @@ -2268,7 +2268,7 @@ static inline int amd_is_nb_event(struct
> > >        u64 val = hwc->config & K7_EVNTSEL_EVENT_MASK;
> > >        /* event code : bits [35-32] | [7-0] */
> > >        val = (val >> 24) | (val & 0xff);
> > > -       return val >= 0x0e0;
> > > +       return val >= 0xe00;
> > >  }
> > >
> > I don't understand the change from 0xe0 to 0xe00.
> > That's not the same thing at all.
> > Event select is bits 0-7 + 32-35.
> 
> OK that appears to be my bad, because you extended K7_EVNTSEL_EVENT_MASK
> with bit 35 I thought NB events all had bit 35 set.
> 
> But looking at the AMD docs it does indeed appear to start at 0xe0, and
> there are no events with bit 35 set, only a few with bit 32.
> 
> I'll switch it back to 0xe0.

Fwiw, for the purpose of that function you might as well write:

static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
	return (hwc->config & K7_EVNTSEL_EVENT_MASK) > 0xe0;
}

No need to move bits around higher than the value you compare against.



* Re: [PATCH] perf_events: AMD event scheduling (v3)
  2010-02-10 13:28       ` Peter Zijlstra
@ 2010-02-10 13:58         ` stephane eranian
  2010-02-10 15:59           ` [perfmon2] " Drongowski, Paul
  0 siblings, 1 reply; 12+ messages in thread
From: stephane eranian @ 2010-02-10 13:58 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Stephane Eranian, linux-kernel, mingo, paulus, davem, fweisbec,
	robert.richter, perfmon2-devel

On Wed, Feb 10, 2010 at 2:28 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Wed, 2010-02-10 at 14:17 +0100, Peter Zijlstra wrote:
>> On Wed, 2010-02-10 at 14:04 +0100, Stephane Eranian wrote:
>>
>> > > @@ -2268,7 +2268,7 @@ static inline int amd_is_nb_event(struct
>> > >        u64 val = hwc->config & K7_EVNTSEL_EVENT_MASK;
>> > >        /* event code : bits [35-32] | [7-0] */
>> > >        val = (val >> 24) | (val & 0xff);
>> > > -       return val >= 0x0e0;
>> > > +       return val >= 0xe00;
>> > >  }
>> > >
>> > I don't understand the change from 0xe0 to 0xe00.
>> > That's not the same thing at all.
>> > Event select is bits 0-7 + 32-35.
>>
>> OK that appears to be my bad, because you extended K7_EVNTSEL_EVENT_MASK
>> with bit 35 I thought NB events all had bit 35 set.
>>
>> But looking at the AMD docs it does indeed appear to start at 0xe0, and
>> there are no events with bit 35 set, only a few with bit 32.
>>
>> I'll switch it back to 0xe0.
>
> Fwiw, for the purpose of that function you might as well write:
>
> static inline int amd_is_nb_event(struct hw_perf_event *hwc)
> {
>        return (hwc->config & K7_EVNTSEL_EVENT_MASK) > 0xe0;
> }
>
> No need to move bits around higher than the value you compare against.
>
I think given the existing event codes, that would be fine too.


* RE: [perfmon2] [PATCH] perf_events: AMD event scheduling (v3)
  2010-02-10 13:58         ` stephane eranian
@ 2010-02-10 15:59           ` Drongowski, Paul
  2010-02-10 16:07             ` Stephane Eranian
  0 siblings, 1 reply; 12+ messages in thread
From: Drongowski, Paul @ 2010-02-10 15:59 UTC (permalink / raw)
  To: eranian, Peter Zijlstra
  Cc: perfmon2-devel, fweisbec, linux-kernel, Stephane Eranian, paulus,
	mingo, davem


Good catch!

Historically, AMD has treated the bit field EventSelect<7:5>
in model specific register MSRC001_00[03:00] Performance Event
Select Register (PERF_CTL[3:0]) like an "event group selector".
Please see the "BIOS and Kernel Developer's Guide for AMD
Family 10h Processors."

Typically, EventSelect<7:5> == 0x7 selects Northbridge
events.

Yes, when the event select value was extended to twelve bits,
it placed this field somewhere in the middle of the full
twelve bit value. ;-)

Please consider AMD Family 10h event 0x1C0 Retired x87
Floating Point Operations. This is not a Northbridge event.
If the test is greater than or equal to (e.g., 0x1C0 >= 0x0E0),
then this event will be incorrectly identified as a
Northbridge event. (There are other similar examples.)

So, I would recommend testing EventSelect<7:5> == 0x7
in order to detect AMD Northbridge events.

Thanks for implementing the AMD event scheduling feature!

-- pj

Paul Drongowski
AMD CodeAnalyst team


-----Original Message-----
From: stephane eranian [mailto:eranian@googlemail.com] 
Sent: Wednesday, February 10, 2010 8:58 AM
To: Peter Zijlstra
Cc: perfmon2-devel@lists.sf.net; fweisbec@gmail.com; linux-kernel@vger.kernel.org; Stephane Eranian; paulus@samba.org; mingo@elte.hu; davem@davemloft.net
Subject: Re: [perfmon2] [PATCH] perf_events: AMD event scheduling (v3)

On Wed, Feb 10, 2010 at 2:28 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Wed, 2010-02-10 at 14:17 +0100, Peter Zijlstra wrote:
>> On Wed, 2010-02-10 at 14:04 +0100, Stephane Eranian wrote:
>>
>> > > @@ -2268,7 +2268,7 @@ static inline int amd_is_nb_event(struct
>> > >        u64 val = hwc->config & K7_EVNTSEL_EVENT_MASK;
>> > >        /* event code : bits [35-32] | [7-0] */
>> > >        val = (val >> 24) | (val & 0xff);
>> > > -       return val >= 0x0e0;
>> > > +       return val >= 0xe00;
>> > >  }
>> > >
>> > I don't understand the change from 0xe0 to 0xe00.
>> > That's not the same thing at all.
>> > Event select is bits 0-7 + 32-35.
>>
>> OK that appears to be my bad, because you extended K7_EVNTSEL_EVENT_MASK
>> with bit 35 I thought NB events all had bit 35 set.
>>
>> But looking at the AMD docs it does indeed appear to start at 0xe0, and
>> there are no events with bit 35 set, only a few with bit 32.
>>
>> I'll switch it back to 0xe0.
>
> Fwiw, for the purpose of that function you might as well write:
>
> static inline int amd_is_nb_event(struct hw_perf_event *hwc)
> {
>        return (hwc->config & K7_EVNTSEL_EVENT_MASK) > 0xe0;
> }
>
> No need to move bits around higher than the value you compare against.
>
I think given the existing event codes, that would be fine too.



* Re: [perfmon2] [PATCH] perf_events: AMD event scheduling (v3)
  2010-02-10 15:59           ` [perfmon2] " Drongowski, Paul
@ 2010-02-10 16:07             ` Stephane Eranian
  2010-02-10 16:17               ` Peter Zijlstra
  0 siblings, 1 reply; 12+ messages in thread
From: Stephane Eranian @ 2010-02-10 16:07 UTC (permalink / raw)
  To: Drongowski, Paul
  Cc: eranian, Peter Zijlstra, perfmon2-devel, fweisbec, linux-kernel,
	paulus, mingo, davem

On Wed, Feb 10, 2010 at 4:59 PM, Drongowski, Paul
<paul.drongowski@amd.com> wrote:
> Good catch!
>
> Historically, AMD has treated the bit field EventSelect<7:5>
> in model specific register MSRC001_00[03:00] Performance Event
> Select Register (PERF_CTL[3:0]) like an "event group selector".
> Please see the "BIOS and Kernel Developer's Guide for AMD
> Family 10h Processors."
>
> Typically, EventSelect<7:5> == 0x7 selects Northbridge
> events.
>
> Yes, when the event select value was extended to twelve bits,
> it placed this field somewhere in the middle of the full
> twelve bit value. ;-)
>
> Please consider AMD Family 10h event 0x1C0 Retired x87
> Floating Point Operations. This is not a Northbridge event.
> If the test is greater than or equal to (e.g., 0x1C0 >= 0x0E0),
> then this event will be incorrectly identified as a
> Northbridge event. (There are other similar examples.)
>
Good example.

> So, I would recommend testing EventSelect<7:5> == 0x7
> in order to detect AMD Northbridge events.
>
Ok, so something like the following would do it:

static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
   return (hwc->config >> 5) & 0x7 == 0x7;
}
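
[Editorial note, not part of the original thread: C operator precedence
makes == bind tighter than &, so the expression above parses as
(hwc->config >> 5) & (0x7 == 0x7). The intended test needs explicit
parentheses, e.g.:

    return ((hwc->config >> 5) & 0x7) == 0x7;

The masked-compare form Peter settles on below is equivalent and
sidesteps the precedence question.]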


* Re: [PATCH]  perf_events: AMD event scheduling (v3)
  2010-02-10 11:59 ` Peter Zijlstra
  2010-02-10 13:04   ` Stephane Eranian
@ 2010-02-10 16:09   ` Robert Richter
  2010-02-10 16:14     ` Peter Zijlstra
  1 sibling, 1 reply; 12+ messages in thread
From: Robert Richter @ 2010-02-10 16:09 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: eranian, linux-kernel, mingo, paulus, davem, fweisbec,
	perfmon2-devel, eranian

On 10.02.10 12:59:26, Peter Zijlstra wrote:
> On Mon, 2010-02-08 at 17:17 +0200, Stephane Eranian wrote:
> >         This patch adds correct AMD Northbridge event scheduling.
> >         It must be applied on top tip-x86 + hw_perf_enable() fix.
> > 
> >         NB events are events measuring L3 cache, Hypertransport
> >         traffic. They are identified by an event code  >= 0xe0.
> >         They measure events on the Northbride which is shared
> >         by all cores on a package. NB events are counted on a
> >         shared set of counters. When a NB event is programmed
> >         in a counter, the data actually comes from a shared
> >         counter. Thus, access to those counters needs to be
> >         synchronized.
> > 
> >         We implement the synchronization such that no two cores
> >         can be measuring NB events using the same counters. Thus,
> >         we maintain a per-NB * allocation table. The available slot
> >         is propagated using the event_constraint structure.
> > 
> >         The 2nd version takes into account the changes on how
> >         constraints are stored by the scheduling code.
> > 
> >         The 3rd version fixes formatting issues, code readability
> >         and one bug in amd_put_event_constraints().
> > 
> >         Signed-off-by: Stephane Eranian <eranian@google.com>
> 
> OK, took this with the below merged in.

Peter,

will this go to tip/perf/core? Or is there another tree?

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center
email: robert.richter@amd.com



* Re: [PATCH]  perf_events: AMD event scheduling (v3)
  2010-02-10 16:09   ` Robert Richter
@ 2010-02-10 16:14     ` Peter Zijlstra
  0 siblings, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2010-02-10 16:14 UTC (permalink / raw)
  To: Robert Richter
  Cc: eranian, linux-kernel, mingo, paulus, davem, fweisbec,
	perfmon2-devel, eranian

On Wed, 2010-02-10 at 17:09 +0100, Robert Richter wrote:
> On 10.02.10 12:59:26, Peter Zijlstra wrote:
> > On Mon, 2010-02-08 at 17:17 +0200, Stephane Eranian wrote:
> > >         This patch adds correct AMD Northbridge event scheduling.
> > >         It must be applied on top tip-x86 + hw_perf_enable() fix.
> > > 
> > >         NB events are events measuring L3 cache, Hypertransport
> > >         traffic. They are identified by an event code  >= 0xe0.
> > >         They measure events on the Northbride which is shared
> > >         by all cores on a package. NB events are counted on a
> > >         shared set of counters. When a NB event is programmed
> > >         in a counter, the data actually comes from a shared
> > >         counter. Thus, access to those counters needs to be
> > >         synchronized.
> > > 
> > >         We implement the synchronization such that no two cores
> > >         can be measuring NB events using the same counters. Thus,
> > >         we maintain a per-NB * allocation table. The available slot
> > >         is propagated using the event_constraint structure.
> > > 
> > >         The 2nd version takes into account the changes on how
> > >         constraints are stored by the scheduling code.
> > > 
> > >         The 3rd version fixes formatting issues, code readability
> > >         and one bug in amd_put_event_constraints().
> > > 
> > >         Signed-off-by: Stephane Eranian <eranian@google.com>
> > 
> > OK, took this with the below merged in.
> 
> Peter,
> 
> will this go to tip/perf/core? Or is there another tree?

Currently my quilt queue, should end up in tip/perf/core in a day or so.



* Re: [perfmon2] [PATCH] perf_events: AMD event scheduling (v3)
  2010-02-10 16:07             ` Stephane Eranian
@ 2010-02-10 16:17               ` Peter Zijlstra
  0 siblings, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2010-02-10 16:17 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Drongowski, Paul, eranian, perfmon2-devel, fweisbec,
	linux-kernel, paulus, mingo, davem

On Wed, 2010-02-10 at 17:07 +0100, Stephane Eranian wrote:
> 
> static inline int amd_is_nb_event(struct hw_perf_event *hwc)
> {
>    return (hwc->config >> 5) & 0x7 == 0x7;
> }

return (hwc->config & 0xe0) == 0xe0;

Changed it, thanks guys!



* [tip:perf/core] perf_events, x86: AMD event scheduling
  2010-02-08 15:17 [PATCH] perf_events: AMD event scheduling (v3) Stephane Eranian
  2010-02-10 11:59 ` Peter Zijlstra
@ 2010-02-26 10:25 ` tip-bot for Stephane Eranian
  1 sibling, 0 replies; 12+ messages in thread
From: tip-bot for Stephane Eranian @ 2010-02-26 10:25 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, eranian, hpa, mingo, a.p.zijlstra, tglx, mingo

Commit-ID:  38331f62c20456454eed9ebea2525f072c6f1d2e
Gitweb:     http://git.kernel.org/tip/38331f62c20456454eed9ebea2525f072c6f1d2e
Author:     Stephane Eranian <eranian@google.com>
AuthorDate: Mon, 8 Feb 2010 17:17:01 +0200
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 26 Feb 2010 10:56:53 +0100

perf_events, x86: AMD event scheduling

This patch adds correct AMD NorthBridge event scheduling.

NB events are events measuring L3 cache, Hypertransport traffic. They are
identified by an event code >= 0xe0. They measure events on the
Northbride which is shared by all cores on a package. NB events are
counted on a shared set of counters. When a NB event is programmed in a
counter, the data actually comes from a shared counter. Thus, access to
those counters needs to be synchronized.

We implement the synchronization such that no two cores can be measuring
NB events using the same counters. Thus, we maintain a per-NB allocation
table. The available slot is propagated using the event_constraint
structure.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4b703957.0702d00a.6bf2.7b7d@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c |  265 +++++++++++++++++++++++++++++++++++++-
 kernel/perf_event.c              |    5 +
 2 files changed, 267 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9173ea9..aa12f36 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -80,6 +80,13 @@ struct event_constraint {
 	int	weight;
 };
 
+struct amd_nb {
+	int nb_id;  /* NorthBridge id */
+	int refcnt; /* reference count */
+	struct perf_event *owners[X86_PMC_IDX_MAX];
+	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
+};
+
 struct cpu_hw_events {
 	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
@@ -92,6 +99,7 @@ struct cpu_hw_events {
 	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
 	u64			tags[X86_PMC_IDX_MAX];
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+	struct amd_nb		*amd_nb;
 };
 
 #define __EVENT_CONSTRAINT(c, n, m, w) {\
@@ -153,6 +161,8 @@ struct x86_pmu {
 
 static struct x86_pmu x86_pmu __read_mostly;
 
+static raw_spinlock_t amd_nb_lock;
+
 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.enabled = 1,
 };
@@ -802,7 +812,7 @@ static u64 amd_pmu_event_map(int hw_event)
 
 static u64 amd_pmu_raw_event(u64 hw_event)
 {
-#define K7_EVNTSEL_EVENT_MASK	0x7000000FFULL
+#define K7_EVNTSEL_EVENT_MASK	0xF000000FFULL
 #define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
 #define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
 #define K7_EVNTSEL_INV_MASK	0x000800000ULL
@@ -2210,6 +2220,7 @@ perf_event_nmi_handler(struct notifier_block *self,
 }
 
 static struct event_constraint unconstrained;
+static struct event_constraint emptyconstraint;
 
 static struct event_constraint bts_constraint =
 	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
@@ -2249,10 +2260,146 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
 	return &unconstrained;
 }
 
+/*
+ * AMD64 events are detected based on their event codes.
+ */
+static inline int amd_is_nb_event(struct hw_perf_event *hwc)
+{
+	return (hwc->config & 0xe0) == 0xe0;
+}
+
+static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
+				      struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct amd_nb *nb = cpuc->amd_nb;
+	int i;
+
+	/*
+	 * only care about NB events
+	 */
+	if (!(nb && amd_is_nb_event(hwc)))
+		return;
+
+	/*
+	 * need to scan whole list because event may not have
+	 * been assigned during scheduling
+	 *
+	 * no race condition possible because event can only
+	 * be removed on one CPU at a time AND PMU is disabled
+	 * when we come here
+	 */
+	for (i = 0; i < x86_pmu.num_events; i++) {
+		if (nb->owners[i] == event) {
+			cmpxchg(nb->owners+i, event, NULL);
+			break;
+		}
+	}
+}
+
+ /*
+  * AMD64 NorthBridge events need special treatment because
+  * counter access needs to be synchronized across all cores
+  * of a package. Refer to BKDG section 3.12
+  *
+  * NB events are events measuring L3 cache, Hypertransport
+  * traffic. They are identified by an event code >= 0xe00.
+  * They measure events on the NorthBride which is shared
+  * by all cores on a package. NB events are counted on a
+  * shared set of counters. When a NB event is programmed
+  * in a counter, the data actually comes from a shared
+  * counter. Thus, access to those counters needs to be
+  * synchronized.
+  *
+  * We implement the synchronization such that no two cores
+  * can be measuring NB events using the same counters. Thus,
+  * we maintain a per-NB allocation table. The available slot
+  * is propagated using the event_constraint structure.
+  *
+  * We provide only one choice for each NB event based on
+  * the fact that only NB events have restrictions. Consequently,
+  * if a counter is available, there is a guarantee the NB event
+  * will be assigned to it. If no slot is available, an empty
+  * constraint is returned and scheduling will eventually fail
+  * for this event.
+  *
+  * Note that all cores attached the same NB compete for the same
+  * counters to host NB events, this is why we use atomic ops. Some
+  * multi-chip CPUs may have more than one NB.
+  *
+  * Given that resources are allocated (cmpxchg), they must be
+  * eventually freed for others to use. This is accomplished by
+  * calling amd_put_event_constraints().
+  *
+  * Non NB events are not impacted by this restriction.
+  */
 static struct event_constraint *
 amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
-	return &unconstrained;
+	struct hw_perf_event *hwc = &event->hw;
+	struct amd_nb *nb = cpuc->amd_nb;
+	struct perf_event *old = NULL;
+	int max = x86_pmu.num_events;
+	int i, j, k = -1;
+
+	/*
+	 * if not NB event or no NB, then no constraints
+	 */
+	if (!(nb && amd_is_nb_event(hwc)))
+		return &unconstrained;
+
+	/*
+	 * detect if already present, if so reuse
+	 *
+	 * cannot merge with actual allocation
+	 * because of possible holes
+	 *
+	 * event can already be present yet not assigned (in hwc->idx)
+	 * because of successive calls to x86_schedule_events() from
+	 * hw_perf_group_sched_in() without hw_perf_enable()
+	 */
+	for (i = 0; i < max; i++) {
+		/*
+		 * keep track of first free slot
+		 */
+		if (k == -1 && !nb->owners[i])
+			k = i;
+
+		/* already present, reuse */
+		if (nb->owners[i] == event)
+			goto done;
+	}
+	/*
+	 * not present, so grab a new slot
+	 * starting either at:
+	 */
+	if (hwc->idx != -1) {
+		/* previous assignment */
+		i = hwc->idx;
+	} else if (k != -1) {
+		/* start from free slot found */
+		i = k;
+	} else {
+		/*
+		 * event not found, no slot found in
+		 * first pass, try again from the
+		 * beginning
+		 */
+		i = 0;
+	}
+	j = i;
+	do {
+		old = cmpxchg(nb->owners+i, NULL, event);
+		if (!old)
+			break;
+		if (++i == max)
+			i = 0;
+	} while (i != j);
+done:
+	if (!old)
+		return &nb->event_constraints[i];
+
+	return &emptyconstraint;
 }
 
 static int x86_event_sched_in(struct perf_event *event,
@@ -2465,7 +2612,8 @@ static __initconst struct x86_pmu amd_pmu = {
 	.apic			= 1,
 	/* use highest bit to detect overflow */
 	.max_period		= (1ULL << 47) - 1,
-	.get_event_constraints	= amd_get_event_constraints
+	.get_event_constraints	= amd_get_event_constraints,
+	.put_event_constraints	= amd_put_event_constraints
 };
 
 static __init int p6_pmu_init(void)
@@ -2589,6 +2737,91 @@ static __init int intel_pmu_init(void)
 	return 0;
 }
 
+static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
+{
+	struct amd_nb *nb;
+	int i;
+
+	nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
+	if (!nb)
+		return NULL;
+
+	memset(nb, 0, sizeof(*nb));
+	nb->nb_id = nb_id;
+
+	/*
+	 * initialize all possible NB constraints
+	 */
+	for (i = 0; i < x86_pmu.num_events; i++) {
+		set_bit(i, nb->event_constraints[i].idxmsk);
+		nb->event_constraints[i].weight = 1;
+	}
+	return nb;
+}
+
+static void amd_pmu_cpu_online(int cpu)
+{
+	struct cpu_hw_events *cpu1, *cpu2;
+	struct amd_nb *nb = NULL;
+	int i, nb_id;
+
+	if (boot_cpu_data.x86_max_cores < 2)
+		return;
+
+	/*
+	 * function may be called too early in the
+	 * boot process, in which case nb_id is bogus
+	 */
+	nb_id = amd_get_nb_id(cpu);
+	if (nb_id == BAD_APICID)
+		return;
+
+	cpu1 = &per_cpu(cpu_hw_events, cpu);
+	cpu1->amd_nb = NULL;
+
+	raw_spin_lock(&amd_nb_lock);
+
+	for_each_online_cpu(i) {
+		cpu2 = &per_cpu(cpu_hw_events, i);
+		nb = cpu2->amd_nb;
+		if (!nb)
+			continue;
+		if (nb->nb_id == nb_id)
+			goto found;
+	}
+
+	nb = amd_alloc_nb(cpu, nb_id);
+	if (!nb) {
+		pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
+		raw_spin_unlock(&amd_nb_lock);
+		return;
+	}
+found:
+	nb->refcnt++;
+	cpu1->amd_nb = nb;
+
+	raw_spin_unlock(&amd_nb_lock);
+}
+
+static void amd_pmu_cpu_offline(int cpu)
+{
+	struct cpu_hw_events *cpuhw;
+
+	if (boot_cpu_data.x86_max_cores < 2)
+		return;
+
+	cpuhw = &per_cpu(cpu_hw_events, cpu);
+
+	raw_spin_lock(&amd_nb_lock);
+
+	if (--cpuhw->amd_nb->refcnt == 0)
+		kfree(cpuhw->amd_nb);
+
+	cpuhw->amd_nb = NULL;
+
+	raw_spin_unlock(&amd_nb_lock);
+}
+
 static __init int amd_pmu_init(void)
 {
 	/* Performance-monitoring supported from K7 and later: */
@@ -2601,6 +2834,11 @@ static __init int amd_pmu_init(void)
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
 	       sizeof(hw_cache_event_ids));
 
+	/*
+	 * explicitly initialize the boot cpu, other cpus will get
+	 * the cpu hotplug callbacks from smp_init()
+	 */
+	amd_pmu_cpu_online(smp_processor_id());
 	return 0;
 }
 
@@ -2934,4 +3172,25 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 void hw_perf_event_setup_online(int cpu)
 {
 	init_debug_store_on_cpu(cpu);
+
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		amd_pmu_cpu_online(cpu);
+		break;
+	default:
+		return;
+	}
+}
+
+void hw_perf_event_setup_offline(int cpu)
+{
+	init_debug_store_on_cpu(cpu);
+
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		amd_pmu_cpu_offline(cpu);
+		break;
+	default:
+		return;
+	}
 }
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 74c6002..fb4e56e 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -98,6 +98,7 @@ void __weak hw_perf_enable(void)		{ barrier(); }
 
 void __weak hw_perf_event_setup(int cpu)	{ barrier(); }
 void __weak hw_perf_event_setup_online(int cpu)	{ barrier(); }
+void __weak hw_perf_event_setup_offline(int cpu)	{ barrier(); }
 
 int __weak
 hw_perf_group_sched_in(struct perf_event *group_leader,
@@ -5462,6 +5463,10 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 		perf_event_exit_cpu(cpu);
 		break;
 
+	case CPU_DEAD:
+		hw_perf_event_setup_offline(cpu);
+		break;
+
 	default:
 		break;
 	}

