* [PATCH 0/4] perf: Intel uncore pmu counting support
@ 2011-06-30  8:09 Lin Ming
  2011-06-30  8:09 ` [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu Lin Ming
                   ` (5 more replies)
  0 siblings, 6 replies; 29+ messages in thread
From: Lin Ming @ 2011-06-30  8:09 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Andi Kleen, Stephane Eranian,
	Arnaldo Carvalho de Melo
  Cc: linux-kernel

Hi, all

I posted uncore patches months ago, but they were put on hold due to an
uncore interrupt problem.

This series is cut down to support uncore pmu counting only,
so uncore interrupt handling is not needed.

The uncore pmu type is allocated dynamically and exported via sysfs.
$ cat /sys/bus/event_source/devices/uncore/type
6

You can count uncore raw events as below:
$ perf stat -e uncore:r0101 ls

The perf tool reads the uncore pmu type id from sysfs to set up perf_event_attr::type.
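
For reference, a minimal user-space sketch of that flow (not part of this
series; the helper name and error handling are illustrative only):

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	/* Read the dynamically allocated type id, then open a counting event. */
	static int open_uncore_event(unsigned long raw_config, int cpu)
	{
		struct perf_event_attr attr;
		int type;

		FILE *f = fopen("/sys/bus/event_source/devices/uncore/type", "r");
		if (!f)
			return -1;
		if (fscanf(f, "%d", &type) != 1) {
			fclose(f);
			return -1;
		}
		fclose(f);

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = type;		/* dynamically allocated uncore type id */
		attr.config = raw_config;	/* e.g. 0x0101 for uncore:r0101 */

		/* pid == -1: system-wide counting, pinned to the given cpu */
		return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
	}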

Comments are appreciated.

Thanks,
Lin Ming



* [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu
  2011-06-30  8:09 [PATCH 0/4] perf: Intel uncore pmu counting support Lin Ming
@ 2011-06-30  8:09 ` Lin Ming
  2011-06-30 14:08   ` Peter Zijlstra
  2011-06-30 16:58   ` Andi Kleen
  2011-06-30  8:09 ` [PATCH 2/4] perf, x86: Add Intel SandyBridge " Lin Ming
                   ` (4 subsequent siblings)
  5 siblings, 2 replies; 29+ messages in thread
From: Lin Ming @ 2011-06-30  8:09 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Andi Kleen, Stephane Eranian,
	Arnaldo Carvalho de Melo
  Cc: linux-kernel

Add Intel Nehalem/Westmere uncore pmu support, along with the generic
data structures needed to support uncore pmus.

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
---
 arch/x86/kernel/cpu/Makefile                  |    1 +
 arch/x86/kernel/cpu/perf_event_intel_uncore.c |  351 +++++++++++++++++++++++++
 arch/x86/kernel/cpu/perf_event_intel_uncore.h |   48 ++++
 3 files changed, 400 insertions(+), 0 deletions(-)
 create mode 100644 arch/x86/kernel/cpu/perf_event_intel_uncore.c
 create mode 100644 arch/x86/kernel/cpu/perf_event_intel_uncore.h

diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 6042981..31fd49e 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_CPU_SUP_TRANSMETA_32)	+= transmeta.o
 obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o
 
 obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
+obj-$(CONFIG_PERF_EVENTS)              += perf_event_intel_uncore.o
 
 obj-$(CONFIG_X86_MCE)			+= mcheck/
 obj-$(CONFIG_MTRR)			+= mtrr/
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
new file mode 100644
index 0000000..01060ce
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -0,0 +1,351 @@
+#include "perf_event_intel_uncore.h"
+
+static DEFINE_PER_CPU(struct cpu_uncore_events, cpu_uncore_events);
+static DEFINE_RAW_SPINLOCK(intel_uncore_lock);
+
+static bool uncore_pmu_initialized;
+static struct intel_uncore_pmu intel_uncore_pmu __read_mostly;
+
+/* Nehalem/Westmere uncore pmu */
+
+static void nhm_uncore_pmu_enable_all(void)
+{
+	u64 ctrl = (1 << intel_uncore_pmu.num_counters) - 1;
+
+	wrmsrl(NHM_MSR_UNCORE_PERF_GLOBAL_CTRL, ctrl);
+}
+
+static void nhm_uncore_pmu_disable_all(void)
+{
+	wrmsrl(NHM_MSR_UNCORE_PERF_GLOBAL_CTRL, 0);
+}
+
+static int nhm_uncore_pmu_hw_config(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	hwc->config = event->attr.config & NHM_UNCORE_RAW_EVENT_MASK;
+	hwc->config_base = NHM_MSR_UNCORE_PERFEVTSEL0 + hwc->idx;
+	hwc->event_base = NHM_MSR_UNCORE_PMC0 + hwc->idx;
+
+	return 0;
+}
+
+static void nhm_uncore_pmu_enable_event(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	wrmsrl(hwc->config_base,
+	       hwc->config | NHM_UNCORE_EVENTSEL_ENABLE);
+}
+
+static void nhm_uncore_pmu_disable_event(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	wrmsrl(hwc->config_base, hwc->config);
+}
+
+static __initconst const struct intel_uncore_pmu nhm_uncore_pmu = {
+	.name			= "Nehalem/Westmere",
+	.disable_all		= nhm_uncore_pmu_disable_all,
+	.enable_all		= nhm_uncore_pmu_enable_all,
+	.enable			= nhm_uncore_pmu_enable_event,
+	.disable		= nhm_uncore_pmu_disable_event,
+	.hw_config		= nhm_uncore_pmu_hw_config,
+	.num_counters		= 8,
+	.cntval_bits		= 48,
+};
+
+static u64 uncore_perf_event_update(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int shift = 64 - intel_uncore_pmu.cntval_bits;
+	u64 prev_raw_count, new_raw_count;
+	s64 delta;
+
+	/*
+	 * Careful: an NMI might modify the previous event value.
+	 *
+	 * Our tactic to handle this is to first atomically read and
+	 * exchange a new raw count - then add that new-prev delta
+	 * count to the generic event atomically:
+	 */
+again:
+	prev_raw_count = local64_read(&hwc->prev_count);
+	rdmsrl(hwc->event_base, new_raw_count);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			    new_raw_count) != prev_raw_count)
+		goto again;
+
+	/*
+	 * Now we have the new raw value and have updated the prev
+	 * timestamp already. We can now calculate the elapsed delta
+	 * (event-)time and add that to the generic event.
+	 *
+	 * Careful, not all hw sign-extends above the physical width
+	 * of the count.
+	 */
+	delta = (new_raw_count << shift) - (prev_raw_count << shift);
+	delta >>= shift;
+
+	local64_add(delta, &event->count);
+
+	return new_raw_count;
+}
+
+static struct pmu uncore_pmu;
+
+static int uncore_pmu_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (!uncore_pmu_initialized)
+		return -ENOENT;
+
+	if (event->attr.type != uncore_pmu.type)
+		return -ENOENT;
+
+	/*
+	 * Uncore PMU measures at all privilege levels all the time.
+	 * So it doesn't make sense to specify any exclude bits.
+	 */
+	if (event->attr.exclude_user || event->attr.exclude_kernel
+	    || event->attr.exclude_hv || event->attr.exclude_idle)
+		return -ENOENT;
+
+	/* Sampling not supported yet */
+	if (hwc->sample_period)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void uncore_pmu_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 now;
+
+	rdmsrl(hwc->event_base, now);
+
+	local64_set(&event->hw.prev_count, now);
+	intel_uncore_pmu.enable(event);
+}
+
+static void uncore_pmu_stop(struct perf_event *event, int flags)
+{
+	intel_uncore_pmu.disable(event);
+	uncore_perf_event_update(event);
+}
+
+static int uncore_pmu_add(struct perf_event *event, int flags)
+{
+	struct cpu_uncore_events *cpuc = &__get_cpu_var(cpu_uncore_events);
+	struct intel_uncore *uncore = cpuc->intel_uncore;
+	int ret = 1;
+	int i;
+
+	spin_lock(&uncore->lock);
+
+	for (i = 0; i < X86_PMC_IDX_MAX; i++) {
+		if (!uncore->events[i]) {
+			uncore->events[i] = event;
+			uncore->n_events++;
+			event->hw.idx = i;
+			intel_uncore_pmu.hw_config(event);
+
+			if (flags & PERF_EF_START)
+				uncore_pmu_start(event, flags);
+			ret = 0;
+			break;
+		}
+	}
+
+	if (uncore->n_events == 1)
+		intel_uncore_pmu.enable_all();
+
+	spin_unlock(&uncore->lock);
+
+	return ret;
+}
+
+static void uncore_pmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_uncore_events *cpuc = &__get_cpu_var(cpu_uncore_events);
+	struct intel_uncore *uncore = cpuc->intel_uncore;
+	struct hw_perf_event *hwc = &event->hw;
+	int i;
+
+	spin_lock(&uncore->lock);
+
+	for (i = 0; i < X86_PMC_IDX_MAX; i++) {
+		if (uncore->events[i] == event) {
+			uncore->events[hwc->idx] = NULL;
+			uncore->n_events--;
+
+			uncore_pmu_stop(event, flags);
+			break;
+		}
+	}
+
+	if (uncore->n_events == 0)
+		intel_uncore_pmu.disable_all();
+
+	spin_unlock(&uncore->lock);
+}
+
+static void uncore_pmu_read(struct perf_event *event)
+{
+	uncore_perf_event_update(event);
+}
+
+static struct pmu uncore_pmu = {
+	.event_init	= uncore_pmu_event_init,
+	.add		= uncore_pmu_add,
+	.del		= uncore_pmu_del,
+	.start		= uncore_pmu_start,
+	.stop		= uncore_pmu_stop,
+	.read		= uncore_pmu_read,
+};
+
+static struct intel_uncore *alloc_uncore(int cpu, int uncore_id)
+{
+	struct intel_uncore *uncore;
+
+	uncore =
+	    kmalloc_node(sizeof(struct intel_uncore), GFP_KERNEL | __GFP_ZERO,
+			 cpu_to_node(cpu));
+	if (!uncore)
+		return NULL;
+
+	uncore->id = uncore_id;
+	spin_lock_init(&uncore->lock);
+
+	return uncore;
+}
+
+static int uncore_pmu_cpu_prepare(int cpu)
+{
+	struct cpu_uncore_events *cpuc = &per_cpu(cpu_uncore_events, cpu);
+
+	WARN_ON_ONCE(cpuc->intel_uncore);
+
+	if (boot_cpu_data.x86_max_cores < 2)
+		return NOTIFY_OK;
+
+	cpuc->intel_uncore = alloc_uncore(cpu, -1);
+	if (!cpuc->intel_uncore)
+		return NOTIFY_BAD;
+
+	return NOTIFY_OK;
+}
+
+static void uncore_pmu_cpu_starting(int cpu)
+{
+	struct cpu_uncore_events *cpuc = &per_cpu(cpu_uncore_events, cpu);
+	struct intel_uncore *uncore;
+	int i, uncore_id;
+
+	if (boot_cpu_data.x86_max_cores < 2)
+		return;
+
+	uncore_id = topology_physical_package_id(cpu);
+	WARN_ON_ONCE(uncore_id == BAD_APICID);
+
+	raw_spin_lock(&intel_uncore_lock);
+
+	for_each_online_cpu(i) {
+		uncore = per_cpu(cpu_uncore_events, i).intel_uncore;
+		if (WARN_ON_ONCE(!uncore))
+			continue;
+
+		if (uncore->id == uncore_id) {
+			kfree(cpuc->intel_uncore);
+			cpuc->intel_uncore = uncore;
+			break;
+		}
+	}
+
+	cpuc->intel_uncore->id = uncore_id;
+	cpuc->intel_uncore->refcnt++;
+
+	raw_spin_unlock(&intel_uncore_lock);
+}
+
+static void uncore_pmu_cpu_dead(int cpu)
+{
+	struct cpu_uncore_events *cpuhw;
+
+	if (boot_cpu_data.x86_max_cores < 2)
+		return;
+
+	cpuhw = &per_cpu(cpu_uncore_events, cpu);
+
+	raw_spin_lock(&intel_uncore_lock);
+
+	if (cpuhw->intel_uncore) {
+		struct intel_uncore *uncore = cpuhw->intel_uncore;
+
+		if (uncore->id == -1 || --uncore->refcnt == 0)
+			kfree(uncore);
+
+		cpuhw->intel_uncore = NULL;
+	}
+
+	raw_spin_unlock(&intel_uncore_lock);
+}
+
+static int __cpuinit
+uncore_pmu_notifier(struct notifier_block *self, unsigned long action,
+		    void *hcpu)
+{
+	unsigned int cpu = (long)hcpu;
+	int ret = NOTIFY_OK;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		ret = uncore_pmu_cpu_prepare(cpu);
+		break;
+
+	case CPU_STARTING:
+		uncore_pmu_cpu_starting(cpu);
+		break;
+
+	case CPU_DYING:
+		uncore_pmu_cpu_dead(cpu);
+		break;
+
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static int __init uncore_pmu_init(void)
+{
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+	    boot_cpu_data.x86 != 6)
+		return 0;
+
+	switch (boot_cpu_data.x86_model) {
+	case 26: /* Nehalem */
+	case 30:
+	case 31:
+	case 37: /* Westmere */
+		intel_uncore_pmu = nhm_uncore_pmu;
+		break;
+
+	default:
+		return 0;
+	}
+
+	pr_cont("Performance Events: %s uncore PMU.", intel_uncore_pmu.name);
+
+	perf_pmu_register(&uncore_pmu, "uncore", -1);
+	perf_cpu_notifier(uncore_pmu_notifier);
+	uncore_pmu_initialized = true;
+	return 0;
+}
+early_initcall(uncore_pmu_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
new file mode 100644
index 0000000..f622f97
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -0,0 +1,48 @@
+#include <linux/perf_event.h>
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+#include <linux/slab.h>
+
+/* Nehalem/Westmere uncore MSR */
+
+#define NHM_MSR_UNCORE_PERF_GLOBAL_CTRL    	0x391
+#define NHM_MSR_UNCORE_PMC0			0x3b0
+#define NHM_MSR_UNCORE_PERFEVTSEL0		0x3c0
+
+#define NHM_UNCORE_EVENTSEL_EVENT		0x000000FFULL
+#define NHM_UNCORE_EVENTSEL_UMASK		0x0000FF00ULL
+#define NHM_UNCORE_EVENTSEL_EDGE		(1ULL << 18)
+#define NHM_UNCORE_EVENTSEL_ENABLE		(1ULL << 22)
+#define NHM_UNCORE_EVENTSEL_INV			(1ULL << 23)
+#define NHM_UNCORE_EVENTSEL_CMASK		0xFF000000ULL
+
+#define NHM_UNCORE_RAW_EVENT_MASK	\
+	(NHM_UNCORE_EVENTSEL_EVENT |	\
+	 NHM_UNCORE_EVENTSEL_UMASK |	\
+	 NHM_UNCORE_EVENTSEL_EDGE  |	\
+	 NHM_UNCORE_EVENTSEL_INV   |	\
+	 NHM_UNCORE_EVENTSEL_CMASK)
+
+struct intel_uncore {
+	int id;			/* uncore id */
+	int refcnt;		/* reference count */
+
+	struct perf_event *events[X86_PMC_IDX_MAX];	/* in counter order */
+	int n_events;
+	struct spinlock lock;
+};
+
+struct cpu_uncore_events {
+	struct intel_uncore *intel_uncore;
+};
+
+struct intel_uncore_pmu {
+	const char	*name;
+	void		(*disable_all)(void);
+	void		(*enable_all)(void);
+	void		(*enable)(struct perf_event *);
+	void		(*disable)(struct perf_event *);
+	int		(*hw_config)(struct perf_event *event);
+	int		num_counters;
+	int		cntval_bits;
+};
-- 
1.7.5.1



* [PATCH 2/4] perf, x86: Add Intel SandyBridge uncore pmu
  2011-06-30  8:09 [PATCH 0/4] perf: Intel uncore pmu counting support Lin Ming
  2011-06-30  8:09 ` [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu Lin Ming
@ 2011-06-30  8:09 ` Lin Ming
  2011-06-30 22:09   ` Peter Zijlstra
  2011-06-30  8:09 ` [PATCH 3/4] perf: Remove perf_event_attr::type check Lin Ming
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 29+ messages in thread
From: Lin Ming @ 2011-06-30  8:09 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Andi Kleen, Stephane Eranian,
	Arnaldo Carvalho de Melo
  Cc: linux-kernel

Add Intel SandyBridge uncore pmu support.

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c |   49 +++++++++++++++++++++++++
 arch/x86/kernel/cpu/perf_event_intel_uncore.h |   40 ++++++++++++++++++++
 2 files changed, 89 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 01060ce..fdfe7e6 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -57,6 +57,51 @@ static __initconst const struct intel_uncore_pmu nhm_uncore_pmu = {
 	.cntval_bits		= 48,
 };
 
+/* SandyBridge uncore pmu */
+
+static struct uncore_config {
+	unsigned long config_base;
+	unsigned long event_base;
+} snb_uncore_configs[SNB_UNCORE_NUM_COUNTERS] = {
+	{SNB_MSR_UNC_CBO_0_PERFEVTSEL0, SNB_MSR_UNC_CBO_0_PER_CTR0},
+	{SNB_MSR_UNC_CBO_0_PERFEVTSEL1, SNB_MSR_UNC_CBO_0_PER_CTR1},
+	{SNB_MSR_UNC_CBO_1_PERFEVTSEL0, SNB_MSR_UNC_CBO_1_PER_CTR0},
+	{SNB_MSR_UNC_CBO_1_PERFEVTSEL1, SNB_MSR_UNC_CBO_1_PER_CTR1},
+	{SNB_MSR_UNC_CBO_2_PERFEVTSEL0, SNB_MSR_UNC_CBO_2_PER_CTR0},
+	{SNB_MSR_UNC_CBO_2_PERFEVTSEL1, SNB_MSR_UNC_CBO_2_PER_CTR1},
+	{SNB_MSR_UNC_CBO_3_PERFEVTSEL0, SNB_MSR_UNC_CBO_3_PER_CTR0},
+	{SNB_MSR_UNC_CBO_3_PERFEVTSEL1, SNB_MSR_UNC_CBO_3_PER_CTR1},
+};
+
+static void snb_uncore_pmu_enable_all(void)
+{
+	wrmsrl(SNB_MSR_UNCORE_PERF_GLOBAL_CTRL,
+		SNB_MSR_UNCORE_PERF_GLOBAL_CTRL_EN);
+}
+
+static int snb_uncore_pmu_hw_config(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int i = hwc->idx;
+
+	hwc->config = event->attr.config & SNB_UNCORE_RAW_EVENT_MASK;
+	hwc->config_base = snb_uncore_configs[i].config_base;
+	hwc->event_base = snb_uncore_configs[i].event_base;
+
+	return 0;
+}
+
+static __initconst const struct intel_uncore_pmu snb_uncore_pmu = {
+	.name			= "SandyBridge",
+	.disable_all		= nhm_uncore_pmu_disable_all,
+	.enable_all		= snb_uncore_pmu_enable_all,
+	.enable			= nhm_uncore_pmu_enable_event,
+	.disable		= nhm_uncore_pmu_disable_event,
+	.hw_config		= snb_uncore_pmu_hw_config,
+	.num_counters		= 8,
+	.cntval_bits		= 48,
+};
+
 static u64 uncore_perf_event_update(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
@@ -337,6 +382,10 @@ static int __init uncore_pmu_init(void)
 		intel_uncore_pmu = nhm_uncore_pmu;
 		break;
 
+	case 42: /* SandyBridge */
+		intel_uncore_pmu = snb_uncore_pmu;
+		break;
+
 	default:
 		return 0;
 	}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index f622f97..9ba152b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -23,6 +23,46 @@
 	 NHM_UNCORE_EVENTSEL_INV   |	\
 	 NHM_UNCORE_EVENTSEL_CMASK)
 
+/* SandyBridge uncore MSR */
+
+#define SNB_MSR_UNC_CBO_0_PERFEVTSEL0		0x700
+#define SNB_MSR_UNC_CBO_0_PERFEVTSEL1		0x701
+#define SNB_MSR_UNC_CBO_0_UNIT_STATUS		0x705
+#define SNB_MSR_UNC_CBO_0_PER_CTR0		0x706
+#define SNB_MSR_UNC_CBO_0_PER_CTR1		0x707
+
+#define SNB_MSR_UNC_CBO_1_PERFEVTSEL0		0x710
+#define SNB_MSR_UNC_CBO_1_PERFEVTSEL1		0x711
+#define SNB_MSR_UNC_CBO_1_UNIT_STATUS		0x715
+#define SNB_MSR_UNC_CBO_1_PER_CTR0		0x716
+#define SNB_MSR_UNC_CBO_1_PER_CTR1		0x717
+
+#define SNB_MSR_UNC_CBO_2_PERFEVTSEL0		0x720
+#define SNB_MSR_UNC_CBO_2_PERFEVTSEL1		0x721
+#define SNB_MSR_UNC_CBO_2_UNIT_STATUS		0x725
+#define SNB_MSR_UNC_CBO_2_PER_CTR0		0x726
+#define SNB_MSR_UNC_CBO_2_PER_CTR1		0x727
+
+#define SNB_MSR_UNC_CBO_3_PERFEVTSEL0		0x730
+#define SNB_MSR_UNC_CBO_3_PERFEVTSEL1		0x731
+#define SNB_MSR_UNC_CBO_3_UNIT_STATUS		0x735
+#define SNB_MSR_UNC_CBO_3_PER_CTR0		0x736
+#define SNB_MSR_UNC_CBO_3_PER_CTR1		0x737
+
+#define SNB_MSR_UNCORE_PERF_GLOBAL_CTRL		0x391
+#define SNB_MSR_UNCORE_PERF_GLOBAL_CTRL_EN	(1ULL << 29)
+
+#define SNB_UNCORE_EVENTSEL_CMASK		0x1F000000ULL
+
+#define SNB_UNCORE_RAW_EVENT_MASK	\
+	(NHM_UNCORE_EVENTSEL_EVENT |	\
+	 NHM_UNCORE_EVENTSEL_UMASK |	\
+	 NHM_UNCORE_EVENTSEL_EDGE  |	\
+	 NHM_UNCORE_EVENTSEL_INV   |	\
+	 SNB_UNCORE_EVENTSEL_CMASK)
+
+#define SNB_UNCORE_NUM_COUNTERS			8
+
 struct intel_uncore {
 	int id;			/* uncore id */
 	int refcnt;		/* reference count */
-- 
1.7.5.1



* [PATCH 3/4] perf: Remove perf_event_attr::type check
  2011-06-30  8:09 [PATCH 0/4] perf: Intel uncore pmu counting support Lin Ming
  2011-06-30  8:09 ` [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu Lin Ming
  2011-06-30  8:09 ` [PATCH 2/4] perf, x86: Add Intel SandyBridge " Lin Ming
@ 2011-06-30  8:09 ` Lin Ming
  2011-07-21 19:31   ` [tip:perf/core] " tip-bot for Lin Ming
  2011-06-30  8:09 ` [PATCH 4/4] perf tool: Get PMU type id from sysfs Lin Ming
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 29+ messages in thread
From: Lin Ming @ 2011-06-30  8:09 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Andi Kleen, Stephane Eranian,
	Arnaldo Carvalho de Melo
  Cc: linux-kernel

PMU type ids can be allocated dynamically, so the perf_event_attr::type
check performed when copying the attribute from userspace to the kernel is
no longer valid.

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
---
 kernel/events/core.c |    7 -------
 1 files changed, 0 insertions(+), 7 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5e70f62..eaaddbf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5892,13 +5892,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 	if (ret)
 		return -EFAULT;
 
-	/*
-	 * If the type exists, the corresponding creation will verify
-	 * the attr->config.
-	 */
-	if (attr->type >= PERF_TYPE_MAX)
-		return -EINVAL;
-
 	if (attr->__reserved_1)
 		return -EINVAL;
 
-- 
1.7.5.1



* [PATCH 4/4] perf tool: Get PMU type id from sysfs
  2011-06-30  8:09 [PATCH 0/4] perf: Intel uncore pmu counting support Lin Ming
                   ` (2 preceding siblings ...)
  2011-06-30  8:09 ` [PATCH 3/4] perf: Remove perf_event_attr::type check Lin Ming
@ 2011-06-30  8:09 ` Lin Ming
  2011-06-30 12:10 ` [PATCH 0/4] perf: Intel uncore pmu counting support Stephane Eranian
  2011-07-01 11:08 ` Ingo Molnar
  5 siblings, 0 replies; 29+ messages in thread
From: Lin Ming @ 2011-06-30  8:09 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Andi Kleen, Stephane Eranian,
	Arnaldo Carvalho de Melo
  Cc: linux-kernel

PMU type ids can be allocated dynamically, for example:

$ perf stat -e uncore:r0101 ls

$ cat /sys/bus/event_source/devices/uncore/type
6

The uncore pmu type id is read from sysfs and set into attr->type.

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
---
 tools/perf/util/parse-events.c |   55 ++++++++++++++++++++++++++++++++++++++-
 1 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 41982c3..6cbbeda 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -684,7 +684,7 @@ parse_symbolic_event(const char **strp, struct perf_event_attr *attr)
 }
 
 static enum event_result
-parse_raw_event(const char **strp, struct perf_event_attr *attr)
+parse_raw_config(const char **strp, struct perf_event_attr *attr)
 {
 	const char *str = *strp;
 	u64 config;
@@ -695,7 +695,6 @@ parse_raw_event(const char **strp, struct perf_event_attr *attr)
 	n = hex2u64(str + 1, &config);
 	if (n > 0) {
 		*strp = str + n + 1;
-		attr->type = PERF_TYPE_RAW;
 		attr->config = config;
 		return EVT_HANDLED;
 	}
@@ -703,6 +702,54 @@ parse_raw_event(const char **strp, struct perf_event_attr *attr)
 }
 
 static enum event_result
+parse_raw_event(const char **strp, struct perf_event_attr *attr)
+{
+	if (parse_raw_config(strp, attr) != EVT_HANDLED)
+		return EVT_FAILED;
+
+	attr->type = PERF_TYPE_RAW;
+	return EVT_HANDLED;
+}
+
+#define EVENT_SOURCE_DIR "/sys/bus/event_source/devices"
+
+static enum event_result
+parse_sysfs_event(const char **strp, struct perf_event_attr *attr)
+{
+	char *pmu_name;
+	char evt_path[MAXPATHLEN];
+	char type_buf[4];
+	u64 type;
+	int fd;
+
+	pmu_name = strchr(*strp, ':');
+	if (!pmu_name)
+		return EVT_FAILED;
+
+	pmu_name = strndup(*strp, pmu_name - *strp);
+
+	snprintf(evt_path, MAXPATHLEN, "%s/%s/type", EVENT_SOURCE_DIR,
+		 pmu_name);
+
+	fd = open(evt_path, O_RDONLY);
+	if (fd < 0)
+		return EVT_FAILED;
+
+	if (read(fd, type_buf, sizeof(type_buf)) < 0) {
+		close(fd);
+		return EVT_FAILED;
+	}
+
+	close(fd);
+	type = atoll(type_buf);
+	attr->type = type;
+	*strp += strlen(pmu_name) + 1; /* + 1 for the ':' */
+	free(pmu_name);
+
+	return parse_raw_config(strp, attr);
+}
+
+static enum event_result
 parse_numeric_event(const char **strp, struct perf_event_attr *attr)
 {
 	const char *str = *strp;
@@ -783,6 +830,10 @@ parse_event_symbols(const struct option *opt, const char **str,
 {
 	enum event_result ret;
 
+	ret = parse_sysfs_event(str, attr);
+	if (ret != EVT_FAILED)
+		goto modifier;
+
 	ret = parse_tracepoint_event(opt, str, attr);
 	if (ret != EVT_FAILED)
 		goto modifier;
-- 
1.7.5.1



* Re: [PATCH 0/4] perf: Intel uncore pmu counting support
  2011-06-30  8:09 [PATCH 0/4] perf: Intel uncore pmu counting support Lin Ming
                   ` (3 preceding siblings ...)
  2011-06-30  8:09 ` [PATCH 4/4] perf tool: Get PMU type id from sysfs Lin Ming
@ 2011-06-30 12:10 ` Stephane Eranian
  2011-06-30 14:10   ` Peter Zijlstra
                     ` (2 more replies)
  2011-07-01 11:08 ` Ingo Molnar
  5 siblings, 3 replies; 29+ messages in thread
From: Stephane Eranian @ 2011-06-30 12:10 UTC (permalink / raw)
  To: Lin Ming
  Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen,
	Arnaldo Carvalho de Melo, linux-kernel

On Thu, Jun 30, 2011 at 10:09 AM, Lin Ming <ming.m.lin@intel.com> wrote:
> Hi, all
>
> I posted uncore patches months ago, but it was pended due to an uncore
> interrupt problem.
>
> This series are cut to support uncore pmu counting only.
> So uncore interrupt handling is not needed.
>
You're making the assumption that when counting, you can never construct
a measurement that will cause a counter to overflow the 39 bits. If not, then
you need interrupt handling even when counting.
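
(As a rough order-of-magnitude check, assuming an uncore clock around
2.6 GHz: a 39-bit count wraps after roughly 2^39 / 2.6e9 ~ 3.5 minutes of
continuous counting, while a 48-bit count lasts on the order of 30 hours.)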


> The uncore pmu type is allocated dynamically and exported via sysfs.
> $ cat /sys/bus/event_source/devices/uncore/type
> 6
>
> You can count uncore raw events as below,
> $ perf stat -e uncore:r0101 ls
>
> It reads uncore pmu type id from sysfs to setup perf_event_attr::type.
>
> Comments are appreciated.
>
> Thanks,
> Lin Ming
>
>


* Re: [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu
  2011-06-30  8:09 ` [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu Lin Ming
@ 2011-06-30 14:08   ` Peter Zijlstra
  2011-07-01  6:05     ` Lin Ming
  2011-06-30 16:58   ` Andi Kleen
  1 sibling, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2011-06-30 14:08 UTC (permalink / raw)
  To: Lin Ming
  Cc: Ingo Molnar, Andi Kleen, Stephane Eranian,
	Arnaldo Carvalho de Melo, linux-kernel

On Thu, 2011-06-30 at 08:09 +0000, Lin Ming wrote:
> +static struct pmu uncore_pmu = {

	.task_ctx_nr = perf_invalid_context,

> +       .event_init     = uncore_pmu_event_init,
> +       .add            = uncore_pmu_add,
> +       .del            = uncore_pmu_del,
> +       .start          = uncore_pmu_start,
> +       .stop           = uncore_pmu_stop,
> +       .read           = uncore_pmu_read,
> +}; 




* Re: [PATCH 0/4] perf: Intel uncore pmu counting support
  2011-06-30 12:10 ` [PATCH 0/4] perf: Intel uncore pmu counting support Stephane Eranian
@ 2011-06-30 14:10   ` Peter Zijlstra
  2011-06-30 16:27   ` Stephane Eranian
  2011-07-01  5:49   ` Lin Ming
  2 siblings, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2011-06-30 14:10 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Lin Ming, Ingo Molnar, Andi Kleen, Arnaldo Carvalho de Melo,
	linux-kernel

On Thu, 2011-06-30 at 14:10 +0200, Stephane Eranian wrote:
> > This series are cut to support uncore pmu counting only.
> > So uncore interrupt handling is not needed.
> >
> You're making the assumption that when counting, you can never construct
> a measurement that will cause a counter to overflow the 39 bits. If not, then
> you need interrupt handling even when counting.
> 

Also, I'm missing the stuff that maps everything to one active cpu in
the uncore mask. IIRC your previous postings had stuff like that.


* Re: [PATCH 0/4] perf: Intel uncore pmu counting support
  2011-06-30 12:10 ` [PATCH 0/4] perf: Intel uncore pmu counting support Stephane Eranian
  2011-06-30 14:10   ` Peter Zijlstra
@ 2011-06-30 16:27   ` Stephane Eranian
  2011-07-01  3:17     ` Lin Ming
  2011-07-01  5:49   ` Lin Ming
  2 siblings, 1 reply; 29+ messages in thread
From: Stephane Eranian @ 2011-06-30 16:27 UTC (permalink / raw)
  To: Lin Ming
  Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen,
	Arnaldo Carvalho de Melo, linux-kernel

On Thu, Jun 30, 2011 at 2:10 PM, Stephane Eranian <eranian@google.com> wrote:
> On Thu, Jun 30, 2011 at 10:09 AM, Lin Ming <ming.m.lin@intel.com> wrote:
>> Hi, all
>>
>> I posted uncore patches months ago, but it was pended due to an uncore
>> interrupt problem.
>>
>> This series are cut to support uncore pmu counting only.
>> So uncore interrupt handling is not needed.
>>
> You're making the assumption that when counting, you can never construct
> a measurement that will cause a counter to overflow the 39 bits. If not, then
> you need interrupt handling even when counting.
>
The actual counter width is 48. But wrmsrl() can only write the bottom 32 bits
of a register. I think Intel fixed that only with SandyBridge (see Vol3b). Thus,
the risk of 'silent' wrap around is much higher now as you have only 31 bits
to play with.

But if I read your patch correctly, it seems you are avoiding wrmsrl() on the
counter. Instead, you are reading it when you start (prev_count) and using
that value to compute the delta on stop.

Am I understanding your workaround correctly?


>
>> The uncore pmu type is allocated dynamically and exported via sysfs.
>> $ cat /sys/bus/event_source/devices/uncore/type
>> 6
>>
>> You can count uncore raw events as below,
>> $ perf stat -e uncore:r0101 ls
>>
>> It reads uncore pmu type id from sysfs to setup perf_event_attr::type.
>>
>> Comments are appreciated.
>>
>> Thanks,
>> Lin Ming
>>
>>
>


* Re: [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu
  2011-06-30  8:09 ` [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu Lin Ming
  2011-06-30 14:08   ` Peter Zijlstra
@ 2011-06-30 16:58   ` Andi Kleen
  2011-07-04  6:39     ` Lin Ming
  1 sibling, 1 reply; 29+ messages in thread
From: Andi Kleen @ 2011-06-30 16:58 UTC (permalink / raw)
  To: Lin Ming
  Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen, Stephane Eranian,
	Arnaldo Carvalho de Melo, linux-kernel

On Thu, Jun 30, 2011 at 08:09:53AM +0000, Lin Ming wrote:
> +static u64 uncore_perf_event_update(struct perf_event *event)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +	int shift = 64 - intel_uncore_pmu.cntval_bits;
> +	u64 prev_raw_count, new_raw_count;
> +	s64 delta;
> +
> +	/*
> +	 * Careful: an NMI might modify the previous event value.

There are no NMIs without sampling, so at least the comment seems bogus.
Perhaps the code could be a bit simplified now without atomics.

> +static int uncore_pmu_event_init(struct perf_event *event)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +
> +	if (!uncore_pmu_initialized)
> +		return -ENOENT;
> +
> +	if (event->attr.type != uncore_pmu.type)
> +		return -ENOENT;
> +
> +	/*
> +	 * Uncore PMU does measure at all privilege level all the time.
> +	 * So it doesn't make sense to specify any exclude bits.
> +	 */
> +	if (event->attr.exclude_user || event->attr.exclude_kernel
> +	    || event->attr.exclude_hv || event->attr.exclude_idle)
> +		return -ENOENT;
> +
> +	/* Sampling not supported yet */
> +	if (hwc->sample_period)
> +		return -EINVAL;

Don't we need a "is root" check here? uncore counts everything, so
it cannot be limited to a single process.
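
One hedged way to express such a check, assuming the usual perf paranoia
helpers apply here as they do elsewhere in the core code:

	/* illustrative only: system-wide uncore counting needs privilege */
	if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
		return -EACCES;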

> +static void uncore_pmu_cpu_starting(int cpu)
> +{
> +	struct cpu_uncore_events *cpuc = &per_cpu(cpu_uncore_events, cpu);
> +	struct intel_uncore *uncore;
> +	int i, uncore_id;
> +
> +	if (boot_cpu_data.x86_max_cores < 2)
> +		return;

Why that check? uncore counting should work on a single core system too.

I think you should remove all of those.

> +
> +	uncore_id = topology_physical_package_id(cpu);
> +	WARN_ON_ONCE(uncore_id == BAD_APICID);
> +
> +	raw_spin_lock(&intel_uncore_lock);

Does this really need to be a raw spinlock? 

> +#define NHM_MSR_UNCORE_PERF_GLOBAL_CTRL    	0x391
> +#define NHM_MSR_UNCORE_PMC0			0x3b0
> +#define NHM_MSR_UNCORE_PERFEVTSEL0		0x3c0

These should be in msr-index.h


-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.


* Re: [PATCH 2/4] perf, x86: Add Intel SandyBridge uncore pmu
  2011-06-30  8:09 ` [PATCH 2/4] perf, x86: Add Intel SandyBridge " Lin Ming
@ 2011-06-30 22:09   ` Peter Zijlstra
  0 siblings, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2011-06-30 22:09 UTC (permalink / raw)
  To: Lin Ming
  Cc: Ingo Molnar, Andi Kleen, Stephane Eranian,
	Arnaldo Carvalho de Melo, linux-kernel

On Thu, 2011-06-30 at 08:09 +0000, Lin Ming wrote:
> +static struct uncore_config {
> +       unsigned long config_base;
> +       unsigned long event_base;
> +} snb_uncore_configs[SNB_UNCORE_NUM_COUNTERS] = {
> +       {SNB_MSR_UNC_CBO_0_PERFEVTSEL0, SNB_MSR_UNC_CBO_0_PER_CTR0},
> +       {SNB_MSR_UNC_CBO_0_PERFEVTSEL1, SNB_MSR_UNC_CBO_0_PER_CTR1},
> +       {SNB_MSR_UNC_CBO_1_PERFEVTSEL0, SNB_MSR_UNC_CBO_1_PER_CTR0},
> +       {SNB_MSR_UNC_CBO_1_PERFEVTSEL1, SNB_MSR_UNC_CBO_1_PER_CTR1},
> +       {SNB_MSR_UNC_CBO_2_PERFEVTSEL0, SNB_MSR_UNC_CBO_2_PER_CTR0},
> +       {SNB_MSR_UNC_CBO_2_PERFEVTSEL1, SNB_MSR_UNC_CBO_2_PER_CTR1},
> +       {SNB_MSR_UNC_CBO_3_PERFEVTSEL0, SNB_MSR_UNC_CBO_3_PER_CTR0},
> +       {SNB_MSR_UNC_CBO_3_PERFEVTSEL1, SNB_MSR_UNC_CBO_3_PER_CTR1},
> +};

> +static int snb_uncore_pmu_hw_config(struct perf_event *event)
> +{
> +       struct hw_perf_event *hwc = &event->hw;
> +       int i = hwc->idx;
> +
> +       hwc->config = event->attr.config & SNB_UNCORE_RAW_EVENT_MASK;
> +       hwc->config_base = snb_uncore_configs[i].config_base;

	hwc->config_base = SNB_MSR_UNC_CBO_0_PERFEVTSEL0 + 
		0x10 * (i >> 1) + 0x6 * (i & 1);

Saves a memory lookup, might or might not be worth it.

What the heck did Intel mess those MSRs up for anyway?

> +       hwc->event_base = snb_uncore_configs[i].event_base;
> +
> +       return 0;
> +} 




* Re: [PATCH 0/4] perf: Intel uncore pmu counting support
  2011-06-30 16:27   ` Stephane Eranian
@ 2011-07-01  3:17     ` Lin Ming
  2011-07-01 10:49       ` Stephane Eranian
  0 siblings, 1 reply; 29+ messages in thread
From: Lin Ming @ 2011-07-01  3:17 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen,
	Arnaldo Carvalho de Melo, linux-kernel

On Fri, 2011-07-01 at 00:27 +0800, Stephane Eranian wrote:
> On Thu, Jun 30, 2011 at 2:10 PM, Stephane Eranian <eranian@google.com> wrote:
> > On Thu, Jun 30, 2011 at 10:09 AM, Lin Ming <ming.m.lin@intel.com> wrote:
> >> Hi, all
> >>
> >> I posted uncore patches months ago, but it was pended due to an uncore
> >> interrupt problem.
> >>
> >> This series are cut to support uncore pmu counting only.
> >> So uncore interrupt handling is not needed.
> >>
> > You're making the assumption that when counting, you can never construct
> > a measurement that will cause a counter to overflow the 39 bits. If not, then
> > you need interrupt handling even when counting.
> >
> The actual counter width is 48. But wrmsrl() can only write the bottom 32 bits
> of a register. I think Intel fixed that only with SandyBridge (see Vol3b). Thus,
> the risk of 'silent' wrap around is much higher now as you have only 31 bits
> to play with.

I just tested wrmsrl on uncore counters and it surprised me that it
supports full-width writes.

	val64.low  = 0xFFFFEEEE;
	val64.high = 0x12345678;

On Nehalem/Westmere:

	msr = 0x3b0; //NHM_MSR_UNCORE_PMC0
	wrmsrl(msr, val64.full & 0xfffffffffff); //48 bits counter
	rdmsrl(msr, val64.full);
        printk("counter value: 0x%llx\n", val64.full);

I got:
	counter value: 0x5678ffffeeee

On SandyBridge:

	msr = 0x716; //SNB_MSR_UNC_CBO_1_PER_CTR0
	wrmsrl(msr, val64.full & 0xfffffffffff); //44 bits counter
	rdmsrl(msr, val64.full);
        printk("counter value: 0x%llx\n", val64.full);

I got:
	counter value: 0x678ffffeeee

> 
> But if I read your patch correctly, it seems you are avoiding wrmsrl() on the
> counter. Instead, you are reading it when you start (prev_count) and using
> that value to compute the delta on stop.
> 
> Am I understanding your workaround correctly?

Yes, but I didn't realize that it's a workaround.

Lin Ming

> 
> 
> >
> >> The uncore pmu type is allocated dynamically and exported via sysfs.
> >> $ cat /sys/bus/event_source/devices/uncore/type
> >> 6
> >>
> >> You can count uncore raw events as below,
> >> $ perf stat -e uncore:r0101 ls
> >>
> >> It reads uncore pmu type id from sysfs to setup perf_event_attr::type.
> >>
> >> Comments are appreciated.
> >>
> >> Thanks,
> >> Lin Ming
> >>
> >>
> >




* Re: [PATCH 0/4] perf: Intel uncore pmu counting support
  2011-06-30 12:10 ` [PATCH 0/4] perf: Intel uncore pmu counting support Stephane Eranian
  2011-06-30 14:10   ` Peter Zijlstra
  2011-06-30 16:27   ` Stephane Eranian
@ 2011-07-01  5:49   ` Lin Ming
  2 siblings, 0 replies; 29+ messages in thread
From: Lin Ming @ 2011-07-01  5:49 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen,
	Arnaldo Carvalho de Melo, linux-kernel

On Thu, 2011-06-30 at 20:10 +0800, Stephane Eranian wrote:
> On Thu, Jun 30, 2011 at 10:09 AM, Lin Ming <ming.m.lin@intel.com> wrote:
> > Hi, all
> >
> > I posted uncore patches months ago, but it was pended due to an uncore
> > interrupt problem.
> >
> > This series are cut to support uncore pmu counting only.
> > So uncore interrupt handling is not needed.
> >
> You're making the assumption that when counting, you can never construct
> a measurement that will cause a counter to overflow the 39 bits. If not, then
> you need interrupt handling even when counting.

Is it enough to compute the delta on overflow as below?

        unsigned int a = 0xfffffffe;
        unsigned int b = 10;
        unsigned int delta = (int)b - (int)a;
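
For the 48-bit uncore counters, the sign-extension shift already used in
uncore_perf_event_update() relies on the same modular-arithmetic idea; a
hedged stand-alone sketch of it (kernel types assumed):

	/* illustrative: one silent wrap of a 48-bit counter is still recovered */
	static u64 delta_48(u64 prev_raw_count, u64 new_raw_count)
	{
		int shift = 64 - 48;
		s64 delta = (new_raw_count << shift) - (prev_raw_count << shift);

		return delta >> shift;
	}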

> 
> 
> > The uncore pmu type is allocated dynamically and exported via sysfs.
> > $ cat /sys/bus/event_source/devices/uncore/type
> > 6
> >
> > You can count uncore raw events as below,
> > $ perf stat -e uncore:r0101 ls
> >
> > It reads uncore pmu type id from sysfs to setup perf_event_attr::type.
> >
> > Comments are appreciated.
> >
> > Thanks,
> > Lin Ming
> >
> >




* Re: [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu
  2011-06-30 14:08   ` Peter Zijlstra
@ 2011-07-01  6:05     ` Lin Ming
  0 siblings, 0 replies; 29+ messages in thread
From: Lin Ming @ 2011-07-01  6:05 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Andi Kleen, Stephane Eranian,
	Arnaldo Carvalho de Melo, linux-kernel

On Thu, 2011-06-30 at 22:08 +0800, Peter Zijlstra wrote:
> On Thu, 2011-06-30 at 08:09 +0000, Lin Ming wrote:
> > +static struct pmu uncore_pmu = {
> 
> 	.task_ctx_nr = perf_invalid_context,

OK.

usage: perf stat [<options>] [<command>]

Currently, "perf stat" always needs a <command> to do counting.
That needs to change so the command becomes optional.

So I may add a "P" option to specify which physical processor to count.

perf stat -P 3 -e uncore:rXXXX

> 
> > +       .event_init     = uncore_pmu_event_init,
> > +       .add            = uncore_pmu_add,
> > +       .del            = uncore_pmu_del,
> > +       .start          = uncore_pmu_start,
> > +       .stop           = uncore_pmu_stop,
> > +       .read           = uncore_pmu_read,
> > +}; 
> 
> 




* Re: [PATCH 0/4] perf: Intel uncore pmu counting support
  2011-07-01  3:17     ` Lin Ming
@ 2011-07-01 10:49       ` Stephane Eranian
  2011-07-01 12:23         ` Stephane Eranian
  2011-07-04  6:03         ` Lin Ming
  0 siblings, 2 replies; 29+ messages in thread
From: Stephane Eranian @ 2011-07-01 10:49 UTC (permalink / raw)
  To: Lin Ming
  Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen,
	Arnaldo Carvalho de Melo, linux-kernel

On Fri, Jul 1, 2011 at 5:17 AM, Lin Ming <ming.m.lin@intel.com> wrote:
> On Fri, 2011-07-01 at 00:27 +0800, Stephane Eranian wrote:
>> On Thu, Jun 30, 2011 at 2:10 PM, Stephane Eranian <eranian@google.com> wrote:
>> > On Thu, Jun 30, 2011 at 10:09 AM, Lin Ming <ming.m.lin@intel.com> wrote:
>> >> Hi, all
>> >>
>> >> I posted uncore patches months ago, but it was pended due to an uncore
>> >> interrupt problem.
>> >>
>> >> This series are cut to support uncore pmu counting only.
>> >> So uncore interrupt handling is not needed.
>> >>
>> > You're making the assumption that when counting, you can never construct
>> > a measurement that will cause a counter to overflow the 39 bits. If not, then
>> > you need interrupt handling even when counting.
>> >
>> The actual counter width is 48. But wrmsrl() can only write the bottom 32 bits
>> of a register. I think Intel fixed that only with SandyBridge (see Vol3b). Thus,
>> the risk of 'silent' wrap around is much higher now as you have only 31 bits
>> to play with.
>
> I just tested wrmsrl on uncore counters and it's surprised to me that it
> supports full write.
>
>        val64.low  = 0xFFFFEEEE;
>        val64.high = 0x12345678;
>
> On Nehalem/Westmere:
>
>        msr = 0x3b0; //NHM_MSR_UNCORE_PMC0
>        wrmsrl(msr, val64.full & 0xfffffffffff); //48 bits counter
>        rdmsrl(msr, val64.full);
>        printfk("counter value: 0x%llx\n", val64.full);
>
> I got:
>        counter value: 0x5678ffffeeee
>
> On SandyBridge:
>
>        msr = 0x716; //SNB_MSR_UNC_CBO_1_PER_CTR0
>        wrmsrl(msr, val64.full & 0xfffffffffff); //44 bits counter
>        rdmsrl(msr, val64.full);
>        printfk("counter value: 0x%llx\n", val64.full);
>
> I got:
>        counter value: 0x678ffffeeee
>
I tried on my Nehalem and SandyBridge (model 42) and I get the same results.
The restriction applies only to core PMU counters then.

But what's weird is that on SandyBridge the core counters are still
limited:

uncore counter value: 0x678ffffeeee
   core counter value: 0xffffffffeeee

Yet, IA32_PERF_CAPABILITIES.FW_WIDTH (bit13) is set, thus wrmsrl()
should write the full 48-bit width (see Vol3b 30.2.2.3).

Further testing revealed that you get full width ONLY with the fixed
counters:

FW_WIDTH=1
uncore counter value: 0x678ffffeeee
core counter value: 0xffffffffeeee
fixed counter value: 0x678ffffeeee

I suspect there is a bug in the HW or a bogus description in the SDM.
I will check on that with Intel.


>> But if I read your patch correctly, it seems you are avoiding wrmsrl() on the
>> counter. Instead, you are reading it when you start (prev_count) and using
>> that value to compute the delta on stop.
>>
>> Am I understanding your workaround correctly?
>
> Yes, but I didn't realize that it's a workaround.
>
> Lin Ming
>
>>
>>
>> >
>> >> The uncore pmu type is allocated dynamically and exported via sysfs.
>> >> $ cat /sys/bus/event_source/devices/uncore/type
>> >> 6
>> >>
>> >> You can count uncore raw events as below,
>> >> $ perf stat -e uncore:r0101 ls
>> >>
>> >> It reads uncore pmu type id from sysfs to setup perf_event_attr::type.
>> >>
>> >> Comments are appreciated.
>> >>
>> >> Thanks,
>> >> Lin Ming
>> >>
>> >>
>> >
>
>
>


* Re: [PATCH 0/4] perf: Intel uncore pmu counting support
  2011-06-30  8:09 [PATCH 0/4] perf: Intel uncore pmu counting support Lin Ming
                   ` (4 preceding siblings ...)
  2011-06-30 12:10 ` [PATCH 0/4] perf: Intel uncore pmu counting support Stephane Eranian
@ 2011-07-01 11:08 ` Ingo Molnar
  5 siblings, 0 replies; 29+ messages in thread
From: Ingo Molnar @ 2011-07-01 11:08 UTC (permalink / raw)
  To: Lin Ming
  Cc: Peter Zijlstra, Andi Kleen, Stephane Eranian,
	Arnaldo Carvalho de Melo, linux-kernel


* Lin Ming <ming.m.lin@intel.com> wrote:

> Hi, all
> 
> I posted uncore patches months ago, but it was pended due to an uncore
> interrupt problem.
> 
> This series are cut to support uncore pmu counting only.
> So uncore interrupt handling is not needed.
> 
> The uncore pmu type is allocated dynamically and exported via sysfs.
> $ cat /sys/bus/event_source/devices/uncore/type
> 6
> 
> You can count uncore raw events as below,
> $ perf stat -e uncore:r0101 ls
> 
> It reads uncore pmu type id from sysfs to setup perf_event_attr::type.
> 
> Comments are appreciated.

Could you please list the most useful hw events that Intel would like 
users to be able to measure via the uncore PMU, and which are not 
available via the offcore PMU?

Thanks,

	Ingo


* Re: [PATCH 0/4] perf: Intel uncore pmu counting support
  2011-07-01 10:49       ` Stephane Eranian
@ 2011-07-01 12:23         ` Stephane Eranian
  2011-07-01 12:28           ` Stephane Eranian
  2011-07-04  6:03         ` Lin Ming
  1 sibling, 1 reply; 29+ messages in thread
From: Stephane Eranian @ 2011-07-01 12:23 UTC (permalink / raw)
  To: Lin Ming
  Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen,
	Arnaldo Carvalho de Melo, linux-kernel

Lin,

Ok, false alarm. Vol3b is correct. I missed the fact that if you want
wrmsrl() to write full width generic counters, you have to use the
alternate MSR alias. I guess that's for backward compatibility.
So for a full-width wrmsrl on PERFCTR0 -> use its alias at 0x4c1

uncore counter value: 0x678ffffeeee
core fixed counter value: 0x678ffffeeee
       core generic counter value: 0xffffffffeeee
wide core generic counter value: 0x678ffffeeee

I think full width write to fixed counters has been there
for a long time. Looks like the problem was only present
for core generic counters until SNB.




On Fri, Jul 1, 2011 at 12:49 PM, Stephane Eranian <eranian@google.com> wrote:
> On Fri, Jul 1, 2011 at 5:17 AM, Lin Ming <ming.m.lin@intel.com> wrote:
>> On Fri, 2011-07-01 at 00:27 +0800, Stephane Eranian wrote:
>>> On Thu, Jun 30, 2011 at 2:10 PM, Stephane Eranian <eranian@google.com> wrote:
>>> > On Thu, Jun 30, 2011 at 10:09 AM, Lin Ming <ming.m.lin@intel.com> wrote:
>>> >> Hi, all
>>> >>
>>> >> I posted uncore patches months ago, but it was pended due to an uncore
>>> >> interrupt problem.
>>> >>
>>> >> This series are cut to support uncore pmu counting only.
>>> >> So uncore interrupt handling is not needed.
>>> >>
>>> > You're making the assumption that when counting, you can never construct
>>> > a measurement that will cause a counter to overflow the 39 bits. If not, then
>>> > you need interrupt handling even when counting.
>>> >
>>> The actual counter width is 48. But wrmsrl() can only write the bottom 32 bits
>>> of a register. I think Intel fixed that only with SandyBridge (see Vol3b). Thus,
>>> the risk of 'silent' wrap around is much higher now as you have only 31 bits
>>> to play with.
>>
>> I just tested wrmsrl on uncore counters and it's surprised to me that it
>> supports full write.
>>
>>        val64.low  = 0xFFFFEEEE;
>>        val64.high = 0x12345678;
>>
>> On Nehalem/Westmere:
>>
>>        msr = 0x3b0; //NHM_MSR_UNCORE_PMC0
>>        wrmsrl(msr, val64.full & 0xfffffffffff); //48 bits counter
>>        rdmsrl(msr, val64.full);
>>        printfk("counter value: 0x%llx\n", val64.full);
>>
>> I got:
>>        counter value: 0x5678ffffeeee
>>
>> On SandyBridge:
>>
>>        msr = 0x716; //SNB_MSR_UNC_CBO_1_PER_CTR0
>>        wrmsrl(msr, val64.full & 0xfffffffffff); //44 bits counter
>>        rdmsrl(msr, val64.full);
>>        printfk("counter value: 0x%llx\n", val64.full);
>>
>> I got:
>>        counter value: 0x678ffffeeee
>>
> I tried on my Nehalem and SandyBridge (model 42) and I get the same results.
> The restriction applies only to core PMU counters then.
>
> But what's so weird is that on SandyBridge the counter counters are still
> limited:
>
> uncore counter value: 0x678ffffeeee
>   core counter value: 0xffffffffeeee
>
> Yet, IA32_PERF_CAPABILITIES.FW_WIDTH (bit13) is set, thus wrmsrl()
> should write the full 48-bit width (see Vol3b 30.2.2.3).
>
> Further testing revealed that you get full width ONLY with the fixed counter
> counters:
>
> FW_WIDTH=1
> uncore counter value: 0x678ffffeeee
> core counter value: 0xffffffffeeee
> fixed counter value: 0x678ffffeeee
>
> I suspect there is a bug in the HW or a bogus description in the SDM.
> I will check on that with Intel.
>
>
>>> But if I read your patch correctly, it seems you are avoiding wrmsrl() on the
>>> counter. Instead, you are reading it when you start (prev_count) and using
>>> that value to compute the delta on stop.
>>>
>>> Am I understanding your workaround correctly?
>>
>> Yes, but I didn't realize that it's a workaround.
>>
>> Lin Ming
>>
>>>
>>>
>>> >
>>> >> The uncore pmu type is allocated dynamically and exported via sysfs.
>>> >> $ cat /sys/bus/event_source/devices/uncore/type
>>> >> 6
>>> >>
>>> >> You can count uncore raw events as below,
>>> >> $ perf stat -e uncore:r0101 ls
>>> >>
>>> >> It reads uncore pmu type id from sysfs to setup perf_event_attr::type.
>>> >>
>>> >> Comments are appreciated.
>>> >>
>>> >> Thanks,
>>> >> Lin Ming
>>> >>
>>> >>
>>> >
>>
>>
>>
>


* Re: [PATCH 0/4] perf: Intel uncore pmu counting support
  2011-07-01 12:23         ` Stephane Eranian
@ 2011-07-01 12:28           ` Stephane Eranian
  0 siblings, 0 replies; 29+ messages in thread
From: Stephane Eranian @ 2011-07-01 12:28 UTC (permalink / raw)
  To: Lin Ming
  Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen,
	Arnaldo Carvalho de Melo, linux-kernel

On Fri, Jul 1, 2011 at 2:23 PM, Stephane Eranian <eranian@google.com> wrote:
> Lin,
>
> Ok, false alarm. Vol3b is correct. I missed the fact that if you want
> wrmsrl() to write full width generic counters, you have to use the
> alternate MSR alias. I guess that's for backward compatibility.
> So for full width wrmsrl on PERFCTR0 -> use 0x41c
>
> uncore counter value: 0x678ffffeeee
> core fixed counter value: 0x678ffffeeee
>       core generic counter value: 0xffffffffeeee
> wide core generic counter value: 0x678ffffeeee
>
> I think full width write to fixed counters has been there
> for a long time. Looks like the problem was only present
> for core generic counters until SNB.
>
Forgot to say that we could adapt the code to use the alternate
registers if full width wrmsrl() is supported. That would mitigate
a bit the overhead of sampling and counting by simply not interrupting
that often to accumulate counts in the SW counter.
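
A hedged sketch of that adaptation (the MSR numbers are from the SDM; the
x86_pmu.perfctr assignment is illustrative, not a tested change):

	#define MSR_IA32_PERF_CAPABILITIES	0x345
	#define PERF_CAP_FW_WRITE		(1ULL << 13)
	#define MSR_IA32_PMC0			0x4c1	/* full-width alias of 0xc1 */

	u64 caps;

	rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
	if (caps & PERF_CAP_FW_WRITE)
		x86_pmu.perfctr = MSR_IA32_PMC0;	/* full-width counter writes */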


* Re: [PATCH 0/4] perf: Intel uncore pmu counting support
  2011-07-01 10:49       ` Stephane Eranian
  2011-07-01 12:23         ` Stephane Eranian
@ 2011-07-04  6:03         ` Lin Ming
  1 sibling, 0 replies; 29+ messages in thread
From: Lin Ming @ 2011-07-04  6:03 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen,
	Arnaldo Carvalho de Melo, linux-kernel

On Fri, 2011-07-01 at 18:49 +0800, Stephane Eranian wrote:
> On Fri, Jul 1, 2011 at 5:17 AM, Lin Ming <ming.m.lin@intel.com> wrote:
> > On Fri, 2011-07-01 at 00:27 +0800, Stephane Eranian wrote:
> >> On Thu, Jun 30, 2011 at 2:10 PM, Stephane Eranian <eranian@google.com> wrote:
> >> > On Thu, Jun 30, 2011 at 10:09 AM, Lin Ming <ming.m.lin@intel.com> wrote:
> >> >> Hi, all
> >> >>
> >> >> I posted uncore patches months ago, but it was pended due to an uncore
> >> >> interrupt problem.
> >> >>
> >> >> This series are cut to support uncore pmu counting only.
> >> >> So uncore interrupt handling is not needed.
> >> >>
> >> > You're making the assumption that when counting, you can never construct
> >> > a measurement that will cause a counter to overflow the 39 bits. If not, then
> >> > you need interrupt handling even when counting.
> >> >
> >> The actual counter width is 48. But wrmsrl() can only write the bottom 32 bits
> >> of a register. I think Intel fixed that only with SandyBridge (see Vol3b). Thus,
> >> the risk of 'silent' wrap around is much higher now as you have only 31 bits
> >> to play with.
> >
> > I just tested wrmsrl on uncore counters and it's surprised to me that it
> > supports full write.
> >
> >        val64.low  = 0xFFFFEEEE;
> >        val64.high = 0x12345678;
> >
> > On Nehalem/Westmere:
> >
> >        msr = 0x3b0; //NHM_MSR_UNCORE_PMC0
> >        wrmsrl(msr, val64.full & 0xfffffffffff); //48 bits counter
> >        rdmsrl(msr, val64.full);
> >        printfk("counter value: 0x%llx\n", val64.full);
> >
> > I got:
> >        counter value: 0x5678ffffeeee
> >
> > On SandyBridge:
> >
> >        msr = 0x716; //SNB_MSR_UNC_CBO_1_PER_CTR0
> >        wrmsrl(msr, val64.full & 0xfffffffffff); //44 bits counter
> >        rdmsrl(msr, val64.full);
> >        printfk("counter value: 0x%llx\n", val64.full);
> >
> > I got:
> >        counter value: 0x678ffffeeee
> >
> I tried on my Nehalem and SandyBridge (model 42) and I get the same results.
> The restriction applies only to core PMU counters then.
> 
> But what's so weird is that on SandyBridge the counter counters are still
> limited:
> 
> uncore counter value: 0x678ffffeeee
>    core counter value: 0xffffffffeeee
> 
> Yet, IA32_PERF_CAPABILITIES.FW_WIDTH (bit13) is set, thus wrmsrl()
> should write the full 48-bit width (see Vol3b 30.2.2.3).

No.

The SandyBridge uncore generic counters are 44 bits wide and the fixed
counter is 48 bits wide.

Lin Ming

> 
> Further testing revealed that you get full width ONLY with the fixed counter
> counters:
> 
> FW_WIDTH=1
> uncore counter value: 0x678ffffeeee
> core counter value: 0xffffffffeeee
> fixed counter value: 0x678ffffeeee
> 
> I suspect there is a bug in the HW or a bogus description in the SDM.
> I will check on that with Intel.
> 
> 
> >> But if I read your patch correctly, it seems you are avoiding wrmsrl() on the
> >> counter. Instead, you are reading it when you start (prev_count) and using
> >> that value to compute the delta on stop.
> >>
> >> Am I understanding your workaround correctly?
> >
> > Yes, but I didn't realize that it's a workaround.
> >
> > Lin Ming
> >
> >>
> >>
> >> >
> >> >> The uncore pmu type is allocated dynamically and exported via sysfs.
> >> >> $ cat /sys/bus/event_source/devices/uncore/type
> >> >> 6
> >> >>
> >> >> You can count uncore raw events as below,
> >> >> $ perf stat -e uncore:r0101 ls
> >> >>
> >> >> It reads uncore pmu type id from sysfs to setup perf_event_attr::type.
> >> >>
> >> >> Comments are appreciated.
> >> >>
> >> >> Thanks,
> >> >> Lin Ming
> >> >>
> >> >>
> >> >
> >
> >
> >




* Re: [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu
  2011-06-30 16:58   ` Andi Kleen
@ 2011-07-04  6:39     ` Lin Ming
  2011-07-04  8:38       ` Peter Zijlstra
  2011-07-04 21:57       ` Andi Kleen
  0 siblings, 2 replies; 29+ messages in thread
From: Lin Ming @ 2011-07-04  6:39 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Peter Zijlstra, Ingo Molnar, Stephane Eranian,
	Arnaldo Carvalho de Melo, linux-kernel

On Fri, 2011-07-01 at 00:58 +0800, Andi Kleen wrote:
> On Thu, Jun 30, 2011 at 08:09:53AM +0000, Lin Ming wrote:
> > +static u64 uncore_perf_event_update(struct perf_event *event)
> > +{
> > +	struct hw_perf_event *hwc = &event->hw;
> > +	int shift = 64 - intel_uncore_pmu.cntval_bits;
> > +	u64 prev_raw_count, new_raw_count;
> > +	s64 delta;
> > +
> > +	/*
> > +	 * Careful: an NMI might modify the previous event value.
> 
> There are no NMIs without sampling, so at least the comment seems bogus.
> Perhaps the code could be a bit simplified now without atomics.

I'm not sure if the uncore PMU interrupt needs to be enabled for counting
only. What do you think?

> 
> > +static int uncore_pmu_event_init(struct perf_event *event)
> > +{
> > +	struct hw_perf_event *hwc = &event->hw;
> > +
> > +	if (!uncore_pmu_initialized)
> > +		return -ENOENT;
> > +
> > +	if (event->attr.type != uncore_pmu.type)
> > +		return -ENOENT;
> > +
> > +	/*
> > +	 * Uncore PMU does measure at all privilege level all the time.
> > +	 * So it doesn't make sense to specify any exclude bits.
> > +	 */
> > +	if (event->attr.exclude_user || event->attr.exclude_kernel
> > +	    || event->attr.exclude_hv || event->attr.exclude_idle)
> > +		return -ENOENT;
> > +
> > +	/* Sampling not supported yet */
> > +	if (hwc->sample_period)
> > +		return -EINVAL;
> 
> Don't we need a "is root" check here? uncore counts everything, so
> it cannot be limited to a single process.

Yes, will add a "is root" check.

Will add .task_ctx_nr = perf_invalid_context to disallow per-process
counting.

> 
> > +static void uncore_pmu_cpu_starting(int cpu)
> > +{
> > +	struct cpu_uncore_events *cpuc = &per_cpu(cpu_uncore_events, cpu);
> > +	struct intel_uncore *uncore;
> > +	int i, uncore_id;
> > +
> > +	if (boot_cpu_data.x86_max_cores < 2)
> > +		return;
> 
> Why that check? uncore counting should work on a single core system too.
> 
> I think you should remove all of those.

Agree, will remove it.

> 
> > +
> > +	uncore_id = topology_physical_package_id(cpu);
> > +	WARN_ON_ONCE(uncore_id == BAD_APICID);
> > +
> > +	raw_spin_lock(&intel_uncore_lock);
> 
> Does this really need to be a raw spinlock? 

I think spinlock is enough.

> 
> > +#define NHM_MSR_UNCORE_PERF_GLOBAL_CTRL    	0x391
> > +#define NHM_MSR_UNCORE_PMC0			0x3b0
> > +#define NHM_MSR_UNCORE_PERFEVTSEL0		0x3c0
> 
> These should be in msr-index.h

Will move these.

Thanks,
Lin Ming

> 
> 
> -Andi




* Re: [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu
  2011-07-04  6:39     ` Lin Ming
@ 2011-07-04  8:38       ` Peter Zijlstra
  2011-07-04 21:57       ` Andi Kleen
  1 sibling, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2011-07-04  8:38 UTC (permalink / raw)
  To: Lin Ming
  Cc: Andi Kleen, Ingo Molnar, Stephane Eranian,
	Arnaldo Carvalho de Melo, linux-kernel

On Mon, 2011-07-04 at 14:39 +0800, Lin Ming wrote:
> > Does this really need to be a raw spinlock? 
> 
> I think spinlock is enough. 

No, raw_spinlock_t was correct.

Talking of which:

+       struct spinlock lock;

That too should be a raw_spinlock_t.


* Re: [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu
  2011-07-04  6:39     ` Lin Ming
  2011-07-04  8:38       ` Peter Zijlstra
@ 2011-07-04 21:57       ` Andi Kleen
  2011-07-05 11:22         ` Peter Zijlstra
  1 sibling, 1 reply; 29+ messages in thread
From: Andi Kleen @ 2011-07-04 21:57 UTC (permalink / raw)
  To: Lin Ming
  Cc: Andi Kleen, Peter Zijlstra, Ingo Molnar, Stephane Eranian,
	Arnaldo Carvalho de Melo, linux-kernel

> > There are no NMIs without sampling, so at least the comment seems bogus.
> > Perhaps the code could be a bit simplified now without atomics.
> 
> I'm not sure if uncore PMU interrupt need to be enabled for counting
> only. What do you think?

Only for overflow handling, to accumulate into a larger counter, but it doesn't 
need to be an NMI for that.  But it's not strictly required, I would say; 
44(?) bits are probably enough for nearly all use cases.

At least initially not having one is fine I think.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu
  2011-07-04 21:57       ` Andi Kleen
@ 2011-07-05 11:22         ` Peter Zijlstra
  2011-07-05 12:48           ` Lin Ming
  2011-07-05 16:01           ` Andi Kleen
  0 siblings, 2 replies; 29+ messages in thread
From: Peter Zijlstra @ 2011-07-05 11:22 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Lin Ming, Ingo Molnar, Stephane Eranian,
	Arnaldo Carvalho de Melo, linux-kernel

On Mon, 2011-07-04 at 23:57 +0200, Andi Kleen wrote:
> > > There are no NMIs without sampling, so at least the comment seems bogus.
> > > Perhaps the code could be a bit simplified now without atomics.
> > 
> > I'm not sure if uncore PMU interrupt need to be enabled for counting
> > only. What do you think?
> 
> Only for overflow handling to accumulate into a larger counter, but it doesn't 
> need to be an NMI for that.

Uncore is hooked into the regular PMI, and since we wire that to the NMI
the uncore will always be NMI too.

>   But it's not strictly required I would say, 
> 44(?) bits are probably enough for near all use cases.

44bits is in the hours range for pure cycle counts, which is so-so. I
bet you're going to be very annoyed when you find your counters are
wrecked after your 5 hour test run finishes.
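
(Back of the envelope: at a ~3 GHz clock, 2^44 cycles is about 5.9e3
seconds, so a free-running 44-bit counter wraps after roughly an hour and
a half.)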

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu
  2011-07-05 11:22         ` Peter Zijlstra
@ 2011-07-05 12:48           ` Lin Ming
  2011-07-05 12:56             ` Peter Zijlstra
  2011-07-05 16:01           ` Andi Kleen
  1 sibling, 1 reply; 29+ messages in thread
From: Lin Ming @ 2011-07-05 12:48 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, Ingo Molnar, Stephane Eranian,
	Arnaldo Carvalho de Melo, linux-kernel

On Tue, 2011-07-05 at 19:22 +0800, Peter Zijlstra wrote:
> On Mon, 2011-07-04 at 23:57 +0200, Andi Kleen wrote:
> > > > There are no NMIs without sampling, so at least the comment seems bogus.
> > > > Perhaps the code could be a bit simplified now without atomics.
> > > 
> > > I'm not sure if uncore PMU interrupt need to be enabled for counting
> > > only. What do you think?
> > 
> > Only for overflow handling to accumulate into a larger counter, but it doesn't 
> > need to be an NMI for that.
> 
> Uncore is hooked into the regular PMI, and since we wire that to the NMI
> the uncore will always be NMI too.
> 
> >   But it's not strictly required I would say, 
> > 44(?) bits are probably enough for near all use cases.
> 
> 44bits is in the hours range for pure cycle counts, which is so-so. I
> bet you're going to be very annoyed when you find your counters are
> wrecked after your 5 hour test run finishes.

I'll add the interrupt handling code back.



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu
  2011-07-05 12:48           ` Lin Ming
@ 2011-07-05 12:56             ` Peter Zijlstra
  2011-07-05 13:13               ` Lin Ming
  0 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2011-07-05 12:56 UTC (permalink / raw)
  To: Lin Ming
  Cc: Andi Kleen, Ingo Molnar, Stephane Eranian,
	Arnaldo Carvalho de Melo, linux-kernel

On Tue, 2011-07-05 at 20:48 +0800, Lin Ming wrote:
> On Tue, 2011-07-05 at 19:22 +0800, Peter Zijlstra wrote:
> > On Mon, 2011-07-04 at 23:57 +0200, Andi Kleen wrote:
> > > > > There are no NMIs without sampling, so at least the comment seems bogus.
> > > > > Perhaps the code could be a bit simplified now without atomics.
> > > > 
> > > > I'm not sure if uncore PMU interrupt need to be enabled for counting
> > > > only. What do you think?
> > > 
> > > Only for overflow handling to accumulate into a larger counter, but it doesn't 
> > > need to be an NMI for that.
> > 
> > Uncore is hooked into the regular PMI, and since we wire that to the NMI
> > the uncore will always be NMI too.
> > 
> > >   But it's not strictly required I would say, 
> > > 44(?) bits are probably enough for near all use cases.
> > 
> > 44bits is in the hours range for pure cycle counts, which is so-so. I
> > bet you're going to be very annoyed when you find your counters are
> > wrecked after your 5 hour test run finishes.
> 
> I'll add the interrupt handling code back.

Does it work? The problem was with the hardware being iffy.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu
  2011-07-05 12:56             ` Peter Zijlstra
@ 2011-07-05 13:13               ` Lin Ming
  0 siblings, 0 replies; 29+ messages in thread
From: Lin Ming @ 2011-07-05 13:13 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, Ingo Molnar, Stephane Eranian,
	Arnaldo Carvalho de Melo, linux-kernel

On Tue, 2011-07-05 at 20:56 +0800, Peter Zijlstra wrote:
> On Tue, 2011-07-05 at 20:48 +0800, Lin Ming wrote:
> > On Tue, 2011-07-05 at 19:22 +0800, Peter Zijlstra wrote:
> > > On Mon, 2011-07-04 at 23:57 +0200, Andi Kleen wrote:
> > > > > > There are no NMIs without sampling, so at least the comment seems bogus.
> > > > > > Perhaps the code could be a bit simplified now without atomics.
> > > > > 
> > > > > I'm not sure if uncore PMU interrupt need to be enabled for counting
> > > > > only. What do you think?
> > > > 
> > > > Only for overflow handling to accumulate into a larger counter, but it doesn't 
> > > > need to be an NMI for that.
> > > 
> > > Uncore is hooked into the regular PMI, and since we wire that to the NMI
> > > the uncore will always be NMI too.
> > > 
> > > >   But it's not strictly required I would say, 
> > > > 44(?) bits are probably enough for near all use cases.
> > > 
> > > 44bits is in the hours range for pure cycle counts, which is so-so. I
> > > bet you're going to be very annoyed when you find your counters are
> > > wrecked after your 5 hour test run finishes.
> > 
> > I'll add the interrupt handling code back.
> 
> Does it work? The problem was with the hardware being iffy.

It may work on SandyBridge.


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu
  2011-07-05 11:22         ` Peter Zijlstra
  2011-07-05 12:48           ` Lin Ming
@ 2011-07-05 16:01           ` Andi Kleen
  2011-07-06  9:35             ` Ingo Molnar
  1 sibling, 1 reply; 29+ messages in thread
From: Andi Kleen @ 2011-07-05 16:01 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, Lin Ming, Ingo Molnar, Stephane Eranian,
	Arnaldo Carvalho de Melo, linux-kernel


>
>>   But it's not strictly required I would say,
>> 44(?) bits are probably enough for near all use cases.
>
> 44bits is in the hours range for pure cycle counts, which is so-so. I
> bet you're going to be very annoyed when you find your counters are
> wrecked after your 5 hour test run finishes.

For these kinds of long-running measurements you usually care about
ratios, not absolutes. Some infrequent wrapping should not substantially
change those.

-Andi


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu
  2011-07-05 16:01           ` Andi Kleen
@ 2011-07-06  9:35             ` Ingo Molnar
  0 siblings, 0 replies; 29+ messages in thread
From: Ingo Molnar @ 2011-07-06  9:35 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Peter Zijlstra, Lin Ming, Stephane Eranian,
	Arnaldo Carvalho de Melo, linux-kernel


* Andi Kleen <andi@firstfloor.org> wrote:

> 
> >
> >>   But it's not strictly required I would say,
> >> 44(?) bits are probably enough for near all use cases.
> >
> > 44bits is in the hours range for pure cycle counts, which is 
> > so-so. I bet you're going to be very annoyed when you find your 
> > counters are wrecked after your 5 hour test run finishes.
> 
> For these kinds of long-running measurements you usually care about 
> ratios, not absolutes. Some infrequent wrapping should not 
> substantially change those.

That's just empty blathering - those who *do* take a look at 
long-running absolute values obviously care.

There's absolutely no reason to be sloppy here - this needs to be 
implemented correctly.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [tip:perf/core] perf: Remove perf_event_attr::type check
  2011-06-30  8:09 ` [PATCH 3/4] perf: Remove perf_event_attr::type check Lin Ming
@ 2011-07-21 19:31   ` tip-bot for Lin Ming
  0 siblings, 0 replies; 29+ messages in thread
From: tip-bot for Lin Ming @ 2011-07-21 19:31 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, robert.richter, a.p.zijlstra,
	ming.m.lin, tglx, mingo

Commit-ID:  9985c20f9e4aee6857c08246b273a3695a52b929
Gitweb:     http://git.kernel.org/tip/9985c20f9e4aee6857c08246b273a3695a52b929
Author:     Lin Ming <ming.m.lin@intel.com>
AuthorDate: Thu, 30 Jun 2011 08:09:55 +0000
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Thu, 21 Jul 2011 20:41:55 +0200

perf: Remove perf_event_attr::type check

The PMU type id can be allocated dynamically, so the perf_event_attr::type
check done when copying the attribute from userspace to the kernel is no
longer valid.

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Cc: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1309421396-17438-4-git-send-email-ming.m.lin@intel.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/events/core.c |    7 -------
 1 files changed, 0 insertions(+), 7 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0567e32..b8785e2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5926,13 +5926,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 	if (ret)
 		return -EFAULT;
 
-	/*
-	 * If the type exists, the corresponding creation will verify
-	 * the attr->config.
-	 */
-	if (attr->type >= PERF_TYPE_MAX)
-		return -EINVAL;
-
 	if (attr->__reserved_1)
 		return -EINVAL;
 

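For illustration, with the in-kernel check gone a tool can pass a
dynamically allocated type id straight through; a minimal, hypothetical
counting example (the sysfs path and raw event encoding are assumptions
here, not part of this commit):

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	int main(void)
	{
		struct perf_event_attr attr;
		long long count;
		int type, fd;
		FILE *f;

		f = fopen("/sys/bus/event_source/devices/uncore/type", "r");
		if (!f || fscanf(f, "%d", &type) != 1)
			return 1;
		fclose(f);

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = type;	/* may well be >= PERF_TYPE_MAX now */
		attr.config = 0x0101;	/* raw uncore event, example only */

		/* system-wide counting on CPU 0; typically needs root */
		fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
		if (fd < 0)
			return 1;

		sleep(1);
		read(fd, &count, sizeof(count));
		printf("%lld\n", count);
		return 0;
	}
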
^ permalink raw reply related	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2011-07-21 19:31 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-06-30  8:09 [PATCH 0/4] perf: Intel uncore pmu counting support Lin Ming
2011-06-30  8:09 ` [PATCH 1/4] perf, x86: Add Intel Nehalem/Westmere uncore pmu Lin Ming
2011-06-30 14:08   ` Peter Zijlstra
2011-07-01  6:05     ` Lin Ming
2011-06-30 16:58   ` Andi Kleen
2011-07-04  6:39     ` Lin Ming
2011-07-04  8:38       ` Peter Zijlstra
2011-07-04 21:57       ` Andi Kleen
2011-07-05 11:22         ` Peter Zijlstra
2011-07-05 12:48           ` Lin Ming
2011-07-05 12:56             ` Peter Zijlstra
2011-07-05 13:13               ` Lin Ming
2011-07-05 16:01           ` Andi Kleen
2011-07-06  9:35             ` Ingo Molnar
2011-06-30  8:09 ` [PATCH 2/4] perf, x86: Add Intel SandyBridge " Lin Ming
2011-06-30 22:09   ` Peter Zijlstra
2011-06-30  8:09 ` [PATCH 3/4] perf: Remove perf_event_attr::type check Lin Ming
2011-07-21 19:31   ` [tip:perf/core] " tip-bot for Lin Ming
2011-06-30  8:09 ` [PATCH 4/4] perf tool: Get PMU type id from sysfs Lin Ming
2011-06-30 12:10 ` [PATCH 0/4] perf: Intel uncore pmu counting support Stephane Eranian
2011-06-30 14:10   ` Peter Zijlstra
2011-06-30 16:27   ` Stephane Eranian
2011-07-01  3:17     ` Lin Ming
2011-07-01 10:49       ` Stephane Eranian
2011-07-01 12:23         ` Stephane Eranian
2011-07-01 12:28           ` Stephane Eranian
2011-07-04  6:03         ` Lin Ming
2011-07-01  5:49   ` Lin Ming
2011-07-01 11:08 ` Ingo Molnar
