All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/7] perf, x86: Implement AMD IBS
@ 2011-07-28 13:46 Robert Richter
  2011-07-28 13:46 ` [PATCH 1/7] perf, x86: share IBS macros between perf and oprofile Robert Richter
                   ` (7 more replies)
  0 siblings, 8 replies; 39+ messages in thread
From: Robert Richter @ 2011-07-28 13:46 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

This patch set adds support for AMD IBS to perf. It is a new
implementation and unrelated to my previous postings last year. The
main differences are:

* separate and independent from x86 perfctrs, IBS could be used
  without the x86 pmu,
* using dynamic pmu allocation, userspace uses sysfs to select the pmu,
* support for 64 bit counters,
* libperf based example code,
* shared IBS initialization code for perf and oprofile.

The approach is still to collect raw sample data which should be the
most important use case for application developers. The data format is
the same as described in the IBS register specification.

Future work could be:

* better integration into the perf tool, use IBS for generic events
  where possible,
* support of the precise event sampling perf i/f,
* implementation of extended IBS features (e.g. ext. counter width),
* support of counting (perf stat),
* in-kernel IBS event parsing,
* IBS tracepoint support.

-Robert



^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/7] perf, x86: share IBS macros between perf and oprofile
  2011-07-28 13:46 [PATCH 0/7] perf, x86: Implement AMD IBS Robert Richter
@ 2011-07-28 13:46 ` Robert Richter
  2011-07-28 13:46 ` [PATCH 2/7] perf, x86: Implement IBS initialization Robert Richter
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 39+ messages in thread
From: Robert Richter @ 2011-07-28 13:46 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML, Robert Richter

Moving IBS macros from oprofile to <asm/perf_event.h> to make it
available to perf. No additional changes.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/include/asm/perf_event.h    |   38 +++++++++++++++++++++++++++++++--
 arch/x86/kernel/cpu/perf_event_amd.c |    4 +-
 arch/x86/oprofile/op_model_amd.c     |   37 ++------------------------------
 3 files changed, 40 insertions(+), 39 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 094fb30..bc801ac 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -43,14 +43,17 @@
 #define AMD64_RAW_EVENT_MASK		\
 	(X86_RAW_EVENT_MASK          |  \
 	 AMD64_EVENTSEL_EVENT)
+#define AMD64_NUM_COUNTERS				4
+#define AMD64_NUM_COUNTERS_F15H				6
+#define AMD64_NUM_COUNTERS_MAX				AMD64_NUM_COUNTERS_F15H
 
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX			 0
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX		0
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
 		(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
 
-#define ARCH_PERFMON_BRANCH_MISSES_RETIRED			 6
+#define ARCH_PERFMON_BRANCH_MISSES_RETIRED		6
 
 /*
  * Intel "Architectural Performance Monitoring" CPUID
@@ -110,6 +113,35 @@ union cpuid10_edx {
  */
 #define X86_PMC_IDX_FIXED_BTS				(X86_PMC_IDX_FIXED + 16)
 
+/*
+ * IBS cpuid feature detection
+ */
+
+#define IBS_CPUID_FEATURES		0x8000001b
+
+/*
+ * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but
+ * bit 0 is used to indicate the existence of IBS.
+ */
+#define IBS_CAPS_AVAIL			(1U<<0)
+#define IBS_CAPS_FETCHSAM		(1U<<1)
+#define IBS_CAPS_OPSAM			(1U<<2)
+#define IBS_CAPS_RDWROPCNT		(1U<<3)
+#define IBS_CAPS_OPCNT			(1U<<4)
+#define IBS_CAPS_BRNTRGT		(1U<<5)
+#define IBS_CAPS_OPCNTEXT		(1U<<6)
+
+#define IBS_CAPS_DEFAULT		(IBS_CAPS_AVAIL		\
+					 | IBS_CAPS_FETCHSAM	\
+					 | IBS_CAPS_OPSAM)
+
+/*
+ * IBS APIC setup
+ */
+#define IBSCTL				0x1cc
+#define IBSCTL_LVT_OFFSET_VALID		(1ULL<<8)
+#define IBSCTL_LVT_OFFSET_MASK		0x0F
+
 /* IbsFetchCtl bits/masks */
 #define IBS_FETCH_RAND_EN	(1ULL<<57)
 #define IBS_FETCH_VAL		(1ULL<<49)
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 941caa2..a9e3047 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -392,7 +392,7 @@ static __initconst const struct x86_pmu amd_pmu = {
 	.perfctr		= MSR_K7_PERFCTR0,
 	.event_map		= amd_pmu_event_map,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
-	.num_counters		= 4,
+	.num_counters		= AMD64_NUM_COUNTERS,
 	.cntval_bits		= 48,
 	.cntval_mask		= (1ULL << 48) - 1,
 	.apic			= 1,
@@ -556,7 +556,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {
 	.perfctr		= MSR_F15H_PERF_CTR,
 	.event_map		= amd_pmu_event_map,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
-	.num_counters		= 6,
+	.num_counters		= AMD64_NUM_COUNTERS_F15H,
 	.cntval_bits		= 48,
 	.cntval_mask		= (1ULL << 48) - 1,
 	.apic			= 1,
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 9cbb710..e947e5c 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -29,8 +29,6 @@
 #include "op_x86_model.h"
 #include "op_counter.h"
 
-#define NUM_COUNTERS		4
-#define NUM_COUNTERS_F15H	6
 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
 #define NUM_VIRT_COUNTERS	32
 #else
@@ -70,35 +68,6 @@ static struct ibs_config ibs_config;
 static struct ibs_state ibs_state;
 
 /*
- * IBS cpuid feature detection
- */
-
-#define IBS_CPUID_FEATURES		0x8000001b
-
-/*
- * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but
- * bit 0 is used to indicate the existence of IBS.
- */
-#define IBS_CAPS_AVAIL			(1U<<0)
-#define IBS_CAPS_FETCHSAM		(1U<<1)
-#define IBS_CAPS_OPSAM			(1U<<2)
-#define IBS_CAPS_RDWROPCNT		(1U<<3)
-#define IBS_CAPS_OPCNT			(1U<<4)
-#define IBS_CAPS_BRNTRGT		(1U<<5)
-#define IBS_CAPS_OPCNTEXT		(1U<<6)
-
-#define IBS_CAPS_DEFAULT		(IBS_CAPS_AVAIL		\
-					 | IBS_CAPS_FETCHSAM	\
-					 | IBS_CAPS_OPSAM)
-
-/*
- * IBS APIC setup
- */
-#define IBSCTL				0x1cc
-#define IBSCTL_LVT_OFFSET_VALID		(1ULL<<8)
-#define IBSCTL_LVT_OFFSET_MASK		0x0F
-
-/*
  * IBS randomization macros
  */
 #define IBS_RANDOM_BITS			12
@@ -439,7 +408,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
 			goto fail;
 		}
 		/* both registers must be reserved */
-		if (num_counters == NUM_COUNTERS_F15H) {
+		if (num_counters == AMD64_NUM_COUNTERS_F15H) {
 			msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
 			msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
 		} else {
@@ -741,9 +710,9 @@ static int op_amd_init(struct oprofile_operations *ops)
 	ops->create_files = setup_ibs_files;
 
 	if (boot_cpu_data.x86 == 0x15) {
-		num_counters = NUM_COUNTERS_F15H;
+		num_counters = AMD64_NUM_COUNTERS_F15H;
 	} else {
-		num_counters = NUM_COUNTERS;
+		num_counters = AMD64_NUM_COUNTERS;
 	}
 
 	op_amd_spec.num_counters = num_counters;
-- 
1.7.5.3



^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 2/7] perf, x86: Implement IBS initialization
  2011-07-28 13:46 [PATCH 0/7] perf, x86: Implement AMD IBS Robert Richter
  2011-07-28 13:46 ` [PATCH 1/7] perf, x86: share IBS macros between perf and oprofile Robert Richter
@ 2011-07-28 13:46 ` Robert Richter
  2011-07-29 16:58   ` Peter Zijlstra
  2011-08-02 11:49   ` Peter Zijlstra
  2011-07-28 13:46 ` [PATCH 3/7] perf, x86: Implement IBS event configuration Robert Richter
                   ` (5 subsequent siblings)
  7 siblings, 2 replies; 39+ messages in thread
From: Robert Richter @ 2011-07-28 13:46 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML, Robert Richter

This patch implements IBS feature detection and initialization. The
code is shared between perf and oprofile. If ibs is available on the
system for perf, a pmu is setup.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/include/asm/perf_event.h        |    2 +
 arch/x86/kernel/cpu/Makefile             |    2 +-
 arch/x86/kernel/cpu/perf_event_amd_ibs.c |  301 ++++++++++++++++++++++++++++++
 arch/x86/oprofile/nmi_int.c              |    2 -
 arch/x86/oprofile/op_model_amd.c         |  197 -------------------
 arch/x86/oprofile/op_x86_model.h         |    1 -
 6 files changed, 304 insertions(+), 201 deletions(-)
 create mode 100644 arch/x86/kernel/cpu/perf_event_amd_ibs.c

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index bc801ac..e7d2f15 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -156,6 +156,8 @@ union cpuid10_edx {
 #define IBS_OP_MAX_CNT		0x0000FFFFULL
 #define IBS_OP_MAX_CNT_EXT	0x007FFFFFULL	/* not a register bit mask */
 
+extern u32 get_ibs_caps(void);
+
 #ifdef CONFIG_PERF_EVENTS
 extern void perf_events_lapic_init(void);
 
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 6042981..ab70cd1 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -31,7 +31,7 @@ obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
 obj-$(CONFIG_X86_MCE)			+= mcheck/
 obj-$(CONFIG_MTRR)			+= mtrr/
 
-obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o
+obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o perf_event_amd_ibs.o
 
 quiet_cmd_mkcapflags = MKCAP   $@
       cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
new file mode 100644
index 0000000..cae9528
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -0,0 +1,301 @@
+/*
+ * Performance events - AMD IBS
+ *
+ *  Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
+
+static struct pmu perf_ibs;
+
+static int perf_ibs_init(struct perf_event *event)
+{
+	if (perf_ibs.type != event->attr.type)
+		return -ENOENT;
+	pr_info("Found event %p (config=%016llx) for pmu %s (type=%d) on cpu %d\n",
+		event, event->attr.config, perf_ibs.name, event->attr.type, event->oncpu);
+	return 0;
+}
+
+static int perf_ibs_add(struct perf_event *event, int flags)
+{
+	pr_info("Adding event %p (config=%016llx) to pmu %s (type=%d) on cpu %d\n",
+		event, event->attr.config, perf_ibs.name, event->attr.type, event->oncpu);
+	return 0;
+}
+
+static void perf_ibs_del(struct perf_event *event, int flags)
+{
+	pr_info("Removing event %p (config=%016llx) to pmu %s (type=%d) on cpu %d\n",
+		event, event->attr.config, perf_ibs.name, event->attr.type, event->oncpu);
+}
+
+static struct pmu perf_ibs = {
+	.event_init= perf_ibs_init,
+	.add= perf_ibs_add,
+	.del= perf_ibs_del,
+};
+
+static __init int perf_event_ibs_init(void)
+{
+	u32 caps;
+
+	caps = get_ibs_caps();
+	if (!caps)
+		return -ENODEV;	/* ibs not supported by the cpu */
+
+	perf_pmu_register(&perf_ibs, "ibs", -1);
+	printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", caps);
+
+	return 0;
+}
+
+#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
+
+static __init int perf_event_ibs_init(void) { return 0; }
+
+#endif
+
+/* IBS - apic initialization, for perf and oprofile */
+
+static __init u32 __get_ibs_caps(void)
+{
+	u32 caps;
+	unsigned int max_level;
+
+	if (!boot_cpu_has(X86_FEATURE_IBS))
+		return 0;
+
+	/* check IBS cpuid feature flags */
+	max_level = cpuid_eax(0x80000000);
+	if (max_level < IBS_CPUID_FEATURES)
+		return IBS_CAPS_DEFAULT;
+
+	caps = cpuid_eax(IBS_CPUID_FEATURES);
+	if (!(caps & IBS_CAPS_AVAIL))
+		/* cpuid flags not valid */
+		return IBS_CAPS_DEFAULT;
+
+	return caps;
+}
+
+static u32 ibs_caps;
+
+u32 get_ibs_caps(void)
+{
+	return ibs_caps;
+}
+
+EXPORT_SYMBOL(get_ibs_caps);
+
+static inline int get_eilvt(int offset)
+{
+	return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
+}
+
+static inline int put_eilvt(int offset)
+{
+	return !setup_APIC_eilvt(offset, 0, 0, 1);
+}
+
+/*
+ * Check and reserve APIC extended interrupt LVT offset for IBS if available.
+ */
+static inline int ibs_eilvt_valid(void)
+{
+	int offset;
+	u64 val;
+	int valid = 0;
+
+	preempt_disable();
+
+	rdmsrl(MSR_AMD64_IBSCTL, val);
+	offset = val & IBSCTL_LVT_OFFSET_MASK;
+
+	if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
+		pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
+		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
+		goto out;
+	}
+
+	if (!get_eilvt(offset)) {
+		pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
+		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
+		goto out;
+	}
+
+	valid = 1;
+out:
+	preempt_enable();
+
+	return valid;
+}
+
+static int setup_ibs_ctl(int ibs_eilvt_off)
+{
+	struct pci_dev *cpu_cfg;
+	int nodes;
+	u32 value = 0;
+
+	nodes = 0;
+	cpu_cfg = NULL;
+	do {
+		cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
+					 PCI_DEVICE_ID_AMD_10H_NB_MISC,
+					 cpu_cfg);
+		if (!cpu_cfg)
+			break;
+		++nodes;
+		pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
+				       | IBSCTL_LVT_OFFSET_VALID);
+		pci_read_config_dword(cpu_cfg, IBSCTL, &value);
+		if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
+			pci_dev_put(cpu_cfg);
+			printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
+			       "IBSCTL = 0x%08x\n", value);
+			return -EINVAL;
+		}
+	} while (1);
+
+	if (!nodes) {
+		printk(KERN_DEBUG "No CPU node configured for IBS\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/*
+ * This runs only on the current cpu. We try to find an LVT offset and
+ * setup the local APIC. For this we must disable preemption. On
+ * success we initialize all nodes with this offset. This updates then
+ * the offset in the IBS_CTL per-node msr. The per-core APIC setup of
+ * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that
+ * is using the new offset.
+ */
+static int force_ibs_eilvt_setup(void)
+{
+	int offset;
+	int ret;
+
+	preempt_disable();
+	/* find the next free available EILVT entry, skip offset 0 */
+	for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
+		if (get_eilvt(offset))
+			break;
+	}
+	preempt_enable();
+
+	if (offset == APIC_EILVT_NR_MAX) {
+		printk(KERN_DEBUG "No EILVT entry available\n");
+		return -EBUSY;
+	}
+
+	ret = setup_ibs_ctl(offset);
+	if (ret)
+		goto out;
+
+	if (!ibs_eilvt_valid()) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset);
+	pr_err(FW_BUG "workaround enabled for IBS LVT offset\n");
+
+	return 0;
+out:
+	preempt_disable();
+	put_eilvt(offset);
+	preempt_enable();
+	return ret;
+}
+
+static inline int get_ibs_lvt_offset(void)
+{
+	u64 val;
+
+	rdmsrl(MSR_AMD64_IBSCTL, val);
+	if (!(val & IBSCTL_LVT_OFFSET_VALID))
+		return -EINVAL;
+
+	return val & IBSCTL_LVT_OFFSET_MASK;
+}
+
+static void setup_APIC_ibs(void *dummy)
+{
+	int offset;
+
+	offset = get_ibs_lvt_offset();
+	if (offset < 0)
+		goto failed;
+
+	if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
+		return;
+failed:
+	pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
+		smp_processor_id());
+}
+
+static void clear_APIC_ibs(void *dummy)
+{
+	int offset;
+
+	offset = get_ibs_lvt_offset();
+	if (offset >= 0)
+		setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
+}
+
+static int __cpuinit
+perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_STARTING:
+		setup_APIC_ibs(NULL);
+		break;
+	case CPU_DYING:
+		clear_APIC_ibs(NULL);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static __init int amd_ibs_init(void)
+{
+	u32 caps;
+	int ret;
+
+	caps = __get_ibs_caps();
+	if (!caps)
+		return -ENODEV;	/* ibs not supported by the cpu */
+
+	if (!ibs_eilvt_valid()) {
+		ret = force_ibs_eilvt_setup();
+		if (ret) {
+			pr_err("Failed to setup IBS, %d\n", ret);
+			return ret;
+		}
+	}
+
+	get_online_cpus();
+	ibs_caps = caps;
+	/* make ibs_caps visible to other cpus: */
+	smp_mb();
+	perf_cpu_notifier(perf_ibs_cpu_notifier);
+	smp_call_function(setup_APIC_ibs, NULL, 1);
+	put_online_cpus();
+
+	return perf_event_ibs_init();
+}
+
+/* Since we need the pci subsystem to init ibs we can't do this earlier: */
+device_initcall(amd_ibs_init);
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 68894fd..28e0b1c 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -402,8 +402,6 @@ static void nmi_cpu_shutdown(void *dummy)
 	apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
 	apic_write(APIC_LVTERR, v);
 	nmi_cpu_restore_registers(msrs);
-	if (model->cpu_down)
-		model->cpu_down();
 }
 
 static void nmi_cpu_up(void *dummy)
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index e947e5c..303f086 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -74,27 +74,6 @@ static struct ibs_state ibs_state;
 #define IBS_RANDOM_MASK			((1ULL << IBS_RANDOM_BITS) - 1)
 #define IBS_RANDOM_MAXCNT_OFFSET	(1ULL << (IBS_RANDOM_BITS - 5))
 
-static u32 get_ibs_caps(void)
-{
-	u32 ibs_caps;
-	unsigned int max_level;
-
-	if (!boot_cpu_has(X86_FEATURE_IBS))
-		return 0;
-
-	/* check IBS cpuid feature flags */
-	max_level = cpuid_eax(0x80000000);
-	if (max_level < IBS_CPUID_FEATURES)
-		return IBS_CAPS_DEFAULT;
-
-	ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);
-	if (!(ibs_caps & IBS_CAPS_AVAIL))
-		/* cpuid flags not valid */
-		return IBS_CAPS_DEFAULT;
-
-	return ibs_caps;
-}
-
 /*
  * 16-bit Linear Feedback Shift Register (LFSR)
  *
@@ -285,81 +264,6 @@ static void op_amd_stop_ibs(void)
 		wrmsrl(MSR_AMD64_IBSOPCTL, 0);
 }
 
-static inline int get_eilvt(int offset)
-{
-	return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
-}
-
-static inline int put_eilvt(int offset)
-{
-	return !setup_APIC_eilvt(offset, 0, 0, 1);
-}
-
-static inline int ibs_eilvt_valid(void)
-{
-	int offset;
-	u64 val;
-	int valid = 0;
-
-	preempt_disable();
-
-	rdmsrl(MSR_AMD64_IBSCTL, val);
-	offset = val & IBSCTL_LVT_OFFSET_MASK;
-
-	if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
-		pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
-		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
-		goto out;
-	}
-
-	if (!get_eilvt(offset)) {
-		pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
-		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
-		goto out;
-	}
-
-	valid = 1;
-out:
-	preempt_enable();
-
-	return valid;
-}
-
-static inline int get_ibs_offset(void)
-{
-	u64 val;
-
-	rdmsrl(MSR_AMD64_IBSCTL, val);
-	if (!(val & IBSCTL_LVT_OFFSET_VALID))
-		return -EINVAL;
-
-	return val & IBSCTL_LVT_OFFSET_MASK;
-}
-
-static void setup_APIC_ibs(void)
-{
-	int offset;
-
-	offset = get_ibs_offset();
-	if (offset < 0)
-		goto failed;
-
-	if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
-		return;
-failed:
-	pr_warn("oprofile: IBS APIC setup failed on cpu #%d\n",
-		smp_processor_id());
-}
-
-static void clear_APIC_ibs(void)
-{
-	int offset;
-
-	offset = get_ibs_offset();
-	if (offset >= 0)
-		setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
-}
-
 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
 
 static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
@@ -473,15 +377,6 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 		val |= op_x86_get_ctrl(model, &counter_config[virt]);
 		wrmsrl(msrs->controls[i].addr, val);
 	}
-
-	if (ibs_caps)
-		setup_APIC_ibs();
-}
-
-static void op_amd_cpu_shutdown(void)
-{
-	if (ibs_caps)
-		clear_APIC_ibs();
 }
 
 static int op_amd_check_ctrs(struct pt_regs * const regs,
@@ -544,86 +439,6 @@ static void op_amd_stop(struct op_msrs const * const msrs)
 	op_amd_stop_ibs();
 }
 
-static int setup_ibs_ctl(int ibs_eilvt_off)
-{
-	struct pci_dev *cpu_cfg;
-	int nodes;
-	u32 value = 0;
-
-	nodes = 0;
-	cpu_cfg = NULL;
-	do {
-		cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
-					 PCI_DEVICE_ID_AMD_10H_NB_MISC,
-					 cpu_cfg);
-		if (!cpu_cfg)
-			break;
-		++nodes;
-		pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
-				       | IBSCTL_LVT_OFFSET_VALID);
-		pci_read_config_dword(cpu_cfg, IBSCTL, &value);
-		if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
-			pci_dev_put(cpu_cfg);
-			printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
-			       "IBSCTL = 0x%08x\n", value);
-			return -EINVAL;
-		}
-	} while (1);
-
-	if (!nodes) {
-		printk(KERN_DEBUG "No CPU node configured for IBS\n");
-		return -ENODEV;
-	}
-
-	return 0;
-}
-
-/*
- * This runs only on the current cpu. We try to find an LVT offset and
- * setup the local APIC. For this we must disable preemption. On
- * success we initialize all nodes with this offset. This updates then
- * the offset in the IBS_CTL per-node msr. The per-core APIC setup of
- * the IBS interrupt vector is called from op_amd_setup_ctrs()/op_-
- * amd_cpu_shutdown() using the new offset.
- */
-static int force_ibs_eilvt_setup(void)
-{
-	int offset;
-	int ret;
-
-	preempt_disable();
-	/* find the next free available EILVT entry, skip offset 0 */
-	for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
-		if (get_eilvt(offset))
-			break;
-	}
-	preempt_enable();
-
-	if (offset == APIC_EILVT_NR_MAX) {
-		printk(KERN_DEBUG "No EILVT entry available\n");
-		return -EBUSY;
-	}
-
-	ret = setup_ibs_ctl(offset);
-	if (ret)
-		goto out;
-
-	if (!ibs_eilvt_valid()) {
-		ret = -EFAULT;
-		goto out;
-	}
-
-	pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset);
-	pr_err(FW_BUG "workaround enabled for IBS LVT offset\n");
-
-	return 0;
-out:
-	preempt_disable();
-	put_eilvt(offset);
-	preempt_enable();
-	return ret;
-}
-
 /*
  * check and reserve APIC extended interrupt LVT offset for IBS if
  * available
@@ -636,17 +451,6 @@ static void init_ibs(void)
 	if (!ibs_caps)
 		return;
 
-	if (ibs_eilvt_valid())
-		goto out;
-
-	if (!force_ibs_eilvt_setup())
-		goto out;
-
-	/* Failed to setup ibs */
-	ibs_caps = 0;
-	return;
-
-out:
 	printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
 }
 
@@ -729,7 +533,6 @@ struct op_x86_model_spec op_amd_spec = {
 	.init			= op_amd_init,
 	.fill_in_addresses	= &op_amd_fill_in_addresses,
 	.setup_ctrs		= &op_amd_setup_ctrs,
-	.cpu_down		= &op_amd_cpu_shutdown,
 	.check_ctrs		= &op_amd_check_ctrs,
 	.start			= &op_amd_start,
 	.stop			= &op_amd_stop,
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 89017fa..71e8a67 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -43,7 +43,6 @@ struct op_x86_model_spec {
 	int		(*fill_in_addresses)(struct op_msrs * const msrs);
 	void		(*setup_ctrs)(struct op_x86_model_spec const *model,
 				      struct op_msrs const * const msrs);
-	void		(*cpu_down)(void);
 	int		(*check_ctrs)(struct pt_regs * const regs,
 				      struct op_msrs const * const msrs);
 	void		(*start)(struct op_msrs const * const msrs);
-- 
1.7.5.3



^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 3/7] perf, x86: Implement IBS event configuration
  2011-07-28 13:46 [PATCH 0/7] perf, x86: Implement AMD IBS Robert Richter
  2011-07-28 13:46 ` [PATCH 1/7] perf, x86: share IBS macros between perf and oprofile Robert Richter
  2011-07-28 13:46 ` [PATCH 2/7] perf, x86: Implement IBS initialization Robert Richter
@ 2011-07-28 13:46 ` Robert Richter
  2011-08-02 11:35   ` Peter Zijlstra
  2011-07-28 13:46 ` [PATCH 4/7] perf, x86: Implement IBS interrupt handler Robert Richter
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 39+ messages in thread
From: Robert Richter @ 2011-07-28 13:46 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML, Robert Richter

This patch implements perf configuration for AMD IBS. The IBS pmu is
selected using the type attribute in sysfs. There are two types of ibs
pmus, for instruction fetch (IBS_FETCH) and for instruction execution
(IBS_OP):

 /sys/bus/event_source/devices/ibs_fetch/type
 /sys/bus/event_source/devices/ibs_op/type

Except for the sample period IBS can only be set up with raw config
values and raw data samples. The event attributes for the syscall
should be programmed like this (IBS_FETCH):

        type = get_pmu_type("/sys/bus/event_source/devices/ibs_fetch/type");

        memset(&attr, 0, sizeof(attr));
        attr.type        = type;
        attr.sample_type = PERF_SAMPLE_CPU | PERF_SAMPLE_RAW;
        attr.config      = IBS_FETCH_CONFIG_DEFAULT;

This implementation does not yet support 64 bit counters. It is
limited to the hardware counter bit width which is 20 bits. 64 bit
support can be added later.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/kernel/cpu/perf_event_amd_ibs.c |   99 ++++++++++++++++++++++++++----
 1 files changed, 87 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index cae9528..bd77209 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -12,34 +12,108 @@
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
 
-static struct pmu perf_ibs;
+#define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
+#define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT
+
+struct perf_ibs {
+	struct pmu	pmu;
+	unsigned int	msr;
+	u64		config_mask;
+	u64		cnt_mask;
+	u64		enable_mask;
+};
+
+static struct perf_ibs perf_ibs_fetch;
+static struct perf_ibs perf_ibs_op;
+
+static struct perf_ibs *get_ibs_pmu(int type)
+{
+	if (perf_ibs_fetch.pmu.type == type)
+		return &perf_ibs_fetch;
+	if (perf_ibs_op.pmu.type == type)
+		return &perf_ibs_op;
+	return NULL;
+}
 
 static int perf_ibs_init(struct perf_event *event)
 {
-	if (perf_ibs.type != event->attr.type)
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_ibs *perf_ibs;
+	u64 max_cnt, config;
+
+	perf_ibs = get_ibs_pmu(event->attr.type);
+	if (!perf_ibs)
 		return -ENOENT;
+
+	config = event->attr.config;
+	if (config & ~perf_ibs->config_mask)
+		return -EINVAL;
+
+	if (hwc->sample_period) {
+		if (config & perf_ibs->cnt_mask)
+			/* raw max_cnt may not be set */
+			return -EINVAL;
+		if (hwc->sample_period & 0x0f)
+			/* lower 4 bits can not be set in ibs max cnt */
+			return -EINVAL;
+		max_cnt = hwc->sample_period >> 4;
+		if (max_cnt & ~perf_ibs->cnt_mask)
+			/* out of range */
+			return -EINVAL;
+		config |= max_cnt;
+	} else {
+		max_cnt = config & perf_ibs->cnt_mask;
+		event->attr.sample_period = max_cnt << 4;
+		hwc->sample_period = event->attr.sample_period;
+	}
+
+	if (!max_cnt)
+		return -EINVAL;
+
+	hwc->config_base = perf_ibs->msr;
+	hwc->config = config;
+
 	pr_info("Found event %p (config=%016llx) for pmu %s (type=%d) on cpu %d\n",
-		event, event->attr.config, perf_ibs.name, event->attr.type, event->oncpu);
+		event, event->attr.config, event->pmu->name, event->attr.type, event->oncpu);
+
 	return 0;
 }
 
 static int perf_ibs_add(struct perf_event *event, int flags)
 {
-	pr_info("Adding event %p (config=%016llx) to pmu %s (type=%d) on cpu %d\n",
-		event, event->attr.config, perf_ibs.name, event->attr.type, event->oncpu);
+	pr_info("Adding event %p (config=%016llx) for pmu %p (name='%s', type=%d) on cpu %d\n",
+		event, event->attr.config, event->pmu, event->pmu->name, event->attr.type, event->oncpu);
 	return 0;
 }
 
 static void perf_ibs_del(struct perf_event *event, int flags)
 {
-	pr_info("Removing event %p (config=%016llx) to pmu %s (type=%d) on cpu %d\n",
-		event, event->attr.config, perf_ibs.name, event->attr.type, event->oncpu);
+	pr_info("Removing event %p (config=%016llx) for pmu %p (name='%s', type=%d) on cpu %d\n",
+		event, event->attr.config, event->pmu, event->pmu->name, event->attr.type, event->oncpu);
 }
 
-static struct pmu perf_ibs = {
-	.event_init= perf_ibs_init,
-	.add= perf_ibs_add,
-	.del= perf_ibs_del,
+static struct perf_ibs perf_ibs_fetch = {
+	.pmu = {
+		.event_init	= perf_ibs_init,
+		.add		= perf_ibs_add,
+		.del		= perf_ibs_del,
+	},
+	.msr			= MSR_AMD64_IBSFETCHCTL,
+	.config_mask		= IBS_FETCH_CONFIG_MASK,
+	.cnt_mask		= IBS_FETCH_MAX_CNT,
+	.enable_mask		= IBS_FETCH_ENABLE,
+};
+
+static struct perf_ibs perf_ibs_op = {
+	.pmu = {
+		.event_init	= perf_ibs_init,
+		.add		= perf_ibs_add,
+		.del		= perf_ibs_del,
+	},
+	.msr			= MSR_AMD64_IBSOPCTL,
+	.config_mask		= IBS_OP_CONFIG_MASK,
+	.cnt_mask		= IBS_OP_MAX_CNT,
+	.enable_mask		= IBS_OP_ENABLE,
 };
 
 static __init int perf_event_ibs_init(void)
@@ -50,7 +124,8 @@ static __init int perf_event_ibs_init(void)
 	if (!caps)
 		return -ENODEV;	/* ibs not supported by the cpu */
 
-	perf_pmu_register(&perf_ibs, "ibs", -1);
+	perf_pmu_register(&perf_ibs_fetch.pmu, "ibs_fetch", -1);
+	perf_pmu_register(&perf_ibs_op.pmu, "ibs_op", -1);
 	printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", caps);
 
 	return 0;
-- 
1.7.5.3



^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 4/7] perf, x86: Implement IBS interrupt handler
  2011-07-28 13:46 [PATCH 0/7] perf, x86: Implement AMD IBS Robert Richter
                   ` (2 preceding siblings ...)
  2011-07-28 13:46 ` [PATCH 3/7] perf, x86: Implement IBS event configuration Robert Richter
@ 2011-07-28 13:46 ` Robert Richter
  2011-07-29 16:58   ` Peter Zijlstra
  2011-08-02 11:43   ` Peter Zijlstra
  2011-07-28 13:46 ` [PATCH 5/7] perf, x86: Implement IBS pmu control ops Robert Richter
                   ` (3 subsequent siblings)
  7 siblings, 2 replies; 39+ messages in thread
From: Robert Richter @ 2011-07-28 13:46 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML, Robert Richter

This patch implements code to handle ibs interrupts. If ibs data is
available a raw perf_event data sample is created and sent back to the
userland. This patch only implements the storage of ibs data in the
raw sample, but this could be extended in a later patch by generating
generic event data such as the rip from the ibs sampling data.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/include/asm/msr-index.h         |    3 +
 arch/x86/kernel/cpu/perf_event_amd_ibs.c |   80 ++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 485b4f1..75f131e 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -127,6 +127,7 @@
 #define MSR_AMD64_IBSFETCHCTL		0xc0011030
 #define MSR_AMD64_IBSFETCHLINAD		0xc0011031
 #define MSR_AMD64_IBSFETCHPHYSAD	0xc0011032
+#define MSR_AMD64_IBSFETCH_REG_COUNT	3
 #define MSR_AMD64_IBSOPCTL		0xc0011033
 #define MSR_AMD64_IBSOPRIP		0xc0011034
 #define MSR_AMD64_IBSOPDATA		0xc0011035
@@ -134,6 +135,8 @@
 #define MSR_AMD64_IBSOPDATA3		0xc0011037
 #define MSR_AMD64_IBSDCLINAD		0xc0011038
 #define MSR_AMD64_IBSDCPHYSAD		0xc0011039
+#define MSR_AMD64_IBSOP_REG_COUNT	7
+#define MSR_AMD64_IBS_REG_COUNT_MAX	MSR_AMD64_IBSOP_REG_COUNT
 #define MSR_AMD64_IBSCTL		0xc001103a
 #define MSR_AMD64_IBSBRTARGET		0xc001103b
 
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index bd77209..09311e3 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -12,6 +12,11 @@
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
 
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+
+#include <asm/nmi.h>
+
 #define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
 #define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT
 
@@ -21,6 +26,8 @@ struct perf_ibs {
 	u64		config_mask;
 	u64		cnt_mask;
 	u64		enable_mask;
+	u64		valid_mask;
+	int		reg_count;
 };
 
 static struct perf_ibs perf_ibs_fetch;
@@ -102,6 +109,8 @@ static struct perf_ibs perf_ibs_fetch = {
 	.config_mask		= IBS_FETCH_CONFIG_MASK,
 	.cnt_mask		= IBS_FETCH_MAX_CNT,
 	.enable_mask		= IBS_FETCH_ENABLE,
+	.valid_mask		= IBS_FETCH_VAL,
+	.reg_count		= MSR_AMD64_IBSFETCH_REG_COUNT,
 };
 
 static struct perf_ibs perf_ibs_op = {
@@ -114,6 +123,76 @@ static struct perf_ibs perf_ibs_op = {
 	.config_mask		= IBS_OP_CONFIG_MASK,
 	.cnt_mask		= IBS_OP_MAX_CNT,
 	.enable_mask		= IBS_OP_ENABLE,
+	.valid_mask		= IBS_OP_VAL,
+	.reg_count		= MSR_AMD64_IBSOP_REG_COUNT,
+};
+
+static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
+{
+	struct perf_event *event = NULL;
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_sample_data data;
+	struct perf_raw_record raw;
+	struct pt_regs regs;
+	u64 buffer[MSR_AMD64_IBS_REG_COUNT_MAX];
+	int i;
+	unsigned int msr;
+	u64 *buf;
+
+	msr = hwc->config_base;
+	buf = buffer;
+	rdmsrl(msr++, *buf);
+	if (!(*buf++ & perf_ibs->valid_mask))
+		return 0;
+
+	perf_sample_data_init(&data, 0);
+	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+		for (i = 1; i < perf_ibs->reg_count; i++)
+			rdmsrl(msr++, *buf++);
+		raw.size = sizeof(u32) + sizeof(u64) * perf_ibs->reg_count;
+		raw.data = buffer;
+		data.raw = &raw;
+	}
+
+	regs = *iregs; /* XXX: update ip from ibs sample */
+
+	if (perf_event_overflow(event, &data, &regs))
+		; /* stop */
+	else
+		/* reenable */
+		wrmsrl(hwc->config_base, hwc->config | perf_ibs->enable_mask);
+
+	return 1;
+}
+
+static int __kprobes
+perf_ibs_nmi_handler(struct notifier_block *self,
+		     unsigned long cmd, void *__args)
+{
+	struct die_args *args = __args;
+	int handled = 0;
+
+	switch (cmd) {
+	case DIE_NMI:
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	handled += perf_ibs_handle_irq(&perf_ibs_fetch, args->regs);
+	handled += perf_ibs_handle_irq(&perf_ibs_op, args->regs);
+
+	if (!handled)
+		return NOTIFY_DONE;
+
+	inc_irq_stat(apic_perf_irqs);
+
+	return NOTIFY_STOP;
+}
+
+static __read_mostly struct notifier_block perf_ibs_nmi_notifier = {
+	.notifier_call		= perf_ibs_nmi_handler,
+	.priority		= NMI_LOCAL_LOW_PRIOR,
 };
 
 static __init int perf_event_ibs_init(void)
@@ -126,6 +205,7 @@ static __init int perf_event_ibs_init(void)
 
 	perf_pmu_register(&perf_ibs_fetch.pmu, "ibs_fetch", -1);
 	perf_pmu_register(&perf_ibs_op.pmu, "ibs_op", -1);
+	register_die_notifier(&perf_ibs_nmi_notifier);
 	printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", caps);
 
 	return 0;
-- 
1.7.5.3



^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 5/7] perf, x86: Implement IBS pmu control ops
  2011-07-28 13:46 [PATCH 0/7] perf, x86: Implement AMD IBS Robert Richter
                   ` (3 preceding siblings ...)
  2011-07-28 13:46 ` [PATCH 4/7] perf, x86: Implement IBS interrupt handler Robert Richter
@ 2011-07-28 13:46 ` Robert Richter
  2011-07-28 13:46 ` [PATCH 6/7] perf, x86: Example code for AMD IBS Robert Richter
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 39+ messages in thread
From: Robert Richter @ 2011-07-28 13:46 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML, Robert Richter

Add code to control the IBS pmu. We need to maintain per-cpu
states. Since some states are used and changed by the nmi handler,
access to these states must be atomic.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/kernel/cpu/perf_event_amd_ibs.c |  113 +++++++++++++++++++++++++++---
 1 files changed, 103 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 09311e3..ea9f360 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -20,6 +20,19 @@
 #define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
 #define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT
 
+enum ibs_states {
+	IBS_ENABLED	= 0,
+	IBS_STARTED	= 1,
+	IBS_STOPPING	= 2,
+
+	IBS_MAX_STATES,
+};
+
+struct cpu_perf_ibs {
+	struct perf_event	*event;
+	unsigned long		state[BITS_TO_LONGS(IBS_MAX_STATES)];
+};
+
 struct perf_ibs {
 	struct pmu	pmu;
 	unsigned int	msr;
@@ -28,6 +41,7 @@ struct perf_ibs {
 	u64		enable_mask;
 	u64		valid_mask;
 	int		reg_count;
+	struct cpu_perf_ibs __percpu *pcpu;
 };
 
 static struct perf_ibs perf_ibs_fetch;
@@ -80,30 +94,77 @@ static int perf_ibs_init(struct perf_event *event)
 	hwc->config_base = perf_ibs->msr;
 	hwc->config = config;
 
-	pr_info("Found event %p (config=%016llx) for pmu %s (type=%d) on cpu %d\n",
-		event, event->attr.config, event->pmu->name, event->attr.type, event->oncpu);
-
 	return 0;
 }
 
+static void perf_ibs_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+	if (test_and_set_bit(IBS_STARTED, pcpu->state))
+		return;
+
+	wrmsrl(hwc->config_base, hwc->config | perf_ibs->enable_mask);
+}
+
+static void perf_ibs_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+	u64 val;
+
+	if (!test_and_clear_bit(IBS_STARTED, pcpu->state))
+		return;
+
+	set_bit(IBS_STOPPING, pcpu->state);
+
+	rdmsrl(hwc->config_base, val);
+	val &= ~perf_ibs->enable_mask;
+	wrmsrl(hwc->config_base, val);
+}
+
 static int perf_ibs_add(struct perf_event *event, int flags)
 {
-	pr_info("Adding event %p (config=%016llx) for pmu %p (name='%s', type=%d) on cpu %d\n",
-		event, event->attr.config, event->pmu, event->pmu->name, event->attr.type, event->oncpu);
+	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+	if (test_and_set_bit(IBS_ENABLED, pcpu->state))
+		return -ENOSPC;
+
+	pcpu->event = event;
+
+	if (flags & PERF_EF_START)
+		perf_ibs_start(event, PERF_EF_RELOAD);
+
 	return 0;
 }
 
 static void perf_ibs_del(struct perf_event *event, int flags)
 {
-	pr_info("Removing event %p (config=%016llx) for pmu %p (name='%s', type=%d) on cpu %d\n",
-		event, event->attr.config, event->pmu, event->pmu->name, event->attr.type, event->oncpu);
+	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+	if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
+		return;
+
+	perf_ibs_stop(event, 0);
+
+	pcpu->event = NULL;
 }
 
+static void perf_ibs_read(struct perf_event *event) { }
+
 static struct perf_ibs perf_ibs_fetch = {
 	.pmu = {
 		.event_init	= perf_ibs_init,
 		.add		= perf_ibs_add,
 		.del		= perf_ibs_del,
+		.start		= perf_ibs_start,
+		.stop		= perf_ibs_stop,
+		.read		= perf_ibs_read,
 	},
 	.msr			= MSR_AMD64_IBSFETCHCTL,
 	.config_mask		= IBS_FETCH_CONFIG_MASK,
@@ -118,6 +179,9 @@ static struct perf_ibs perf_ibs_op = {
 		.event_init	= perf_ibs_init,
 		.add		= perf_ibs_add,
 		.del		= perf_ibs_del,
+		.start		= perf_ibs_start,
+		.stop		= perf_ibs_stop,
+		.read		= perf_ibs_read,
 	},
 	.msr			= MSR_AMD64_IBSOPCTL,
 	.config_mask		= IBS_OP_CONFIG_MASK,
@@ -129,7 +193,8 @@ static struct perf_ibs perf_ibs_op = {
 
 static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
 {
-	struct perf_event *event = NULL;
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+	struct perf_event *event = pcpu->event;
 	struct hw_perf_event *hwc = &event->hw;
 	struct perf_sample_data data;
 	struct perf_raw_record raw;
@@ -139,6 +204,14 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
 	unsigned int msr;
 	u64 *buf;
 
+	if (!test_bit(IBS_STARTED, pcpu->state)) {
+		/* Catch spurious interrupts after stopping IBS: */
+		if (!test_and_clear_bit(IBS_STOPPING, pcpu->state))
+			return 0;
+		rdmsrl(perf_ibs->msr, *buffer);
+		return (*buffer & perf_ibs->valid_mask);
+	}
+
 	msr = hwc->config_base;
 	buf = buffer;
 	rdmsrl(msr++, *buf);
@@ -195,6 +268,26 @@ static __read_mostly struct notifier_block perf_ibs_nmi_notifier = {
 	.priority		= NMI_LOCAL_LOW_PRIOR,
 };
 
+static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
+{
+	struct cpu_perf_ibs __percpu *pcpu;
+	int ret;
+
+	pcpu = alloc_percpu(struct cpu_perf_ibs);
+	if (!pcpu)
+		return -ENOMEM;
+
+	perf_ibs->pcpu = pcpu;
+
+	ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
+	if (ret) {
+		perf_ibs->pcpu = NULL;
+		free_percpu(pcpu);
+	}
+
+	return ret;
+}
+
 static __init int perf_event_ibs_init(void)
 {
 	u32 caps;
@@ -203,8 +296,8 @@ static __init int perf_event_ibs_init(void)
 	if (!caps)
 		return -ENODEV;	/* ibs not supported by the cpu */
 
-	perf_pmu_register(&perf_ibs_fetch.pmu, "ibs_fetch", -1);
-	perf_pmu_register(&perf_ibs_op.pmu, "ibs_op", -1);
+	perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+	perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
 	register_die_notifier(&perf_ibs_nmi_notifier);
 	printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", caps);
 
-- 
1.7.5.3



^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 6/7] perf, x86: Example code for AMD IBS
  2011-07-28 13:46 [PATCH 0/7] perf, x86: Implement AMD IBS Robert Richter
                   ` (4 preceding siblings ...)
  2011-07-28 13:46 ` [PATCH 5/7] perf, x86: Implement IBS pmu control ops Robert Richter
@ 2011-07-28 13:46 ` Robert Richter
  2011-07-29 16:58   ` Peter Zijlstra
  2011-07-28 13:46 ` [PATCH 7/7] perf, x86: Implement 64 bit counter support for IBS Robert Richter
  2011-07-29 17:07 ` [PATCH 0/7] perf, x86: Implement AMD IBS Peter Zijlstra
  7 siblings, 1 reply; 39+ messages in thread
From: Robert Richter @ 2011-07-28 13:46 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML, Robert Richter

This patch includes an example to use IBS via perf_event.

usage: ibs [-h]
       ibs ibs_fetch | ibs_op [-s] [-C CPU] [-m BUFFERPAGES] <command>

        <command>
                Command to execute.

        -e CONFIG
                64 bit configuration value, refers to msrs
                IbsFetchCtl (0xC0011030) or IbsOpCtl (0xC0011033).
                The default sample period is set to 100000.

        -c COUNT
                Event period to sample (default: 100000).

        -h
                Print help.

        -s
                system wide profiling (set per default)

        -C CPU
                profile on CPU (not yet implemented)

        -m BUFFERPAGES
                Per-cpu buffer pages to allocate.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 tools/perf/Documentation/examples/Makefile |   44 +++
 tools/perf/Documentation/examples/ibs.c    |  436 ++++++++++++++++++++++++++++
 2 files changed, 480 insertions(+), 0 deletions(-)
 create mode 100644 tools/perf/Documentation/examples/Makefile
 create mode 100644 tools/perf/Documentation/examples/ibs.c

diff --git a/tools/perf/Documentation/examples/Makefile b/tools/perf/Documentation/examples/Makefile
new file mode 100644
index 0000000..cfc9647
--- /dev/null
+++ b/tools/perf/Documentation/examples/Makefile
@@ -0,0 +1,44 @@
+all:	ibs
+
+CFLAGS += -I../..
+CFLAGS += -I../../util/include
+CFLAGS += -DNO_NEWT_SUPPORT
+
+LIB_FILE=../../libperf.a
+
+INSTALL = install
+
+ifeq ("$(origin O)", "command line")
+	OUTPUT := $(O)/
+endif
+
+ifneq ($(OUTPUT),)
+# check that the output directory actually exists
+OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd)
+$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist))
+endif
+
+ifndef DESTDIR
+prefix = $(HOME)
+endif
+bindir_relative = bin
+bindir = $(prefix)/$(bindir_relative)
+
+DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
+bindir_SQ = $(subst ','\'',$(bindir))
+
+../../libperf.a:
+	$(MAKE) CFLAGS="-DNO_NEWT_SUPPORT" -C ../.. libperf.a
+
+$(OUTPUT)ibs: ibs.c $(LIB_FILE)
+	$(CC) $(CFLAGS) $^ -o $@
+
+clean:
+	$(MAKE) -C ../.. clean
+	$(RM) ibs
+
+install: all
+	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
+	$(INSTALL) $(OUTPUT)ibs '$(DESTDIR_SQ)$(bindir_SQ)'
+
+.PHONY:	all clean install
diff --git a/tools/perf/Documentation/examples/ibs.c b/tools/perf/Documentation/examples/ibs.c
new file mode 100644
index 0000000..3d4e334
--- /dev/null
+++ b/tools/perf/Documentation/examples/ibs.c
@@ -0,0 +1,436 @@
+/*
+ * IBS sampling example
+ *
+ *  Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
+ *
+ * Sample code that attaches an event to a specified PMU.
+ *
+ * Compiling:
+ *
+ *  $ cd linux         # Linux kernel source dir
+ *  $ make -C tools/perf/Documentation/examples ibs
+ *
+ * Running:
+ *
+ *  $ ./ibs ibs_fetch -s -m 256 <command>
+ *
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdio.h>
+#include <poll.h>
+#include <stdlib.h>
+#include <sys/ptrace.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <inttypes.h>
+
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+
+struct perf_config {
+	uint64_t	config;
+	uint64_t	sample_period;
+	char		*sysfs;
+	int		pid;
+	int		cpu;
+	int		mmap_pages;
+	char		**argv;
+};
+
+static uint64_t collected_samples, lost_samples, sum_period;
+
+static void usage(void)
+{
+	printf(
+"usage: ibs [-h]\n"
+"       ibs ibs_fetch | ibs_op [-s] [-C CPU] [-m BUFFERPAGES] <command>\n"
+"\n"
+"        <command>\n"
+"                Command to execute.\n"
+"\n"
+"        -e CONFIG\n"
+"                64 bit configuration value, refers to msrs\n"
+"                IbsFetchCtl (0xC0011030) or IbsOpCtl (0xC0011033).\n"
+"                The default sample period is set to 100000.\n"
+"\n"
+"        -c COUNT\n"
+"                Event period to sample (default: 100000).\n"
+"\n"
+"        -h\n"
+"                Print help.\n"
+"\n"
+"        -s\n"
+"                system wide profiling (set per default)\n"
+"\n"
+"        -C CPU\n"
+"                profile on CPU (not yet implemented)\n"
+"\n"
+"        -m BUFFERPAGES\n"
+"                Per-cpu buffer pages to allocate.\n"
+);
+	exit(0);
+}
+
+#define IBS_FETCH_DEFAULT	((1ULL<<57)|(100000ULL>>4))
+#define IBS_OP_DEFAULT		((0ULL<<19)|(100000ULL>>4))
+
+#define IBS_MAX_CNT		0x0000FFFFULL
+
+#define IBS_FETCH_SYSFS "/sys/bus/event_source/devices/ibs_fetch/type"
+#define IBS_OP_SYSFS    "/sys/bus/event_source/devices/ibs_op/type"
+
+static int ibs_config(struct perf_config *config, int argc, char **argv)
+{
+	int c;
+
+	memset(config, 0, sizeof(*config));
+	config->pid = -1;	/* support for system wide profiling only */
+	config->cpu = -1;
+	config->mmap_pages = 1; /* need buffer for ibs */
+
+	c = getopt(argc, argv,"+h");
+	if (c != -1 || !argv[optind]) {
+		usage();
+		exit(0);
+	}
+
+	if (!strcmp(argv[optind], "ibs_fetch")) {
+		config->sysfs  = IBS_FETCH_SYSFS;
+		config->config = IBS_FETCH_DEFAULT;
+	} else if (!strcmp(argv[optind], "ibs_op")) {
+		config->sysfs  = IBS_OP_SYSFS;
+		config->config = IBS_OP_DEFAULT;
+	} else {
+		errx(1, "specify ibs_fetch or ibs_op\n");
+	}
+
+	optind++;
+
+	while (1) {
+		c = getopt(argc, argv,"+he:c:sC:m:v");
+		if (c == -1)
+			break;
+		switch (c) {
+		case 'h':
+			usage();
+			exit(0);
+		case 'e':
+			/* event configuration */
+			config->config = atoll(optarg);
+			break;
+		case 'c':
+			/* sample period */
+			config->sample_period = atoll(optarg);
+			config->config &= ~IBS_MAX_CNT;
+			if (!config->sample_period)
+				errx(1, "invalid sample period");
+			break;
+		case 's':
+			/* system wide profiling */
+			if (config->pid)
+				break;
+			config->pid = -1;
+			config->cpu = -1;
+			break;
+		case 'C':
+			/* profile cpu */
+			config->pid = -1;
+			config->cpu = atoi(optarg);
+			break;
+		case 'm':
+			config->mmap_pages = atoi(optarg);
+			break;
+		default:
+			errx(1, "unknown option");
+		}
+	}
+
+	if (!argv[optind])
+		errx(1, "you must specify a command to execute\n");
+
+	config->argv = argv + optind;
+
+	if (config->mmap_pages > 1 && ((config->mmap_pages) & 0x1))
+		errx(1, "number of pages must be power of 2\n");
+
+	return 0;
+}
+
+#define BUFSIZ_ATOI	32
+
+static int get_pmu_type(char *sysfs)
+{	int pmu, ret = 0;
+	char buf[BUFSIZ_ATOI];
+	size_t size;
+
+	pmu = open(sysfs, O_RDONLY);
+	if (pmu == -1)
+		return -errno;
+	size = read(pmu, buf, BUFSIZ - 1);
+	if (size < 0)
+		ret = -errno;
+	close(pmu);
+
+	if (ret)
+		return ret;
+
+	buf[size] = '0';
+
+	return atoi(buf);
+}
+
+static volatile int done = 0;
+
+static void cld_handler(int n)
+{
+	done = 1;
+}
+
+static int child(char **arg)
+{
+	ptrace(PTRACE_TRACEME, 0, NULL, NULL);
+	execvp(arg[0], arg);
+	return -1;
+}
+
+static void print_ibs_fetch(int cpu, uint64_t *ibs)
+{
+	printf("IBS_fetch sample on cpu%d\tIBS0: 0x%016"PRIx64" IBS1: 0x%016"PRIx64" IBS2:0x%016"PRIx64"\n",
+	       cpu, ibs[0], ibs[1], ibs[2]);
+}
+
+static void print_ibs_op(int cpu, uint64_t *ibs)
+{
+	printf("IBS_OP sample on cpu%d\t"
+	       "\t IBS0: 0x%016"PRIx64" IBS1: 0x%016"PRIx64" IBS2: 0x%016"PRIx64"\n"
+	        "\tIBS3: 0x%016"PRIx64" IBS4: 0x%016"PRIx64" IBS5: 0x%016"PRIx64" IBS6: 0x%016"PRIx64"\n",
+	       cpu, ibs[0], ibs[1], ibs[2], ibs[3], ibs[4], ibs[5], ibs[6]);
+}
+
+#define MSR_AMD64_IBSFETCH_SIZE		3
+#define MSR_AMD64_IBSOP_SIZE		7
+
+static int print_ibs(struct perf_sample *sample)
+{
+	switch (sample->raw_size >> 3) {
+	case MSR_AMD64_IBSFETCH_SIZE:
+		print_ibs_fetch(sample->cpu, sample->raw_data);
+		return 0;
+	case MSR_AMD64_IBSOP_SIZE:
+		print_ibs_op(sample->cpu, sample->raw_data);
+		return 0;
+	default:
+		printf("invalid: raw_size = %d, p = %p\n",
+		       sample->raw_size, (u64*)sample->raw_data);
+		return -EINVAL;
+	}
+}
+
+static void print_event(union perf_event *event)
+{
+	int idx, size = event->sample.header.size;
+	u64 *val = event->sample.array;
+
+	printf("unrecognized event, type = %d, size = %d, header = 0x%016"PRIx64":\n",
+	       event->sample.header.type, size, *(u64*)&event->sample.header);
+
+	for (idx = 1; size > 0; idx++, size -= 8) {
+		printf(" 0x%016"PRIx64, *val++);
+		if (!(idx % 8))
+			printf("\n");
+	}
+	printf("\n");
+}
+
+static int ibs_run(struct perf_config *config)
+{
+	struct perf_event_attr attr;
+	struct perf_sample sample;
+	struct perf_evsel *evsel = NULL;
+	struct perf_evlist *evlist = NULL;
+	struct cpu_map *cpus = NULL;
+	struct thread_map *threads = NULL;
+	struct perf_evsel *pos, *n;
+	union perf_event *event;
+	pid_t pid = config->pid;
+	char cpu_list[8];
+	int type, idx, status, ready = 0;
+	int ret = -ENOMEM;
+	static uint64_t ovfl_count; /* static to avoid setjmp issue */
+
+	type = get_pmu_type(config->sysfs);
+	if (type < 0) {
+		fprintf(stderr, "Failed to get pmu type: %d\n", type);
+		return type;
+	}
+
+	memset(&attr, 0, sizeof(attr));
+	attr.type = type;
+	attr.sample_type   = PERF_SAMPLE_CPU | PERF_SAMPLE_RAW;
+	attr.sample_period = config->sample_period;
+	attr.config        = config->config;
+
+	evsel = perf_evsel__new(&attr, 0);
+
+	if (config->cpu == -1) {
+		cpus = cpu_map__new(NULL);
+	} else {
+		snprintf(cpu_list, sizeof(cpu_list), "%d", config->cpu);
+		cpus = cpu_map__new(cpu_list);
+	}
+
+	threads = thread_map__new(pid, pid);
+
+	evlist = perf_evlist__new(cpus, threads);
+
+	if (!evsel || !evlist || !cpus || !threads)
+		goto out;
+
+	ret = perf_evsel__alloc_counts(evsel, cpus->nr);
+	if (ret < 0)
+		goto out;
+
+	perf_evlist__add(evlist, evsel);
+
+	list_for_each_entry(pos, &evlist->entries, node) {
+		if (perf_evsel__open(pos, evlist->cpus, evlist->threads, 0) < 0) {
+			ret = -errno;
+			fprintf(stderr, "cannot open events, %d\n", ret);
+			goto out;
+		}
+	}
+
+	if (perf_evlist__mmap(evlist, config->mmap_pages, false) < 0) {
+		ret = -errno;
+		fprintf(stderr, "failed to mmap with %d (%s)\n",
+			ret, strerror(ret));
+		goto out;
+	}
+
+	/*
+	 * Create the child task
+	 */
+	if ((pid=fork()) == -1) {
+		ret = -errno;
+		fprintf(stderr, "cannot fork process\n");
+		goto out;
+	}
+
+	if (pid == 0)
+		exit(child(config->argv));
+
+	/*
+	 * wait for the child to exec
+	 */
+	ret = waitpid(pid, &status, WUNTRACED);
+	if (ret == -1)
+		err(1, "waitpid failed");
+
+	if (WIFEXITED(status))
+		errx(1, "task %s [%d] exited already status %d\n",
+		     config->argv[0], pid, WEXITSTATUS(status));
+
+	/*
+	 * effectively activate monitoring
+	 */
+	ptrace(PTRACE_DETACH, pid, NULL, 0);
+
+	signal(SIGCHLD, cld_handler);
+
+	/*
+	 * core loop
+	 */
+	for (ret = 0; !ret; ) {
+		if (done && ready)
+			break;
+		ready = done;
+
+		ret = poll(evlist->pollfd, evlist->nr_fds, done ? 0 : -1);
+
+		if (ret > 0) {
+			ovfl_count += ret;
+		} else if (ret < 0) {
+			ret = -errno;
+			if (ret != -EINTR)
+				break;
+			ret = 0;
+		}
+
+		list_for_each_entry(pos, &evlist->entries, node) {
+			if (ret < 0)
+				break;
+			ret = __perf_evsel__read(pos, evlist->cpus->nr,
+						 evlist->threads->nr, false);
+		}
+
+		for (idx = 0; !ret, idx < evlist->nr_fds; idx++) {
+			if (done)
+				ioctl(evlist->pollfd[idx].fd,
+				      PERF_EVENT_IOC_DISABLE);
+			while (event = perf_evlist__mmap_read(evlist, idx)) {
+				ready = 0;
+				ret = perf_event__parse_sample(event,
+							       evsel->attr.sample_type,
+							       perf_evsel__sample_size(evsel),
+							       false, &sample);
+				if (ret)
+					break;
+				collected_samples++;
+				if (print_ibs(&sample))
+					print_event(event);
+			}
+		}
+	}
+
+	/*
+	 * cleanup child
+	 */
+	waitpid(pid, &status, 0);
+
+	printf("%"PRIu64" samples collected in %"PRIu64" poll events, %"PRIu64" lost samples\n",
+		collected_samples, ovfl_count, lost_samples);
+	if (collected_samples)
+		printf("avg period=%"PRIu64"\n", sum_period / collected_samples);
+out:
+	if (evlist) {
+		perf_evlist__munmap(evlist);
+		list_for_each_entry_safe(pos, n, &evlist->entries, node) {
+			perf_evsel__close_fd(pos, evlist->cpus->nr,
+					     evlist->threads->nr);
+			list_del(&pos->node);
+		}
+		free(evsel->counts);
+		evsel->counts = NULL;
+		perf_evlist__delete_maps(evlist);
+		cpus = NULL;
+		threads = NULL;
+	}
+	free(evsel);
+	free(evlist);
+	free(cpus);
+	free(threads);
+
+	return ret;
+}
+
+int main(int argc, char **argv)
+{
+	struct perf_config config;
+	int ret;
+
+	ret = ibs_config(&config, argc, argv);
+	if (ret)
+		goto fail;
+	ret = ibs_run(&config);
+	if (ret)
+		goto fail;
+	return 0;
+fail:
+	printf("An error occurred: %d (%s)\n", -ret, strerror(-ret));
+	return -1;
+}
-- 
1.7.5.3



^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 7/7] perf, x86: Implement 64 bit counter support for IBS
  2011-07-28 13:46 [PATCH 0/7] perf, x86: Implement AMD IBS Robert Richter
                   ` (5 preceding siblings ...)
  2011-07-28 13:46 ` [PATCH 6/7] perf, x86: Example code for AMD IBS Robert Richter
@ 2011-07-28 13:46 ` Robert Richter
  2011-07-29 16:58   ` Peter Zijlstra
                     ` (2 more replies)
  2011-07-29 17:07 ` [PATCH 0/7] perf, x86: Implement AMD IBS Peter Zijlstra
  7 siblings, 3 replies; 39+ messages in thread
From: Robert Richter @ 2011-07-28 13:46 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML, Robert Richter

This patch implements 64 bit counter support for IBS. The sampling
period is no longer limited to the hw counter width.

The functions perf_event_set_period() and perf_event_try_update() can
be used as generic functions. They can replace similar code that is
duplicate across architectures.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/include/asm/perf_event.h        |    2 +
 arch/x86/kernel/cpu/perf_event_amd_ibs.c |  204 +++++++++++++++++++++++++++---
 2 files changed, 185 insertions(+), 21 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index e7d2f15..cc44a1a 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -150,6 +150,8 @@ union cpuid10_edx {
 #define IBS_FETCH_MAX_CNT	0x0000FFFFULL
 
 /* IbsOpCtl bits */
+/* lower 4 bits of the current count are ignored: */
+#define IBS_OP_CUR_CNT		(0xFFFF0ULL<<32)
 #define IBS_OP_CNT_CTL		(1ULL<<19)
 #define IBS_OP_VAL		(1ULL<<18)
 #define IBS_OP_ENABLE		(1ULL<<17)
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index ea9f360..bd78994 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -40,10 +40,84 @@ struct perf_ibs {
 	u64		cnt_mask;
 	u64		enable_mask;
 	u64		valid_mask;
+	u64		max_period;
 	int		reg_count;
 	struct cpu_perf_ibs __percpu *pcpu;
+	u64		(*get_count)(u64 config);
 };
 
+static int
+perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *count)
+{
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int overflow = 0;
+
+	/*
+	 * If we are way outside a reasonable range then just skip forward:
+	 */
+	if (unlikely(left <= -period)) {
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		overflow = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		overflow = 1;
+	}
+
+	if (unlikely(left < min))
+		left = min;
+
+	if (left > max)
+		left = max;
+
+	*count = (u64)left;
+
+	return overflow;
+}
+
+static  int
+perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int shift = 64 - width;
+	u64 prev_raw_count;
+	u64 delta;
+
+	/*
+	 * Careful: an NMI might modify the previous event value.
+	 *
+	 * Our tactic to handle this is to first atomically read and
+	 * exchange a new raw count - then add that new-prev delta
+	 * count to the generic event atomically:
+	 */
+	prev_raw_count = local64_read(&hwc->prev_count);
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+					new_raw_count) != prev_raw_count)
+		return 0;
+
+	/*
+	 * Now we have the new raw value and have updated the prev
+	 * timestamp already. We can now calculate the elapsed delta
+	 * (event-)time and add that to the generic event.
+	 *
+	 * Careful, not all hw sign-extends above the physical width
+	 * of the count.
+	 */
+	delta = (new_raw_count << shift) - (prev_raw_count << shift);
+	delta >>= shift;
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+
+	return 1;
+}
+
 static struct perf_ibs perf_ibs_fetch;
 static struct perf_ibs perf_ibs_op;
 
@@ -77,18 +151,14 @@ static int perf_ibs_init(struct perf_event *event)
 		if (hwc->sample_period & 0x0f)
 			/* lower 4 bits can not be set in ibs max cnt */
 			return -EINVAL;
-		max_cnt = hwc->sample_period >> 4;
-		if (max_cnt & ~perf_ibs->cnt_mask)
-			/* out of range */
-			return -EINVAL;
-		config |= max_cnt;
 	} else {
 		max_cnt = config & perf_ibs->cnt_mask;
+		config &= ~perf_ibs->cnt_mask;
 		event->attr.sample_period = max_cnt << 4;
 		hwc->sample_period = event->attr.sample_period;
 	}
 
-	if (!max_cnt)
+	if (!hwc->sample_period)
 		return -EINVAL;
 
 	hwc->config_base = perf_ibs->msr;
@@ -97,16 +167,71 @@ static int perf_ibs_init(struct perf_event *event)
 	return 0;
 }
 
+static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
+			       struct hw_perf_event *hwc, u64 *period)
+{
+	int ret;
+
+	/* ignore lower 4 bits in min count: */
+	ret = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
+	local64_set(&hwc->prev_count, 0);
+
+	return ret;
+}
+
+static u64 get_ibs_fetch_count(u64 config)
+{
+	return (config & IBS_FETCH_CNT) >> 12;
+}
+
+static u64 get_ibs_op_count(u64 config)
+{
+	return (config & IBS_OP_CUR_CNT) >> 32;
+}
+
+static void
+perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
+		      u64 config)
+{
+	u64 count = perf_ibs->get_count(config);
+
+	while (!perf_event_try_update(event, count, 20)) {
+		rdmsrl(event->hw.config_base, config);
+		count = perf_ibs->get_count(config);
+	}
+}
+
+/* Note: The enable mask must be encoded in the config argument. */
+static inline void perf_ibs_enable_event(struct hw_perf_event *hwc, u64 config)
+{
+	wrmsrl(hwc->config_base, hwc->config | config);
+}
+
+/*
+ * We cannot restore the ibs pmu state, so we always need to update
+ * the event while stopping it and then reset the state when starting
+ * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags
+ * in perf_ibs_start()/perf_ibs_stop() and instead always do it.
+ */
 static void perf_ibs_start(struct perf_event *event, int flags)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
 	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+	u64 config;
 
-	if (test_and_set_bit(IBS_STARTED, pcpu->state))
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
 		return;
 
-	wrmsrl(hwc->config_base, hwc->config | perf_ibs->enable_mask);
+	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+	hwc->state = 0;
+
+	perf_ibs_set_period(perf_ibs, hwc, &config);
+	config = (config >> 4) | perf_ibs->enable_mask;
+	set_bit(IBS_STARTED, pcpu->state);
+	perf_ibs_enable_event(hwc, config);
+
+	perf_event_update_userpage(event);
 }
 
 static void perf_ibs_stop(struct perf_event *event, int flags)
@@ -115,15 +240,28 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
 	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
 	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
 	u64 val;
+	int stopping;
 
-	if (!test_and_clear_bit(IBS_STARTED, pcpu->state))
-		return;
+	stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
 
-	set_bit(IBS_STOPPING, pcpu->state);
+	if (!stopping && (hwc->state & PERF_HES_UPTODATE))
+		return;
 
 	rdmsrl(hwc->config_base, val);
-	val &= ~perf_ibs->enable_mask;
-	wrmsrl(hwc->config_base, val);
+
+	if (stopping) {
+		set_bit(IBS_STOPPING, pcpu->state);
+		val &= ~perf_ibs->enable_mask;
+		wrmsrl(hwc->config_base, val);
+		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+		hwc->state |= PERF_HES_STOPPED;
+	}
+
+	if (hwc->state & PERF_HES_UPTODATE)
+		return;
+
+	perf_ibs_event_update(perf_ibs, event, val);
+	hwc->state |= PERF_HES_UPTODATE;
 }
 
 static int perf_ibs_add(struct perf_event *event, int flags)
@@ -134,6 +272,8 @@ static int perf_ibs_add(struct perf_event *event, int flags)
 	if (test_and_set_bit(IBS_ENABLED, pcpu->state))
 		return -ENOSPC;
 
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
 	pcpu->event = event;
 
 	if (flags & PERF_EF_START)
@@ -150,9 +290,11 @@ static void perf_ibs_del(struct perf_event *event, int flags)
 	if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
 		return;
 
-	perf_ibs_stop(event, 0);
+	perf_ibs_stop(event, PERF_EF_UPDATE);
 
 	pcpu->event = NULL;
+
+	perf_event_update_userpage(event);
 }
 
 static void perf_ibs_read(struct perf_event *event) { }
@@ -171,7 +313,10 @@ static struct perf_ibs perf_ibs_fetch = {
 	.cnt_mask		= IBS_FETCH_MAX_CNT,
 	.enable_mask		= IBS_FETCH_ENABLE,
 	.valid_mask		= IBS_FETCH_VAL,
+	.max_period		= IBS_FETCH_MAX_CNT << 4,
 	.reg_count		= MSR_AMD64_IBSFETCH_REG_COUNT,
+
+	.get_count		= get_ibs_fetch_count,
 };
 
 static struct perf_ibs perf_ibs_op = {
@@ -188,7 +333,10 @@ static struct perf_ibs perf_ibs_op = {
 	.cnt_mask		= IBS_OP_MAX_CNT,
 	.enable_mask		= IBS_OP_ENABLE,
 	.valid_mask		= IBS_OP_VAL,
+	.max_period		= IBS_OP_MAX_CNT << 4,
 	.reg_count		= MSR_AMD64_IBSOP_REG_COUNT,
+
+	.get_count		= get_ibs_op_count,
 };
 
 static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
@@ -200,9 +348,9 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
 	struct perf_raw_record raw;
 	struct pt_regs regs;
 	u64 buffer[MSR_AMD64_IBS_REG_COUNT_MAX];
-	int i;
+	int i, overflow, reenable;
 	unsigned int msr;
-	u64 *buf;
+	u64 *buf, config;
 
 	if (!test_bit(IBS_STARTED, pcpu->state)) {
 		/* Catch spurious interrupts after stopping IBS: */
@@ -229,11 +377,25 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
 
 	regs = *iregs; /* XXX: update ip from ibs sample */
 
-	if (perf_event_overflow(event, &data, &regs))
-		; /* stop */
-	else
-		/* reenable */
-		wrmsrl(hwc->config_base, hwc->config | perf_ibs->enable_mask);
+	/*
+	 * Emulate IbsOpCurCnt in MSRC001_1033 (IbsOpCtl), not
+	 * supported in all cpus. As this triggered an interrupt, we
+	 * set the current count to the max count.
+	 */
+	config = buffer[0];
+	if (perf_ibs == &perf_ibs_op) {
+		config &= ~IBS_OP_CUR_CNT;
+		config |= (config & IBS_OP_MAX_CNT) << 36;
+	}
+
+	perf_ibs_event_update(perf_ibs, event, config);
+
+	overflow = perf_ibs_set_period(perf_ibs, hwc, &config);
+	reenable = !(overflow && perf_event_overflow(event, &data, &regs));
+	config = (config >> 4) | (reenable ? perf_ibs->enable_mask : 0);
+	perf_ibs_enable_event(hwc, config);
+
+	perf_event_update_userpage(event);
 
 	return 1;
 }
-- 
1.7.5.3



^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH 2/7] perf, x86: Implement IBS initialization
  2011-07-28 13:46 ` [PATCH 2/7] perf, x86: Implement IBS initialization Robert Richter
@ 2011-07-29 16:58   ` Peter Zijlstra
  2011-08-01  5:27     ` Robert Richter
  2011-08-02 11:49   ` Peter Zijlstra
  1 sibling, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2011-07-29 16:58 UTC (permalink / raw)
  To: Robert Richter; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> +static int perf_ibs_add(struct perf_event *event, int flags)
> +{
> +       pr_info("Adding event %p (config=%016llx) to pmu %s (type=%d) on cpu %d\n",
> +               event, event->attr.config, perf_ibs.name, event->attr.type, event->oncpu);
> +       return 0;
> +}
> +
> +static void perf_ibs_del(struct perf_event *event, int flags)
> +{
> +       pr_info("Removing event %p (config=%016llx) to pmu %s (type=%d) on cpu %d\n",
> +               event, event->attr.config, perf_ibs.name, event->attr.type, event->oncpu);
> +} 

While I see you remove them later on, I do find it somewhat strange to
have them here. This is user triggerable code in the context switch
path, lots of nasty can come from this.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 4/7] perf, x86: Implement IBS interrupt handler
  2011-07-28 13:46 ` [PATCH 4/7] perf, x86: Implement IBS interrupt handler Robert Richter
@ 2011-07-29 16:58   ` Peter Zijlstra
  2011-08-01  5:32     ` Robert Richter
  2011-08-02 11:43   ` Peter Zijlstra
  1 sibling, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2011-07-29 16:58 UTC (permalink / raw)
  To: Robert Richter; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> +static int __kprobes
> +perf_ibs_nmi_handler(struct notifier_block *self,
> +                    unsigned long cmd, void *__args)
> +{
> +       struct die_args *args = __args;
> +       int handled = 0;
> +
> +       switch (cmd) {
> +       case DIE_NMI:
> +               break;
> +       default:
> +               return NOTIFY_DONE;
> +       }
> +
> +       handled += perf_ibs_handle_irq(&perf_ibs_fetch, args->regs);
> +       handled += perf_ibs_handle_irq(&perf_ibs_op, args->regs);
> +
> +       if (!handled)
> +               return NOTIFY_DONE;
> +
> +       inc_irq_stat(apic_perf_irqs);
> +
> +       return NOTIFY_STOP;
> +} 

So IBS cannot trigger the whole unknown NMI business? Wouldn't ibs_op
triggering while ibs_fetch just started latch the NMI line, the
in-progress NMI would handle both, and we then end up with a spare NMI?

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 7/7] perf, x86: Implement 64 bit counter support for IBS
  2011-07-28 13:46 ` [PATCH 7/7] perf, x86: Implement 64 bit counter support for IBS Robert Richter
@ 2011-07-29 16:58   ` Peter Zijlstra
  2011-07-29 17:02     ` Peter Zijlstra
  2011-07-29 17:01   ` Peter Zijlstra
  2011-08-02 11:37   ` Peter Zijlstra
  2 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2011-07-29 16:58 UTC (permalink / raw)
  To: Robert Richter; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> This patch implements 64 bit counter support for IBS. The sampling
> period is no longer limited to the hw counter width.

That should never be the case, even for shorter hw counter, in such a
case you should ignore overflows until you reach the programmed period.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 6/7] perf, x86: Example code for AMD IBS
  2011-07-28 13:46 ` [PATCH 6/7] perf, x86: Example code for AMD IBS Robert Richter
@ 2011-07-29 16:58   ` Peter Zijlstra
  2011-08-01  5:50     ` Robert Richter
  0 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2011-07-29 16:58 UTC (permalink / raw)
  To: Robert Richter; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
>  tools/perf/Documentation/examples/ibs.c    |  436 ++++++++++++++++++++++++++++

That really isn't the place for this..

Also, how similar is the Alpha PMU to AMD IBS?

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 7/7] perf, x86: Implement 64 bit counter support for IBS
  2011-07-28 13:46 ` [PATCH 7/7] perf, x86: Implement 64 bit counter support for IBS Robert Richter
  2011-07-29 16:58   ` Peter Zijlstra
@ 2011-07-29 17:01   ` Peter Zijlstra
  2011-08-01  6:13     ` Robert Richter
  2011-08-02 11:37   ` Peter Zijlstra
  2 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2011-07-29 17:01 UTC (permalink / raw)
  To: Robert Richter; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> The functions perf_event_set_period() and perf_event_try_update() can
> be used as generic functions. They can replace similar code that is
> duplicate across architectures.

We could perhaps place them in kernel/events/pmu-lib.c or so, a place to
gather useful common code to pmu driver implementations.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 7/7] perf, x86: Implement 64 bit counter support for IBS
  2011-07-29 16:58   ` Peter Zijlstra
@ 2011-07-29 17:02     ` Peter Zijlstra
  2011-08-01  5:55       ` Robert Richter
  0 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2011-07-29 17:02 UTC (permalink / raw)
  To: Robert Richter; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On Fri, 2011-07-29 at 18:58 +0200, Peter Zijlstra wrote:
> On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> > This patch implements 64 bit counter support for IBS. The sampling
> > period is no longer limited to the hw counter width.
> 
> That should never be the case, even for shorter hw counter, in such a
> case you should ignore overflows until you reach the programmed period.

Hmm, I might have mis-understood, is that exactly what this patch
implements, or does this patch add support for new hardware?



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 0/7] perf, x86: Implement AMD IBS
  2011-07-28 13:46 [PATCH 0/7] perf, x86: Implement AMD IBS Robert Richter
                   ` (6 preceding siblings ...)
  2011-07-28 13:46 ` [PATCH 7/7] perf, x86: Implement 64 bit counter support for IBS Robert Richter
@ 2011-07-29 17:07 ` Peter Zijlstra
  2011-08-01  5:21   ` Robert Richter
  7 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2011-07-29 17:07 UTC (permalink / raw)
  To: Robert Richter; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> This patch set adds support for AMD IBS to perf. 

> The approach is still to collect raw sample data which should be the
> most important use case for application developers. The data format is
> the same as described in the IBS register specification.

That makes it hardware dependent right? I take it new hardware with IBS
extensions adds output MSRs.

Anyway, I'll try and go over it again next week after reading the IBS
hardware spec (again.. that stuff just won't stick to memory).

I've got the BKDG for Fam10, is there anything more I should read?

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 0/7] perf, x86: Implement AMD IBS
  2011-07-29 17:07 ` [PATCH 0/7] perf, x86: Implement AMD IBS Peter Zijlstra
@ 2011-08-01  5:21   ` Robert Richter
  2011-08-02 11:29     ` Peter Zijlstra
  0 siblings, 1 reply; 39+ messages in thread
From: Robert Richter @ 2011-08-01  5:21 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On 29.07.11 13:07:16, Peter Zijlstra wrote:
> On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> > This patch set adds support for AMD IBS to perf. 
> 
> > The approach is still to collect raw sample data which should be the
> > most important use case for application developers. The data format is
> > the same as described in the IBS register specification.
> 
> That makes it hardware dependent right? I take it new hardware with IBS
> extensions adds output MSRs.

IBS is supposed to be architectural spec'ed, meaning there are no
family checks. IBS features are detected using cpuid.

So the version of the raw sampling data format could be specified with
the u32 capability variable. I could put the caps value to the raw
sample data too right after the size field. An additional advantage
would be that 64 bit values are memory aligned then.

The Branch Target Address register that has been added to newer cpus
could simply be extended to the raw data sample, the data would still
be backward compatible. Userland can detect its existence from the
sample size or (better) from the ibs caps.

> Anyway, I'll try and go over it again next week after reading the IBS
> hardware spec (again.. that stuff just won't stick to memory).
> 
> I've got the BKDG for Fam10, is there anything more I should read?

Though it is treated architectural, it isn't in the AMD64 Architecture
Programmer's Manual (APM). The 10h BKDG is a good source, but extended
IBS features are described in the family 12h bkdg (same as for 15h)
and the capabilities are in the cpuid spec:

 http://support.amd.com/us/Processor_TechDocs/41131.pdf
 http://support.amd.com/us/Processor_TechDocs/25481.pdf

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 2/7] perf, x86: Implement IBS initialization
  2011-07-29 16:58   ` Peter Zijlstra
@ 2011-08-01  5:27     ` Robert Richter
  0 siblings, 0 replies; 39+ messages in thread
From: Robert Richter @ 2011-08-01  5:27 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On 29.07.11 12:58:45, Peter Zijlstra wrote:
> On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> > +static int perf_ibs_add(struct perf_event *event, int flags)
> > +{
> > +       pr_info("Adding event %p (config=%016llx) to pmu %s (type=%d) on cpu %d\n",
> > +               event, event->attr.config, perf_ibs.name, event->attr.type, event->oncpu);
> > +       return 0;
> > +}
> > +
> > +static void perf_ibs_del(struct perf_event *event, int flags)
> > +{
> > +       pr_info("Removing event %p (config=%016llx) to pmu %s (type=%d) on cpu %d\n",
> > +               event, event->attr.config, perf_ibs.name, event->attr.type, event->oncpu);
> > +} 
> 
> While I see you remove them later on, I do find it somewhat strange to
> have them here. This is user triggerable code in the context switch
> path, lots of nasty can come from this.

I will remove it and simply replace it with function stubs.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 4/7] perf, x86: Implement IBS interrupt handler
  2011-07-29 16:58   ` Peter Zijlstra
@ 2011-08-01  5:32     ` Robert Richter
  2011-08-01 15:21       ` Peter Zijlstra
  0 siblings, 1 reply; 39+ messages in thread
From: Robert Richter @ 2011-08-01  5:32 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On 29.07.11 12:58:46, Peter Zijlstra wrote:
> On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> > +static int __kprobes
> > +perf_ibs_nmi_handler(struct notifier_block *self,
> > +                    unsigned long cmd, void *__args)
> > +{
> > +       struct die_args *args = __args;
> > +       int handled = 0;
> > +
> > +       switch (cmd) {
> > +       case DIE_NMI:
> > +               break;
> > +       default:
> > +               return NOTIFY_DONE;
> > +       }
> > +
> > +       handled += perf_ibs_handle_irq(&perf_ibs_fetch, args->regs);
> > +       handled += perf_ibs_handle_irq(&perf_ibs_op, args->regs);
> > +
> > +       if (!handled)
> > +               return NOTIFY_DONE;
> > +
> > +       inc_irq_stat(apic_perf_irqs);
> > +
> > +       return NOTIFY_STOP;
> > +} 
> 
> So IBS cannot trigger the whole unknown NMI business? Wouldn't ibs_op
> triggering while ibs_fetch just started latch the NMI line, the
> in-progress NMI would handle both, and we then end up with a spare NMI?

Ok, I will run some excessive testing of this. If this turns out to be
a problem I will change the code. Could this be on top of this patch
set then?

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 6/7] perf, x86: Example code for AMD IBS
  2011-07-29 16:58   ` Peter Zijlstra
@ 2011-08-01  5:50     ` Robert Richter
  2011-08-02 10:37       ` Peter Zijlstra
  0 siblings, 1 reply; 39+ messages in thread
From: Robert Richter @ 2011-08-01  5:50 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On 29.07.11 12:58:49, Peter Zijlstra wrote:
> On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> >  tools/perf/Documentation/examples/ibs.c    |  436 ++++++++++++++++++++++++++++
> 
> That really isn't the place for this..
> 
> Also, how similar is the Alpha PMU to AMD IBS?

Would you prefer

 tools/perf/Documentation/examples/x86/ibs.c

instead?

Actually I wasn't sure if this place is ok at all. But I wanted to
publish the code for reference and testing. There isn't something
similar already there, and the perf tool code is also not the best
place to implement such cpu specific features.

But putting it to the documentation is common practice for library
packages (e.g. libpapi and libpfm). If the example is not for the
given architecture it won't be added to the package documentation.
This example won't be installed at all. Even for very special features
an example is important since it is often not part of the generic
implementation.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 7/7] perf, x86: Implement 64 bit counter support for IBS
  2011-07-29 17:02     ` Peter Zijlstra
@ 2011-08-01  5:55       ` Robert Richter
  0 siblings, 0 replies; 39+ messages in thread
From: Robert Richter @ 2011-08-01  5:55 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On 29.07.11 13:02:42, Peter Zijlstra wrote:
> On Fri, 2011-07-29 at 18:58 +0200, Peter Zijlstra wrote:
> > On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> > > This patch implements 64 bit counter support for IBS. The sampling
> > > period is no longer limited to the hw counter width.
> > 
> > That should never be the case, even for shorter hw counter, in such a
> > case you should ignore overflows until you reach the programmed period.
> 
> Hmm, I might have mis-understood, is that exactly what this patch
> implements, or does this patch add support for new hardware?

Yes, this patch implements 64 bit counter in software and doesn't add
a new hardware feature. It implements 64 bit counting by throwing away
interrupts if the period isn't reached.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 7/7] perf, x86: Implement 64 bit counter support for IBS
  2011-07-29 17:01   ` Peter Zijlstra
@ 2011-08-01  6:13     ` Robert Richter
  0 siblings, 0 replies; 39+ messages in thread
From: Robert Richter @ 2011-08-01  6:13 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On 29.07.11 13:01:03, Peter Zijlstra wrote:
> On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> > The functions perf_event_set_period() and perf_event_try_update() can
> > be used as generic functions. They can replace similar code that is
> > duplicate across architectures.
> 
> We could perhaps place them in kernel/events/pmu-lib.c or so, a place to
> gather useful common code to pmu driver implementations.

As we already have kernel/events/core.c I suggest to put it into
.../common.c?

I planned to factor this out of current perf code for x86 and maybe
other architectures. Want to send separate patches independent from
ibs.

Should I add both functions already to the new file? Maybe this has
some implications to header and makefiles.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 4/7] perf, x86: Implement IBS interrupt handler
  2011-08-01  5:32     ` Robert Richter
@ 2011-08-01 15:21       ` Peter Zijlstra
  2011-08-01 16:38         ` Don Zickus
  0 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2011-08-01 15:21 UTC (permalink / raw)
  To: Robert Richter; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML, Don Zickus

On Mon, 2011-08-01 at 07:32 +0200, Robert Richter wrote:
> > So IBS cannot trigger the whole unknown NMI business? Wouldn't ibs_op
> > triggering while ibs_fetch just started latch the NMI line, the
> > in-progress NMI would handle both, and we then end up with a spare NMI?
> 
> Ok, I will run some excessive testing of this. If this turns out to be
> a problem I will change the code. Could this be on top of this patch
> set then? 

Sure, if you somehow end up duplicating some logic I think you know
about this common.c file you proposed ;-)

I kinda lost the current state of affairs wrt spurious NMIs, I think
there's still a few reports out there. I recently read through some
Intel errata and found the Intel PMU can send double PMIs under some
circumstances (just to keep life interesting).

I also haven't checked up on what the perf_event_nmi_handler() magic
looks today, so I can't say if its a problem or not, but I thought I'd
just mention it.


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 4/7] perf, x86: Implement IBS interrupt handler
  2011-08-01 15:21       ` Peter Zijlstra
@ 2011-08-01 16:38         ` Don Zickus
  2011-08-05  9:55           ` Ingo Molnar
  0 siblings, 1 reply; 39+ messages in thread
From: Don Zickus @ 2011-08-01 16:38 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Robert Richter, Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On Mon, Aug 01, 2011 at 05:21:43PM +0200, Peter Zijlstra wrote:
> On Mon, 2011-08-01 at 07:32 +0200, Robert Richter wrote:
> > > So IBS cannot trigger the whole unknown NMI business? Wouldn't ibs_op
> > > triggering while ibs_fetch just started latch the NMI line, the
> > > in-progress NMI would handle both, and we then end up with a spare NMI?
> > 
> > Ok, I will run some excessive testing of this. If this turns out to be
> > a problem I will change the code. Could this be on top of this patch
> > set then? 
> 
> Sure, if you somehow end up duplicating some logic I think you know
> about this common.c file you proposed ;-)
> 
> I kinda lost the current state of affairs wrt spurious NMIs, I think
> there's still a few reports out there. I recently read through some
> Intel errata and found the Intel PMU can send double PMIs under some
> circumstances (just to keep life interesting).

I tried looking into but everytime I applied workarounds for Intel errata
I wound up with more unknown NMIs and proving that a couple of them worked
(with trace_printks) seemed elusive.  I got frustrated and left it alone.

But yeah, Intel's perf has so many errata that I think if you kick the
box while running perf you can generate an unknown NMI.

> 
> I also haven't checked up on what the perf_event_nmi_handler() magic
> looks today, so I can't say if its a problem or not, but I thought I'd
> just mention it.

It hasn't changed much since Robert added his magic which handles the
majority of use cases for now.

Cheers,
Don

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 6/7] perf, x86: Example code for AMD IBS
  2011-08-01  5:50     ` Robert Richter
@ 2011-08-02 10:37       ` Peter Zijlstra
  2011-08-03  8:27         ` Michael Cree
  0 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2011-08-02 10:37 UTC (permalink / raw)
  To: Robert Richter
  Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML, Michael Cree, Lin Ming

On Mon, 2011-08-01 at 07:50 +0200, Robert Richter wrote:
> On 29.07.11 12:58:49, Peter Zijlstra wrote:
> > On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> > >  tools/perf/Documentation/examples/ibs.c    |  436 ++++++++++++++++++++++++++++
> > 
> > That really isn't the place for this..
> > 
> > Also, how similar is the Alpha PMU to AMD IBS?
> 
> Would you prefer
> 
>  tools/perf/Documentation/examples/x86/ibs.c
> 
> instead?

Possibly, but having just looked at the example again I don't really see
it doing anything perf-record doesn't already do, so why does it deserve
to live at all?

Initially I thought it was a record+report like example, some code
interpreting the 'mess' that comes out of IBS would be most appreciated
and I think we can even ship that as perf-ibs-report/perf-ibs-annotate
or so (and if its still remotely similar to its Alpha precursor that
might make the Alpha folks happy too).

> Actually I wasn't sure if this place is ok at all. But I wanted to
> publish the code for reference and testing. There isn't something
> similar already there, and the perf tool code is also not the best
> place to implement such cpu specific features.
> 
> But putting it to the documentation is common practice for library
> packages (e.g. libpapi and libpfm). If the example is not for the
> given architecture it won't be added to the package documentation.
> This example won't be installed at all. Even for very special features
> an example is important since it is often not part of the generic
> implementation.

Right, its just that I'm not seeing the extra value at all, aside from
maybe the IBS_*_DEFAULT values that should live as an event config
(poking mlin to see where he's at with those patches..).

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 0/7] perf, x86: Implement AMD IBS
  2011-08-01  5:21   ` Robert Richter
@ 2011-08-02 11:29     ` Peter Zijlstra
  2011-08-12 19:43       ` Robert Richter
  0 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2011-08-02 11:29 UTC (permalink / raw)
  To: Robert Richter; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On Mon, 2011-08-01 at 07:21 +0200, Robert Richter wrote:

> IBS is supposed to be architectural spec'ed, meaning there are no
> family checks. IBS features are detected using cpuid.
> 
> So the version of the raw sampling data format could be specified with
> the u32 capability variable. I could put the caps value to the raw
> sample data too right after the size field. An additional advantage
> would be that 64 bit values are memory aligned then.

Seems like a good filler :-)

> The Branch Target Address register that has been added to newer cpus
> could simply be extended to the raw data sample, the data would still
> be backward compatible. Userland can detect its existence from the
> sample size or (better) from the ibs caps.

Caps would be better.

> Though it is treated architectural, it isn't in the AMD64 Architecture
> Programmer's Manual (APM). The 10h BKDG is a good source, but extended
> IBS features are described in the family 12h bkdg (same as for 15h)
> and the capabilities are in the cpuid spec:
> 
>  http://support.amd.com/us/Processor_TechDocs/41131.pdf
>  http://support.amd.com/us/Processor_TechDocs/25481.pdf 

Right, so comparing Fam10 to Fam12,

+ IbsOpCtl.19:58
+ IbsOpData.38
- IbsOpData2.4:5
+ IbsOpData3.19
+ IbsBrTarget

Curious that they removed a few bits, those don't seem to be enumerated
in the IBS capability field either.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/7] perf, x86: Implement IBS event configuration
  2011-07-28 13:46 ` [PATCH 3/7] perf, x86: Implement IBS event configuration Robert Richter
@ 2011-08-02 11:35   ` Peter Zijlstra
  2011-08-12 19:51     ` Robert Richter
  0 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2011-08-02 11:35 UTC (permalink / raw)
  To: Robert Richter; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> +static struct perf_ibs perf_ibs_fetch = {
> +       .pmu = {
> +               .event_init     = perf_ibs_init,
> +               .add            = perf_ibs_add,
> +               .del            = perf_ibs_del,
> +       },
> +       .msr                    = MSR_AMD64_IBSFETCHCTL,
> +       .config_mask            = IBS_FETCH_CONFIG_MASK,
> +       .cnt_mask               = IBS_FETCH_MAX_CNT,
> +       .enable_mask            = IBS_FETCH_ENABLE,
> +};
> +
> +static struct perf_ibs perf_ibs_op = {
> +       .pmu = {
> +               .event_init     = perf_ibs_init,
> +               .add            = perf_ibs_add,
> +               .del            = perf_ibs_del,
> +       },
> +       .msr                    = MSR_AMD64_IBSOPCTL,
> +       .config_mask            = IBS_OP_CONFIG_MASK,
> +       .cnt_mask               = IBS_OP_MAX_CNT,
> +       .enable_mask            = IBS_OP_ENABLE,
>  };
>   

It it intentional that you map the IBS things to the hw task_context ?



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 7/7] perf, x86: Implement 64 bit counter support for IBS
  2011-07-28 13:46 ` [PATCH 7/7] perf, x86: Implement 64 bit counter support for IBS Robert Richter
  2011-07-29 16:58   ` Peter Zijlstra
  2011-07-29 17:01   ` Peter Zijlstra
@ 2011-08-02 11:37   ` Peter Zijlstra
  2011-08-12 18:11     ` Robert Richter
  2 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2011-08-02 11:37 UTC (permalink / raw)
  To: Robert Richter; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> +       /*
> +        * Emulate IbsOpCurCnt in MSRC001_1033 (IbsOpCtl), not
> +        * supported in all cpus. As this triggered an interrupt, we
> +        * set the current count to the max count.
> +        */
> +       config = buffer[0];
> +       if (perf_ibs == &perf_ibs_op) {
> +               config &= ~IBS_OP_CUR_CNT;
> +               config |= (config & IBS_OP_MAX_CNT) << 36;
> +       } 

Shouldn't that be conditional on capability muck?

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 4/7] perf, x86: Implement IBS interrupt handler
  2011-07-28 13:46 ` [PATCH 4/7] perf, x86: Implement IBS interrupt handler Robert Richter
  2011-07-29 16:58   ` Peter Zijlstra
@ 2011-08-02 11:43   ` Peter Zijlstra
  2011-08-12 18:07     ` Robert Richter
  1 sibling, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2011-08-02 11:43 UTC (permalink / raw)
  To: Robert Richter; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> +       msr = hwc->config_base;
> +       buf = buffer;
> +       rdmsrl(msr++, *buf);
> +       if (!(*buf++ & perf_ibs->valid_mask))
> +               return 0;
> +
> +       perf_sample_data_init(&data, 0);
> +       if (event->attr.sample_type & PERF_SAMPLE_RAW) {
> +               for (i = 1; i < perf_ibs->reg_count; i++)
> +                       rdmsrl(msr++, *buf++);
> +               raw.size = sizeof(u32) + sizeof(u64) * perf_ibs->reg_count;
> +               raw.data = buffer;
> +               data.raw = &raw;
> +       } 

OK, so this dumps a linear range of MSRs into the raw data buffer. The
only 'problem' I have with that is that Fam12 will then also dump 103A
IBS Control Register, which seems pointless.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 2/7] perf, x86: Implement IBS initialization
  2011-07-28 13:46 ` [PATCH 2/7] perf, x86: Implement IBS initialization Robert Richter
  2011-07-29 16:58   ` Peter Zijlstra
@ 2011-08-02 11:49   ` Peter Zijlstra
  2011-08-12 17:49     ` Robert Richter
  1 sibling, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2011-08-02 11:49 UTC (permalink / raw)
  To: Robert Richter; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> +/*
> + * This runs only on the current cpu. We try to find an LVT offset and
> + * setup the local APIC. For this we must disable preemption. On
> + * success we initialize all nodes with this offset. This updates then
> + * the offset in the IBS_CTL per-node msr. The per-core APIC setup of
> + * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that
> + * is using the new offset.
> + */
> +static int force_ibs_eilvt_setup(void)
> +{
> +       int offset;
> +       int ret;
> +
> +       preempt_disable();
> +       /* find the next free available EILVT entry, skip offset 0 */
> +       for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
> +               if (get_eilvt(offset))
> +                       break;
> +       }
> +       preempt_enable();
> +
> +       if (offset == APIC_EILVT_NR_MAX) {
> +               printk(KERN_DEBUG "No EILVT entry available\n");
> +               return -EBUSY;
> +       }
> +
> +       ret = setup_ibs_ctl(offset);
> +       if (ret)
> +               goto out;
> +
> +       if (!ibs_eilvt_valid()) {
> +               ret = -EFAULT;
> +               goto out;
> +       }
> +
> +       pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset);
> +       pr_err(FW_BUG "workaround enabled for IBS LVT offset\n");
> +
> +       return 0;
> +out:
> +       preempt_disable();
> +       put_eilvt(offset);
> +       preempt_enable();
> +       return ret;
> +} 

So I don't get any of that preempt_disable/enable crap in this patch,
but the above is esp. confusing. How is that preempt_disable() at out:
still valid? We could be running on an entirely different cpu from when
we did get_eilvt at the start.



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 6/7] perf, x86: Example code for AMD IBS
  2011-08-02 10:37       ` Peter Zijlstra
@ 2011-08-03  8:27         ` Michael Cree
  2011-08-03 17:56           ` Robert Richter
  0 siblings, 1 reply; 39+ messages in thread
From: Michael Cree @ 2011-08-03  8:27 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Robert Richter, Ingo Molnar, Arnaldo Carvalho de Melo, LKML, Lin Ming

On 02/08/11 22:37, Peter Zijlstra wrote:
> On Mon, 2011-08-01 at 07:50 +0200, Robert Richter wrote:
>> On 29.07.11 12:58:49, Peter Zijlstra wrote:
>>> On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
>>>>  tools/perf/Documentation/examples/ibs.c    |  436 ++++++++++++++++++++++++++++
>>>
>>> That really isn't the place for this..
>>>
>>> Also, how similar is the Alpha PMU to AMD IBS?

The Alpha PMU (on the EV67 and later CPUs) has two counter modes:
"Aggregate" which is like counters on other CPUs and implemented in the
kernel, and "ProfileMe", which is not currently used in the perf. event
subsystem.

In the "ProfileMe" mode a counter is initialised with max_count-N and
when the counter overflows (i.e. after execution of ~N instructions) a
window is opened for profiling.  The window closes (roughly) when the
profiled instruction is retired from the pipeline.  The two counters
count events such as instructions, cycles, Bcache misses, etc. during
the window.  When the window is closed an interrupt to the PMU interrupt
handler is made.  The two counters can be read and there are other
registers that can be read that provide information on the profiled
instruction's flight through the pipeline, such as instruction killed
before being mapped (i.e. it was identified as a nop), instruction
stalled between fetch and being mapped (usually due to operands not data
ready), branch direction and branch misprediction (if instruction is a
branch), instruction was in a new Icache fill stream, instruction
trapped and trap type, and so on.

>> Would you prefer
>>
>>  tools/perf/Documentation/examples/x86/ibs.c
>>
>> instead?
> 
> Possibly, but having just looked at the example again I don't really see
> it doing anything perf-record doesn't already do, so why does it deserve
> to live at all?
> 
> Initially I thought it was a record+report like example, some code
> interpreting the 'mess' that comes out of IBS would be most appreciated
> and I think we can even ship that as perf-ibs-report/perf-ibs-annotate
> or so (and if its still remotely similar to its Alpha precursor that
> might make the Alpha folks happy too).

Sure would be nice if the infrastructure to support ProfileMe mode
appeared in the perf. events subsystem.  I am not going to go full out
to implement all the support needed for it because there are too few
users on Alpha to justify the effort.  But if we could score an
implementation of ProfileMe mode with minimal effort on the back of an
AMD implementation that would make us happy.

Cheers
Michael.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 6/7] perf, x86: Example code for AMD IBS
  2011-08-03  8:27         ` Michael Cree
@ 2011-08-03 17:56           ` Robert Richter
  0 siblings, 0 replies; 39+ messages in thread
From: Robert Richter @ 2011-08-03 17:56 UTC (permalink / raw)
  To: Michael Cree
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo, LKML, Lin Ming

On 03.08.11 04:27:08, Michael Cree wrote:
> On 02/08/11 22:37, Peter Zijlstra wrote:

> > Possibly, but having just looked at the example again I don't really see
> > it doing anything perf-record doesn't already do, so why does it deserve
> > to live at all?

I also got it running with perf-record with Lin's patch

 perf tool: Parse general/raw events from sysfs

with no other changes using:

 perf record -R -e ibs_fetch:r0 -a -c 10000 <command>

What is missing here is a parser for the IBS sampling data that is
very specific, which would be the perf-ibs-annotate tool you
recommended.

> > Initially I thought it was a record+report like example, some code
> > interpreting the 'mess' that comes out of IBS would be most appreciated
> > and I think we can even ship that as perf-ibs-report/perf-ibs-annotate
> > or so (and if its still remotely similar to its Alpha precursor that
> > might make the Alpha folks happy too).

Yes, the example I wrote could use perf-record as backend. The parser
for IBS could be part of perf-record, if we encode the pmu name
(ibs_op/ibs_fetch) in perf.data. Then we don't need a special tool
for IBS anymore.

> Sure would be nice if the infrastructure to support ProfileMe mode
> appeared in the perf. events subsystem.  I am not going to go full out
> to implement all the support needed for it because there are too few
> users on Alpha to justify the effort.  But if we could score an
> implementation of ProfileMe mode with minimal effort on the back of an
> AMD implementation that would make us happy.

ProfileMe looks similar to IBS, so the above would fit there too.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 4/7] perf, x86: Implement IBS interrupt handler
  2011-08-01 16:38         ` Don Zickus
@ 2011-08-05  9:55           ` Ingo Molnar
  2011-08-05 13:47             ` Don Zickus
  0 siblings, 1 reply; 39+ messages in thread
From: Ingo Molnar @ 2011-08-05  9:55 UTC (permalink / raw)
  To: Don Zickus; +Cc: Peter Zijlstra, Robert Richter, Arnaldo Carvalho de Melo, LKML


* Don Zickus <dzickus@redhat.com> wrote:

> On Mon, Aug 01, 2011 at 05:21:43PM +0200, Peter Zijlstra wrote:
> > On Mon, 2011-08-01 at 07:32 +0200, Robert Richter wrote:
> > > > So IBS cannot trigger the whole unknown NMI business? Wouldn't ibs_op
> > > > triggering while ibs_fetch just started latch the NMI line, the
> > > > in-progress NMI would handle both, and we then end up with a spare NMI?
> > > 
> > > Ok, I will run some excessive testing of this. If this turns out to be
> > > a problem I will change the code. Could this be on top of this patch
> > > set then? 
> > 
> > Sure, if you somehow end up duplicating some logic I think you 
> > know about this common.c file you proposed ;-)
> > 
> > I kinda lost the current state of affairs wrt spurious NMIs, I 
> > think there's still a few reports out there. I recently read 
> > through some Intel errata and found the Intel PMU can send double 
> > PMIs under some circumstances (just to keep life interesting).
> 
> I tried looking into it, but every time I applied workarounds for Intel 
> errata I wound up with more unknown NMIs and proving that a couple 
> of them worked (with trace_printks) seemed elusive.  I got 
> frustrated and left it alone.
> 
> But yeah, Intel's perf has so many errata that I think if you kick 
> the box while running perf you can generate an unknown NMI.

Hence the only sane approach is to just tolerate spurious NMIs and 
only annoy the user with them if there's *way* too many of them or 
so.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 4/7] perf, x86: Implement IBS interrupt handler
  2011-08-05  9:55           ` Ingo Molnar
@ 2011-08-05 13:47             ` Don Zickus
  0 siblings, 0 replies; 39+ messages in thread
From: Don Zickus @ 2011-08-05 13:47 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Peter Zijlstra, Robert Richter, Arnaldo Carvalho de Melo, LKML

On Fri, Aug 05, 2011 at 11:55:19AM +0200, Ingo Molnar wrote:
> > I tried looking into it, but every time I applied workarounds for Intel 
> > errata I wound up with more unknown NMIs and proving that a couple 
> > of them worked (with trace_printks) seemed elusive.  I got 
> > frustrated and left it alone.
> > 
> > But yeah, Intel's perf has so many errata that I think if you kick 
> > the box while running perf you can generate an unknown NMI.
> 
> Hence the only sane approach is to just tolerate spurious NMIs and 
> only annoy the user with them if there's *way* too many of them or 
> so.

That may work if we can determine if the user is running perf or not.  But
on older systems (like pre-Nehalem), sometimes the only way a system can
signal a platform error is through a single unknown NMI.  I would be
afraid we might lose one of those if we 'tolerate' unknown NMIs.

So far I have only noticed perf generating 'unknown NMIs' on high volume
usage (like multiple counters).  For the casual user it has been ok so
far.

Cheers,
Don

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 2/7] perf, x86: Implement IBS initialization
  2011-08-02 11:49   ` Peter Zijlstra
@ 2011-08-12 17:49     ` Robert Richter
  0 siblings, 0 replies; 39+ messages in thread
From: Robert Richter @ 2011-08-12 17:49 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On 02.08.11 07:49:01, Peter Zijlstra wrote:
> On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> > +/*
> > + * This runs only on the current cpu. We try to find an LVT offset and
> > + * setup the local APIC. For this we must disable preemption. On
> > + * success we initialize all nodes with this offset. This updates then
> > + * the offset in the IBS_CTL per-node msr. The per-core APIC setup of
> > + * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that
> > + * is using the new offset.
> > + */
> > +static int force_ibs_eilvt_setup(void)
> > +{
> > +       int offset;
> > +       int ret;
> > +
> > +       preempt_disable();
> > +       /* find the next free available EILVT entry, skip offset 0 */
> > +       for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
> > +               if (get_eilvt(offset))
> > +                       break;
> > +       }
> > +       preempt_enable();
> > +
> > +       if (offset == APIC_EILVT_NR_MAX) {
> > +               printk(KERN_DEBUG "No EILVT entry available\n");
> > +               return -EBUSY;
> > +       }
> > +
> > +       ret = setup_ibs_ctl(offset);
> > +       if (ret)
> > +               goto out;
> > +
> > +       if (!ibs_eilvt_valid()) {
> > +               ret = -EFAULT;
> > +               goto out;
> > +       }
> > +
> > +       pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset);
> > +       pr_err(FW_BUG "workaround enabled for IBS LVT offset\n");
> > +
> > +       return 0;
> > +out:
> > +       preempt_disable();
> > +       put_eilvt(offset);
> > +       preempt_enable();
> > +       return ret;
> > +} 
> 
> So I don't get any of that preempt_disable/enable crap in this patch,
> but the above is esp. confusing. How is that preempt_disable() at out:
> still valid? We could be running on an entirely different cpu from when
> we did get_eilvt at the start.

Yes, this code is strange due to the hardware mixing up per-cpu and
per-node configuration. This code did many iterations in the oprofile
driver.

get/put_eilvt() accesses APIC registers on one *cpu* and then globally
reserves/releases the offset by keeping it in eilvt_offsets[]. To
avoid switching cpus in the middle of an apic access the functions are
protected with preempt_disable/enable.

setup_ibs_ctl() then sets up this offset on all *nodes*. During node
setup the pci cpu devices are accessed and thus may not be called with
preemption disabled.

The offset is later taken from the per-node msr IBS_CTL and used for a
per-core setup of the NMI vector on each cpu.

It is safe to have get_eilvt and put_eilvt on different cpus as the
offset is kept globally.

I was thinking of moving preempt_disable/enable to get/put_eilvt, but
also wanted to avoid switching to a different cpu while searching for
an offset in the for-loop.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 4/7] perf, x86: Implement IBS interrupt handler
  2011-08-02 11:43   ` Peter Zijlstra
@ 2011-08-12 18:07     ` Robert Richter
  0 siblings, 0 replies; 39+ messages in thread
From: Robert Richter @ 2011-08-12 18:07 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On 02.08.11 07:43:28, Peter Zijlstra wrote:
> On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> > +       msr = hwc->config_base;
> > +       buf = buffer;
> > +       rdmsrl(msr++, *buf);
> > +       if (!(*buf++ & perf_ibs->valid_mask))
> > +               return 0;
> > +
> > +       perf_sample_data_init(&data, 0);
> > +       if (event->attr.sample_type & PERF_SAMPLE_RAW) {
> > +               for (i = 1; i < perf_ibs->reg_count; i++)
> > +                       rdmsrl(msr++, *buf++);
> > +               raw.size = sizeof(u32) + sizeof(u64) * perf_ibs->reg_count;
> > +               raw.data = buffer;
> > +               data.raw = &raw;
> > +       } 
> 
> OK, so this dumps a linear range of MSRs into the raw data buffer. The
> only 'problem' I have with that is that Fam12 will then also dump 103A
> IBS Control Register, which seems pointless.

I will implement a bit mask instead. We can then use
for_each_set_bit() etc.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 7/7] perf, x86: Implement 64 bit counter support for IBS
  2011-08-02 11:37   ` Peter Zijlstra
@ 2011-08-12 18:11     ` Robert Richter
  0 siblings, 0 replies; 39+ messages in thread
From: Robert Richter @ 2011-08-12 18:11 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On 02.08.11 07:37:20, Peter Zijlstra wrote:
> On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> > +       /*
> > +        * Emulate IbsOpCurCnt in MSRC001_1033 (IbsOpCtl), not
> > +        * supported in all cpus. As this triggered an interrupt, we
> > +        * set the current count to the max count.
> > +        */
> > +       config = buffer[0];
> > +       if (perf_ibs == &perf_ibs_op) {
> > +               config &= ~IBS_OP_CUR_CNT;
> > +               config |= (config & IBS_OP_MAX_CNT) << 36;
> > +       } 
> 
> Shouldn't that be conditional on capability muck?

It doesn't hurt actually, but true, will implement it right with the
capability check.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 0/7] perf, x86: Implement AMD IBS
  2011-08-02 11:29     ` Peter Zijlstra
@ 2011-08-12 19:43       ` Robert Richter
  2011-08-16 21:05         ` Robert Richter
  0 siblings, 1 reply; 39+ messages in thread
From: Robert Richter @ 2011-08-12 19:43 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On 02.08.11 07:29:55, Peter Zijlstra wrote:
> On Mon, 2011-08-01 at 07:21 +0200, Robert Richter wrote:
> 
> > IBS is supposed to be architectural spec'ed, meaning there are no
> > family checks. IBS features are detected using cpuid.
> > 
> > So the version of the raw sampling data format could be specified with
> > the u32 capability variable. I could put the caps value to the raw
> > sample data too right after the size field. An additional advantage
> would be that 64 bit values are memory aligned then.
> 
> Seems like a good filler :-)
> 
> > The Branch Target Address register that has been added to newer cpus
> > could simply be extended to the raw data sample, the data would still
> > be backward compatible. Userland can detect it existence from the
> > sample size or (better) from the ibs caps.
> 
> Caps would be better.

Will take caps here.

While thinking about this I realized we have to encode the pmu type
actually in the sample, because there could be one sampling file with
multiple samples from different pmus. So attr.type must be encoded and
additionally also its mapping to the name for dynamically added pmus.
Hmm?

Something like this content:

 perf.data header (once): "ibs_op" -> type = 7
 perf.data sample (each): type = 7

Maybe we use the reserved field of PERF_SAMPLE_CPU for it?

> 
> > Though it is treated architectural, it isn't in the AMD64 Architecture
> > Programmer's Manual (APM). The 10h BKDG is a good source, but extended
> > IBS features are described in the family 12h bkdg (same as for 15h)
> > and the capabilities are in the cpuid spec:
> > 
> >  http://support.amd.com/us/Processor_TechDocs/41131.pdf
> >  http://support.amd.com/us/Processor_TechDocs/25481.pdf 
> 
> Right, so comparing Fam10 to Fam12,
> 
> + IbsOpCtl.19:58
> + IbsOpData.38
> - IbsOpData2.4:5

Yeah, good catch. This is due to the different northbridge
implementations. I will ask the hw guys how to handle this.

> + IbsOpData3.19

This is already in Fam10h RefC. There is also no caps bit, but should
be always clear on systems without 1G pages.

-Robert

> + IbsBrTarget
> 
> Curious that they removed a few bits, those don't seem to be enumerated
> in the IBS capability field either.
> 

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/7] perf, x86: Implement IBS event configuration
  2011-08-02 11:35   ` Peter Zijlstra
@ 2011-08-12 19:51     ` Robert Richter
  0 siblings, 0 replies; 39+ messages in thread
From: Robert Richter @ 2011-08-12 19:51 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On 02.08.11 07:35:33, Peter Zijlstra wrote:
> On Thu, 2011-07-28 at 15:46 +0200, Robert Richter wrote:
> > +static struct perf_ibs perf_ibs_fetch = {
> > +       .pmu = {
> > +               .event_init     = perf_ibs_init,
> > +               .add            = perf_ibs_add,
> > +               .del            = perf_ibs_del,
> > +       },
> > +       .msr                    = MSR_AMD64_IBSFETCHCTL,
> > +       .config_mask            = IBS_FETCH_CONFIG_MASK,
> > +       .cnt_mask               = IBS_FETCH_MAX_CNT,
> > +       .enable_mask            = IBS_FETCH_ENABLE,
> > +};
> > +
> > +static struct perf_ibs perf_ibs_op = {
> > +       .pmu = {
> > +               .event_init     = perf_ibs_init,
> > +               .add            = perf_ibs_add,
> > +               .del            = perf_ibs_del,
> > +       },
> > +       .msr                    = MSR_AMD64_IBSOPCTL,
> > +       .config_mask            = IBS_OP_CONFIG_MASK,
> > +       .cnt_mask               = IBS_OP_MAX_CNT,
> > +       .enable_mask            = IBS_OP_ENABLE,
> >  };
> >   
> 
> It it intentional that you map the IBS things to the hw task_context ?

I didn't get you here.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 0/7] perf, x86: Implement AMD IBS
  2011-08-12 19:43       ` Robert Richter
@ 2011-08-16 21:05         ` Robert Richter
  0 siblings, 0 replies; 39+ messages in thread
From: Robert Richter @ 2011-08-16 21:05 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Arnaldo Carvalho de Melo, LKML

On 12.08.11 21:43:44, Robert Richter wrote:
> On 02.08.11 07:29:55, Peter Zijlstra wrote:
> > Right, so comparing Fam10 to Fam12,
> > 
> > + IbsOpCtl.19:58
> > + IbsOpData.38
> > - IbsOpData2.4:5
> 
> Yeah, good catch. This is due to the different northbridge
> implementations. I will ask the hw guys how to handle this.

Both bits are meant to be architectural and defined the same on all
cpus. On families 12h and 14h they always return 0. The documentation
will be updated accordingly.

Thanks,

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 39+ messages in thread

end of thread, other threads:[~2011-08-16 21:05 UTC | newest]

Thread overview: 39+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-07-28 13:46 [PATCH 0/7] perf, x86: Implement AMD IBS Robert Richter
2011-07-28 13:46 ` [PATCH 1/7] perf, x86: share IBS macros between perf and oprofile Robert Richter
2011-07-28 13:46 ` [PATCH 2/7] perf, x86: Implement IBS initialization Robert Richter
2011-07-29 16:58   ` Peter Zijlstra
2011-08-01  5:27     ` Robert Richter
2011-08-02 11:49   ` Peter Zijlstra
2011-08-12 17:49     ` Robert Richter
2011-07-28 13:46 ` [PATCH 3/7] perf, x86: Implement IBS event configuration Robert Richter
2011-08-02 11:35   ` Peter Zijlstra
2011-08-12 19:51     ` Robert Richter
2011-07-28 13:46 ` [PATCH 4/7] perf, x86: Implement IBS interrupt handler Robert Richter
2011-07-29 16:58   ` Peter Zijlstra
2011-08-01  5:32     ` Robert Richter
2011-08-01 15:21       ` Peter Zijlstra
2011-08-01 16:38         ` Don Zickus
2011-08-05  9:55           ` Ingo Molnar
2011-08-05 13:47             ` Don Zickus
2011-08-02 11:43   ` Peter Zijlstra
2011-08-12 18:07     ` Robert Richter
2011-07-28 13:46 ` [PATCH 5/7] perf, x86: Implement IBS pmu control ops Robert Richter
2011-07-28 13:46 ` [PATCH 6/7] perf, x86: Example code for AMD IBS Robert Richter
2011-07-29 16:58   ` Peter Zijlstra
2011-08-01  5:50     ` Robert Richter
2011-08-02 10:37       ` Peter Zijlstra
2011-08-03  8:27         ` Michael Cree
2011-08-03 17:56           ` Robert Richter
2011-07-28 13:46 ` [PATCH 7/7] perf, x86: Implement 64 bit counter support for IBS Robert Richter
2011-07-29 16:58   ` Peter Zijlstra
2011-07-29 17:02     ` Peter Zijlstra
2011-08-01  5:55       ` Robert Richter
2011-07-29 17:01   ` Peter Zijlstra
2011-08-01  6:13     ` Robert Richter
2011-08-02 11:37   ` Peter Zijlstra
2011-08-12 18:11     ` Robert Richter
2011-07-29 17:07 ` [PATCH 0/7] perf, x86: Implement AMD IBS Peter Zijlstra
2011-08-01  5:21   ` Robert Richter
2011-08-02 11:29     ` Peter Zijlstra
2011-08-12 19:43       ` Robert Richter
2011-08-16 21:05         ` Robert Richter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.