All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/6] perf/x86: Move Nehalem PEBS code to flag
@ 2017-06-05 22:48 Andi Kleen
  2017-06-05 22:48 ` [PATCH 2/6] perf/x86: Fix data source decoding for Skylake Andi Kleen
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Andi Kleen @ 2017-06-05 22:48 UTC (permalink / raw)
  To: peterz, acme; +Cc: linux-kernel, jolsa, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Minor cleanup: use an explicit x86_pmu flag to handle the
missing Lock / TLB information on Nehalem, instead of always
checking the model number for each PEBS sample.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/events/intel/core.c | 1 +
 arch/x86/events/intel/ds.c   | 5 +----
 arch/x86/events/perf_event.h | 3 ++-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a6d91d4e37a1..59933105d0ea 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3712,6 +3712,7 @@ __init int intel_pmu_init(void)
 
 		intel_pmu_pebs_data_source_nhm();
 		x86_add_quirk(intel_nehalem_quirk);
+		x86_pmu.pebs_no_tlb = 1;
 
 		pr_cont("Nehalem events, ");
 		break;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index c6d23ffe422d..7732999f5e2a 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -149,8 +149,6 @@ static u64 load_latency_data(u64 status)
 {
 	union intel_x86_pebs_dse dse;
 	u64 val;
-	int model = boot_cpu_data.x86_model;
-	int fam = boot_cpu_data.x86;
 
 	dse.val = status;
 
@@ -162,8 +160,7 @@ static u64 load_latency_data(u64 status)
 	/*
 	 * Nehalem models do not support TLB, Lock infos
 	 */
-	if (fam == 0x6 && (model == 26 || model == 30
-	    || model == 31 || model == 46)) {
+	if (x86_pmu.pebs_no_tlb) {
 		val |= P(TLB, NA) | P(LOCK, NA);
 		return val;
 	}
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index be3d36254040..6f5461fe582b 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -588,7 +588,8 @@ struct x86_pmu {
 			pebs		:1,
 			pebs_active	:1,
 			pebs_broken	:1,
-			pebs_prec_dist	:1;
+			pebs_prec_dist	:1,
+			pebs_no_tlb	:1;
 	int		pebs_record_size;
 	int		pebs_buffer_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/6] perf/x86: Fix data source decoding for Skylake
  2017-06-05 22:48 [PATCH 1/6] perf/x86: Move Nehalem PEBS code to flag Andi Kleen
@ 2017-06-05 22:48 ` Andi Kleen
  2017-06-06 10:08   ` Peter Zijlstra
  2017-06-05 22:48 ` [PATCH 3/6] perf, tools: Add support for printing new mem_info encodings Andi Kleen
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 11+ messages in thread
From: Andi Kleen @ 2017-06-05 22:48 UTC (permalink / raw)
  To: peterz, acme; +Cc: linux-kernel, jolsa, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Skylake changed the encoding of the PEBS data source field.
Some combinations are not available anymore, but some new cases
e.g. for L4 cache hit are added.

Fix up the conversion table for Skylake, similar to what was done
for Nehalem.

To properly describe it in the abstracted perf format I had to add
some new bits. Unfortunately the existing fields were full, so
this required adding eXtension fields for mem_lvl and snoop
into existing reserved space.

The new bits are: generic REMOTE (to combine with N/A level),
L4 (L4 EDRAM), and for snoops a bit for the FORWARD
state.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/events/intel/core.c    |  1 +
 arch/x86/events/intel/ds.c      |  9 +++++++++
 arch/x86/events/perf_event.h    |  2 ++
 include/uapi/linux/perf_event.h | 19 +++++++++++++++++--
 4 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 59933105d0ea..b54d9022d016 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3989,6 +3989,7 @@ __init int intel_pmu_init(void)
 						  skl_format_attr);
 		WARN_ON(!x86_pmu.format_attrs);
 		x86_pmu.cpu_events = hsw_events_attrs;
+		intel_pmu_pebs_data_source_skl();
 		pr_cont("Skylake events, ");
 		break;
 
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 7732999f5e2a..c7f2c71d74bd 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -79,6 +79,15 @@ void __init intel_pmu_pebs_data_source_nhm(void)
 	pebs_data_source[0x07] = OP_LH | P(LVL, L3)  | P(SNOOP, HITM);
 }
 
+void __init intel_pmu_pebs_data_source_skl(void)
+{
+	pebs_data_source[0x08] = OP_LH | P(LVLX, L4) | P(SNOOP, HIT);
+	pebs_data_source[0x09] = OP_LH | P(LVLX, L4) | P(LVLX, REMOTE) | P(SNOOP, HIT);
+	pebs_data_source[0x0b] = OP_LH | P(LVLX, RAM) | P(LVLX, REMOTE) | P(SNOOP, NONE);
+	pebs_data_source[0x0c] = OP_LH | P(LVL, NA) | P(LVLX, REMOTE) | P(SNOOPX, FWD);
+	pebs_data_source[0x0d] = OP_LH | P(LVL, NA) | P(LVLX, REMOTE) | P(SNOOP, HITM);
+}
+
 static u64 precise_store_data(u64 status)
 {
 	union intel_x86_pebs_dse dse;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 6f5461fe582b..5298debaeb32 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -943,6 +943,8 @@ void intel_pmu_lbr_init_knl(void);
 
 void intel_pmu_pebs_data_source_nhm(void);
 
+void intel_pmu_pebs_data_source_skl(void);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
 void intel_pt_interrupt(void);
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b1c0b187acfe..4b5deeada34b 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -931,14 +931,18 @@ union perf_mem_data_src {
 			mem_snoop:5,	/* snoop mode */
 			mem_lock:2,	/* lock instr */
 			mem_dtlb:7,	/* tlb access */
-			mem_rsvd:31;
+			mem_lvlx:8,	/* memory hierarchy level, ext */
+			mem_snoopx:2,	/* snoop mode, ext */
+			mem_rsvd:21;
 	};
 };
 #elif defined(__BIG_ENDIAN_BITFIELD)
 union perf_mem_data_src {
 	__u64 val;
 	struct {
-		__u64	mem_rsvd:31,
+		__u64	mem_rsvd:21,
+			mem_snoopx:2,	/* snoop mode, ext */
+			mem_lvlx:8,	/* memory hierarchy level, ext */
 			mem_dtlb:7,	/* tlb access */
 			mem_lock:2,	/* lock instr */
 			mem_snoop:5,	/* snoop mode */
@@ -975,6 +979,13 @@ union perf_mem_data_src {
 #define PERF_MEM_LVL_UNC	0x2000 /* Uncached memory */
 #define PERF_MEM_LVL_SHIFT	5
 
+#define PERF_MEM_LVLX_REMOTE    0x01 /* Remote */
+#define PERF_MEM_LVLX_L4	0x02 /* L4 */
+#define PERF_MEM_LVLX_RAM	0x04 /* Ram */
+/* 5 free */
+
+#define PERF_MEM_LVLX_SHIFT	33
+
 /* snoop mode */
 #define PERF_MEM_SNOOP_NA	0x01 /* not available */
 #define PERF_MEM_SNOOP_NONE	0x02 /* no snoop */
@@ -983,6 +994,10 @@ union perf_mem_data_src {
 #define PERF_MEM_SNOOP_HITM	0x10 /* snoop hit modified */
 #define PERF_MEM_SNOOP_SHIFT	19
 
+#define PERF_MEM_SNOOPX_FWD	0x01 /* forward */
+/* 1 free */
+#define PERF_MEM_SNOOPX_SHIFT	41
+
 /* locked instruction */
 #define PERF_MEM_LOCK_NA	0x01 /* not available */
 #define PERF_MEM_LOCK_LOCKED	0x02 /* locked transaction */
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/6] perf, tools: Add support for printing new mem_info encodings
  2017-06-05 22:48 [PATCH 1/6] perf/x86: Move Nehalem PEBS code to flag Andi Kleen
  2017-06-05 22:48 ` [PATCH 2/6] perf/x86: Fix data source decoding for Skylake Andi Kleen
@ 2017-06-05 22:48 ` Andi Kleen
  2017-06-05 22:48 ` [PATCH 4/6] perf/x86: Add support for PEBS sampling persistent RAM on Skylake Andi Kleen
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Andi Kleen @ 2017-06-05 22:48 UTC (permalink / raw)
  To: peterz, acme; +Cc: linux-kernel, jolsa, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Add decoding for the new lvlx and snoopx mem_info fields
added earlier to the kernel so that "perf mem report" and
other tools can print them properly.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/include/uapi/linux/perf_event.h | 19 ++++++++++++++--
 tools/perf/util/mem-events.c          | 41 ++++++++++++++++++++++++++++++++---
 2 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index b1c0b187acfe..4b5deeada34b 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -931,14 +931,18 @@ union perf_mem_data_src {
 			mem_snoop:5,	/* snoop mode */
 			mem_lock:2,	/* lock instr */
 			mem_dtlb:7,	/* tlb access */
-			mem_rsvd:31;
+			mem_lvlx:8,	/* memory hierarchy level, ext */
+			mem_snoopx:2,	/* snoop mode, ext */
+			mem_rsvd:21;
 	};
 };
 #elif defined(__BIG_ENDIAN_BITFIELD)
 union perf_mem_data_src {
 	__u64 val;
 	struct {
-		__u64	mem_rsvd:31,
+		__u64	mem_rsvd:21,
+			mem_snoopx:2,	/* snoop mode, ext */
+			mem_lvlx:8,	/* memory hierarchy level, ext */
 			mem_dtlb:7,	/* tlb access */
 			mem_lock:2,	/* lock instr */
 			mem_snoop:5,	/* snoop mode */
@@ -975,6 +979,13 @@ union perf_mem_data_src {
 #define PERF_MEM_LVL_UNC	0x2000 /* Uncached memory */
 #define PERF_MEM_LVL_SHIFT	5
 
+#define PERF_MEM_LVLX_REMOTE    0x01 /* Remote */
+#define PERF_MEM_LVLX_L4	0x02 /* L4 */
+#define PERF_MEM_LVLX_RAM	0x04 /* Ram */
+/* 5 free */
+
+#define PERF_MEM_LVLX_SHIFT	33
+
 /* snoop mode */
 #define PERF_MEM_SNOOP_NA	0x01 /* not available */
 #define PERF_MEM_SNOOP_NONE	0x02 /* no snoop */
@@ -983,6 +994,10 @@ union perf_mem_data_src {
 #define PERF_MEM_SNOOP_HITM	0x10 /* snoop hit modified */
 #define PERF_MEM_SNOOP_SHIFT	19
 
+#define PERF_MEM_SNOOPX_FWD	0x01 /* forward */
+/* 1 free */
+#define PERF_MEM_SNOOPX_SHIFT	41
+
 /* locked instruction */
 #define PERF_MEM_LOCK_NA	0x01 /* not available */
 #define PERF_MEM_LOCK_LOCKED	0x02 /* locked transaction */
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 06f5a3a4295c..0ebce5be5724 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -166,11 +166,18 @@ static const char * const mem_lvl[] = {
 	"Uncached",
 };
 
+static const char * const mem_lvlx[] = {
+	NULL,
+	"L4",
+	"RAM",
+};
+
 int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
 {
 	size_t i, l = 0;
 	u64 m =  PERF_MEM_LVL_NA;
 	u64 hit, miss;
+	int printed;
 
 	if (mem_info)
 		m  = mem_info->data_src.mem_lvl;
@@ -184,17 +191,37 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
 	/* already taken care of */
 	m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
 
+	if (mem_info &&
+	     (mem_info->data_src.mem_lvlx & PERF_MEM_LVLX_REMOTE))
+		l += scnprintf(out + l, sz - l, "Remote ");
+
+	printed = 0;
 	for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
 		if (!(m & 0x1))
 			continue;
-		if (l) {
+		if (printed++) {
 			strcat(out, " or ");
 			l += 4;
 		}
 		l += scnprintf(out + l, sz - l, mem_lvl[i]);
 	}
-	if (*out == '\0')
-		l += scnprintf(out, sz - l, "N/A");
+
+	m = 0;
+	if (mem_info)
+		m = mem_info->data_src.mem_lvlx;
+
+	for (i = 0; m && i < ARRAY_SIZE(mem_lvlx); i++, m >>= 1) {
+		if (!(m & 0x1) || !mem_lvlx[i])
+			continue;
+		if (printed++) {
+			strcat(out, " or ");
+			l += 4;
+		}
+		l += scnprintf(out + l, sz - l, mem_lvlx[i]);
+	}
+
+	if (l == 0)
+		l += scnprintf(out + l, sz - l, "N/A");
 	if (hit)
 		l += scnprintf(out + l, sz - l, " hit");
 	if (miss)
@@ -231,6 +258,14 @@ int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
 		}
 		l += scnprintf(out + l, sz - l, snoop_access[i]);
 	}
+	if (mem_info &&
+	     (mem_info->data_src.mem_snoopx & PERF_MEM_SNOOPX_FWD)) {
+		if (l) {
+			strcat(out, " or ");
+			l += 4;
+		}
+		l += scnprintf(out + l, sz - l, "Fwd");
+	}
 
 	if (*out == '\0')
 		l += scnprintf(out, sz - l, "N/A");
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 4/6] perf/x86: Add support for PEBS sampling persistent RAM on Skylake
  2017-06-05 22:48 [PATCH 1/6] perf/x86: Move Nehalem PEBS code to flag Andi Kleen
  2017-06-05 22:48 ` [PATCH 2/6] perf/x86: Fix data source decoding for Skylake Andi Kleen
  2017-06-05 22:48 ` [PATCH 3/6] perf, tools: Add support for printing new mem_info encodings Andi Kleen
@ 2017-06-05 22:48 ` Andi Kleen
  2017-06-05 22:48 ` [PATCH 5/6] perf, tools: Support persistent memory encoding Andi Kleen
  2017-06-05 22:48 ` [PATCH 6/6] perf, tools: Add test cases for new data source encoding Andi Kleen
  4 siblings, 0 replies; 11+ messages in thread
From: Andi Kleen @ 2017-06-05 22:48 UTC (permalink / raw)
  To: peterz, acme; +Cc: linux-kernel, jolsa, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

On Skylake server the L4 encoding in the PEBS data source actually
means persistent memory. Add a new perf encoding for this case,
and report it on Skylake Server.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/events/intel/core.c    | 3 ++-
 arch/x86/events/intel/ds.c      | 8 +++++---
 arch/x86/events/perf_event.h    | 2 +-
 include/uapi/linux/perf_event.h | 3 ++-
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index b54d9022d016..f341126766cb 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3989,7 +3989,8 @@ __init int intel_pmu_init(void)
 						  skl_format_attr);
 		WARN_ON(!x86_pmu.format_attrs);
 		x86_pmu.cpu_events = hsw_events_attrs;
-		intel_pmu_pebs_data_source_skl();
+		intel_pmu_pebs_data_source_skl(
+			boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
 		pr_cont("Skylake events, ");
 		break;
 
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index c7f2c71d74bd..881784ed0f0b 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -79,10 +79,12 @@ void __init intel_pmu_pebs_data_source_nhm(void)
 	pebs_data_source[0x07] = OP_LH | P(LVL, L3)  | P(SNOOP, HITM);
 }
 
-void __init intel_pmu_pebs_data_source_skl(void)
+void __init intel_pmu_pebs_data_source_skl(bool pmem)
 {
-	pebs_data_source[0x08] = OP_LH | P(LVLX, L4) | P(SNOOP, HIT);
-	pebs_data_source[0x09] = OP_LH | P(LVLX, L4) | P(LVLX, REMOTE) | P(SNOOP, HIT);
+	u64 pmem_or_l4 = pmem ? P(LVLX, PMEM) : P(LVLX, L4);
+
+	pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
+	pebs_data_source[0x09] = OP_LH | pmem_or_l4 | P(LVLX, REMOTE) | P(SNOOP, HIT);
 	pebs_data_source[0x0b] = OP_LH | P(LVLX, RAM) | P(LVLX, REMOTE) | P(SNOOP, NONE);
 	pebs_data_source[0x0c] = OP_LH | P(LVL, NA) | P(LVLX, REMOTE) | P(SNOOPX, FWD);
 	pebs_data_source[0x0d] = OP_LH | P(LVL, NA) | P(LVLX, REMOTE) | P(SNOOP, HITM);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 5298debaeb32..9b05a5a04e0b 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -943,7 +943,7 @@ void intel_pmu_lbr_init_knl(void);
 
 void intel_pmu_pebs_data_source_nhm(void);
 
-void intel_pmu_pebs_data_source_skl(void);
+void intel_pmu_pebs_data_source_skl(bool pmem);
 
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 4b5deeada34b..8d2c7c4ec881 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -982,7 +982,8 @@ union perf_mem_data_src {
 #define PERF_MEM_LVLX_REMOTE    0x01 /* Remote */
 #define PERF_MEM_LVLX_L4	0x02 /* L4 */
 #define PERF_MEM_LVLX_RAM	0x04 /* Ram */
-/* 5 free */
+#define PERF_MEM_LVLX_PMEM	0x08 /* Persistent Memory */
+/* 4 free */
 
 #define PERF_MEM_LVLX_SHIFT	33
 
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 5/6] perf, tools: Support persistent memory encoding
  2017-06-05 22:48 [PATCH 1/6] perf/x86: Move Nehalem PEBS code to flag Andi Kleen
                   ` (2 preceding siblings ...)
  2017-06-05 22:48 ` [PATCH 4/6] perf/x86: Add support for PEBS sampling persistent RAM on Skylake Andi Kleen
@ 2017-06-05 22:48 ` Andi Kleen
  2017-06-05 22:48 ` [PATCH 6/6] perf, tools: Add test cases for new data source encoding Andi Kleen
  4 siblings, 0 replies; 11+ messages in thread
From: Andi Kleen @ 2017-06-05 22:48 UTC (permalink / raw)
  To: peterz, acme; +Cc: linux-kernel, jolsa, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Teach the perf user tools to report the recently added
persistent memory encoding for sample data sources.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/include/uapi/linux/perf_event.h | 3 ++-
 tools/perf/util/mem-events.c          | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 4b5deeada34b..8d2c7c4ec881 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -982,7 +982,8 @@ union perf_mem_data_src {
 #define PERF_MEM_LVLX_REMOTE    0x01 /* Remote */
 #define PERF_MEM_LVLX_L4	0x02 /* L4 */
 #define PERF_MEM_LVLX_RAM	0x04 /* Ram */
-/* 5 free */
+#define PERF_MEM_LVLX_PMEM	0x08 /* Persistent Memory */
+/* 4 free */
 
 #define PERF_MEM_LVLX_SHIFT	33
 
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 0ebce5be5724..97d39db8c0a5 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -170,6 +170,7 @@ static const char * const mem_lvlx[] = {
 	NULL,
 	"L4",
 	"RAM",
+	"PMEM",
 };
 
 int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 6/6] perf, tools: Add test cases for new data source encoding
  2017-06-05 22:48 [PATCH 1/6] perf/x86: Move Nehalem PEBS code to flag Andi Kleen
                   ` (3 preceding siblings ...)
  2017-06-05 22:48 ` [PATCH 5/6] perf, tools: Support persistent memory encoding Andi Kleen
@ 2017-06-05 22:48 ` Andi Kleen
  4 siblings, 0 replies; 11+ messages in thread
From: Andi Kleen @ 2017-06-05 22:48 UTC (permalink / raw)
  To: peterz, acme; +Cc: linux-kernel, jolsa, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Add some simple tests to perf test to test data source printing.

v2: Make the tests actually check for the correct name of Forward
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/tests/Build          |  1 +
 tools/perf/tests/builtin-test.c |  4 ++++
 tools/perf/tests/mem.c          | 42 +++++++++++++++++++++++++++++++++++++++++
 tools/perf/tests/tests.h        |  1 +
 4 files changed, 48 insertions(+)
 create mode 100644 tools/perf/tests/mem.c

diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index af58ebc243ef..540409613b73 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -34,6 +34,7 @@ perf-y += thread-map.o
 perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o llvm-src-relocation.o
 perf-y += bpf.o
 perf-y += topology.o
+perf-y += mem.o
 perf-y += cpumap.o
 perf-y += stat.o
 perf-y += event_update.o
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 9e08d297f1a9..57d355445c4f 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -43,6 +43,10 @@ static struct test generic_tests[] = {
 		.func = test__basic_mmap,
 	},
 	{
+		.desc = "Test data source output",
+		.func = test__mem,
+	},
+	{
 		.desc = "Parse event definition strings",
 		.func = test__parse_events,
 	},
diff --git a/tools/perf/tests/mem.c b/tools/perf/tests/mem.c
new file mode 100644
index 000000000000..fb18d0b8fc50
--- /dev/null
+++ b/tools/perf/tests/mem.c
@@ -0,0 +1,42 @@
+#include "util/mem-events.h"
+#include "util/symbol.h"
+#include "linux/perf_event.h"
+#include "util/debug.h"
+#include "tests.h"
+#include <string.h>
+
+static int check(union perf_mem_data_src data_src,
+		  const char *string)
+{
+	char out[100];
+	char failure[100];
+	struct mem_info mi = { .data_src = data_src };
+
+	int n;
+
+	n = perf_mem__snp_scnprintf(out, sizeof out, &mi);
+	n += perf_mem__lvl_scnprintf(out + n, sizeof out - n, &mi);
+	snprintf(failure, sizeof failure, "unexpected %s", out);
+	TEST_ASSERT_VAL(failure, !strcmp(string, out));
+	return 0;
+}
+
+int test__mem(int subtest __maybe_unused)
+{
+	int ret = 0;
+
+	ret |= check(((union perf_mem_data_src) {
+				.mem_lvl = PERF_MEM_LVL_HIT,
+				.mem_lvlx = PERF_MEM_LVLX_L4 }), "N/AL4 hit");
+
+	ret |= check(((union perf_mem_data_src) {
+				.mem_lvl = PERF_MEM_LVL_MISS,
+				.mem_lvlx = PERF_MEM_LVLX_PMEM }), "N/APMEM miss");
+
+	ret |= check(((union perf_mem_data_src) {
+				.mem_snoopx = PERF_MEM_SNOOPX_FWD,
+				.mem_lvl = PERF_MEM_LVL_MISS,
+				.mem_lvlx = PERF_MEM_LVLX_RAM }), "FwdRAM miss");
+
+	return ret;
+}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 631859629403..3b3017ee91d6 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -56,6 +56,7 @@ int test__python_use(int subtest);
 int test__bp_signal(int subtest);
 int test__bp_signal_overflow(int subtest);
 int test__task_exit(int subtest);
+int test__mem(int subtest);
 int test__sw_clock_freq(int subtest);
 int test__code_reading(int subtest);
 int test__sample_parsing(int subtest);
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/6] perf/x86: Fix data source decoding for Skylake
  2017-06-05 22:48 ` [PATCH 2/6] perf/x86: Fix data source decoding for Skylake Andi Kleen
@ 2017-06-06 10:08   ` Peter Zijlstra
  2017-06-06 13:51     ` Andi Kleen
  0 siblings, 1 reply; 11+ messages in thread
From: Peter Zijlstra @ 2017-06-06 10:08 UTC (permalink / raw)
  To: Andi Kleen; +Cc: acme, linux-kernel, jolsa, Andi Kleen, Stephane Eranian

On Mon, Jun 05, 2017 at 03:48:34PM -0700, Andi Kleen wrote:

> +void __init intel_pmu_pebs_data_source_skl(void)
> +{
> +	pebs_data_source[0x08] = OP_LH | P(LVLX, L4) | P(SNOOP, HIT);
> +	pebs_data_source[0x09] = OP_LH | P(LVLX, L4) | P(LVLX, REMOTE) | P(SNOOP, HIT);
> +	pebs_data_source[0x0b] = OP_LH | P(LVLX, RAM) | P(LVLX, REMOTE) | P(SNOOP, NONE);
> +	pebs_data_source[0x0c] = OP_LH | P(LVL, NA) | P(LVLX, REMOTE) | P(SNOOPX, FWD);
> +	pebs_data_source[0x0d] = OP_LH | P(LVL, NA) | P(LVLX, REMOTE) | P(SNOOP, HITM);
> +}

Not too happy about that..

  P(LVLX, L4) | P(LVLX, REMOTE)

reads like something that should be PERF_MEM_LVL_REM_CCE1 or something
and

  P(LVLX, RAM) | P(LVLX, REMOTE)

Should certainly be

  PERF_MEM_LVL_REM_RAM1


This new generic 'REMOTE' has too much overlap with the existing things.

I realize the pickle you're in, but urgh..

Stephane?


> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index b1c0b187acfe..4b5deeada34b 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -931,14 +931,18 @@ union perf_mem_data_src {
>  			mem_snoop:5,	/* snoop mode */
>  			mem_lock:2,	/* lock instr */
>  			mem_dtlb:7,	/* tlb access */
> -			mem_rsvd:31;
> +			mem_lvlx:8,	/* memory hierarchy level, ext */
> +			mem_snoopx:2,	/* snoop mode, ext */
> +			mem_rsvd:21;
>  	};
>  };
>  #elif defined(__BIG_ENDIAN_BITFIELD)
>  union perf_mem_data_src {
>  	__u64 val;
>  	struct {
> -		__u64	mem_rsvd:31,
> +		__u64	mem_rsvd:21,
> +			mem_snoopx:2,	/* snoop mode, ext */
> +			mem_lvlx:8,	/* memory hierarchy level, ext */
>  			mem_dtlb:7,	/* tlb access */
>  			mem_lock:2,	/* lock instr */
>  			mem_snoop:5,	/* snoop mode */
> @@ -975,6 +979,13 @@ union perf_mem_data_src {
>  #define PERF_MEM_LVL_UNC	0x2000 /* Uncached memory */
>  #define PERF_MEM_LVL_SHIFT	5
>  
> +#define PERF_MEM_LVLX_REMOTE    0x01 /* Remote */
> +#define PERF_MEM_LVLX_L4	0x02 /* L4 */
> +#define PERF_MEM_LVLX_RAM	0x04 /* Ram */
> +/* 5 free */
> +
> +#define PERF_MEM_LVLX_SHIFT	33
> +
>  /* snoop mode */
>  #define PERF_MEM_SNOOP_NA	0x01 /* not available */
>  #define PERF_MEM_SNOOP_NONE	0x02 /* no snoop */
> @@ -983,6 +994,10 @@ union perf_mem_data_src {
>  #define PERF_MEM_SNOOP_HITM	0x10 /* snoop hit modified */
>  #define PERF_MEM_SNOOP_SHIFT	19
>  
> +#define PERF_MEM_SNOOPX_FWD	0x01 /* forward */
> +/* 1 free */
> +#define PERF_MEM_SNOOPX_SHIFT	41
> +
>  /* locked instruction */
>  #define PERF_MEM_LOCK_NA	0x01 /* not available */
>  #define PERF_MEM_LOCK_LOCKED	0x02 /* locked transaction */
> -- 
> 2.9.4
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/6] perf/x86: Fix data source decoding for Skylake
  2017-06-06 10:08   ` Peter Zijlstra
@ 2017-06-06 13:51     ` Andi Kleen
  2017-06-06 16:21       ` Peter Zijlstra
  0 siblings, 1 reply; 11+ messages in thread
From: Andi Kleen @ 2017-06-06 13:51 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, acme, linux-kernel, jolsa, Andi Kleen, Stephane Eranian

> Not too happy about that..
> 
>   P(LVLX, L4) | P(LVLX, REMOTE)
> 
> reads like something that should be PERF_MEM_LVL_REM_CCE1 or something

CCE1? You mean L4?

The two bits seem cleaner to me than enumerating all cases.  But ok.

REM_L4


> This new generic 'REMOTE' has too much overlap with the existing things.

So you want a REM_NA ?

-Andi

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/6] perf/x86: Fix data source decoding for Skylake
  2017-06-06 13:51     ` Andi Kleen
@ 2017-06-06 16:21       ` Peter Zijlstra
  2017-06-06 17:12         ` Andi Kleen
  0 siblings, 1 reply; 11+ messages in thread
From: Peter Zijlstra @ 2017-06-06 16:21 UTC (permalink / raw)
  To: Andi Kleen; +Cc: acme, linux-kernel, jolsa, Andi Kleen, Stephane Eranian

On Tue, Jun 06, 2017 at 06:51:20AM -0700, Andi Kleen wrote:
> > Not too happy about that..
> > 
> >   P(LVLX, L4) | P(LVLX, REMOTE)
> > 
> > reads like something that should be PERF_MEM_LVL_REM_CCE1 or something
> 
> CCE1? You mean L4?

#define PERF_MEM_LVL_REM_CCE1   0x400 /* Remote Cache (1 hop) */

It says 'cache' which is irrespective of level.

> The two bits seem cleaner to me than enumerating all cases.  But ok.

I tend to agree that a separate remote,distance,type fields would have
been nicer, but we seem to be stuck with this REM_* crud..

> > This new generic 'REMOTE' has too much overlap with the existing things.
> 
> So you want a REM_NA ?

Not sure... What's the point of a REM_NA vs regular NA ? "'something'
happened 'not here'" vs "'something' happened".


I hope Stephane has better ideas, he seems to be the one that introduced
this in the first place.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/6] perf/x86: Fix data source decoding for Skylake
  2017-06-06 16:21       ` Peter Zijlstra
@ 2017-06-06 17:12         ` Andi Kleen
  0 siblings, 0 replies; 11+ messages in thread
From: Andi Kleen @ 2017-06-06 17:12 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Andi Kleen, acme, linux-kernel, jolsa, Stephane Eranian

On Tue, Jun 06, 2017 at 06:21:08PM +0200, Peter Zijlstra wrote:
> On Tue, Jun 06, 2017 at 06:51:20AM -0700, Andi Kleen wrote:
> > > Not too happy about that..
> > > 
> > >   P(LVLX, L4) | P(LVLX, REMOTE)
> > > 
> > > reads like something that should be PERF_MEM_LVL_REM_CCE1 or something
> > 
> > CCE1? You mean L4?
> 
> #define PERF_MEM_LVL_REM_CCE1   0x400 /* Remote Cache (1 hop) */
> 
> It says 'cache' which is irrespective of level.

But remote L4 is far more useful than remote something.

(even though it currently doesn't exist, so it's not too important)

> 
> > The two bits seem cleaner to me than enumerating all cases.  But ok.
> 
> I tend to agree that a separate remote,distance,type fields would have
> been nicer, but we seem to be stuck with this REM_* crud..

Obviously the old ones cannot be changed, but I don't see any reason
not to do better for new encodings.

> 
> > > This new generic 'REMOTE' has too much overlap with the existing things.
> > 
> > So you want a REM_NA ?
> 
> Not sure... What's the point of a REM_NA vs regular NA ? "'something'
> happened 'not here'" vs "'something' happened".

It's a very big difference in latency. That's useful information.

> I hope Stephane has better ideas, he seems to be the one that introduced
> this in the first place.

The original bits were pretty much a direct mapping from the Nehalem
Intel bits.  But even Intel has outgrown it to some degree.

-Andi

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/6] perf/x86: Move Nehalem PEBS code to flag
@ 2017-06-02 20:12 Andi Kleen
  0 siblings, 0 replies; 11+ messages in thread
From: Andi Kleen @ 2017-06-02 20:12 UTC (permalink / raw)
  To: peterz, acme; +Cc: eranian, jolsa, linux-kernel, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Minor cleanup: use an explicit x86_pmu flag to handle the
missing Lock / TLB information on Nehalem, instead of always
checking the model number for each PEBS sample.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/events/intel/core.c | 1 +
 arch/x86/events/intel/ds.c   | 5 +----
 arch/x86/events/perf_event.h | 3 ++-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a6d91d4e37a1..59933105d0ea 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3712,6 +3712,7 @@ __init int intel_pmu_init(void)
 
 		intel_pmu_pebs_data_source_nhm();
 		x86_add_quirk(intel_nehalem_quirk);
+		x86_pmu.pebs_no_tlb = 1;
 
 		pr_cont("Nehalem events, ");
 		break;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index c6d23ffe422d..7732999f5e2a 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -149,8 +149,6 @@ static u64 load_latency_data(u64 status)
 {
 	union intel_x86_pebs_dse dse;
 	u64 val;
-	int model = boot_cpu_data.x86_model;
-	int fam = boot_cpu_data.x86;
 
 	dse.val = status;
 
@@ -162,8 +160,7 @@ static u64 load_latency_data(u64 status)
 	/*
 	 * Nehalem models do not support TLB, Lock infos
 	 */
-	if (fam == 0x6 && (model == 26 || model == 30
-	    || model == 31 || model == 46)) {
+	if (x86_pmu.pebs_no_tlb) {
 		val |= P(TLB, NA) | P(LOCK, NA);
 		return val;
 	}
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index be3d36254040..6f5461fe582b 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -588,7 +588,8 @@ struct x86_pmu {
 			pebs		:1,
 			pebs_active	:1,
 			pebs_broken	:1,
-			pebs_prec_dist	:1;
+			pebs_prec_dist	:1,
+			pebs_no_tlb	:1;
 	int		pebs_record_size;
 	int		pebs_buffer_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2017-06-06 17:12 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-05 22:48 [PATCH 1/6] perf/x86: Move Nehalem PEBS code to flag Andi Kleen
2017-06-05 22:48 ` [PATCH 2/6] perf/x86: Fix data source decoding for Skylake Andi Kleen
2017-06-06 10:08   ` Peter Zijlstra
2017-06-06 13:51     ` Andi Kleen
2017-06-06 16:21       ` Peter Zijlstra
2017-06-06 17:12         ` Andi Kleen
2017-06-05 22:48 ` [PATCH 3/6] perf, tools: Add support for printing new mem_info encodings Andi Kleen
2017-06-05 22:48 ` [PATCH 4/6] perf/x86: Add support for PEBS sampling persistent RAM on Skylake Andi Kleen
2017-06-05 22:48 ` [PATCH 5/6] perf, tools: Support persistent memory encoding Andi Kleen
2017-06-05 22:48 ` [PATCH 6/6] perf, tools: Add test cases for new data source encoding Andi Kleen
  -- strict thread matches above, loose matches on Subject: below --
2017-06-02 20:12 [PATCH 1/6] perf/x86: Move Nehalem PEBS code to flag Andi Kleen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.