From: Stephane Eranian <eranian@google.com>
To: linux-kernel@vger.kernel.org
Cc: peterz@infradead.org, mingo@elte.hu, ak@linux.intel.com,
	acme@redhat.com, jolsa@redhat.com, namhyung.kim@lge.com
Subject: [PATCH v2 10/16] perf/x86: add support for PEBS Precise Store
Date: Mon,  5 Nov 2012 14:50:57 +0100
Message-ID: <1352123463-7346-11-git-send-email-eranian@google.com>
In-Reply-To: <1352123463-7346-1-git-send-email-eranian@google.com>

This patch adds support for PEBS Precise Store,
which is available on Intel Sandy Bridge and
Ivy Bridge processors.

To use Precise Store, the dedicated PEBS event
must be used: mem_trans_retired:precise_stores.
With the perf tool, the generic mem-stores event
exported via sysfs can be used directly.
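
For instance, with the tooling added later in this
series (perf mem, patch 14/16), a store sampling
session might look as follows (a sketch; exact
option spellings depend on the perf version):

  $ perf mem -t store record -- ./my_workload
  $ perf mem -t store report

or, using the sysfs alias added by this patch:

  $ perf record -e cpu/mem-stores/pp -d -- ./my_workload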

Signed-off-by: Stephane Eranian <eranian@google.com>
---
 arch/x86/kernel/cpu/perf_event.h          |    5 +++
 arch/x86/kernel/cpu/perf_event_intel.c    |    2 ++
 arch/x86/kernel/cpu/perf_event_intel_ds.c |   49 +++++++++++++++++++++++++++--
 3 files changed, 54 insertions(+), 2 deletions(-)
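
As a worked example of the encoding implemented by
precise_store_data() below: a locked store that hit the
2nd level TLB but missed the L1D (dse bits: bit 0 = 0,
bit 4 = 0, bit 5 = 1) is encoded as

  P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(LVL, MISS) |
  P(TLB, L2) | P(TLB, HIT) | P(LOCK, LOCKED)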

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 3c5aa72..4e95c90 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -66,6 +66,7 @@ struct event_constraint {
  * struct event_constraint flags
  */
 #define PERF_X86_EVENT_PEBS_LDLAT	0x1 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST		0x2 /* st data address sampling */
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -242,6 +243,10 @@ struct cpu_hw_events {
 	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
 			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
 
+#define INTEL_PST_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
+
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index cbfb252..4176be3 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -150,6 +150,7 @@ EVENT_ATTR(ref-cycles,			REF_CPU_CYCLES		);
 
 EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
 EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
 
 struct attribute *nhm_events_attrs[] = {
 	EVENT_PTR(CPU_CYCLES),
@@ -178,6 +179,7 @@ struct attribute *snb_events_attrs[] = {
 	EVENT_PTR(STALLED_CYCLES_BACKEND),
 	EVENT_PTR(REF_CPU_CYCLES),
 	EVENT_PTR(mem_ld_snb),
+	EVENT_PTR(mem_st_snb),
 	NULL,
 };
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 759b0c0..ed0ca3e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -69,6 +69,44 @@ static const u64 pebs_data_source[] = {
 	OP_LH | P(LVL,UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
 };
 
+static u64 precise_store_data(u64 status)
+{
+	union intel_x86_pebs_dse dse;
+	u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
+
+	dse.val = status;
+
+	/*
+	 * bit 4: TLB access
+	 * 1 = store missed the 2nd level TLB
+	 *
+	 * on a miss, the translation was served by the
+	 * page walker or the OS; on a hit, by the 2nd level TLB
+	 */
+	if (dse.st_stlb_miss)
+		val |= P(TLB, MISS);
+	else
+		val |= P(TLB, HIT);
+
+	/*
+	 * bit 0: hit L1 data cache
+	 * if not set, then all we know is that
+	 * it missed L1D
+	 */
+	if (dse.st_l1d_hit)
+		val |= P(LVL, HIT);
+	else
+		val |= P(LVL, MISS);
+
+	/*
+	 * bit 5: Locked prefix
+	 */
+	if (dse.st_locked)
+		val |= P(LOCK, LOCKED);
+
+	return val;
+}
+
 static u64 load_latency_data(u64 status)
 {
 	union intel_x86_pebs_dse dse;
@@ -486,6 +524,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
 	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
 	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -500,6 +539,7 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
         INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
         INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
         INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
         INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
         INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
         INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -537,6 +577,8 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 
 	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
 		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
+	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
+		cpuc->pebs_enabled |= 1ULL << 63;
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
@@ -657,12 +699,13 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	struct perf_sample_data data;
 	struct pt_regs regs;
 	u64 sample_type;
-	int fll;
+	int fll, fst;
 
 	if (!intel_pmu_save_and_restart(event))
 		return;
 
 	fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
+	fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
 
 	perf_sample_data_init(&data, 0, event->hw.last_period);
 
@@ -672,7 +715,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	/*
 	 * if PEBS-LL or PreciseStore
 	 */
-	if (fll) {
+	if (fll || fst) {
 		if (sample_type & PERF_SAMPLE_ADDR)
 			data.addr = pebs->dla;
 
@@ -688,6 +731,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 		if (sample_type & PERF_SAMPLE_DSRC) {
 			if (fll)
 				data.dsrc.val = load_latency_data(pebs->dse);
+			else
+				data.dsrc.val = precise_store_data(pebs->dse);
 		}
 	}
 
-- 
1.7.9.5

