All of lore.kernel.org
 help / color / mirror / Atom feed
From: Don Zickus <dzickus@redhat.com>
To: acme@ghostprotocols.net
Cc: LKML <linux-kernel@vger.kernel.org>,
	jolsa@redhat.com, jmario@redhat.com, fowles@inreach.com,
	peterz@infradead.org, eranian@google.com, andi.kleen@intel.com,
	Don Zickus <dzickus@redhat.com>
Subject: [PATCH 08/15 V3] perf, c2c: Add stats to track data source bits and cpu to node maps
Date: Mon, 24 Mar 2014 15:36:59 -0400	[thread overview]
Message-ID: <1395689826-215033-9-git-send-email-dzickus@redhat.com> (raw)
In-Reply-To: <1395689826-215033-1-git-send-email-dzickus@redhat.com>

This patch adds a bunch of stats that will be used later in post-processing
to determine where and with what frequency the HITMs are coming from.

Most of the stats are decoded from the data source response.  Another
piece of the stats is tracking which cpu the record came in on.

Credit to Dick Fowles for determining which bits are important and how to
properly track them.  Ported to perf by me.

V2: refresh with hist_entry

Original-by: Dick Fowles <rfowles@redhat.com>
Signed-off-by: Don Zickus <dzickus@redhat.com>
---
 tools/perf/builtin-c2c.c | 187 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 187 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index b5742bd..55c5ce9 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -5,17 +5,58 @@
 #include "util/parse-options.h"
 #include "util/session.h"
 #include "util/tool.h"
+#include "util/stat.h"
+#include "util/cpumap.h"
 #include "util/debug.h"
 #include <api/fs/fs.h>
 #include "util/annotate.h"
 
 #include <linux/compiler.h>
 #include <linux/kernel.h>
+#include <sched.h>
+
+typedef struct {	/* every field is a counter incremented in c2c_decode_stats() */
+	int  locks;               /* count of 'lock' transactions */
+	int  store;               /* count of all stores in trace */
+	int  st_uncache;          /* stores to uncacheable address */
+	int  st_noadrs;           /* stores with a zero data address */
+	int  st_l1hit;            /* count of stores that hit L1D */
+	int  st_l1miss;           /* count of stores that miss L1D */
+	int  load;                /* count of all loads in trace */
+	int  ld_excl;             /* exclusive loads, rmt/lcl DRAM - snp none/miss */
+	int  ld_shared;           /* shared loads, rmt/lcl DRAM - snp hit */
+	int  ld_uncache;          /* loads to uncacheable address */
+	int  ld_io;               /* loads to io address */
+	int  ld_miss;             /* loads with the MISS level bit set */
+	int  ld_noadrs;           /* loads with a zero data address */
+	int  ld_fbhit;            /* count of loads hitting Fill Buffer */
+	int  ld_l1hit;            /* count of loads that hit L1D */
+	int  ld_l2hit;            /* count of loads that hit L2D */
+	int  ld_llchit;           /* count of loads that hit L3 without snoop HITM */
+	int  lcl_hitm;            /* count of loads with local HITM  */
+	int  rmt_hitm;            /* count of loads with remote HITM */
+	int  rmt_hit;             /* count of loads with remote hit clean */
+	int  lcl_dram;            /* count of loads miss to local DRAM */
+	int  rmt_dram;            /* count of loads miss to remote DRAM */
+	int  nomap;               /* load/stores missing a data or instruction map */
+	int  noparse;             /* entries whose op is neither load nor store */
+} trinfo_t;
+
+struct c2c_stats {
+	cpu_set_t		cpuset;		/* cleared with CPU_ZERO() in perf_c2c__init() */
+	int			nr_entries;	/* entries handed to c2c_decode_stats() */
+	u64			total_period;	/* running sum of entry->stat.period */
+	trinfo_t		t;		/* per-category load/store counters */
+	struct stats		stats;		/* fed the entry weight on remote HITM loads */
+};
 
 struct perf_c2c {
 	struct perf_tool tool;
 	bool		 raw_records;
 	struct hists	 hists;
+
+	/* stats accumulated by c2c_decode_stats() */
+	struct c2c_stats	stats;
 };
 
 enum { OP, LVL, SNP, LCK, TLB };
@@ -26,6 +67,29 @@ static int lat_level = DEFAULT_LATENCY_THRES;
 static int prec_level = DEFAULT_PRECISION;
 static bool no_stores = false;
 
+#define RMT_RAM              (PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_REM_RAM2)
+#define RMT_LLC              (PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_REM_CCE2)
+/* *_HIT(a): true when both the level bit and PERF_MEM_LVL_HIT are set in a */
+#define L1CACHE_HIT(a)       (((a) & PERF_MEM_LVL_L1 ) && ((a) & PERF_MEM_LVL_HIT))
+#define FILLBUF_HIT(a)       (((a) & PERF_MEM_LVL_LFB) && ((a) & PERF_MEM_LVL_HIT))
+#define L2CACHE_HIT(a)       (((a) & PERF_MEM_LVL_L2 ) && ((a) & PERF_MEM_LVL_HIT))
+#define L3CACHE_HIT(a)       (((a) & PERF_MEM_LVL_L3 ) && ((a) & PERF_MEM_LVL_HIT))
+/* *_MISS(a): true when both the level bit and PERF_MEM_LVL_MISS are set in a */
+#define L1CACHE_MISS(a)      (((a) & PERF_MEM_LVL_L1 ) && ((a) & PERF_MEM_LVL_MISS))
+#define L3CACHE_MISS(a)      (((a) & PERF_MEM_LVL_L3 ) && ((a) & PERF_MEM_LVL_MISS))
+/* NOTE(review): LD_UNCACHED and ST_UNCACHED expand to the same test */
+#define LD_UNCACHED(a)       (((a) & PERF_MEM_LVL_UNC) && ((a) & PERF_MEM_LVL_HIT))
+#define ST_UNCACHED(a)       (((a) & PERF_MEM_LVL_UNC) && ((a) & PERF_MEM_LVL_HIT))
+/* remote tests: a is checked against PERF_MEM_LVL_* bits, b against PERF_MEM_SNOOP_* bits */
+#define RMT_LLCHIT(a)        (((a) & RMT_LLC) && ((a) & PERF_MEM_LVL_HIT))
+#define RMT_HIT(a,b)         (((a) & RMT_LLC) && ((b) & PERF_MEM_SNOOP_HIT))
+#define RMT_HITM(a,b)        (((a) & RMT_LLC) && ((b) & PERF_MEM_SNOOP_HITM))
+#define RMT_MEM(a)           (((a) & RMT_RAM) && ((a) & PERF_MEM_LVL_HIT))
+/* local tests: a is checked against PERF_MEM_LVL_* bits, b against PERF_MEM_SNOOP_* bits */
+#define LCL_HIT(a,b)         (L3CACHE_HIT(a) && ((b) & PERF_MEM_SNOOP_HIT))
+#define LCL_HITM(a,b)        (L3CACHE_HIT(a) && ((b) & PERF_MEM_SNOOP_HITM))
+#define LCL_MEM(a)           (((a) & PERF_MEM_LVL_LOC_RAM) && ((a) & PERF_MEM_LVL_HIT))
+
 static int perf_c2c__scnprintf_data_src(char *bf, size_t size, uint64_t val)
 {
 #define PREFIX       "["
@@ -148,6 +212,109 @@ static int perf_sample__fprintf(struct perf_sample *sample, char tag,
 		       mi->iaddr.sym ? mi->iaddr.sym->name : "???");
 }
 
+static int c2c_decode_stats(struct c2c_stats *stats, struct hist_entry *entry)
+{
+	union perf_mem_data_src *data_src = &entry->mem_info->data_src;
+	u64 daddr = entry->mem_info->daddr.addr;
+	u64 weight = entry->stat.weight;
+	int err = 0;	/* NOTE(review): never reassigned, so the final return is always 0 */
+	/* Count @entry in @stats by its data_src bits; returns 0, or -1 to reject it. */
+	u64 op = data_src->mem_op;
+	u64 lvl = data_src->mem_lvl;
+	u64 snoop = data_src->mem_snoop;
+	u64 lock = data_src->mem_lock;
+	/* P(LVL,HIT) pastes to PERF_MEM_LVL_HIT, P(OP,LOAD) to PERF_MEM_OP_LOAD, ... */
+#define P(a,b) PERF_MEM_##a##_##b
+	/* totals are bumped first, so entries rejected below are still counted */
+	stats->nr_entries++;
+	stats->total_period += entry->stat.period;
+
+	if (lock & P(LOCK,LOCKED)) stats->t.locks++;
+
+	if (op & P(OP,LOAD)) {
+		stats->t.load++;
+		/* a zero data address is counted and then rejected with -1 */
+		if (!daddr) {
+			stats->t.ld_noadrs++;
+			return -1;
+		}
+		/* the per-level counters below only apply when the HIT bit is set */
+		if (lvl & P(LVL,HIT)) {
+			if (lvl & P(LVL,UNC)) stats->t.ld_uncache++;
+			if (lvl & P(LVL,IO))  stats->t.ld_io++;
+			if (lvl & P(LVL,LFB)) stats->t.ld_fbhit++;
+			if (lvl & P(LVL,L1 )) stats->t.ld_l1hit++;
+			if (lvl & P(LVL,L2 )) stats->t.ld_l2hit++;
+			if (lvl & P(LVL,L3 )) {
+				if (snoop & P(SNOOP,HITM))
+					stats->t.lcl_hitm++;
+				else
+					stats->t.ld_llchit++;
+			}
+
+			if (lvl & P(LVL,LOC_RAM)) {
+				stats->t.lcl_dram++;
+				if (snoop & P(SNOOP,HIT))
+					stats->t.ld_shared++;
+				else
+					stats->t.ld_excl++;
+			}
+
+			if ((lvl & P(LVL,REM_RAM1)) ||
+			    (lvl & P(LVL,REM_RAM2))) {
+				stats->t.rmt_dram++;
+				if (snoop & P(SNOOP,HIT))
+					stats->t.ld_shared++;
+				else
+					stats->t.ld_excl++;
+			}
+		}
+		/* remote cache levels are examined whether or not the HIT bit is set */
+		if ((lvl & P(LVL,REM_CCE1)) ||
+		    (lvl & P(LVL,REM_CCE2))) {
+			if (snoop & P(SNOOP, HIT))
+				stats->t.rmt_hit++;
+			else if (snoop & P(SNOOP, HITM)) {	/* reached only when SNOOP_HIT is clear */
+				stats->t.rmt_hitm++;
+				update_stats(&stats->stats, weight);	/* weight is recorded for remote HITM only */
+			}
+		}
+
+		if ((lvl & P(LVL,MISS)))
+			stats->t.ld_miss++;
+
+	} else if (op & P(OP,STORE)) {
+		/* store */
+		stats->t.store++;
+
+		if (!daddr) {
+			stats->t.st_noadrs++;
+			return -1;
+		}
+		/* stores only track uncacheable hits and L1 hit/miss */
+		if (lvl & P(LVL,HIT)) {
+			if (lvl & P(LVL,UNC)) stats->t.st_uncache++;
+			if (lvl & P(LVL,L1 )) stats->t.st_l1hit++;
+		}
+		if (lvl & P(LVL,MISS))
+			if (lvl & P(LVL,L1)) stats->t.st_l1miss++;
+	} else {
+		/* unparsable data_src? */
+		stats->t.noparse++;
+		return -1;
+	}
+	/* loads and stores that got this far need non-NULL data and instruction maps */
+	if (!entry->mem_info->daddr.map || !entry->mem_info->iaddr.map) {
+		stats->t.nomap++;
+		pr_debug("Dropping data 0x%lx (%p) and inst 0x%lx (%p)\n",
+			 entry->mem_info->daddr.addr, entry->mem_info->daddr.map,
+			 entry->mem_info->iaddr.addr, entry->mem_info->iaddr.map);
+		return -1;
+	}
+	/* NOTE(review): pr_debug above prints the addr fields with %lx - confirm the type matches on 32-bit builds */
+	return err;
+}
+
 static int perf_c2c__process_load_store(struct perf_c2c *c2c,
 					struct addr_location *al,
 					struct perf_sample *sample,
@@ -187,6 +354,14 @@ static int perf_c2c__process_load_store(struct perf_c2c *c2c,
 		goto out_mem;
 	}
 
+	err = c2c_decode_stats(&c2c->stats, he);
+	if (err < 0) {
+		err = 0;
+		rb_erase(&he->rb_node_in, c2c->hists.entries_in);
+		free(he);
+		goto out;
+	}
+
 	c2c->hists.stats.total_period += cost;
 	hists__inc_nr_events(&c2c->hists, PERF_RECORD_SAMPLE);
 	return err;
@@ -280,6 +455,9 @@ static int perf_c2c__read_events(struct perf_c2c *c2c)
 		goto out;
 	}
 
+	if (symbol__init() < 0)
+		goto out_delete;
+
 	/* setup the evsel handlers for each event type */
 	evlist__for_each(session->evlist, evsel) {
 		const char *name = perf_evsel__name(evsel);
@@ -294,12 +472,20 @@ static int perf_c2c__read_events(struct perf_c2c *c2c)
 
 	err = perf_c2c__process_events(session, c2c);
 
+out_delete:
+	perf_session__delete(session);
 out:
 	return err;
 }
 
 static int perf_c2c__init(struct perf_c2c *c2c)
 {
+	/* setup cpu map */
+	if (cpu__setup_cpunode_map() < 0) {
+		pr_err("can not setup cpu map\n");
+		return -1;
+	}
+
 	sort__mode = SORT_MODE__PHYSID;
 	sort__wants_unique = 1;
 	sort_order = "daddr,iaddr,pid,tid";
@@ -310,6 +496,7 @@ static int perf_c2c__init(struct perf_c2c *c2c)
 	}
 
 	hists__init(&c2c->hists);
+	CPU_ZERO(&c2c->stats.cpuset);
 
 	return 0;
 }
-- 
1.7.11.7


  parent reply	other threads:[~2014-03-24 19:41 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-03-24 19:36 [PATCH 00/15 V3] perf, c2c: Add new tool to analyze cacheline contention on NUMA systems Don Zickus
2014-03-24 19:36 ` [PATCH 01/15 V3] perf: Fix stddev calculation Don Zickus
2014-03-24 19:36 ` [PATCH 02/15 V3] perf, callchain: Add generic callchain print handler for stdio Don Zickus
2014-03-24 19:36 ` [PATCH 03/15 V3] perf c2c: Shared data analyser Don Zickus
2014-04-08  6:59   ` Namhyung Kim
2014-04-08 14:22     ` Don Zickus
2014-04-09  0:58       ` Namhyung Kim
2014-04-09  1:29         ` Andi Kleen
2014-04-08 14:23     ` Don Zickus
2014-03-24 19:36 ` [PATCH 04/15 V3] perf c2c: Dump raw records, decode data_src bits Don Zickus
2014-04-08  7:09   ` Namhyung Kim
2014-03-24 19:36 ` [PATCH 05/15 V3] perf, c2c: Rework setup code to prepare for features Don Zickus
2014-03-29 17:10   ` Jiri Olsa
2014-04-01  2:52     ` Don Zickus
2014-04-08  7:41     ` Namhyung Kim
2014-04-08 14:11       ` Don Zickus
2014-04-09  1:12         ` Namhyung Kim
2014-04-09  1:36           ` Don Zickus
2014-04-11 14:57             ` Jiri Olsa
2014-04-08  7:18   ` Namhyung Kim
2014-03-24 19:36 ` [PATCH 06/15 V3] perf, c2c: Add in new options to configure latency and stores Don Zickus
2014-03-29 17:11   ` Jiri Olsa
2014-04-01  2:55     ` Don Zickus
2014-04-06 13:14       ` Jiri Olsa
2014-04-07 18:16         ` Don Zickus
2014-04-09  0:17           ` Namhyung Kim
2014-04-08  7:37         ` Namhyung Kim
2014-04-08  7:31   ` Namhyung Kim
2014-03-24 19:36 ` [PATCH 07/15 V3] perf, c2c: Add in sort on physid Don Zickus
2014-04-08  7:56   ` Namhyung Kim
2014-04-08 14:17     ` Don Zickus
2014-04-09  1:30       ` Namhyung Kim
2014-04-09  1:56         ` Don Zickus
2014-03-24 19:36 ` Don Zickus [this message]
2014-04-08  8:05   ` [PATCH 08/15 V3] perf, c2c: Add stats to track data source bits and cpu to node maps Namhyung Kim
2014-03-24 19:37 ` [PATCH 09/15 V3] perf, c2c: Sort based on hottest cache line Don Zickus
2014-04-08  8:23   ` Namhyung Kim
2014-03-24 19:37 ` [PATCH 10/15 V3] perf, c2c: Display cacheline HITM analysis to stdout Don Zickus
2014-04-08  8:26   ` Namhyung Kim
2014-04-08 23:46   ` Namhyung Kim
2014-03-24 19:37 ` [PATCH 11/15 V3] perf, c2c: Add callchain support Don Zickus
2014-03-24 19:37 ` [PATCH 12/15 V3] perf, c2c: Output summary stats Don Zickus
2014-03-24 19:37 ` [PATCH 13/15 V3] perf, c2c: Dump rbtree for debugging Don Zickus
2014-03-24 19:37 ` [PATCH 14/15 V3] perf, c2c: Add symbol count table Don Zickus
2014-03-24 19:37 ` [PATCH 15/15 V3] perf, c2c: Add shared cacheline summary table Don Zickus

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1395689826-215033-9-git-send-email-dzickus@redhat.com \
    --to=dzickus@redhat.com \
    --cc=acme@ghostprotocols.net \
    --cc=andi.kleen@intel.com \
    --cc=eranian@google.com \
    --cc=fowles@inreach.com \
    --cc=jmario@redhat.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.