All of lore.kernel.org
 help / color / mirror / Atom feed
From: Don Zickus <dzickus@redhat.com>
To: acme@ghostprotocols.net
Cc: LKML <linux-kernel@vger.kernel.org>,
	jolsa@redhat.com, jmario@redhat.com, fowles@inreach.com,
	peterz@infradead.org, eranian@google.com, andi.kleen@intel.com,
	Don Zickus <dzickus@redhat.com>
Subject: [PATCH 09/15 V3] perf, c2c: Sort based on hottest cache line
Date: Mon, 24 Mar 2014 15:37:00 -0400	[thread overview]
Message-ID: <1395689826-215033-10-git-send-email-dzickus@redhat.com> (raw)
In-Reply-To: <1395689826-215033-1-git-send-email-dzickus@redhat.com>

Now that we have all the events sorted on a unique address, we can walk
the rbtree sequentially and count up all the HITMs for each cacheline
fairly easily.

Once we encounter a new event on a different cacheline, process the previous
cacheline.  That includes determining if any HITMs were present on that
cacheline and, if so, adding it to another rbtree sorted on the number of
HITMs.

This second rbtree, sorted on the number of HITMs, will be the interesting
data we want to report and will be displayed in a follow-up patch.

For now, organize the data properly.

V2: re-work using hist_entries

Signed-off-by: Don Zickus <dzickus@redhat.com>
---
 tools/perf/builtin-c2c.c | 201 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 201 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 55c5ce9..8674626 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -59,6 +59,25 @@ struct perf_c2c {
 	struct c2c_stats	stats;
 };
 
+#define CACHE_LINESIZE       64
+#define CLINE_OFFSET_MSK     (CACHE_LINESIZE - 1)
+#define CLADRS(a)            ((a) & ~(CLINE_OFFSET_MSK))
+#define CLOFFSET(a)          (int)((a) &  (CLINE_OFFSET_MSK))
+
+struct c2c_hit {
+	struct rb_node	rb_node;
+	struct rb_root  tree;
+	struct list_head list;
+	u64		cacheline;
+	int		color;
+	struct c2c_stats	stats;
+	pid_t		pid;
+	pid_t		tid;
+	u64		daddr;
+	u64		iaddr;
+	struct mem_info	*mi;
+};
+
 enum { OP, LVL, SNP, LCK, TLB };
 
 #define DEFAULT_LATENCY_THRES	30
@@ -160,6 +179,44 @@ static int perf_c2c__scnprintf_data_src(char *bf, size_t size, uint64_t val)
 	return printed;
 }
 
+static int c2c_hitm__add_to_list(struct rb_root *root, struct c2c_hit *h)
+{
+	struct rb_node **p;
+	struct rb_node *parent = NULL;
+	struct c2c_hit *he;
+	int64_t cmp;
+	u64 l_hitms, r_hitms;
+
+	p = &root->rb_node;
+
+	while (*p != NULL) {
+		parent = *p;
+		he = rb_entry(parent, struct c2c_hit, rb_node);
+
+		/* sort on remote hitms first */
+		l_hitms = he->stats.t.rmt_hitm;
+		r_hitms = h->stats.t.rmt_hitm;
+		cmp = r_hitms - l_hitms;
+
+		if (!cmp) {
+			/* sort on local hitms */
+			l_hitms = he->stats.t.lcl_hitm;
+			r_hitms = h->stats.t.lcl_hitm;
+			cmp = r_hitms - l_hitms;
+		}
+
+		if (cmp > 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&h->rb_node, parent, p);
+	rb_insert_color(&h->rb_node, root);
+
+	return 0;
+}
+
 static int perf_c2c__fprintf_header(FILE *fp)
 {
 	int printed = fprintf(fp, "%c %-16s  %6s  %6s  %4s  %18s  %18s  %18s  %6s  %-10s %-60s %s\n", 
@@ -315,6 +372,50 @@ static int c2c_decode_stats(struct c2c_stats *stats, struct hist_entry *entry)
 	return err;
 }
 
+static struct c2c_hit *c2c_hit__new(u64 cacheline, struct hist_entry *entry)
+{
+	struct c2c_hit *h = zalloc(sizeof(struct c2c_hit));
+
+	if (!h) {
+		pr_err("Could not allocate c2c_hit memory\n");
+		return NULL;
+	}
+
+	CPU_ZERO(&h->stats.cpuset);
+	INIT_LIST_HEAD(&h->list);
+	init_stats(&h->stats.stats);
+	h->tree = RB_ROOT;
+	h->cacheline = cacheline;
+	h->pid = entry->thread->pid_;
+	h->tid = entry->thread->tid;
+
+	/* use original addresses here, not adjusted al_addr */
+	h->iaddr = entry->mem_info->iaddr.addr;
+	h->daddr = entry->mem_info->daddr.addr;
+
+	h->mi = entry->mem_info;
+	return h;
+}
+
+static void c2c_hit__update_strings(struct c2c_hit *h,
+				    struct hist_entry *n)
+{
+	if (h->pid != n->thread->pid_)
+		h->pid = -1;
+
+	if (h->tid != n->thread->tid)
+		h->tid = -1;
+
+	/* use original addresses here, not adjusted al_addr */
+	if (h->iaddr != n->mem_info->iaddr.addr)
+		h->iaddr = -1;
+
+	if (CLADRS(h->daddr) != CLADRS(n->mem_info->daddr.addr))
+		h->daddr = -1;
+
+	CPU_SET(n->cpu, &h->stats.cpuset);
+}
+
 static int perf_c2c__process_load_store(struct perf_c2c *c2c,
 					struct addr_location *al,
 					struct perf_sample *sample,
@@ -424,6 +525,104 @@ err:
 	return err;
 }
 
+#define HAS_HITMS(h) (h->stats.t.lcl_hitm || h->stats.t.rmt_hitm)
+
+static void c2c_hit__update_stats(struct c2c_stats *new,
+				  struct c2c_stats *old)
+{
+	new->t.load		+= old->t.load;
+	new->t.ld_fbhit		+= old->t.ld_fbhit;
+	new->t.ld_l1hit		+= old->t.ld_l1hit;
+	new->t.ld_l2hit		+= old->t.ld_l2hit;
+	new->t.ld_llchit	+= old->t.ld_llchit;
+	new->t.locks		+= old->t.locks;
+	new->t.lcl_dram		+= old->t.lcl_dram;
+	new->t.rmt_dram		+= old->t.rmt_dram;
+	new->t.lcl_hitm		+= old->t.lcl_hitm;
+	new->t.rmt_hitm		+= old->t.rmt_hitm;
+	new->t.rmt_hit		+= old->t.rmt_hit;
+	new->t.store		+= old->t.store;
+	new->t.st_l1hit		+= old->t.st_l1hit;
+
+	new->total_period	+= old->total_period;
+}
+
+static inline int valid_hitm_or_store(union perf_mem_data_src *dsrc)
+{
+	return ((dsrc->mem_snoop & P(SNOOP,HITM)) ||
+		(dsrc->mem_op & P(OP,STORE)));
+}
+
+static void c2c_analyze_hitms(struct perf_c2c *c2c)
+{
+
+	struct rb_node *next = rb_first(c2c->hists.entries_in);
+	struct hist_entry *he;
+	struct c2c_hit *h = NULL;
+	struct c2c_stats hitm_stats;
+	struct rb_root hitm_tree = RB_ROOT;
+	int shared_clines = 0;
+	u64 cl = 0;
+
+	memset(&hitm_stats, 0, sizeof(struct c2c_stats));
+
+	/* find HITMs */
+	while (next) {
+		he = rb_entry(next, struct hist_entry, rb_node_in);
+		next = rb_next(&he->rb_node_in);
+
+		cl = he->mem_info->daddr.al_addr;
+
+		/* switch cache line objects */
+		/* 'color' forces a boundary change based on the original sort */
+		if (!h || !he->color || (CLADRS(cl) != h->cacheline)) {
+			if (h && HAS_HITMS(h)) {
+				c2c_hit__update_stats(&hitm_stats, &h->stats);
+
+				/* sort based on hottest cacheline */
+				c2c_hitm__add_to_list(&hitm_tree, h);
+				shared_clines++;
+			} else {
+				/* stores-only are un-interesting */
+				free(h);
+			}
+			h = c2c_hit__new(CLADRS(cl), he);
+			if (!h)
+				goto cleanup;
+		}
+
+
+		c2c_decode_stats(&h->stats, he);
+
+		/* filter out non-hitms as un-interesting noise */
+		if (valid_hitm_or_store(&he->mem_info->data_src)) {
+			/* save the entry for later processing */
+			list_add_tail(&he->pairs.node, &h->list);
+
+			c2c_hit__update_strings(h, he);
+		}
+	}
+
+	/* last chunk */
+	if (h && HAS_HITMS(h)) {
+		c2c_hit__update_stats(&hitm_stats, &h->stats);
+		c2c_hitm__add_to_list(&hitm_tree, h);
+		shared_clines++;
+	} else
+		free(h);
+
+cleanup:
+	next = rb_first(&hitm_tree);
+	while (next) {
+		h = rb_entry(next, struct c2c_hit, rb_node);
+		next = rb_next(&h->rb_node);
+		rb_erase(&h->rb_node, &hitm_tree);
+
+		free(h);
+	}
+	return;
+}
+
 static int perf_c2c__process_events(struct perf_session *session,
 				    struct perf_c2c *c2c)
 {
@@ -435,6 +634,8 @@ static int perf_c2c__process_events(struct perf_session *session,
 		goto err;
 	}
 
+	c2c_analyze_hitms(c2c);
+
 err:
 	return err;
 }
-- 
1.7.11.7


  parent reply	other threads:[~2014-03-24 19:42 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-03-24 19:36 [PATCH 00/15 V3] perf, c2c: Add new tool to analyze cacheline contention on NUMA systems Don Zickus
2014-03-24 19:36 ` [PATCH 01/15 V3] perf: Fix stddev calculation Don Zickus
2014-03-24 19:36 ` [PATCH 02/15 V3] perf, callchain: Add generic callchain print handler for stdio Don Zickus
2014-03-24 19:36 ` [PATCH 03/15 V3] perf c2c: Shared data analyser Don Zickus
2014-04-08  6:59   ` Namhyung Kim
2014-04-08 14:22     ` Don Zickus
2014-04-09  0:58       ` Namhyung Kim
2014-04-09  1:29         ` Andi Kleen
2014-04-08 14:23     ` Don Zickus
2014-03-24 19:36 ` [PATCH 04/15 V3] perf c2c: Dump raw records, decode data_src bits Don Zickus
2014-04-08  7:09   ` Namhyung Kim
2014-03-24 19:36 ` [PATCH 05/15 V3] perf, c2c: Rework setup code to prepare for features Don Zickus
2014-03-29 17:10   ` Jiri Olsa
2014-04-01  2:52     ` Don Zickus
2014-04-08  7:41     ` Namhyung Kim
2014-04-08 14:11       ` Don Zickus
2014-04-09  1:12         ` Namhyung Kim
2014-04-09  1:36           ` Don Zickus
2014-04-11 14:57             ` Jiri Olsa
2014-04-08  7:18   ` Namhyung Kim
2014-03-24 19:36 ` [PATCH 06/15 V3] perf, c2c: Add in new options to configure latency and stores Don Zickus
2014-03-29 17:11   ` Jiri Olsa
2014-04-01  2:55     ` Don Zickus
2014-04-06 13:14       ` Jiri Olsa
2014-04-07 18:16         ` Don Zickus
2014-04-09  0:17           ` Namhyung Kim
2014-04-08  7:37         ` Namhyung Kim
2014-04-08  7:31   ` Namhyung Kim
2014-03-24 19:36 ` [PATCH 07/15 V3] perf, c2c: Add in sort on physid Don Zickus
2014-04-08  7:56   ` Namhyung Kim
2014-04-08 14:17     ` Don Zickus
2014-04-09  1:30       ` Namhyung Kim
2014-04-09  1:56         ` Don Zickus
2014-03-24 19:36 ` [PATCH 08/15 V3] perf, c2c: Add stats to track data source bits and cpu to node maps Don Zickus
2014-04-08  8:05   ` Namhyung Kim
2014-03-24 19:37 ` Don Zickus [this message]
2014-04-08  8:23   ` [PATCH 09/15 V3] perf, c2c: Sort based on hottest cache line Namhyung Kim
2014-03-24 19:37 ` [PATCH 10/15 V3] perf, c2c: Display cacheline HITM analysis to stdout Don Zickus
2014-04-08  8:26   ` Namhyung Kim
2014-04-08 23:46   ` Namhyung Kim
2014-03-24 19:37 ` [PATCH 11/15 V3] perf, c2c: Add callchain support Don Zickus
2014-03-24 19:37 ` [PATCH 12/15 V3] perf, c2c: Output summary stats Don Zickus
2014-03-24 19:37 ` [PATCH 13/15 V3] perf, c2c: Dump rbtree for debugging Don Zickus
2014-03-24 19:37 ` [PATCH 14/15 V3] perf, c2c: Add symbol count table Don Zickus
2014-03-24 19:37 ` [PATCH 15/15 V3] perf, c2c: Add shared cachline summary table Don Zickus

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1395689826-215033-10-git-send-email-dzickus@redhat.com \
    --to=dzickus@redhat.com \
    --cc=acme@ghostprotocols.net \
    --cc=andi.kleen@intel.com \
    --cc=eranian@google.com \
    --cc=fowles@inreach.com \
    --cc=jmario@redhat.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.