linux-kernel.vger.kernel.org archive mirror
* [PATCH 0/6] perf, events: Enable mmap2 support
@ 2014-03-24 19:34 Don Zickus
  2014-03-24 19:34 ` [PATCH 1/6] events, perf: Pass protection and flags bits through mmap2 interface Don Zickus
                   ` (5 more replies)
  0 siblings, 6 replies; 25+ messages in thread
From: Don Zickus @ 2014-03-24 19:34 UTC (permalink / raw)
  To: acme, peterz; +Cc: LKML, jolsa, jmario, fowles, eranian, andi.kleen, Don Zickus

Re-enable mmap2 support with some extra features.  Also include a user of
it: perf report.

Perf report uses a new output format for the physid work I am trying to
do in the c2c tool.  Hopefully this format is not as ugly as the previous
one.

V2: This is the second iteration of the mmap2 sorting in hist_entries to
    clean up the output a bit

Don Zickus (6):
  events, perf: Pass protection and flags bits through mmap2 interface
  perf: Update mmap2 interface with protection and flag bits
  Revert "perf: Disable PERF_RECORD_MMAP2 support"
  perf, sort:  Add physid sorting based on mmap2 data
  perf: Update sort to handle MAP_SHARED bits
  perf, sort:  Allow unique sorting instead of combining hist_entries

 include/uapi/linux/perf_event.h |   1 +
 kernel/events/core.c            |  37 ++++-
 tools/perf/builtin-report.c     |  20 ++-
 tools/perf/util/event.c         |  59 +++++---
 tools/perf/util/event.h         |   2 +
 tools/perf/util/evsel.c         |   1 +
 tools/perf/util/hist.c          |  30 +++-
 tools/perf/util/hist.h          |   8 ++
 tools/perf/util/machine.c       |   4 +-
 tools/perf/util/map.c           |   4 +-
 tools/perf/util/map.h           |   4 +-
 tools/perf/util/sort.c          | 297 ++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/sort.h          |  14 ++
 13 files changed, 450 insertions(+), 31 deletions(-)

-- 
1.7.11.7



* [PATCH 1/6] events, perf: Pass protection and flags bits through mmap2 interface
  2014-03-24 19:34 [PATCH 0/6] perf, events: Enable mmap2 support Don Zickus
@ 2014-03-24 19:34 ` Don Zickus
  2014-03-24 19:34 ` [PATCH 2/6] perf: Update mmap2 interface with protection and flag bits Don Zickus
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 25+ messages in thread
From: Don Zickus @ 2014-03-24 19:34 UTC (permalink / raw)
  To: acme, peterz; +Cc: LKML, jolsa, jmario, fowles, eranian, andi.kleen, Don Zickus

From: Peter Zijlstra <a.p.zijlstra@chello.nl>

The mmap2 interface was missing the protection and flags bits needed to
accurately determine whether an mmap'ed memory area was shared or private
and whether it was readable or not.

[tweaked patch to compile and wrote changelog - Don]
Signed-off-by: Don Zickus <dzickus@redhat.com>

--
Peter, you mentioned writing up a changelog for this patch, but you
seemed busy, so I jumped the gun a little bit.  Let me know how you
want to handle this patch if this approach is wrong.
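
For reference, after this change the PERF_RECORD_MMAP2 payload should end
up looking roughly like this (a sketch; the new fields sit between
ino_generation and the filename):

	struct {
		struct perf_event_header	header;
		u32				pid, tid;
		u64				addr;
		u64				len;
		u64				pgoff;
		u32				maj;
		u32				min;
		u64				ino;
		u64				ino_generation;
		u32				prot, flags;	/* new */
		char				filename[];
		struct sample_id		sample_id;
	};
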
---
 include/uapi/linux/perf_event.h |  1 +
 kernel/events/core.c            | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 853bc1c..2ed502f 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -699,6 +699,7 @@ enum perf_event_type {
 	 *	u32				min;
 	 *	u64				ino;
 	 *	u64				ino_generation;
+	 *	u32				prot, flags;
 	 *	char				filename[];
 	 * 	struct sample_id		sample_id;
 	 * };
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 661951a..21874d4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -39,6 +39,7 @@
 #include <linux/hw_breakpoint.h>
 #include <linux/mm_types.h>
 #include <linux/cgroup.h>
+#include <linux/mman.h>
 
 #include "internal.h"
 
@@ -5100,6 +5101,7 @@ struct perf_mmap_event {
 	int			maj, min;
 	u64			ino;
 	u64			ino_generation;
+	u32			prot, flags;
 
 	struct {
 		struct perf_event_header	header;
@@ -5141,6 +5143,8 @@ static void perf_event_mmap_output(struct perf_event *event,
 		mmap_event->event_id.header.size += sizeof(mmap_event->min);
 		mmap_event->event_id.header.size += sizeof(mmap_event->ino);
 		mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation);
+		mmap_event->event_id.header.size += sizeof(mmap_event->prot);
+		mmap_event->event_id.header.size += sizeof(mmap_event->flags);
 	}
 
 	perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
@@ -5159,6 +5163,8 @@ static void perf_event_mmap_output(struct perf_event *event,
 		perf_output_put(&handle, mmap_event->min);
 		perf_output_put(&handle, mmap_event->ino);
 		perf_output_put(&handle, mmap_event->ino_generation);
+		perf_output_put(&handle, mmap_event->prot);
+		perf_output_put(&handle, mmap_event->flags);
 	}
 
 	__output_copy(&handle, mmap_event->file_name,
@@ -5177,6 +5183,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	struct file *file = vma->vm_file;
 	int maj = 0, min = 0;
 	u64 ino = 0, gen = 0;
+	u32 prot = 0, flags = 0;
 	unsigned int size;
 	char tmp[16];
 	char *buf = NULL;
@@ -5207,6 +5214,28 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 		gen = inode->i_generation;
 		maj = MAJOR(dev);
 		min = MINOR(dev);
+
+		if (vma->vm_flags & VM_READ)
+			prot |= PROT_READ;
+		if (vma->vm_flags & VM_WRITE)
+			prot |= PROT_WRITE;
+		if (vma->vm_flags & VM_EXEC)
+			prot |= PROT_EXEC;
+
+		if (vma->vm_flags & VM_MAYSHARE)
+			flags = MAP_SHARED;
+		else
+			flags = MAP_PRIVATE;
+
+		if (vma->vm_flags & VM_DENYWRITE)
+			flags |= MAP_DENYWRITE;
+		if (vma->vm_flags & VM_MAYEXEC)
+			flags |= MAP_EXECUTABLE;
+		if (vma->vm_flags & VM_LOCKED)
+			flags |= MAP_LOCKED;
+		if (vma->vm_flags & VM_HUGETLB)
+			flags |= MAP_HUGETLB;
+
 		goto got_name;
 	} else {
 		name = (char *)arch_vma_name(vma);
@@ -5247,6 +5276,8 @@ got_name:
 	mmap_event->min = min;
 	mmap_event->ino = ino;
 	mmap_event->ino_generation = gen;
+	mmap_event->prot = prot;
+	mmap_event->flags = flags;
 
 	if (!(vma->vm_flags & VM_EXEC))
 		mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
@@ -5287,6 +5318,8 @@ void perf_event_mmap(struct vm_area_struct *vma)
 		/* .min (attr_mmap2 only) */
 		/* .ino (attr_mmap2 only) */
 		/* .ino_generation (attr_mmap2 only) */
+		/* .prot (attr_mmap2 only) */
+		/* .flags (attr_mmap2 only) */
 	};
 
 	perf_event_mmap_event(&mmap_event);
-- 
1.7.11.7



* [PATCH 2/6] perf: Update mmap2 interface with protection and flag bits
  2014-03-24 19:34 [PATCH 0/6] perf, events: Enable mmap2 support Don Zickus
  2014-03-24 19:34 ` [PATCH 1/6] events, perf: Pass protection and flags bits through mmap2 interface Don Zickus
@ 2014-03-24 19:34 ` Don Zickus
  2014-04-09  2:17   ` Namhyung Kim
  2014-03-24 19:34 ` [PATCH 3/6] Revert "perf: Disable PERF_RECORD_MMAP2 support" Don Zickus
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 25+ messages in thread
From: Don Zickus @ 2014-03-24 19:34 UTC (permalink / raw)
  To: acme, peterz; +Cc: LKML, jolsa, jmario, fowles, eranian, andi.kleen, Don Zickus

The kernel piece passes more info now.  Update the perf tool to reflect
that and adjust the synthesized maps to play along.
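
With the new fields, the mmap2 lines dumped by 'perf report -D' should then
look something like this (illustrative values):

 8059/8059: [0x400000(0xc000) @ 0 fd:01 41038 0]: r-xp /bin/cat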

Signed-off-by: Don Zickus <dzickus@redhat.com>
---
 tools/perf/util/event.c   | 23 +++++++++++++++++++++--
 tools/perf/util/event.h   |  2 ++
 tools/perf/util/machine.c |  4 +++-
 tools/perf/util/map.c     |  4 +++-
 tools/perf/util/map.h     |  4 +++-
 5 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 9d12aa6..6b8646c 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1,4 +1,5 @@
 #include <linux/types.h>
+#include <sys/mman.h>
 #include "event.h"
 #include "debug.h"
 #include "hist.h"
@@ -212,6 +213,21 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 		else
 			event->header.misc = PERF_RECORD_MISC_GUEST_USER;
 
+		/* map protection and flags bits */
+		event->mmap2.prot = 0;
+		event->mmap2.flags = 0;
+		if (prot[0] == 'r')
+			event->mmap2.prot |= PROT_READ;
+		if (prot[1] == 'w')
+			event->mmap2.prot |= PROT_WRITE;
+		if (prot[2] == 'x')
+			event->mmap2.prot |= PROT_EXEC;
+
+		if (prot[3] == 's')
+			event->mmap2.flags |= MAP_SHARED;
+		else
+			event->mmap2.flags |= MAP_PRIVATE;
+
 		if (prot[2] != 'x') {
 			if (!mmap_data || prot[0] != 'r')
 				continue;
@@ -612,12 +628,15 @@ size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
 size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp)
 {
 	return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64
-			   " %02x:%02x %"PRIu64" %"PRIu64"]: %c %s\n",
+			   " %02x:%02x %"PRIu64" %"PRIu64"]: %c%c%c%c %s\n",
 		       event->mmap2.pid, event->mmap2.tid, event->mmap2.start,
 		       event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj,
 		       event->mmap2.min, event->mmap2.ino,
 		       event->mmap2.ino_generation,
-		       (event->header.misc & PERF_RECORD_MISC_MMAP_DATA) ? 'r' : 'x',
+		       (event->mmap2.prot & PROT_READ) ? 'r' : '-',
+		       (event->mmap2.prot & PROT_WRITE) ? 'w' : '-',
+		       (event->mmap2.prot & PROT_EXEC) ? 'x' : '-',
+		       (event->mmap2.flags & MAP_SHARED) ? 's' : 'p',
 		       event->mmap2.filename);
 }
 
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 38457d4..96bd19c 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -27,6 +27,8 @@ struct mmap2_event {
 	u32 min;
 	u64 ino;
 	u64 ino_generation;
+	u32 prot;
+	u32 flags;
 	char filename[PATH_MAX];
 };
 
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index a53cd0b..f3cf46f 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1041,6 +1041,8 @@ int machine__process_mmap2_event(struct machine *machine,
 			event->mmap2.pid, event->mmap2.maj,
 			event->mmap2.min, event->mmap2.ino,
 			event->mmap2.ino_generation,
+			event->mmap2.prot,
+			event->mmap2.flags,
 			event->mmap2.filename, type);
 
 	if (map == NULL)
@@ -1086,7 +1088,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
 
 	map = map__new(&machine->user_dsos, event->mmap.start,
 			event->mmap.len, event->mmap.pgoff,
-			event->mmap.pid, 0, 0, 0, 0,
+			event->mmap.pid, 0, 0, 0, 0, 0, 0,
 			event->mmap.filename,
 			type);
 
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 39cd2d0..f98f8fe 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -51,7 +51,7 @@ void map__init(struct map *map, enum map_type type,
 
 struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
 		     u64 pgoff, u32 pid, u32 d_maj, u32 d_min, u64 ino,
-		     u64 ino_gen, char *filename,
+		     u64 ino_gen, u32 prot, u32 flags, char *filename,
 		     enum map_type type)
 {
 	struct map *map = malloc(sizeof(*map));
@@ -69,6 +69,8 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
 		map->min = d_min;
 		map->ino = ino;
 		map->ino_generation = ino_gen;
+		map->prot = prot;
+		map->flags = flags;
 
 		if ((anon || no_dso) && type == MAP__FUNCTION) {
 			snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", pid);
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index f00f058..8cd0cff 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -35,6 +35,8 @@ struct map {
 	bool			referenced;
 	bool			erange_warned;
 	u32			priv;
+	u32			prot;
+	u32			flags;
 	u64			pgoff;
 	u64			reloc;
 	u32			maj, min; /* only valid for MMAP2 record */
@@ -106,7 +108,7 @@ void map__init(struct map *map, enum map_type type,
 	       u64 start, u64 end, u64 pgoff, struct dso *dso);
 struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
 		     u64 pgoff, u32 pid, u32 d_maj, u32 d_min, u64 ino,
-		     u64 ino_gen,
+		     u64 ino_gen, u32 prot, u32 flags,
 		     char *filename, enum map_type type);
 struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
 void map__delete(struct map *map);
-- 
1.7.11.7



* [PATCH 3/6] Revert "perf: Disable PERF_RECORD_MMAP2 support"
  2014-03-24 19:34 [PATCH 0/6] perf, events: Enable mmap2 support Don Zickus
  2014-03-24 19:34 ` [PATCH 1/6] events, perf: Pass protection and flags bits through mmap2 interface Don Zickus
  2014-03-24 19:34 ` [PATCH 2/6] perf: Update mmap2 interface with protection and flag bits Don Zickus
@ 2014-03-24 19:34 ` Don Zickus
  2014-04-09  2:32   ` Namhyung Kim
  2014-03-24 19:34 ` [PATCH 4/6] perf, sort: Add physid sorting based on mmap2 data Don Zickus
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 25+ messages in thread
From: Don Zickus @ 2014-03-24 19:34 UTC (permalink / raw)
  To: acme, peterz; +Cc: LKML, jolsa, jmario, fowles, eranian, andi.kleen, Don Zickus

This reverts commit 3090ffb5a2515990182f3f55b0688a7817325488.

Conflicts:
	tools/perf/util/event.c
---
 kernel/events/core.c    |  4 ----
 tools/perf/util/event.c | 36 +++++++++++++++++++-----------------
 tools/perf/util/evsel.c |  1 +
 3 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 21874d4..ace46f8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6882,10 +6882,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 	if (ret)
 		return -EFAULT;
 
-	/* disabled for now */
-	if (attr->mmap2)
-		return -EINVAL;
-
 	if (attr->__reserved_1)
 		return -EINVAL;
 
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 6b8646c..86bdf2a 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -179,13 +179,14 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 		return -1;
 	}
 
-	event->header.type = PERF_RECORD_MMAP;
+	event->header.type = PERF_RECORD_MMAP2;
 
 	while (1) {
 		char bf[BUFSIZ];
 		char prot[5];
 		char execname[PATH_MAX];
 		char anonstr[] = "//anon";
+		unsigned int ino;
 		size_t size;
 		ssize_t n;
 
@@ -196,14 +197,15 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 		strcpy(execname, "");
 
 		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-		n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %*x:%*x %*u %s\n",
-		       &event->mmap.start, &event->mmap.len, prot,
-		       &event->mmap.pgoff,
-		       execname);
-		/*
- 		 * Anon maps don't have the execname.
- 		 */
-		if (n < 4)
+		n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n",
+		       &event->mmap2.start, &event->mmap2.len, prot,
+		       &event->mmap2.pgoff, &event->mmap2.maj,
+		       &event->mmap2.min,
+		       &ino, execname);
+
+		event->mmap2.ino = (u64)ino;
+
+		if (n < 7)
 			continue;
 		/*
 		 * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
@@ -239,15 +241,15 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 			strcpy(execname, anonstr);
 
 		size = strlen(execname) + 1;
-		memcpy(event->mmap.filename, execname, size);
+		memcpy(event->mmap2.filename, execname, size);
 		size = PERF_ALIGN(size, sizeof(u64));
-		event->mmap.len -= event->mmap.start;
-		event->mmap.header.size = (sizeof(event->mmap) -
-					(sizeof(event->mmap.filename) - size));
-		memset(event->mmap.filename + size, 0, machine->id_hdr_size);
-		event->mmap.header.size += machine->id_hdr_size;
-		event->mmap.pid = tgid;
-		event->mmap.tid = pid;
+		event->mmap2.len -= event->mmap2.start;
+		event->mmap2.header.size = (sizeof(event->mmap2) -
+					(sizeof(event->mmap2.filename) - size));
+		memset(event->mmap2.filename + size, 0, machine->id_hdr_size);
+		event->mmap2.header.size += machine->id_hdr_size;
+		event->mmap2.pid = tgid;
+		event->mmap2.tid = pid;
 
 		if (process(tool, event, &synth_sample, machine) != 0) {
 			rc = -1;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 5c28d82..21154da 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -659,6 +659,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
 		perf_evsel__set_sample_bit(evsel, WEIGHT);
 
 	attr->mmap  = track;
+	attr->mmap2 = track && !perf_missing_features.mmap2;
 	attr->comm  = track;
 
 	if (opts->sample_transaction)
-- 
1.7.11.7



* [PATCH 4/6] perf, sort:  Add physid sorting based on mmap2 data
  2014-03-24 19:34 [PATCH 0/6] perf, events: Enable mmap2 support Don Zickus
                   ` (2 preceding siblings ...)
  2014-03-24 19:34 ` [PATCH 3/6] Revert "perf: Disable PERF_RECORD_MMAP2 support" Don Zickus
@ 2014-03-24 19:34 ` Don Zickus
  2014-03-24 19:54   ` Andi Kleen
                     ` (3 more replies)
  2014-03-24 19:34 ` [PATCH 5/6] perf: Update sort to handle MAP_SHARED bits Don Zickus
  2014-03-24 19:34 ` [PATCH 6/6] perf, sort: Allow unique sorting instead of combining hist_entries Don Zickus
  5 siblings, 4 replies; 25+ messages in thread
From: Don Zickus @ 2014-03-24 19:34 UTC (permalink / raw)
  To: acme, peterz; +Cc: LKML, jolsa, jmario, fowles, eranian, andi.kleen, Don Zickus

In order for the c2c tool to work correctly, it needs to properly
sort all the records on uniquely identifiable data addresses.  These
unique addresses are converted from the virtual addresses provided by the
hardware into kernel addresses, using an mmap2 record as the decoder.

Once the unique addresses are converted, we can sort on them based on
various rules.  Then it becomes clear which addresses are overlapping
with each other across mmap regions or pid spaces.

This patch just creates the rules and inserts the records into a
sort entry for safe keeping until later patches process them.

The general sorting rule is:

o group cpumodes together
o if (nonzero major/minor number - ie mmap'd areas)
  o sort on major, minor, inode, inode generation numbers
o else if cpumode is not kernel
  o sort on pid
o sort on data addresses

I also hacked in the concept of 'color'.  The purpose of that bit is to
provide a hint to later processing of these records that a new unique
address has been encountered.  Because later processing only checks the data
addresses, there can be a theoretical scenario that similar sequential data
addresses (when walking the rbtree) could be misinterpreted as overlapping
when in fact they are not.

Sample output: (perf report --stdio --physid-mode)

    18.93%  [k] 0xffffc900139c40b0  [k] igb_update_stats     kworker/0:1:  257   257      0      0       0  0
     7.63%  [k] 0xffff88082e6cf0a8  [k] watchdog_timer_fn        swapper:    0     0      0      0       0  0
     1.86%  [k] 0xffff88042ef94700  [k] _raw_spin_lock           swapper:    0     0      0      0       0  0
     1.77%  [k] 0xffff8804278afa50  [k] __switch_to              swapper:    0     0      0      0       0  0

V3: split out the sorting into unique entries.  This makes it look
      far less ugly
    create a new 'physid mode' to group all the sorting rules together
      (mimics the mem-mode)

Signed-off-by: Don Zickus <dzickus@redhat.com>
---
 tools/perf/builtin-report.c |  20 ++-
 tools/perf/util/hist.c      |  27 +++-
 tools/perf/util/hist.h      |   8 ++
 tools/perf/util/sort.c      | 294 ++++++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/sort.h      |  13 ++
 5 files changed, 358 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index c87412b..093f5ad 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -49,6 +49,7 @@ struct report {
 	bool			show_threads;
 	bool			inverted_callchain;
 	bool			mem_mode;
+	bool			physid_mode;
 	bool			header;
 	bool			header_only;
 	int			max_stack;
@@ -241,7 +242,7 @@ static int process_sample_event(struct perf_tool *tool,
 		ret = report__add_branch_hist_entry(rep, &al, sample, evsel);
 		if (ret < 0)
 			pr_debug("problem adding lbr entry, skipping event\n");
-	} else if (rep->mem_mode == 1) {
+	} else if ((rep->mem_mode == 1) || (rep->physid_mode)) {
 		ret = report__add_mem_hist_entry(rep, &al, sample, evsel);
 		if (ret < 0)
 			pr_debug("problem adding mem entry, skipping event\n");
@@ -746,6 +747,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
 		    "Disable symbol demangling"),
 	OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
+	OPT_BOOLEAN(0, "physid-mode", &report.physid_mode, "physid access profile"),
 	OPT_CALLBACK(0, "percent-limit", &report, "percent",
 		     "Don't show entries under that percent", parse_percent_limit),
 	OPT_END()
@@ -817,6 +819,22 @@ repeat:
 			sort_order = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
 	}
 
+	if (report.physid_mode) {
+		if ((sort__mode == SORT_MODE__BRANCH) ||
+		    (sort__mode == SORT_MODE__MEMORY)) {
+			pr_err("branch or memory and physid mode incompatible\n");
+			goto error;
+		}
+		sort__mode = SORT_MODE__PHYSID;
+
+		/*
+		 * if no sort_order is provided, then specify
+		 * branch-mode specific order
+		 * physid-mode specific order
+		if (sort_order == default_sort_order)
+			sort_order = "daddr,iaddr,pid,tid,major,minor,inode,inode_gen";
+	}
+
 	if (setup_sorting() < 0) {
 		parse_options_usage(report_usage, options, "s", 1);
 		goto error;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index f38590d..81f47ee 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -136,14 +136,34 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 			symlen = dso__name_len(h->mem_info->daddr.map->dso);
 			hists__new_col_len(hists, HISTC_MEM_DADDR_DSO,
 					   symlen);
+			hists__new_col_len(hists, HISTC_PHYSID_DADDR, symlen);
 		} else {
 			symlen = unresolved_col_width + 4 + 2;
 			hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
+			hists__set_unres_dso_col_len(hists, HISTC_PHYSID_DADDR);
+		}
+
+		if (h->mem_info->iaddr.sym) {
+			symlen = (int)h->mem_info->iaddr.sym->namelen + 4
+			       + unresolved_col_width + 2;
+			hists__new_col_len(hists, HISTC_PHYSID_IADDR, symlen);
+		} else {
+			symlen = unresolved_col_width + 4 + 2;
+			hists__new_col_len(hists, HISTC_PHYSID_IADDR, symlen);
+		}
+		if (h->mem_info->iaddr.map) {
+			symlen = dso__name_len(h->mem_info->iaddr.map->dso);
+			hists__new_col_len(hists, HISTC_PHYSID_IADDR, symlen);
+		} else {
+			symlen = unresolved_col_width + 4 + 2;
+			hists__set_unres_dso_col_len(hists, HISTC_PHYSID_IADDR);
 		}
 	} else {
 		symlen = unresolved_col_width + 4 + 2;
 		hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen);
 		hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
+		hists__set_unres_dso_col_len(hists, HISTC_PHYSID_DADDR);
+		hists__set_unres_dso_col_len(hists, HISTC_PHYSID_IADDR);
 	}
 
 	hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
@@ -413,9 +433,10 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
 			.map	= al->map,
 			.sym	= al->sym,
 		},
-		.cpu	= al->cpu,
-		.ip	= al->addr,
-		.level	= al->level,
+		.cpu	 = al->cpu,
+		.cpumode = al->cpumode,
+		.ip	 = al->addr,
+		.level	 = al->level,
 		.stat = {
 			.nr_events = 1,
 			.period	= period,
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 1f1f513..664d83f 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -71,6 +71,14 @@ enum hist_column {
 	HISTC_MEM_LVL,
 	HISTC_MEM_SNOOP,
 	HISTC_TRANSACTION,
+	HISTC_PHYSID_DADDR,
+	HISTC_PHYSID_IADDR,
+	HISTC_PHYSID_PID,
+	HISTC_PHYSID_TID,
+	HISTC_PHYSID_MAJOR,
+	HISTC_PHYSID_MINOR,
+	HISTC_PHYSID_INODE,
+	HISTC_PHYSID_INODE_GEN,
 	HISTC_NR_COLS, /* Last entry */
 };
 
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 635cd8f..e016fc1 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -977,6 +977,269 @@ struct sort_entry sort_transaction = {
 	.se_width_idx	= HISTC_TRANSACTION,
 };
 
+static int64_t
+sort__physid_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	u64 l, r;
+	struct map *l_map = left->mem_info->daddr.map;
+	struct map *r_map = right->mem_info->daddr.map;
+
+	/* store all NULL mem maps at the bottom */
+	/* shouldn't even need this check, should have stubs */
+	if (!left->mem_info->daddr.map || !right->mem_info->daddr.map)
+		return 1;
+
+	/* group event types together */
+	if (left->cpumode > right->cpumode) return -1;
+	if (left->cpumode < right->cpumode) return 1;
+
+	/*
+	 * Addresses with no major/minor numbers are assumed to be
+	 * anonymous in userspace.  Sort those on pid then address.
+	 *
+	 * The kernel and non-zero major/minor mapped areas are
+	 * assumed to be unity mapped.  Sort those on address then pid.
+	 */
+
+	if (l_map->maj || l_map->min || l_map->ino || l_map->ino_generation) {
+		/* mmapped areas */
+
+		if (l_map->maj > r_map->maj) return -1;
+		if (l_map->maj < r_map->maj) return 1;
+
+		if (l_map->min > r_map->min) return -1;
+		if (l_map->min < r_map->min) return 1;
+
+		if (l_map->ino > r_map->ino) return -1;
+		if (l_map->ino < r_map->ino) return 1;
+
+		if (l_map->ino_generation > r_map->ino_generation) return -1;
+		if (l_map->ino_generation < r_map->ino_generation) return 1;
+
+	} else if (left->cpumode != PERF_RECORD_MISC_KERNEL) {
+		/* userspace anonymous */
+		if (left->thread->pid_ > right->thread->pid_) return -1;
+		if (left->thread->pid_ < right->thread->pid_) return 1;
+	}
+
+	/* hack to mark similar regions, 'right' is new entry */
+	right->color = true;
+
+	/* al_addr does all the right addr - start + offset calculations */
+	l = left->mem_info->daddr.al_addr;
+	r = right->mem_info->daddr.al_addr;
+
+	if (l > r) return -1;
+	if (l < r) return 1;
+
+	/* sanity check the maps; only mmaped areas should have different maps */
+	if ((left->mem_info->daddr.map != right->mem_info->daddr.map) &&
+	     !right->mem_info->daddr.map->maj && !right->mem_info->daddr.map->min)
+		pr_debug("physid_cmp: Similar entries have different maps\n");
+
+	return 0;
+}
+
+static int hist_entry__physid_daddr_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	uint64_t addr = 0;
+	struct map *map = NULL;
+	struct symbol *sym = NULL;
+	char level = he->level;
+
+	if (he->mem_info) {
+		addr = he->mem_info->daddr.addr;
+		map = he->mem_info->daddr.map;
+		sym = he->mem_info->daddr.sym;
+
+		/* print [s] for data mmaps */
+		if ((he->cpumode != PERF_RECORD_MISC_KERNEL) &&
+		     map && (map->type == MAP__VARIABLE) &&
+		    (map->maj || map->min || map->ino ||
+		     map->ino_generation))
+			level = 's';
+	}
+
+	return _hist_entry__sym_snprintf(map, sym, addr, level, bf, size,
+					 width);
+}
+
+struct sort_entry sort_physid_daddr = {
+	.se_header	= "Data Address",
+	.se_cmp		= sort__physid_daddr_cmp,
+	.se_snprintf	= hist_entry__physid_daddr_snprintf,
+	.se_width_idx	= HISTC_PHYSID_DADDR,
+};
+
+static int64_t
+sort__physid_iaddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	u64 l = left->mem_info->iaddr.al_addr;
+	u64 r = right->mem_info->iaddr.al_addr;
+
+	return r - l;
+}
+
+static int hist_entry__physid_iaddr_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	uint64_t addr = 0;
+	struct map *map = NULL;
+	struct symbol *sym = NULL;
+	char level = he->level;
+
+	if (he->mem_info) {
+		addr = he->mem_info->iaddr.addr;
+		map = he->mem_info->iaddr.map;
+		sym = he->mem_info->iaddr.sym;
+	}
+
+	return _hist_entry__sym_snprintf(map, sym, addr, level, bf, size,
+					 width);
+}
+
+struct sort_entry sort_physid_iaddr = {
+	.se_header	= "Source Address",
+	.se_cmp		= sort__physid_iaddr_cmp,
+	.se_snprintf	= hist_entry__physid_iaddr_snprintf,
+	.se_width_idx	= HISTC_PHYSID_IADDR,
+};
+
+static int64_t
+sort__physid_pid_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	pid_t l = left->thread->pid_;
+	pid_t r = right->thread->pid_;
+
+	return r - l;
+}
+
+static int hist_entry__physid_pid_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	const char *comm = thread__comm_str(he->thread);
+	return repsep_snprintf(bf, size, "%*s:%5d", width - 6,
+			       comm ?: "", he->thread->pid_);
+}
+
+struct sort_entry sort_physid_pid = {
+	.se_header	= "Command:  Pid",
+	.se_cmp		= sort__physid_pid_cmp,
+	.se_snprintf	= hist_entry__physid_pid_snprintf,
+	.se_width_idx	= HISTC_PHYSID_PID,
+};
+
+static int64_t
+sort__physid_tid_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	pid_t l = left->thread->tid;
+	pid_t r = right->thread->tid;
+
+	return r - l;
+}
+
+static int hist_entry__physid_tid_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%*d", width, he->thread->tid);
+}
+
+struct sort_entry sort_physid_tid = {
+	.se_header	= "Tid  ",
+	.se_cmp		= sort__physid_tid_cmp,
+	.se_snprintf	= hist_entry__physid_tid_snprintf,
+	.se_width_idx	= HISTC_PHYSID_TID,
+};
+
+static int64_t
+sort__physid_major_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct map *l = left->mem_info->daddr.map;
+	struct map *r = right->mem_info->daddr.map;
+
+	return r->maj - l->maj;
+}
+
+static int hist_entry__physid_major_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%*x", width, he->mem_info->daddr.map->maj);
+}
+
+struct sort_entry sort_physid_major = {
+	.se_header	= "Major",
+	.se_cmp		= sort__physid_major_cmp,
+	.se_snprintf	= hist_entry__physid_major_snprintf,
+	.se_width_idx	= HISTC_PHYSID_MAJOR,
+};
+
+static int64_t
+sort__physid_minor_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct map *l = left->mem_info->daddr.map;
+	struct map *r = right->mem_info->daddr.map;
+
+	return r->min - l->min;
+}
+
+static int hist_entry__physid_minor_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%*x", width, he->mem_info->daddr.map->min);
+}
+
+struct sort_entry sort_physid_minor = {
+	.se_header	= "Minor",
+	.se_cmp		= sort__physid_minor_cmp,
+	.se_snprintf	= hist_entry__physid_minor_snprintf,
+	.se_width_idx	= HISTC_PHYSID_MINOR,
+};
+
+static int64_t
+sort__physid_inode_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct map *l = left->mem_info->daddr.map;
+	struct map *r = right->mem_info->daddr.map;
+
+	return r->ino - l->ino;
+}
+
+static int hist_entry__physid_inode_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%*" PRIx64, width, he->mem_info->daddr.map->ino);
+}
+
+struct sort_entry sort_physid_inode = {
+	.se_header	= "Inode    ",
+	.se_cmp		= sort__physid_inode_cmp,
+	.se_snprintf	= hist_entry__physid_inode_snprintf,
+	.se_width_idx	= HISTC_PHYSID_INODE,
+};
+
+static int64_t
+sort__physid_inode_gen_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct map *l = left->mem_info->daddr.map;
+	struct map *r = right->mem_info->daddr.map;
+
+	return r->ino_generation - l->ino_generation;
+}
+
+static int hist_entry__physid_inode_gen_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%-*" PRIx64, width, he->mem_info->daddr.map->ino_generation);
+}
+
+struct sort_entry sort_physid_inode_gen = {
+	.se_header	= "Inode Gen",
+	.se_cmp		= sort__physid_inode_gen_cmp,
+	.se_snprintf	= hist_entry__physid_inode_gen_snprintf,
+	.se_width_idx	= HISTC_PHYSID_INODE_GEN,
+};
+
 struct sort_dimension {
 	const char		*name;
 	struct sort_entry	*entry;
@@ -1027,6 +1290,21 @@ static struct sort_dimension memory_sort_dimensions[] = {
 
 #undef DIM
 
+#define DIM(d, n, func) [d - __SORT_PHYSID_MODE] = { .name = n, .entry = &(func) }
+
+static struct sort_dimension physid_sort_dimensions[] = {
+	DIM(SORT_PHYSID_DADDR, "daddr", sort_physid_daddr),
+	DIM(SORT_PHYSID_IADDR, "iaddr", sort_physid_iaddr),
+	DIM(SORT_PHYSID_PID, "pid", sort_physid_pid),
+	DIM(SORT_PHYSID_TID, "tid", sort_physid_tid),
+	DIM(SORT_PHYSID_MAJOR, "major", sort_physid_major),
+	DIM(SORT_PHYSID_MINOR, "minor", sort_physid_minor),
+	DIM(SORT_PHYSID_INODE, "inode", sort_physid_inode),
+	DIM(SORT_PHYSID_INODE_GEN, "inode_gen", sort_physid_inode_gen),
+};
+
+#undef DIM
+
 static void __sort_dimension__add(struct sort_dimension *sd, enum sort_type idx)
 {
 	if (sd->taken)
@@ -1104,6 +1382,22 @@ int sort_dimension__add(const char *tok)
 		return 0;
 	}
 
+	for (i = 0; i < ARRAY_SIZE(physid_sort_dimensions); i++) {
+		struct sort_dimension *sd = &physid_sort_dimensions[i];
+
+		if (strncasecmp(tok, sd->name, strlen(tok)))
+			continue;
+
+		if (sort__mode != SORT_MODE__PHYSID)
+			return -EINVAL;
+
+		if (sd->entry == &sort_physid_daddr)
+			sort__has_sym = 1;
+
+		__sort_dimension__add(sd, i + __SORT_PHYSID_MODE);
+		return 0;
+	}
+
 	return -ESRCH;
 }
 
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 43e5ff4..b1f52a8 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -87,11 +87,13 @@ struct hist_entry {
 	u64			ip;
 	u64			transaction;
 	s32			cpu;
+	u8			cpumode;
 
 	struct hist_entry_diff	diff;
 
 	/* We are added by hists__add_dummy_entry. */
 	bool			dummy;
+	bool			color;
 
 	/* XXX These two should move to some tree widget lib */
 	u16			row_offset;
@@ -133,6 +135,7 @@ enum sort_mode {
 	SORT_MODE__NORMAL,
 	SORT_MODE__BRANCH,
 	SORT_MODE__MEMORY,
+	SORT_MODE__PHYSID,
 };
 
 enum sort_type {
@@ -166,6 +169,16 @@ enum sort_type {
 	SORT_MEM_TLB,
 	SORT_MEM_LVL,
 	SORT_MEM_SNOOP,
+
+	__SORT_PHYSID_MODE,
+	SORT_PHYSID_DADDR = __SORT_PHYSID_MODE,
+	SORT_PHYSID_IADDR,
+	SORT_PHYSID_PID,
+	SORT_PHYSID_TID,
+	SORT_PHYSID_MAJOR,
+	SORT_PHYSID_MINOR,
+	SORT_PHYSID_INODE,
+	SORT_PHYSID_INODE_GEN,
 };
 
 /*
-- 
1.7.11.7



* [PATCH 5/6] perf: Update sort to handle MAP_SHARED bits
  2014-03-24 19:34 [PATCH 0/6] perf, events: Enable mmap2 support Don Zickus
                   ` (3 preceding siblings ...)
  2014-03-24 19:34 ` [PATCH 4/6] perf, sort: Add physid sorting based on mmap2 data Don Zickus
@ 2014-03-24 19:34 ` Don Zickus
  2014-03-24 19:34 ` [PATCH 6/6] perf, sort: Allow unique sorting instead of combining hist_entries Don Zickus
  5 siblings, 0 replies; 25+ messages in thread
From: Don Zickus @ 2014-03-24 19:34 UTC (permalink / raw)
  To: acme, peterz; +Cc: LKML, jolsa, jmario, fowles, eranian, andi.kleen, Don Zickus

Remove some of the false positives when sorting by utilizing the MAP_SHARED bit.

This helps deal with the COW cases where a virtual address is modified but is
mapped to a read-only shared library area, because the perf tool doesn't
understand COW faults.

Checking the MAP_SHARED bit tells the tool that in such a case the memory is
actually private and will not cause contention with other processes accessing
the same shared library area.
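
For example, a typical private mapping of a shared library in
/proc/<pid>/maps looks like this (illustrative values):

	7f6e32a00000-7f6e32bb8000 r-xp 00000000 fd:01 391885   /usr/lib64/libc-2.17.so

It has nonzero major/minor/inode numbers, but the 'p' means MAP_PRIVATE, so
with this patch such samples now fall back to the pid-based sorting instead
of being grouped with every other user of that inode.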

Signed-off-by: Don Zickus <dzickus@redhat.com>
---
 tools/perf/util/sort.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index e016fc1..be675f4 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1,3 +1,4 @@
+#include <sys/mman.h>
 #include "sort.h"
 #include "hist.h"
 #include "comm.h"
@@ -1001,7 +1002,8 @@ sort__physid_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
 	 * assumed to be unity mapped.  Sort those on address then pid.
 	 */
 
-	if (l_map->maj || l_map->min || l_map->ino || l_map->ino_generation) {
+	if ((l_map->flags & MAP_SHARED) &&
+	   (l_map->maj || l_map->min || l_map->ino || l_map->ino_generation)) {
 		/* mmapped areas */
 
 		if (l_map->maj > r_map->maj) return -1;
-- 
1.7.11.7



* [PATCH 6/6] perf, sort:  Allow unique sorting instead of combining hist_entries
  2014-03-24 19:34 [PATCH 0/6] perf, events: Enable mmap2 support Don Zickus
                   ` (4 preceding siblings ...)
  2014-03-24 19:34 ` [PATCH 5/6] perf: Update sort to handle MAP_SHARED bits Don Zickus
@ 2014-03-24 19:34 ` Don Zickus
  2014-04-09  5:31   ` Namhyung Kim
  5 siblings, 1 reply; 25+ messages in thread
From: Don Zickus @ 2014-03-24 19:34 UTC (permalink / raw)
  To: acme, peterz; +Cc: LKML, jolsa, jmario, fowles, eranian, andi.kleen, Don Zickus

The cache contention tool needs to keep all the perf records unique in order
to properly parse all the data.  Currently add_hist_entry() will combine
duplicate records and add the weight/period to the existing record.

This throws away the unique data the cache contention tool needs (mainly
the data source).  Create a flag to force the records to stay unique.
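
A minimal sketch of how a tool would use the flag (assuming it is set before
the sorting is configured):

	sort__wants_unique = 1;	/* keep duplicate hist entries separate */
	if (setup_sorting() < 0)
		return -1;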

Signed-off-by: Don Zickus <dzickus@redhat.com>
---
 tools/perf/util/hist.c | 3 ++-
 tools/perf/util/sort.c | 1 +
 tools/perf/util/sort.h | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 81f47ee..69d2cb9 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -378,7 +378,8 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
 		 */
 		cmp = hist_entry__cmp(he, entry);
 
-		if (!cmp) {
+		if (!cmp && !sort__wants_unique) {
+
 			he_stat__add_period(&he->stat, period, weight);
 
 			/*
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index be675f4..ca86b23 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -15,6 +15,7 @@ int		sort__need_collapse = 0;
 int		sort__has_parent = 0;
 int		sort__has_sym = 0;
 int		sort__has_dso = 0;
+int		sort__wants_unique = 0;
 enum sort_mode	sort__mode = SORT_MODE__NORMAL;
 
 enum sort_type	sort__first_dimension;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index b1f52a8..b005445 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -33,6 +33,7 @@ extern int have_ignore_callees;
 extern int sort__need_collapse;
 extern int sort__has_parent;
 extern int sort__has_sym;
+extern int sort__wants_unique;
 extern enum sort_mode sort__mode;
 extern struct sort_entry sort_comm;
 extern struct sort_entry sort_dso;
-- 
1.7.11.7



* Re: [PATCH 4/6] perf, sort:  Add physid sorting based on mmap2 data
  2014-03-24 19:34 ` [PATCH 4/6] perf, sort: Add physid sorting based on mmap2 data Don Zickus
@ 2014-03-24 19:54   ` Andi Kleen
  2014-03-24 20:17     ` Don Zickus
  2014-03-24 20:54   ` [PATCH 01/15 V3] perf: Fix stddev calculation Don Zickus
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 25+ messages in thread
From: Andi Kleen @ 2014-03-24 19:54 UTC (permalink / raw)
  To: Don Zickus; +Cc: acme, peterz, LKML, jolsa, jmario, fowles, eranian, andi.kleen

Don Zickus <dzickus@redhat.com> writes:

> In order for the c2c tool to work correctly, it needs to properly
> sort all the records on uniquely identifiable data addresses.  These
> unique addresses are converted from virtual addresses provided by the
> hardware into a kernel address using an mmap2 record as the decoder.

No documentation for the new option?

Probably the new mode should be also supported by --sort

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only


* Re: [PATCH 4/6] perf, sort:  Add physid sorting based on mmap2 data
  2014-03-24 19:54   ` Andi Kleen
@ 2014-03-24 20:17     ` Don Zickus
  2014-03-24 20:20       ` Andi Kleen
  0 siblings, 1 reply; 25+ messages in thread
From: Don Zickus @ 2014-03-24 20:17 UTC (permalink / raw)
  To: Andi Kleen; +Cc: acme, peterz, LKML, jolsa, jmario, fowles, eranian, andi.kleen

On Mon, Mar 24, 2014 at 12:54:31PM -0700, Andi Kleen wrote:
> Don Zickus <dzickus@redhat.com> writes:
> 
> > In order for the c2c tool to work correctly, it needs to properly
> > sort all the records on uniquely identifiable data addresses.  These
> > unique addresses are converted from virtual addresses provided by the
> > hardware into a kernel address using an mmap2 record as the decoder.
> 
> No documentation for the new option?
> 
> Probably the new mode should be also supported by --sort

I hid the new option further down in the changelog, so it isn't obvious.
Sorry about that.

Sample output: (perf report --stdio --physid-mode)

So the option was '--physid-mode' and if you don't pass in a '--sort' then
it takes the default sort of

'daddr,iaddr,pid,tid,major,minor,inode,inode_gen'

Otherwise you could pass in a combination of the other fields.
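
Roughly (a sketch; the record side depends on which sampling event you use
to get data addresses):

  perf record -d <workload>
  perf report --stdio --physid-mode --sort daddr,pid,tid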

The output is not the best way to use the mmap2 data, as it just gives you
the hottest data addresses.  Our c2c tool really takes the data addresses and
combines them into a cacheline and then processes the cacheline for
interesting bottlenecks (HITMs in our case).
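
(Roughly, assuming 64-byte cache lines, that combining step is just:

	cacheline = daddr & ~(64ULL - 1);

applied to the uniquely sorted addresses.)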

I don't know a good way to present the data and yet still have the sort
useful for our c2c tool.  So I threw this interface together.  I am open
to suggestions.

Cheers,
Don


* Re: [PATCH 4/6] perf, sort:  Add physid sorting based on mmap2 data
  2014-03-24 20:17     ` Don Zickus
@ 2014-03-24 20:20       ` Andi Kleen
  2014-03-24 20:26         ` Don Zickus
  0 siblings, 1 reply; 25+ messages in thread
From: Andi Kleen @ 2014-03-24 20:20 UTC (permalink / raw)
  To: Don Zickus
  Cc: Andi Kleen, acme, peterz, LKML, jolsa, jmario, fowles, eranian,
	andi.kleen

On Mon, Mar 24, 2014 at 04:17:57PM -0400, Don Zickus wrote:
> On Mon, Mar 24, 2014 at 12:54:31PM -0700, Andi Kleen wrote:
> > Don Zickus <dzickus@redhat.com> writes:
> > 
> > > In order for the c2c tool to work correctly, it needs to properly
> > > sort all the records on uniquely identifiable data addresses.  These
> > > unique addresses are converted from virtual addresses provided by the
> > > hardware into a kernel address using an mmap2 record as the decoder.
> > 
> > No documentation for the new option?
> > 
> > Probably the new mode should be also supported by --sort
> 
> I hid the new option further in the changelog, so it isn't obvious.  Sorry
> about that.

I meant there's no manpage change

-Andi


* Re: [PATCH 4/6] perf, sort:  Add physid sorting based on mmap2 data
  2014-03-24 20:20       ` Andi Kleen
@ 2014-03-24 20:26         ` Don Zickus
  0 siblings, 0 replies; 25+ messages in thread
From: Don Zickus @ 2014-03-24 20:26 UTC (permalink / raw)
  To: Andi Kleen; +Cc: acme, peterz, LKML, jolsa, jmario, fowles, eranian, andi.kleen

On Mon, Mar 24, 2014 at 09:20:45PM +0100, Andi Kleen wrote:
> On Mon, Mar 24, 2014 at 04:17:57PM -0400, Don Zickus wrote:
> > On Mon, Mar 24, 2014 at 12:54:31PM -0700, Andi Kleen wrote:
> > > Don Zickus <dzickus@redhat.com> writes:
> > > 
> > > > In order for the c2c tool to work correctly, it needs to properly
> > > > sort all the records on uniquely identifiable data addresses.  These
> > > > unique addresses are converted from virtual addresses provided by the
> > > > hardware into a kernel address using an mmap2 record as the decoder.
> > > 
> > > No documentation for the new option?
> > > 
> > > Probably the new mode should be also supported by --sort
> > 
> > I hid the new option further in the changelog, so it isn't obvious.  Sorry
> > about that.
> 
> I meant there's no manpage change

Ah, right.  I missed that.  Thanks for noticing.  Let me respin that.

Cheers,
Don


* [PATCH 01/15 V3] perf: Fix stddev calculation
  2014-03-24 19:34 ` [PATCH 4/6] perf, sort: Add physid sorting based on mmap2 data Don Zickus
  2014-03-24 19:54   ` Andi Kleen
@ 2014-03-24 20:54   ` Don Zickus
  2014-03-24 20:57     ` Don Zickus
  2014-03-24 20:57   ` [PATCH 4/6 V2] perf, sort: Add physid sorting based on mmap2 data Don Zickus
  2014-04-09  3:06   ` [PATCH 4/6] " Don Zickus
  3 siblings, 1 reply; 25+ messages in thread
From: Don Zickus @ 2014-03-24 20:54 UTC (permalink / raw)
  To: acme; +Cc: LKML, jolsa, jmario, fowles, peterz, eranian, andi.kleen, Don Zickus

The stddev calculation as written actually matched standard error.  As a
result, when using it to find the relative stddev between runs, the numbers
were not accurate.

Update the formula to match traditional stddev.  Then rename the old
stddev calculation to stderr_stats in case someone wants to use it.
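
That is, with M2 being the running sum of squared deviations kept in
struct stats:

	stddev = sqrt(M2 / (n - 1));	/* what stddev_stats() returns now */
	stderr = stddev / sqrt(n);	/* kept around as stderr_stats() */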

Signed-off-by: Don Zickus <dzickus@redhat.com>
---
 tools/perf/util/stat.c | 13 +++++++++++++
 tools/perf/util/stat.h |  1 +
 2 files changed, 14 insertions(+)

diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 6506b3d..0cb4dbc 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -33,6 +33,7 @@ double avg_stats(struct stats *stats)
  * http://en.wikipedia.org/wiki/Stddev
  *
  * The std dev of the mean is related to the std dev by:
+ * (also known as standard error)
  *
  *             s
  * s_mean = -------
@@ -41,6 +42,18 @@ double avg_stats(struct stats *stats)
  */
 double stddev_stats(struct stats *stats)
 {
+	double variance;
+
+	if (stats->n < 2)
+		return 0.0;
+
+	variance = stats->M2 / (stats->n - 1);
+
+	return sqrt(variance);
+}
+
+double stderr_stats(struct stats *stats)
+{
 	double variance, variance_mean;
 
 	if (stats->n < 2)
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index ae8ccd7..6f61615 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -12,6 +12,7 @@ struct stats
 void update_stats(struct stats *stats, u64 val);
 double avg_stats(struct stats *stats);
 double stddev_stats(struct stats *stats);
+double stderr_stats(struct stats *stats);
 double rel_stddev_stats(double stddev, double avg);
 
 static inline void init_stats(struct stats *stats)
-- 
1.7.11.7



* Re: [PATCH 01/15 V3] perf: Fix stddev calculation
  2014-03-24 20:54   ` [PATCH 01/15 V3] perf: Fix stddev calculation Don Zickus
@ 2014-03-24 20:57     ` Don Zickus
  0 siblings, 0 replies; 25+ messages in thread
From: Don Zickus @ 2014-03-24 20:57 UTC (permalink / raw)
  To: acme; +Cc: LKML, jolsa, jmario, fowles, peterz, eranian, andi.kleen

On Mon, Mar 24, 2014 at 04:54:38PM -0400, Don Zickus wrote:
> The stddev calculation written matched standard error.  As a result when
> using this result to find the relative stddev between runs, it was not
> accurate.
> 

This isn't the patch that had my updates...  Sorry for the noise.

Cheers,
Don


* [PATCH 4/6 V2] perf, sort:  Add physid sorting based on mmap2 data
  2014-03-24 19:34 ` [PATCH 4/6] perf, sort: Add physid sorting based on mmap2 data Don Zickus
  2014-03-24 19:54   ` Andi Kleen
  2014-03-24 20:54   ` [PATCH 01/15 V3] perf: Fix stddev calculation Don Zickus
@ 2014-03-24 20:57   ` Don Zickus
  2014-03-29 17:11     ` Jiri Olsa
  2014-04-09  5:21     ` Namhyung Kim
  2014-04-09  3:06   ` [PATCH 4/6] " Don Zickus
  3 siblings, 2 replies; 25+ messages in thread
From: Don Zickus @ 2014-03-24 20:57 UTC (permalink / raw)
  To: acme; +Cc: LKML, jolsa, jmario, fowles, peterz, eranian, andi.kleen, Don Zickus

In order for the c2c tool to work correctly, it needs to properly
sort all the records on uniquely identifiable data addresses.  These
unique addresses are converted from the virtual addresses provided by the
hardware into kernel addresses, using an mmap2 record as the decoder.

Once the unique addresses are converted, we can sort on them based on
various rules.  Then it becomes clear which addresses are overlapping
with each other across mmap regions or pid spaces.

This patch just creates the rules and inserts the records into a
sort entry for safe keeping until later patches process them.

The general sorting rule is:

o group cpumodes together
o if (nonzero major/minor number - ie mmap'd areas)
  o sort on major, minor, inode, inode generation numbers
o else if cpumode is not kernel
  o sort on pid
o sort on data addresses

I also hacked in the concept of 'color'.  The purpose of that bit is to
provide a hint to later processing of these records that a new unique
address has been encountered.  Because later processing only checks the data
addresses, there can be a theoretical scenario that similar sequential data
addresses (when walking the rbtree) could be misinterpreted as overlapping
when in fact they are not.

Sample output: (perf report --stdio --physid-mode)

  Overhead            Data Address            Source Address    Command:  Pid    Tid Major  Minor  Inode  Inode Gen
  ........  ......................  ........................ ................. ..... .....  ..... ....... .........
    18.93%  [k] 0xffffc900139c40b0  [k] igb_update_stats     kworker/0:1:  257   257     0      0       0         0
     7.63%  [k] 0xffff88082e6cf0a8  [k] watchdog_timer_fn        swapper:    0     0     0      0       0         0
     1.86%  [k] 0xffff88042ef94700  [k] _raw_spin_lock           swapper:    0     0     0      0       0         0
     1.77%  [k] 0xffff8804278afa50  [k] __switch_to              swapper:    0     0     0      0       0         0

V4: add manpage entry in perf-report

V3: split out the sorting into unique entries.  This makes it look
      far less ugly
    create a new 'physid mode' to group all the sorting rules together
      (mimics the mem-mode)

Signed-off-by: Don Zickus <dzickus@redhat.com>
---
 tools/perf/Documentation/perf-report.txt |  23 +++
 tools/perf/builtin-report.c              |  20 ++-
 tools/perf/util/hist.c                   |  27 ++-
 tools/perf/util/hist.h                   |   8 +
 tools/perf/util/sort.c                   | 294 +++++++++++++++++++++++++++++++
 tools/perf/util/sort.h                   |  13 ++
 6 files changed, 381 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 8eab8a4..01391b0 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -95,6 +95,23 @@ OPTIONS
 	And default sort keys are changed to comm, dso_from, symbol_from, dso_to
 	and symbol_to, see '--branch-stack'.
 
+	If --physid-mode option is used, following sort keys are also
+	available:
+	daddr, iaddr, pid, tid, major, minor, inode, inode_gen.
+
+	- daddr: data address (sorted based on major, minor, inode and inode
+		generation numbers if shared, otherwise pid)
+	- iaddr: instruction address
+	- pid: command and pid of the task
+	- tid: tid of the task
+	- major: major number of mapped location (0 if not mapped)
+	- minor: minor number of mapped location (0 if not mapped)
+	- inode: inode number of mapped location (0 if not mapped)
+	- inode_gen: inode generation number of mapped location (0 if not mapped)
+
+	And default sort keys are changed to daddr, iaddr, pid, tid, major,
+	minor, inode and inode_gen, see '--physid-mode'.
+
 -p::
 --parent=<regex>::
         A regex filter to identify parent. The parent is a caller of this
@@ -223,6 +240,12 @@ OPTIONS
 	branch stacks and it will automatically switch to the branch view mode,
 	unless --no-branch-stack is used.
 
+--physid-mode::
+	Use the data addresses sampled using perf record -d and combine them
+	with the mmap'd area region where they are located.  This helps identify
+	with the mmap'd region where they are located.  This helps identify
+	space.  See --sort for output choices.
+
 --objdump=<path>::
         Path to objdump binary.
 
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index c87412b..093f5ad 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -49,6 +49,7 @@ struct report {
 	bool			show_threads;
 	bool			inverted_callchain;
 	bool			mem_mode;
+	bool			physid_mode;
 	bool			header;
 	bool			header_only;
 	int			max_stack;
@@ -241,7 +242,7 @@ static int process_sample_event(struct perf_tool *tool,
 		ret = report__add_branch_hist_entry(rep, &al, sample, evsel);
 		if (ret < 0)
 			pr_debug("problem adding lbr entry, skipping event\n");
-	} else if (rep->mem_mode == 1) {
+	} else if ((rep->mem_mode == 1) || (rep->physid_mode)) {
 		ret = report__add_mem_hist_entry(rep, &al, sample, evsel);
 		if (ret < 0)
 			pr_debug("problem adding mem entry, skipping event\n");
@@ -746,6 +747,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
 		    "Disable symbol demangling"),
 	OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
+	OPT_BOOLEAN(0, "physid-mode", &report.physid_mode, "physid access profile"),
 	OPT_CALLBACK(0, "percent-limit", &report, "percent",
 		     "Don't show entries under that percent", parse_percent_limit),
 	OPT_END()
@@ -817,6 +819,22 @@ repeat:
 			sort_order = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
 	}
 
+	if (report.physid_mode) {
+		if ((sort__mode == SORT_MODE__BRANCH) ||
+		    (sort__mode == SORT_MODE__MEMORY)) {
+			pr_err("branch or memory and physid mode incompatible\n");
+			goto error;
+		}
+		sort__mode = SORT_MODE__PHYSID;
+
+		/*
+		 * if no sort_order is provided, then specify
+		 * physid-mode specific order
+		 */
+		if (sort_order == default_sort_order)
+			sort_order = "daddr,iaddr,pid,tid,major,minor,inode,inode_gen";
+	}
+
 	if (setup_sorting() < 0) {
 		parse_options_usage(report_usage, options, "s", 1);
 		goto error;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index f38590d..81f47ee 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -136,14 +136,34 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 			symlen = dso__name_len(h->mem_info->daddr.map->dso);
 			hists__new_col_len(hists, HISTC_MEM_DADDR_DSO,
 					   symlen);
+			hists__new_col_len(hists, HISTC_PHYSID_DADDR, symlen);
 		} else {
 			symlen = unresolved_col_width + 4 + 2;
 			hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
+			hists__set_unres_dso_col_len(hists, HISTC_PHYSID_DADDR);
+		}
+
+		if (h->mem_info->iaddr.sym) {
+			symlen = (int)h->mem_info->iaddr.sym->namelen + 4
+			       + unresolved_col_width + 2;
+			hists__new_col_len(hists, HISTC_PHYSID_IADDR, symlen);
+		} else {
+			symlen = unresolved_col_width + 4 + 2;
+			hists__new_col_len(hists, HISTC_PHYSID_IADDR, symlen);
+		}
+		if (h->mem_info->iaddr.map) {
+			symlen = dso__name_len(h->mem_info->iaddr.map->dso);
+			hists__new_col_len(hists, HISTC_PHYSID_IADDR, symlen);
+		} else {
+			symlen = unresolved_col_width + 4 + 2;
+			hists__set_unres_dso_col_len(hists, HISTC_PHYSID_IADDR);
 		}
 	} else {
 		symlen = unresolved_col_width + 4 + 2;
 		hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen);
 		hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
+		hists__set_unres_dso_col_len(hists, HISTC_PHYSID_DADDR);
+		hists__set_unres_dso_col_len(hists, HISTC_PHYSID_IADDR);
 	}
 
 	hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
@@ -413,9 +433,10 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
 			.map	= al->map,
 			.sym	= al->sym,
 		},
-		.cpu	= al->cpu,
-		.ip	= al->addr,
-		.level	= al->level,
+		.cpu	 = al->cpu,
+		.cpumode = al->cpumode,
+		.ip	 = al->addr,
+		.level	 = al->level,
 		.stat = {
 			.nr_events = 1,
 			.period	= period,
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 1f1f513..664d83f 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -71,6 +71,14 @@ enum hist_column {
 	HISTC_MEM_LVL,
 	HISTC_MEM_SNOOP,
 	HISTC_TRANSACTION,
+	HISTC_PHYSID_DADDR,
+	HISTC_PHYSID_IADDR,
+	HISTC_PHYSID_PID,
+	HISTC_PHYSID_TID,
+	HISTC_PHYSID_MAJOR,
+	HISTC_PHYSID_MINOR,
+	HISTC_PHYSID_INODE,
+	HISTC_PHYSID_INODE_GEN,
 	HISTC_NR_COLS, /* Last entry */
 };
 
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 635cd8f..e016fc1 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -977,6 +977,269 @@ struct sort_entry sort_transaction = {
 	.se_width_idx	= HISTC_TRANSACTION,
 };
 
+static int64_t
+sort__physid_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	u64 l, r;
+	struct map *l_map = left->mem_info->daddr.map;
+	struct map *r_map = right->mem_info->daddr.map;
+
+	/* store all NULL mem maps at the bottom */
+	/* shouldn't even need this check, should have stubs */
+	if (!left->mem_info->daddr.map || !right->mem_info->daddr.map)
+		return 1;
+
+	/* group event types together */
+	if (left->cpumode > right->cpumode) return -1;
+	if (left->cpumode < right->cpumode) return 1;
+
+	/*
+	 * Addresses with no major/minor numbers are assumed to be
+	 * anonymous in userspace.  Sort those on pid then address.
+	 *
+	 * The kernel and non-zero major/minor mapped areas are
+	 * assumed to be unity mapped.  Sort those on address then pid.
+	 */
+
+	if (l_map->maj || l_map->min || l_map->ino || l_map->ino_generation) {
+		/* mmapped areas */
+
+		if (l_map->maj > r_map->maj) return -1;
+		if (l_map->maj < r_map->maj) return 1;
+
+		if (l_map->min > r_map->min) return -1;
+		if (l_map->min < r_map->min) return 1;
+
+		if (l_map->ino > r_map->ino) return -1;
+		if (l_map->ino < r_map->ino) return 1;
+
+		if (l_map->ino_generation > r_map->ino_generation) return -1;
+		if (l_map->ino_generation < r_map->ino_generation) return 1;
+
+	} else if (left->cpumode != PERF_RECORD_MISC_KERNEL) {
+		/* userspace anonymous */
+		if (left->thread->pid_ > right->thread->pid_) return -1;
+		if (left->thread->pid_ < right->thread->pid_) return 1;
+	}
+
+	/* hack to mark similar regions, 'right' is new entry */
+	right->color = TRUE;
+
+	/* al_addr does all the right addr - start + offset calculations */
+	l = left->mem_info->daddr.al_addr;
+	r = right->mem_info->daddr.al_addr;
+
+	if (l > r) return -1;
+	if (l < r) return 1;
+
+	/* sanity check the maps; only mmaped areas should have different maps */
+	if ((left->mem_info->daddr.map != right->mem_info->daddr.map) &&
+	     !right->mem_info->daddr.map->maj && !right->mem_info->daddr.map->min)
+		pr_debug("physid_cmp: Similar entries have different maps\n");
+
+	return 0;
+}
+
+static int hist_entry__physid_daddr_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	uint64_t addr = 0;
+	struct map *map = NULL;
+	struct symbol *sym = NULL;
+	char level = he->level;
+
+	if (he->mem_info) {
+		addr = he->mem_info->daddr.addr;
+		map = he->mem_info->daddr.map;
+		sym = he->mem_info->daddr.sym;
+
+		/* print [s] for data mmaps */
+		if ((he->cpumode != PERF_RECORD_MISC_KERNEL) &&
+		     map && (map->type == MAP__VARIABLE) &&
+		    (map->maj || map->min || map->ino ||
+		     map->ino_generation))
+			level = 's';
+	}
+
+	return _hist_entry__sym_snprintf(map, sym, addr, level, bf, size,
+					 width);
+}
+
+struct sort_entry sort_physid_daddr = {
+	.se_header	= "Data Address",
+	.se_cmp		= sort__physid_daddr_cmp,
+	.se_snprintf	= hist_entry__physid_daddr_snprintf,
+	.se_width_idx	= HISTC_PHYSID_DADDR,
+};
+
+static int64_t
+sort__physid_iaddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	u64 l = left->mem_info->iaddr.al_addr;
+	u64 r = right->mem_info->iaddr.al_addr;
+
+	return r - l;
+}
+
+static int hist_entry__physid_iaddr_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	uint64_t addr = 0;
+	struct map *map = NULL;
+	struct symbol *sym = NULL;
+	char level = he->level;
+
+	if (he->mem_info) {
+		addr = he->mem_info->iaddr.addr;
+		map = he->mem_info->iaddr.map;
+		sym = he->mem_info->iaddr.sym;
+	}
+
+	return _hist_entry__sym_snprintf(map, sym, addr, level, bf, size,
+					 width);
+}
+
+struct sort_entry sort_physid_iaddr = {
+	.se_header	= "Source Address",
+	.se_cmp		= sort__physid_iaddr_cmp,
+	.se_snprintf	= hist_entry__physid_iaddr_snprintf,
+	.se_width_idx	= HISTC_PHYSID_IADDR,
+};
+
+static int64_t
+sort__physid_pid_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	pid_t l = left->thread->pid_;
+	pid_t r = right->thread->pid_;
+
+	return r - l;
+}
+
+static int hist_entry__physid_pid_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	const char *comm = thread__comm_str(he->thread);
+	return repsep_snprintf(bf, size, "%*s:%5d", width - 6,
+			       comm ?: "", he->thread->pid_);
+}
+
+struct sort_entry sort_physid_pid = {
+	.se_header	= "Command:  Pid",
+	.se_cmp		= sort__physid_pid_cmp,
+	.se_snprintf	= hist_entry__physid_pid_snprintf,
+	.se_width_idx	= HISTC_PHYSID_PID,
+};
+
+static int64_t
+sort__physid_tid_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	pid_t l = left->thread->tid;
+	pid_t r = right->thread->tid;
+
+	return r - l;
+}
+
+static int hist_entry__physid_tid_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%*d", width, he->thread->tid);
+}
+
+struct sort_entry sort_physid_tid = {
+	.se_header	= "Tid  ",
+	.se_cmp		= sort__physid_tid_cmp,
+	.se_snprintf	= hist_entry__physid_tid_snprintf,
+	.se_width_idx	= HISTC_PHYSID_TID,
+};
+
+static int64_t
+sort__physid_major_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct map *l = left->mem_info->daddr.map;
+	struct map *r = right->mem_info->daddr.map;
+
+	return r->maj - l->maj;
+}
+
+static int hist_entry__physid_major_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%*x", width, he->mem_info->daddr.map->maj);
+}
+
+struct sort_entry sort_physid_major = {
+	.se_header	= "Major",
+	.se_cmp		= sort__physid_major_cmp,
+	.se_snprintf	= hist_entry__physid_major_snprintf,
+	.se_width_idx	= HISTC_PHYSID_MAJOR,
+};
+
+static int64_t
+sort__physid_minor_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct map *l = left->mem_info->daddr.map;
+	struct map *r = right->mem_info->daddr.map;
+
+	return r->min - l->min;
+}
+
+static int hist_entry__physid_minor_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%*x", width, he->mem_info->daddr.map->min);
+}
+
+struct sort_entry sort_physid_minor = {
+	.se_header	= "Minor",
+	.se_cmp		= sort__physid_minor_cmp,
+	.se_snprintf	= hist_entry__physid_minor_snprintf,
+	.se_width_idx	= HISTC_PHYSID_MINOR,
+};
+
+static int64_t
+sort__physid_inode_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct map *l = left->mem_info->daddr.map;
+	struct map *r = right->mem_info->daddr.map;
+
+	return r->ino - l->ino;
+}
+
+static int hist_entry__physid_inode_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%*x", width, he->mem_info->daddr.map->ino);
+}
+
+struct sort_entry sort_physid_inode = {
+	.se_header	= "Inode    ",
+	.se_cmp		= sort__physid_inode_cmp,
+	.se_snprintf	= hist_entry__physid_inode_snprintf,
+	.se_width_idx	= HISTC_PHYSID_INODE,
+};
+
+static int64_t
+sort__physid_inode_gen_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct map *l = left->mem_info->daddr.map;
+	struct map *r = right->mem_info->daddr.map;
+
+	return r->ino_generation - l->ino_generation;
+}
+
+static int hist_entry__physid_inode_gen_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%-*x", width, he->mem_info->daddr.map->ino_generation);
+}
+
+struct sort_entry sort_physid_inode_gen = {
+	.se_header	= "Inode Gen",
+	.se_cmp		= sort__physid_inode_gen_cmp,
+	.se_snprintf	= hist_entry__physid_inode_gen_snprintf,
+	.se_width_idx	= HISTC_PHYSID_INODE_GEN,
+};
+
 struct sort_dimension {
 	const char		*name;
 	struct sort_entry	*entry;
@@ -1027,6 +1290,21 @@ static struct sort_dimension memory_sort_dimensions[] = {
 
 #undef DIM
 
+#define DIM(d, n, func) [d - __SORT_PHYSID_MODE] = { .name = n, .entry = &(func) }
+
+static struct sort_dimension physid_sort_dimensions[] = {
+	DIM(SORT_PHYSID_DADDR, "daddr", sort_physid_daddr),
+	DIM(SORT_PHYSID_IADDR, "iaddr", sort_physid_iaddr),
+	DIM(SORT_PHYSID_PID, "pid", sort_physid_pid),
+	DIM(SORT_PHYSID_TID, "tid", sort_physid_tid),
+	DIM(SORT_PHYSID_MAJOR, "major", sort_physid_major),
+	DIM(SORT_PHYSID_MINOR, "minor", sort_physid_minor),
+	DIM(SORT_PHYSID_INODE, "inode", sort_physid_inode),
+	DIM(SORT_PHYSID_INODE_GEN, "inode_gen", sort_physid_inode_gen),
+};
+
+#undef DIM
+
 static void __sort_dimension__add(struct sort_dimension *sd, enum sort_type idx)
 {
 	if (sd->taken)
@@ -1104,6 +1382,22 @@ int sort_dimension__add(const char *tok)
 		return 0;
 	}
 
+	for (i = 0; i < ARRAY_SIZE(physid_sort_dimensions); i++) {
+		struct sort_dimension *sd = &physid_sort_dimensions[i];
+
+		if (strncasecmp(tok, sd->name, strlen(tok)))
+			continue;
+
+		if (sort__mode != SORT_MODE__PHYSID)
+			return -EINVAL;
+
+		if (sd->entry == &sort_physid_daddr)
+			sort__has_sym = 1;
+
+		__sort_dimension__add(sd, i + __SORT_PHYSID_MODE);
+		return 0;
+	}
+
 	return -ESRCH;
 }
 
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 43e5ff4..b1f52a8 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -87,11 +87,13 @@ struct hist_entry {
 	u64			ip;
 	u64			transaction;
 	s32			cpu;
+	u8			cpumode;
 
 	struct hist_entry_diff	diff;
 
 	/* We are added by hists__add_dummy_entry. */
 	bool			dummy;
+	bool			color;
 
 	/* XXX These two should move to some tree widget lib */
 	u16			row_offset;
@@ -133,6 +135,7 @@ enum sort_mode {
 	SORT_MODE__NORMAL,
 	SORT_MODE__BRANCH,
 	SORT_MODE__MEMORY,
+	SORT_MODE__PHYSID,
 };
 
 enum sort_type {
@@ -166,6 +169,16 @@ enum sort_type {
 	SORT_MEM_TLB,
 	SORT_MEM_LVL,
 	SORT_MEM_SNOOP,
+
+	__SORT_PHYSID_MODE,
+	SORT_PHYSID_DADDR = __SORT_PHYSID_MODE,
+	SORT_PHYSID_IADDR,
+	SORT_PHYSID_PID,
+	SORT_PHYSID_TID,
+	SORT_PHYSID_MAJOR,
+	SORT_PHYSID_MINOR,
+	SORT_PHYSID_INODE,
+	SORT_PHYSID_INODE_GEN,
 };
 
 /*
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/6 V2] perf, sort:  Add physid sorting based on mmap2 data
  2014-03-24 20:57   ` [PATCH 4/6 V2] perf, sort: Add physid sorting based on mmap2 data Don Zickus
@ 2014-03-29 17:11     ` Jiri Olsa
  2014-04-01  2:58       ` Don Zickus
  2014-04-09  5:21     ` Namhyung Kim
  1 sibling, 1 reply; 25+ messages in thread
From: Jiri Olsa @ 2014-03-29 17:11 UTC (permalink / raw)
  To: Don Zickus; +Cc: acme, LKML, jmario, fowles, peterz, eranian, andi.kleen

On Mon, Mar 24, 2014 at 04:57:18PM -0400, Don Zickus wrote:
> In order for the c2c tool to work correctly, it needs to properly
> sort all the records on uniquely identifiable data addresses.  These
> unique addresses are converted from virtual addresses provided by the
> hardware into a kernel address using an mmap2 record as the decoder.
> 
> Once a unique address is converted, we can sort on them based on
> various rules.  Then it becomes clear which address are overlapping
> with each other across mmap regions or pid spaces.
> 
> This patch just creates the rules and inserts the records into a
> sort entry for safe keeping until later patches process them.
> 
> The general sorting rule is:

SNIP

> +
> +static int64_t
> +sort__physid_major_cmp(struct hist_entry *left, struct hist_entry *right)
> +{
> +	struct map *l = left->mem_info->daddr.map;
> +	struct map *r = right->mem_info->daddr.map;
> +
> +	return r->maj - l->maj;

I got a segfault here, and consequently in all the other sorting
functions, because it failed to resolve the map earlier in
ip__resolve_data.

We need to check it here, or before adding to the tree.
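
Something along these lines is what I have in mind; an untested sketch
that just reuses the cmp_null() helper already in sort.c:

    static int64_t
    sort__physid_major_cmp(struct hist_entry *left, struct hist_entry *right)
    {
            struct map *l = left->mem_info ? left->mem_info->daddr.map : NULL;
            struct map *r = right->mem_info ? right->mem_info->daddr.map : NULL;

            /* keep entries with an unresolved map in a stable position */
            if (!l || !r)
                    return cmp_null(l, r);

            return r->maj - l->maj;
    }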

thanks,
jirka

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/6 V2] perf, sort:  Add physid sorting based on mmap2 data
  2014-03-29 17:11     ` Jiri Olsa
@ 2014-04-01  2:58       ` Don Zickus
  0 siblings, 0 replies; 25+ messages in thread
From: Don Zickus @ 2014-04-01  2:58 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: acme, LKML, jmario, fowles, peterz, eranian, andi.kleen

On Sat, Mar 29, 2014 at 06:11:52PM +0100, Jiri Olsa wrote:
> On Mon, Mar 24, 2014 at 04:57:18PM -0400, Don Zickus wrote:
> > In order for the c2c tool to work correctly, it needs to properly
> > sort all the records on uniquely identifiable data addresses.  These
> > unique addresses are converted from virtual addresses provided by the
> > hardware into a kernel address using an mmap2 record as the decoder.
> > 
> > Once a unique address is converted, we can sort on them based on
> > various rules.  Then it becomes clear which address are overlapping
> > with each other across mmap regions or pid spaces.
> > 
> > This patch just creates the rules and inserts the records into a
> > sort entry for safe keeping until later patches process them.
> > 
> > The general sorting rule is:
> 
> SNIP
> 
> > +
> > +static int64_t
> > +sort__physid_major_cmp(struct hist_entry *left, struct hist_entry *right)
> > +{
> > +	struct map *l = left->mem_info->daddr.map;
> > +	struct map *r = right->mem_info->daddr.map;
> > +
> > +	return r->maj - l->maj;
> 
> I got a segfault here, and consequently in all the other sorting
> functions, because it failed to resolve the map earlier in
> ip__resolve_data.
>
> We need to check it here, or before adding to the tree.

Crap.  I checked it before, when I had one big function.  I forgot to
carry that through.  Honestly, I would love to block these before they make
it to the sort routine, but I don't know a good way without adding checks to
all the builtins.
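
For example (hypothetical sketch, not something the patch does today), an
early filter right where the mem_info gets resolved for
report__add_mem_hist_entry() could drop such samples before any of the
sort routines ever see them:

    /* 'mi' is whatever mem_info ip__resolve_data() managed to fill in */
    if (!mi || !mi->daddr.map || !mi->iaddr.map) {
            pr_debug("skipping sample with unresolved mem map\n");
            return 0;       /* not an error, just ignore this sample */
    }

But that only covers perf report, so the stub maps idea from the code
comment might be the better long-term answer.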

Cheers,
Don

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/6] perf: Update mmap2 interface with protection and flag bits
  2014-03-24 19:34 ` [PATCH 2/6] perf: Update mmap2 interface with protection and flag bits Don Zickus
@ 2014-04-09  2:17   ` Namhyung Kim
  2014-04-09  2:20     ` Namhyung Kim
  0 siblings, 1 reply; 25+ messages in thread
From: Namhyung Kim @ 2014-04-09  2:17 UTC (permalink / raw)
  To: Don Zickus; +Cc: acme, peterz, LKML, jolsa, jmario, fowles, eranian, andi.kleen

On Mon, 24 Mar 2014 15:34:32 -0400, Don Zickus wrote:
> The kernel piece passes more info now.  Update the perf tool to reflect
> that and adjust the synthesized maps to play along.
>
> Signed-off-by: Don Zickus <dzickus@redhat.com>
> ---
>  tools/perf/util/event.c   | 23 +++++++++++++++++++++--
>  tools/perf/util/event.h   |  2 ++
>  tools/perf/util/machine.c |  4 +++-
>  tools/perf/util/map.c     |  4 +++-
>  tools/perf/util/map.h     |  4 +++-
>  5 files changed, 32 insertions(+), 5 deletions(-)
>
> diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
> index 9d12aa6..6b8646c 100644
> --- a/tools/perf/util/event.c
> +++ b/tools/perf/util/event.c
> @@ -1,4 +1,5 @@
>  #include <linux/types.h>
> +#include <sys/mman.h>
>  #include "event.h"
>  #include "debug.h"
>  #include "hist.h"
> @@ -212,6 +213,21 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
>  		else
>  			event->header.misc = PERF_RECORD_MISC_GUEST_USER;
>  
> +		/* map protection and flags bits */
> +		event->mmap2.prot = 0;
> +		event->mmap2.flags = 0;
> +		if (prot[0] == 'r')
> +			event->mmap2.prot |= PROT_READ;
> +		if (prot[1] == 'w')
> +			event->mmap2.prot |= PROT_WRITE;
> +		if (prot[2] == 'x')
> +			event->mmap2.prot |= PROT_EXEC;
> +
> +		if (prot[3] == 's')
> +			event->mmap2.flags |= MAP_SHARED;
> +		else
> +			event->mmap2.flags |= MAP_PRIVATE;
> +

So you need to synthesize a PERF_RECORD_MMAP2 event then.  The
mmap_event and mmap2_event share the same fields only up to ->pgoff,
so copying into mmap.filename will overwrite the other bits in mmap2.
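
To illustrate what I mean, here is a rough sketch of the two layouts in
tools/perf/util/event.h (from memory, so please double-check the exact
field list):

    struct mmap_event {
            struct perf_event_header header;
            u32 pid, tid;
            u64 start, len, pgoff;
            char filename[PATH_MAX];        /* starts right after pgoff */
    };

    struct mmap2_event {
            struct perf_event_header header;
            u32 pid, tid;
            u64 start, len, pgoff;
            u32 maj, min;                   /* the extra fields live here ... */
            u64 ino, ino_generation;
            u32 prot, flags;                /* ... including your new ones ... */
            char filename[PATH_MAX];        /* ... so filename sits further out */
    };

Writing the name through event->mmap.filename therefore lands on top of
maj/min/ino/ino_generation/prot/flags in the mmap2 layout.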

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/6] perf: Update mmap2 interface with protection and flag bits
  2014-04-09  2:17   ` Namhyung Kim
@ 2014-04-09  2:20     ` Namhyung Kim
  0 siblings, 0 replies; 25+ messages in thread
From: Namhyung Kim @ 2014-04-09  2:20 UTC (permalink / raw)
  To: Don Zickus; +Cc: acme, peterz, LKML, jolsa, jmario, fowles, eranian, andi.kleen

On Wed, 09 Apr 2014 11:17:44 +0900, Namhyung Kim wrote:
> So you need to synthesize a PERF_RECORD_MMAP2 event then.  The
> mmap_event and mmap2_event share the same fields only up to ->pgoff,
> so copying into mmap.filename will overwrite the other bits in mmap2.

Oops, I missed that patch 3/6 already does it.  Sorry for the noise.

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/6] Revert "perf: Disable PERF_RECORD_MMAP2 support"
  2014-03-24 19:34 ` [PATCH 3/6] Revert "perf: Disable PERF_RECORD_MMAP2 support" Don Zickus
@ 2014-04-09  2:32   ` Namhyung Kim
  0 siblings, 0 replies; 25+ messages in thread
From: Namhyung Kim @ 2014-04-09  2:32 UTC (permalink / raw)
  To: Don Zickus; +Cc: acme, peterz, LKML, jolsa, jmario, fowles, eranian, andi.kleen

On Mon, 24 Mar 2014 15:34:33 -0400, Don Zickus wrote:
> This reverts commit 3090ffb5a2515990182f3f55b0688a7817325488.

It seems that if you exchanged the order of patches 2 and 3 it'd be less
confusing  ;-p

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/6] perf, sort:  Add physid sorting based on mmap2 data
  2014-03-24 19:34 ` [PATCH 4/6] perf, sort: Add physid sorting based on mmap2 data Don Zickus
                     ` (2 preceding siblings ...)
  2014-03-24 20:57   ` [PATCH 4/6 V2] perf, sort: Add physid sorting based on mmap2 data Don Zickus
@ 2014-04-09  3:06   ` Don Zickus
  3 siblings, 0 replies; 25+ messages in thread
From: Don Zickus @ 2014-04-09  3:06 UTC (permalink / raw)
  To: acme, peterz; +Cc: LKML, jolsa, jmario, fowles, eranian, andi.kleen

On Mon, Mar 24, 2014 at 03:34:34PM -0400, Don Zickus wrote:
> In order for the c2c tool to work correctly, it needs to properly
> sort all the records on uniquely identifiable data addresses.  These
> unique addresses are converted from virtual addresses provided by the
> hardware into a kernel address using an mmap2 record as the decoder.
> 
> Once a unique address is converted, we can sort on them based on
> various rules.  Then it becomes clear which address are overlapping
> with each other across mmap regions or pid spaces.

I am finishing up another way to sort this data that might make more sense
than the approach in this patch.  Hopefully I can do that tomorrow.

Cheers,
Don
> 
> This patch just creates the rules and inserts the records into a
> sort entry for safe keeping until later patches process them.
> 
> The general sorting rule is:
> 
> o group cpumodes together
> o if (nonzero major/minor number - ie mmap'd areas)
>   o sort on major, minor, inode, inode generation numbers
> o else if cpumode is not kernel
>   o sort on pid
> o sort on data addresses
> 
> I also hacked in the concept of 'color'.  The purpose of that bit is to
> provides hints later when processing these records that indicate a new unique
> address has been encountered.  Because later processing only checks the data
> addresses, there can be a theoretical scenario that similar sequential data
> addresses (when walking the rbtree) could be misinterpreted as overlapping
> when in fact they are not.
> 
> Sample output: (perf report --stdio --physid-mode)
> 
>     18.93%  [k] 0xffffc900139c40b0  [k] igb_update_stats     kworker/0:1:  257   257      0      0       0  0
>      7.63%  [k] 0xffff88082e6cf0a8  [k] watchdog_timer_fn        swapper:    0     0      0      0       0  0
>      1.86%  [k] 0xffff88042ef94700  [k] _raw_spin_lock           swapper:    0     0      0      0       0  0
>      1.77%  [k] 0xffff8804278afa50  [k] __switch_to              swapper:    0     0      0      0       0  0
> 
> V3: split out the sorting into unique entries.  This makes it look
>       far less ugly
>     create a new 'physid mode' to group all the sorting rules together
>       (mimics the mem-mode)
> 
> Signed-off-by: Don Zickus <dzickus@redhat.com>
> ---
>  tools/perf/builtin-report.c |  20 ++-
>  tools/perf/util/hist.c      |  27 +++-
>  tools/perf/util/hist.h      |   8 ++
>  tools/perf/util/sort.c      | 294 ++++++++++++++++++++++++++++++++++++++++++++
>  tools/perf/util/sort.h      |  13 ++
>  5 files changed, 358 insertions(+), 4 deletions(-)
> 
> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> index c87412b..093f5ad 100644
> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -49,6 +49,7 @@ struct report {
>  	bool			show_threads;
>  	bool			inverted_callchain;
>  	bool			mem_mode;
> +	bool			physid_mode;
>  	bool			header;
>  	bool			header_only;
>  	int			max_stack;
> @@ -241,7 +242,7 @@ static int process_sample_event(struct perf_tool *tool,
>  		ret = report__add_branch_hist_entry(rep, &al, sample, evsel);
>  		if (ret < 0)
>  			pr_debug("problem adding lbr entry, skipping event\n");
> -	} else if (rep->mem_mode == 1) {
> +	} else if ((rep->mem_mode == 1) || (rep->physid_mode)) {
>  		ret = report__add_mem_hist_entry(rep, &al, sample, evsel);
>  		if (ret < 0)
>  			pr_debug("problem adding mem entry, skipping event\n");
> @@ -746,6 +747,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
>  	OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
>  		    "Disable symbol demangling"),
>  	OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
> +	OPT_BOOLEAN(0, "physid-mode", &report.physid_mode, "physid access profile"),
>  	OPT_CALLBACK(0, "percent-limit", &report, "percent",
>  		     "Don't show entries under that percent", parse_percent_limit),
>  	OPT_END()
> @@ -817,6 +819,22 @@ repeat:
>  			sort_order = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
>  	}
>  
> +	if (report.physid_mode) {
> +		if ((sort__mode == SORT_MODE__BRANCH) ||
> +		    (sort__mode == SORT_MODE__MEMORY)) {
> +			pr_err("branch or memory and physid mode incompatible\n");
> +			goto error;
> +		}
> +		sort__mode = SORT_MODE__PHYSID;
> +
> +		/*
> +		 * if no sort_order is provided, then specify
> +		 * branch-mode specific order
> +		 */
> +		if (sort_order == default_sort_order)
> +			sort_order = "daddr,iaddr,pid,tid,major,minor,inode,inode_gen";
> +	}
> +
>  	if (setup_sorting() < 0) {
>  		parse_options_usage(report_usage, options, "s", 1);
>  		goto error;
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index f38590d..81f47ee 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -136,14 +136,34 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
>  			symlen = dso__name_len(h->mem_info->daddr.map->dso);
>  			hists__new_col_len(hists, HISTC_MEM_DADDR_DSO,
>  					   symlen);
> +			hists__new_col_len(hists, HISTC_PHYSID_DADDR, symlen);
>  		} else {
>  			symlen = unresolved_col_width + 4 + 2;
>  			hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
> +			hists__set_unres_dso_col_len(hists, HISTC_PHYSID_DADDR);
> +		}
> +
> +		if (h->mem_info->iaddr.sym) {
> +			symlen = (int)h->mem_info->iaddr.sym->namelen + 4
> +			       + unresolved_col_width + 2;
> +			hists__new_col_len(hists, HISTC_PHYSID_IADDR, symlen);
> +		} else {
> +			symlen = unresolved_col_width + 4 + 2;
> +			hists__new_col_len(hists, HISTC_PHYSID_IADDR, symlen);
> +		}
> +		if (h->mem_info->iaddr.map) {
> +			symlen = dso__name_len(h->mem_info->iaddr.map->dso);
> +			hists__new_col_len(hists, HISTC_PHYSID_IADDR, symlen);
> +		} else {
> +			symlen = unresolved_col_width + 4 + 2;
> +			hists__set_unres_dso_col_len(hists, HISTC_PHYSID_IADDR);
>  		}
>  	} else {
>  		symlen = unresolved_col_width + 4 + 2;
>  		hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen);
>  		hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
> +		hists__set_unres_dso_col_len(hists, HISTC_PHYSID_DADDR);
> +		hists__set_unres_dso_col_len(hists, HISTC_PHYSID_IADDR);
>  	}
>  
>  	hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
> @@ -413,9 +433,10 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
>  			.map	= al->map,
>  			.sym	= al->sym,
>  		},
> -		.cpu	= al->cpu,
> -		.ip	= al->addr,
> -		.level	= al->level,
> +		.cpu	 = al->cpu,
> +		.cpumode = al->cpumode,
> +		.ip	 = al->addr,
> +		.level	 = al->level,
>  		.stat = {
>  			.nr_events = 1,
>  			.period	= period,
> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> index 1f1f513..664d83f 100644
> --- a/tools/perf/util/hist.h
> +++ b/tools/perf/util/hist.h
> @@ -71,6 +71,14 @@ enum hist_column {
>  	HISTC_MEM_LVL,
>  	HISTC_MEM_SNOOP,
>  	HISTC_TRANSACTION,
> +	HISTC_PHYSID_DADDR,
> +	HISTC_PHYSID_IADDR,
> +	HISTC_PHYSID_PID,
> +	HISTC_PHYSID_TID,
> +	HISTC_PHYSID_MAJOR,
> +	HISTC_PHYSID_MINOR,
> +	HISTC_PHYSID_INODE,
> +	HISTC_PHYSID_INODE_GEN,
>  	HISTC_NR_COLS, /* Last entry */
>  };
>  
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index 635cd8f..e016fc1 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -977,6 +977,269 @@ struct sort_entry sort_transaction = {
>  	.se_width_idx	= HISTC_TRANSACTION,
>  };
>  
> +static int64_t
> +sort__physid_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
> +{
> +	u64 l, r;
> +	struct map *l_map = left->mem_info->daddr.map;
> +	struct map *r_map = right->mem_info->daddr.map;
> +
> +	/* store all NULL mem maps at the bottom */
> +	/* shouldn't even need this check, should have stubs */
> +	if (!left->mem_info->daddr.map || !right->mem_info->daddr.map)
> +		return 1;
> +
> +	/* group event types together */
> +	if (left->cpumode > right->cpumode) return -1;
> +	if (left->cpumode < right->cpumode) return 1;
> +
> +	/*
> +	 * Addresses with no major/minor numbers are assumed to be
> +	 * anonymous in userspace.  Sort those on pid then address.
> +	 *
> +	 * The kernel and non-zero major/minor mapped areas are
> +	 * assumed to be unity mapped.  Sort those on address then pid.
> +	 */
> +
> +	if (l_map->maj || l_map->min || l_map->ino || l_map->ino_generation) {
> +		/* mmapped areas */
> +
> +		if (l_map->maj > r_map->maj) return -1;
> +		if (l_map->maj < r_map->maj) return 1;
> +
> +		if (l_map->min > r_map->min) return -1;
> +		if (l_map->min < r_map->min) return 1;
> +
> +		if (l_map->ino > r_map->ino) return -1;
> +		if (l_map->ino < r_map->ino) return 1;
> +
> +		if (l_map->ino_generation > r_map->ino_generation) return -1;
> +		if (l_map->ino_generation < r_map->ino_generation) return 1;
> +
> +	} else if (left->cpumode != PERF_RECORD_MISC_KERNEL) {
> +		/* userspace anonymous */
> +		if (left->thread->pid_ > right->thread->pid_) return -1;
> +		if (left->thread->pid_ < right->thread->pid_) return 1;
> +	}
> +
> +	/* hack to mark similar regions, 'right' is new entry */
> +	right->color = TRUE;
> +
> +	/* al_addr does all the right addr - start + offset calculations */
> +	l = left->mem_info->daddr.al_addr;
> +	r = right->mem_info->daddr.al_addr;
> +
> +	if (l > r) return -1;
> +	if (l < r) return 1;
> +
> +	/* sanity check the maps; only mmaped areas should have different maps */
> +	if ((left->mem_info->daddr.map != right->mem_info->daddr.map) &&
> +	     !right->mem_info->daddr.map->maj && !right->mem_info->daddr.map->min)
> +		pr_debug("physid_cmp: Similar entries have different maps\n");
> +
> +	return 0;
> +}
> +
> +static int hist_entry__physid_daddr_snprintf(struct hist_entry *he, char *bf,
> +					    size_t size, unsigned int width)
> +{
> +	uint64_t addr = 0;
> +	struct map *map = NULL;
> +	struct symbol *sym = NULL;
> +	char level = he->level;
> +
> +	if (he->mem_info) {
> +		addr = he->mem_info->daddr.addr;
> +		map = he->mem_info->daddr.map;
> +		sym = he->mem_info->daddr.sym;
> +
> +		/* print [s] for data mmaps */
> +		if ((he->cpumode != PERF_RECORD_MISC_KERNEL) &&
> +		     map && (map->type == MAP__VARIABLE) &&
> +		    (map->maj || map->min || map->ino ||
> +		     map->ino_generation))
> +			level = 's';
> +	}
> +
> +	return _hist_entry__sym_snprintf(map, sym, addr, level, bf, size,
> +					 width);
> +}
> +
> +struct sort_entry sort_physid_daddr = {
> +	.se_header	= "Data Address",
> +	.se_cmp		= sort__physid_daddr_cmp,
> +	.se_snprintf	= hist_entry__physid_daddr_snprintf,
> +	.se_width_idx	= HISTC_PHYSID_DADDR,
> +};
> +
> +static int64_t
> +sort__physid_iaddr_cmp(struct hist_entry *left, struct hist_entry *right)
> +{
> +	u64 l = left->mem_info->iaddr.al_addr;
> +	u64 r = right->mem_info->iaddr.al_addr;
> +
> +	return r - l;
> +}
> +
> +static int hist_entry__physid_iaddr_snprintf(struct hist_entry *he, char *bf,
> +					    size_t size, unsigned int width)
> +{
> +	uint64_t addr = 0;
> +	struct map *map = NULL;
> +	struct symbol *sym = NULL;
> +	char level = he->level;
> +
> +	if (he->mem_info) {
> +		addr = he->mem_info->iaddr.addr;
> +		map = he->mem_info->iaddr.map;
> +		sym = he->mem_info->iaddr.sym;
> +	}
> +
> +	return _hist_entry__sym_snprintf(map, sym, addr, level, bf, size,
> +					 width);
> +}
> +
> +struct sort_entry sort_physid_iaddr = {
> +	.se_header	= "Source Address",
> +	.se_cmp		= sort__physid_iaddr_cmp,
> +	.se_snprintf	= hist_entry__physid_iaddr_snprintf,
> +	.se_width_idx	= HISTC_PHYSID_IADDR,
> +};
> +
> +static int64_t
> +sort__physid_pid_cmp(struct hist_entry *left, struct hist_entry *right)
> +{
> +	pid_t l = left->thread->pid_;
> +	pid_t r = right->thread->pid_;
> +
> +	return r - l;
> +}
> +
> +static int hist_entry__physid_pid_snprintf(struct hist_entry *he, char *bf,
> +					    size_t size, unsigned int width)
> +{
> +	const char *comm = thread__comm_str(he->thread);
> +	return repsep_snprintf(bf, size, "%*s:%5d", width - 6,
> +			       comm ?: "", he->thread->pid_);
> +}
> +
> +struct sort_entry sort_physid_pid = {
> +	.se_header	= "Command:  Pid",
> +	.se_cmp		= sort__physid_pid_cmp,
> +	.se_snprintf	= hist_entry__physid_pid_snprintf,
> +	.se_width_idx	= HISTC_PHYSID_PID,
> +};
> +
> +static int64_t
> +sort__physid_tid_cmp(struct hist_entry *left, struct hist_entry *right)
> +{
> +	pid_t l = left->thread->tid;
> +	pid_t r = right->thread->tid;
> +
> +	return r - l;
> +}
> +
> +static int hist_entry__physid_tid_snprintf(struct hist_entry *he, char *bf,
> +					    size_t size, unsigned int width)
> +{
> +	return repsep_snprintf(bf, size, "%*d", width, he->thread->tid);
> +}
> +
> +struct sort_entry sort_physid_tid = {
> +	.se_header	= "Tid  ",
> +	.se_cmp		= sort__physid_tid_cmp,
> +	.se_snprintf	= hist_entry__physid_tid_snprintf,
> +	.se_width_idx	= HISTC_PHYSID_TID,
> +};
> +
> +static int64_t
> +sort__physid_major_cmp(struct hist_entry *left, struct hist_entry *right)
> +{
> +	struct map *l = left->mem_info->daddr.map;
> +	struct map *r = right->mem_info->daddr.map;
> +
> +	return r->maj - l->maj;
> +}
> +
> +static int hist_entry__physid_major_snprintf(struct hist_entry *he, char *bf,
> +					    size_t size, unsigned int width)
> +{
> +	return repsep_snprintf(bf, size, "%*x", width, he->mem_info->daddr.map->maj);
> +}
> +
> +struct sort_entry sort_physid_major = {
> +	.se_header	= "Major",
> +	.se_cmp		= sort__physid_major_cmp,
> +	.se_snprintf	= hist_entry__physid_major_snprintf,
> +	.se_width_idx	= HISTC_PHYSID_MAJOR,
> +};
> +
> +static int64_t
> +sort__physid_minor_cmp(struct hist_entry *left, struct hist_entry *right)
> +{
> +	struct map *l = left->mem_info->daddr.map;
> +	struct map *r = right->mem_info->daddr.map;
> +
> +	return r->min - l->min;
> +}
> +
> +static int hist_entry__physid_minor_snprintf(struct hist_entry *he, char *bf,
> +					    size_t size, unsigned int width)
> +{
> +	return repsep_snprintf(bf, size, "%*x", width, he->mem_info->daddr.map->min);
> +}
> +
> +struct sort_entry sort_physid_minor = {
> +	.se_header	= "Minor",
> +	.se_cmp		= sort__physid_minor_cmp,
> +	.se_snprintf	= hist_entry__physid_minor_snprintf,
> +	.se_width_idx	= HISTC_PHYSID_MINOR,
> +};
> +
> +static int64_t
> +sort__physid_inode_cmp(struct hist_entry *left, struct hist_entry *right)
> +{
> +	struct map *l = left->mem_info->daddr.map;
> +	struct map *r = right->mem_info->daddr.map;
> +
> +	return r->ino - l->ino;
> +}
> +
> +static int hist_entry__physid_inode_snprintf(struct hist_entry *he, char *bf,
> +					    size_t size, unsigned int width)
> +{
> +	return repsep_snprintf(bf, size, "%*x", width, he->mem_info->daddr.map->ino);
> +}
> +
> +struct sort_entry sort_physid_inode = {
> +	.se_header	= "Inode    ",
> +	.se_cmp		= sort__physid_inode_cmp,
> +	.se_snprintf	= hist_entry__physid_inode_snprintf,
> +	.se_width_idx	= HISTC_PHYSID_INODE,
> +};
> +
> +static int64_t
> +sort__physid_inode_gen_cmp(struct hist_entry *left, struct hist_entry *right)
> +{
> +	struct map *l = left->mem_info->daddr.map;
> +	struct map *r = right->mem_info->daddr.map;
> +
> +	return r->ino_generation - l->ino_generation;
> +}
> +
> +static int hist_entry__physid_inode_gen_snprintf(struct hist_entry *he, char *bf,
> +					    size_t size, unsigned int width)
> +{
> +	return repsep_snprintf(bf, size, "%-*x", width, he->mem_info->daddr.map->ino_generation);
> +}
> +
> +struct sort_entry sort_physid_inode_gen = {
> +	.se_header	= "Inode Gen",
> +	.se_cmp		= sort__physid_inode_gen_cmp,
> +	.se_snprintf	= hist_entry__physid_inode_gen_snprintf,
> +	.se_width_idx	= HISTC_PHYSID_INODE_GEN,
> +};
> +
>  struct sort_dimension {
>  	const char		*name;
>  	struct sort_entry	*entry;
> @@ -1027,6 +1290,21 @@ static struct sort_dimension memory_sort_dimensions[] = {
>  
>  #undef DIM
>  
> +#define DIM(d, n, func) [d - __SORT_PHYSID_MODE] = { .name = n, .entry = &(func) }
> +
> +static struct sort_dimension physid_sort_dimensions[] = {
> +	DIM(SORT_PHYSID_DADDR, "daddr", sort_physid_daddr),
> +	DIM(SORT_PHYSID_IADDR, "iaddr", sort_physid_iaddr),
> +	DIM(SORT_PHYSID_PID, "pid", sort_physid_pid),
> +	DIM(SORT_PHYSID_TID, "tid", sort_physid_tid),
> +	DIM(SORT_PHYSID_MAJOR, "major", sort_physid_major),
> +	DIM(SORT_PHYSID_MINOR, "minor", sort_physid_minor),
> +	DIM(SORT_PHYSID_INODE, "inode", sort_physid_inode),
> +	DIM(SORT_PHYSID_INODE_GEN, "inode_gen", sort_physid_inode_gen),
> +};
> +
> +#undef DIM
> +
>  static void __sort_dimension__add(struct sort_dimension *sd, enum sort_type idx)
>  {
>  	if (sd->taken)
> @@ -1104,6 +1382,22 @@ int sort_dimension__add(const char *tok)
>  		return 0;
>  	}
>  
> +	for (i = 0; i < ARRAY_SIZE(physid_sort_dimensions); i++) {
> +		struct sort_dimension *sd = &physid_sort_dimensions[i];
> +
> +		if (strncasecmp(tok, sd->name, strlen(tok)))
> +			continue;
> +
> +		if (sort__mode != SORT_MODE__PHYSID)
> +			return -EINVAL;
> +
> +		if (sd->entry == &sort_physid_daddr)
> +			sort__has_sym = 1;
> +
> +		__sort_dimension__add(sd, i + __SORT_PHYSID_MODE);
> +		return 0;
> +	}
> +
>  	return -ESRCH;
>  }
>  
> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> index 43e5ff4..b1f52a8 100644
> --- a/tools/perf/util/sort.h
> +++ b/tools/perf/util/sort.h
> @@ -87,11 +87,13 @@ struct hist_entry {
>  	u64			ip;
>  	u64			transaction;
>  	s32			cpu;
> +	u8			cpumode;
>  
>  	struct hist_entry_diff	diff;
>  
>  	/* We are added by hists__add_dummy_entry. */
>  	bool			dummy;
> +	bool			color;
>  
>  	/* XXX These two should move to some tree widget lib */
>  	u16			row_offset;
> @@ -133,6 +135,7 @@ enum sort_mode {
>  	SORT_MODE__NORMAL,
>  	SORT_MODE__BRANCH,
>  	SORT_MODE__MEMORY,
> +	SORT_MODE__PHYSID,
>  };
>  
>  enum sort_type {
> @@ -166,6 +169,16 @@ enum sort_type {
>  	SORT_MEM_TLB,
>  	SORT_MEM_LVL,
>  	SORT_MEM_SNOOP,
> +
> +	__SORT_PHYSID_MODE,
> +	SORT_PHYSID_DADDR = __SORT_PHYSID_MODE,
> +	SORT_PHYSID_IADDR,
> +	SORT_PHYSID_PID,
> +	SORT_PHYSID_TID,
> +	SORT_PHYSID_MAJOR,
> +	SORT_PHYSID_MINOR,
> +	SORT_PHYSID_INODE,
> +	SORT_PHYSID_INODE_GEN,
>  };
>  
>  /*
> -- 
> 1.7.11.7
> 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/6 V2] perf, sort:  Add physid sorting based on mmap2 data
  2014-03-24 20:57   ` [PATCH 4/6 V2] perf, sort: Add physid sorting based on mmap2 data Don Zickus
  2014-03-29 17:11     ` Jiri Olsa
@ 2014-04-09  5:21     ` Namhyung Kim
  2014-04-09  5:45       ` Peter Zijlstra
  1 sibling, 1 reply; 25+ messages in thread
From: Namhyung Kim @ 2014-04-09  5:21 UTC (permalink / raw)
  To: Don Zickus; +Cc: acme, LKML, jolsa, jmario, fowles, peterz, eranian, andi.kleen

On Mon, 24 Mar 2014 16:57:18 -0400, Don Zickus wrote:
> In order for the c2c tool to work correctly, it needs to properly
> sort all the records on uniquely identifiable data addresses.  These
> unique addresses are converted from virtual addresses provided by the
> hardware into a kernel address using an mmap2 record as the decoder.
>
> Once a unique address is converted, we can sort on them based on
> various rules.  Then it becomes clear which address are overlapping
> with each other across mmap regions or pid spaces.
>
> This patch just creates the rules and inserts the records into a
> sort entry for safe keeping until later patches process them.
>
> The general sorting rule is:
>
> o group cpumodes together
> o if (nonzero major/minor number - ie mmap'd areas)
>   o sort on major, minor, inode, inode generation numbers
> o else if cpumode is not kernel
>   o sort on pid
> o sort on data addresses
>
> I also hacked in the concept of 'color'.  The purpose of that bit is to
> provides hints later when processing these records that indicate a new unique
> address has been encountered.  Because later processing only checks the data
> addresses, there can be a theoretical scenario that similar sequential data
> addresses (when walking the rbtree) could be misinterpreted as overlapping
> when in fact they are not.
>
> Sample output: (perf report --stdio --physid-mode)
>
>   Overhead            Data Address            Source Address    Command:  Pid    Tid Major  Minor  Inode  Inode Gen
>   ........  ......................  ........................ ................. ..... .....  ..... ....... .........
>     18.93%  [k] 0xffffc900139c40b0  [k] igb_update_stats     kworker/0:1:  257   257     0      0       0         0
>      7.63%  [k] 0xffff88082e6cf0a8  [k] watchdog_timer_fn        swapper:    0     0     0      0       0         0
>      1.86%  [k] 0xffff88042ef94700  [k] _raw_spin_lock           swapper:    0     0     0      0       0         0
>      1.77%  [k] 0xffff8804278afa50  [k] __switch_to              swapper:    0     0     0      0       0         0
>
> V4: add manpage entry in perf-report
>
> V3: split out the sorting into unique entries.  This makes it look
>       far less ugly
>     create a new 'physid mode' to group all the sorting rules together
>       (mimics the mem-mode)

What is 'physid' then?  I guess you meant physical id, but it seems
'unique id' or 'unique map id' would be a better fit IMHO.

>
> Signed-off-by: Don Zickus <dzickus@redhat.com>
> ---
>  tools/perf/Documentation/perf-report.txt |  23 +++
>  tools/perf/builtin-report.c              |  20 ++-
>  tools/perf/util/hist.c                   |  27 ++-
>  tools/perf/util/hist.h                   |   8 +
>  tools/perf/util/sort.c                   | 294 +++++++++++++++++++++++++++++++
>  tools/perf/util/sort.h                   |  13 ++
>  6 files changed, 381 insertions(+), 4 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
> index 8eab8a4..01391b0 100644
> --- a/tools/perf/Documentation/perf-report.txt
> +++ b/tools/perf/Documentation/perf-report.txt
> @@ -95,6 +95,23 @@ OPTIONS
>  	And default sort keys are changed to comm, dso_from, symbol_from, dso_to
>  	and symbol_to, see '--branch-stack'.
>  
> +	If --physid-mode option is used, following sort keys are also
> +	available:
> +	daddr, iaddr, pid, tid, major, minor, inode, inode_gen.
> +
> +	- daddr: data address (sorted based on major, minor, inode and inode
> +		generation numbers if shared, otherwise pid)

By "if shared", did you mean "for shared file mapping"?


> +	- iaddr: instruction address
> +	- pid: command and pid of the task
> +	- tid: tid of the task
> +	- major: major number of mapped location (0 if not mapped)
> +	- minor: minor number of mapped location (0 if not mapped)
> +	- inode: inode number of mapped location (0 if not mapped)
> +	- inode_gen: inode generation number of mapped location (0 if not mapped)

s/if not mapped/if not file-mapped/ ?

> +
> +	And default sort keys are changed to daddr, iaddr, pid, tid, major,
> +	minor, inode and inode_gen, see '--physid-mode'.
> +
>  -p::
>  --parent=<regex>::
>          A regex filter to identify parent. The parent is a caller of this
> @@ -223,6 +240,12 @@ OPTIONS
>  	branch stacks and it will automatically switch to the branch view mode,
>  	unless --no-branch-stack is used.
>  
> +--physid-mode::
> +	Use the data addresses sampled using perf record -d and combine them
> +	with the mmap'd area region where they are located.  This helps identify
> +	which data addresses collide with similar addresses in another process
> +	space.  See --sort for output choices.
> +
>  --objdump=<path>::
>          Path to objdump binary.
>  
> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> index c87412b..093f5ad 100644
> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -49,6 +49,7 @@ struct report {
>  	bool			show_threads;
>  	bool			inverted_callchain;
>  	bool			mem_mode;
> +	bool			physid_mode;
>  	bool			header;
>  	bool			header_only;
>  	int			max_stack;
> @@ -241,7 +242,7 @@ static int process_sample_event(struct perf_tool *tool,
>  		ret = report__add_branch_hist_entry(rep, &al, sample, evsel);
>  		if (ret < 0)
>  			pr_debug("problem adding lbr entry, skipping event\n");
> -	} else if (rep->mem_mode == 1) {
> +	} else if ((rep->mem_mode == 1) || (rep->physid_mode)) {

As you can see rep->mem_mode is also a boolean field.  Please change it
like:

	} else if (rep->mem_mode || rep->physid_mode) {


>  		ret = report__add_mem_hist_entry(rep, &al, sample, evsel);
>  		if (ret < 0)
>  			pr_debug("problem adding mem entry, skipping event\n");
> @@ -746,6 +747,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
>  	OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
>  		    "Disable symbol demangling"),
>  	OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
> +	OPT_BOOLEAN(0, "physid-mode", &report.physid_mode, "physid access profile"),
>  	OPT_CALLBACK(0, "percent-limit", &report, "percent",
>  		     "Don't show entries under that percent", parse_percent_limit),
>  	OPT_END()
> @@ -817,6 +819,22 @@ repeat:
>  			sort_order = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
>  	}
>  
> +	if (report.physid_mode) {
> +		if ((sort__mode == SORT_MODE__BRANCH) ||
> +		    (sort__mode == SORT_MODE__MEMORY)) {
> +			pr_err("branch or memory and physid mode incompatible\n");
> +			goto error;
> +		}
> +		sort__mode = SORT_MODE__PHYSID;
> +
> +		/*
> +		 * if no sort_order is provided, then specify
> +		 * branch-mode specific order

s/branch-mode/physid-mode/

It looks like mem-mode has the same copy-n-paste problem.


> +		 */
> +		if (sort_order == default_sort_order)
> +			sort_order = "daddr,iaddr,pid,tid,major,minor,inode,inode_gen";

So if the 'daddr' key already checks major, minor, inode and inode_gen
by itself why do we need to add those sort keys again?


> +	}
> +
>  	if (setup_sorting() < 0) {
>  		parse_options_usage(report_usage, options, "s", 1);
>  		goto error;


[SNIP]
> +static int64_t
> +sort__physid_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
> +{
> +	u64 l, r;
> +	struct map *l_map = left->mem_info->daddr.map;
> +	struct map *r_map = right->mem_info->daddr.map;
> +
> +	/* store all NULL mem maps at the bottom */
> +	/* shouldn't even need this check, should have stubs */
> +	if (!left->mem_info->daddr.map || !right->mem_info->daddr.map)
> +		return 1;

You might want to use 'return cmp_null(l_map, r_map);' here.
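
(For reference, this is roughly what that helper in sort.c looks like, if
I remember it right; it gives the NULL cases a stable order instead of the
unconditional 1 above:)

    static int64_t cmp_null(const void *l, const void *r)
    {
            if (!l && !r)
                    return 0;
            else if (!l)
                    return -1;
            else
                    return 1;
    }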

> +
> +	/* group event types together */
> +	if (left->cpumode > right->cpumode) return -1;
> +	if (left->cpumode < right->cpumode) return 1;
> +
> +	/*
> +	 * Addresses with no major/minor numbers are assumed to be
> +	 * anonymous in userspace.  Sort those on pid then address.
> +	 *
> +	 * The kernel and non-zero major/minor mapped areas are
> +	 * assumed to be unity mapped.  Sort those on address then pid.
> +	 */
> +
> +	if (l_map->maj || l_map->min || l_map->ino || l_map->ino_generation) {
> +		/* mmapped areas */
> +
> +		if (l_map->maj > r_map->maj) return -1;
> +		if (l_map->maj < r_map->maj) return 1;
> +
> +		if (l_map->min > r_map->min) return -1;
> +		if (l_map->min < r_map->min) return 1;
> +
> +		if (l_map->ino > r_map->ino) return -1;
> +		if (l_map->ino < r_map->ino) return 1;
> +
> +		if (l_map->ino_generation > r_map->ino_generation) return -1;
> +		if (l_map->ino_generation < r_map->ino_generation) return 1;
> +
> +	} else if (left->cpumode != PERF_RECORD_MISC_KERNEL) {
> +		/* userspace anonymous */
> +		if (left->thread->pid_ > right->thread->pid_) return -1;
> +		if (left->thread->pid_ < right->thread->pid_) return 1;
> +	}
> +
> +	/* hack to mark similar regions, 'right' is new entry */
> +	right->color = TRUE;

I don't understand the logic behind the 'color'.  It seems to just mark
every sample except the first one on the same file (or the same pid for an
anon map), indicating that those accesses are for distinct maps, right?

I don't know how it could help to distinguish whether an access is for the
same map or a different map.  For the userspace anon map case, why doesn't
it check the start addresses of l_map and r_map?
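
Something like this is what I would have expected for the anon case (just
a sketch of the comparison I mean):

    /* tell anon maps from different processes apart by their map start */
    if (l_map->start != r_map->start)
            return l_map->start < r_map->start ? 1 : -1;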

I'm feeling ignorant.. :-(

> +
> +	/* al_addr does all the right addr - start + offset calculations */
> +	l = left->mem_info->daddr.al_addr;
> +	r = right->mem_info->daddr.al_addr;
> +
> +	if (l > r) return -1;
> +	if (l < r) return 1;
> +
> +	/* sanity check the maps; only mmaped areas should have different maps */
> +	if ((left->mem_info->daddr.map != right->mem_info->daddr.map) &&
> +	     !right->mem_info->daddr.map->maj && !right->mem_info->daddr.map->min)
> +		pr_debug("physid_cmp: Similar entries have different maps\n");
> +
> +	return 0;
> +}


[SNIP]
> @@ -1104,6 +1382,22 @@ int sort_dimension__add(const char *tok)
>  		return 0;
>  	}
>  
> +	for (i = 0; i < ARRAY_SIZE(physid_sort_dimensions); i++) {
> +		struct sort_dimension *sd = &physid_sort_dimensions[i];
> +
> +		if (strncasecmp(tok, sd->name, strlen(tok)))
> +			continue;
> +
> +		if (sort__mode != SORT_MODE__PHYSID)
> +			return -EINVAL;
> +
> +		if (sd->entry == &sort_physid_daddr)
> +			sort__has_sym = 1;

I think it's not needed.  sort__has_sym is for doing annotation during a
report/top session, and it only works on a symbol (i.e. function) basis.

Thanks,
Namhyung

> +
> +		__sort_dimension__add(sd, i + __SORT_PHYSID_MODE);
> +		return 0;
> +	}
> +
>  	return -ESRCH;
>  }

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 6/6] perf, sort:  Allow unique sorting instead of combining hist_entries
  2014-03-24 19:34 ` [PATCH 6/6] perf, sort: Allow unique sorting instead of combining hist_entries Don Zickus
@ 2014-04-09  5:31   ` Namhyung Kim
  2014-04-09 13:57     ` Don Zickus
  0 siblings, 1 reply; 25+ messages in thread
From: Namhyung Kim @ 2014-04-09  5:31 UTC (permalink / raw)
  To: Don Zickus; +Cc: acme, peterz, LKML, jolsa, jmario, fowles, eranian, andi.kleen

On Mon, 24 Mar 2014 15:34:36 -0400, Don Zickus wrote:
> The cache contention tools needs to keep all the perf records unique in order
> to properly parse all the data.  Currently add_hist_entry() will combine
> the duplicate record and add the weight/period to the existing record.
>
> This throws away the unique data the cache contention tool needs (mainly
> the data source).  Create a flag to force the records to stay unique.

No.  This is why I said you need to add 'mem' and 'snoop' sort keys into
the c2c tool.  This is not how sort works IMHO - if you need to make
samples unique, let the sort key(s) distinguish them somehow, or you can
combine the same samples (in terms of sort keys) and use the combined
entry's stat.nr_events and stat.period or weight.
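
To be concrete, I mean letting the sort order itself carry the
distinction, e.g. something like this (hand-waving the sort-mode plumbing;
the 'mem' and 'snoop' keys already exist for --mem-mode):

    sort_order = "daddr,iaddr,pid,tid,major,minor,inode,inode_gen,mem,snoop";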

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/6 V2] perf, sort:  Add physid sorting based on mmap2 data
  2014-04-09  5:21     ` Namhyung Kim
@ 2014-04-09  5:45       ` Peter Zijlstra
  0 siblings, 0 replies; 25+ messages in thread
From: Peter Zijlstra @ 2014-04-09  5:45 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Don Zickus, acme, LKML, jolsa, jmario, fowles, eranian, andi.kleen

On Wed, Apr 09, 2014 at 02:21:49PM +0900, Namhyung Kim wrote:
> >     create a new 'physid mode' to group all the sorting rules together
> >       (mimics the mem-mode)
> 
> What is 'physid' then?  I guess you meant physical id, but it seems
> 'unique id' or 'unique map id' would be a better fit IMHO.

I suspect this is legacy naming; they used to do this using physical
addresses.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 6/6] perf, sort:  Allow unique sorting instead of combining hist_entries
  2014-04-09  5:31   ` Namhyung Kim
@ 2014-04-09 13:57     ` Don Zickus
  2014-04-10  5:09       ` Namhyung Kim
  0 siblings, 1 reply; 25+ messages in thread
From: Don Zickus @ 2014-04-09 13:57 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: acme, peterz, LKML, jolsa, jmario, fowles, eranian, andi.kleen

On Wed, Apr 09, 2014 at 02:31:00PM +0900, Namhyung Kim wrote:
> On Mon, 24 Mar 2014 15:34:36 -0400, Don Zickus wrote:
> > The cache contention tools needs to keep all the perf records unique in order
> > to properly parse all the data.  Currently add_hist_entry() will combine
> > the duplicate record and add the weight/period to the existing record.
> >
> > This throws away the unique data the cache contention tool needs (mainly
> > the data source).  Create a flag to force the records to stay unique.
> 
> No.  This is why I said you need to add 'mem' and 'snoop' sort keys into
> the c2c tool.  This is not how sort works IMHO - if you need to make
> samples unique, let the sort key(s) distinguish them somehow, or you can
> combine the same samples (in terms of sort keys) and use the combined
> entry's stat.nr_events and stat.period or weight.

Ok.  I understand your point.  Perhaps this was due to my not fully
understanding the sorting algorithm when I did this.  I can look into
adding the 'mem' and 'snoop'.

One concern I do have is that we were calculating statistics based on the
weight (mean, median, stddev).  I was afraid that combining the entries
would throw off our calculations, as we could no longer accurately
determine them.  Is that true?

Cheers,
Don

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 6/6] perf, sort:  Allow unique sorting instead of combining hist_entries
  2014-04-09 13:57     ` Don Zickus
@ 2014-04-10  5:09       ` Namhyung Kim
  0 siblings, 0 replies; 25+ messages in thread
From: Namhyung Kim @ 2014-04-10  5:09 UTC (permalink / raw)
  To: Don Zickus; +Cc: acme, peterz, LKML, jolsa, jmario, fowles, eranian, andi.kleen

Hi Don,

On Wed, 9 Apr 2014 09:57:06 -0400, Don Zickus wrote:
> On Wed, Apr 09, 2014 at 02:31:00PM +0900, Namhyung Kim wrote:
>> On Mon, 24 Mar 2014 15:34:36 -0400, Don Zickus wrote:
>> > The cache contention tools needs to keep all the perf records unique in order
>> > to properly parse all the data.  Currently add_hist_entry() will combine
>> > the duplicate record and add the weight/period to the existing record.
>> >
>> > This throws away the unique data the cache contention tool needs (mainly
>> > the data source).  Create a flag to force the records to stay unique.
>> 
>> No.  This is why I said you need to add 'mem' and 'snoop' sort keys into
>> the c2c tool.  This is not how sort works IMHO - if you need to make
>> samples unique, let the sort key(s) distinguish them somehow, or you can
>> combine the same samples (in terms of sort keys) and use the combined
>> entry's stat.nr_events and stat.period or weight.
>
> Ok.  I understand your point.  Perhaps this was due to my not fully
> understanding the sorting algorithm when I did this.  I can look into
> adding the 'mem' and 'snoop'.
>
> One concern I do have is that we were calculating statistics based on the
> weight (mean, median, stddev).  I was afraid that combining the entries
> would throw off our calculations, as we could no longer accurately
> determine them.  Is that true?

Yes, if you want to calculate the stats accurately, it needs to be done
when processing samples, not hist_entries, IMHO.
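
For the mean/stddev part, a running accumulator kept while the samples
stream in would do it.  Rough sketch, assuming the struct stats helpers in
tools/perf/util/stat.h are still what I remember (the median is a separate
problem, you would have to keep the raw weights for that):

    #include "util/stat.h"

    /* hypothetical per-entry accumulator, updated from process_sample_event() */
    struct c2c_stats {
            struct stats    weight; /* running mean/variance of sample->weight */
    };

    static void c2c_update_weight(struct c2c_stats *cs, struct perf_sample *sample)
    {
            update_stats(&cs->weight, sample->weight);
    }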

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2014-04-10  5:09 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-03-24 19:34 [PATCH 0/6] perf, events: Enable mmap2 support Don Zickus
2014-03-24 19:34 ` [PATCH 1/6] events, perf: Pass protection and flags bits through mmap2 interface Don Zickus
2014-03-24 19:34 ` [PATCH 2/6] perf: Update mmap2 interface with protection and flag bits Don Zickus
2014-04-09  2:17   ` Namhyung Kim
2014-04-09  2:20     ` Namhyung Kim
2014-03-24 19:34 ` [PATCH 3/6] Revert "perf: Disable PERF_RECORD_MMAP2 support" Don Zickus
2014-04-09  2:32   ` Namhyung Kim
2014-03-24 19:34 ` [PATCH 4/6] perf, sort: Add physid sorting based on mmap2 data Don Zickus
2014-03-24 19:54   ` Andi Kleen
2014-03-24 20:17     ` Don Zickus
2014-03-24 20:20       ` Andi Kleen
2014-03-24 20:26         ` Don Zickus
2014-03-24 20:54   ` [PATCH 01/15 V3] perf: Fix stddev calculation Don Zickus
2014-03-24 20:57     ` Don Zickus
2014-03-24 20:57   ` [PATCH 4/6 V2] perf, sort: Add physid sorting based on mmap2 data Don Zickus
2014-03-29 17:11     ` Jiri Olsa
2014-04-01  2:58       ` Don Zickus
2014-04-09  5:21     ` Namhyung Kim
2014-04-09  5:45       ` Peter Zijlstra
2014-04-09  3:06   ` [PATCH 4/6] " Don Zickus
2014-03-24 19:34 ` [PATCH 5/6] perf: Update sort to handle MAP_SHARED bits Don Zickus
2014-03-24 19:34 ` [PATCH 6/6] perf, sort: Allow unique sorting instead of combining hist_entries Don Zickus
2014-04-09  5:31   ` Namhyung Kim
2014-04-09 13:57     ` Don Zickus
2014-04-10  5:09       ` Namhyung Kim
