* [PATCH v3 1/5] perf/core: Define the common branch type classification
2017-04-11 10:56 [PATCH v3 0/5] perf report: Show branch type Jin Yao
@ 2017-04-11 10:56 ` Jin Yao
2017-04-11 10:56 ` [PATCH v3 2/5] perf/x86/intel: Record branch type Jin Yao
` (3 subsequent siblings)
4 siblings, 0 replies; 12+ messages in thread
From: Jin Yao @ 2017-04-11 10:56 UTC (permalink / raw)
To: acme, jolsa, peterz, mingo, alexander.shishkin
Cc: Linux-kernel, ak, kan.liang, yao.jin, linuxppc-dev, Jin Yao
It is often useful to know the branch types while analyzing branch
data. For example, a call is very different from a conditional branch.
Currently we have to look the branch up in the binary, but the binary
may no longer be available later, and even when it is available the
lookup takes the user some time. It is very useful for the user to be
able to check the branch type directly in perf report.
Perf already has support for disassembling the branch instruction
to get the x86 branch type.
To keep the kernel and userspace consistent and to make the
classification generic, this patch adds a common branch type
classification to perf_event.h.
PERF_BR_NONE : unknown
PERF_BR_JCC : conditional jump
PERF_BR_JMP : jump
PERF_BR_IND_JMP : indirect jump
PERF_BR_CALL : call
PERF_BR_IND_CALL : indirect call
PERF_BR_RET : return
PERF_BR_SYSCALL : syscall
PERF_BR_SYSRET : syscall return
PERF_BR_IRQ : hw interrupt/trap/fault
PERF_BR_INT : sw interrupt
PERF_BR_IRET : return from interrupt
PERF_BR_FAR_BRANCH: not generic far branch type
Since disassembling the branch instruction adds some overhead,
a new PERF_SAMPLE_BRANCH_TYPE_SAVE flag is introduced to indicate
whether the branch instruction needs to be disassembled and the
branch type recorded.
Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
---
include/uapi/linux/perf_event.h | 29 ++++++++++++++++++++++++++++-
tools/include/uapi/linux/perf_event.h | 29 ++++++++++++++++++++++++++++-
2 files changed, 56 insertions(+), 2 deletions(-)
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index d09a9cd..69af012 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -174,6 +174,8 @@ enum perf_branch_sample_type_shift {
PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT = 14, /* no flags */
PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT = 15, /* no cycles */
+ PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */
+
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
};
@@ -198,9 +200,32 @@ enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
+ PERF_SAMPLE_BRANCH_TYPE_SAVE =
+ 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
+
PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
+/*
+ * Common flow change classification
+ */
+enum {
+ PERF_BR_NONE = 0, /* unknown */
+ PERF_BR_JCC = 1, /* conditional jump */
+ PERF_BR_JMP = 2, /* jump */
+ PERF_BR_IND_JMP = 3, /* indirect jump */
+ PERF_BR_CALL = 4, /* call */
+ PERF_BR_IND_CALL = 5, /* indirect call */
+ PERF_BR_RET = 6, /* return */
+ PERF_BR_SYSCALL = 7, /* syscall */
+ PERF_BR_SYSRET = 8, /* syscall return */
+ PERF_BR_IRQ = 9, /* hw interrupt/trap/fault */
+ PERF_BR_INT = 10, /* sw interrupt */
+ PERF_BR_IRET = 11, /* return from interrupt */
+ PERF_BR_FAR_BRANCH = 12, /* not generic far branch type */
+ PERF_BR_MAX,
+};
+
#define PERF_SAMPLE_BRANCH_PLM_ALL \
(PERF_SAMPLE_BRANCH_USER|\
PERF_SAMPLE_BRANCH_KERNEL|\
@@ -999,6 +1024,7 @@ union perf_mem_data_src {
* in_tx: running in a hardware transaction
* abort: aborting a hardware transaction
* cycles: cycles from last branch (or 0 if not supported)
+ * type: branch type
*/
struct perf_branch_entry {
__u64 from;
@@ -1008,7 +1034,8 @@ struct perf_branch_entry {
in_tx:1, /* in transaction */
abort:1, /* transaction abort */
cycles:16, /* cycle count to last branch */
- reserved:44;
+ type:4, /* branch type */
+ reserved:40;
};
#endif /* _UAPI_LINUX_PERF_EVENT_H */
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index d09a9cd..69af012 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -174,6 +174,8 @@ enum perf_branch_sample_type_shift {
PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT = 14, /* no flags */
PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT = 15, /* no cycles */
+ PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */
+
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
};
@@ -198,9 +200,32 @@ enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
+ PERF_SAMPLE_BRANCH_TYPE_SAVE =
+ 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
+
PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
+/*
+ * Common flow change classification
+ */
+enum {
+ PERF_BR_NONE = 0, /* unknown */
+ PERF_BR_JCC = 1, /* conditional jump */
+ PERF_BR_JMP = 2, /* jump */
+ PERF_BR_IND_JMP = 3, /* indirect jump */
+ PERF_BR_CALL = 4, /* call */
+ PERF_BR_IND_CALL = 5, /* indirect call */
+ PERF_BR_RET = 6, /* return */
+ PERF_BR_SYSCALL = 7, /* syscall */
+ PERF_BR_SYSRET = 8, /* syscall return */
+ PERF_BR_IRQ = 9, /* hw interrupt/trap/fault */
+ PERF_BR_INT = 10, /* sw interrupt */
+ PERF_BR_IRET = 11, /* return from interrupt */
+ PERF_BR_FAR_BRANCH = 12, /* not generic far branch type */
+ PERF_BR_MAX,
+};
+
#define PERF_SAMPLE_BRANCH_PLM_ALL \
(PERF_SAMPLE_BRANCH_USER|\
PERF_SAMPLE_BRANCH_KERNEL|\
@@ -999,6 +1024,7 @@ union perf_mem_data_src {
* in_tx: running in a hardware transaction
* abort: aborting a hardware transaction
* cycles: cycles from last branch (or 0 if not supported)
+ * type: branch type
*/
struct perf_branch_entry {
__u64 from;
@@ -1008,7 +1034,8 @@ struct perf_branch_entry {
in_tx:1, /* in transaction */
abort:1, /* transaction abort */
cycles:16, /* cycle count to last branch */
- reserved:44;
+ type:4, /* branch type */
+ reserved:40;
};
#endif /* _UAPI_LINUX_PERF_EVENT_H */
--
2.7.4
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v3 4/5] perf report: Show branch type statistics for stdio mode
2017-04-11 10:56 [PATCH v3 0/5] perf report: Show branch type Jin Yao
` (2 preceding siblings ...)
2017-04-11 10:56 ` [PATCH v3 3/5] perf record: Create a new option save_type in --branch-filter Jin Yao
@ 2017-04-11 10:56 ` Jin Yao
2017-04-11 10:56 ` [PATCH v3 5/5] perf report: Show branch type in callchain entry Jin Yao
4 siblings, 0 replies; 12+ messages in thread
From: Jin Yao @ 2017-04-11 10:56 UTC (permalink / raw)
To: acme, jolsa, peterz, mingo, alexander.shishkin
Cc: Linux-kernel, ak, kan.liang, yao.jin, linuxppc-dev, Jin Yao
Show the branch type statistics at the end of perf report --stdio.
For example:
perf report --stdio
JCC forward: 27.8%
JCC backward: 9.7%
CROSS_4K: 0.0%
CROSS_2M: 14.3%
JCC: 37.6%
JMP: 0.0%
IND_JMP: 6.5%
CALL: 26.6%
RET: 29.3%
IRET: 0.0%
The branch types are:
---------------------
JCC forward: Conditional forward jump
JCC backward: Conditional backward jump
JMP: Jump imm
IND_JMP: Jump reg/mem
CALL: Call imm
IND_CALL: Call reg/mem
RET: Ret
SYSCALL: Syscall
SYSRET: Syscall return
IRQ: HW interrupt/trap/fault
INT: SW interrupt
IRET: Return from interrupt
FAR_BRANCH: Other far branch types that are not generic
CROSS_4K and CROSS_2M:
----------------------
These metrics count branches that cross 4K or 2MB page boundaries.
The computation is approximate: we don't know whether the area is 4K
or 2MB, so we always compute both.
To keep the output simple, if a branch crosses a 2M area, CROSS_4K
is not incremented.
Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
---
tools/perf/builtin-report.c | 70 +++++++++++++++++++++++++++++++++++++++++++++
tools/perf/util/event.h | 3 +-
tools/perf/util/hist.c | 5 +---
tools/perf/util/util.c | 59 ++++++++++++++++++++++++++++++++++++++
tools/perf/util/util.h | 17 +++++++++++
5 files changed, 149 insertions(+), 5 deletions(-)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index c18158b..c2889eb 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -66,6 +66,7 @@ struct report {
u64 queue_size;
int socket_filter;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+ struct branch_type_stat brtype_stat;
};
static int report__config(const char *var, const char *value, void *cb)
@@ -144,6 +145,24 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
return err;
}
+static int hist_iter__branch_callback(struct hist_entry_iter *iter,
+ struct addr_location *al __maybe_unused,
+ bool single __maybe_unused,
+ void *arg)
+{
+ struct hist_entry *he = iter->he;
+ struct report *rep = arg;
+ struct branch_info *bi;
+
+ if (sort__mode == SORT_MODE__BRANCH) {
+ bi = he->branch_info;
+ branch_type_count(&rep->brtype_stat, &bi->flags,
+ bi->from.addr, bi->to.addr);
+ }
+
+ return 0;
+}
+
static int process_sample_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -182,6 +201,8 @@ static int process_sample_event(struct perf_tool *tool,
*/
if (!sample->branch_stack)
goto out_put;
+
+ iter.add_entry_cb = hist_iter__branch_callback;
iter.ops = &hist_iter_branch;
} else if (rep->mem_mode) {
iter.ops = &hist_iter_mem;
@@ -369,6 +390,50 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
return ret + fprintf(fp, "\n#\n");
}
+static void branch_type_stat_display(FILE *fp, struct branch_type_stat *stat)
+{
+ u64 total = 0;
+ int i;
+
+ for (i = 0; i < PERF_BR_MAX; i++)
+ total += stat->counts[i];
+
+ if (total == 0)
+ return;
+
+ fprintf(fp, "\n#");
+ fprintf(fp, "\n# Branch Statistics:");
+ fprintf(fp, "\n#");
+
+ if (stat->jcc_fwd > 0)
+ fprintf(fp, "\n%12s: %5.1f%%",
+ "JCC forward",
+ 100.0 * (double)stat->jcc_fwd / (double)total);
+
+ if (stat->jcc_bwd > 0)
+ fprintf(fp, "\n%12s: %5.1f%%",
+ "JCC backward",
+ 100.0 * (double)stat->jcc_bwd / (double)total);
+
+ if (stat->cross_4k > 0)
+ fprintf(fp, "\n%12s: %5.1f%%",
+ "CROSS_4K",
+ 100.0 * (double)stat->cross_4k / (double)total);
+
+ if (stat->cross_2m > 0)
+ fprintf(fp, "\n%12s: %5.1f%%",
+ "CROSS_2M",
+ 100.0 * (double)stat->cross_2m / (double)total);
+
+ for (i = 0; i < PERF_BR_MAX; i++) {
+ if (stat->counts[i] > 0)
+ fprintf(fp, "\n%12s: %5.1f%%",
+ branch_type_name(i),
+ 100.0 *
+ (double)stat->counts[i] / (double)total);
+ }
+}
+
static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
struct report *rep,
const char *help)
@@ -404,6 +469,9 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
perf_read_values_destroy(&rep->show_threads_values);
}
+ if (sort__mode == SORT_MODE__BRANCH)
+ branch_type_stat_display(stdout, &rep->brtype_stat);
+
return 0;
}
@@ -936,6 +1004,8 @@ int cmd_report(int argc, const char **argv)
if (has_br_stack && branch_call_mode)
symbol_conf.show_branchflag_count = true;
+ memset(&report.brtype_stat, 0, sizeof(struct branch_type_stat));
+
/*
* Branch mode is a tristate:
* -1 means default, so decide based on the file having branch data.
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index eb7a7b2..26b4c2e 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -142,7 +142,8 @@ struct branch_flags {
u64 in_tx:1;
u64 abort:1;
u64 cycles:16;
- u64 reserved:44;
+ u64 type:4;
+ u64 reserved:40;
};
struct branch_entry {
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 61bf304..c8aee25 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -745,12 +745,9 @@ iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al
}
static int
-iter_add_single_branch_entry(struct hist_entry_iter *iter,
+iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused,
struct addr_location *al __maybe_unused)
{
- /* to avoid calling callback function */
- iter->he = NULL;
-
return 0;
}
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index d8b45ce..a4b54a9 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -802,3 +802,62 @@ int unit_number__scnprintf(char *buf, size_t size, u64 n)
return scnprintf(buf, size, "%" PRIu64 "%c", n, unit[i]);
}
+
+static bool cross_area(u64 addr1, u64 addr2, int size)
+{
+ u64 align1, align2;
+
+ align1 = addr1 & ~(size - 1);
+ align2 = addr2 & ~(size - 1);
+
+ return (align1 != align2) ? true : false;
+}
+
+#define AREA_4K 4096
+#define AREA_2M (2 * 1024 * 1024)
+
+void branch_type_count(struct branch_type_stat *stat,
+ struct branch_flags *flags,
+ u64 from, u64 to)
+{
+ if ((flags->type == PERF_BR_NONE) || (from == 0))
+ return;
+
+ stat->counts[flags->type]++;
+
+ if (flags->type == PERF_BR_JCC) {
+ if (to > from)
+ stat->jcc_fwd++;
+ else
+ stat->jcc_bwd++;
+ }
+
+ if (cross_area(from, to, AREA_2M))
+ stat->cross_2m++;
+ else if (cross_area(from, to, AREA_4K))
+ stat->cross_4k++;
+}
+
+const char *branch_type_name(int type)
+{
+ const char *branch_names[PERF_BR_MAX] = {
+ "N/A",
+ "JCC",
+ "JMP",
+ "IND_JMP",
+ "CALL",
+ "IND_CALL",
+ "RET",
+ "SYSCALL",
+ "SYSRET",
+ "IRQ",
+ "INT",
+ "IRET",
+ "FAR_BRANCH",
+ };
+
+ if ((type >= 0) && (type < PERF_BR_MAX))
+ return branch_names[type];
+
+ return NULL;
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 7cf5752..0a5bbcc 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -79,6 +79,7 @@
#include <linux/bitops.h>
#include <termios.h>
#include "strlist.h"
+#include "../perf.h"
extern const char *graph_line;
extern const char *graph_dotted_line;
@@ -380,4 +381,20 @@ struct inline_node {
struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr);
void inline_node__delete(struct inline_node *node);
+struct branch_type_stat {
+ u64 counts[PERF_BR_MAX];
+ u64 jcc_fwd;
+ u64 jcc_bwd;
+ u64 cross_4k;
+ u64 cross_2m;
+};
+
+struct branch_flags;
+
+void branch_type_count(struct branch_type_stat *stat,
+ struct branch_flags *flags,
+ u64 from, u64 to);
+
+const char *branch_type_name(int type);
+
#endif /* GIT_COMPAT_UTIL_H */
--
2.7.4
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v3 5/5] perf report: Show branch type in callchain entry
2017-04-11 10:56 [PATCH v3 0/5] perf report: Show branch type Jin Yao
` (3 preceding siblings ...)
2017-04-11 10:56 ` [PATCH v3 4/5] perf report: Show branch type statistics for stdio mode Jin Yao
@ 2017-04-11 10:56 ` Jin Yao
4 siblings, 0 replies; 12+ messages in thread
From: Jin Yao @ 2017-04-11 10:56 UTC (permalink / raw)
To: acme, jolsa, peterz, mingo, alexander.shishkin
Cc: Linux-kernel, ak, kan.liang, yao.jin, linuxppc-dev, Jin Yao
Show the branch type in each callchain entry. The branch type is printed
together with the other LBR information (such as cycles/abort/...).
One example:
perf report --branch-history --stdio --no-children
--23.54%--main div.c:42 (CROSS_2M RET cycles:2)
compute_flag div.c:28 (RET cycles:2)
compute_flag div.c:27 (CROSS_2M RET cycles:1)
rand rand.c:28 (CROSS_4K RET cycles:1)
rand rand.c:28 (CROSS_2M RET cycles:1)
__random random.c:298 (CROSS_4K RET cycles:1)
__random random.c:297 (JCC backward CROSS_2M cycles:1)
__random random.c:295 (JCC forward CROSS_4K cycles:1)
__random random.c:295 (JCC backward CROSS_2M cycles:1)
__random random.c:295 (JCC forward CROSS_4K cycles:1)
__random random.c:295 (CROSS_2M RET cycles:9)
Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
---
tools/perf/util/callchain.c | 195 ++++++++++++++++++++++++++++++--------------
tools/perf/util/callchain.h | 4 +-
tools/perf/util/machine.c | 26 ++++--
3 files changed, 152 insertions(+), 73 deletions(-)
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 2e5eff5..3c875b1 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -467,6 +467,11 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
call->cycles_count = cursor_node->branch_flags.cycles;
call->iter_count = cursor_node->nr_loop_iter;
call->samples_count = cursor_node->samples;
+
+ branch_type_count(&call->brtype_stat,
+ &cursor_node->branch_flags,
+ cursor_node->branch_from,
+ cursor_node->ip);
}
list_add_tail(&call->list, &node->val);
@@ -579,6 +584,11 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
cnode->cycles_count += node->branch_flags.cycles;
cnode->iter_count += node->nr_loop_iter;
cnode->samples_count += node->samples;
+
+ branch_type_count(&cnode->brtype_stat,
+ &node->branch_flags,
+ node->branch_from,
+ node->ip);
}
return MATCH_EQ;
@@ -813,7 +823,7 @@ merge_chain_branch(struct callchain_cursor *cursor,
list_for_each_entry_safe(list, next_list, &src->val, list) {
callchain_cursor_append(cursor, list->ip,
list->ms.map, list->ms.sym,
- false, NULL, 0, 0);
+ false, NULL, 0, 0, 0);
list_del(&list->list);
map__zput(list->ms.map);
free(list);
@@ -853,7 +863,7 @@ int callchain_merge(struct callchain_cursor *cursor,
int callchain_cursor_append(struct callchain_cursor *cursor,
u64 ip, struct map *map, struct symbol *sym,
bool branch, struct branch_flags *flags,
- int nr_loop_iter, int samples)
+ int nr_loop_iter, int samples, u64 branch_from)
{
struct callchain_cursor_node *node = *cursor->last;
@@ -877,6 +887,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
memcpy(&node->branch_flags, flags,
sizeof(struct branch_flags));
+ node->branch_from = branch_from;
cursor->nr++;
cursor->last = &node->next;
@@ -1105,95 +1116,151 @@ int callchain_branch_counts(struct callchain_root *root,
cycles_count);
}
+static int branch_type_str(struct branch_type_stat *stat,
+ char *bf, int bfsize)
+{
+ int i, j = 0, printed = 0;
+ u64 total = 0;
+
+ for (i = 0; i < PERF_BR_MAX; i++)
+ total += stat->counts[i];
+
+ if (total == 0)
+ return 0;
+
+ printed += scnprintf(bf + printed, bfsize - printed, " (");
+
+ if (stat->jcc_fwd > 0) {
+ j++;
+ printed += scnprintf(bf + printed, bfsize - printed,
+ "JCC forward");
+ }
+
+ if (stat->jcc_bwd > 0) {
+ if (j++)
+ printed += scnprintf(bf + printed, bfsize - printed,
+ " JCC backward");
+ else
+ printed += scnprintf(bf + printed, bfsize - printed,
+ "JCC backward");
+ }
+
+ if (stat->cross_4k > 0) {
+ if (j++)
+ printed += scnprintf(bf + printed, bfsize - printed,
+ " CROSS_4K");
+ else
+ printed += scnprintf(bf + printed, bfsize - printed,
+ "CROSS_4K");
+ }
+
+ if (stat->cross_2m > 0) {
+ if (j++)
+ printed += scnprintf(bf + printed, bfsize - printed,
+ " CROSS_2M");
+ else
+ printed += scnprintf(bf + printed, bfsize - printed,
+ "CROSS_2M");
+ }
+
+ for (i = 0; i < PERF_BR_MAX; i++) {
+ if (i == PERF_BR_JCC)
+ continue;
+
+ if (stat->counts[i] > 0) {
+ if (j++)
+ printed += scnprintf(bf + printed,
+ bfsize - printed,
+ " %s",
+ branch_type_name(i));
+ else
+ printed += scnprintf(bf + printed,
+ bfsize - printed,
+ "%s",
+ branch_type_name(i));
+ }
+ }
+
+ return printed;
+}
+
static int counts_str_build(char *bf, int bfsize,
u64 branch_count, u64 predicted_count,
u64 abort_count, u64 cycles_count,
- u64 iter_count, u64 samples_count)
+ u64 iter_count, u64 samples_count,
+ struct branch_type_stat *brtype_stat)
{
- double predicted_percent = 0.0;
- const char *null_str = "";
- char iter_str[32];
- char cycle_str[32];
- char *istr, *cstr;
u64 cycles;
+ int printed, i = 0;
if (branch_count == 0)
return scnprintf(bf, bfsize, " (calltrace)");
+ printed = branch_type_str(brtype_stat, bf, bfsize);
+ if (printed)
+ i++;
+
cycles = cycles_count / branch_count;
+ if (cycles) {
+ if (i++)
+ printed += scnprintf(bf + printed, bfsize - printed,
+ " cycles:%" PRId64 "", cycles);
+ else
+ printed += scnprintf(bf + printed, bfsize - printed,
+ " (cycles:%" PRId64 "", cycles);
+ }
if (iter_count && samples_count) {
- if (cycles > 0)
- scnprintf(iter_str, sizeof(iter_str),
- " iterations:%" PRId64 "",
- iter_count / samples_count);
+ if (i++)
+ printed += scnprintf(bf + printed, bfsize - printed,
+ " iterations:%" PRId64 "",
+ iter_count / samples_count);
else
- scnprintf(iter_str, sizeof(iter_str),
- "iterations:%" PRId64 "",
- iter_count / samples_count);
- istr = iter_str;
- } else
- istr = (char *)null_str;
-
- if (cycles > 0) {
- scnprintf(cycle_str, sizeof(cycle_str),
- "cycles:%" PRId64 "", cycles);
- cstr = cycle_str;
- } else
- cstr = (char *)null_str;
-
- predicted_percent = predicted_count * 100.0 / branch_count;
+ printed += scnprintf(bf + printed, bfsize - printed,
+ " (iterations:%" PRId64 "",
+ iter_count / samples_count);
+ }
- if ((predicted_count == branch_count) && (abort_count == 0)) {
- if ((cycles > 0) || (istr != (char *)null_str))
- return scnprintf(bf, bfsize, " (%s%s)", cstr, istr);
+ if (predicted_count < branch_count) {
+ if (i++)
+ printed += scnprintf(bf + printed, bfsize - printed,
+ " predicted:%.1f%%",
+ predicted_count * 100.0 / branch_count);
else
- return scnprintf(bf, bfsize, "%s", (char *)null_str);
- }
-
- if ((predicted_count < branch_count) && (abort_count == 0)) {
- if ((cycles > 0) || (istr != (char *)null_str))
- return scnprintf(bf, bfsize,
- " (predicted:%.1f%% %s%s)",
- predicted_percent, cstr, istr);
- else {
- return scnprintf(bf, bfsize,
- " (predicted:%.1f%%)",
- predicted_percent);
- }
+ printed += scnprintf(bf + printed, bfsize - printed,
+ " (predicted:%.1f%%",
+ predicted_count * 100.0 / branch_count);
}
- if ((predicted_count == branch_count) && (abort_count > 0)) {
- if ((cycles > 0) || (istr != (char *)null_str))
- return scnprintf(bf, bfsize,
- " (abort:%" PRId64 " %s%s)",
- abort_count, cstr, istr);
+ if (abort_count) {
+ if (i++)
+ printed += scnprintf(bf + printed, bfsize - printed,
+ " abort:%.1f%%",
+ abort_count * 100.0 / branch_count);
else
- return scnprintf(bf, bfsize,
- " (abort:%" PRId64 ")",
- abort_count);
+ printed += scnprintf(bf + printed, bfsize - printed,
+ " (abort:%.1f%%",
+ abort_count * 100.0 / branch_count);
}
- if ((cycles > 0) || (istr != (char *)null_str))
- return scnprintf(bf, bfsize,
- " (predicted:%.1f%% abort:%" PRId64 " %s%s)",
- predicted_percent, abort_count, cstr, istr);
+ if (i)
+ return scnprintf(bf + printed, bfsize - printed, ")");
- return scnprintf(bf, bfsize,
- " (predicted:%.1f%% abort:%" PRId64 ")",
- predicted_percent, abort_count);
+ bf[0] = 0;
+ return 0;
}
static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
u64 branch_count, u64 predicted_count,
u64 abort_count, u64 cycles_count,
- u64 iter_count, u64 samples_count)
+ u64 iter_count, u64 samples_count,
+ struct branch_type_stat *brtype_stat)
{
- char str[128];
+ char str[256];
counts_str_build(str, sizeof(str), branch_count,
predicted_count, abort_count, cycles_count,
- iter_count, samples_count);
+ iter_count, samples_count, brtype_stat);
if (fp)
return fprintf(fp, "%s", str);
@@ -1225,7 +1292,8 @@ int callchain_list_counts__printf_value(struct callchain_node *node,
return callchain_counts_printf(fp, bf, bfsize, branch_count,
predicted_count, abort_count,
- cycles_count, iter_count, samples_count);
+ cycles_count, iter_count, samples_count,
+ &clist->brtype_stat);
}
static void free_callchain_node(struct callchain_node *node)
@@ -1350,7 +1418,8 @@ int callchain_cursor__copy(struct callchain_cursor *dst,
rc = callchain_cursor_append(dst, node->ip, node->map, node->sym,
node->branch, &node->branch_flags,
- node->nr_loop_iter, node->samples);
+ node->nr_loop_iter, node->samples,
+ node->branch_from);
if (rc)
break;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index c56c23d..b93897a 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -119,6 +119,7 @@ struct callchain_list {
u64 cycles_count;
u64 iter_count;
u64 samples_count;
+ struct branch_type_stat brtype_stat;
char *srcline;
struct list_head list;
};
@@ -135,6 +136,7 @@ struct callchain_cursor_node {
struct symbol *sym;
bool branch;
struct branch_flags branch_flags;
+ u64 branch_from;
int nr_loop_iter;
int samples;
struct callchain_cursor_node *next;
@@ -198,7 +200,7 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
struct map *map, struct symbol *sym,
bool branch, struct branch_flags *flags,
- int nr_loop_iter, int samples);
+ int nr_loop_iter, int samples, u64 branch_from);
/* Close a cursor writing session. Initialize for the reader */
static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index dfc6004..2309614 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1673,7 +1673,8 @@ static int add_callchain_ip(struct thread *thread,
bool branch,
struct branch_flags *flags,
int nr_loop_iter,
- int samples)
+ int samples,
+ u64 branch_from)
{
struct addr_location al;
@@ -1726,7 +1727,8 @@ static int add_callchain_ip(struct thread *thread,
if (symbol_conf.hide_unresolved && al.sym == NULL)
return 0;
return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
- branch, flags, nr_loop_iter, samples);
+ branch, flags, nr_loop_iter, samples,
+ branch_from);
}
struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@@ -1805,7 +1807,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
struct ip_callchain *chain = sample->callchain;
int chain_nr = min(max_stack, (int)chain->nr), i;
u8 cpumode = PERF_RECORD_MISC_USER;
- u64 ip;
+ u64 ip, branch_from = 0;
for (i = 0; i < chain_nr; i++) {
if (chain->ips[i] == PERF_CONTEXT_USER)
@@ -1847,6 +1849,8 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
ip = lbr_stack->entries[0].to;
branch = true;
flags = &lbr_stack->entries[0].flags;
+ branch_from =
+ lbr_stack->entries[0].from;
}
} else {
if (j < lbr_nr) {
@@ -1861,12 +1865,15 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
ip = lbr_stack->entries[0].to;
branch = true;
flags = &lbr_stack->entries[0].flags;
+ branch_from =
+ lbr_stack->entries[0].from;
}
}
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
- branch, flags, 0, 0);
+ branch, flags, 0, 0,
+ branch_from);
if (err)
return (err < 0) ? err : 0;
}
@@ -1965,19 +1972,20 @@ static int thread__resolve_callchain_sample(struct thread *thread,
root_al,
NULL, be[i].to,
true, &be[i].flags,
- nr_loop_iter, 1);
+ nr_loop_iter, 1,
+ be[i].from);
else
err = add_callchain_ip(thread, cursor, parent,
root_al,
NULL, be[i].to,
true, &be[i].flags,
- 0, 0);
+ 0, 0, be[i].from);
if (!err)
err = add_callchain_ip(thread, cursor, parent, root_al,
NULL, be[i].from,
true, &be[i].flags,
- 0, 0);
+ 0, 0, 0);
if (err == -EINVAL)
break;
if (err)
@@ -2007,7 +2015,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
- false, NULL, 0, 0);
+ false, NULL, 0, 0, 0);
if (err)
return (err < 0) ? err : 0;
@@ -2024,7 +2032,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
return 0;
return callchain_cursor_append(cursor, entry->ip,
entry->map, entry->sym,
- false, NULL, 0, 0);
+ false, NULL, 0, 0, 0);
}
static int thread__resolve_callchain_unwind(struct thread *thread,
--
2.7.4
^ permalink raw reply related [flat|nested] 12+ messages in thread