* [PATCH v1 2/5] perf util: Count the total cycles of all samples
2019-10-08 7:04 [PATCH v1 0/5] perf report: Support sorting all blocks by cycles Jin Yao
2019-10-08 7:04 ` [PATCH v1 1/5] perf util: Create new block.h/block.c for block related functions Jin Yao
@ 2019-10-08 7:04 ` Jin Yao
2019-10-08 7:05 ` [PATCH v1 3/5] perf report: Sort by sampled cycles percent per block for stdio Jin Yao
` (3 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Jin Yao @ 2019-10-08 7:04 UTC (permalink / raw)
To: acme, jolsa, peterz, mingo, alexander.shishkin
Cc: Linux-kernel, ak, kan.liang, yao.jin, Jin Yao
We can get the per sample cycles by hist__account_cycles(). It's also
useful to know the total cycles of all samples in order to get the
cycles coverage for a single program block in further. For example,
coverage = per block sampled cycles / total sampled cycles
This patch creates a new argument 'total_cycles' in hist__account_cycles(),
which will be added with the cycles of each sample.
Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
---
tools/perf/builtin-annotate.c | 2 +-
tools/perf/builtin-diff.c | 3 ++-
tools/perf/builtin-report.c | 2 +-
tools/perf/builtin-top.c | 3 ++-
tools/perf/util/hist.c | 6 +++++-
tools/perf/util/hist.h | 3 ++-
6 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 8db8fc9bddef..6ab0cc45b287 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -201,7 +201,7 @@ static int process_branch_callback(struct evsel *evsel,
if (a.map != NULL)
a.map->dso->hit = 1;
- hist__account_cycles(sample->branch_stack, al, sample, false);
+ hist__account_cycles(sample->branch_stack, al, sample, false, NULL);
ret = hist_entry_iter__add(&iter, &a, PERF_MAX_STACK_DEPTH, ann);
return ret;
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index c5242737a6df..029da813bfa6 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -421,7 +421,8 @@ static int diff__process_sample_event(struct perf_tool *tool,
goto out_put;
}
- hist__account_cycles(sample->branch_stack, &al, sample, false);
+ hist__account_cycles(sample->branch_stack, &al, sample, false,
+ NULL);
}
/*
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index aae0e57c60fb..094bb43cbcf5 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -292,7 +292,7 @@ static int process_sample_event(struct perf_tool *tool,
if (ui__has_annotation() || rep->symbol_ipc) {
hist__account_cycles(sample->branch_stack, &al, sample,
- rep->nonany_branch_mode);
+ rep->nonany_branch_mode, NULL);
}
ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 611d03030abc..d2fc44da8b03 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -724,7 +724,8 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
perf_top__record_precise_ip(top, he, iter->sample, evsel, al->addr);
hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
- !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY));
+ !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY),
+ NULL);
return 0;
}
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 26ee45a3e5d0..af65ce950ba2 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -2570,7 +2570,8 @@ int hists__unlink(struct hists *hists)
}
void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
- struct perf_sample *sample, bool nonany_branch_mode)
+ struct perf_sample *sample, bool nonany_branch_mode,
+ u64 *total_cycles)
{
struct branch_info *bi;
@@ -2597,6 +2598,9 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
nonany_branch_mode ? NULL : prev,
bi[i].flags.cycles);
prev = &bi[i].to;
+
+ if (total_cycles)
+ *total_cycles += bi[i].flags.cycles;
}
free(bi);
}
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 6a186b668303..4d87c7b4c1b2 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -527,7 +527,8 @@ unsigned int hists__sort_list_width(struct hists *hists);
unsigned int hists__overhead_width(struct hists *hists);
void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
- struct perf_sample *sample, bool nonany_branch_mode);
+ struct perf_sample *sample, bool nonany_branch_mode,
+ u64 *total_cycles);
struct option;
int parse_filter_percentage(const struct option *opt, const char *arg, int unset);
--
2.17.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v1 3/5] perf report: Sort by sampled cycles percent per block for stdio
2019-10-08 7:04 [PATCH v1 0/5] perf report: Support sorting all blocks by cycles Jin Yao
2019-10-08 7:04 ` [PATCH v1 1/5] perf util: Create new block.h/block.c for block related functions Jin Yao
2019-10-08 7:04 ` [PATCH v1 2/5] perf util: Count the total cycles of all samples Jin Yao
@ 2019-10-08 7:05 ` Jin Yao
2019-10-08 7:05 ` [PATCH v1 4/5] perf report: Support --percent-limit for total_cycles Jin Yao
` (2 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Jin Yao @ 2019-10-08 7:05 UTC (permalink / raw)
To: acme, jolsa, peterz, mingo, alexander.shishkin
Cc: Linux-kernel, ak, kan.liang, yao.jin, Jin Yao
It would be useful to support sorting for all blocks by the
sampled cycles percent per block. This is useful to concentrate
on the globally busiest/slowest blocks.
This patch implements a new sort option "total_cycles" which sorts
all blocks by 'Sampled Cycles%'. The 'Sampled Cycles%' is
block sampled cycles aggregation / total sampled cycles
Note that, this patch only supports "--stdio" mode.
For example,
perf record -b ./div
perf report -s total_cycles --stdio
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 2M of event 'cycles'
# Event count (approx.): 2753248
#
# Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object
# ............... .............. ........... .......... ................................................................. ....................
#
26.04% 2.8M 0.40% 18 [div.c:42 -> div.c:39] div
15.17% 1.2M 0.16% 7 [random_r.c:357 -> random_r.c:380] libc-2.27.so
5.11% 402.0K 0.04% 2 [div.c:27 -> div.c:28] div
4.87% 381.6K 0.04% 2 [random.c:288 -> random.c:291] libc-2.27.so
4.53% 381.0K 0.04% 2 [div.c:40 -> div.c:40] div
3.85% 300.9K 0.02% 1 [div.c:22 -> div.c:25] div
3.08% 241.1K 0.02% 1 [rand.c:26 -> rand.c:27] libc-2.27.so
3.06% 240.0K 0.02% 1 [random.c:291 -> random.c:291] libc-2.27.so
2.78% 215.7K 0.02% 1 [random.c:298 -> random.c:298] libc-2.27.so
2.52% 198.3K 0.02% 1 [random.c:293 -> random.c:293] libc-2.27.so
2.36% 184.8K 0.02% 1 [rand.c:28 -> rand.c:28] libc-2.27.so
2.33% 180.5K 0.02% 1 [random.c:295 -> random.c:295] libc-2.27.so
2.28% 176.7K 0.02% 1 [random.c:295 -> random.c:295] libc-2.27.so
2.20% 168.8K 0.02% 1 [rand@plt+0 -> rand@plt+0] div
1.98% 158.2K 0.02% 1 [random_r.c:388 -> random_r.c:388] libc-2.27.so
1.57% 123.3K 0.02% 1 [div.c:42 -> div.c:44] div
1.44% 116.0K 0.42% 19 [random_r.c:357 -> random_r.c:394] libc-2.27.so
0.25% 182.5K 0.02% 1 [random_r.c:388 -> random_r.c:391] libc-2.27.so
0.00% 48 1.07% 48 [x86_pmu_enable+284 -> x86_pmu_enable+298] [kernel.kallsyms]
0.00% 74 1.64% 74 [vm_mmap_pgoff+0 -> vm_mmap_pgoff+92] [kernel.kallsyms]
0.00% 73 1.62% 73 [vm_mmap+0 -> vm_mmap+48] [kernel.kallsyms]
0.00% 63 0.69% 31 [up_write+0 -> up_write+34] [kernel.kallsyms]
0.00% 13 0.29% 13 [setup_arg_pages+396 -> setup_arg_pages+413] [kernel.kallsyms]
0.00% 3 0.07% 3 [setup_arg_pages+418 -> setup_arg_pages+450] [kernel.kallsyms]
0.00% 616 6.84% 308 [security_mmap_file+0 -> security_mmap_file+72] [kernel.kallsyms]
0.00% 23 0.51% 23 [security_mmap_file+77 -> security_mmap_file+87] [kernel.kallsyms]
0.00% 4 0.02% 1 [sched_clock+0 -> sched_clock+4] [kernel.kallsyms]
0.00% 4 0.02% 1 [sched_clock+9 -> sched_clock+12] [kernel.kallsyms]
0.00% 1 0.02% 1 [rcu_nmi_exit+0 -> rcu_nmi_exit+9] [kernel.kallsyms]
0.00% 1 0.02% 1 [rcu_nmi_exit+14 -> rcu_nmi_exit+15] [kernel.kallsyms]
0.00% 5 0.11% 5 [rcu_irq_exit+0 -> rcu_irq_exit+79] [kernel.kallsyms]
0.00% 2 0.04% 2 [printk_nmi_exit+0 -> printk_nmi_exit+16] [kernel.kallsyms]
0.00% 3 0.07% 3 [perf_sample_event_took+0 -> perf_sample_event_took+56] [kernel.kallsyms]
0.00% 1 0.02% 1 [perf_sample_event_took+184 -> perf_sample_event_took+185] [kernel.kallsyms]
0.00% 72 1.60% 72 [perf_iterate_ctx+0 -> perf_iterate_ctx+50] [kernel.kallsyms]
0.00% 1 0.02% 1 [perf_event_nmi_handler+50 -> perf_event_nmi_handler+52] [kernel.kallsyms]
0.00% 1 0.02% 1 [perf_event_nmi_handler+57 -> perf_event_nmi_handler+63] [kernel.kallsyms]
0.00% 1 0.02% 1 [perf_event_nmi_handler+68 -> perf_event_nmi_handler+74] [kernel.kallsyms]
Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
---
tools/perf/Documentation/perf-report.txt | 10 +
tools/perf/builtin-report.c | 423 ++++++++++++++++++++++-
tools/perf/ui/stdio/hist.c | 22 ++
tools/perf/util/block.h | 1 +
tools/perf/util/hist.c | 4 +
tools/perf/util/sort.c | 5 +
tools/perf/util/sort.h | 1 +
tools/perf/util/symbol_conf.h | 1 +
8 files changed, 463 insertions(+), 4 deletions(-)
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 7315f155803f..b1f9c93a91fd 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -124,6 +124,7 @@ OPTIONS
- in_tx: branch in TSX transaction
- abort: TSX transaction abort.
- cycles: Cycles in basic block
+ - total_cycles: sort all blocks by the 'Sampled Cycles%'.
And default sort keys are changed to comm, dso_from, symbol_from, dso_to
and symbol_to, see '--branch-stack'.
@@ -136,6 +137,15 @@ OPTIONS
executed, such as a memory access bottleneck. If a function has high overhead
and low IPC, it's worth further analyzing it to optimize its performance.
+ When the total_cycles is specified, it supports sorting for all blocks by the
+ 'Sampled Cycles%'. This is useful to concentrate on the globally slowest blocks.
+ In output, there are some new columns:
+ 'Sampled Cycles%' - block sampled cycles aggregation / total sampled cycles
+ 'Sampled Cycles' - block sampled cycles aggregation
+ 'Avg Cycles%' - block average sampled cycles / sum of total block average
+ sampled cycles
+ 'Avg Cycles' - block average sampled cycles
+
If the --mem-mode option is used, the following sort keys are also available
(incompatible with --branch-stack):
symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 094bb43cbcf5..189fd1b75ac8 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -51,6 +51,7 @@
#include "util/util.h" // perf_tip()
#include "ui/ui.h"
#include "ui/progress.h"
+#include "util/block.h"
#include <dlfcn.h>
#include <errno.h>
@@ -96,10 +97,64 @@ struct report {
float min_percent;
u64 nr_entries;
u64 queue_size;
+ u64 cycles_count;
+ u64 block_cycles;
int socket_filter;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
struct branch_type_stat brtype_stat;
bool symbol_ipc;
+ bool total_cycles;
+ struct block_hist block_hist;
+};
+
+struct block_fmt {
+ struct perf_hpp_fmt fmt;
+ int idx;
+ int width;
+ const char *header;
+ struct report *rep;
+};
+
+enum {
+ PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV,
+ PERF_HPP_REPORT__BLOCK_LBR_CYCLES,
+ PERF_HPP_REPORT__BLOCK_CYCLES_PCT,
+ PERF_HPP_REPORT__BLOCK_AVG_CYCLES,
+ PERF_HPP_REPORT__BLOCK_RANGE,
+ PERF_HPP_REPORT__BLOCK_DSO,
+ PERF_HPP_REPORT__BLOCK_MAX_INDEX
+};
+
+static struct block_fmt block_fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX];
+
+static struct block_header_column{
+ const char *name;
+ int width;
+} block_columns[PERF_HPP_REPORT__BLOCK_MAX_INDEX] = {
+ [PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV] = {
+ .name = "Sampled Cycles%",
+ .width = 15,
+ },
+ [PERF_HPP_REPORT__BLOCK_LBR_CYCLES] = {
+ .name = "Sampled Cycles",
+ .width = 14,
+ },
+ [PERF_HPP_REPORT__BLOCK_CYCLES_PCT] = {
+ .name = "Avg Cycles%",
+ .width = 11,
+ },
+ [PERF_HPP_REPORT__BLOCK_AVG_CYCLES] = {
+ .name = "Avg Cycles",
+ .width = 10,
+ },
+ [PERF_HPP_REPORT__BLOCK_RANGE] = {
+ .name = "[Program Block Range]",
+ .width = 70,
+ },
+ [PERF_HPP_REPORT__BLOCK_DSO] = {
+ .name = "Shared Object",
+ .width = 20,
+ }
};
static int report__config(const char *var, const char *value, void *cb)
@@ -277,7 +332,8 @@ static int process_sample_event(struct perf_tool *tool,
if (!sample->branch_stack)
goto out_put;
- iter.add_entry_cb = hist_iter__branch_callback;
+ if (!rep->total_cycles)
+ iter.add_entry_cb = hist_iter__branch_callback;
iter.ops = &hist_iter_branch;
} else if (rep->mem_mode) {
iter.ops = &hist_iter_mem;
@@ -290,9 +346,10 @@ static int process_sample_event(struct perf_tool *tool,
if (al.map != NULL)
al.map->dso->hit = 1;
- if (ui__has_annotation() || rep->symbol_ipc) {
+ if (ui__has_annotation() || rep->symbol_ipc || rep->total_cycles) {
hist__account_cycles(sample->branch_stack, &al, sample,
- rep->nonany_branch_mode, NULL);
+ rep->nonany_branch_mode,
+ &rep->cycles_count);
}
ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep);
@@ -473,6 +530,349 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
return ret + fprintf(fp, "\n#\n");
}
+static int block_column_header(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct perf_hpp *hpp __maybe_unused,
+ struct hists *hists __maybe_unused,
+ int line __maybe_unused,
+ int *span __maybe_unused)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+ block_fmt->header);
+}
+
+static int block_column_width(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct perf_hpp *hpp __maybe_unused,
+ struct hists *hists __maybe_unused)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+
+ return block_fmt->width;
+}
+
+static int block_cycles_cov_entry(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct report *rep = block_fmt->rep;
+ struct block_info *bi = he->block_info;
+ double ratio = 0.0;
+ char buf[16];
+
+ if (rep->cycles_count)
+ ratio = (double)bi->cycles / (double)rep->cycles_count;
+
+ sprintf(buf, "%.2f%%", 100.0 * ratio);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+}
+
+static int64_t block_cycles_cov_sort(struct perf_hpp_fmt *fmt,
+ struct hist_entry *left,
+ struct hist_entry *right)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct report *rep = block_fmt->rep;
+ struct block_info *bi_l = left->block_info;
+ struct block_info *bi_r = right->block_info;
+ double l, r;
+
+ if (rep->cycles_count) {
+ l = ((double)bi_l->cycles / (double)rep->cycles_count) * 1000.0;
+ r = ((double)bi_r->cycles / (double)rep->cycles_count) * 1000.0;
+ return (int64_t)l - (int64_t)r;
+ }
+
+ return 0;
+}
+
+static void cycles_string(u64 cycles, char *buf, int size)
+{
+ if (cycles >= 1000000)
+ scnprintf(buf, size, "%.1fM", (double)cycles / 1000000.0);
+ else if (cycles >= 1000)
+ scnprintf(buf, size, "%.1fK", (double)cycles / 1000.0);
+ else
+ scnprintf(buf, size, "%1d", cycles);
+}
+
+static int block_cycles_lbr_entry(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct block_info *bi = he->block_info;
+ char cycles_buf[16];
+
+ cycles_string(bi->cycles_aggr, cycles_buf, sizeof(cycles_buf));
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+ cycles_buf);
+}
+
+static int block_cycles_pct_entry(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct report *rep = block_fmt->rep;
+ struct block_info *bi = he->block_info;
+ double ratio = 0.0;
+ u64 avg;
+ char buf[16];
+
+ if (rep->block_cycles && bi->num_aggr) {
+ avg = bi->cycles_aggr / bi->num_aggr;
+ ratio = (double)avg / (double)rep->block_cycles;
+ }
+
+ sprintf(buf, "%.2f%%", 100.0 * ratio);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+}
+
+static int block_avg_cycles_entry(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct block_info *bi = he->block_info;
+ char cycles_buf[16];
+
+ cycles_string(bi->cycles_aggr / bi->num_aggr, cycles_buf,
+ sizeof(cycles_buf));
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+ cycles_buf);
+}
+
+static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct block_info *bi = he->block_info;
+ char buf[128];
+ char *start_line, *end_line;
+
+ symbol_conf.disable_add2line_warn = true;
+
+ start_line = map__srcline(he->ms.map, bi->sym->start + bi->start,
+ he->ms.sym);
+
+ end_line = map__srcline(he->ms.map, bi->sym->start + bi->end,
+ he->ms.sym);
+
+ if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) {
+ scnprintf(buf, sizeof(buf), "[%s -> %s]",
+ start_line, end_line);
+ } else {
+ scnprintf(buf, sizeof(buf), "[%7lx -> %7lx]",
+ bi->start, bi->end);
+ }
+
+ free_srcline(start_line);
+ free_srcline(end_line);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+}
+
+static int block_dso_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
+ struct map *map = he->ms.map;
+
+ if (map && map->dso) {
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+ map->dso->short_name);
+ }
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width,
+ "[unknown]");
+}
+
+static void init_block_header(struct block_fmt *block_fmt)
+{
+ struct perf_hpp_fmt *fmt = &block_fmt->fmt;
+
+ BUG_ON(block_fmt->idx >= PERF_HPP_REPORT__BLOCK_MAX_INDEX);
+
+ block_fmt->header = block_columns[block_fmt->idx].name;
+ block_fmt->width = block_columns[block_fmt->idx].width;
+
+ fmt->header = block_column_header;
+ fmt->width = block_column_width;
+}
+
+static void block_hpp_register(struct block_fmt *block_fmt, int idx,
+ struct perf_hpp_list *hpp_list,
+ struct report *rep)
+{
+ struct perf_hpp_fmt *fmt = &block_fmt->fmt;
+
+ block_fmt->rep = rep;
+ block_fmt->idx = idx;
+ INIT_LIST_HEAD(&fmt->list);
+ INIT_LIST_HEAD(&fmt->sort_list);
+
+ switch (idx) {
+ case PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV:
+ fmt->entry = block_cycles_cov_entry;
+ fmt->cmp = block_info__cmp;
+ fmt->sort = block_cycles_cov_sort;
+ break;
+ case PERF_HPP_REPORT__BLOCK_LBR_CYCLES:
+ fmt->entry = block_cycles_lbr_entry;
+ break;
+ case PERF_HPP_REPORT__BLOCK_CYCLES_PCT:
+ fmt->entry = block_cycles_pct_entry;
+ break;
+ case PERF_HPP_REPORT__BLOCK_AVG_CYCLES:
+ fmt->entry = block_avg_cycles_entry;
+ break;
+ case PERF_HPP_REPORT__BLOCK_RANGE:
+ fmt->entry = block_range_entry;
+ break;
+ case PERF_HPP_REPORT__BLOCK_DSO:
+ fmt->entry = block_dso_entry;
+ break;
+ default:
+ return;
+ }
+
+ init_block_header(block_fmt);
+ perf_hpp_list__column_register(hpp_list, fmt);
+}
+
+static void register_block_columns(struct perf_hpp_list *hpp_list,
+ struct report *rep)
+{
+ for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++)
+ block_hpp_register(&block_fmts[i], i, hpp_list, rep);
+}
+
+static void init_block_hist(struct block_hist *bh, struct report *rep)
+{
+ __hists__init(&bh->block_hists, &bh->block_list);
+ perf_hpp_list__init(&bh->block_list);
+ bh->block_list.nr_header_lines = 1;
+
+ register_block_columns(&bh->block_list, rep);
+
+ perf_hpp_list__register_sort_field(&bh->block_list,
+ &block_fmts[PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_COV].fmt);
+}
+
+static void init_block_info(struct block_info *bi, struct symbol *sym,
+ struct cyc_hist *ch, int offset,
+ u64 total_cycles)
+{
+ bi->sym = sym;
+ bi->start = ch->start;
+ bi->end = offset;
+ bi->cycles = ch->cycles;
+ bi->cycles_aggr = ch->cycles_aggr;
+ bi->num = ch->num;
+ bi->num_aggr = ch->num_aggr;
+ bi->total_cycles = total_cycles;
+}
+
+static int add_block_per_sym(struct hist_entry *he, struct block_hist *bh,
+ u64 *block_cycles, u64 total_cycles)
+{
+ struct annotation *notes;
+ struct cyc_hist *ch;
+ static struct addr_location al;
+ u64 cycles = 0;
+
+ if (!he->ms.map || !he->ms.sym)
+ return 0;
+
+ memset(&al, 0, sizeof(al));
+ al.map = he->ms.map;
+ al.sym = he->ms.sym;
+
+ notes = symbol__annotation(he->ms.sym);
+ if (!notes || !notes->src || !notes->src->cycles_hist)
+ return 0;
+ ch = notes->src->cycles_hist;
+ for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) {
+ if (ch[i].num_aggr) {
+ struct block_info *bi;
+ struct hist_entry *he_block;
+
+ bi = block_info__new();
+ if (!bi)
+ return -1;
+
+ init_block_info(bi, he->ms.sym, &ch[i], i,
+ total_cycles);
+ cycles += bi->cycles_aggr / bi->num_aggr;
+
+ he_block = hists__add_entry_block(&bh->block_hists,
+ &al, bi);
+ if (!he_block) {
+ block_info__put(bi);
+ return -1;
+ }
+ }
+ }
+
+ if (block_cycles)
+ *block_cycles += cycles;
+
+ return 0;
+}
+
+static int resort_cb(struct hist_entry *he, void *arg __maybe_unused)
+{
+ /* Skip the calculation of column length in output_resort */
+ he->filtered = true;
+ return 0;
+}
+
+static void hists__clear_filtered(struct hists *hists)
+{
+ struct rb_node *next = rb_first_cached(&hists->entries);
+ struct hist_entry *he;
+
+ while (next) {
+ he = rb_entry(next, struct hist_entry, rb_node);
+ he->filtered = false;
+ next = rb_next(&he->rb_node);
+ }
+}
+
+static void get_block_hists(struct hists *hists, struct block_hist *bh,
+ struct report *rep)
+{
+ struct rb_node *next = rb_first_cached(&hists->entries);
+ struct hist_entry *he;
+
+ init_block_hist(bh, rep);
+
+ while (next) {
+ he = rb_entry(next, struct hist_entry, rb_node);
+ add_block_per_sym(he, bh, &rep->block_cycles,
+ rep->cycles_count);
+ next = rb_next(&he->rb_node);
+ }
+
+ hists__output_resort_cb(&bh->block_hists, NULL, resort_cb);
+ hists__clear_filtered(&bh->block_hists);
+}
+
+static int hists__fprintf_all_blocks(struct hists *hists, struct report *rep)
+{
+ struct block_hist *bh = &rep->block_hist;
+
+ get_block_hists(hists, bh, rep);
+ symbol_conf.report_individual_block = true;
+ hists__fprintf(&bh->block_hists, true, 0, 0, 0,
+ stdout, true);
+ hists__delete_entries(&bh->block_hists);
+ return 0;
+}
+
static int perf_evlist__tty_browse_hists(struct evlist *evlist,
struct report *rep,
const char *help)
@@ -493,6 +893,12 @@ static int perf_evlist__tty_browse_hists(struct evlist *evlist,
continue;
hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
+
+ if (rep->total_cycles) {
+ hists__fprintf_all_blocks(hists, rep);
+ continue;
+ }
+
hists__fprintf(hists, !quiet, 0, 0, rep->min_percent, stdout,
!(symbol_conf.use_callchain ||
symbol_conf.show_branchflag_count));
@@ -1366,6 +1772,15 @@ int cmd_report(int argc, const char **argv)
goto error;
}
+ if (sort_order && strstr(sort_order, "total_cycles") &&
+ (sort__mode == SORT_MODE__BRANCH)) {
+ report.total_cycles = true;
+ if (!report.use_stdio) {
+ pr_err("Error: -s total_cycles can be only used together with --stdio\n");
+ goto error;
+ }
+ }
+
if (strcmp(input_name, "-") != 0)
setup_browser(true);
else
@@ -1416,7 +1831,7 @@ int cmd_report(int argc, const char **argv)
* so don't allocate extra space that won't be used in the stdio
* implementation.
*/
- if (ui__has_annotation() || report.symbol_ipc) {
+ if (ui__has_annotation() || report.symbol_ipc || report.total_cycles) {
ret = symbol__annotation_init();
if (ret < 0)
goto error;
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 5365606e9dad..655ef7708cd0 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -558,6 +558,25 @@ static int hist_entry__block_fprintf(struct hist_entry *he,
return ret;
}
+static int hist_entry__individual_block_fprintf(struct hist_entry *he,
+ char *bf, size_t size,
+ FILE *fp)
+{
+ int ret = 0;
+
+ struct perf_hpp hpp = {
+ .buf = bf,
+ .size = size,
+ .skip = false,
+ };
+
+ hist_entry__snprintf(he, &hpp);
+ if (!hpp.skip)
+ ret += fprintf(fp, "%s\n", bf);
+
+ return ret;
+}
+
static int hist_entry__fprintf(struct hist_entry *he, size_t size,
char *bf, size_t bfsz, FILE *fp,
bool ignore_callchains)
@@ -580,6 +599,9 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,
if (symbol_conf.report_block)
return hist_entry__block_fprintf(he, bf, size, fp);
+ if (symbol_conf.report_individual_block)
+ return hist_entry__individual_block_fprintf(he, bf, size, fp);
+
hist_entry__snprintf(he, &hpp);
ret = fprintf(fp, "%s\n", bf);
diff --git a/tools/perf/util/block.h b/tools/perf/util/block.h
index 1aeef6dd2bf3..8031a8dd318a 100644
--- a/tools/perf/util/block.h
+++ b/tools/perf/util/block.h
@@ -13,6 +13,7 @@ struct block_info {
u64 end;
u64 cycles;
u64 cycles_aggr;
+ u64 total_cycles;
int num;
int num_aggr;
refcount_t refcnt;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index af65ce950ba2..521f7185a94f 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -756,6 +756,10 @@ struct hist_entry *hists__add_entry_block(struct hists *hists,
struct hist_entry entry = {
.block_info = block_info,
.hists = hists,
+ .ms = {
+ .map = al->map,
+ .sym = al->sym,
+ },
}, *he = hists__findnew_entry(hists, &entry, al, false);
return he;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 43d1d410854a..eb286700a8a9 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -492,6 +492,10 @@ struct sort_entry sort_sym_ipc_null = {
.se_width_idx = HISTC_SYMBOL_IPC,
};
+struct sort_entry sort_block_cycles = {
+ .se_cmp = sort__sym_cmp,
+};
+
/* --sort srcfile */
static char no_srcfile[1];
@@ -1695,6 +1699,7 @@ static struct sort_dimension bstack_sort_dimensions[] = {
DIM(SORT_SRCLINE_FROM, "srcline_from", sort_srcline_from),
DIM(SORT_SRCLINE_TO, "srcline_to", sort_srcline_to),
DIM(SORT_SYM_IPC, "ipc_lbr", sort_sym_ipc),
+ DIM(SORT_BLOCK_CYCLES, "total_cycles", sort_block_cycles),
};
#undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 7b93f34ac1f4..7ed9a08751c3 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -235,6 +235,7 @@ enum sort_type {
SORT_SRCLINE_FROM,
SORT_SRCLINE_TO,
SORT_SYM_IPC,
+ SORT_BLOCK_CYCLES,
/* memory mode specific sort keys */
__SORT_MEMORY_MODE,
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index e6880789864c..10f1ec3e0349 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -40,6 +40,7 @@ struct symbol_conf {
raw_trace,
report_hierarchy,
report_block,
+ report_individual_block,
inline_name,
disable_add2line_warn;
const char *vmlinux_name,
--
2.17.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v1 5/5] perf report: Sort by sampled cycles percent per block for tui
2019-10-08 7:04 [PATCH v1 0/5] perf report: Support sorting all blocks by cycles Jin Yao
` (3 preceding siblings ...)
2019-10-08 7:05 ` [PATCH v1 4/5] perf report: Support --percent-limit for total_cycles Jin Yao
@ 2019-10-08 7:05 ` Jin Yao
2019-10-14 15:32 ` [PATCH v1 0/5] perf report: Support sorting all blocks by cycles Jiri Olsa
5 siblings, 0 replies; 9+ messages in thread
From: Jin Yao @ 2019-10-08 7:05 UTC (permalink / raw)
To: acme, jolsa, peterz, mingo, alexander.shishkin
Cc: Linux-kernel, ak, kan.liang, yao.jin, Jin Yao
Previous patch has implemented a new sort option "total_cycles".
But there was only stdio mode supported.
This patch supports the tui mode and support '--percent-limit'
as well.
For example,
perf record -b ./div
perf report -s total_cycles --percent-limit 1
# Samples: 2753248 of event 'cycles'
Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object
26.04% 2.8M 0.40% 18 [div.c:42 -> div.c:39] div
15.17% 1.2M 0.16% 7 [random_r.c:357 -> random_r.c:380] libc-2.27.so
5.11% 402.0K 0.04% 2 [div.c:27 -> div.c:28] div
4.87% 381.6K 0.04% 2 [random.c:288 -> random.c:291] libc-2.27.so
4.53% 381.0K 0.04% 2 [div.c:40 -> div.c:40] div
3.85% 300.9K 0.02% 1 [div.c:22 -> div.c:25] div
3.08% 241.1K 0.02% 1 [rand.c:26 -> rand.c:27] libc-2.27.so
3.06% 240.0K 0.02% 1 [random.c:291 -> random.c:291] libc-2.27.so
2.78% 215.7K 0.02% 1 [random.c:298 -> random.c:298] libc-2.27.so
2.52% 198.3K 0.02% 1 [random.c:293 -> random.c:293] libc-2.27.so
2.36% 184.8K 0.02% 1 [rand.c:28 -> rand.c:28] libc-2.27.so
2.33% 180.5K 0.02% 1 [random.c:295 -> random.c:295] libc-2.27.so
2.28% 176.7K 0.02% 1 [random.c:295 -> random.c:295] libc-2.27.so
2.20% 168.8K 0.02% 1 [rand@plt+0 -> rand@plt+0] div
1.98% 158.2K 0.02% 1 [random_r.c:388 -> random_r.c:388] libc-2.27.so
1.57% 123.3K 0.02% 1 [div.c:42 -> div.c:44] div
1.44% 116.0K 0.42% 19 [random_r.c:357 -> random_r.c:394] libc-2.27.so
Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
---
tools/perf/builtin-report.c | 30 +++++++++++++---
tools/perf/ui/browsers/hists.c | 62 +++++++++++++++++++++++++++++++++-
tools/perf/ui/browsers/hists.h | 2 ++
tools/perf/util/hist.h | 12 +++++++
4 files changed, 101 insertions(+), 5 deletions(-)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 96e4d68b3884..c095c91dad1d 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -873,6 +873,27 @@ static int hists__fprintf_all_blocks(struct hists *hists, struct report *rep)
return 0;
}
+static int perf_evlist__tui_block_hists_browse(struct evlist *evlist,
+ struct report *rep)
+{
+ struct block_hist *bh = &rep->block_hist;
+ struct evsel *pos;
+ int ret;
+
+ evlist__for_each_entry(evlist, pos) {
+ struct hists *hists = evsel__hists(pos);
+
+ get_block_hists(hists, bh, rep);
+ symbol_conf.report_individual_block = true;
+ ret = block_hists_tui_browse(bh, pos, rep->min_percent);
+ hists__delete_entries(&bh->block_hists);
+ if (ret != 0)
+ return ret;
+ }
+
+ return ret;
+}
+
static int perf_evlist__tty_browse_hists(struct evlist *evlist,
struct report *rep,
const char *help)
@@ -981,6 +1002,11 @@ static int report__browse_hists(struct report *rep)
switch (use_browser) {
case 1:
+ if (rep->total_cycles) {
+ ret = perf_evlist__tui_block_hists_browse(evlist, rep);
+ break;
+ }
+
ret = perf_evlist__tui_browse_hists(evlist, help, NULL,
rep->min_percent,
&session->header.env,
@@ -1775,10 +1801,6 @@ int cmd_report(int argc, const char **argv)
if (sort_order && strstr(sort_order, "total_cycles") &&
(sort__mode == SORT_MODE__BRANCH)) {
report.total_cycles = true;
- if (!report.use_stdio) {
- pr_err("Error: -s total_cycles can be only used together with --stdio\n");
- goto error;
- }
}
if (strcmp(input_name, "-") != 0)
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 7a7187e069b4..e0f3b1d4a43b 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -26,6 +26,7 @@
#include "../../util/sort.h"
#include "../../util/top.h"
#include "../../util/thread.h"
+#include "../../util/block.h"
#include "../../arch/common.h"
#include "../../perf.h"
@@ -1783,7 +1784,11 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
continue;
}
- percent = hist_entry__get_percent_limit(h);
+ if (symbol_conf.report_individual_block)
+ percent = block_total_cycles_percent(h);
+ else
+ percent = hist_entry__get_percent_limit(h);
+
if (percent < hb->min_pcnt)
continue;
@@ -3443,3 +3448,58 @@ int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help,
warn_lost_event,
annotation_opts);
}
+
+static int block_hists_browser__title(struct hist_browser *browser, char *bf,
+ size_t size)
+{
+ struct hists *hists = evsel__hists(browser->block_evsel);
+ const char *evname = perf_evsel__name(browser->block_evsel);
+ unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
+ int ret;
+
+ ret = scnprintf(bf, size, "# Samples: %lu", nr_samples);
+ if (evname)
+ scnprintf(bf + ret, size - ret, " of event '%s'", evname);
+
+ return 0;
+}
+
+int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel,
+ float min_percent)
+{
+ struct hists *hists = &bh->block_hists;
+ struct hist_browser *browser;
+ int key = -1;
+ static const char help[] =
+ " q Quit \n";
+
+ browser = hist_browser__new(hists);
+ if (!browser)
+ return -1;
+
+ browser->block_evsel = evsel;
+ browser->title = block_hists_browser__title;
+ browser->min_pcnt = min_percent;
+
+ /* reset abort key so that it can get Ctrl-C as a key */
+ SLang_reset_tty();
+ SLang_init_tty(0, 0, 0);
+
+ while (1) {
+ key = hist_browser__run(browser, "? - help", true);
+
+ switch (key) {
+ case 'q':
+ goto out;
+ case '?':
+ ui_browser__help_window(&browser->b, help);
+ break;
+ default:
+ break;
+ }
+ }
+
+out:
+ hist_browser__delete(browser);
+ return 0;
+}
diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h
index 91d3e18b50aa..078f2f2c7abd 100644
--- a/tools/perf/ui/browsers/hists.h
+++ b/tools/perf/ui/browsers/hists.h
@@ -5,6 +5,7 @@
#include "ui/browser.h"
struct annotation_options;
+struct evsel;
struct hist_browser {
struct ui_browser b;
@@ -15,6 +16,7 @@ struct hist_browser {
struct pstack *pstack;
struct perf_env *env;
struct annotation_options *annotation_opts;
+ struct evsel *block_evsel;
int print_seq;
bool show_dso;
bool show_headers;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 4d87c7b4c1b2..9dc3c112157c 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -449,6 +449,8 @@ enum rstype {
A_SOURCE
};
+struct block_hist;
+
#ifdef HAVE_SLANG_SUPPORT
#include "../ui/keysyms.h"
void attr_to_script(char *buf, struct perf_event_attr *attr);
@@ -474,6 +476,9 @@ void run_script(char *cmd);
int res_sample_browse(struct res_sample *res_samples, int num_res,
struct evsel *evsel, enum rstype rstype);
void res_sample_init(void);
+
+int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel,
+ float min_percent);
#else
static inline
int perf_evlist__tui_browse_hists(struct evlist *evlist __maybe_unused,
@@ -516,6 +521,13 @@ static inline int res_sample_browse(struct res_sample *res_samples __maybe_unuse
return 0;
}
+static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused,
+ struct evsel *evsel __maybe_unused,
+ float min_percent)
+{
+ return 0;
+}
+
static inline void res_sample_init(void) {}
#define K_LEFT -1000
--
2.17.1
^ permalink raw reply related [flat|nested] 9+ messages in thread