linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RFC V10 1/4] perf,tools: per-event callgraph support
@ 2015-08-11 10:30 kan.liang
  2015-08-11 10:30 ` [PATCH RFC V10 2/4] perf,tools: disable " kan.liang
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: kan.liang @ 2015-08-11 10:30 UTC (permalink / raw)
  To: acme, jolsa; +Cc: namhyung, ak, linux-kernel, Kan Liang

From: Kan Liang <kan.liang@intel.com>

This patchkit adds the ability to set callgraph mode (fp,dwarf,lbr) per
event. This in term can reduce sampling overhead and the size of the
perf.data.

Here is an example.

perf record -e
'cpu/cpu-cycles,period=1000,call-graph=fp,time=1/,cpu/instructions,call-graph=lbr/'
sleep 1

perf evlist -v
cpu/cpu-cycles,period=1000,call-graph=fp,time=1/: type: 4, size: 112,
config: 0x3c, { sample_period, sample_freq }: 1000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|IDENTIFIER, read_format: ID, disabled: 1,
inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all:
1, exclude_guest: 1, mmap2: 1, comm_exec: 1
cpu/instructions,call-graph=lbr/: type: 4, size: 112, config: 0xc0, {
sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|BRANCH_STACK|IDENTIFIER, read_format: ID,
disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1,
exclude_guest: 1

Signed-off-by: Kan Liang <kan.liang@intel.com>
---

Changes since V9:
 - move "callgraph=no" to seperate patch

 tools/perf/Documentation/perf-record.txt |  3 ++
 tools/perf/util/evsel.c                  | 62 ++++++++++++++++++++++++++++++--
 tools/perf/util/evsel.h                  |  4 +++
 tools/perf/util/parse-events.c           | 12 +++++++
 tools/perf/util/parse-events.h           |  2 ++
 tools/perf/util/parse-events.l           |  2 ++
 tools/perf/util/pmu.c                    |  4 ++-
 7 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index afbe45e..7f82dec 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -53,6 +53,9 @@ OPTIONS
 	  - 'time': Disable/enable time stamping. Acceptable values are 1 for
 		    enabling time stamping. 0 for disabling time stamping.
 		    The default is 1.
+	  - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for
+			 FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode.
+	  - 'stack-size': user stack size for dwarf mode
 	  Note: If user explicitly sets options which conflict with the params,
 	  the value set by the params will be overridden.
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 04fdddd..6647925 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -588,11 +588,36 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
 	}
 }
 
-static void apply_config_terms(struct perf_evsel *evsel)
+static void
+perf_evsel__reset_callgraph(struct perf_evsel *evsel,
+			    struct callchain_param *param)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+
+	perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+	if (param->record_mode == CALLCHAIN_LBR) {
+		perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+		attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
+					      PERF_SAMPLE_BRANCH_CALL_STACK);
+	}
+	if (param->record_mode == CALLCHAIN_DWARF) {
+		perf_evsel__reset_sample_bit(evsel, REGS_USER);
+		perf_evsel__reset_sample_bit(evsel, STACK_USER);
+	}
+}
+
+static void apply_config_terms(struct perf_evsel *evsel,
+			       struct record_opts *opts)
 {
 	struct perf_evsel_config_term *term;
 	struct list_head *config_terms = &evsel->config_terms;
 	struct perf_event_attr *attr = &evsel->attr;
+	struct callchain_param param;
+	u32 dump_size = 0;
+	char *callgraph_buf = NULL;
+
+	/* callgraph default */
+	param.record_mode = callchain_param.record_mode;
 
 	list_for_each_entry(term, config_terms, list) {
 		switch (term->type) {
@@ -610,10 +635,43 @@ static void apply_config_terms(struct perf_evsel *evsel)
 			else
 				perf_evsel__reset_sample_bit(evsel, TIME);
 			break;
+		case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
+			callgraph_buf = term->val.callgraph;
+			break;
+		case PERF_EVSEL__CONFIG_TERM_STACK_USER:
+			dump_size = term->val.stack_user;
+			break;
 		default:
 			break;
 		}
 	}
+
+	/* User explicitly set per-event callgraph, clear the old setting and reset. */
+	if ((callgraph_buf != NULL) || (dump_size > 0)) {
+
+		/* parse callgraph parameters */
+		if (callgraph_buf != NULL) {
+			param.enabled = true;
+			if (parse_callchain_record(callgraph_buf, &param)) {
+				pr_err("per-event callgraph setting for %s failed. "
+				       "Apply callgraph global setting for it\n",
+				       evsel->name);
+				return;
+			}
+		}
+		if (dump_size > 0) {
+			dump_size = round_up(dump_size, sizeof(u64));
+			param.dump_size = dump_size;
+		}
+
+		/* If global callgraph set, clear it */
+		if (callchain_param.enabled)
+			perf_evsel__reset_callgraph(evsel, &callchain_param);
+
+		/* set perf-event callgraph */
+		if (param.enabled)
+			perf_evsel__config_callgraph(evsel, opts, &param);
+	}
 }
 
 /*
@@ -812,7 +870,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
 	 * Apply event specific term settings,
 	 * it overloads any global configuration.
 	 */
-	apply_config_terms(evsel);
+	apply_config_terms(evsel, opts);
 }
 
 static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index fdf2674..93ac6b1 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -41,6 +41,8 @@ enum {
 	PERF_EVSEL__CONFIG_TERM_PERIOD,
 	PERF_EVSEL__CONFIG_TERM_FREQ,
 	PERF_EVSEL__CONFIG_TERM_TIME,
+	PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
+	PERF_EVSEL__CONFIG_TERM_STACK_USER,
 	PERF_EVSEL__CONFIG_TERM_MAX,
 };
 
@@ -51,6 +53,8 @@ struct perf_evsel_config_term {
 		u64	period;
 		u64	freq;
 		bool	time;
+		char	*callgraph;
+		u64	stack_user;
 	} val;
 };
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index dbf315d..d826e6f 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -614,6 +614,12 @@ do {									   \
 			return -EINVAL;
 		}
 		break;
+	case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+		CHECK_TYPE_VAL(STR);
+		break;
+	case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+		CHECK_TYPE_VAL(NUM);
+		break;
 	case PARSE_EVENTS__TERM_TYPE_NAME:
 		CHECK_TYPE_VAL(STR);
 		break;
@@ -668,6 +674,12 @@ do {								\
 		case PARSE_EVENTS__TERM_TYPE_TIME:
 			ADD_CONFIG_TERM(TIME, time, term->val.num);
 			break;
+		case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+			ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+			ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num);
+			break;
 		default:
 			break;
 		}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index ce2d13a..a09b0e2 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -65,6 +65,8 @@ enum {
 	PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ,
 	PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
 	PARSE_EVENTS__TERM_TYPE_TIME,
+	PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
+	PARSE_EVENTS__TERM_TYPE_STACKSIZE,
 };
 
 struct parse_events_term {
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 4306f5a..936d566 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -185,6 +185,8 @@ period			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
 freq			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); }
 branch_type		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
 time			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
+call-graph		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
+stack-size		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
 ,			{ return ','; }
 "/"			{ BEGIN(INITIAL); return '/'; }
 {name_minus}		{ return str(yyscanner, PE_NAME); }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index d85f11b..84cad05 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -634,7 +634,9 @@ static char *formats_error_string(struct list_head *formats)
 {
 	struct perf_pmu_format *format;
 	char *err, *str;
-	static const char *static_terms = "config,config1,config2,name,period,freq,branch_type,time\n";
+	static const char *static_terms = "config,config1,config2,name,"
+					  "period,freq,branch_type,time,"
+					  "call-graph,stack-size\n";
 	unsigned i = 0;
 
 	if (!asprintf(&str, "valid terms:"))
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH RFC V10 2/4] perf,tools: disable per-event callgraph support
  2015-08-11 10:30 [PATCH RFC V10 1/4] perf,tools: per-event callgraph support kan.liang
@ 2015-08-11 10:30 ` kan.liang
  2015-08-13  8:07   ` [tip:perf/core] perf callchain: Allow disabling call graphs per event tip-bot for Kan Liang
  2015-08-11 10:30 ` [PATCH RFC V10 3/4] perf,tools: show call graph from reference events kan.liang
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 8+ messages in thread
From: kan.liang @ 2015-08-11 10:30 UTC (permalink / raw)
  To: acme, jolsa; +Cc: namhyung, ak, linux-kernel, Kan Liang

From: Kan Liang <kan.liang@intel.com>

This patch introduce "call-graph=no" to disable per-event callgraph.

Here is an example.

perf record -e
'cpu/cpu-cycles,call-graph=fp/,cpu/instructions,call-graph=no/' sleep 1

perf report --stdio

 # To display the perf.data header info, please use
 --header/--header-only options.
 #
 #
 # Total Lost Samples: 0
 #
 # Samples: 6  of event 'cpu/cpu-cycles,call-graph=fp/'
 # Event count (approx.): 774218
 #
 # Children      Self  Command  Shared Object     Symbol
 # ........  ........  .......  ................
 ........................................
 #
    61.94%     0.00%  sleep    [kernel.vmlinux]  [k]
 entry_SYSCALL_64_fastpath
              |
              ---entry_SYSCALL_64_fastpath
                 |
                 |--97.30%-- __brk
                 |
                  --2.70%-- mmap64
                            _dl_check_map_versions
                            _dl_check_all_versions

    61.94%     0.00%  sleep    [kernel.vmlinux]  [k] perf_event_mmap
              |
              ---perf_event_mmap
                 |
                 |--97.30%-- do_brk
                 |          sys_brk
                 |          entry_SYSCALL_64_fastpath
                 |          __brk
                 |
                  --2.70%-- mmap_region
                            do_mmap_pgoff
                            vm_mmap_pgoff
                            sys_mmap_pgoff
                            sys_mmap
                            entry_SYSCALL_64_fastpath
                            mmap64
                            _dl_check_map_versions
                            _dl_check_all_versions
 ......

 # Samples: 6  of event 'cpu/instructions,call-graph=no/'
 # Event count (approx.): 359692
 #
 # Children      Self  Command  Shared Object     Symbol
 # ........  ........  .......  ................
 .................................
 #
    89.03%     0.00%  sleep    [unknown]         [.] 0xffff6598ffff6598

    89.03%     0.00%  sleep    ld-2.17.so        [.]
 _dl_resolve_conflicts

    89.03%     0.00%  sleep    [kernel.vmlinux]  [k] page_fault

Signed-off-by: Kan Liang <kan.liang@intel.com>
---

Changes since V9:
 - Really disable callgraph for "callgraph=no" event

 tools/perf/Documentation/perf-record.txt |  3 ++-
 tools/perf/builtin-annotate.c            |  2 ++
 tools/perf/builtin-diff.c                |  3 +++
 tools/perf/tests/hists_cumulate.c        |  4 ++++
 tools/perf/util/evsel.c                  | 17 +++++++++++------
 tools/perf/util/hist.c                   |  9 ++++++---
 6 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 7f82dec..347a273 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -54,7 +54,8 @@ OPTIONS
 		    enabling time stamping. 0 for disabling time stamping.
 		    The default is 1.
 	  - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for
-			 FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode.
+			 FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and
+			 "no" for disable callgraph.
 	  - 'stack-size': user stack size for dwarf mode
 	  Note: If user explicitly sets options which conflict with the params,
 	  the value set by the params will be overridden.
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 467a23b..a32a64e 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -239,6 +239,8 @@ static int __cmd_annotate(struct perf_annotate *ann)
 		if (nr_samples > 0) {
 			total_nr_samples += nr_samples;
 			hists__collapse_resort(hists, NULL);
+			/* Don't sort callchain */
+			perf_evsel__reset_sample_bit(pos, CALLCHAIN);
 			hists__output_resort(hists, NULL);
 
 			if (symbol_conf.event_group &&
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index daaa7dc..0b180a8 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -722,6 +722,9 @@ static void data_process(void)
 		if (verbose || data__files_cnt > 2)
 			data__fprintf();
 
+		/* Don't sort callchain for perf diff */
+		perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN);
+
 		hists__process(hists_base);
 	}
 }
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 7d82c8b..7ed7370 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -279,6 +279,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine)
 
 	symbol_conf.use_callchain = false;
 	symbol_conf.cumulate_callchain = false;
+	perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
 
 	setup_sorting();
 	callchain_register_param(&callchain_param);
@@ -425,6 +426,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine)
 
 	symbol_conf.use_callchain = true;
 	symbol_conf.cumulate_callchain = false;
+	perf_evsel__set_sample_bit(evsel, CALLCHAIN);
 
 	setup_sorting();
 	callchain_register_param(&callchain_param);
@@ -482,6 +484,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine)
 
 	symbol_conf.use_callchain = false;
 	symbol_conf.cumulate_callchain = true;
+	perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
 
 	setup_sorting();
 	callchain_register_param(&callchain_param);
@@ -665,6 +668,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine)
 
 	symbol_conf.use_callchain = true;
 	symbol_conf.cumulate_callchain = true;
+	perf_evsel__set_sample_bit(evsel, CALLCHAIN);
 
 	setup_sorting();
 	callchain_register_param(&callchain_param);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 6647925..b096ef7 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -651,12 +651,17 @@ static void apply_config_terms(struct perf_evsel *evsel,
 
 		/* parse callgraph parameters */
 		if (callgraph_buf != NULL) {
-			param.enabled = true;
-			if (parse_callchain_record(callgraph_buf, &param)) {
-				pr_err("per-event callgraph setting for %s failed. "
-				       "Apply callgraph global setting for it\n",
-				       evsel->name);
-				return;
+			if (!strcmp(callgraph_buf, "no")) {
+				param.enabled = false;
+				param.record_mode = CALLCHAIN_NONE;
+			} else {
+				param.enabled = true;
+				if (parse_callchain_record(callgraph_buf, &param)) {
+					pr_err("per-event callgraph setting for %s failed. "
+					       "Apply callgraph global setting for it\n",
+					       evsel->name);
+					return;
+				}
 			}
 		}
 		if (dump_size > 0) {
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 6bccfae..1cd785b 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1109,13 +1109,14 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h)
 
 static void __hists__insert_output_entry(struct rb_root *entries,
 					 struct hist_entry *he,
-					 u64 min_callchain_hits)
+					 u64 min_callchain_hits,
+					 bool use_callchain)
 {
 	struct rb_node **p = &entries->rb_node;
 	struct rb_node *parent = NULL;
 	struct hist_entry *iter;
 
-	if (symbol_conf.use_callchain)
+	if (use_callchain)
 		callchain_param.sort(&he->sorted_chain, he->callchain,
 				      min_callchain_hits, &callchain_param);
 
@@ -1139,6 +1140,8 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
 	struct rb_node *next;
 	struct hist_entry *n;
 	u64 min_callchain_hits;
+	struct perf_evsel *evsel = hists_to_evsel(hists);
+	bool use_callchain = evsel ? (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) : symbol_conf.use_callchain;
 
 	min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100);
 
@@ -1157,7 +1160,7 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
 		n = rb_entry(next, struct hist_entry, rb_node_in);
 		next = rb_next(&n->rb_node_in);
 
-		__hists__insert_output_entry(&hists->entries, n, min_callchain_hits);
+		__hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain);
 		hists__inc_stats(hists, n);
 
 		if (!n->filtered)
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH RFC V10 3/4] perf,tools: show call graph from reference events
  2015-08-11 10:30 [PATCH RFC V10 1/4] perf,tools: per-event callgraph support kan.liang
  2015-08-11 10:30 ` [PATCH RFC V10 2/4] perf,tools: disable " kan.liang
@ 2015-08-11 10:30 ` kan.liang
  2015-08-13  8:07   ` [tip:perf/core] perf report: Show " tip-bot for Kan Liang
  2015-08-11 10:30 ` [PATCH RFC V10 4/4] perf,tests: Add tests to callgraph and time parse kan.liang
  2015-08-13  8:06 ` [tip:perf/core] perf callchain: Per-event type selection support tip-bot for Kan Liang
  3 siblings, 1 reply; 8+ messages in thread
From: kan.liang @ 2015-08-11 10:30 UTC (permalink / raw)
  To: acme, jolsa; +Cc: namhyung, ak, linux-kernel, Kan Liang

From: Kan Liang <kan.liang@intel.com>

Introduce --show-ref-call-graph for perf report to print reference
callgraph for no callgraph event.

Here is an example.

perf report --show-ref-call-graph --stdio

 # To display the perf.data header info, please use
 --header/--header-only options.
 #
 #
 # Total Lost Samples: 0
 #
 # Samples: 5  of event 'cpu/cpu-cycles,call-graph=fp/'
 # Event count (approx.): 144985
 #
 # Children      Self  Command  Shared Object     Symbol
 # ........  ........  .......  ................
 ........................................
 #
    72.30%     0.00%  sleep    [kernel.vmlinux]  [k]
entry_SYSCALL_64_fastpath
              |
              ---entry_SYSCALL_64_fastpath
                 |
                 |--22.62%-- __GI___libc_nanosleep
                  --77.38%-- [...]

......

 # Samples: 6  of event 'cpu/instructions,call-graph=no/', show
reference callgraph
 # Event count (approx.): 172780
 #
 # Children      Self  Command  Shared Object     Symbol
 # ........  ........  .......  ................
 ........................................
 #
    73.16%     0.00%  sleep    [kernel.vmlinux]  [k]
entry_SYSCALL_64_fastpath
              |
              ---entry_SYSCALL_64_fastpath
                 |
                 |--31.44%-- __GI___libc_nanosleep
                  --68.56%-- [...]

Signed-off-by: Kan Liang <kan.liang@intel.com>
---

Changes since V9:
 - new patch to show reference callgraph

 tools/perf/Documentation/perf-report.txt | 11 +++++++++++
 tools/perf/builtin-report.c              |  7 +++++++
 tools/perf/ui/browsers/hists.c           |  9 +++++++--
 tools/perf/util/hist.c                   |  7 ++++++-
 tools/perf/util/symbol.h                 |  3 ++-
 5 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 7b07d19..a18ba75 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -359,6 +359,17 @@ OPTIONS
 --full-source-path::
 	Show the full path for source files for srcline output.
 
+--show-ref-call-graph::
+	When multiple events are sampled, it may not be needed to collect
+	callgraphs for all of them. The sample sites are usually nearby,
+	and it's enough to collect the callgraphs on a reference event.
+	So user can use "call-graph=no" event modifier to disable callgraph
+	for other events to reduce the overhead.
+	However, perf report cannot show callgraphs for the event which
+	disable the callgraph.
+	This option extends the perf report to show reference callgraphs,
+	which collected by reference event, in no callgraph event.
+
 include::callchain-overhead-calculation.txt[]
 
 SEE ALSO
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index f301e86..62b285e 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -316,6 +316,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
 	if (evname != NULL)
 		ret += fprintf(fp, " of event '%s'", evname);
 
+	if (symbol_conf.show_ref_callgraph &&
+	    strstr(evname, "call-graph=no")) {
+		ret += fprintf(fp, ", show reference callgraph");
+	}
+
 	if (rep->mem_mode) {
 		ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events);
 		ret += fprintf(fp, "\n# Sort order   : %s", sort_order ? : default_mem_sort_order);
@@ -740,6 +745,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 			    itrace_parse_synth_opts),
 	OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename,
 			"Show full source file name path for source lines"),
+	OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph,
+		    "Show callgraph from reference event"),
 	OPT_END()
 	};
 	struct perf_data_file file = {
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index fa67613..03f09de 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1267,6 +1267,8 @@ static int hists__browser_title(struct hists *hists,
 	const char *ev_name = perf_evsel__name(evsel);
 	char buf[512];
 	size_t buflen = sizeof(buf);
+	char ref[30] = " show reference callgraph, ";
+	bool enable_ref = false;
 
 	if (symbol_conf.filter_relative) {
 		nr_samples = hists->stats.nr_non_filtered_samples;
@@ -1292,10 +1294,13 @@ static int hists__browser_title(struct hists *hists,
 		}
 	}
 
+	if (symbol_conf.show_ref_callgraph &&
+	    strstr(ev_name, "call-graph=no"))
+		enable_ref = true;
 	nr_samples = convert_unit(nr_samples, &unit);
 	printed = scnprintf(bf, size,
-			   "Samples: %lu%c of event '%s', Event count (approx.): %" PRIu64,
-			   nr_samples, unit, ev_name, nr_events);
+			   "Samples: %lu%c of event '%s',%sEvent count (approx.): %" PRIu64,
+			   nr_samples, unit, ev_name, enable_ref ? ref : " ", nr_events);
 
 
 	if (hists->uid_filter_str)
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 1cd785b..08b6cd9 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1141,7 +1141,12 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
 	struct hist_entry *n;
 	u64 min_callchain_hits;
 	struct perf_evsel *evsel = hists_to_evsel(hists);
-	bool use_callchain = evsel ? (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) : symbol_conf.use_callchain;
+	bool use_callchain;
+
+	if (evsel && !symbol_conf.show_ref_callgraph)
+		use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN;
+	else
+		use_callchain = symbol_conf.use_callchain;
 
 	min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100);
 
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index b98ce51..a4cde92 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -106,7 +106,8 @@ struct symbol_conf {
 			filter_relative,
 			show_hist_headers,
 			branch_callstack,
-			has_filter;
+			has_filter,
+			show_ref_callgraph;
 	const char	*vmlinux_name,
 			*kallsyms_name,
 			*source_prefix,
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH RFC V10 4/4] perf,tests: Add tests to callgraph and time parse
  2015-08-11 10:30 [PATCH RFC V10 1/4] perf,tools: per-event callgraph support kan.liang
  2015-08-11 10:30 ` [PATCH RFC V10 2/4] perf,tools: disable " kan.liang
  2015-08-11 10:30 ` [PATCH RFC V10 3/4] perf,tools: show call graph from reference events kan.liang
@ 2015-08-11 10:30 ` kan.liang
  2015-08-13  8:07   ` [tip:perf/core] perf tests: " tip-bot for Kan Liang
  2015-08-13  8:06 ` [tip:perf/core] perf callchain: Per-event type selection support tip-bot for Kan Liang
  3 siblings, 1 reply; 8+ messages in thread
From: kan.liang @ 2015-08-11 10:30 UTC (permalink / raw)
  To: acme, jolsa; +Cc: namhyung, ak, linux-kernel, Kan Liang

From: Kan Liang <kan.liang@intel.com>

Add tests in tests/parse-events.c to check call-graph and time option

Signed-off-by: Kan Liang <kan.liang@intel.com>
---
 tools/perf/tests/parse-events.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index f65bb89..9b6b2b63 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -479,6 +479,39 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist)
 	return 0;
 }
 
+static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	/* cpu/config=1,call-graph=fp,time,period=100000/ */
+	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",  1 == evsel->attr.config);
+	/*
+	 * The period, time and callgraph value gets configured
+	 * within perf_evlist__config,
+	 * while this test executes only parse events method.
+	 */
+	TEST_ASSERT_VAL("wrong period",     0 == evsel->attr.sample_period);
+	TEST_ASSERT_VAL("wrong callgraph",  !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
+	TEST_ASSERT_VAL("wrong time",  !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
+
+	/* cpu/config=2,call-graph=no,time=0,period=2000/ */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",  2 == evsel->attr.config);
+	/*
+	 * The period, time and callgraph value gets configured
+	 * within perf_evlist__config,
+	 * while this test executes only parse events method.
+	 */
+	TEST_ASSERT_VAL("wrong period",     0 == evsel->attr.sample_period);
+	TEST_ASSERT_VAL("wrong callgraph",  !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
+	TEST_ASSERT_VAL("wrong time",  !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
+
+	return 0;
+}
+
 static int test__checkevent_pmu_events(struct perf_evlist *evlist)
 {
 	struct perf_evsel *evsel = perf_evlist__first(evlist);
@@ -1555,6 +1588,11 @@ static struct evlist_test test__events_pmu[] = {
 		.check = test__checkevent_pmu_name,
 		.id    = 1,
 	},
+	{
+		.name  = "cpu/config=1,call-graph=fp,time,period=100000/,cpu/config=2,call-graph=no,time=0,period=2000/",
+		.check = test__checkevent_pmu_partial_time_callgraph,
+		.id    = 2,
+	},
 };
 
 struct terms_test {
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [tip:perf/core] perf callchain: Per-event type selection support
  2015-08-11 10:30 [PATCH RFC V10 1/4] perf,tools: per-event callgraph support kan.liang
                   ` (2 preceding siblings ...)
  2015-08-11 10:30 ` [PATCH RFC V10 4/4] perf,tests: Add tests to callgraph and time parse kan.liang
@ 2015-08-13  8:06 ` tip-bot for Kan Liang
  3 siblings, 0 replies; 8+ messages in thread
From: tip-bot for Kan Liang @ 2015-08-13  8:06 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: jolsa, ak, namhyung, kan.liang, mingo, hpa, linux-kernel, acme, tglx

Commit-ID:  d457c96392bb418bd998f3ccf93e0e4c958fcd0f
Gitweb:     http://git.kernel.org/tip/d457c96392bb418bd998f3ccf93e0e4c958fcd0f
Author:     Kan Liang <kan.liang@intel.com>
AuthorDate: Tue, 11 Aug 2015 06:30:47 -0400
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Wed, 12 Aug 2015 13:20:27 -0300

perf callchain: Per-event type selection support

This patchkit adds the ability to set callgraph mode (fp, dwarf, lbr) per
event. This in term can reduce sampling overhead and the size of the
perf.data.

Here is an example.

  perf record -e 'cpu/cpu-cycles,period=1000,call-graph=fp,time=1/,cpu/instructions,call-graph=lbr/' sleep 1

 perf evlist -v
 cpu/cpu-cycles,period=1000,call-graph=fp,time=1/: type: 4, size: 112,
 config: 0x3c, { sample_period, sample_freq }: 1000, sample_type:
 IP|TID|TIME|CALLCHAIN|PERIOD|IDENTIFIER, read_format: ID, disabled: 1,
 inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all:
 1, exclude_guest: 1, mmap2: 1, comm_exec: 1
 cpu/instructions,call-graph=lbr/: type: 4, size: 112, config: 0xc0, {
 sample_period, sample_freq }: 4000, sample_type:
 IP|TID|TIME|CALLCHAIN|PERIOD|BRANCH_STACK|IDENTIFIER, read_format: ID,
 disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1,
 exclude_guest: 1

Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/1439289050-40510-1-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-record.txt |  3 ++
 tools/perf/util/evsel.c                  | 62 ++++++++++++++++++++++++++++++--
 tools/perf/util/evsel.h                  |  4 +++
 tools/perf/util/parse-events.c           | 12 +++++++
 tools/perf/util/parse-events.h           |  2 ++
 tools/perf/util/parse-events.l           |  2 ++
 tools/perf/util/pmu.c                    |  4 ++-
 7 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index afbe45e..7f82dec 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -53,6 +53,9 @@ OPTIONS
 	  - 'time': Disable/enable time stamping. Acceptable values are 1 for
 		    enabling time stamping. 0 for disabling time stamping.
 		    The default is 1.
+	  - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for
+			 FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode.
+	  - 'stack-size': user stack size for dwarf mode
 	  Note: If user explicitly sets options which conflict with the params,
 	  the value set by the params will be overridden.
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 04fdddd..6647925 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -588,11 +588,36 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
 	}
 }
 
-static void apply_config_terms(struct perf_evsel *evsel)
+static void
+perf_evsel__reset_callgraph(struct perf_evsel *evsel,
+			    struct callchain_param *param)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+
+	perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+	if (param->record_mode == CALLCHAIN_LBR) {
+		perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+		attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
+					      PERF_SAMPLE_BRANCH_CALL_STACK);
+	}
+	if (param->record_mode == CALLCHAIN_DWARF) {
+		perf_evsel__reset_sample_bit(evsel, REGS_USER);
+		perf_evsel__reset_sample_bit(evsel, STACK_USER);
+	}
+}
+
+static void apply_config_terms(struct perf_evsel *evsel,
+			       struct record_opts *opts)
 {
 	struct perf_evsel_config_term *term;
 	struct list_head *config_terms = &evsel->config_terms;
 	struct perf_event_attr *attr = &evsel->attr;
+	struct callchain_param param;
+	u32 dump_size = 0;
+	char *callgraph_buf = NULL;
+
+	/* callgraph default */
+	param.record_mode = callchain_param.record_mode;
 
 	list_for_each_entry(term, config_terms, list) {
 		switch (term->type) {
@@ -610,10 +635,43 @@ static void apply_config_terms(struct perf_evsel *evsel)
 			else
 				perf_evsel__reset_sample_bit(evsel, TIME);
 			break;
+		case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
+			callgraph_buf = term->val.callgraph;
+			break;
+		case PERF_EVSEL__CONFIG_TERM_STACK_USER:
+			dump_size = term->val.stack_user;
+			break;
 		default:
 			break;
 		}
 	}
+
+	/* User explicitly set per-event callgraph, clear the old setting and reset. */
+	if ((callgraph_buf != NULL) || (dump_size > 0)) {
+
+		/* parse callgraph parameters */
+		if (callgraph_buf != NULL) {
+			param.enabled = true;
+			if (parse_callchain_record(callgraph_buf, &param)) {
+				pr_err("per-event callgraph setting for %s failed. "
+				       "Apply callgraph global setting for it\n",
+				       evsel->name);
+				return;
+			}
+		}
+		if (dump_size > 0) {
+			dump_size = round_up(dump_size, sizeof(u64));
+			param.dump_size = dump_size;
+		}
+
+		/* If global callgraph set, clear it */
+		if (callchain_param.enabled)
+			perf_evsel__reset_callgraph(evsel, &callchain_param);
+
+		/* set perf-event callgraph */
+		if (param.enabled)
+			perf_evsel__config_callgraph(evsel, opts, &param);
+	}
 }
 
 /*
@@ -812,7 +870,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
 	 * Apply event specific term settings,
 	 * it overloads any global configuration.
 	 */
-	apply_config_terms(evsel);
+	apply_config_terms(evsel, opts);
 }
 
 static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index fdf2674..93ac6b1 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -41,6 +41,8 @@ enum {
 	PERF_EVSEL__CONFIG_TERM_PERIOD,
 	PERF_EVSEL__CONFIG_TERM_FREQ,
 	PERF_EVSEL__CONFIG_TERM_TIME,
+	PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
+	PERF_EVSEL__CONFIG_TERM_STACK_USER,
 	PERF_EVSEL__CONFIG_TERM_MAX,
 };
 
@@ -51,6 +53,8 @@ struct perf_evsel_config_term {
 		u64	period;
 		u64	freq;
 		bool	time;
+		char	*callgraph;
+		u64	stack_user;
 	} val;
 };
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index dbf315d..d826e6f 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -614,6 +614,12 @@ do {									   \
 			return -EINVAL;
 		}
 		break;
+	case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+		CHECK_TYPE_VAL(STR);
+		break;
+	case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+		CHECK_TYPE_VAL(NUM);
+		break;
 	case PARSE_EVENTS__TERM_TYPE_NAME:
 		CHECK_TYPE_VAL(STR);
 		break;
@@ -668,6 +674,12 @@ do {								\
 		case PARSE_EVENTS__TERM_TYPE_TIME:
 			ADD_CONFIG_TERM(TIME, time, term->val.num);
 			break;
+		case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+			ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+			ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num);
+			break;
 		default:
 			break;
 		}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index ce2d13a..a09b0e2 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -65,6 +65,8 @@ enum {
 	PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ,
 	PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
 	PARSE_EVENTS__TERM_TYPE_TIME,
+	PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
+	PARSE_EVENTS__TERM_TYPE_STACKSIZE,
 };
 
 struct parse_events_term {
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 4306f5a..936d566 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -185,6 +185,8 @@ period			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
 freq			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); }
 branch_type		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
 time			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
+call-graph		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
+stack-size		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
 ,			{ return ','; }
 "/"			{ BEGIN(INITIAL); return '/'; }
 {name_minus}		{ return str(yyscanner, PE_NAME); }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index d85f11b..84cad05 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -634,7 +634,9 @@ static char *formats_error_string(struct list_head *formats)
 {
 	struct perf_pmu_format *format;
 	char *err, *str;
-	static const char *static_terms = "config,config1,config2,name,period,freq,branch_type,time\n";
+	static const char *static_terms = "config,config1,config2,name,"
+					  "period,freq,branch_type,time,"
+					  "call-graph,stack-size\n";
 	unsigned i = 0;
 
 	if (!asprintf(&str, "valid terms:"))

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [tip:perf/core] perf callchain: Allow disabling call graphs per event
  2015-08-11 10:30 ` [PATCH RFC V10 2/4] perf,tools: disable " kan.liang
@ 2015-08-13  8:07   ` tip-bot for Kan Liang
  0 siblings, 0 replies; 8+ messages in thread
From: tip-bot for Kan Liang @ 2015-08-13  8:07 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, ak, tglx, mingo, namhyung, jolsa, kan.liang, acme, hpa

Commit-ID:  f9db0d0f1b2cf030083c83d3ed3a4bbae6bdc8b7
Gitweb:     http://git.kernel.org/tip/f9db0d0f1b2cf030083c83d3ed3a4bbae6bdc8b7
Author:     Kan Liang <kan.liang@intel.com>
AuthorDate: Tue, 11 Aug 2015 06:30:48 -0400
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Wed, 12 Aug 2015 13:20:28 -0300

perf callchain: Allow disabling call graphs per event

This patch introduce "call-graph=no" to disable per-event callgraph.

Here is an example.

  perf record -e 'cpu/cpu-cycles,call-graph=fp/,cpu/instructions,call-graph=no/' sleep 1

  perf report --stdio

  # To display the perf.data header info, please use
  --header/--header-only options.
  #
  #
  # Total Lost Samples: 0
  #
  # Samples: 6  of event 'cpu/cpu-cycles,call-graph=fp/'
  # Event count (approx.): 774218
  #
  # Children      Self  Command  Shared Object     Symbol
  # ........  ........  .......  ................  ........................................
  #
    61.94%     0.00%  sleep    [kernel.vmlinux]  [k] entry_SYSCALL_64_fastpath
              |
              ---entry_SYSCALL_64_fastpath
                 |
                 |--97.30%-- __brk
                 |
                  --2.70%-- mmap64
                            _dl_check_map_versions
                            _dl_check_all_versions

    61.94%     0.00%  sleep    [kernel.vmlinux]  [k] perf_event_mmap
              |
              ---perf_event_mmap
                 |
                 |--97.30%-- do_brk
                 |          sys_brk
                 |          entry_SYSCALL_64_fastpath
                 |          __brk
                 |
                  --2.70%-- mmap_region
                            do_mmap_pgoff
                            vm_mmap_pgoff
                            sys_mmap_pgoff
                            sys_mmap
                            entry_SYSCALL_64_fastpath
                            mmap64
                            _dl_check_map_versions
                            _dl_check_all_versions
  ......

  # Samples: 6  of event 'cpu/instructions,call-graph=no/'
  # Event count (approx.): 359692
  #
  # Children      Self  Command  Shared Object     Symbol
  # ........  ........  .......  ................  .................................
  #
     89.03%     0.00%  sleep    [unknown]         [.] 0xffff6598ffff6598
     89.03%     0.00%  sleep    ld-2.17.so        [.] _dl_resolve_conflicts
     89.03%     0.00%  sleep    [kernel.vmlinux]  [k] page_fault

Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/1439289050-40510-2-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-record.txt |  3 ++-
 tools/perf/builtin-annotate.c            |  2 ++
 tools/perf/builtin-diff.c                |  3 +++
 tools/perf/tests/hists_cumulate.c        |  4 ++++
 tools/perf/util/evsel.c                  | 17 +++++++++++------
 tools/perf/util/hist.c                   |  9 ++++++---
 6 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 7f82dec..347a273 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -54,7 +54,8 @@ OPTIONS
 		    enabling time stamping. 0 for disabling time stamping.
 		    The default is 1.
 	  - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for
-			 FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode.
+			 FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and
+			 "no" for disable callgraph.
 	  - 'stack-size': user stack size for dwarf mode
 	  Note: If user explicitly sets options which conflict with the params,
 	  the value set by the params will be overridden.
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 467a23b..a32a64e 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -239,6 +239,8 @@ static int __cmd_annotate(struct perf_annotate *ann)
 		if (nr_samples > 0) {
 			total_nr_samples += nr_samples;
 			hists__collapse_resort(hists, NULL);
+			/* Don't sort callchain */
+			perf_evsel__reset_sample_bit(pos, CALLCHAIN);
 			hists__output_resort(hists, NULL);
 
 			if (symbol_conf.event_group &&
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index daaa7dc..0b180a8 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -722,6 +722,9 @@ static void data_process(void)
 		if (verbose || data__files_cnt > 2)
 			data__fprintf();
 
+		/* Don't sort callchain for perf diff */
+		perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN);
+
 		hists__process(hists_base);
 	}
 }
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 7d82c8b..7ed7370 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -279,6 +279,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine)
 
 	symbol_conf.use_callchain = false;
 	symbol_conf.cumulate_callchain = false;
+	perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
 
 	setup_sorting();
 	callchain_register_param(&callchain_param);
@@ -425,6 +426,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine)
 
 	symbol_conf.use_callchain = true;
 	symbol_conf.cumulate_callchain = false;
+	perf_evsel__set_sample_bit(evsel, CALLCHAIN);
 
 	setup_sorting();
 	callchain_register_param(&callchain_param);
@@ -482,6 +484,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine)
 
 	symbol_conf.use_callchain = false;
 	symbol_conf.cumulate_callchain = true;
+	perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
 
 	setup_sorting();
 	callchain_register_param(&callchain_param);
@@ -665,6 +668,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine)
 
 	symbol_conf.use_callchain = true;
 	symbol_conf.cumulate_callchain = true;
+	perf_evsel__set_sample_bit(evsel, CALLCHAIN);
 
 	setup_sorting();
 	callchain_register_param(&callchain_param);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 6647925..b096ef7 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -651,12 +651,17 @@ static void apply_config_terms(struct perf_evsel *evsel,
 
 		/* parse callgraph parameters */
 		if (callgraph_buf != NULL) {
-			param.enabled = true;
-			if (parse_callchain_record(callgraph_buf, &param)) {
-				pr_err("per-event callgraph setting for %s failed. "
-				       "Apply callgraph global setting for it\n",
-				       evsel->name);
-				return;
+			if (!strcmp(callgraph_buf, "no")) {
+				param.enabled = false;
+				param.record_mode = CALLCHAIN_NONE;
+			} else {
+				param.enabled = true;
+				if (parse_callchain_record(callgraph_buf, &param)) {
+					pr_err("per-event callgraph setting for %s failed. "
+					       "Apply callgraph global setting for it\n",
+					       evsel->name);
+					return;
+				}
 			}
 		}
 		if (dump_size > 0) {
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 6bccfae..1cd785b 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1109,13 +1109,14 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h)
 
 static void __hists__insert_output_entry(struct rb_root *entries,
 					 struct hist_entry *he,
-					 u64 min_callchain_hits)
+					 u64 min_callchain_hits,
+					 bool use_callchain)
 {
 	struct rb_node **p = &entries->rb_node;
 	struct rb_node *parent = NULL;
 	struct hist_entry *iter;
 
-	if (symbol_conf.use_callchain)
+	if (use_callchain)
 		callchain_param.sort(&he->sorted_chain, he->callchain,
 				      min_callchain_hits, &callchain_param);
 
@@ -1139,6 +1140,8 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
 	struct rb_node *next;
 	struct hist_entry *n;
 	u64 min_callchain_hits;
+	struct perf_evsel *evsel = hists_to_evsel(hists);
+	bool use_callchain = evsel ? (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) : symbol_conf.use_callchain;
 
 	min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100);
 
@@ -1157,7 +1160,7 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
 		n = rb_entry(next, struct hist_entry, rb_node_in);
 		next = rb_next(&n->rb_node_in);
 
-		__hists__insert_output_entry(&hists->entries, n, min_callchain_hits);
+		__hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain);
 		hists__inc_stats(hists, n);
 
 		if (!n->filtered)

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [tip:perf/core] perf report: Show call graph from reference events
  2015-08-11 10:30 ` [PATCH RFC V10 3/4] perf,tools: show call graph from reference events kan.liang
@ 2015-08-13  8:07   ` tip-bot for Kan Liang
  0 siblings, 0 replies; 8+ messages in thread
From: tip-bot for Kan Liang @ 2015-08-13  8:07 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: namhyung, hpa, tglx, jolsa, ak, kan.liang, mingo, acme, linux-kernel

Commit-ID:  9e207ddfa20781e56465ce9a537f0a377c9d34fb
Gitweb:     http://git.kernel.org/tip/9e207ddfa20781e56465ce9a537f0a377c9d34fb
Author:     Kan Liang <kan.liang@intel.com>
AuthorDate: Tue, 11 Aug 2015 06:30:49 -0400
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Wed, 12 Aug 2015 13:20:28 -0300

perf report: Show call graph from reference events

Introduce --show-ref-call-graph for perf report to print reference
callgraph for no callgraph event.

Here is an example.

 perf report --show-ref-call-graph --stdio

 # To display the perf.data header info, please use
 --header/--header-only options.
 #
 #
 # Total Lost Samples: 0
 #
 # Samples: 5  of event 'cpu/cpu-cycles,call-graph=fp/'
 # Event count (approx.): 144985
 #
 # Children      Self  Command  Shared Object     Symbol
 # ........  ........  .......  ................  ........................................
 #
    72.30%     0.00%  sleep    [kernel.vmlinux]  [k] entry_SYSCALL_64_fastpath
              |
              ---entry_SYSCALL_64_fastpath
                 |
                 |--22.62%-- __GI___libc_nanosleep
                  --77.38%-- [...]

......

 # Samples: 6  of event 'cpu/instructions,call-graph=no/', show reference callgraph
 # Event count (approx.): 172780
 #
 # Children      Self  Command  Shared Object     Symbol
 # ........  ........  .......  ................  ........................................
 #
    73.16%     0.00%  sleep    [kernel.vmlinux]  [k] entry_SYSCALL_64_fastpath
              |
              ---entry_SYSCALL_64_fastpath
                 |
                 |--31.44%-- __GI___libc_nanosleep
                  --68.56%-- [...]

Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/1439289050-40510-3-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-report.txt | 11 +++++++++++
 tools/perf/builtin-report.c              |  7 +++++++
 tools/perf/ui/browsers/hists.c           |  9 +++++++--
 tools/perf/util/hist.c                   |  7 ++++++-
 tools/perf/util/symbol.h                 |  3 ++-
 5 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 7b07d19..a18ba75 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -359,6 +359,17 @@ OPTIONS
 --full-source-path::
 	Show the full path for source files for srcline output.
 
+--show-ref-call-graph::
+	When multiple events are sampled, it may not be needed to collect
+	callgraphs for all of them. The sample sites are usually nearby,
+	and it's enough to collect the callgraphs on a reference event.
+	So user can use "call-graph=no" event modifier to disable callgraph
+	for other events to reduce the overhead.
+	However, perf report cannot show callgraphs for the event which
+	disable the callgraph.
+	This option extends the perf report to show reference callgraphs,
+	which collected by reference event, in no callgraph event.
+
 include::callchain-overhead-calculation.txt[]
 
 SEE ALSO
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index f301e86..62b285e 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -316,6 +316,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
 	if (evname != NULL)
 		ret += fprintf(fp, " of event '%s'", evname);
 
+	if (symbol_conf.show_ref_callgraph &&
+	    strstr(evname, "call-graph=no")) {
+		ret += fprintf(fp, ", show reference callgraph");
+	}
+
 	if (rep->mem_mode) {
 		ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events);
 		ret += fprintf(fp, "\n# Sort order   : %s", sort_order ? : default_mem_sort_order);
@@ -740,6 +745,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 			    itrace_parse_synth_opts),
 	OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename,
 			"Show full source file name path for source lines"),
+	OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph,
+		    "Show callgraph from reference event"),
 	OPT_END()
 	};
 	struct perf_data_file file = {
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index f296b73..10c7ec0 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1267,6 +1267,8 @@ static int hists__browser_title(struct hists *hists,
 	const char *ev_name = perf_evsel__name(evsel);
 	char buf[512];
 	size_t buflen = sizeof(buf);
+	char ref[30] = " show reference callgraph, ";
+	bool enable_ref = false;
 
 	if (symbol_conf.filter_relative) {
 		nr_samples = hists->stats.nr_non_filtered_samples;
@@ -1292,10 +1294,13 @@ static int hists__browser_title(struct hists *hists,
 		}
 	}
 
+	if (symbol_conf.show_ref_callgraph &&
+	    strstr(ev_name, "call-graph=no"))
+		enable_ref = true;
 	nr_samples = convert_unit(nr_samples, &unit);
 	printed = scnprintf(bf, size,
-			   "Samples: %lu%c of event '%s', Event count (approx.): %" PRIu64,
-			   nr_samples, unit, ev_name, nr_events);
+			   "Samples: %lu%c of event '%s',%sEvent count (approx.): %" PRIu64,
+			   nr_samples, unit, ev_name, enable_ref ? ref : " ", nr_events);
 
 
 	if (hists->uid_filter_str)
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 1cd785b..08b6cd9 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1141,7 +1141,12 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
 	struct hist_entry *n;
 	u64 min_callchain_hits;
 	struct perf_evsel *evsel = hists_to_evsel(hists);
-	bool use_callchain = evsel ? (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) : symbol_conf.use_callchain;
+	bool use_callchain;
+
+	if (evsel && !symbol_conf.show_ref_callgraph)
+		use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN;
+	else
+		use_callchain = symbol_conf.use_callchain;
 
 	min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100);
 
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index b98ce51..a4cde92 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -106,7 +106,8 @@ struct symbol_conf {
 			filter_relative,
 			show_hist_headers,
 			branch_callstack,
-			has_filter;
+			has_filter,
+			show_ref_callgraph;
 	const char	*vmlinux_name,
 			*kallsyms_name,
 			*source_prefix,

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [tip:perf/core] perf tests: Add tests to callgraph and time parse
  2015-08-11 10:30 ` [PATCH RFC V10 4/4] perf,tests: Add tests to callgraph and time parse kan.liang
@ 2015-08-13  8:07   ` tip-bot for Kan Liang
  0 siblings, 0 replies; 8+ messages in thread
From: tip-bot for Kan Liang @ 2015-08-13  8:07 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: ak, jolsa, kan.liang, hpa, mingo, tglx, linux-kernel, namhyung, acme

Commit-ID:  71ef150ee06df29c5b427307dc0bacfe06a8baea
Gitweb:     http://git.kernel.org/tip/71ef150ee06df29c5b427307dc0bacfe06a8baea
Author:     Kan Liang <kan.liang@intel.com>
AuthorDate: Tue, 11 Aug 2015 06:30:50 -0400
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Wed, 12 Aug 2015 13:20:29 -0300

perf tests: Add tests to callgraph and time parse

Add tests in tests/parse-events.c to check call-graph and time option.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/1439289050-40510-4-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/parse-events.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index f65bb89..9b6b2b63 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -479,6 +479,39 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist)
 	return 0;
 }
 
+static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	/* cpu/config=1,call-graph=fp,time,period=100000/ */
+	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",  1 == evsel->attr.config);
+	/*
+	 * The period, time and callgraph value gets configured
+	 * within perf_evlist__config,
+	 * while this test executes only parse events method.
+	 */
+	TEST_ASSERT_VAL("wrong period",     0 == evsel->attr.sample_period);
+	TEST_ASSERT_VAL("wrong callgraph",  !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
+	TEST_ASSERT_VAL("wrong time",  !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
+
+	/* cpu/config=2,call-graph=no,time=0,period=2000/ */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",  2 == evsel->attr.config);
+	/*
+	 * The period, time and callgraph value gets configured
+	 * within perf_evlist__config,
+	 * while this test executes only parse events method.
+	 */
+	TEST_ASSERT_VAL("wrong period",     0 == evsel->attr.sample_period);
+	TEST_ASSERT_VAL("wrong callgraph",  !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
+	TEST_ASSERT_VAL("wrong time",  !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
+
+	return 0;
+}
+
 static int test__checkevent_pmu_events(struct perf_evlist *evlist)
 {
 	struct perf_evsel *evsel = perf_evlist__first(evlist);
@@ -1555,6 +1588,11 @@ static struct evlist_test test__events_pmu[] = {
 		.check = test__checkevent_pmu_name,
 		.id    = 1,
 	},
+	{
+		.name  = "cpu/config=1,call-graph=fp,time,period=100000/,cpu/config=2,call-graph=no,time=0,period=2000/",
+		.check = test__checkevent_pmu_partial_time_callgraph,
+		.id    = 2,
+	},
 };
 
 struct terms_test {

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2015-08-13  8:08 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-08-11 10:30 [PATCH RFC V10 1/4] perf,tools: per-event callgraph support kan.liang
2015-08-11 10:30 ` [PATCH RFC V10 2/4] perf,tools: disable " kan.liang
2015-08-13  8:07   ` [tip:perf/core] perf callchain: Allow disabling call graphs per event tip-bot for Kan Liang
2015-08-11 10:30 ` [PATCH RFC V10 3/4] perf,tools: show call graph from reference events kan.liang
2015-08-13  8:07   ` [tip:perf/core] perf report: Show " tip-bot for Kan Liang
2015-08-11 10:30 ` [PATCH RFC V10 4/4] perf,tests: Add tests to callgraph and time parse kan.liang
2015-08-13  8:07   ` [tip:perf/core] perf tests: " tip-bot for Kan Liang
2015-08-13  8:06 ` [tip:perf/core] perf callchain: Per-event type selection support tip-bot for Kan Liang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).