All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perf report: calculate the average cycles of iterations
@ 2017-08-07 13:05 Jin Yao
  2017-08-14  1:30 ` Jin, Yao
  2017-09-05  5:19 ` [tip:perf/urgent] perf report: Calculate " tip-bot for Jin Yao
  0 siblings, 2 replies; 6+ messages in thread
From: Jin Yao @ 2017-08-07 13:05 UTC (permalink / raw)
  To: acme, jolsa, peterz, mingo, alexander.shishkin
  Cc: Linux-kernel, ak, kan.liang, yao.jin, Jin Yao

The branch history code has a loop detection function. With
this, we can get the number of iterations by calculating the
removed loops.

It would also be nice to know the average number of cycles per
iteration. This patch adds up the cycles in the branch entries
of the removed loops and saves the result to the next branch entry
(e.g. branch entry A).

Finally, the iteration count and the average cycles are
displayed at the "from" of branch entry A.

For example:
perf record -g -j any,save_type ./div
perf report --branch-history --no-children --stdio

--22.63%--main div.c:42 (RET CROSS_2M)
          compute_flag div.c:28 (cycles:2 iter:173115 avg_cycles:2)
          |
           --10.73%--compute_flag div.c:27 (RET CROSS_2M)
                     rand rand.c:28 (cycles:1)
                     rand rand.c:28 (RET CROSS_2M)
                     __random random.c:298 (cycles:1)
                     __random random.c:297 (COND_BWD CROSS_2M)
                     __random random.c:295 (cycles:1)
                     __random random.c:295 (COND_BWD CROSS_2M)
                     __random random.c:295 (cycles:1)
                     __random random.c:295 (RET CROSS_2M)

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
---
 tools/perf/ui/browsers/hists.c |  8 +---
 tools/perf/ui/stdio/hist.c     | 10 ++---
 tools/perf/util/callchain.c    | 49 +++++++++++------------
 tools/perf/util/callchain.h    |  9 ++---
 tools/perf/util/machine.c      | 88 +++++++++++++++++++++++++-----------------
 5 files changed, 85 insertions(+), 79 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index f4bc246..13dfb0a 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -931,12 +931,8 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
 				       browser->show_dso);
 
 	if (symbol_conf.show_branchflag_count) {
-		if (need_percent)
-			callchain_list_counts__printf_value(node, chain, NULL,
-							    buf, sizeof(buf));
-		else
-			callchain_list_counts__printf_value(NULL, chain, NULL,
-							    buf, sizeof(buf));
+		callchain_list_counts__printf_value(chain, NULL,
+						    buf, sizeof(buf));
 
 		if (asprintf(&alloc_str2, "%s%s", str, buf) < 0)
 			str = "Not enough memory!";
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 5c95b83..8bdb7a5 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -124,12 +124,8 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
 	str = callchain_list__sym_name(chain, bf, sizeof(bf), false);
 
 	if (symbol_conf.show_branchflag_count) {
-		if (!period)
-			callchain_list_counts__printf_value(node, chain, NULL,
-							    buf, sizeof(buf));
-		else
-			callchain_list_counts__printf_value(NULL, chain, NULL,
-							    buf, sizeof(buf));
+		callchain_list_counts__printf_value(chain, NULL,
+						    buf, sizeof(buf));
 
 		if (asprintf(&alloc_str, "%s%s", str, buf) < 0)
 			str = "Not enough memory!";
@@ -313,7 +309,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
 
 			if (symbol_conf.show_branchflag_count)
 				ret += callchain_list_counts__printf_value(
-						NULL, chain, fp, NULL, 0);
+						chain, fp, NULL, 0);
 			ret += fprintf(fp, "\n");
 
 			if (++entries_printed == callchain_param.print_limit)
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index f320b07..510b513 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -588,7 +588,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 				call->cycles_count =
 					cursor_node->branch_flags.cycles;
 				call->iter_count = cursor_node->nr_loop_iter;
-				call->samples_count = cursor_node->samples;
+				call->iter_cycles = cursor_node->iter_cycles;
 			}
 		}
 
@@ -722,7 +722,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
 				cnode->cycles_count +=
 					node->branch_flags.cycles;
 				cnode->iter_count += node->nr_loop_iter;
-				cnode->samples_count += node->samples;
+				cnode->iter_cycles += node->iter_cycles;
 			}
 		}
 
@@ -998,7 +998,7 @@ int callchain_merge(struct callchain_cursor *cursor,
 int callchain_cursor_append(struct callchain_cursor *cursor,
 			    u64 ip, struct map *map, struct symbol *sym,
 			    bool branch, struct branch_flags *flags,
-			    int nr_loop_iter, int samples, u64 branch_from)
+			    int nr_loop_iter, u64 iter_cycles, u64 branch_from)
 {
 	struct callchain_cursor_node *node = *cursor->last;
 
@@ -1016,7 +1016,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
 	node->sym = sym;
 	node->branch = branch;
 	node->nr_loop_iter = nr_loop_iter;
-	node->samples = samples;
+	node->iter_cycles = iter_cycles;
 
 	if (flags)
 		memcpy(&node->branch_flags, flags,
@@ -1306,7 +1306,7 @@ static int branch_to_str(char *bf, int bfsize,
 static int branch_from_str(char *bf, int bfsize,
 			   u64 branch_count,
 			   u64 cycles_count, u64 iter_count,
-			   u64 samples_count)
+			   u64 iter_cycles)
 {
 	int printed = 0, i = 0;
 	u64 cycles;
@@ -1318,9 +1318,13 @@ static int branch_from_str(char *bf, int bfsize,
 				bf + printed, bfsize - printed);
 	}
 
-	if (iter_count && samples_count) {
-		printed += count_pri64_printf(i++, "iterations",
-				iter_count / samples_count,
+	if (iter_count) {
+		printed += count_pri64_printf(i++, "iter",
+				iter_count,
+				bf + printed, bfsize - printed);
+
+		printed += count_pri64_printf(i++, "avg_cycles",
+				iter_cycles / iter_count,
 				bf + printed, bfsize - printed);
 	}
 
@@ -1333,7 +1337,7 @@ static int branch_from_str(char *bf, int bfsize,
 static int counts_str_build(char *bf, int bfsize,
 			     u64 branch_count, u64 predicted_count,
 			     u64 abort_count, u64 cycles_count,
-			     u64 iter_count, u64 samples_count,
+			     u64 iter_count, u64 iter_cycles,
 			     struct branch_type_stat *brtype_stat)
 {
 	int printed;
@@ -1346,7 +1350,7 @@ static int counts_str_build(char *bf, int bfsize,
 				predicted_count, abort_count, brtype_stat);
 	} else {
 		printed = branch_from_str(bf, bfsize, branch_count,
-				cycles_count, iter_count, samples_count);
+				cycles_count, iter_count, iter_cycles);
 	}
 
 	if (!printed)
@@ -1358,14 +1362,14 @@ static int counts_str_build(char *bf, int bfsize,
 static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
 				   u64 branch_count, u64 predicted_count,
 				   u64 abort_count, u64 cycles_count,
-				   u64 iter_count, u64 samples_count,
+				   u64 iter_count, u64 iter_cycles,
 				   struct branch_type_stat *brtype_stat)
 {
 	char str[256];
 
 	counts_str_build(str, sizeof(str), branch_count,
 			 predicted_count, abort_count, cycles_count,
-			 iter_count, samples_count, brtype_stat);
+			 iter_count, iter_cycles, brtype_stat);
 
 	if (fp)
 		return fprintf(fp, "%s", str);
@@ -1373,31 +1377,23 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
 	return scnprintf(bf, bfsize, "%s", str);
 }
 
-int callchain_list_counts__printf_value(struct callchain_node *node,
-					struct callchain_list *clist,
+int callchain_list_counts__printf_value(struct callchain_list *clist,
 					FILE *fp, char *bf, int bfsize)
 {
 	u64 branch_count, predicted_count;
 	u64 abort_count, cycles_count;
-	u64 iter_count = 0, samples_count = 0;
+	u64 iter_count, iter_cycles;
 
 	branch_count = clist->branch_count;
 	predicted_count = clist->predicted_count;
 	abort_count = clist->abort_count;
 	cycles_count = clist->cycles_count;
-
-	if (node) {
-		struct callchain_list *call;
-
-		list_for_each_entry(call, &node->val, list) {
-			iter_count += call->iter_count;
-			samples_count += call->samples_count;
-		}
-	}
+	iter_count = clist->iter_count;
+	iter_cycles = clist->iter_cycles;
 
 	return callchain_counts_printf(fp, bf, bfsize, branch_count,
 				       predicted_count, abort_count,
-				       cycles_count, iter_count, samples_count,
+				       cycles_count, iter_count, iter_cycles,
 				       &clist->brtype_stat);
 }
 
@@ -1523,7 +1519,8 @@ int callchain_cursor__copy(struct callchain_cursor *dst,
 
 		rc = callchain_cursor_append(dst, node->ip, node->map, node->sym,
 					     node->branch, &node->branch_flags,
-					     node->nr_loop_iter, node->samples,
+					     node->nr_loop_iter,
+					     node->iter_cycles,
 					     node->branch_from);
 		if (rc)
 			break;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 9773820..1ed6fc6 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -119,7 +119,7 @@ struct callchain_list {
 	u64			abort_count;
 	u64			cycles_count;
 	u64			iter_count;
-	u64			samples_count;
+	u64			iter_cycles;
 	struct branch_type_stat brtype_stat;
 	char		       *srcline;
 	struct list_head	list;
@@ -139,7 +139,7 @@ struct callchain_cursor_node {
 	struct branch_flags		branch_flags;
 	u64				branch_from;
 	int				nr_loop_iter;
-	int				samples;
+	u64				iter_cycles;
 	struct callchain_cursor_node	*next;
 };
 
@@ -201,7 +201,7 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
 int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
 			    struct map *map, struct symbol *sym,
 			    bool branch, struct branch_flags *flags,
-			    int nr_loop_iter, int samples, u64 branch_from);
+			    int nr_loop_iter, u64 iter_cycles, u64 branch_from);
 
 /* Close a cursor writing session. Initialize for the reader */
 static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
@@ -282,8 +282,7 @@ char *callchain_node__scnprintf_value(struct callchain_node *node,
 int callchain_node__fprintf_value(struct callchain_node *node,
 				  FILE *fp, u64 total);
 
-int callchain_list_counts__printf_value(struct callchain_node *node,
-					struct callchain_list *clist,
+int callchain_list_counts__printf_value(struct callchain_list *clist,
 					FILE *fp, char *bf, int bfsize);
 
 void free_callchain(struct callchain_root *root);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index d4df353..9d68211 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1673,6 +1673,11 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
 	return mi;
 }
 
+struct iterations {
+	int nr_loop_iter;
+	u64 cycles;
+};
+
 static int add_callchain_ip(struct thread *thread,
 			    struct callchain_cursor *cursor,
 			    struct symbol **parent,
@@ -1681,11 +1686,12 @@ static int add_callchain_ip(struct thread *thread,
 			    u64 ip,
 			    bool branch,
 			    struct branch_flags *flags,
-			    int nr_loop_iter,
-			    int samples,
+			    struct iterations *iter,
 			    u64 branch_from)
 {
 	struct addr_location al;
+	int nr_loop_iter = 0;
+	u64 iter_cycles = 0;
 
 	al.filtered = 0;
 	al.sym = NULL;
@@ -1735,9 +1741,15 @@ static int add_callchain_ip(struct thread *thread,
 
 	if (symbol_conf.hide_unresolved && al.sym == NULL)
 		return 0;
+
+	if (iter) {
+		nr_loop_iter = iter->nr_loop_iter;
+		iter_cycles = iter->cycles;
+	}
+
 	return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
-				       branch, flags, nr_loop_iter, samples,
-				       branch_from);
+				       branch, flags, nr_loop_iter,
+				       iter_cycles, branch_from);
 }
 
 struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@@ -1758,6 +1770,18 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 	return bi;
 }
 
+static void save_iterations(struct iterations *iter,
+			    struct branch_entry *be, int nr)
+{
+	int i;
+
+	iter->nr_loop_iter = nr;
+	iter->cycles = 0;
+
+	for (i = 0; i < nr; i++)
+		iter->cycles += be[i].flags.cycles;
+}
+
 #define CHASHSZ 127
 #define CHASHBITS 7
 #define NO_ENTRY 0xff
@@ -1765,7 +1789,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 #define PERF_MAX_BRANCH_DEPTH 127
 
 /* Remove loops. */
-static int remove_loops(struct branch_entry *l, int nr)
+static int remove_loops(struct branch_entry *l, int nr,
+			struct iterations *iter)
 {
 	int i, j, off;
 	unsigned char chash[CHASHSZ];
@@ -1790,8 +1815,18 @@ static int remove_loops(struct branch_entry *l, int nr)
 					break;
 				}
 			if (is_loop) {
-				memmove(l + i, l + i + off,
-					(nr - (i + off)) * sizeof(*l));
+				j = nr - (i + off);
+				if (j > 0) {
+					save_iterations(iter + i + off,
+						l + i, off);
+
+					memmove(iter + i, iter + i + off,
+						j * sizeof(*iter));
+
+					memmove(l + i, l + i + off,
+						j * sizeof(*l));
+				}
+
 				nr -= off;
 			}
 		}
@@ -1881,7 +1916,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 
 			err = add_callchain_ip(thread, cursor, parent,
 					       root_al, &cpumode, ip,
-					       branch, flags, 0, 0,
+					       branch, flags, NULL,
 					       branch_from);
 			if (err)
 				return (err < 0) ? err : 0;
@@ -1907,7 +1942,6 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	int i, j, err, nr_entries;
 	int skip_idx = -1;
 	int first_call = 0;
-	int nr_loop_iter;
 
 	if (chain)
 		chain_nr = chain->nr;
@@ -1940,6 +1974,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	if (branch && callchain_param.branch_callstack) {
 		int nr = min(max_stack, (int)branch->nr);
 		struct branch_entry be[nr];
+		struct iterations iter[nr];
 
 		if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
 			pr_warning("corrupted branch chain. skipping...\n");
@@ -1970,38 +2005,21 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 				be[i] = branch->entries[branch->nr - i - 1];
 		}
 
-		nr_loop_iter = nr;
-		nr = remove_loops(be, nr);
-
-		/*
-		 * Get the number of iterations.
-		 * It's only approximation, but good enough in practice.
-		 */
-		if (nr_loop_iter > nr)
-			nr_loop_iter = nr_loop_iter - nr + 1;
-		else
-			nr_loop_iter = 0;
+		memset(iter, 0, sizeof(struct iterations) * nr);
+		nr = remove_loops(be, nr, iter);
 
 		for (i = 0; i < nr; i++) {
-			if (i == nr - 1)
-				err = add_callchain_ip(thread, cursor, parent,
-						       root_al,
-						       NULL, be[i].to,
-						       true, &be[i].flags,
-						       nr_loop_iter, 1,
-						       be[i].from);
-			else
-				err = add_callchain_ip(thread, cursor, parent,
-						       root_al,
-						       NULL, be[i].to,
-						       true, &be[i].flags,
-						       0, 0, be[i].from);
+			err = add_callchain_ip(thread, cursor, parent,
+					       root_al,
+					       NULL, be[i].to,
+					       true, &be[i].flags,
+					       NULL, be[i].from);
 
 			if (!err)
 				err = add_callchain_ip(thread, cursor, parent, root_al,
 						       NULL, be[i].from,
 						       true, &be[i].flags,
-						       0, 0, 0);
+						       &iter[i], 0);
 			if (err == -EINVAL)
 				break;
 			if (err)
@@ -2035,7 +2053,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 
 		err = add_callchain_ip(thread, cursor, parent,
 				       root_al, &cpumode, ip,
-				       false, NULL, 0, 0, 0);
+				       false, NULL, NULL, 0);
 
 		if (err)
 			return (err < 0) ? err : 0;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* RE: [PATCH] perf report: calculate the average cycles of iterations
  2017-08-07 13:05 [PATCH] perf report: calculate the average cycles of iterations Jin Yao
@ 2017-08-14  1:30 ` Jin, Yao
  2017-08-15  3:19   ` Andi Kleen
  2017-09-05  5:19 ` [tip:perf/urgent] perf report: Calculate " tip-bot for Jin Yao
  1 sibling, 1 reply; 6+ messages in thread
From: Jin, Yao @ 2017-08-14  1:30 UTC (permalink / raw)
  To: Jin Yao, acme, jolsa, peterz, mingo, alexander.shishkin, ak
  Cc: Linux-kernel, Liang, Kan

Hi Andi, 

Do you have any comments for this patch?

Thanks
Jin Yao

-----Original Message-----
From: Jin Yao [mailto:yao.jin@linux.intel.com] 
Sent: Monday, August 7, 2017 9:05 PM
To: acme@kernel.org; jolsa@kernel.org; peterz@infradead.org; mingo@redhat.com; alexander.shishkin@linux.intel.com
Cc: Linux-kernel@vger.kernel.org; ak@linux.intel.com; Liang, Kan <kan.liang@intel.com>; Jin, Yao <yao.jin@intel.com>; Jin Yao <yao.jin@linux.intel.com>
Subject: [PATCH] perf report: calculate the average cycles of iterations

The branch history code has a loop detection function. With this, we can get the number of iterations by calculating the removed loops.

It would also be nice to know the average number of cycles per iteration. This patch adds up the cycles in the branch entries of the removed loops and saves the result to the next branch entry (e.g. branch entry A).

Finally, the iteration count and the average cycles are displayed at the "from" of branch entry A.

For example:
perf record -g -j any,save_type ./div
perf report --branch-history --no-children --stdio

--22.63%--main div.c:42 (RET CROSS_2M)
          compute_flag div.c:28 (cycles:2 iter:173115 avg_cycles:2)
          |
           --10.73%--compute_flag div.c:27 (RET CROSS_2M)
                     rand rand.c:28 (cycles:1)
                     rand rand.c:28 (RET CROSS_2M)
                     __random random.c:298 (cycles:1)
                     __random random.c:297 (COND_BWD CROSS_2M)
                     __random random.c:295 (cycles:1)
                     __random random.c:295 (COND_BWD CROSS_2M)
                     __random random.c:295 (cycles:1)
                     __random random.c:295 (RET CROSS_2M)

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
---
 tools/perf/ui/browsers/hists.c |  8 +---
 tools/perf/ui/stdio/hist.c     | 10 ++---
 tools/perf/util/callchain.c    | 49 +++++++++++------------
 tools/perf/util/callchain.h    |  9 ++---
 tools/perf/util/machine.c      | 88 +++++++++++++++++++++++++-----------------
 5 files changed, 85 insertions(+), 79 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index f4bc246..13dfb0a 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -931,12 +931,8 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
 				       browser->show_dso);
 
 	if (symbol_conf.show_branchflag_count) {
-		if (need_percent)
-			callchain_list_counts__printf_value(node, chain, NULL,
-							    buf, sizeof(buf));
-		else
-			callchain_list_counts__printf_value(NULL, chain, NULL,
-							    buf, sizeof(buf));
+		callchain_list_counts__printf_value(chain, NULL,
+						    buf, sizeof(buf));
 
 		if (asprintf(&alloc_str2, "%s%s", str, buf) < 0)
 			str = "Not enough memory!";
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 5c95b83..8bdb7a5 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -124,12 +124,8 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
 	str = callchain_list__sym_name(chain, bf, sizeof(bf), false);
 
 	if (symbol_conf.show_branchflag_count) {
-		if (!period)
-			callchain_list_counts__printf_value(node, chain, NULL,
-							    buf, sizeof(buf));
-		else
-			callchain_list_counts__printf_value(NULL, chain, NULL,
-							    buf, sizeof(buf));
+		callchain_list_counts__printf_value(chain, NULL,
+						    buf, sizeof(buf));
 
 		if (asprintf(&alloc_str, "%s%s", str, buf) < 0)
 			str = "Not enough memory!";
@@ -313,7 +309,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
 
 			if (symbol_conf.show_branchflag_count)
 				ret += callchain_list_counts__printf_value(
-						NULL, chain, fp, NULL, 0);
+						chain, fp, NULL, 0);
 			ret += fprintf(fp, "\n");
 
 			if (++entries_printed == callchain_param.print_limit)
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index f320b07..510b513 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -588,7 +588,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 				call->cycles_count =
 					cursor_node->branch_flags.cycles;
 				call->iter_count = cursor_node->nr_loop_iter;
-				call->samples_count = cursor_node->samples;
+				call->iter_cycles = cursor_node->iter_cycles;
 			}
 		}
 
@@ -722,7 +722,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
 				cnode->cycles_count +=
 					node->branch_flags.cycles;
 				cnode->iter_count += node->nr_loop_iter;
-				cnode->samples_count += node->samples;
+				cnode->iter_cycles += node->iter_cycles;
 			}
 		}
 
@@ -998,7 +998,7 @@ int callchain_merge(struct callchain_cursor *cursor,
 int callchain_cursor_append(struct callchain_cursor *cursor,
 			    u64 ip, struct map *map, struct symbol *sym,
 			    bool branch, struct branch_flags *flags,
-			    int nr_loop_iter, int samples, u64 branch_from)
+			    int nr_loop_iter, u64 iter_cycles, u64 branch_from)
 {
 	struct callchain_cursor_node *node = *cursor->last;
 
@@ -1016,7 +1016,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
 	node->sym = sym;
 	node->branch = branch;
 	node->nr_loop_iter = nr_loop_iter;
-	node->samples = samples;
+	node->iter_cycles = iter_cycles;
 
 	if (flags)
 		memcpy(&node->branch_flags, flags,
@@ -1306,7 +1306,7 @@ static int branch_to_str(char *bf, int bfsize,
 static int branch_from_str(char *bf, int bfsize,
 			   u64 branch_count,
 			   u64 cycles_count, u64 iter_count,
-			   u64 samples_count)
+			   u64 iter_cycles)
 {
 	int printed = 0, i = 0;
 	u64 cycles;
@@ -1318,9 +1318,13 @@ static int branch_from_str(char *bf, int bfsize,
 				bf + printed, bfsize - printed);
 	}
 
-	if (iter_count && samples_count) {
-		printed += count_pri64_printf(i++, "iterations",
-				iter_count / samples_count,
+	if (iter_count) {
+		printed += count_pri64_printf(i++, "iter",
+				iter_count,
+				bf + printed, bfsize - printed);
+
+		printed += count_pri64_printf(i++, "avg_cycles",
+				iter_cycles / iter_count,
 				bf + printed, bfsize - printed);
 	}
 
@@ -1333,7 +1337,7 @@ static int branch_from_str(char *bf, int bfsize,
 static int counts_str_build(char *bf, int bfsize,
 			     u64 branch_count, u64 predicted_count,
 			     u64 abort_count, u64 cycles_count,
-			     u64 iter_count, u64 samples_count,
+			     u64 iter_count, u64 iter_cycles,
 			     struct branch_type_stat *brtype_stat)
 {
 	int printed;
@@ -1346,7 +1350,7 @@ static int counts_str_build(char *bf, int bfsize,
 				predicted_count, abort_count, brtype_stat);
 	} else {
 		printed = branch_from_str(bf, bfsize, branch_count,
-				cycles_count, iter_count, samples_count);
+				cycles_count, iter_count, iter_cycles);
 	}
 
 	if (!printed)
@@ -1358,14 +1362,14 @@ static int counts_str_build(char *bf, int bfsize,
 static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
 				   u64 branch_count, u64 predicted_count,
 				   u64 abort_count, u64 cycles_count,
-				   u64 iter_count, u64 samples_count,
+				   u64 iter_count, u64 iter_cycles,
 				   struct branch_type_stat *brtype_stat)
 {
 	char str[256];
 
 	counts_str_build(str, sizeof(str), branch_count,
 			 predicted_count, abort_count, cycles_count,
-			 iter_count, samples_count, brtype_stat);
+			 iter_count, iter_cycles, brtype_stat);
 
 	if (fp)
 		return fprintf(fp, "%s", str);
@@ -1373,31 +1377,23 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
 	return scnprintf(bf, bfsize, "%s", str);
 }
 
-int callchain_list_counts__printf_value(struct callchain_node *node,
-					struct callchain_list *clist,
+int callchain_list_counts__printf_value(struct callchain_list *clist,
 					FILE *fp, char *bf, int bfsize)
 {
 	u64 branch_count, predicted_count;
 	u64 abort_count, cycles_count;
-	u64 iter_count = 0, samples_count = 0;
+	u64 iter_count, iter_cycles;
 
 	branch_count = clist->branch_count;
 	predicted_count = clist->predicted_count;
 	abort_count = clist->abort_count;
 	cycles_count = clist->cycles_count;
-
-	if (node) {
-		struct callchain_list *call;
-
-		list_for_each_entry(call, &node->val, list) {
-			iter_count += call->iter_count;
-			samples_count += call->samples_count;
-		}
-	}
+	iter_count = clist->iter_count;
+	iter_cycles = clist->iter_cycles;
 
 	return callchain_counts_printf(fp, bf, bfsize, branch_count,
 				       predicted_count, abort_count,
-				       cycles_count, iter_count, samples_count,
+				       cycles_count, iter_count, iter_cycles,
 				       &clist->brtype_stat);
 }
 
@@ -1523,7 +1519,8 @@ int callchain_cursor__copy(struct callchain_cursor *dst,
 
 		rc = callchain_cursor_append(dst, node->ip, node->map, node->sym,
 					     node->branch, &node->branch_flags,
-					     node->nr_loop_iter, node->samples,
+					     node->nr_loop_iter,
+					     node->iter_cycles,
 					     node->branch_from);
 		if (rc)
 			break;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 9773820..1ed6fc6 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -119,7 +119,7 @@ struct callchain_list {
 	u64			abort_count;
 	u64			cycles_count;
 	u64			iter_count;
-	u64			samples_count;
+	u64			iter_cycles;
 	struct branch_type_stat brtype_stat;
 	char		       *srcline;
 	struct list_head	list;
@@ -139,7 +139,7 @@ struct callchain_cursor_node {
 	struct branch_flags		branch_flags;
 	u64				branch_from;
 	int				nr_loop_iter;
-	int				samples;
+	u64				iter_cycles;
 	struct callchain_cursor_node	*next;
 };
 
@@ -201,7 +201,7 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
 int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
 			    struct map *map, struct symbol *sym,
 			    bool branch, struct branch_flags *flags,
-			    int nr_loop_iter, int samples, u64 branch_from);
+			    int nr_loop_iter, u64 iter_cycles, u64 branch_from);
 
 /* Close a cursor writing session. Initialize for the reader */
 static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
@@ -282,8 +282,7 @@ char *callchain_node__scnprintf_value(struct callchain_node *node,
 int callchain_node__fprintf_value(struct callchain_node *node,
 				  FILE *fp, u64 total);
 
-int callchain_list_counts__printf_value(struct callchain_node *node,
-					struct callchain_list *clist,
+int callchain_list_counts__printf_value(struct callchain_list *clist,
 					FILE *fp, char *bf, int bfsize);
 
 void free_callchain(struct callchain_root *root);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index d4df353..9d68211 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1673,6 +1673,11 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
 	return mi;
 }
 
+struct iterations {
+	int nr_loop_iter;
+	u64 cycles;
+};
+
 static int add_callchain_ip(struct thread *thread,
 			    struct callchain_cursor *cursor,
 			    struct symbol **parent,
@@ -1681,11 +1686,12 @@ static int add_callchain_ip(struct thread *thread,
 			    u64 ip,
 			    bool branch,
 			    struct branch_flags *flags,
-			    int nr_loop_iter,
-			    int samples,
+			    struct iterations *iter,
 			    u64 branch_from)
 {
 	struct addr_location al;
+	int nr_loop_iter = 0;
+	u64 iter_cycles = 0;
 
 	al.filtered = 0;
 	al.sym = NULL;
@@ -1735,9 +1741,15 @@ static int add_callchain_ip(struct thread *thread,
 
 	if (symbol_conf.hide_unresolved && al.sym == NULL)
 		return 0;
+
+	if (iter) {
+		nr_loop_iter = iter->nr_loop_iter;
+		iter_cycles = iter->cycles;
+	}
+
 	return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
-				       branch, flags, nr_loop_iter, samples,
-				       branch_from);
+				       branch, flags, nr_loop_iter,
+				       iter_cycles, branch_from);
 }
 
 struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@@ -1758,6 +1770,18 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 	return bi;
 }
 
+static void save_iterations(struct iterations *iter,
+			    struct branch_entry *be, int nr)
+{
+	int i;
+
+	iter->nr_loop_iter = nr;
+	iter->cycles = 0;
+
+	for (i = 0; i < nr; i++)
+		iter->cycles += be[i].flags.cycles;
+}
+
 #define CHASHSZ 127
 #define CHASHBITS 7
 #define NO_ENTRY 0xff
@@ -1765,7 +1789,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 #define PERF_MAX_BRANCH_DEPTH 127
 
 /* Remove loops. */
-static int remove_loops(struct branch_entry *l, int nr)
+static int remove_loops(struct branch_entry *l, int nr,
+			struct iterations *iter)
 {
 	int i, j, off;
 	unsigned char chash[CHASHSZ];
@@ -1790,8 +1815,18 @@ static int remove_loops(struct branch_entry *l, int nr)
 					break;
 				}
 			if (is_loop) {
-				memmove(l + i, l + i + off,
-					(nr - (i + off)) * sizeof(*l));
+				j = nr - (i + off);
+				if (j > 0) {
+					save_iterations(iter + i + off,
+						l + i, off);
+
+					memmove(iter + i, iter + i + off,
+						j * sizeof(*iter));
+
+					memmove(l + i, l + i + off,
+						j * sizeof(*l));
+				}
+
 				nr -= off;
 			}
 		}
@@ -1881,7 +1916,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 
 			err = add_callchain_ip(thread, cursor, parent,
 					       root_al, &cpumode, ip,
-					       branch, flags, 0, 0,
+					       branch, flags, NULL,
 					       branch_from);
 			if (err)
 				return (err < 0) ? err : 0;
@@ -1907,7 +1942,6 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	int i, j, err, nr_entries;
 	int skip_idx = -1;
 	int first_call = 0;
-	int nr_loop_iter;
 
 	if (chain)
 		chain_nr = chain->nr;
@@ -1940,6 +1974,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	if (branch && callchain_param.branch_callstack) {
 		int nr = min(max_stack, (int)branch->nr);
 		struct branch_entry be[nr];
+		struct iterations iter[nr];
 
 		if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
 			pr_warning("corrupted branch chain. skipping...\n");
@@ -1970,38 +2005,21 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 				be[i] = branch->entries[branch->nr - i - 1];
 		}
 
-		nr_loop_iter = nr;
-		nr = remove_loops(be, nr);
-
-		/*
-		 * Get the number of iterations.
-		 * It's only approximation, but good enough in practice.
-		 */
-		if (nr_loop_iter > nr)
-			nr_loop_iter = nr_loop_iter - nr + 1;
-		else
-			nr_loop_iter = 0;
+		memset(iter, 0, sizeof(struct iterations) * nr);
+		nr = remove_loops(be, nr, iter);
 
 		for (i = 0; i < nr; i++) {
-			if (i == nr - 1)
-				err = add_callchain_ip(thread, cursor, parent,
-						       root_al,
-						       NULL, be[i].to,
-						       true, &be[i].flags,
-						       nr_loop_iter, 1,
-						       be[i].from);
-			else
-				err = add_callchain_ip(thread, cursor, parent,
-						       root_al,
-						       NULL, be[i].to,
-						       true, &be[i].flags,
-						       0, 0, be[i].from);
+			err = add_callchain_ip(thread, cursor, parent,
+					       root_al,
+					       NULL, be[i].to,
+					       true, &be[i].flags,
+					       NULL, be[i].from);
 
 			if (!err)
 				err = add_callchain_ip(thread, cursor, parent, root_al,
 						       NULL, be[i].from,
 						       true, &be[i].flags,
-						       0, 0, 0);
+						       &iter[i], 0);
 			if (err == -EINVAL)
 				break;
 			if (err)
@@ -2035,7 +2053,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 
 		err = add_callchain_ip(thread, cursor, parent,
 				       root_al, &cpumode, ip,
-				       false, NULL, 0, 0, 0);
+				       false, NULL, NULL, 0);
 
 		if (err)
 			return (err < 0) ? err : 0;
--
2.7.4

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] perf report: calculate the average cycles of iterations
  2017-08-14  1:30 ` Jin, Yao
@ 2017-08-15  3:19   ` Andi Kleen
  2017-08-30  8:41     ` Jin, Yao
  0 siblings, 1 reply; 6+ messages in thread
From: Andi Kleen @ 2017-08-15  3:19 UTC (permalink / raw)
  To: Jin, Yao
  Cc: Jin Yao, acme, jolsa, peterz, mingo, alexander.shishkin,
	Linux-kernel, Liang, Kan

On Mon, Aug 14, 2017 at 01:30:29AM +0000, Jin, Yao wrote:
> Hi Andi, 
> 
> Do you have any comments for this patch?

Patch looks good to me. 

Reviewed-by: Andi Kleen <ak@linux.intel.com>

-Andi

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] perf report: calculate the average cycles of iterations
  2017-08-15  3:19   ` Andi Kleen
@ 2017-08-30  8:41     ` Jin, Yao
  2017-08-30 13:03       ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 6+ messages in thread
From: Jin, Yao @ 2017-08-30  8:41 UTC (permalink / raw)
  To: Andi Kleen, Jin, Yao, Arnaldo Carvalho de Melo
  Cc: jolsa, peterz, mingo, alexander.shishkin, Linux-kernel, Liang, Kan

Hi Arnaldo,

Andi has already reviewed this patch.

https://patchwork.kernel.org/patch/9884399/

Is this patch OK for merging, or are there any other comments?

Thanks

Jin Yao

On 8/15/2017 11:19 AM, Andi Kleen wrote:
> On Mon, Aug 14, 2017 at 01:30:29AM +0000, Jin, Yao wrote:
>> Hi Andi,
>>
>> Do you have any comments for this patch?
> Patch looks good to me.
>
> Reviewed-by: Andi Kleen <ak@linux.intel.com>
>
> -Andi

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] perf report: calculate the average cycles of iterations
  2017-08-30  8:41     ` Jin, Yao
@ 2017-08-30 13:03       ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 6+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-08-30 13:03 UTC (permalink / raw)
  To: Jin, Yao
  Cc: Andi Kleen, Jin, Yao, jolsa, peterz, mingo, alexander.shishkin,
	Linux-kernel, Liang, Kan

Em Wed, Aug 30, 2017 at 04:41:13PM +0800, Jin, Yao escreveu:
> Hi Arnaldo,
> 
> Andi has reviewed this patch yet.
> 
> https://patchwork.kernel.org/patch/9884399/
> 
> Is this patch OK for merging or any other comments?

Thanks, applied together with Andi's reviewed-by tag,

- Arnaldo
 
> Thanks
> 
> Jin Yao
> 
> On 8/15/2017 11:19 AM, Andi Kleen wrote:
> > On Mon, Aug 14, 2017 at 01:30:29AM +0000, Jin, Yao wrote:
> > > Hi Andi,
> > > 
> > > Do you have any comments for this patch?
> > Patch looks good to me.
> > 
> > Reviewed-by: Andi Kleen <ak@linux.intel.com>
> > 
> > -Andi

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [tip:perf/urgent] perf report: Calculate the average cycles of iterations
  2017-08-07 13:05 [PATCH] perf report: calculate the average cycles of iterations Jin Yao
  2017-08-14  1:30 ` Jin, Yao
@ 2017-09-05  5:19 ` tip-bot for Jin Yao
  1 sibling, 0 replies; 6+ messages in thread
From: tip-bot for Jin Yao @ 2017-09-05  5:19 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: jolsa, tglx, acme, peterz, mingo, kan.liang, ak, hpa,
	alexander.shishkin, linux-kernel, yao.jin

Commit-ID:  c4ee06251d4212a0d55e2371f2db464f6a1e0901
Gitweb:     http://git.kernel.org/tip/c4ee06251d4212a0d55e2371f2db464f6a1e0901
Author:     Jin Yao <yao.jin@linux.intel.com>
AuthorDate: Mon, 7 Aug 2017 21:05:15 +0800
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Wed, 30 Aug 2017 10:03:27 -0300

perf report: Calculate the average cycles of iterations

The branch history code has a loop detection function. With this, we can
get the number of iterations by calculating the removed loops.

It would also be nice to know the average cycles per iteration.
This patch adds up the cycles in the branch entries of removed loops and
saves the result to the next branch entry (e.g. branch entry A).

Finally it will display the iteration number and average cycles at the
"from" of branch entry A.

For example:
perf record -g -j any,save_type ./div
perf report --branch-history --no-children --stdio

--22.63%--main div.c:42 (RET CROSS_2M)
          compute_flag div.c:28 (cycles:2 iter:173115 avg_cycles:2)
          |
           --10.73%--compute_flag div.c:27 (RET CROSS_2M)
                     rand rand.c:28 (cycles:1)
                     rand rand.c:28 (RET CROSS_2M)
                     __random random.c:298 (cycles:1)
                     __random random.c:297 (COND_BWD CROSS_2M)
                     __random random.c:295 (cycles:1)
                     __random random.c:295 (COND_BWD CROSS_2M)
                     __random random.c:295 (cycles:1)
                     __random random.c:295 (RET CROSS_2M)

Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1502111115-18305-1-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/hists.c |  8 +---
 tools/perf/ui/stdio/hist.c     | 10 ++---
 tools/perf/util/callchain.c    | 49 +++++++++++------------
 tools/perf/util/callchain.h    |  9 ++---
 tools/perf/util/machine.c      | 88 +++++++++++++++++++++++++-----------------
 5 files changed, 85 insertions(+), 79 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index f4bc246..13dfb0a 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -931,12 +931,8 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
 				       browser->show_dso);
 
 	if (symbol_conf.show_branchflag_count) {
-		if (need_percent)
-			callchain_list_counts__printf_value(node, chain, NULL,
-							    buf, sizeof(buf));
-		else
-			callchain_list_counts__printf_value(NULL, chain, NULL,
-							    buf, sizeof(buf));
+		callchain_list_counts__printf_value(chain, NULL,
+						    buf, sizeof(buf));
 
 		if (asprintf(&alloc_str2, "%s%s", str, buf) < 0)
 			str = "Not enough memory!";
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 5c95b83..8bdb7a5 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -124,12 +124,8 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
 	str = callchain_list__sym_name(chain, bf, sizeof(bf), false);
 
 	if (symbol_conf.show_branchflag_count) {
-		if (!period)
-			callchain_list_counts__printf_value(node, chain, NULL,
-							    buf, sizeof(buf));
-		else
-			callchain_list_counts__printf_value(NULL, chain, NULL,
-							    buf, sizeof(buf));
+		callchain_list_counts__printf_value(chain, NULL,
+						    buf, sizeof(buf));
 
 		if (asprintf(&alloc_str, "%s%s", str, buf) < 0)
 			str = "Not enough memory!";
@@ -313,7 +309,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
 
 			if (symbol_conf.show_branchflag_count)
 				ret += callchain_list_counts__printf_value(
-						NULL, chain, fp, NULL, 0);
+						chain, fp, NULL, 0);
 			ret += fprintf(fp, "\n");
 
 			if (++entries_printed == callchain_param.print_limit)
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index f320b07..510b513 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -588,7 +588,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 				call->cycles_count =
 					cursor_node->branch_flags.cycles;
 				call->iter_count = cursor_node->nr_loop_iter;
-				call->samples_count = cursor_node->samples;
+				call->iter_cycles = cursor_node->iter_cycles;
 			}
 		}
 
@@ -722,7 +722,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
 				cnode->cycles_count +=
 					node->branch_flags.cycles;
 				cnode->iter_count += node->nr_loop_iter;
-				cnode->samples_count += node->samples;
+				cnode->iter_cycles += node->iter_cycles;
 			}
 		}
 
@@ -998,7 +998,7 @@ int callchain_merge(struct callchain_cursor *cursor,
 int callchain_cursor_append(struct callchain_cursor *cursor,
 			    u64 ip, struct map *map, struct symbol *sym,
 			    bool branch, struct branch_flags *flags,
-			    int nr_loop_iter, int samples, u64 branch_from)
+			    int nr_loop_iter, u64 iter_cycles, u64 branch_from)
 {
 	struct callchain_cursor_node *node = *cursor->last;
 
@@ -1016,7 +1016,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
 	node->sym = sym;
 	node->branch = branch;
 	node->nr_loop_iter = nr_loop_iter;
-	node->samples = samples;
+	node->iter_cycles = iter_cycles;
 
 	if (flags)
 		memcpy(&node->branch_flags, flags,
@@ -1306,7 +1306,7 @@ static int branch_to_str(char *bf, int bfsize,
 static int branch_from_str(char *bf, int bfsize,
 			   u64 branch_count,
 			   u64 cycles_count, u64 iter_count,
-			   u64 samples_count)
+			   u64 iter_cycles)
 {
 	int printed = 0, i = 0;
 	u64 cycles;
@@ -1318,9 +1318,13 @@ static int branch_from_str(char *bf, int bfsize,
 				bf + printed, bfsize - printed);
 	}
 
-	if (iter_count && samples_count) {
-		printed += count_pri64_printf(i++, "iterations",
-				iter_count / samples_count,
+	if (iter_count) {
+		printed += count_pri64_printf(i++, "iter",
+				iter_count,
+				bf + printed, bfsize - printed);
+
+		printed += count_pri64_printf(i++, "avg_cycles",
+				iter_cycles / iter_count,
 				bf + printed, bfsize - printed);
 	}
 
@@ -1333,7 +1337,7 @@ static int branch_from_str(char *bf, int bfsize,
 static int counts_str_build(char *bf, int bfsize,
 			     u64 branch_count, u64 predicted_count,
 			     u64 abort_count, u64 cycles_count,
-			     u64 iter_count, u64 samples_count,
+			     u64 iter_count, u64 iter_cycles,
 			     struct branch_type_stat *brtype_stat)
 {
 	int printed;
@@ -1346,7 +1350,7 @@ static int counts_str_build(char *bf, int bfsize,
 				predicted_count, abort_count, brtype_stat);
 	} else {
 		printed = branch_from_str(bf, bfsize, branch_count,
-				cycles_count, iter_count, samples_count);
+				cycles_count, iter_count, iter_cycles);
 	}
 
 	if (!printed)
@@ -1358,14 +1362,14 @@ static int counts_str_build(char *bf, int bfsize,
 static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
 				   u64 branch_count, u64 predicted_count,
 				   u64 abort_count, u64 cycles_count,
-				   u64 iter_count, u64 samples_count,
+				   u64 iter_count, u64 iter_cycles,
 				   struct branch_type_stat *brtype_stat)
 {
 	char str[256];
 
 	counts_str_build(str, sizeof(str), branch_count,
 			 predicted_count, abort_count, cycles_count,
-			 iter_count, samples_count, brtype_stat);
+			 iter_count, iter_cycles, brtype_stat);
 
 	if (fp)
 		return fprintf(fp, "%s", str);
@@ -1373,31 +1377,23 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
 	return scnprintf(bf, bfsize, "%s", str);
 }
 
-int callchain_list_counts__printf_value(struct callchain_node *node,
-					struct callchain_list *clist,
+int callchain_list_counts__printf_value(struct callchain_list *clist,
 					FILE *fp, char *bf, int bfsize)
 {
 	u64 branch_count, predicted_count;
 	u64 abort_count, cycles_count;
-	u64 iter_count = 0, samples_count = 0;
+	u64 iter_count, iter_cycles;
 
 	branch_count = clist->branch_count;
 	predicted_count = clist->predicted_count;
 	abort_count = clist->abort_count;
 	cycles_count = clist->cycles_count;
-
-	if (node) {
-		struct callchain_list *call;
-
-		list_for_each_entry(call, &node->val, list) {
-			iter_count += call->iter_count;
-			samples_count += call->samples_count;
-		}
-	}
+	iter_count = clist->iter_count;
+	iter_cycles = clist->iter_cycles;
 
 	return callchain_counts_printf(fp, bf, bfsize, branch_count,
 				       predicted_count, abort_count,
-				       cycles_count, iter_count, samples_count,
+				       cycles_count, iter_count, iter_cycles,
 				       &clist->brtype_stat);
 }
 
@@ -1523,7 +1519,8 @@ int callchain_cursor__copy(struct callchain_cursor *dst,
 
 		rc = callchain_cursor_append(dst, node->ip, node->map, node->sym,
 					     node->branch, &node->branch_flags,
-					     node->nr_loop_iter, node->samples,
+					     node->nr_loop_iter,
+					     node->iter_cycles,
 					     node->branch_from);
 		if (rc)
 			break;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 9773820..1ed6fc6 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -119,7 +119,7 @@ struct callchain_list {
 	u64			abort_count;
 	u64			cycles_count;
 	u64			iter_count;
-	u64			samples_count;
+	u64			iter_cycles;
 	struct branch_type_stat brtype_stat;
 	char		       *srcline;
 	struct list_head	list;
@@ -139,7 +139,7 @@ struct callchain_cursor_node {
 	struct branch_flags		branch_flags;
 	u64				branch_from;
 	int				nr_loop_iter;
-	int				samples;
+	u64				iter_cycles;
 	struct callchain_cursor_node	*next;
 };
 
@@ -201,7 +201,7 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
 int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
 			    struct map *map, struct symbol *sym,
 			    bool branch, struct branch_flags *flags,
-			    int nr_loop_iter, int samples, u64 branch_from);
+			    int nr_loop_iter, u64 iter_cycles, u64 branch_from);
 
 /* Close a cursor writing session. Initialize for the reader */
 static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
@@ -282,8 +282,7 @@ char *callchain_node__scnprintf_value(struct callchain_node *node,
 int callchain_node__fprintf_value(struct callchain_node *node,
 				  FILE *fp, u64 total);
 
-int callchain_list_counts__printf_value(struct callchain_node *node,
-					struct callchain_list *clist,
+int callchain_list_counts__printf_value(struct callchain_list *clist,
 					FILE *fp, char *bf, int bfsize);
 
 void free_callchain(struct callchain_root *root);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 5c8eaca..9eaa953 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1675,6 +1675,11 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
 	return mi;
 }
 
+struct iterations {
+	int nr_loop_iter;
+	u64 cycles;
+};
+
 static int add_callchain_ip(struct thread *thread,
 			    struct callchain_cursor *cursor,
 			    struct symbol **parent,
@@ -1683,11 +1688,12 @@ static int add_callchain_ip(struct thread *thread,
 			    u64 ip,
 			    bool branch,
 			    struct branch_flags *flags,
-			    int nr_loop_iter,
-			    int samples,
+			    struct iterations *iter,
 			    u64 branch_from)
 {
 	struct addr_location al;
+	int nr_loop_iter = 0;
+	u64 iter_cycles = 0;
 
 	al.filtered = 0;
 	al.sym = NULL;
@@ -1737,9 +1743,15 @@ static int add_callchain_ip(struct thread *thread,
 
 	if (symbol_conf.hide_unresolved && al.sym == NULL)
 		return 0;
+
+	if (iter) {
+		nr_loop_iter = iter->nr_loop_iter;
+		iter_cycles = iter->cycles;
+	}
+
 	return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
-				       branch, flags, nr_loop_iter, samples,
-				       branch_from);
+				       branch, flags, nr_loop_iter,
+				       iter_cycles, branch_from);
 }
 
 struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@@ -1760,6 +1772,18 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 	return bi;
 }
 
+static void save_iterations(struct iterations *iter,
+			    struct branch_entry *be, int nr)
+{
+	int i;
+
+	iter->nr_loop_iter = nr;
+	iter->cycles = 0;
+
+	for (i = 0; i < nr; i++)
+		iter->cycles += be[i].flags.cycles;
+}
+
 #define CHASHSZ 127
 #define CHASHBITS 7
 #define NO_ENTRY 0xff
@@ -1767,7 +1791,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 #define PERF_MAX_BRANCH_DEPTH 127
 
 /* Remove loops. */
-static int remove_loops(struct branch_entry *l, int nr)
+static int remove_loops(struct branch_entry *l, int nr,
+			struct iterations *iter)
 {
 	int i, j, off;
 	unsigned char chash[CHASHSZ];
@@ -1792,8 +1817,18 @@ static int remove_loops(struct branch_entry *l, int nr)
 					break;
 				}
 			if (is_loop) {
-				memmove(l + i, l + i + off,
-					(nr - (i + off)) * sizeof(*l));
+				j = nr - (i + off);
+				if (j > 0) {
+					save_iterations(iter + i + off,
+						l + i, off);
+
+					memmove(iter + i, iter + i + off,
+						j * sizeof(*iter));
+
+					memmove(l + i, l + i + off,
+						j * sizeof(*l));
+				}
+
 				nr -= off;
 			}
 		}
@@ -1883,7 +1918,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 
 			err = add_callchain_ip(thread, cursor, parent,
 					       root_al, &cpumode, ip,
-					       branch, flags, 0, 0,
+					       branch, flags, NULL,
 					       branch_from);
 			if (err)
 				return (err < 0) ? err : 0;
@@ -1909,7 +1944,6 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	int i, j, err, nr_entries;
 	int skip_idx = -1;
 	int first_call = 0;
-	int nr_loop_iter;
 
 	if (chain)
 		chain_nr = chain->nr;
@@ -1942,6 +1976,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	if (branch && callchain_param.branch_callstack) {
 		int nr = min(max_stack, (int)branch->nr);
 		struct branch_entry be[nr];
+		struct iterations iter[nr];
 
 		if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
 			pr_warning("corrupted branch chain. skipping...\n");
@@ -1972,38 +2007,21 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 				be[i] = branch->entries[branch->nr - i - 1];
 		}
 
-		nr_loop_iter = nr;
-		nr = remove_loops(be, nr);
-
-		/*
-		 * Get the number of iterations.
-		 * It's only approximation, but good enough in practice.
-		 */
-		if (nr_loop_iter > nr)
-			nr_loop_iter = nr_loop_iter - nr + 1;
-		else
-			nr_loop_iter = 0;
+		memset(iter, 0, sizeof(struct iterations) * nr);
+		nr = remove_loops(be, nr, iter);
 
 		for (i = 0; i < nr; i++) {
-			if (i == nr - 1)
-				err = add_callchain_ip(thread, cursor, parent,
-						       root_al,
-						       NULL, be[i].to,
-						       true, &be[i].flags,
-						       nr_loop_iter, 1,
-						       be[i].from);
-			else
-				err = add_callchain_ip(thread, cursor, parent,
-						       root_al,
-						       NULL, be[i].to,
-						       true, &be[i].flags,
-						       0, 0, be[i].from);
+			err = add_callchain_ip(thread, cursor, parent,
+					       root_al,
+					       NULL, be[i].to,
+					       true, &be[i].flags,
+					       NULL, be[i].from);
 
 			if (!err)
 				err = add_callchain_ip(thread, cursor, parent, root_al,
 						       NULL, be[i].from,
 						       true, &be[i].flags,
-						       0, 0, 0);
+						       &iter[i], 0);
 			if (err == -EINVAL)
 				break;
 			if (err)
@@ -2037,7 +2055,7 @@ check_calls:
 
 		err = add_callchain_ip(thread, cursor, parent,
 				       root_al, &cpumode, ip,
-				       false, NULL, 0, 0, 0);
+				       false, NULL, NULL, 0);
 
 		if (err)
 			return (err < 0) ? err : 0;

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-09-05  5:22 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-07 13:05 [PATCH] perf report: calculate the average cycles of iterations Jin Yao
2017-08-14  1:30 ` Jin, Yao
2017-08-15  3:19   ` Andi Kleen
2017-08-30  8:41     ` Jin, Yao
2017-08-30 13:03       ` Arnaldo Carvalho de Melo
2017-09-05  5:19 ` [tip:perf/urgent] perf report: Calculate " tip-bot for Jin Yao

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.