stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Jin Yao <yao.jin@linux.intel.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Andi Kleen <ak@linux.intel.com>, Jiri Olsa <jolsa@kernel.org>,
	Kan Liang <kan.liang@linux.intel.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Arnaldo Carvalho de Melo <acme@redhat.com>,
	Sasha Levin <sashal@kernel.org>
Subject: [PATCH AUTOSEL 4.20 04/42] perf report: Fix wrong iteration count in --branch-history
Date: Sat,  9 Feb 2019 13:46:56 -0500	[thread overview]
Message-ID: <20190209184734.125935-4-sashal@kernel.org> (raw)
In-Reply-To: <20190209184734.125935-1-sashal@kernel.org>

From: Jin Yao <yao.jin@linux.intel.com>

[ Upstream commit a3366db06bb656cef2e03f30f780d93059bcc594 ]

By calculating the removed loops, we can get the iteration count.

But the iteration count could be reported incorrectly, reporting
impossibly high counts.

That's because the previous code used the number of removed LBR entries
as the iteration count, which is wrong. Fix this by incrementing the
iteration count each time a loop is detected.

When matching the chain, the iteration counts are added up, so we need
to compute the average value when printing it out.

For example,

  $ perf report --branch-history --stdio --no-children

Before:

  ---f2 +0
     |
     |--33.62%--f1 +9 (cycles:1)
     |          f1 +0
     |          main +22 (cycles:1)
     |          main +17
     |          main +38 (cycles:1)
     |          main +27
     |          f1 +26 (cycles:1)
     |          f1 +24
     |          f2 +27 (cycles:7)
     |          f2 +0
     |          f1 +19 (cycles:1)
     |          f1 +14
     |          f2 +27 (cycles:11)
     |          f2 +0
     |          f1 +9 (cycles:1 iter:2968 avg_cycles:3)
     |          f1 +0
     |          main +22 (cycles:1 iter:2968 avg_cycles:3)
     |          main +17
     |          main +38 (cycles:1 iter:2968 avg_cycles:3)

2968 is an impossibly high iteration count, and avg_cycles is too small.

After:

  ---f2 +0
     |
     |--33.62%--f1 +9 (cycles:1)
     |          f1 +0
     |          main +22 (cycles:1)
     |          main +17
     |          main +38 (cycles:1)
     |          main +27
     |          f1 +26 (cycles:1)
     |          f1 +24
     |          f2 +27 (cycles:7)
     |          f2 +0
     |          f1 +19 (cycles:1)
     |          f1 +14
     |          f2 +27 (cycles:11)
     |          f2 +0
     |          f1 +9 (cycles:1 iter:1 avg_cycles:23)
     |          f1 +0
     |          main +22 (cycles:1 iter:1 avg_cycles:23)
     |          main +17
     |          main +38 (cycles:1 iter:1 avg_cycles:23)

avg_cycles:23 is the average number of cycles per iteration.

Fixes: c4ee06251d42 ("perf report: Calculate the average cycles of iterations")

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1546582230-17507-1-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 tools/perf/util/callchain.c | 32 ++++++++++++++++++++------------
 tools/perf/util/callchain.h |  1 +
 tools/perf/util/machine.c   |  2 +-
 3 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 32ef7bdca1cf..dc2212e12184 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -766,6 +766,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
 			cnode->cycles_count += node->branch_flags.cycles;
 			cnode->iter_count += node->nr_loop_iter;
 			cnode->iter_cycles += node->iter_cycles;
+			cnode->from_count++;
 		}
 	}
 
@@ -1345,10 +1346,10 @@ static int branch_to_str(char *bf, int bfsize,
 static int branch_from_str(char *bf, int bfsize,
 			   u64 branch_count,
 			   u64 cycles_count, u64 iter_count,
-			   u64 iter_cycles)
+			   u64 iter_cycles, u64 from_count)
 {
 	int printed = 0, i = 0;
-	u64 cycles;
+	u64 cycles, v = 0;
 
 	cycles = cycles_count / branch_count;
 	if (cycles) {
@@ -1357,14 +1358,16 @@ static int branch_from_str(char *bf, int bfsize,
 				bf + printed, bfsize - printed);
 	}
 
-	if (iter_count) {
-		printed += count_pri64_printf(i++, "iter",
-				iter_count,
-				bf + printed, bfsize - printed);
+	if (iter_count && from_count) {
+		v = iter_count / from_count;
+		if (v) {
+			printed += count_pri64_printf(i++, "iter",
+					v, bf + printed, bfsize - printed);
 
-		printed += count_pri64_printf(i++, "avg_cycles",
-				iter_cycles / iter_count,
-				bf + printed, bfsize - printed);
+			printed += count_pri64_printf(i++, "avg_cycles",
+					iter_cycles / iter_count,
+					bf + printed, bfsize - printed);
+		}
 	}
 
 	if (i)
@@ -1377,6 +1380,7 @@ static int counts_str_build(char *bf, int bfsize,
 			     u64 branch_count, u64 predicted_count,
 			     u64 abort_count, u64 cycles_count,
 			     u64 iter_count, u64 iter_cycles,
+			     u64 from_count,
 			     struct branch_type_stat *brtype_stat)
 {
 	int printed;
@@ -1389,7 +1393,8 @@ static int counts_str_build(char *bf, int bfsize,
 				predicted_count, abort_count, brtype_stat);
 	} else {
 		printed = branch_from_str(bf, bfsize, branch_count,
-				cycles_count, iter_count, iter_cycles);
+				cycles_count, iter_count, iter_cycles,
+				from_count);
 	}
 
 	if (!printed)
@@ -1402,13 +1407,14 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
 				   u64 branch_count, u64 predicted_count,
 				   u64 abort_count, u64 cycles_count,
 				   u64 iter_count, u64 iter_cycles,
+				   u64 from_count,
 				   struct branch_type_stat *brtype_stat)
 {
 	char str[256];
 
 	counts_str_build(str, sizeof(str), branch_count,
 			 predicted_count, abort_count, cycles_count,
-			 iter_count, iter_cycles, brtype_stat);
+			 iter_count, iter_cycles, from_count, brtype_stat);
 
 	if (fp)
 		return fprintf(fp, "%s", str);
@@ -1422,6 +1428,7 @@ int callchain_list_counts__printf_value(struct callchain_list *clist,
 	u64 branch_count, predicted_count;
 	u64 abort_count, cycles_count;
 	u64 iter_count, iter_cycles;
+	u64 from_count;
 
 	branch_count = clist->branch_count;
 	predicted_count = clist->predicted_count;
@@ -1429,11 +1436,12 @@ int callchain_list_counts__printf_value(struct callchain_list *clist,
 	cycles_count = clist->cycles_count;
 	iter_count = clist->iter_count;
 	iter_cycles = clist->iter_cycles;
+	from_count = clist->from_count;
 
 	return callchain_counts_printf(fp, bf, bfsize, branch_count,
 				       predicted_count, abort_count,
 				       cycles_count, iter_count, iter_cycles,
-				       &clist->brtype_stat);
+				       from_count, &clist->brtype_stat);
 }
 
 static void free_callchain_node(struct callchain_node *node)
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 154560b1eb65..99d38ac019b8 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -118,6 +118,7 @@ struct callchain_list {
 		bool		has_children;
 	};
 	u64			branch_count;
+	u64			from_count;
 	u64			predicted_count;
 	u64			abort_count;
 	u64			cycles_count;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 9397e3f2444d..ea228dd0a187 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2005,7 +2005,7 @@ static void save_iterations(struct iterations *iter,
 {
 	int i;
 
-	iter->nr_loop_iter = nr;
+	iter->nr_loop_iter++;
 	iter->cycles = 0;
 
 	for (i = 0; i < nr; i++)
-- 
2.19.1


  parent reply	other threads:[~2019-02-09 18:57 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-09 18:46 [PATCH AUTOSEL 4.20 01/42] drm/amdgpu/sriov:Correct pfvf exchange logic Sasha Levin
2019-02-09 18:46 ` [PATCH AUTOSEL 4.20 02/42] ACPI: NUMA: Use correct type for printing addresses on i386-PAE Sasha Levin
2019-02-09 18:46 ` [PATCH AUTOSEL 4.20 03/42] perf stat: Fix endless wait for child process Sasha Levin
2019-02-09 18:46 ` Sasha Levin [this message]
2019-02-09 18:46 ` [PATCH AUTOSEL 4.20 05/42] perf test shell: Use a fallback to get the pathname in vfs_getname Sasha Levin
2019-02-09 18:46 ` [PATCH AUTOSEL 4.20 06/42] soc: renesas: r8a774c0-sysc: Fix initialization order of 3DG-{A,B} Sasha Levin
2019-02-09 18:46 ` [PATCH AUTOSEL 4.20 07/42] tools uapi: fix RISC-V 64-bit support Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 08/42] riscv: fix trace_sys_exit hook Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 09/42] cpufreq: check if policy is inactive early in __cpufreq_get() Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 10/42] csky: fixup relocation error with 807 & 860 Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 11/42] csky: fixup CACHEV1 store instruction fast retire Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 12/42] csky: fixup compile error with pte_alloc Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 13/42] irqchip/csky: fixup handle_irq_perbit break irq Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 14/42] drm/amd/powerplay: avoid possible buffer overflow Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 15/42] drm/bridge: tc358767: add bus flags Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 16/42] drm/bridge: tc358767: add defines for DP1_SRCCTRL & PHY_2LANE Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 17/42] drm/bridge: tc358767: fix single lane configuration Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 18/42] drm/bridge: tc358767: fix initial DP0/1_SRCCTRL value Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 19/42] drm/bridge: tc358767: reject modes which require too much BW Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 20/42] drm/bridge: tc358767: fix output H/V syncs Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 21/42] nvme-pci: use the same attributes when freeing host_mem_desc_bufs Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 22/42] nvme-pci: fix out of bounds access in nvme_cqe_pending Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 23/42] nvme-multipath: zero out ANA log buffer Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 24/42] nvme: pad fake subsys NQN vid and ssvid with zeros Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 25/42] nvme: introduce NVME_QUIRK_IGNORE_DEV_SUBNQN Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 26/42] drm/amdgpu: fix CPDMA hang in PRT mode for VEGA20 Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 27/42] drm/amdgpu: set WRITE_BURST_LENGTH to 64B to workaround SDMA1 hang Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 28/42] drm/amdgpu: disable system memory page tables for now Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 29/42] ARM: dts: da850-evm: Correct the audio codec regulators Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 30/42] ARM: dts: da850-evm: Correct the sound card name Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 31/42] ARM: dts: da850-lcdk: Correct the audio codec regulators Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 32/42] ARM: dts: da850-lcdk: Correct the sound card name Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 33/42] ARM: dts: kirkwood: Fix polarity of GPIO fan lines Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 34/42] csky: fixup compile error with CPU 810 Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 35/42] gpio: pl061: handle failed allocations Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 36/42] drm/nouveau: Don't disable polling in fallback mode Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 37/42] drm/nouveau/falcon: avoid touching registers if engine is off Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 38/42] cifs: Limit memory used by lock request calls to a page Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 39/42] CIFS: Fix credits calculation for cancelled requests Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 40/42] CIFS: Move credit processing to mid callbacks for SMB3 Sasha Levin
2019-02-12  1:48   ` Pavel Shilovskiy
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 41/42] CIFS: Fix error paths in writeback code Sasha Levin
2019-02-09 18:47 ` [PATCH AUTOSEL 4.20 42/42] kvm: sev: Fail KVM_SEV_INIT if already initialized Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190209184734.125935-4-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=acme@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=jolsa@kernel.org \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=stable@vger.kernel.org \
    --cc=yao.jin@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).