linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] perf script: Fix LBR skid dump problems in brstackinsn
@ 2018-11-20  5:06 Andi Kleen
  2018-12-06 17:01 ` Arnaldo Carvalho de Melo
  2019-01-03 13:13 ` [tip:perf/urgent] " tip-bot for Andi Kleen
  0 siblings, 2 replies; 8+ messages in thread
From: Andi Kleen @ 2018-11-20  5:06 UTC (permalink / raw)
  To: acme; +Cc: linux-kernel, jolsa, Andi Kleen, milian.wolff, adrian.hunter

From: Andi Kleen <ak@linux.intel.com>

This is a fix for another instance of the skid problem Milian
recently found [1]

The LBRs don't freeze at the exact same time as the PMI is triggered.
The perf script brstackinsn code that dumps LBR assembler
assumes that the last branch in the LBR leads to the sample point.
But with skid it's possible that the CPU executes one or more branches
before the sample, but which do not appear in the LBR.

What happens then is either that the sample point is before
the last LBR branch. In this case the dumper sees a negative
length and ignores it. Or it the sample point is long after
the last branch. Then the dumper sees a very long block and dumps
it upto its block limit (16k bytes), which is noise in the output.

On typical sample session this can happen regularly.

This patch tries to detect and handle the situation. On the last
block that is dumped by the LBR dumper we always stop on the first
branch. If the block length is negative just scan forward to the
first branch. Otherwise scan until a branch is found.

The PT decoder already has a function that uses the instruction
decoder to detect branches, so we can just reuse it here.

Then when a terminating branch is found print an indication
and stop dumping. This might miss a few instructions, but at least
shows no runaway blocks.

Cc: milian.wolff@kdab.com
Cc: adrian.hunter@intel.com
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/builtin-script.c                    | 18 +++++++++++++++++-
 tools/perf/util/dump-insn.c                    |  8 ++++++++
 tools/perf/util/dump-insn.h                    |  2 ++
 .../intel-pt-decoder/intel-pt-insn-decoder.c   |  8 ++++++++
 4 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 4da5e32b9e03..11868bf39e66 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1049,9 +1049,18 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 
 	/*
 	 * Print final block upto sample
+	 *
+	 * Due to pipeline delays the LBRs might be missing a branch
+	 * or two, which can result in very large or negative blocks
+	 * between final branch and sample. When this happens just
+	 * continue walking after the last TO until we hit a branch.
 	 */
 	start = br->entries[0].to;
 	end = sample->ip;
+	if (end < start) {
+		/* Missing jump. Scan 128 bytes for the next branch */
+		end = start + 128;
+	}
 	len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true);
 	printed += ip__fprintf_sym(start, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
 	if (len <= 0) {
@@ -1060,7 +1069,6 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 			      machine, thread, &x.is64bit, &x.cpumode, false);
 		if (len <= 0)
 			goto out;
-
 		printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", sample->ip,
 			dump_insn(&x, sample->ip, buffer, len, NULL));
 		goto out;
@@ -1070,6 +1078,14 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 				   dump_insn(&x, start + off, buffer + off, len - off, &ilen));
 		if (ilen == 0)
 			break;
+		if (arch_is_branch(buffer + off, len - off, x.is64bit) &&
+			start + off != sample->ip) {
+			/*
+			 * Hit a missing branch. Just stop.
+			 */
+			printed += fprintf(fp, "\t... not reaching sample ...\n");
+			break;
+		}
 	}
 out:
 	return printed;
diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c
index 10988d3de7ce..2bd8585db93c 100644
--- a/tools/perf/util/dump-insn.c
+++ b/tools/perf/util/dump-insn.c
@@ -13,3 +13,11 @@ const char *dump_insn(struct perf_insn *x __maybe_unused,
 		*lenp = 0;
 	return "?";
 }
+
+__weak
+int arch_is_branch(const unsigned char *buf __maybe_unused,
+		   size_t len __maybe_unused,
+		   int x86_64 __maybe_unused)
+{
+	return 0;
+}
diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h
index 0e06280a8860..650125061530 100644
--- a/tools/perf/util/dump-insn.h
+++ b/tools/perf/util/dump-insn.h
@@ -20,4 +20,6 @@ struct perf_insn {
 
 const char *dump_insn(struct perf_insn *x, u64 ip,
 		      u8 *inbuf, int inlen, int *lenp);
+int arch_is_branch(const unsigned char *buf, size_t len, int x86_64);
+
 #endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 54818828023b..1c0e289f01e6 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -180,6 +180,14 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
 	return 0;
 }
 
+int arch_is_branch(const unsigned char *buf, size_t len, int x86_64)
+{
+	struct intel_pt_insn in;
+	if (intel_pt_get_insn(buf, len, x86_64, &in) < 0)
+		return -1;
+	return in.branch != INTEL_PT_BR_NO_BRANCH;
+}
+
 const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
 		      u8 *inbuf, int inlen, int *lenp)
 {
-- 
2.17.2


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] perf script: Fix LBR skid dump problems in brstackinsn
  2018-11-20  5:06 [PATCH] perf script: Fix LBR skid dump problems in brstackinsn Andi Kleen
@ 2018-12-06 17:01 ` Arnaldo Carvalho de Melo
  2018-12-06 20:51   ` Andi Kleen
  2018-12-07  9:21   ` Adrian Hunter
  2019-01-03 13:13 ` [tip:perf/urgent] " tip-bot for Andi Kleen
  1 sibling, 2 replies; 8+ messages in thread
From: Arnaldo Carvalho de Melo @ 2018-12-06 17:01 UTC (permalink / raw)
  To: Milian Wolff, Adrian Hunter; +Cc: Andi Kleen, linux-kernel, jolsa, Andi Kleen

Em Mon, Nov 19, 2018 at 09:06:17PM -0800, Andi Kleen escreveu:
> From: Andi Kleen <ak@linux.intel.com>
> 
> This is a fix for another instance of the skid problem Milian
> recently found [1]

Milian, have you tested this?

Adrian, can I have your Reviewed-by or Acked-by?

Thanks,

- Arnaldo
 
> The LBRs don't freeze at the exact same time as the PMI is triggered.
> The perf script brstackinsn code that dumps LBR assembler
> assumes that the last branch in the LBR leads to the sample point.
> But with skid it's possible that the CPU executes one or more branches
> before the sample, but which do not appear in the LBR.
> 
> What happens then is either that the sample point is before
> the last LBR branch. In this case the dumper sees a negative
> length and ignores it. Or it the sample point is long after
> the last branch. Then the dumper sees a very long block and dumps
> it upto its block limit (16k bytes), which is noise in the output.
> 
> On typical sample session this can happen regularly.
> 
> This patch tries to detect and handle the situation. On the last
> block that is dumped by the LBR dumper we always stop on the first
> branch. If the block length is negative just scan forward to the
> first branch. Otherwise scan until a branch is found.
> 
> The PT decoder already has a function that uses the instruction
> decoder to detect branches, so we can just reuse it here.
> 
> Then when a terminating branch is found print an indication
> and stop dumping. This might miss a few instructions, but at least
> shows no runaway blocks.
> 
> Cc: milian.wolff@kdab.com
> Cc: adrian.hunter@intel.com
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
>  tools/perf/builtin-script.c                    | 18 +++++++++++++++++-
>  tools/perf/util/dump-insn.c                    |  8 ++++++++
>  tools/perf/util/dump-insn.h                    |  2 ++
>  .../intel-pt-decoder/intel-pt-insn-decoder.c   |  8 ++++++++
>  4 files changed, 35 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index 4da5e32b9e03..11868bf39e66 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -1049,9 +1049,18 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
>  
>  	/*
>  	 * Print final block upto sample
> +	 *
> +	 * Due to pipeline delays the LBRs might be missing a branch
> +	 * or two, which can result in very large or negative blocks
> +	 * between final branch and sample. When this happens just
> +	 * continue walking after the last TO until we hit a branch.
>  	 */
>  	start = br->entries[0].to;
>  	end = sample->ip;
> +	if (end < start) {
> +		/* Missing jump. Scan 128 bytes for the next branch */
> +		end = start + 128;
> +	}
>  	len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true);
>  	printed += ip__fprintf_sym(start, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
>  	if (len <= 0) {
> @@ -1060,7 +1069,6 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
>  			      machine, thread, &x.is64bit, &x.cpumode, false);
>  		if (len <= 0)
>  			goto out;
> -
>  		printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", sample->ip,
>  			dump_insn(&x, sample->ip, buffer, len, NULL));
>  		goto out;
> @@ -1070,6 +1078,14 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
>  				   dump_insn(&x, start + off, buffer + off, len - off, &ilen));
>  		if (ilen == 0)
>  			break;
> +		if (arch_is_branch(buffer + off, len - off, x.is64bit) &&
> +			start + off != sample->ip) {
> +			/*
> +			 * Hit a missing branch. Just stop.
> +			 */
> +			printed += fprintf(fp, "\t... not reaching sample ...\n");
> +			break;
> +		}
>  	}
>  out:
>  	return printed;
> diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c
> index 10988d3de7ce..2bd8585db93c 100644
> --- a/tools/perf/util/dump-insn.c
> +++ b/tools/perf/util/dump-insn.c
> @@ -13,3 +13,11 @@ const char *dump_insn(struct perf_insn *x __maybe_unused,
>  		*lenp = 0;
>  	return "?";
>  }
> +
> +__weak
> +int arch_is_branch(const unsigned char *buf __maybe_unused,
> +		   size_t len __maybe_unused,
> +		   int x86_64 __maybe_unused)
> +{
> +	return 0;
> +}
> diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h
> index 0e06280a8860..650125061530 100644
> --- a/tools/perf/util/dump-insn.h
> +++ b/tools/perf/util/dump-insn.h
> @@ -20,4 +20,6 @@ struct perf_insn {
>  
>  const char *dump_insn(struct perf_insn *x, u64 ip,
>  		      u8 *inbuf, int inlen, int *lenp);
> +int arch_is_branch(const unsigned char *buf, size_t len, int x86_64);
> +
>  #endif
> diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
> index 54818828023b..1c0e289f01e6 100644
> --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
> +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
> @@ -180,6 +180,14 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
>  	return 0;
>  }
>  
> +int arch_is_branch(const unsigned char *buf, size_t len, int x86_64)
> +{
> +	struct intel_pt_insn in;
> +	if (intel_pt_get_insn(buf, len, x86_64, &in) < 0)
> +		return -1;
> +	return in.branch != INTEL_PT_BR_NO_BRANCH;
> +}
> +
>  const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
>  		      u8 *inbuf, int inlen, int *lenp)
>  {
> -- 
> 2.17.2

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] perf script: Fix LBR skid dump problems in brstackinsn
  2018-12-06 17:01 ` Arnaldo Carvalho de Melo
@ 2018-12-06 20:51   ` Andi Kleen
  2018-12-06 21:29     ` Arnaldo Carvalho de Melo
  2018-12-07  9:21   ` Adrian Hunter
  1 sibling, 1 reply; 8+ messages in thread
From: Andi Kleen @ 2018-12-06 20:51 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Milian Wolff, Adrian Hunter, Andi Kleen, linux-kernel, jolsa, Andi Kleen

On Thu, Dec 06, 2018 at 02:01:40PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Mon, Nov 19, 2018 at 09:06:17PM -0800, Andi Kleen escreveu:
> > From: Andi Kleen <ak@linux.intel.com>
> > 
> > This is a fix for another instance of the skid problem Milian
> > recently found [1]
> 
> Milian, have you tested this?

It won't fix Milians problem, which were with call graphs.

For Milian's issue you would need Milian's patches,
which AFAIK haven't been reviewed by anyone.

-Andi

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] perf script: Fix LBR skid dump problems in brstackinsn
  2018-12-06 20:51   ` Andi Kleen
@ 2018-12-06 21:29     ` Arnaldo Carvalho de Melo
  2018-12-06 22:52       ` Andi Kleen
  0 siblings, 1 reply; 8+ messages in thread
From: Arnaldo Carvalho de Melo @ 2018-12-06 21:29 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Milian Wolff, Adrian Hunter, linux-kernel, jolsa, Andi Kleen

Em Thu, Dec 06, 2018 at 12:51:48PM -0800, Andi Kleen escreveu:
> On Thu, Dec 06, 2018 at 02:01:40PM -0300, Arnaldo Carvalho de Melo wrote:
> > Em Mon, Nov 19, 2018 at 09:06:17PM -0800, Andi Kleen escreveu:
> > > From: Andi Kleen <ak@linux.intel.com>
> > > 
> > > This is a fix for another instance of the skid problem Milian
> > > recently found [1]

I think you forgot to add the reference, i.e. what is the url or
message-id that this [1] refers to?

> > Milian, have you tested this?
> 
> It won't fix Milians problem, which were with call graphs.
> 
> For Milian's issue you would need Milian's patches,
> which AFAIK haven't been reviewed by anyone.

/me trying to find these patches...

- Arnaldo

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] perf script: Fix LBR skid dump problems in brstackinsn
  2018-12-06 21:29     ` Arnaldo Carvalho de Melo
@ 2018-12-06 22:52       ` Andi Kleen
  2018-12-11 20:47         ` Milian Wolff
  0 siblings, 1 reply; 8+ messages in thread
From: Andi Kleen @ 2018-12-06 22:52 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Andi Kleen, Milian Wolff, Adrian Hunter, linux-kernel, jolsa, Andi Kleen

On Thu, Dec 06, 2018 at 06:29:20PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Thu, Dec 06, 2018 at 12:51:48PM -0800, Andi Kleen escreveu:
> > On Thu, Dec 06, 2018 at 02:01:40PM -0300, Arnaldo Carvalho de Melo wrote:
> > > Em Mon, Nov 19, 2018 at 09:06:17PM -0800, Andi Kleen escreveu:
> > > > From: Andi Kleen <ak@linux.intel.com>
> > > > 
> > > > This is a fix for another instance of the skid problem Milian
> > > > recently found [1]
> 
> I think you forgot to add the reference, i.e. what is the url or
> message-id that this [1] refers to?

Hmm, I thought I saw some patches from Milian for this earlier,
but now I can't find them. Perhaps I misremember. Milian
can point to them if they exist and are not just a figment
of my imagination :-)

These were the changes to report the stack frame RIP/RSP in the PEBS
handler and use it for unwinding in perf.

-Andi

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] perf script: Fix LBR skid dump problems in brstackinsn
  2018-12-06 17:01 ` Arnaldo Carvalho de Melo
  2018-12-06 20:51   ` Andi Kleen
@ 2018-12-07  9:21   ` Adrian Hunter
  1 sibling, 0 replies; 8+ messages in thread
From: Adrian Hunter @ 2018-12-07  9:21 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Milian Wolff
  Cc: Andi Kleen, linux-kernel, jolsa, Andi Kleen

On 6/12/18 7:01 PM, Arnaldo Carvalho de Melo wrote:
> Em Mon, Nov 19, 2018 at 09:06:17PM -0800, Andi Kleen escreveu:
>> From: Andi Kleen <ak@linux.intel.com>
>>
>> This is a fix for another instance of the skid problem Milian
>> recently found [1]
> 
> Milian, have you tested this?
> 
> Adrian, can I have your Reviewed-by or Acked-by?

Acked-by: Adrian Hunter <adrian.hunter@intel.com>

> 
> Thanks,
> 
> - Arnaldo
>  
>> The LBRs don't freeze at the exact same time as the PMI is triggered.
>> The perf script brstackinsn code that dumps LBR assembler
>> assumes that the last branch in the LBR leads to the sample point.
>> But with skid it's possible that the CPU executes one or more branches
>> before the sample, but which do not appear in the LBR.
>>
>> What happens then is either that the sample point is before
>> the last LBR branch. In this case the dumper sees a negative
>> length and ignores it. Or it the sample point is long after
>> the last branch. Then the dumper sees a very long block and dumps
>> it upto its block limit (16k bytes), which is noise in the output.
>>
>> On typical sample session this can happen regularly.
>>
>> This patch tries to detect and handle the situation. On the last
>> block that is dumped by the LBR dumper we always stop on the first
>> branch. If the block length is negative just scan forward to the
>> first branch. Otherwise scan until a branch is found.
>>
>> The PT decoder already has a function that uses the instruction
>> decoder to detect branches, so we can just reuse it here.
>>
>> Then when a terminating branch is found print an indication
>> and stop dumping. This might miss a few instructions, but at least
>> shows no runaway blocks.
>>
>> Cc: milian.wolff@kdab.com
>> Cc: adrian.hunter@intel.com
>> Signed-off-by: Andi Kleen <ak@linux.intel.com>
>> ---
>>  tools/perf/builtin-script.c                    | 18 +++++++++++++++++-
>>  tools/perf/util/dump-insn.c                    |  8 ++++++++
>>  tools/perf/util/dump-insn.h                    |  2 ++
>>  .../intel-pt-decoder/intel-pt-insn-decoder.c   |  8 ++++++++
>>  4 files changed, 35 insertions(+), 1 deletion(-)
>>
>> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
>> index 4da5e32b9e03..11868bf39e66 100644
>> --- a/tools/perf/builtin-script.c
>> +++ b/tools/perf/builtin-script.c
>> @@ -1049,9 +1049,18 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
>>  
>>  	/*
>>  	 * Print final block upto sample
>> +	 *
>> +	 * Due to pipeline delays the LBRs might be missing a branch
>> +	 * or two, which can result in very large or negative blocks
>> +	 * between final branch and sample. When this happens just
>> +	 * continue walking after the last TO until we hit a branch.
>>  	 */
>>  	start = br->entries[0].to;
>>  	end = sample->ip;
>> +	if (end < start) {
>> +		/* Missing jump. Scan 128 bytes for the next branch */
>> +		end = start + 128;
>> +	}
>>  	len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true);
>>  	printed += ip__fprintf_sym(start, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
>>  	if (len <= 0) {
>> @@ -1060,7 +1069,6 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
>>  			      machine, thread, &x.is64bit, &x.cpumode, false);
>>  		if (len <= 0)
>>  			goto out;
>> -
>>  		printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", sample->ip,
>>  			dump_insn(&x, sample->ip, buffer, len, NULL));
>>  		goto out;
>> @@ -1070,6 +1078,14 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
>>  				   dump_insn(&x, start + off, buffer + off, len - off, &ilen));
>>  		if (ilen == 0)
>>  			break;
>> +		if (arch_is_branch(buffer + off, len - off, x.is64bit) &&
>> +			start + off != sample->ip) {
>> +			/*
>> +			 * Hit a missing branch. Just stop.
>> +			 */
>> +			printed += fprintf(fp, "\t... not reaching sample ...\n");
>> +			break;
>> +		}
>>  	}
>>  out:
>>  	return printed;
>> diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c
>> index 10988d3de7ce..2bd8585db93c 100644
>> --- a/tools/perf/util/dump-insn.c
>> +++ b/tools/perf/util/dump-insn.c
>> @@ -13,3 +13,11 @@ const char *dump_insn(struct perf_insn *x __maybe_unused,
>>  		*lenp = 0;
>>  	return "?";
>>  }
>> +
>> +__weak
>> +int arch_is_branch(const unsigned char *buf __maybe_unused,
>> +		   size_t len __maybe_unused,
>> +		   int x86_64 __maybe_unused)
>> +{
>> +	return 0;
>> +}
>> diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h
>> index 0e06280a8860..650125061530 100644
>> --- a/tools/perf/util/dump-insn.h
>> +++ b/tools/perf/util/dump-insn.h
>> @@ -20,4 +20,6 @@ struct perf_insn {
>>  
>>  const char *dump_insn(struct perf_insn *x, u64 ip,
>>  		      u8 *inbuf, int inlen, int *lenp);
>> +int arch_is_branch(const unsigned char *buf, size_t len, int x86_64);
>> +
>>  #endif
>> diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
>> index 54818828023b..1c0e289f01e6 100644
>> --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
>> +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
>> @@ -180,6 +180,14 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
>>  	return 0;
>>  }
>>  
>> +int arch_is_branch(const unsigned char *buf, size_t len, int x86_64)
>> +{
>> +	struct intel_pt_insn in;
>> +	if (intel_pt_get_insn(buf, len, x86_64, &in) < 0)
>> +		return -1;
>> +	return in.branch != INTEL_PT_BR_NO_BRANCH;
>> +}
>> +
>>  const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
>>  		      u8 *inbuf, int inlen, int *lenp)
>>  {
>> -- 
>> 2.17.2
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] perf script: Fix LBR skid dump problems in brstackinsn
  2018-12-06 22:52       ` Andi Kleen
@ 2018-12-11 20:47         ` Milian Wolff
  0 siblings, 0 replies; 8+ messages in thread
From: Milian Wolff @ 2018-12-11 20:47 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Arnaldo Carvalho de Melo, Adrian Hunter, linux-kernel, jolsa, Andi Kleen

[-- Attachment #1: Type: text/plain, Size: 1621 bytes --]

On Donnerstag, 6. Dezember 2018 23:52:07 CET Andi Kleen wrote:
> On Thu, Dec 06, 2018 at 06:29:20PM -0300, Arnaldo Carvalho de Melo wrote:
> > Em Thu, Dec 06, 2018 at 12:51:48PM -0800, Andi Kleen escreveu:
> > > On Thu, Dec 06, 2018 at 02:01:40PM -0300, Arnaldo Carvalho de Melo 
wrote:
> > > > Em Mon, Nov 19, 2018 at 09:06:17PM -0800, Andi Kleen escreveu:
> > > > > From: Andi Kleen <ak@linux.intel.com>
> > > > > 
> > > > > This is a fix for another instance of the skid problem Milian
> > > > > recently found [1]
> > 
> > I think you forgot to add the reference, i.e. what is the url or
> > message-id that this [1] refers to?
> 
> Hmm, I thought I saw some patches from Milian for this earlier,
> but now I can't find them. Perhaps I misremember. Milian
> can point to them if they exist and are not just a figment
> of my imagination :-)

I only have very early POC patches, cf.: https://lkml.org/lkml/2018/11/14/608

I've now also pushed that on my WIP branch: https://github.com/milianw/linux/
tree/pebs-callchain-breakage

I haven't had the time since to work on this. The patches as-is are not 
upstreamable. There are some open questions on my side (see mail referenced 
above).

> These were the changes to report the stack frame RIP/RSP in the PEBS
> handler and use it for unwinding in perf.

Yes, I was looking at something different. I've no experience with brstackinsn 
usage in perf, so I can't really add my tested-by.

Cheers

-- 
Milian Wolff | milian.wolff@kdab.com | Senior Software Engineer
KDAB (Deutschland) GmbH, a KDAB Group company
Tel: +49-30-521325470
KDAB - The Qt, C++ and OpenGL Experts

[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 3826 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [tip:perf/urgent] perf script: Fix LBR skid dump problems in brstackinsn
  2018-11-20  5:06 [PATCH] perf script: Fix LBR skid dump problems in brstackinsn Andi Kleen
  2018-12-06 17:01 ` Arnaldo Carvalho de Melo
@ 2019-01-03 13:13 ` tip-bot for Andi Kleen
  1 sibling, 0 replies; 8+ messages in thread
From: tip-bot for Andi Kleen @ 2019-01-03 13:13 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: hpa, tglx, linux-kernel, jolsa, ak, adrian.hunter, acme, mingo,
	milian.wolff

Commit-ID:  61f611593f2c90547cb09c0bf6977414454a27e6
Gitweb:     https://git.kernel.org/tip/61f611593f2c90547cb09c0bf6977414454a27e6
Author:     Andi Kleen <ak@linux.intel.com>
AuthorDate: Mon, 19 Nov 2018 21:06:17 -0800
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Fri, 28 Dec 2018 16:33:02 -0300

perf script: Fix LBR skid dump problems in brstackinsn

This is a fix for another instance of the skid problem Milian recently
found [1]

The LBRs don't freeze at the exact same time as the PMI is triggered.
The perf script brstackinsn code that dumps LBR assembler assumes that
the last branch in the LBR leads to the sample point.  But with skid
it's possible that the CPU executes one or more branches before the
sample, but which do not appear in the LBR.

What happens then is either that the sample point is before the last LBR
branch. In this case the dumper sees a negative length and ignores it.
Or it the sample point is long after the last branch. Then the dumper
sees a very long block and dumps it upto its block limit (16k bytes),
which is noise in the output.

On typical sample session this can happen regularly.

This patch tries to detect and handle the situation. On the last block
that is dumped by the LBR dumper we always stop on the first branch. If
the block length is negative just scan forward to the first branch.
Otherwise scan until a branch is found.

The PT decoder already has a function that uses the instruction decoder
to detect branches, so we can just reuse it here.

Then when a terminating branch is found print an indication and stop
dumping. This might miss a few instructions, but at least shows no
runaway blocks.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Link: http://lkml.kernel.org/r/20181120050617.4119-1-andi@firstfloor.org
[ Resolved conflict with dd2e18e9ac20 ("perf tools: Support 'srccode' output") ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-script.c                             | 17 ++++++++++++++++-
 tools/perf/util/dump-insn.c                             |  8 ++++++++
 tools/perf/util/dump-insn.h                             |  2 ++
 .../perf/util/intel-pt-decoder/intel-pt-insn-decoder.c  |  8 ++++++++
 4 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 3728b50e52e2..88d52ed85ffc 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1073,9 +1073,18 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 
 	/*
 	 * Print final block upto sample
+	 *
+	 * Due to pipeline delays the LBRs might be missing a branch
+	 * or two, which can result in very large or negative blocks
+	 * between final branch and sample. When this happens just
+	 * continue walking after the last TO until we hit a branch.
 	 */
 	start = br->entries[0].to;
 	end = sample->ip;
+	if (end < start) {
+		/* Missing jump. Scan 128 bytes for the next branch */
+		end = start + 128;
+	}
 	len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true);
 	printed += ip__fprintf_sym(start, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
 	if (len <= 0) {
@@ -1084,7 +1093,6 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 			      machine, thread, &x.is64bit, &x.cpumode, false);
 		if (len <= 0)
 			goto out;
-
 		printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", sample->ip,
 			dump_insn(&x, sample->ip, buffer, len, NULL));
 		if (PRINT_FIELD(SRCCODE))
@@ -1096,6 +1104,13 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 				   dump_insn(&x, start + off, buffer + off, len - off, &ilen));
 		if (ilen == 0)
 			break;
+		if (arch_is_branch(buffer + off, len - off, x.is64bit) && start + off != sample->ip) {
+			/*
+			 * Hit a missing branch. Just stop.
+			 */
+			printed += fprintf(fp, "\t... not reaching sample ...\n");
+			break;
+		}
 		if (PRINT_FIELD(SRCCODE))
 			print_srccode(thread, x.cpumode, start + off);
 	}
diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c
index 10988d3de7ce..2bd8585db93c 100644
--- a/tools/perf/util/dump-insn.c
+++ b/tools/perf/util/dump-insn.c
@@ -13,3 +13,11 @@ const char *dump_insn(struct perf_insn *x __maybe_unused,
 		*lenp = 0;
 	return "?";
 }
+
+__weak
+int arch_is_branch(const unsigned char *buf __maybe_unused,
+		   size_t len __maybe_unused,
+		   int x86_64 __maybe_unused)
+{
+	return 0;
+}
diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h
index 0e06280a8860..650125061530 100644
--- a/tools/perf/util/dump-insn.h
+++ b/tools/perf/util/dump-insn.h
@@ -20,4 +20,6 @@ struct perf_insn {
 
 const char *dump_insn(struct perf_insn *x, u64 ip,
 		      u8 *inbuf, int inlen, int *lenp);
+int arch_is_branch(const unsigned char *buf, size_t len, int x86_64);
+
 #endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 54818828023b..1c0e289f01e6 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -180,6 +180,14 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
 	return 0;
 }
 
+int arch_is_branch(const unsigned char *buf, size_t len, int x86_64)
+{
+	struct intel_pt_insn in;
+	if (intel_pt_get_insn(buf, len, x86_64, &in) < 0)
+		return -1;
+	return in.branch != INTEL_PT_BR_NO_BRANCH;
+}
+
 const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
 		      u8 *inbuf, int inlen, int *lenp)
 {

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2019-01-03 13:13 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-20  5:06 [PATCH] perf script: Fix LBR skid dump problems in brstackinsn Andi Kleen
2018-12-06 17:01 ` Arnaldo Carvalho de Melo
2018-12-06 20:51   ` Andi Kleen
2018-12-06 21:29     ` Arnaldo Carvalho de Melo
2018-12-06 22:52       ` Andi Kleen
2018-12-11 20:47         ` Milian Wolff
2018-12-07  9:21   ` Adrian Hunter
2019-01-03 13:13 ` [tip:perf/urgent] " tip-bot for Andi Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).