linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] perf intel-pt-decoder: Report instruction bytes and length in sample
@ 2016-10-04  0:30 Andi Kleen
  2016-10-04  0:30 ` [PATCH 2/2] perf tools: Support insn and insnlen in perf script Andi Kleen
  2016-10-05 11:36 ` [PATCH 1/2] perf intel-pt-decoder: Report instruction bytes and length in sample Arnaldo Carvalho de Melo
  0 siblings, 2 replies; 7+ messages in thread
From: Andi Kleen @ 2016-10-04  0:30 UTC (permalink / raw)
  To: acme; +Cc: linux-kernel, adrian.hunter, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Change the Intel PT decoder to pass up the length and the instruction
bytes of the decoded or sampled instruction in the perf sample.

The decoder already knows this information, we just need to pass it
up. Since it is only a couple of movs it is not very expensive.

Used in the next patch.

v2: Handle instruction cache too. Make sure ilen is always initialized.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/util/event.h                                  |  3 +++
 tools/perf/util/intel-pt-decoder/intel-pt-decoder.c      |  2 ++
 tools/perf/util/intel-pt-decoder/intel-pt-decoder.h      |  3 +++
 tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h |  2 +-
 tools/perf/util/intel-pt.c                               | 10 ++++++++++
 5 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 8d363d5e65a2..c735c53a26f8 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -177,6 +177,8 @@ enum {
 	PERF_IP_FLAG_TRACE_BEGIN	|\
 	PERF_IP_FLAG_TRACE_END)
 
+#define MAX_INSN 16
+
 struct perf_sample {
 	u64 ip;
 	u32 pid, tid;
@@ -193,6 +195,7 @@ struct perf_sample {
 	u32 flags;
 	u16 insn_len;
 	u8  cpumode;
+	char insn[MAX_INSN];
 	void *raw_data;
 	struct ip_callchain *callchain;
 	struct branch_stack *branch_stack;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 8ff6c6a61291..8a5e21abb790 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -949,6 +949,8 @@ out:
 out_no_progress:
 	decoder->state.insn_op = intel_pt_insn->op;
 	decoder->state.insn_len = intel_pt_insn->length;
+	memcpy(decoder->state.insn, intel_pt_insn->buf,
+	       sizeof(decoder->state.insn));
 
 	if (decoder->tx_flags & INTEL_PT_IN_TX)
 		decoder->state.flags |= INTEL_PT_IN_TX;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 02c38fec1c37..fbd7d08d97d5 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -20,6 +20,8 @@
 #include <stddef.h>
 #include <stdbool.h>
 
+#define MAX_INSN			16
+
 #include "intel-pt-insn-decoder.h"
 
 #define INTEL_PT_IN_TX		(1 << 0)
@@ -66,6 +68,7 @@ struct intel_pt_state {
 	uint32_t flags;
 	enum intel_pt_insn_op insn_op;
 	int insn_len;
+	char insn[MAX_INSN];
 };
 
 struct intel_pt_insn;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
index b0adbf37323e..47e196dec224 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
@@ -20,7 +20,7 @@
 #include <stdint.h>
 
 #define INTEL_PT_INSN_DESC_MAX		32
-#define INTEL_PT_INSN_DBG_BUF_SZ	16
+#define INTEL_PT_INSN_DBG_BUF_SZ	16 /* Must be >= MAX_INSN */
 
 enum intel_pt_insn_op {
 	INTEL_PT_OP_OTHER,
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index b9cc353cace2..4b9d0086a383 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -140,6 +140,7 @@ struct intel_pt_queue {
 	u32 flags;
 	u16 insn_len;
 	u64 last_insn_cnt;
+	char insn[MAX_INSN];
 };
 
 static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
@@ -305,6 +306,7 @@ struct intel_pt_cache_entry {
 	enum intel_pt_insn_branch	branch;
 	int				length;
 	int32_t				rel;
+	char				insn[MAX_INSN];
 };
 
 static int intel_pt_config_div(const char *var, const char *value, void *data)
@@ -390,6 +392,7 @@ static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
 	e->branch = intel_pt_insn->branch;
 	e->length = intel_pt_insn->length;
 	e->rel = intel_pt_insn->rel;
+	memcpy(e->insn, intel_pt_insn->buf, MAX_INSN);
 
 	err = auxtrace_cache__add(c, offset, &e->entry);
 	if (err)
@@ -427,6 +430,8 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 	u64 insn_cnt = 0;
 	bool one_map = true;
 
+	intel_pt_insn->length = 0;
+
 	if (to_ip && *ip == to_ip)
 		goto out_no_cache;
 
@@ -468,6 +473,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 				intel_pt_insn->branch = e->branch;
 				intel_pt_insn->length = e->length;
 				intel_pt_insn->rel = e->rel;
+				memcpy(intel_pt_insn->buf, e->insn, MAX_INSN);
 				intel_pt_log_insn_no_data(intel_pt_insn, *ip);
 				return 0;
 			}
@@ -817,6 +823,7 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
 		if (ptq->state->flags & INTEL_PT_IN_TX)
 			ptq->flags |= PERF_IP_FLAG_IN_TX;
 		ptq->insn_len = ptq->state->insn_len;
+		memcpy(ptq->insn, ptq->state->insn, MAX_INSN);
 	}
 }
 
@@ -997,6 +1004,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 	sample.cpu = ptq->cpu;
 	sample.flags = ptq->flags;
 	sample.insn_len = ptq->insn_len;
+	memcpy(sample.insn, ptq->insn, MAX_INSN);
 
 	/*
 	 * perf report cannot handle events without a branch stack when using
@@ -1058,6 +1066,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
 	sample.cpu = ptq->cpu;
 	sample.flags = ptq->flags;
 	sample.insn_len = ptq->insn_len;
+	memcpy(sample.insn, ptq->insn, MAX_INSN);
 
 	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
 
@@ -1120,6 +1129,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
 	sample.cpu = ptq->cpu;
 	sample.flags = ptq->flags;
 	sample.insn_len = ptq->insn_len;
+	memcpy(sample.insn, ptq->insn, MAX_INSN);
 
 	if (pt->synth_opts.callchain) {
 		thread_stack__sample(ptq->thread, ptq->chain,
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/2] perf tools: Support insn and insnlen in perf script
  2016-10-04  0:30 [PATCH 1/2] perf intel-pt-decoder: Report instruction bytes and length in sample Andi Kleen
@ 2016-10-04  0:30 ` Andi Kleen
  2016-10-05 11:36 ` [PATCH 1/2] perf intel-pt-decoder: Report instruction bytes and length in sample Arnaldo Carvalho de Melo
  1 sibling, 0 replies; 7+ messages in thread
From: Andi Kleen @ 2016-10-04  0:30 UTC (permalink / raw)
  To: acme; +Cc: linux-kernel, adrian.hunter, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

When looking at Intel PT traces with perf script it is useful to have
some indication of the instruction. Dump the instruction bytes and
instruction length, which can be used for simple pattern analysis in
scripts.

% perf record -e intel_pt// foo
% perf script --itrace=i0ns -F ip,insn,insnlen
 ffffffff8101232f ilen: 5 insn: 0f 1f 44 00 00
 ffffffff81012334 ilen: 1 insn: 5b
 ffffffff81012335 ilen: 1 insn: 5d
 ffffffff81012336 ilen: 1 insn: c3
 ffffffff810123e3 ilen: 1 insn: 5b
 ffffffff810123e4 ilen: 2 insn: 41 5c
 ffffffff810123e6 ilen: 1 insn: 5d
 ffffffff810123e7 ilen: 1 insn: c3
 ffffffff810124a6 ilen: 2 insn: 31 c0
 ffffffff810124a8 ilen: 9 insn: 41 83 bc 24 a8 01 00 00 01
 ffffffff810124b1 ilen: 2 insn: 75 87
...

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/Documentation/perf-script.txt |  6 +++++-
 tools/perf/builtin-script.c              | 25 +++++++++++++++++++++++--
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 053bbbd84ece..c01904f388ce 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -117,7 +117,7 @@ OPTIONS
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
         srcline, period, iregs, brstack, brstacksym, flags, bpf-output,
-        callindent. Field list can be prepended with the type, trace, sw or hw,
+        callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
 
@@ -181,6 +181,10 @@ OPTIONS
 	Instruction Trace decoding. For calls and returns, it will display the
 	name of the symbol indented with spaces to reflect the stack depth.
 
+	When doing instruction trace decoding, insn and insnlen give the
+	instruction bytes and the instruction length of the current
+	instruction.
+
 	Finally, a user may not set fields to none for all event types.
 	i.e., -F "" is not allowed.
 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 7228d141a789..11cf75d5dbda 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -66,6 +66,8 @@ enum perf_output_field {
 	PERF_OUTPUT_WEIGHT	    = 1U << 18,
 	PERF_OUTPUT_BPF_OUTPUT	    = 1U << 19,
 	PERF_OUTPUT_CALLINDENT	    = 1U << 20,
+	PERF_OUTPUT_INSN	    = 1U << 21,
+	PERF_OUTPUT_INSNLEN	    = 1U << 22,
 };
 
 struct output_option {
@@ -93,6 +95,8 @@ struct output_option {
 	{.str = "weight",   .field = PERF_OUTPUT_WEIGHT},
 	{.str = "bpf-output",   .field = PERF_OUTPUT_BPF_OUTPUT},
 	{.str = "callindent", .field = PERF_OUTPUT_CALLINDENT},
+	{.str = "insn", .field = PERF_OUTPUT_INSN},
+	{.str = "insnlen", .field = PERF_OUTPUT_INSNLEN},
 };
 
 /* default set to maintain compatibility with current format */
@@ -624,6 +628,21 @@ static void print_sample_callindent(struct perf_sample *sample,
 		printf("%*s", spacing - len, "");
 }
 
+
+static void print_insn(struct perf_sample *sample,
+		       struct perf_event_attr *attr)
+{
+	if (PRINT_FIELD(INSNLEN))
+		printf(" ilen: %d", sample->insn_len);
+	if (PRINT_FIELD(INSN)) {
+		int i;
+
+		printf(" insn:");
+		for (i = 0; i < sample->insn_len; i++)
+			printf(" %02x", (unsigned char)sample->insn[i]);
+	}
+}
+
 static void print_sample_bts(struct perf_sample *sample,
 			     struct perf_evsel *evsel,
 			     struct thread *thread,
@@ -668,6 +687,8 @@ static void print_sample_bts(struct perf_sample *sample,
 	if (print_srcline_last)
 		map__fprintf_srcline(al->map, al->addr, "\n  ", stdout);
 
+	print_insn(sample, attr);
+
 	printf("\n");
 }
 
@@ -911,7 +932,7 @@ static void process_event(struct perf_script *script,
 
 	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
 		print_sample_bpf_output(sample);
-
+	print_insn(sample, attr);
 	printf("\n");
 }
 
@@ -2124,7 +2145,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "Valid types: hw,sw,trace,raw. "
 		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
 		     "addr,symoff,period,iregs,brstack,brstacksym,flags,"
-		     "bpf-output,callindent", parse_output_fields),
+		     "bpf-output,callindent,insn,insnlen", parse_output_fields),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),
 	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] perf intel-pt-decoder: Report instruction bytes and length in sample
  2016-10-04  0:30 [PATCH 1/2] perf intel-pt-decoder: Report instruction bytes and length in sample Andi Kleen
  2016-10-04  0:30 ` [PATCH 2/2] perf tools: Support insn and insnlen in perf script Andi Kleen
@ 2016-10-05 11:36 ` Arnaldo Carvalho de Melo
  2016-10-07 13:45   ` Adrian Hunter
  1 sibling, 1 reply; 7+ messages in thread
From: Arnaldo Carvalho de Melo @ 2016-10-05 11:36 UTC (permalink / raw)
  To: Adrian Hunter; +Cc: Andi Kleen, linux-kernel, Andi Kleen

Em Mon, Oct 03, 2016 at 05:30:32PM -0700, Andi Kleen escreveu:
> From: Andi Kleen <ak@linux.intel.com>
> 
> Change the Intel PT decoder to pass up the length and the instruction
> bytes of the decoded or sampled instruction in the perf sample.
> 
> The decoder already knows this information, we just need to pass it
> up. Since it is only a couple of movs it is not very expensive.
> 
> Used in the next patch.

Adrian, Ack?

- Arnaldo
 
> v2: Handle instruction cache too. Make sure ilen is always initialized.
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
>  tools/perf/util/event.h                                  |  3 +++
>  tools/perf/util/intel-pt-decoder/intel-pt-decoder.c      |  2 ++
>  tools/perf/util/intel-pt-decoder/intel-pt-decoder.h      |  3 +++
>  tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h |  2 +-
>  tools/perf/util/intel-pt.c                               | 10 ++++++++++
>  5 files changed, 19 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
> index 8d363d5e65a2..c735c53a26f8 100644
> --- a/tools/perf/util/event.h
> +++ b/tools/perf/util/event.h
> @@ -177,6 +177,8 @@ enum {
>  	PERF_IP_FLAG_TRACE_BEGIN	|\
>  	PERF_IP_FLAG_TRACE_END)
>  
> +#define MAX_INSN 16
> +
>  struct perf_sample {
>  	u64 ip;
>  	u32 pid, tid;
> @@ -193,6 +195,7 @@ struct perf_sample {
>  	u32 flags;
>  	u16 insn_len;
>  	u8  cpumode;
> +	char insn[MAX_INSN];
>  	void *raw_data;
>  	struct ip_callchain *callchain;
>  	struct branch_stack *branch_stack;
> diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
> index 8ff6c6a61291..8a5e21abb790 100644
> --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
> +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
> @@ -949,6 +949,8 @@ out:
>  out_no_progress:
>  	decoder->state.insn_op = intel_pt_insn->op;
>  	decoder->state.insn_len = intel_pt_insn->length;
> +	memcpy(decoder->state.insn, intel_pt_insn->buf,
> +	       sizeof(decoder->state.insn));
>  
>  	if (decoder->tx_flags & INTEL_PT_IN_TX)
>  		decoder->state.flags |= INTEL_PT_IN_TX;
> diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
> index 02c38fec1c37..fbd7d08d97d5 100644
> --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
> +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
> @@ -20,6 +20,8 @@
>  #include <stddef.h>
>  #include <stdbool.h>
>  
> +#define MAX_INSN			16
> +
>  #include "intel-pt-insn-decoder.h"
>  
>  #define INTEL_PT_IN_TX		(1 << 0)
> @@ -66,6 +68,7 @@ struct intel_pt_state {
>  	uint32_t flags;
>  	enum intel_pt_insn_op insn_op;
>  	int insn_len;
> +	char insn[MAX_INSN];
>  };
>  
>  struct intel_pt_insn;
> diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
> index b0adbf37323e..47e196dec224 100644
> --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
> +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
> @@ -20,7 +20,7 @@
>  #include <stdint.h>
>  
>  #define INTEL_PT_INSN_DESC_MAX		32
> -#define INTEL_PT_INSN_DBG_BUF_SZ	16
> +#define INTEL_PT_INSN_DBG_BUF_SZ	16 /* Must be >= MAX_INSN */
>  
>  enum intel_pt_insn_op {
>  	INTEL_PT_OP_OTHER,
> diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
> index b9cc353cace2..4b9d0086a383 100644
> --- a/tools/perf/util/intel-pt.c
> +++ b/tools/perf/util/intel-pt.c
> @@ -140,6 +140,7 @@ struct intel_pt_queue {
>  	u32 flags;
>  	u16 insn_len;
>  	u64 last_insn_cnt;
> +	char insn[MAX_INSN];
>  };
>  
>  static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
> @@ -305,6 +306,7 @@ struct intel_pt_cache_entry {
>  	enum intel_pt_insn_branch	branch;
>  	int				length;
>  	int32_t				rel;
> +	char				insn[MAX_INSN];
>  };
>  
>  static int intel_pt_config_div(const char *var, const char *value, void *data)
> @@ -390,6 +392,7 @@ static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
>  	e->branch = intel_pt_insn->branch;
>  	e->length = intel_pt_insn->length;
>  	e->rel = intel_pt_insn->rel;
> +	memcpy(e->insn, intel_pt_insn->buf, MAX_INSN);
>  
>  	err = auxtrace_cache__add(c, offset, &e->entry);
>  	if (err)
> @@ -427,6 +430,8 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
>  	u64 insn_cnt = 0;
>  	bool one_map = true;
>  
> +	intel_pt_insn->length = 0;
> +
>  	if (to_ip && *ip == to_ip)
>  		goto out_no_cache;
>  
> @@ -468,6 +473,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
>  				intel_pt_insn->branch = e->branch;
>  				intel_pt_insn->length = e->length;
>  				intel_pt_insn->rel = e->rel;
> +				memcpy(intel_pt_insn->buf, e->insn, MAX_INSN);
>  				intel_pt_log_insn_no_data(intel_pt_insn, *ip);
>  				return 0;
>  			}
> @@ -817,6 +823,7 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
>  		if (ptq->state->flags & INTEL_PT_IN_TX)
>  			ptq->flags |= PERF_IP_FLAG_IN_TX;
>  		ptq->insn_len = ptq->state->insn_len;
> +		memcpy(ptq->insn, ptq->state->insn, MAX_INSN);
>  	}
>  }
>  
> @@ -997,6 +1004,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
>  	sample.cpu = ptq->cpu;
>  	sample.flags = ptq->flags;
>  	sample.insn_len = ptq->insn_len;
> +	memcpy(sample.insn, ptq->insn, MAX_INSN);
>  
>  	/*
>  	 * perf report cannot handle events without a branch stack when using
> @@ -1058,6 +1066,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
>  	sample.cpu = ptq->cpu;
>  	sample.flags = ptq->flags;
>  	sample.insn_len = ptq->insn_len;
> +	memcpy(sample.insn, ptq->insn, MAX_INSN);
>  
>  	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
>  
> @@ -1120,6 +1129,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
>  	sample.cpu = ptq->cpu;
>  	sample.flags = ptq->flags;
>  	sample.insn_len = ptq->insn_len;
> +	memcpy(sample.insn, ptq->insn, MAX_INSN);
>  
>  	if (pt->synth_opts.callchain) {
>  		thread_stack__sample(ptq->thread, ptq->chain,
> -- 
> 2.5.5

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] perf intel-pt-decoder: Report instruction bytes and length in sample
  2016-10-05 11:36 ` [PATCH 1/2] perf intel-pt-decoder: Report instruction bytes and length in sample Arnaldo Carvalho de Melo
@ 2016-10-07 13:45   ` Adrian Hunter
  0 siblings, 0 replies; 7+ messages in thread
From: Adrian Hunter @ 2016-10-07 13:45 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo; +Cc: Andi Kleen, linux-kernel, Andi Kleen

On 05/10/16 14:36, Arnaldo Carvalho de Melo wrote:
> Em Mon, Oct 03, 2016 at 05:30:32PM -0700, Andi Kleen escreveu:
>> From: Andi Kleen <ak@linux.intel.com>
>>
>> Change the Intel PT decoder to pass up the length and the instruction
>> bytes of the decoded or sampled instruction in the perf sample.
>>
>> The decoder already knows this information, we just need to pass it
>> up. Since it is only a couple of movs it is not very expensive.
>>
>> Used in the next patch.
> 
> Adrian, Ack?

I did a little tidy-up in Intel PT / BTS and re-based Andi's patch, adding my
Signed-off-by.  Please see the V3 patch I sent.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] perf intel-pt-decoder: Report instruction bytes and length in sample
  2016-09-30 10:07 ` Adrian Hunter
@ 2016-10-04  0:33   ` Andi Kleen
  0 siblings, 0 replies; 7+ messages in thread
From: Andi Kleen @ 2016-10-04  0:33 UTC (permalink / raw)
  To: Adrian Hunter; +Cc: Andi Kleen, acme, linux-kernel, Andi Kleen

On Fri, Sep 30, 2016 at 01:07:17PM +0300, Adrian Hunter wrote:
> On 30/09/16 06:49, Andi Kleen wrote:
> > From: Andi Kleen <ak@linux.intel.com>
> > 
> > Change the Intel PT decoder to pass up the length and the instruction
> > bytes of the decoded or sampled instruction in the perf sample.
> > 
> > The decoder already knows this information, we just need to pass it
> > up. Since it is only a couple of movs it is not very expensive.
> 
> The decoder doesn't always fill the instruction buffer because it caches the
> results from walking basic blocks.  That means the bytes from the last
> instruction in the basic block would need to be added to the cache.

I fixed this and posted a new version.
> 
> I wonder if we shouldn't look at something more sophisticated e.g. a python
> script that can do full disassembly.

I had an earlier version that used a disassembler library, but that
wasn't appreciated, so I have now switched to this simpler method.
Assembler output can now be done with a sed + objdump hack.

-Andi

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] perf intel-pt-decoder: Report instruction bytes and length in sample
  2016-09-30  3:49 Andi Kleen
@ 2016-09-30 10:07 ` Adrian Hunter
  2016-10-04  0:33   ` Andi Kleen
  0 siblings, 1 reply; 7+ messages in thread
From: Adrian Hunter @ 2016-09-30 10:07 UTC (permalink / raw)
  To: Andi Kleen, acme; +Cc: linux-kernel, Andi Kleen

On 30/09/16 06:49, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
> 
> Change the Intel PT decoder to pass up the length and the instruction
> bytes of the decoded or sampled instruction in the perf sample.
> 
> The decoder already knows this information, we just need to pass it
> up. Since it is only a couple of movs it is not very expensive.

The decoder doesn't always fill the instruction buffer because it caches the
results from walking basic blocks.  That means the bytes from the last
instruction in the basic block would need to be added to the cache.

I wonder if we shouldn't look at something more sophisticated e.g. a python
script that can do full disassembly.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/2] perf intel-pt-decoder: Report instruction bytes and length in sample
@ 2016-09-30  3:49 Andi Kleen
  2016-09-30 10:07 ` Adrian Hunter
  0 siblings, 1 reply; 7+ messages in thread
From: Andi Kleen @ 2016-09-30  3:49 UTC (permalink / raw)
  To: acme; +Cc: adrian.hunter, linux-kernel, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Change the Intel PT decoder to pass up the length and the instruction
bytes of the decoded or sampled instruction in the perf sample.

The decoder already knows this information, we just need to pass it
up. Since it is only a couple of movs it is not very expensive.

Used in the next patch.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/util/event.h                                  | 3 +++
 tools/perf/util/intel-pt-decoder/intel-pt-decoder.c      | 2 ++
 tools/perf/util/intel-pt-decoder/intel-pt-decoder.h      | 3 +++
 tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h | 2 +-
 tools/perf/util/intel-pt.c                               | 5 +++++
 5 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 8d363d5e65a2..c735c53a26f8 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -177,6 +177,8 @@ enum {
 	PERF_IP_FLAG_TRACE_BEGIN	|\
 	PERF_IP_FLAG_TRACE_END)
 
+#define MAX_INSN 16
+
 struct perf_sample {
 	u64 ip;
 	u32 pid, tid;
@@ -193,6 +195,7 @@ struct perf_sample {
 	u32 flags;
 	u16 insn_len;
 	u8  cpumode;
+	char insn[MAX_INSN];
 	void *raw_data;
 	struct ip_callchain *callchain;
 	struct branch_stack *branch_stack;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 8ff6c6a61291..8a5e21abb790 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -949,6 +949,8 @@ out:
 out_no_progress:
 	decoder->state.insn_op = intel_pt_insn->op;
 	decoder->state.insn_len = intel_pt_insn->length;
+	memcpy(decoder->state.insn, intel_pt_insn->buf,
+	       sizeof(decoder->state.insn));
 
 	if (decoder->tx_flags & INTEL_PT_IN_TX)
 		decoder->state.flags |= INTEL_PT_IN_TX;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 02c38fec1c37..fbd7d08d97d5 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -20,6 +20,8 @@
 #include <stddef.h>
 #include <stdbool.h>
 
+#define MAX_INSN			16
+
 #include "intel-pt-insn-decoder.h"
 
 #define INTEL_PT_IN_TX		(1 << 0)
@@ -66,6 +68,7 @@ struct intel_pt_state {
 	uint32_t flags;
 	enum intel_pt_insn_op insn_op;
 	int insn_len;
+	char insn[MAX_INSN];
 };
 
 struct intel_pt_insn;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
index b0adbf37323e..47e196dec224 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
@@ -20,7 +20,7 @@
 #include <stdint.h>
 
 #define INTEL_PT_INSN_DESC_MAX		32
-#define INTEL_PT_INSN_DBG_BUF_SZ	16
+#define INTEL_PT_INSN_DBG_BUF_SZ	16 /* Must be >= MAX_INSN */
 
 enum intel_pt_insn_op {
 	INTEL_PT_OP_OTHER,
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index b9cc353cace2..363eba09c609 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -140,6 +140,7 @@ struct intel_pt_queue {
 	u32 flags;
 	u16 insn_len;
 	u64 last_insn_cnt;
+	char insn[MAX_INSN];
 };
 
 static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
@@ -817,6 +818,7 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
 		if (ptq->state->flags & INTEL_PT_IN_TX)
 			ptq->flags |= PERF_IP_FLAG_IN_TX;
 		ptq->insn_len = ptq->state->insn_len;
+		memcpy(ptq->insn, ptq->state->insn, MAX_INSN);
 	}
 }
 
@@ -997,6 +999,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 	sample.cpu = ptq->cpu;
 	sample.flags = ptq->flags;
 	sample.insn_len = ptq->insn_len;
+	memcpy(sample.insn, ptq->insn, MAX_INSN);
 
 	/*
 	 * perf report cannot handle events without a branch stack when using
@@ -1058,6 +1061,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
 	sample.cpu = ptq->cpu;
 	sample.flags = ptq->flags;
 	sample.insn_len = ptq->insn_len;
+	memcpy(sample.insn, ptq->insn, MAX_INSN);
 
 	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
 
@@ -1120,6 +1124,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
 	sample.cpu = ptq->cpu;
 	sample.flags = ptq->flags;
 	sample.insn_len = ptq->insn_len;
+	memcpy(sample.insn, ptq->insn, MAX_INSN);
 
 	if (pt->synth_opts.callchain) {
 		thread_stack__sample(ptq->thread, ptq->chain,
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-10-07 13:50 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-10-04  0:30 [PATCH 1/2] perf intel-pt-decoder: Report instruction bytes and length in sample Andi Kleen
2016-10-04  0:30 ` [PATCH 2/2] perf tools: Support insn and insnlen in perf script Andi Kleen
2016-10-05 11:36 ` [PATCH 1/2] perf intel-pt-decoder: Report instruction bytes and length in sample Arnaldo Carvalho de Melo
2016-10-07 13:45   ` Adrian Hunter
  -- strict thread matches above, loose matches on Subject: below --
2016-09-30  3:49 Andi Kleen
2016-09-30 10:07 ` Adrian Hunter
2016-10-04  0:33   ` Andi Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).