linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] perf cs-etm: Refactor timestamp variable names
@ 2021-04-14 14:39 James Clark
  2021-04-14 14:39 ` [PATCH 2/2] perf cs-etm: Set time on synthesised samples to preserve ordering James Clark
  2021-04-15 19:46 ` [PATCH 1/2] perf cs-etm: Refactor timestamp variable names Mathieu Poirier
  0 siblings, 2 replies; 12+ messages in thread
From: James Clark @ 2021-04-14 14:39 UTC (permalink / raw)
  To: coresight
  Cc: al.grant, branislav.rankov, denik, suzuki.poulose, James Clark,
	Mike Leach, Leo Yan, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Namhyung Kim, John Garry, Will Deacon, Mathieu Poirier,
	linux-arm-kernel, linux-perf-users, linux-kernel

Remove ambiguity in variable names relating to timestamps.
A later commit will save the sample kernel timestamp in one
of the etm structs, so name all elements appropriately to
avoid confusion.

This is also removes some ambiguity arising from the fact
that the --timestamp argument to perf record refers to
sample kernel timestamps, and the /timestamp/ event modifier
refers to etm timestamps, so the term is overloaded.

Signed-off-by: James Clark <james.clark@arm.com>
---
 .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 18 ++++----
 tools/perf/util/cs-etm.c                      | 42 +++++++++----------
 tools/perf/util/cs-etm.h                      |  4 +-
 3 files changed, 31 insertions(+), 33 deletions(-)

diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 059bcec3f651..055cb93eca59 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -276,13 +276,13 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
 				  const uint8_t trace_chan_id)
 {
 	/* No timestamp packet has been received, nothing to do */
-	if (!packet_queue->timestamp)
+	if (!packet_queue->etm_timestamp)
 		return OCSD_RESP_CONT;
 
-	packet_queue->timestamp = packet_queue->next_timestamp;
+	packet_queue->etm_timestamp = packet_queue->next_etm_timestamp;
 
 	/* Estimate the timestamp for the next range packet */
-	packet_queue->next_timestamp += packet_queue->instr_count;
+	packet_queue->next_etm_timestamp += packet_queue->instr_count;
 	packet_queue->instr_count = 0;
 
 	/* Tell the front end which traceid_queue needs attention */
@@ -308,8 +308,8 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
 	 * Function do_soft_timestamp() will report the value to the front end,
 	 * hence asking the decoder to keep decoding rather than stopping.
 	 */
-	if (packet_queue->timestamp) {
-		packet_queue->next_timestamp = elem->timestamp;
+	if (packet_queue->etm_timestamp) {
+		packet_queue->next_etm_timestamp = elem->timestamp;
 		return OCSD_RESP_CONT;
 	}
 
@@ -320,8 +320,8 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
 	 * which instructions started by subtracting the number of instructions
 	 * executed to the timestamp.
 	 */
-	packet_queue->timestamp = elem->timestamp - packet_queue->instr_count;
-	packet_queue->next_timestamp = elem->timestamp;
+	packet_queue->etm_timestamp = elem->timestamp - packet_queue->instr_count;
+	packet_queue->next_etm_timestamp = elem->timestamp;
 	packet_queue->instr_count = 0;
 
 	/* Tell the front end which traceid_queue needs attention */
@@ -334,8 +334,8 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
 static void
 cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue)
 {
-	packet_queue->timestamp = 0;
-	packet_queue->next_timestamp = 0;
+	packet_queue->etm_timestamp = 0;
+	packet_queue->next_etm_timestamp = 0;
 	packet_queue->instr_count = 0;
 }
 
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 7e63e7dedc33..c25da2ffa8f3 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -38,8 +38,6 @@
 #include <tools/libc_compat.h>
 #include "util/synthetic-events.h"
 
-#define MAX_TIMESTAMP (~0ULL)
-
 struct cs_etm_auxtrace {
 	struct auxtrace auxtrace;
 	struct auxtrace_queues queues;
@@ -86,7 +84,7 @@ struct cs_etm_queue {
 	struct cs_etm_decoder *decoder;
 	struct auxtrace_buffer *buffer;
 	unsigned int queue_nr;
-	u8 pending_timestamp;
+	u8 pending_timestamp_chan_id;
 	u64 offset;
 	const unsigned char *buf;
 	size_t buf_len, buf_used;
@@ -208,7 +206,7 @@ void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
 	 * be more than one channel per cs_etm_queue, we need to specify
 	 * what traceID queue needs servicing.
 	 */
-	etmq->pending_timestamp = trace_chan_id;
+	etmq->pending_timestamp_chan_id = trace_chan_id;
 }
 
 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
@@ -216,22 +214,22 @@ static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
 {
 	struct cs_etm_packet_queue *packet_queue;
 
-	if (!etmq->pending_timestamp)
+	if (!etmq->pending_timestamp_chan_id)
 		return 0;
 
 	if (trace_chan_id)
-		*trace_chan_id = etmq->pending_timestamp;
+		*trace_chan_id = etmq->pending_timestamp_chan_id;
 
 	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
-						     etmq->pending_timestamp);
+						     etmq->pending_timestamp_chan_id);
 	if (!packet_queue)
 		return 0;
 
 	/* Acknowledge pending status */
-	etmq->pending_timestamp = 0;
+	etmq->pending_timestamp_chan_id = 0;
 
 	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
-	return packet_queue->timestamp;
+	return packet_queue->etm_timestamp;
 }
 
 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
@@ -814,7 +812,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
 	int ret = 0;
 	unsigned int cs_queue_nr;
 	u8 trace_chan_id;
-	u64 timestamp;
+	u64 etm_timestamp;
 	struct cs_etm_queue *etmq = queue->priv;
 
 	if (list_empty(&queue->head) || etmq)
@@ -854,7 +852,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
 
 		/*
 		 * Run decoder on the trace block.  The decoder will stop when
-		 * encountering a timestamp, a full packet queue or the end of
+		 * encountering an ETM timestamp, a full packet queue or the end of
 		 * trace for that block.
 		 */
 		ret = cs_etm__decode_data_block(etmq);
@@ -865,10 +863,10 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
 		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
 		 * the timestamp calculation for us.
 		 */
-		timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
+		etm_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
 
 		/* We found a timestamp, no need to continue. */
-		if (timestamp)
+		if (etm_timestamp)
 			break;
 
 		/*
@@ -892,7 +890,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
 	 * queue and will be processed in cs_etm__process_queues().
 	 */
 	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
-	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp);
+	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, etm_timestamp);
 out:
 	return ret;
 }
@@ -2221,7 +2219,7 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
 	int ret = 0;
 	unsigned int cs_queue_nr, queue_nr;
 	u8 trace_chan_id;
-	u64 timestamp;
+	u64 etm_timestamp;
 	struct auxtrace_queue *queue;
 	struct cs_etm_queue *etmq;
 	struct cs_etm_traceid_queue *tidq;
@@ -2283,9 +2281,9 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
 		if (ret)
 			goto out;
 
-		timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
+		etm_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
 
-		if (!timestamp) {
+		if (!etm_timestamp) {
 			/*
 			 * Function cs_etm__decode_data_block() returns when
 			 * there is no more traces to decode in the current
@@ -2308,7 +2306,7 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
 		 * this queue/traceID.
 		 */
 		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
-		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp);
+		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, etm_timestamp);
 	}
 
 out:
@@ -2380,7 +2378,7 @@ static int cs_etm__process_event(struct perf_session *session,
 				 struct perf_tool *tool)
 {
 	int err = 0;
-	u64 timestamp;
+	u64 sample_kernel_timestamp;
 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
 						   struct cs_etm_auxtrace,
 						   auxtrace);
@@ -2394,11 +2392,11 @@ static int cs_etm__process_event(struct perf_session *session,
 	}
 
 	if (sample->time && (sample->time != (u64) -1))
-		timestamp = sample->time;
+		sample_kernel_timestamp = sample->time;
 	else
-		timestamp = 0;
+		sample_kernel_timestamp = 0;
 
-	if (timestamp || etm->timeless_decoding) {
+	if (sample_kernel_timestamp || etm->timeless_decoding) {
 		err = cs_etm__update_queues(etm);
 		if (err)
 			return err;
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 36428918411e..b300f6fa19cc 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -171,8 +171,8 @@ struct cs_etm_packet_queue {
 	u32 head;
 	u32 tail;
 	u32 instr_count;
-	u64 timestamp;
-	u64 next_timestamp;
+	u64 etm_timestamp;
+	u64 next_etm_timestamp;
 	struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER];
 };
 
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/2] perf cs-etm: Set time on synthesised samples to preserve ordering
  2021-04-14 14:39 [PATCH 1/2] perf cs-etm: Refactor timestamp variable names James Clark
@ 2021-04-14 14:39 ` James Clark
  2021-04-14 14:41   ` James Clark
                     ` (3 more replies)
  2021-04-15 19:46 ` [PATCH 1/2] perf cs-etm: Refactor timestamp variable names Mathieu Poirier
  1 sibling, 4 replies; 12+ messages in thread
From: James Clark @ 2021-04-14 14:39 UTC (permalink / raw)
  To: coresight
  Cc: al.grant, branislav.rankov, denik, suzuki.poulose, James Clark,
	Mike Leach, Leo Yan, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Namhyung Kim, John Garry, Will Deacon, Mathieu Poirier,
	linux-arm-kernel, linux-perf-users, linux-kernel

The following attribute is set when synthesising samples in
timed decoding mode:

    attr.sample_type |= PERF_SAMPLE_TIME;

This results in new samples that appear to have timestamps but
because we don't assign any timestamps to the samples, when the
resulting inject file is opened again, the synthesised samples
will be on the wrong side of the MMAP or COMM events.

For example this results in the samples being associated with
the perf binary, rather than the target of the record:

    perf record -e cs_etm/@tmc_etr0/u top
    perf inject -i perf.data -o perf.inject --itrace=i100il
    perf report -i perf.inject

Where 'Command' == perf should show as 'top':

    # Overhead  Command  Source Shared Object  Source Symbol           Target Symbol           Basic Block Cycles
    # ........  .......  ....................  ......................  ......................  ..................
    #
        31.08%  perf     [unknown]             [.] 0x000000000040c3f8  [.] 0x000000000040c3e8  -

If the perf.data file is opened directly with perf, without the
inject step, then this already works correctly because the
events are synthesised after the COMM and MMAP events and
no second sorting happens. Re-sorting only happens when opening
the perf.inject file for the second time so timestamps are
needed.

Using the timestamp from the AUX record mirrors the current
behaviour when opening directly with perf, because the events
are generated on the call to cs_etm__process_queues().

Signed-off-by: James Clark <james.clark@arm.com>
Co-developed-by: Al Grant <al.grant@arm.com>
Signed-off-by: Al Grant <al.grant@arm.com>
---
 tools/perf/util/cs-etm.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index c25da2ffa8f3..d0fa9dce47f1 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -54,6 +54,7 @@ struct cs_etm_auxtrace {
 	u8 sample_instructions;
 
 	int num_cpu;
+	u64 latest_kernel_timestamp;
 	u32 auxtrace_type;
 	u64 branches_sample_type;
 	u64 branches_id;
@@ -1192,6 +1193,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
 	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
 	event->sample.header.size = sizeof(struct perf_event_header);
 
+	if (!etm->timeless_decoding)
+		sample.time = etm->latest_kernel_timestamp;
 	sample.ip = addr;
 	sample.pid = tidq->pid;
 	sample.tid = tidq->tid;
@@ -1248,6 +1251,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
 	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
 	event->sample.header.size = sizeof(struct perf_event_header);
 
+	if (!etm->timeless_decoding)
+		sample.time = etm->latest_kernel_timestamp;
 	sample.ip = ip;
 	sample.pid = tidq->pid;
 	sample.tid = tidq->tid;
@@ -2412,9 +2417,10 @@ static int cs_etm__process_event(struct perf_session *session,
 	else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
 		return cs_etm__process_switch_cpu_wide(etm, event);
 
-	if (!etm->timeless_decoding &&
-	    event->header.type == PERF_RECORD_AUX)
+	if (!etm->timeless_decoding && event->header.type == PERF_RECORD_AUX) {
+		etm->latest_kernel_timestamp = sample_kernel_timestamp;
 		return cs_etm__process_queues(etm);
+	}
 
 	return 0;
 }
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] perf cs-etm: Set time on synthesised samples to preserve ordering
  2021-04-14 14:39 ` [PATCH 2/2] perf cs-etm: Set time on synthesised samples to preserve ordering James Clark
@ 2021-04-14 14:41   ` James Clark
  2021-04-15 12:39     ` Leo Yan
  2021-04-14 15:54   ` Suzuki K Poulose
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 12+ messages in thread
From: James Clark @ 2021-04-14 14:41 UTC (permalink / raw)
  To: coresight
  Cc: al.grant, branislav.rankov, denik, suzuki.poulose, Mike Leach,
	Leo Yan, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Namhyung Kim, John Garry, Will Deacon, Mathieu Poirier,
	linux-arm-kernel, linux-perf-users, linux-kernel

Hi,

For this change, I also tried removing the setting of PERF_SAMPLE_TIME in cs_etm__synth_events(). In theory, this would remove the sorting when opening the file, but the change doesn't affect when the built-in events are saved to the inject file. Resulting in events like MMAP and COMM with timestamps, but the synthesised events without. This results in the same issue of the synthesised events appearing before the COMM and MMAP events. If it was possible to somehow tell perf to remove timestamps from built-in events, removing PERF_SAMPLE_TIME would probably be the right solution, because we don't set sample.time.

For Arm v8.4 we will have the kernel time in the etm timestamps, so an if can be added to switch between this behaviour and the next (more correct) one depending on the hardware. 

On the subject of timestamps, but not related to this change, some combinations of timestamp options aren't working. For example:

    perf record -e cs_etm/time,@tmc_etr0/u --per-thread
or  perf record -e cs_etm/@tmc_etr0/u --timestamp --per-thread

These don't work because of the assumption that etm->timeless_decoding == --per-thread
and kernel timestamps enabled (/time/ or --timestamp) == etm timestamps enabled (/timestamp/), which isn't necessarily true.

This can be made to work with a few code changes for cs_etm/time,timestamp/u --per-thread, but cs_etm/time/u --per-thread could be a bit more work. Changes involved would be using "per_cpu_mmaps" in some places instead of etm->timeless_decoding, and also setting etm->timeless_decoding based on whether there are any etm timestamps, not kernel ones. Although to search for any etm timestamp would involve a full decode ahead of time which might not be feasible (or maybe just checking the options, although that's not how it's done in cs_etm__is_timeless_decoding() currently).

Or, we could force /time/ and /timestamp/ options to always be enabled together in the record stage. 


Thanks
James

On 14/04/2021 17:39, James Clark wrote:
> The following attribute is set when synthesising samples in
> timed decoding mode:
> 
>     attr.sample_type |= PERF_SAMPLE_TIME;
> 
> This results in new samples that appear to have timestamps but
> because we don't assign any timestamps to the samples, when the
> resulting inject file is opened again, the synthesised samples
> will be on the wrong side of the MMAP or COMM events.
> 
> For example this results in the samples being associated with
> the perf binary, rather than the target of the record:
> 
>     perf record -e cs_etm/@tmc_etr0/u top
>     perf inject -i perf.data -o perf.inject --itrace=i100il
>     perf report -i perf.inject
> 
> Where 'Command' == perf should show as 'top':
> 
>     # Overhead  Command  Source Shared Object  Source Symbol           Target Symbol           Basic Block Cycles
>     # ........  .......  ....................  ......................  ......................  ..................
>     #
>         31.08%  perf     [unknown]             [.] 0x000000000040c3f8  [.] 0x000000000040c3e8  -
> 
> If the perf.data file is opened directly with perf, without the
> inject step, then this already works correctly because the
> events are synthesised after the COMM and MMAP events and
> no second sorting happens. Re-sorting only happens when opening
> the perf.inject file for the second time so timestamps are
> needed.
> 
> Using the timestamp from the AUX record mirrors the current
> behaviour when opening directly with perf, because the events
> are generated on the call to cs_etm__process_queues().
> 
> Signed-off-by: James Clark <james.clark@arm.com>
> Co-developed-by: Al Grant <al.grant@arm.com>
> Signed-off-by: Al Grant <al.grant@arm.com>
> ---
>  tools/perf/util/cs-etm.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index c25da2ffa8f3..d0fa9dce47f1 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -54,6 +54,7 @@ struct cs_etm_auxtrace {
>  	u8 sample_instructions;
>  
>  	int num_cpu;
> +	u64 latest_kernel_timestamp;
>  	u32 auxtrace_type;
>  	u64 branches_sample_type;
>  	u64 branches_id;
> @@ -1192,6 +1193,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
>  	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
>  	event->sample.header.size = sizeof(struct perf_event_header);
>  
> +	if (!etm->timeless_decoding)
> +		sample.time = etm->latest_kernel_timestamp;
>  	sample.ip = addr;
>  	sample.pid = tidq->pid;
>  	sample.tid = tidq->tid;
> @@ -1248,6 +1251,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
>  	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
>  	event->sample.header.size = sizeof(struct perf_event_header);
>  
> +	if (!etm->timeless_decoding)
> +		sample.time = etm->latest_kernel_timestamp;
>  	sample.ip = ip;
>  	sample.pid = tidq->pid;
>  	sample.tid = tidq->tid;
> @@ -2412,9 +2417,10 @@ static int cs_etm__process_event(struct perf_session *session,
>  	else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
>  		return cs_etm__process_switch_cpu_wide(etm, event);
>  
> -	if (!etm->timeless_decoding &&
> -	    event->header.type == PERF_RECORD_AUX)
> +	if (!etm->timeless_decoding && event->header.type == PERF_RECORD_AUX) {
> +		etm->latest_kernel_timestamp = sample_kernel_timestamp;
>  		return cs_etm__process_queues(etm);
> +	}
>  
>  	return 0;
>  }
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] perf cs-etm: Set time on synthesised samples to preserve ordering
  2021-04-14 14:39 ` [PATCH 2/2] perf cs-etm: Set time on synthesised samples to preserve ordering James Clark
  2021-04-14 14:41   ` James Clark
@ 2021-04-14 15:54   ` Suzuki K Poulose
  2021-04-15 12:30   ` Leo Yan
  2021-04-15 19:54   ` Mathieu Poirier
  3 siblings, 0 replies; 12+ messages in thread
From: Suzuki K Poulose @ 2021-04-14 15:54 UTC (permalink / raw)
  To: James Clark, coresight
  Cc: al.grant, branislav.rankov, denik, Mike Leach, Leo Yan,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
	John Garry, Will Deacon, Mathieu Poirier, linux-arm-kernel,
	linux-perf-users, linux-kernel

On 14/04/2021 15:39, James Clark wrote:
> The following attribute is set when synthesising samples in
> timed decoding mode:
> 
>      attr.sample_type |= PERF_SAMPLE_TIME;
> 
> This results in new samples that appear to have timestamps but
> because we don't assign any timestamps to the samples, when the
> resulting inject file is opened again, the synthesised samples
> will be on the wrong side of the MMAP or COMM events.
> 
> For example this results in the samples being associated with
> the perf binary, rather than the target of the record:
> 
>      perf record -e cs_etm/@tmc_etr0/u top
>      perf inject -i perf.data -o perf.inject --itrace=i100il
>      perf report -i perf.inject
> 
> Where 'Command' == perf should show as 'top':
> 
>      # Overhead  Command  Source Shared Object  Source Symbol           Target Symbol           Basic Block Cycles
>      # ........  .......  ....................  ......................  ......................  ..................
>      #
>          31.08%  perf     [unknown]             [.] 0x000000000040c3f8  [.] 0x000000000040c3e8  -
> 
> If the perf.data file is opened directly with perf, without the
> inject step, then this already works correctly because the
> events are synthesised after the COMM and MMAP events and
> no second sorting happens. Re-sorting only happens when opening
> the perf.inject file for the second time so timestamps are
> needed.
> 
> Using the timestamp from the AUX record mirrors the current
> behaviour when opening directly with perf, because the events
> are generated on the call to cs_etm__process_queues().

I would add the following here to clarify the situation :

"The ETM trace could optionally contain time stamps, but there is
no way to correlate this with the kernel time. So, the best available
time value is that of the AUX_RECORD header. This patch uses
the timestamp from the header for all the samples. The ordering of the
samples are implicit in the trace and thus is fine with respect to
relative ordering."


> 
> Signed-off-by: James Clark <james.clark@arm.com>
> Co-developed-by: Al Grant <al.grant@arm.com>
> Signed-off-by: Al Grant <al.grant@arm.com>

nit: The preferred order is your S-o-B at the end (i.e of the sender)

> ---
>   tools/perf/util/cs-etm.c | 10 ++++++++--
>   1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index c25da2ffa8f3..d0fa9dce47f1 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -54,6 +54,7 @@ struct cs_etm_auxtrace {
>   	u8 sample_instructions;
>   
>   	int num_cpu;
> +	u64 latest_kernel_timestamp;
>   	u32 auxtrace_type;
>   	u64 branches_sample_type;
>   	u64 branches_id;
> @@ -1192,6 +1193,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
>   	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
>   	event->sample.header.size = sizeof(struct perf_event_header);
>   
> +	if (!etm->timeless_decoding)
> +		sample.time = etm->latest_kernel_timestamp;
>   	sample.ip = addr;
>   	sample.pid = tidq->pid;
>   	sample.tid = tidq->tid;
> @@ -1248,6 +1251,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
>   	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
>   	event->sample.header.size = sizeof(struct perf_event_header);
>   
> +	if (!etm->timeless_decoding)
> +		sample.time = etm->latest_kernel_timestamp;
>   	sample.ip = ip;
>   	sample.pid = tidq->pid;
>   	sample.tid = tidq->tid;
> @@ -2412,9 +2417,10 @@ static int cs_etm__process_event(struct perf_session *session,
>   	else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
>   		return cs_etm__process_switch_cpu_wide(etm, event);
>   
> -	if (!etm->timeless_decoding &&
> -	    event->header.type == PERF_RECORD_AUX)

Might want to add a comment here ,

	/*
	 * Record the latest kernel timestamp available in the header
	 * for samples.
	 */

> +	if (!etm->timeless_decoding && event->header.type == PERF_RECORD_AUX) {
> +		etm->latest_kernel_timestamp = sample_kernel_timestamp;
>   		return cs_etm__process_queues(etm);
> +	}
>   
>   	return 0;
>   }
> 

This is the best effort thing we could do to get things working.

With the comments addressed:

Acked-by: Suzuki K Poulos <suzuki.poulose@arm.com>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] perf cs-etm: Set time on synthesised samples to preserve ordering
  2021-04-14 14:39 ` [PATCH 2/2] perf cs-etm: Set time on synthesised samples to preserve ordering James Clark
  2021-04-14 14:41   ` James Clark
  2021-04-14 15:54   ` Suzuki K Poulose
@ 2021-04-15 12:30   ` Leo Yan
  2021-04-15 19:54   ` Mathieu Poirier
  3 siblings, 0 replies; 12+ messages in thread
From: Leo Yan @ 2021-04-15 12:30 UTC (permalink / raw)
  To: James Clark
  Cc: coresight, al.grant, branislav.rankov, denik, suzuki.poulose,
	Mike Leach, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Namhyung Kim, John Garry, Will Deacon, Mathieu Poirier,
	linux-arm-kernel, linux-perf-users, linux-kernel

Hi James,

On Wed, Apr 14, 2021 at 05:39:19PM +0300, James Clark wrote:
> The following attribute is set when synthesising samples in
> timed decoding mode:
> 
>     attr.sample_type |= PERF_SAMPLE_TIME;
> 
> This results in new samples that appear to have timestamps but
> because we don't assign any timestamps to the samples, when the
> resulting inject file is opened again, the synthesised samples
> will be on the wrong side of the MMAP or COMM events.
> 
> For example this results in the samples being associated with
> the perf binary, rather than the target of the record:
> 
>     perf record -e cs_etm/@tmc_etr0/u top
>     perf inject -i perf.data -o perf.inject --itrace=i100il
>     perf report -i perf.inject
> 
> Where 'Command' == perf should show as 'top':
> 
>     # Overhead  Command  Source Shared Object  Source Symbol           Target Symbol           Basic Block Cycles
>     # ........  .......  ....................  ......................  ......................  ..................
>     #
>         31.08%  perf     [unknown]             [.] 0x000000000040c3f8  [.] 0x000000000040c3e8  -
> 
> If the perf.data file is opened directly with perf, without the
> inject step, then this already works correctly because the
> events are synthesised after the COMM and MMAP events and
> no second sorting happens. Re-sorting only happens when opening
> the perf.inject file for the second time so timestamps are
> needed.
> 
> Using the timestamp from the AUX record mirrors the current
> behaviour when opening directly with perf, because the events
> are generated on the call to cs_etm__process_queues().
> 
> Signed-off-by: James Clark <james.clark@arm.com>
> Co-developed-by: Al Grant <al.grant@arm.com>
> Signed-off-by: Al Grant <al.grant@arm.com>
> ---
>  tools/perf/util/cs-etm.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index c25da2ffa8f3..d0fa9dce47f1 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -54,6 +54,7 @@ struct cs_etm_auxtrace {
>  	u8 sample_instructions;
>  
>  	int num_cpu;
> +	u64 latest_kernel_timestamp;
>  	u32 auxtrace_type;
>  	u64 branches_sample_type;
>  	u64 branches_id;
> @@ -1192,6 +1193,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
>  	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
>  	event->sample.header.size = sizeof(struct perf_event_header);
>  
> +	if (!etm->timeless_decoding)
> +		sample.time = etm->latest_kernel_timestamp;
>  	sample.ip = addr;
>  	sample.pid = tidq->pid;
>  	sample.tid = tidq->tid;
> @@ -1248,6 +1251,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
>  	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
>  	event->sample.header.size = sizeof(struct perf_event_header);
>  
> +	if (!etm->timeless_decoding)
> +		sample.time = etm->latest_kernel_timestamp;
>  	sample.ip = ip;
>  	sample.pid = tidq->pid;
>  	sample.tid = tidq->tid;
> @@ -2412,9 +2417,10 @@ static int cs_etm__process_event(struct perf_session *session,
>  	else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
>  		return cs_etm__process_switch_cpu_wide(etm, event);
>  
> -	if (!etm->timeless_decoding &&
> -	    event->header.type == PERF_RECORD_AUX)
> +	if (!etm->timeless_decoding && event->header.type == PERF_RECORD_AUX) {
> +		etm->latest_kernel_timestamp = sample_kernel_timestamp;
>  		return cs_etm__process_queues(etm);
> +	}

The change looks good to me, I went through these two patches for at
least twice, and didn't find issue.

And given the trace data might be overflow and overwritten, it's
reasonable for me to use the PERF_RECORD_AUX timestamp from the tail of
trace data.

Reviewed-by: Leo Yan <leo.yan@linaro.org>

>  	return 0;
>  }
> -- 
> 2.28.0
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] perf cs-etm: Set time on synthesised samples to preserve ordering
  2021-04-14 14:41   ` James Clark
@ 2021-04-15 12:39     ` Leo Yan
  2021-04-15 12:51       ` James Clark
  0 siblings, 1 reply; 12+ messages in thread
From: Leo Yan @ 2021-04-15 12:39 UTC (permalink / raw)
  To: James Clark
  Cc: coresight, al.grant, branislav.rankov, denik, suzuki.poulose,
	Mike Leach, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Namhyung Kim, John Garry, Will Deacon, Mathieu Poirier,
	linux-arm-kernel, linux-perf-users, linux-kernel

On Wed, Apr 14, 2021 at 05:41:46PM +0300, James Clark wrote:
> Hi,
> 
> For this change, I also tried removing the setting of PERF_SAMPLE_TIME in cs_etm__synth_events(). In theory, this would remove the sorting when opening the file, but the change doesn't affect when the built-in events are saved to the inject file. Resulting in events like MMAP and COMM with timestamps, but the synthesised events without. This results in the same issue of the synthesised events appearing before the COMM and MMAP events. If it was possible to somehow tell perf to remove timestamps from built-in events, removing PERF_SAMPLE_TIME would probably be the right solution, because we don't set sample.time.
> 
> For Arm v8.4 we will have the kernel time in the etm timestamps, so an if can be added to switch between this behaviour and the next (more correct) one depending on the hardware. 
> 
> On the subject of timestamps, but not related to this change, some combinations of timestamp options aren't working. For example:
> 
>     perf record -e cs_etm/time,@tmc_etr0/u --per-thread
> or  perf record -e cs_etm/@tmc_etr0/u --timestamp --per-thread
> 
> These don't work because of the assumption that etm->timeless_decoding == --per-thread
> and kernel timestamps enabled (/time/ or --timestamp) == etm timestamps enabled (/timestamp/), which isn't necessarily true.
> 
> This can be made to work with a few code changes for cs_etm/time,timestamp/u --per-thread, but cs_etm/time/u --per-thread could be a bit more work. Changes involved would be using "per_cpu_mmaps" in some places instead of etm->timeless_decoding, and also setting etm->timeless_decoding based on whether there are any etm timestamps, not kernel ones. Although to search for any etm timestamp would involve a full decode ahead of time which might not be feasible (or maybe just checking the options, although that's not how it's done in cs_etm__is_timeless_decoding() currently).

Confirm for one thing:

For the orignal perf data file with "--per-thread" option, the decoder
runs into the condition for "etm->timeless_decoding"; and it doesn't
contain ETM timestamp.

Afterwards, the injected perf data file also misses ETM timestamp and
hit the condition "etm->timeless_decoding".

So I am confusing why the original perf data can be processed properly
but fails to handle the injected perf data file.

Thanks,
Leo

> Or, we could force /time/ and /timestamp/ options to always be enabled together in the record stage. 
> 
> 
> Thanks
> James
> 
> On 14/04/2021 17:39, James Clark wrote:
> > The following attribute is set when synthesising samples in
> > timed decoding mode:
> > 
> >     attr.sample_type |= PERF_SAMPLE_TIME;
> > 
> > This results in new samples that appear to have timestamps but
> > because we don't assign any timestamps to the samples, when the
> > resulting inject file is opened again, the synthesised samples
> > will be on the wrong side of the MMAP or COMM events.
> > 
> > For example this results in the samples being associated with
> > the perf binary, rather than the target of the record:
> > 
> >     perf record -e cs_etm/@tmc_etr0/u top
> >     perf inject -i perf.data -o perf.inject --itrace=i100il
> >     perf report -i perf.inject
> > 
> > Where 'Command' == perf should show as 'top':
> > 
> >     # Overhead  Command  Source Shared Object  Source Symbol           Target Symbol           Basic Block Cycles
> >     # ........  .......  ....................  ......................  ......................  ..................
> >     #
> >         31.08%  perf     [unknown]             [.] 0x000000000040c3f8  [.] 0x000000000040c3e8  -
> > 
> > If the perf.data file is opened directly with perf, without the
> > inject step, then this already works correctly because the
> > events are synthesised after the COMM and MMAP events and
> > no second sorting happens. Re-sorting only happens when opening
> > the perf.inject file for the second time so timestamps are
> > needed.
> > 
> > Using the timestamp from the AUX record mirrors the current
> > behaviour when opening directly with perf, because the events
> > are generated on the call to cs_etm__process_queues().
> > 
> > Signed-off-by: James Clark <james.clark@arm.com>
> > Co-developed-by: Al Grant <al.grant@arm.com>
> > Signed-off-by: Al Grant <al.grant@arm.com>
> > ---
> >  tools/perf/util/cs-etm.c | 10 ++++++++--
> >  1 file changed, 8 insertions(+), 2 deletions(-)
> > 
> > diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> > index c25da2ffa8f3..d0fa9dce47f1 100644
> > --- a/tools/perf/util/cs-etm.c
> > +++ b/tools/perf/util/cs-etm.c
> > @@ -54,6 +54,7 @@ struct cs_etm_auxtrace {
> >  	u8 sample_instructions;
> >  
> >  	int num_cpu;
> > +	u64 latest_kernel_timestamp;
> >  	u32 auxtrace_type;
> >  	u64 branches_sample_type;
> >  	u64 branches_id;
> > @@ -1192,6 +1193,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
> >  	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
> >  	event->sample.header.size = sizeof(struct perf_event_header);
> >  
> > +	if (!etm->timeless_decoding)
> > +		sample.time = etm->latest_kernel_timestamp;
> >  	sample.ip = addr;
> >  	sample.pid = tidq->pid;
> >  	sample.tid = tidq->tid;
> > @@ -1248,6 +1251,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
> >  	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
> >  	event->sample.header.size = sizeof(struct perf_event_header);
> >  
> > +	if (!etm->timeless_decoding)
> > +		sample.time = etm->latest_kernel_timestamp;
> >  	sample.ip = ip;
> >  	sample.pid = tidq->pid;
> >  	sample.tid = tidq->tid;
> > @@ -2412,9 +2417,10 @@ static int cs_etm__process_event(struct perf_session *session,
> >  	else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
> >  		return cs_etm__process_switch_cpu_wide(etm, event);
> >  
> > -	if (!etm->timeless_decoding &&
> > -	    event->header.type == PERF_RECORD_AUX)
> > +	if (!etm->timeless_decoding && event->header.type == PERF_RECORD_AUX) {
> > +		etm->latest_kernel_timestamp = sample_kernel_timestamp;
> >  		return cs_etm__process_queues(etm);
> > +	}
> >  
> >  	return 0;
> >  }
> > 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] perf cs-etm: Set time on synthesised samples to preserve ordering
  2021-04-15 12:39     ` Leo Yan
@ 2021-04-15 12:51       ` James Clark
  2021-04-15 14:33         ` Leo Yan
  0 siblings, 1 reply; 12+ messages in thread
From: James Clark @ 2021-04-15 12:51 UTC (permalink / raw)
  To: Leo Yan
  Cc: coresight, al.grant, branislav.rankov, denik, suzuki.poulose,
	Mike Leach, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Namhyung Kim, John Garry, Will Deacon, Mathieu Poirier,
	linux-arm-kernel, linux-perf-users, linux-kernel



On 15/04/2021 15:39, Leo Yan wrote:
> On Wed, Apr 14, 2021 at 05:41:46PM +0300, James Clark wrote:
>> Hi,
>>
>> For this change, I also tried removing the setting of PERF_SAMPLE_TIME in cs_etm__synth_events(). In theory, this would remove the sorting when opening the file, but the change doesn't affect when the built-in events are saved to the inject file. Resulting in events like MMAP and COMM with timestamps, but the synthesised events without. This results in the same issue of the synthesised events appearing before the COMM and MMAP events. If it was possible to somehow tell perf to remove timestamps from built-in events, removing PERF_SAMPLE_TIME would probably be the right solution, because we don't set sample.time.
>>
>> For Arm v8.4 we will have the kernel time in the etm timestamps, so an if can be added to switch between this behaviour and the next (more correct) one depending on the hardware. 
>>
>> On the subject of timestamps, but not related to this change, some combinations of timestamp options aren't working. For example:
>>
>>     perf record -e cs_etm/time,@tmc_etr0/u --per-thread
>> or  perf record -e cs_etm/@tmc_etr0/u --timestamp --per-thread
>>
>> These don't work because of the assumption that etm->timeless_decoding == --per-thread
>> and kernel timestamps enabled (/time/ or --timestamp) == etm timestamps enabled (/timestamp/), which isn't necessarily true.
>>
>> This can be made to work with a few code changes for cs_etm/time,timestamp/u --per-thread, but cs_etm/time/u --per-thread could be a bit more work. Changes involved would be using "per_cpu_mmaps" in some places instead of etm->timeless_decoding, and also setting etm->timeless_decoding based on whether there are any etm timestamps, not kernel ones. Although to search for any etm timestamp would involve a full decode ahead of time which might not be feasible (or maybe just checking the options, although that's not how it's done in cs_etm__is_timeless_decoding() currently).
> 
> Confirm for one thing:
> 
> For the orignal perf data file with "--per-thread" option, the decoder
> runs into the condition for "etm->timeless_decoding"; and it doesn't
> contain ETM timestamp.
> 
> Afterwards, the injected perf data file also misses ETM timestamp and
> hit the condition "etm->timeless_decoding".
> 
> So I am confusing why the original perf data can be processed properly
> but fails to handle the injected perf data file.

Hi Leo,

My patch only deals with per-cpu mode. With per-thread mode everything is already working
because _none_ of the events have timestamps because they are not enabled by default:

	/* In per-cpu case, always need the time of mmap events etc */
	if (!perf_cpu_map__empty(cpus))
		evsel__set_sample_bit(tracking_evsel, TIME);

When none of the events have timestamps, I think perf doesn't use the ordering code in
ordered-events.c. So when the inject file is opened, the events are read in file order.
In file order, MMAP and COMM events come first, because they were encountered before the
AUX record where we generated synthetic events.

So it's not really about --per-thread vs per-cpu mode, it's actually about whether
PERF_SAMPLE_TIME is set, which is set as a by-product of per-cpu mode.

I hope I understood your question properly.

James


> 
> Thanks,
> Leo
> 
>> Or, we could force /time/ and /timestamp/ options to always be enabled together in the record stage. 
>>
>>
>> Thanks
>> James
>>
>> On 14/04/2021 17:39, James Clark wrote:
>>> The following attribute is set when synthesising samples in
>>> timed decoding mode:
>>>
>>>     attr.sample_type |= PERF_SAMPLE_TIME;
>>>
>>> This results in new samples that appear to have timestamps but
>>> because we don't assign any timestamps to the samples, when the
>>> resulting inject file is opened again, the synthesised samples
>>> will be on the wrong side of the MMAP or COMM events.
>>>
>>> For example this results in the samples being associated with
>>> the perf binary, rather than the target of the record:
>>>
>>>     perf record -e cs_etm/@tmc_etr0/u top
>>>     perf inject -i perf.data -o perf.inject --itrace=i100il
>>>     perf report -i perf.inject
>>>
>>> Where 'Command' == perf should show as 'top':
>>>
>>>     # Overhead  Command  Source Shared Object  Source Symbol           Target Symbol           Basic Block Cycles
>>>     # ........  .......  ....................  ......................  ......................  ..................
>>>     #
>>>         31.08%  perf     [unknown]             [.] 0x000000000040c3f8  [.] 0x000000000040c3e8  -
>>>
>>> If the perf.data file is opened directly with perf, without the
>>> inject step, then this already works correctly because the
>>> events are synthesised after the COMM and MMAP events and
>>> no second sorting happens. Re-sorting only happens when opening
>>> the perf.inject file for the second time so timestamps are
>>> needed.
>>>
>>> Using the timestamp from the AUX record mirrors the current
>>> behaviour when opening directly with perf, because the events
>>> are generated on the call to cs_etm__process_queues().
>>>
>>> Signed-off-by: James Clark <james.clark@arm.com>
>>> Co-developed-by: Al Grant <al.grant@arm.com>
>>> Signed-off-by: Al Grant <al.grant@arm.com>
>>> ---
>>>  tools/perf/util/cs-etm.c | 10 ++++++++--
>>>  1 file changed, 8 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
>>> index c25da2ffa8f3..d0fa9dce47f1 100644
>>> --- a/tools/perf/util/cs-etm.c
>>> +++ b/tools/perf/util/cs-etm.c
>>> @@ -54,6 +54,7 @@ struct cs_etm_auxtrace {
>>>  	u8 sample_instructions;
>>>  
>>>  	int num_cpu;
>>> +	u64 latest_kernel_timestamp;
>>>  	u32 auxtrace_type;
>>>  	u64 branches_sample_type;
>>>  	u64 branches_id;
>>> @@ -1192,6 +1193,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
>>>  	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
>>>  	event->sample.header.size = sizeof(struct perf_event_header);
>>>  
>>> +	if (!etm->timeless_decoding)
>>> +		sample.time = etm->latest_kernel_timestamp;
>>>  	sample.ip = addr;
>>>  	sample.pid = tidq->pid;
>>>  	sample.tid = tidq->tid;
>>> @@ -1248,6 +1251,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
>>>  	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
>>>  	event->sample.header.size = sizeof(struct perf_event_header);
>>>  
>>> +	if (!etm->timeless_decoding)
>>> +		sample.time = etm->latest_kernel_timestamp;
>>>  	sample.ip = ip;
>>>  	sample.pid = tidq->pid;
>>>  	sample.tid = tidq->tid;
>>> @@ -2412,9 +2417,10 @@ static int cs_etm__process_event(struct perf_session *session,
>>>  	else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
>>>  		return cs_etm__process_switch_cpu_wide(etm, event);
>>>  
>>> -	if (!etm->timeless_decoding &&
>>> -	    event->header.type == PERF_RECORD_AUX)
>>> +	if (!etm->timeless_decoding && event->header.type == PERF_RECORD_AUX) {
>>> +		etm->latest_kernel_timestamp = sample_kernel_timestamp;
>>>  		return cs_etm__process_queues(etm);
>>> +	}
>>>  
>>>  	return 0;
>>>  }
>>>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] perf cs-etm: Set time on synthesised samples to preserve ordering
  2021-04-15 12:51       ` James Clark
@ 2021-04-15 14:33         ` Leo Yan
  2021-04-16  9:55           ` James Clark
  0 siblings, 1 reply; 12+ messages in thread
From: Leo Yan @ 2021-04-15 14:33 UTC (permalink / raw)
  To: James Clark
  Cc: coresight, al.grant, branislav.rankov, denik, suzuki.poulose,
	Mike Leach, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Namhyung Kim, John Garry, Will Deacon, Mathieu Poirier,
	linux-arm-kernel, linux-perf-users, linux-kernel

Hi James,

On Thu, Apr 15, 2021 at 03:51:46PM +0300, James Clark wrote:

[...]

> > For the orignal perf data file with "--per-thread" option, the decoder
> > runs into the condition for "etm->timeless_decoding"; and it doesn't
> > contain ETM timestamp.
> > 
> > Afterwards, the injected perf data file also misses ETM timestamp and
> > hit the condition "etm->timeless_decoding".
> > 
> > So I am confusing why the original perf data can be processed properly
> > but fails to handle the injected perf data file.
> 
> Hi Leo,
> 
> My patch only deals with per-cpu mode. With per-thread mode everything is already working
> because _none_ of the events have timestamps because they are not enabled by default:
> 
> 	/* In per-cpu case, always need the time of mmap events etc */
> 	if (!perf_cpu_map__empty(cpus))
> 		evsel__set_sample_bit(tracking_evsel, TIME);
> 
> When none of the events have timestamps, I think perf doesn't use the ordering code in
> ordered-events.c. So when the inject file is opened, the events are read in file order.

The explination makes sense to me.  One thinking: if the original file
doesn't use the ordered event, is it possible for the injected file to
not use the ordered event as well?

Could you confirm Intel-pt can work well for per-cpu mode for inject
file?

> So it's not really about --per-thread vs per-cpu mode, it's actually about whether
> PERF_SAMPLE_TIME is set, which is set as a by-product of per-cpu mode.
>
> I hope I understood your question properly.

Thanks for info, sorry if I miss any info you have elaborated.

Leo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] perf cs-etm: Refactor timestamp variable names
  2021-04-14 14:39 [PATCH 1/2] perf cs-etm: Refactor timestamp variable names James Clark
  2021-04-14 14:39 ` [PATCH 2/2] perf cs-etm: Set time on synthesised samples to preserve ordering James Clark
@ 2021-04-15 19:46 ` Mathieu Poirier
  1 sibling, 0 replies; 12+ messages in thread
From: Mathieu Poirier @ 2021-04-15 19:46 UTC (permalink / raw)
  To: James Clark
  Cc: coresight, al.grant, branislav.rankov, denik, suzuki.poulose,
	Mike Leach, Leo Yan, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Namhyung Kim, John Garry, Will Deacon, linux-arm-kernel,
	linux-perf-users, linux-kernel

On Wed, Apr 14, 2021 at 05:39:18PM +0300, James Clark wrote:
> Remove ambiguity in variable names relating to timestamps.
> A later commit will save the sample kernel timestamp in one
> of the etm structs, so name all elements appropriately to
> avoid confusion.
> 
> This is also removes some ambiguity arising from the fact
> that the --timestamp argument to perf record refers to
> sample kernel timestamps, and the /timestamp/ event modifier
> refers to etm timestamps, so the term is overloaded.
> 
> Signed-off-by: James Clark <james.clark@arm.com>
> ---
>  .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 18 ++++----
>  tools/perf/util/cs-etm.c                      | 42 +++++++++----------
>  tools/perf/util/cs-etm.h                      |  4 +-
>  3 files changed, 31 insertions(+), 33 deletions(-)
> 
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> index 059bcec3f651..055cb93eca59 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> @@ -276,13 +276,13 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
>  				  const uint8_t trace_chan_id)
>  {
>  	/* No timestamp packet has been received, nothing to do */
> -	if (!packet_queue->timestamp)
> +	if (!packet_queue->etm_timestamp)
>  		return OCSD_RESP_CONT;
>  
> -	packet_queue->timestamp = packet_queue->next_timestamp;
> +	packet_queue->etm_timestamp = packet_queue->next_etm_timestamp;
>  
>  	/* Estimate the timestamp for the next range packet */
> -	packet_queue->next_timestamp += packet_queue->instr_count;
> +	packet_queue->next_etm_timestamp += packet_queue->instr_count;
>  	packet_queue->instr_count = 0;
>  
>  	/* Tell the front end which traceid_queue needs attention */
> @@ -308,8 +308,8 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
>  	 * Function do_soft_timestamp() will report the value to the front end,
>  	 * hence asking the decoder to keep decoding rather than stopping.
>  	 */
> -	if (packet_queue->timestamp) {
> -		packet_queue->next_timestamp = elem->timestamp;
> +	if (packet_queue->etm_timestamp) {
> +		packet_queue->next_etm_timestamp = elem->timestamp;
>  		return OCSD_RESP_CONT;
>  	}
>  
> @@ -320,8 +320,8 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
>  	 * which instructions started by subtracting the number of instructions
>  	 * executed to the timestamp.
>  	 */
> -	packet_queue->timestamp = elem->timestamp - packet_queue->instr_count;
> -	packet_queue->next_timestamp = elem->timestamp;
> +	packet_queue->etm_timestamp = elem->timestamp - packet_queue->instr_count;
> +	packet_queue->next_etm_timestamp = elem->timestamp;
>  	packet_queue->instr_count = 0;
>  
>  	/* Tell the front end which traceid_queue needs attention */
> @@ -334,8 +334,8 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
>  static void
>  cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue)
>  {
> -	packet_queue->timestamp = 0;
> -	packet_queue->next_timestamp = 0;
> +	packet_queue->etm_timestamp = 0;
> +	packet_queue->next_etm_timestamp = 0;
>  	packet_queue->instr_count = 0;
>  }
>  
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index 7e63e7dedc33..c25da2ffa8f3 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -38,8 +38,6 @@
>  #include <tools/libc_compat.h>
>  #include "util/synthetic-events.h"
>  
> -#define MAX_TIMESTAMP (~0ULL)
> -
>  struct cs_etm_auxtrace {
>  	struct auxtrace auxtrace;
>  	struct auxtrace_queues queues;
> @@ -86,7 +84,7 @@ struct cs_etm_queue {
>  	struct cs_etm_decoder *decoder;
>  	struct auxtrace_buffer *buffer;
>  	unsigned int queue_nr;
> -	u8 pending_timestamp;
> +	u8 pending_timestamp_chan_id;
>  	u64 offset;
>  	const unsigned char *buf;
>  	size_t buf_len, buf_used;
> @@ -208,7 +206,7 @@ void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
>  	 * be more than one channel per cs_etm_queue, we need to specify
>  	 * what traceID queue needs servicing.
>  	 */
> -	etmq->pending_timestamp = trace_chan_id;
> +	etmq->pending_timestamp_chan_id = trace_chan_id;
>  }
>  
>  static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
> @@ -216,22 +214,22 @@ static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
>  {
>  	struct cs_etm_packet_queue *packet_queue;
>  
> -	if (!etmq->pending_timestamp)
> +	if (!etmq->pending_timestamp_chan_id)
>  		return 0;
>  
>  	if (trace_chan_id)
> -		*trace_chan_id = etmq->pending_timestamp;
> +		*trace_chan_id = etmq->pending_timestamp_chan_id;
>  
>  	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
> -						     etmq->pending_timestamp);
> +						     etmq->pending_timestamp_chan_id);
>  	if (!packet_queue)
>  		return 0;
>  
>  	/* Acknowledge pending status */
> -	etmq->pending_timestamp = 0;
> +	etmq->pending_timestamp_chan_id = 0;
>  
>  	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
> -	return packet_queue->timestamp;
> +	return packet_queue->etm_timestamp;
>  }
>  
>  static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
> @@ -814,7 +812,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
>  	int ret = 0;
>  	unsigned int cs_queue_nr;
>  	u8 trace_chan_id;
> -	u64 timestamp;
> +	u64 etm_timestamp;
>  	struct cs_etm_queue *etmq = queue->priv;
>  
>  	if (list_empty(&queue->head) || etmq)
> @@ -854,7 +852,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
>  
>  		/*
>  		 * Run decoder on the trace block.  The decoder will stop when
> -		 * encountering a timestamp, a full packet queue or the end of
> +		 * encountering an ETM timestamp, a full packet queue or the end of
>  		 * trace for that block.
>  		 */
>  		ret = cs_etm__decode_data_block(etmq);
> @@ -865,10 +863,10 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
>  		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
>  		 * the timestamp calculation for us.
>  		 */
> -		timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
> +		etm_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
>  
>  		/* We found a timestamp, no need to continue. */
> -		if (timestamp)
> +		if (etm_timestamp)
>  			break;
>  
>  		/*
> @@ -892,7 +890,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
>  	 * queue and will be processed in cs_etm__process_queues().
>  	 */
>  	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
> -	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp);
> +	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, etm_timestamp);
>  out:
>  	return ret;
>  }
> @@ -2221,7 +2219,7 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
>  	int ret = 0;
>  	unsigned int cs_queue_nr, queue_nr;
>  	u8 trace_chan_id;
> -	u64 timestamp;
> +	u64 etm_timestamp;
>  	struct auxtrace_queue *queue;
>  	struct cs_etm_queue *etmq;
>  	struct cs_etm_traceid_queue *tidq;
> @@ -2283,9 +2281,9 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
>  		if (ret)
>  			goto out;
>  
> -		timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
> +		etm_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
>  
> -		if (!timestamp) {
> +		if (!etm_timestamp) {
>  			/*
>  			 * Function cs_etm__decode_data_block() returns when
>  			 * there is no more traces to decode in the current
> @@ -2308,7 +2306,7 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
>  		 * this queue/traceID.
>  		 */
>  		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
> -		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp);
> +		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, etm_timestamp);
>  	}
>  
>  out:
> @@ -2380,7 +2378,7 @@ static int cs_etm__process_event(struct perf_session *session,
>  				 struct perf_tool *tool)
>  {
>  	int err = 0;
> -	u64 timestamp;
> +	u64 sample_kernel_timestamp;
>  	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
>  						   struct cs_etm_auxtrace,
>  						   auxtrace);
> @@ -2394,11 +2392,11 @@ static int cs_etm__process_event(struct perf_session *session,
>  	}
>  
>  	if (sample->time && (sample->time != (u64) -1))
> -		timestamp = sample->time;
> +		sample_kernel_timestamp = sample->time;
>  	else
> -		timestamp = 0;
> +		sample_kernel_timestamp = 0;
>  
> -	if (timestamp || etm->timeless_decoding) {
> +	if (sample_kernel_timestamp || etm->timeless_decoding) {
>  		err = cs_etm__update_queues(etm);
>  		if (err)
>  			return err;
> diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
> index 36428918411e..b300f6fa19cc 100644
> --- a/tools/perf/util/cs-etm.h
> +++ b/tools/perf/util/cs-etm.h
> @@ -171,8 +171,8 @@ struct cs_etm_packet_queue {
>  	u32 head;
>  	u32 tail;
>  	u32 instr_count;
> -	u64 timestamp;
> -	u64 next_timestamp;
> +	u64 etm_timestamp;
> +	u64 next_etm_timestamp;

I find the "etm" part confusing since timestamps are generated by a specific CS
component, not the ETM itself.  As such I think it should be "cs_timestamp" and
"next_cs_timestamp".  But this is just a naming convention and won't press the
case further if you feel strongly about it.

Otherwise I think this renaming exercise is worth it.  

With or without the above:

Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>

>  	struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER];
>  };
>  
> -- 
> 2.28.0
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] perf cs-etm: Set time on synthesised samples to preserve ordering
  2021-04-14 14:39 ` [PATCH 2/2] perf cs-etm: Set time on synthesised samples to preserve ordering James Clark
                     ` (2 preceding siblings ...)
  2021-04-15 12:30   ` Leo Yan
@ 2021-04-15 19:54   ` Mathieu Poirier
  2021-04-16 10:16     ` James Clark
  3 siblings, 1 reply; 12+ messages in thread
From: Mathieu Poirier @ 2021-04-15 19:54 UTC (permalink / raw)
  To: James Clark
  Cc: coresight, al.grant, branislav.rankov, denik, suzuki.poulose,
	Mike Leach, Leo Yan, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Namhyung Kim, John Garry, Will Deacon, linux-arm-kernel,
	linux-perf-users, linux-kernel

On Wed, Apr 14, 2021 at 05:39:19PM +0300, James Clark wrote:
> The following attribute is set when synthesising samples in
> timed decoding mode:
> 
>     attr.sample_type |= PERF_SAMPLE_TIME;
> 
> This results in new samples that appear to have timestamps but
> because we don't assign any timestamps to the samples, when the
> resulting inject file is opened again, the synthesised samples
> will be on the wrong side of the MMAP or COMM events.
>

I understand the problem.  Once again an issue caused by CS and the kernel
having a different view of time. 

> For example this results in the samples being associated with
> the perf binary, rather than the target of the record:
> 
>     perf record -e cs_etm/@tmc_etr0/u top
>     perf inject -i perf.data -o perf.inject --itrace=i100il
>     perf report -i perf.inject
> 
> Where 'Command' == perf should show as 'top':
> 
>     # Overhead  Command  Source Shared Object  Source Symbol           Target Symbol           Basic Block Cycles
>     # ........  .......  ....................  ......................  ......................  ..................
>     #
>         31.08%  perf     [unknown]             [.] 0x000000000040c3f8  [.] 0x000000000040c3e8  -
> 
> If the perf.data file is opened directly with perf, without the
> inject step, then this already works correctly because the
> events are synthesised after the COMM and MMAP events and
> no second sorting happens. Re-sorting only happens when opening
> the perf.inject file for the second time so timestamps are
> needed.
> 
> Using the timestamp from the AUX record mirrors the current
> behaviour when opening directly with perf, because the events
> are generated on the call to cs_etm__process_queues().
> 
> Signed-off-by: James Clark <james.clark@arm.com>
> Co-developed-by: Al Grant <al.grant@arm.com>
> Signed-off-by: Al Grant <al.grant@arm.com>

Suzuki is correct, your name has to appear after Al's.

> ---
>  tools/perf/util/cs-etm.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index c25da2ffa8f3..d0fa9dce47f1 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -54,6 +54,7 @@ struct cs_etm_auxtrace {
>  	u8 sample_instructions;
>  
>  	int num_cpu;
> +	u64 latest_kernel_timestamp;
>  	u32 auxtrace_type;
>  	u64 branches_sample_type;
>  	u64 branches_id;
> @@ -1192,6 +1193,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
>  	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
>  	event->sample.header.size = sizeof(struct perf_event_header);
>  
> +	if (!etm->timeless_decoding)
> +		sample.time = etm->latest_kernel_timestamp;
>  	sample.ip = addr;
>  	sample.pid = tidq->pid;
>  	sample.tid = tidq->tid;
> @@ -1248,6 +1251,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
>  	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
>  	event->sample.header.size = sizeof(struct perf_event_header);
>  
> +	if (!etm->timeless_decoding)
> +		sample.time = etm->latest_kernel_timestamp;
>  	sample.ip = ip;
>  	sample.pid = tidq->pid;
>  	sample.tid = tidq->tid;
> @@ -2412,9 +2417,10 @@ static int cs_etm__process_event(struct perf_session *session,
>  	else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
>  		return cs_etm__process_switch_cpu_wide(etm, event);
>  
> -	if (!etm->timeless_decoding &&
> -	    event->header.type == PERF_RECORD_AUX)
> +	if (!etm->timeless_decoding && event->header.type == PERF_RECORD_AUX) {
> +		etm->latest_kernel_timestamp = sample_kernel_timestamp;

It will be fun to fix this when 8.4 comes out but for now it's the best we've
got.

Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>

>  		return cs_etm__process_queues(etm);
> +	}
>  
>  	return 0;
>  }
> -- 
> 2.28.0
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] perf cs-etm: Set time on synthesised samples to preserve ordering
  2021-04-15 14:33         ` Leo Yan
@ 2021-04-16  9:55           ` James Clark
  0 siblings, 0 replies; 12+ messages in thread
From: James Clark @ 2021-04-16  9:55 UTC (permalink / raw)
  To: Leo Yan
  Cc: coresight, al.grant, branislav.rankov, denik, suzuki.poulose,
	Mike Leach, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Namhyung Kim, John Garry, Will Deacon, Mathieu Poirier,
	linux-arm-kernel, linux-perf-users, linux-kernel



On 15/04/2021 17:33, Leo Yan wrote:
> Hi James,
> 
> On Thu, Apr 15, 2021 at 03:51:46PM +0300, James Clark wrote:
> 
> [...]
> 
>>> For the orignal perf data file with "--per-thread" option, the decoder
>>> runs into the condition for "etm->timeless_decoding"; and it doesn't
>>> contain ETM timestamp.
>>>
>>> Afterwards, the injected perf data file also misses ETM timestamp and
>>> hit the condition "etm->timeless_decoding".
>>>
>>> So I am confusing why the original perf data can be processed properly
>>> but fails to handle the injected perf data file.
>>
>> Hi Leo,
>>
>> My patch only deals with per-cpu mode. With per-thread mode everything is already working
>> because _none_ of the events have timestamps because they are not enabled by default:
>>
>> 	/* In per-cpu case, always need the time of mmap events etc */
>> 	if (!perf_cpu_map__empty(cpus))
>> 		evsel__set_sample_bit(tracking_evsel, TIME);
>>
>> When none of the events have timestamps, I think perf doesn't use the ordering code in
>> ordered-events.c. So when the inject file is opened, the events are read in file order.
> 
> The explination makes sense to me.  One thinking: if the original file
> doesn't use the ordered event, is it possible for the injected file to
> not use the ordered event as well?

Yes if you inject on a file with no timestamps and then open it, then the function queue_event()
in ordered_events.c is not hit.

If you create a file based on one with timestamps, then the queue_event() function is hit
even on the injected file.

The relevant bit of code is here:

	if (tool->ordered_events) {
		u64 timestamp = -1ULL;

		ret = evlist__parse_sample_timestamp(evlist, event, &timestamp);
		if (ret && ret != -1)
			return ret;

		ret = perf_session__queue_event(session, event, timestamp, file_offset);
		if (ret != -ETIME)
			return ret;
	}

	return perf_session__deliver_event(session, event, tool, file_offset);

If tool->ordered_events is set AND the timestamp for the sample parses to be non zero
and non -1:

	if (!timestamp || timestamp == ~0ULL)
		return -ETIME;

Then the event is added into the queue, otherwise it goes straight through to perf_session__deliver_event()
The ordering can be disabled manually with tool->ordered_events and --disable-order and is also disabled
with --dump-raw-trace.

It seems like processing the file only really works when all events are unordered but in the right order,
or ordered with the right timestamps set.

> 
> Could you confirm Intel-pt can work well for per-cpu mode for inject
> file?

Yes it seems like synthesised samples are assigned sensible timestamps.

	perf record -e intel_pt//u top
	perf inject -i perf.data -o perf-intel-per-cpu.inject.data --itrace=i100i --strip
	perf report -i perf-intel-per-cpu.inject.data -D

Results in the correct binary and DSO names and the SAMPLE timestamp is after the COMM:

	0 381165621595220 0x1200 [0x38]: PERF_RECORD_COMM exec: top:20173/20173
	
	...
	
	2 381165622169297 0x13b0 [0x38]: PERF_RECORD_SAMPLE(IP, 0x2): 20173/20173: 0x7fdaa14abf53 period: 100 addr: 0
 	... thread: top:20173
 	...... dso: /lib/x86_64-linux-gnu/ld-2.27.so

Per-thread also works, but no samples or events have timestamps.

> 
>> So it's not really about --per-thread vs per-cpu mode, it's actually about whether
>> PERF_SAMPLE_TIME is set, which is set as a by-product of per-cpu mode.
>>
>> I hope I understood your question properly.
> 
> Thanks for info, sorry if I miss any info you have elaborated.
> 
> Leo
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] perf cs-etm: Set time on synthesised samples to preserve ordering
  2021-04-15 19:54   ` Mathieu Poirier
@ 2021-04-16 10:16     ` James Clark
  0 siblings, 0 replies; 12+ messages in thread
From: James Clark @ 2021-04-16 10:16 UTC (permalink / raw)
  To: Mathieu Poirier
  Cc: coresight, al.grant, branislav.rankov, denik, suzuki.poulose,
	Mike Leach, Leo Yan, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Namhyung Kim, John Garry, Will Deacon, linux-arm-kernel,
	linux-perf-users, linux-kernel



On 15/04/2021 22:54, Mathieu Poirier wrote:
> On Wed, Apr 14, 2021 at 05:39:19PM +0300, James Clark wrote:
>> The following attribute is set when synthesising samples in
>> timed decoding mode:
>>
>>     attr.sample_type |= PERF_SAMPLE_TIME;
>>
>> This results in new samples that appear to have timestamps but
>> because we don't assign any timestamps to the samples, when the
>> resulting inject file is opened again, the synthesised samples
>> will be on the wrong side of the MMAP or COMM events.
>>
> 
> I understand the problem.  Once again an issue caused by CS and the kernel
> having a different view of time. 
> 
>> For example this results in the samples being associated with
>> the perf binary, rather than the target of the record:
>>
>>     perf record -e cs_etm/@tmc_etr0/u top
>>     perf inject -i perf.data -o perf.inject --itrace=i100il
>>     perf report -i perf.inject
>>
>> Where 'Command' == perf should show as 'top':
>>
>>     # Overhead  Command  Source Shared Object  Source Symbol           Target Symbol           Basic Block Cycles
>>     # ........  .......  ....................  ......................  ......................  ..................
>>     #
>>         31.08%  perf     [unknown]             [.] 0x000000000040c3f8  [.] 0x000000000040c3e8  -
>>
>> If the perf.data file is opened directly with perf, without the
>> inject step, then this already works correctly because the
>> events are synthesised after the COMM and MMAP events and
>> no second sorting happens. Re-sorting only happens when opening
>> the perf.inject file for the second time so timestamps are
>> needed.
>>
>> Using the timestamp from the AUX record mirrors the current
>> behaviour when opening directly with perf, because the events
>> are generated on the call to cs_etm__process_queues().
>>
>> Signed-off-by: James Clark <james.clark@arm.com>
>> Co-developed-by: Al Grant <al.grant@arm.com>
>> Signed-off-by: Al Grant <al.grant@arm.com>
> 
> Suzuki is correct, your name has to appear after Al's.
> 
>> ---
>>  tools/perf/util/cs-etm.c | 10 ++++++++--
>>  1 file changed, 8 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
>> index c25da2ffa8f3..d0fa9dce47f1 100644
>> --- a/tools/perf/util/cs-etm.c
>> +++ b/tools/perf/util/cs-etm.c
>> @@ -54,6 +54,7 @@ struct cs_etm_auxtrace {
>>  	u8 sample_instructions;
>>  
>>  	int num_cpu;
>> +	u64 latest_kernel_timestamp;
>>  	u32 auxtrace_type;
>>  	u64 branches_sample_type;
>>  	u64 branches_id;
>> @@ -1192,6 +1193,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
>>  	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
>>  	event->sample.header.size = sizeof(struct perf_event_header);
>>  
>> +	if (!etm->timeless_decoding)
>> +		sample.time = etm->latest_kernel_timestamp;
>>  	sample.ip = addr;
>>  	sample.pid = tidq->pid;
>>  	sample.tid = tidq->tid;
>> @@ -1248,6 +1251,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
>>  	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
>>  	event->sample.header.size = sizeof(struct perf_event_header);
>>  
>> +	if (!etm->timeless_decoding)
>> +		sample.time = etm->latest_kernel_timestamp;
>>  	sample.ip = ip;
>>  	sample.pid = tidq->pid;
>>  	sample.tid = tidq->tid;
>> @@ -2412,9 +2417,10 @@ static int cs_etm__process_event(struct perf_session *session,
>>  	else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
>>  		return cs_etm__process_switch_cpu_wide(etm, event);
>>  
>> -	if (!etm->timeless_decoding &&
>> -	    event->header.type == PERF_RECORD_AUX)
>> +	if (!etm->timeless_decoding && event->header.type == PERF_RECORD_AUX) {
>> +		etm->latest_kernel_timestamp = sample_kernel_timestamp;
> 
> It will be fun to fix this when 8.4 comes out but for now it's the best we've
> got.
> 

Thanks for the reviews Leo, Mathieu and Suzuki. Yes I think for 8.4 we can also do something
very similar to Leo's "perf arm-spe: Bail out if the trace is later than perf event"
patch where decoding is paused until the other events with later timestamps
have been received. At the moment the CS decoding happens all at once.

I will submit a new set with the fixes and better variable name.

James

> Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
> 
>>  		return cs_etm__process_queues(etm);
>> +	}
>>  
>>  	return 0;
>>  }
>> -- 
>> 2.28.0
>>

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2021-04-16 10:16 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-14 14:39 [PATCH 1/2] perf cs-etm: Refactor timestamp variable names James Clark
2021-04-14 14:39 ` [PATCH 2/2] perf cs-etm: Set time on synthesised samples to preserve ordering James Clark
2021-04-14 14:41   ` James Clark
2021-04-15 12:39     ` Leo Yan
2021-04-15 12:51       ` James Clark
2021-04-15 14:33         ` Leo Yan
2021-04-16  9:55           ` James Clark
2021-04-14 15:54   ` Suzuki K Poulose
2021-04-15 12:30   ` Leo Yan
2021-04-15 19:54   ` Mathieu Poirier
2021-04-16 10:16     ` James Clark
2021-04-15 19:46 ` [PATCH 1/2] perf cs-etm: Refactor timestamp variable names Mathieu Poirier

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).