All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] perf arm-spe: Synthesize SPE instruction events
@ 2021-12-15 18:46 ` German Gomez
  0 siblings, 0 replies; 10+ messages in thread
From: German Gomez @ 2021-12-15 18:46 UTC (permalink / raw)
  To: linux-kernel, linux-perf-users, acme
  Cc: German Gomez, John Garry, Will Deacon, Mathieu Poirier, Leo Yan,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
	linux-arm-kernel

Synthesize an instruction event for every decoded Arm SPE record.

Arm SPE already samples in hardware, and the period given to --itrace is
a software period that perf applies on top of it. Therefore also print a
warning when that period is greater than 1, to make the user aware that
further instruction events will be discarded.

Signed-off-by: German Gomez <german.gomez@arm.com>
---
Changes since v1 [https://lore.kernel.org/all/20211117142833.226629-1-german.gomez@arm.com]
  - Generate events with "--itrace=i" instead of "--itrace=o".
  - Generate events with virt_addr, phys_addr, and data_src values.
---
 tools/perf/util/arm-spe.c | 58 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index fccac06b573a..879583822c8f 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -58,6 +58,8 @@ struct arm_spe {
 	u8				sample_branch;
 	u8				sample_remote_access;
 	u8				sample_memory;
+	u8				sample_instructions;
+	u64				instructions_sample_period;
 
 	u64				l1d_miss_id;
 	u64				l1d_access_id;
@@ -68,6 +70,7 @@ struct arm_spe {
 	u64				branch_miss_id;
 	u64				remote_access_id;
 	u64				memory_id;
+	u64				instructions_id;
 
 	u64				kernel_start;
 
@@ -90,6 +93,7 @@ struct arm_spe_queue {
 	u64				time;
 	u64				timestamp;
 	struct thread			*thread;
+	u64				period_instructions;
 };
 
 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
@@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
 	speq->pid = -1;
 	speq->tid = -1;
 	speq->cpu = -1;
+	speq->period_instructions = 0;
 
 	/* params set */
 	params.get_trace = arm_spe_get_trace;
@@ -351,6 +356,33 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
 	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
 }
 
+static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
+					     u64 spe_events_id, u64 data_src)
+{
+	struct arm_spe *spe = speq->spe;
+	struct arm_spe_record *record = &speq->decoder->record;
+	union perf_event *event = speq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+
+	/*
+	 * Handles perf instruction sampling period.
+	 */
+	speq->period_instructions++;
+	if (speq->period_instructions < spe->instructions_sample_period)
+		return 0;
+	speq->period_instructions = 0;
+
+	arm_spe_prep_sample(spe, speq, event, &sample);
+
+	sample.id = spe_events_id;
+	sample.stream_id = spe_events_id;
+	sample.addr = record->virt_addr;
+	sample.phys_addr = record->phys_addr;
+	sample.data_src = data_src;
+
+	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+}
+
 #define SPE_MEM_TYPE	(ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
 			 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
 			 ARM_SPE_REMOTE_ACCESS)
@@ -480,6 +512,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
 			return err;
 	}
 
+	if (spe->sample_instructions) {
+		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
+		if (err)
+			return err;
+	}
+
 	return 0;
 }
 
@@ -1107,6 +1145,26 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
 			return err;
 		spe->memory_id = id;
 		arm_spe_set_event_name(evlist, id, "memory");
+		id += 1;
+	}
+
+	if (spe->synth_opts.instructions) {
+		if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS)
+			return -EINVAL;
+
+		if (spe->synth_opts.period > 1)
+			pr_warning("Arm SPE has a hardware-based sample period.\n"
+				   "More instruction events will be discarded by --itrace\n");
+
+		spe->sample_instructions = true;
+		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+		attr.sample_period = spe->synth_opts.period;
+		spe->instructions_sample_period = attr.sample_period;
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->instructions_id = id;
+		arm_spe_set_event_name(evlist, id, "instructions");
 	}
 
 	return 0;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2] perf arm-spe: Synthesize SPE instruction events
@ 2021-12-15 18:46 ` German Gomez
  0 siblings, 0 replies; 10+ messages in thread
From: German Gomez @ 2021-12-15 18:46 UTC (permalink / raw)
  To: linux-kernel, linux-perf-users, acme
  Cc: German Gomez, John Garry, Will Deacon, Mathieu Poirier, Leo Yan,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
	linux-arm-kernel

Synthesize an instruction event for every decoded Arm SPE record.

Arm SPE already samples in hardware, and the period given to --itrace is
a software period that perf applies on top of it. Therefore also print a
warning when that period is greater than 1, to make the user aware that
further instruction events will be discarded.

Signed-off-by: German Gomez <german.gomez@arm.com>
---
Changes since v1 [https://lore.kernel.org/all/20211117142833.226629-1-german.gomez@arm.com]
  - Generate events with "--itrace=i" instead of "--itrace=o".
  - Generate events with virt_addr, phys_addr, and data_src values.
---
 tools/perf/util/arm-spe.c | 58 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index fccac06b573a..879583822c8f 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -58,6 +58,8 @@ struct arm_spe {
 	u8				sample_branch;
 	u8				sample_remote_access;
 	u8				sample_memory;
+	u8				sample_instructions;
+	u64				instructions_sample_period;
 
 	u64				l1d_miss_id;
 	u64				l1d_access_id;
@@ -68,6 +70,7 @@ struct arm_spe {
 	u64				branch_miss_id;
 	u64				remote_access_id;
 	u64				memory_id;
+	u64				instructions_id;
 
 	u64				kernel_start;
 
@@ -90,6 +93,7 @@ struct arm_spe_queue {
 	u64				time;
 	u64				timestamp;
 	struct thread			*thread;
+	u64				period_instructions;
 };
 
 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
@@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
 	speq->pid = -1;
 	speq->tid = -1;
 	speq->cpu = -1;
+	speq->period_instructions = 0;
 
 	/* params set */
 	params.get_trace = arm_spe_get_trace;
@@ -351,6 +356,33 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
 	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
 }
 
+static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
+					     u64 spe_events_id, u64 data_src)
+{
+	struct arm_spe *spe = speq->spe;
+	struct arm_spe_record *record = &speq->decoder->record;
+	union perf_event *event = speq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+
+	/*
+	 * Handles perf instruction sampling period.
+	 */
+	speq->period_instructions++;
+	if (speq->period_instructions < spe->instructions_sample_period)
+		return 0;
+	speq->period_instructions = 0;
+
+	arm_spe_prep_sample(spe, speq, event, &sample);
+
+	sample.id = spe_events_id;
+	sample.stream_id = spe_events_id;
+	sample.addr = record->virt_addr;
+	sample.phys_addr = record->phys_addr;
+	sample.data_src = data_src;
+
+	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+}
+
 #define SPE_MEM_TYPE	(ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
 			 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
 			 ARM_SPE_REMOTE_ACCESS)
@@ -480,6 +512,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
 			return err;
 	}
 
+	if (spe->sample_instructions) {
+		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
+		if (err)
+			return err;
+	}
+
 	return 0;
 }
 
@@ -1107,6 +1145,26 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
 			return err;
 		spe->memory_id = id;
 		arm_spe_set_event_name(evlist, id, "memory");
+		id += 1;
+	}
+
+	if (spe->synth_opts.instructions) {
+		if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS)
+			return -EINVAL;
+
+		if (spe->synth_opts.period > 1)
+			pr_warning("Arm SPE has a hardware-based sample period.\n"
+				   "More instruction events will be discarded by --itrace\n");
+
+		spe->sample_instructions = true;
+		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+		attr.sample_period = spe->synth_opts.period;
+		spe->instructions_sample_period = attr.sample_period;
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->instructions_id = id;
+		arm_spe_set_event_name(evlist, id, "instructions");
 	}
 
 	return 0;
-- 
2.25.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] perf arm-spe: Synthesize SPE instruction events
  2021-12-15 18:46 ` German Gomez
@ 2021-12-16  1:18   ` Namhyung Kim
  -1 siblings, 0 replies; 10+ messages in thread
From: Namhyung Kim @ 2021-12-16  1:18 UTC (permalink / raw)
  To: German Gomez
  Cc: linux-kernel, linux-perf-users, Arnaldo Carvalho de Melo,
	John Garry, Will Deacon, Mathieu Poirier, Leo Yan, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, linux-arm-kernel

Hello,

On Wed, Dec 15, 2021 at 10:46 AM German Gomez <german.gomez@arm.com> wrote:
>
> Synthesize instruction events per every decoded ARM SPE record.
>
> Because Arm SPE implements a hardware-based sample period, and perf
> implements a software-based one that gets applied on top, also add a
> warning to make the user aware.
>
> Signed-off-by: German Gomez <german.gomez@arm.com>
> ---
> Changes since v1 [https://lore.kernel.org/all/20211117142833.226629-1-german.gomez@arm.com]
>   - Generate events with "--itrace=i" instead of "--itrace=o".
>   - Generate events with virt_addr, phys_addr, and data_src values.
> ---
>  tools/perf/util/arm-spe.c | 58 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 58 insertions(+)
>
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index fccac06b573a..879583822c8f 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -58,6 +58,8 @@ struct arm_spe {
>         u8                              sample_branch;
>         u8                              sample_remote_access;
>         u8                              sample_memory;
> +       u8                              sample_instructions;
> +       u64                             instructions_sample_period;
>
>         u64                             l1d_miss_id;
>         u64                             l1d_access_id;
> @@ -68,6 +70,7 @@ struct arm_spe {
>         u64                             branch_miss_id;
>         u64                             remote_access_id;
>         u64                             memory_id;
> +       u64                             instructions_id;
>
>         u64                             kernel_start;
>
> @@ -90,6 +93,7 @@ struct arm_spe_queue {
>         u64                             time;
>         u64                             timestamp;
>         struct thread                   *thread;
> +       u64                             period_instructions;
>  };
>
>  static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
> @@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
>         speq->pid = -1;
>         speq->tid = -1;
>         speq->cpu = -1;
> +       speq->period_instructions = 0;
>
>         /* params set */
>         params.get_trace = arm_spe_get_trace;
> @@ -351,6 +356,33 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
>         return arm_spe_deliver_synth_event(spe, speq, event, &sample);
>  }
>
> +static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
> +                                            u64 spe_events_id, u64 data_src)
> +{
> +       struct arm_spe *spe = speq->spe;
> +       struct arm_spe_record *record = &speq->decoder->record;
> +       union perf_event *event = speq->event_buf;
> +       struct perf_sample sample = { .ip = 0, };
> +
> +       /*
> +        * Handles perf instruction sampling period.
> +        */
> +       speq->period_instructions++;
> +       if (speq->period_instructions < spe->instructions_sample_period)
> +               return 0;
> +       speq->period_instructions = 0;
> +
> +       arm_spe_prep_sample(spe, speq, event, &sample);
> +
> +       sample.id = spe_events_id;
> +       sample.stream_id = spe_events_id;
> +       sample.addr = record->virt_addr;
> +       sample.phys_addr = record->phys_addr;
> +       sample.data_src = data_src;

I think it should set sample.period to spe->instructions_sample_period.

Also it can set sample.weight but I think we lost my patch

  https://lore.kernel.org/r/20211201220855.1260688-1-namhyung@kernel.org

Arnaldo, can you please take a look?

> +
> +       return arm_spe_deliver_synth_event(spe, speq, event, &sample);
> +}
> +
>  #define SPE_MEM_TYPE   (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
>                          ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
>                          ARM_SPE_REMOTE_ACCESS)
> @@ -480,6 +512,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
>                         return err;
>         }
>
> +       if (spe->sample_instructions) {
> +               err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
> +               if (err)
> +                       return err;
> +       }
> +
>         return 0;
>  }
>
> @@ -1107,6 +1145,26 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
>                         return err;
>                 spe->memory_id = id;
>                 arm_spe_set_event_name(evlist, id, "memory");
> +               id += 1;
> +       }
> +
> +       if (spe->synth_opts.instructions) {
> +               if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS)
> +                       return -EINVAL;
> +
> +               if (spe->synth_opts.period > 1)
> +                       pr_warning("Arm SPE has a hardware-based sample period.\n"
> +                                  "More instruction events will be discarded by --itrace\n");
> +
> +               spe->sample_instructions = true;
> +               attr.config = PERF_COUNT_HW_INSTRUCTIONS;
> +               attr.sample_period = spe->synth_opts.period;
> +               spe->instructions_sample_period = attr.sample_period;
> +               err = arm_spe_synth_event(session, &attr, id);
> +               if (err)
> +                       return err;
> +               spe->instructions_id = id;
> +               arm_spe_set_event_name(evlist, id, "instructions");

Yeah, I think it's a better name than "all". :)

Thanks,
Namhyung


>         }
>
>         return 0;
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] perf arm-spe: Synthesize SPE instruction events
@ 2021-12-16  1:18   ` Namhyung Kim
  0 siblings, 0 replies; 10+ messages in thread
From: Namhyung Kim @ 2021-12-16  1:18 UTC (permalink / raw)
  To: German Gomez
  Cc: linux-kernel, linux-perf-users, Arnaldo Carvalho de Melo,
	John Garry, Will Deacon, Mathieu Poirier, Leo Yan, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, linux-arm-kernel

Hello,

On Wed, Dec 15, 2021 at 10:46 AM German Gomez <german.gomez@arm.com> wrote:
>
> Synthesize instruction events per every decoded ARM SPE record.
>
> Because Arm SPE implements a hardware-based sample period, and perf
> implements a software-based one that gets applied on top, also add a
> warning to make the user aware.
>
> Signed-off-by: German Gomez <german.gomez@arm.com>
> ---
> Changes since v1 [https://lore.kernel.org/all/20211117142833.226629-1-german.gomez@arm.com]
>   - Generate events with "--itrace=i" instead of "--itrace=o".
>   - Generate events with virt_addr, phys_addr, and data_src values.
> ---
>  tools/perf/util/arm-spe.c | 58 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 58 insertions(+)
>
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index fccac06b573a..879583822c8f 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -58,6 +58,8 @@ struct arm_spe {
>         u8                              sample_branch;
>         u8                              sample_remote_access;
>         u8                              sample_memory;
> +       u8                              sample_instructions;
> +       u64                             instructions_sample_period;
>
>         u64                             l1d_miss_id;
>         u64                             l1d_access_id;
> @@ -68,6 +70,7 @@ struct arm_spe {
>         u64                             branch_miss_id;
>         u64                             remote_access_id;
>         u64                             memory_id;
> +       u64                             instructions_id;
>
>         u64                             kernel_start;
>
> @@ -90,6 +93,7 @@ struct arm_spe_queue {
>         u64                             time;
>         u64                             timestamp;
>         struct thread                   *thread;
> +       u64                             period_instructions;
>  };
>
>  static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
> @@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
>         speq->pid = -1;
>         speq->tid = -1;
>         speq->cpu = -1;
> +       speq->period_instructions = 0;
>
>         /* params set */
>         params.get_trace = arm_spe_get_trace;
> @@ -351,6 +356,33 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
>         return arm_spe_deliver_synth_event(spe, speq, event, &sample);
>  }
>
> +static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
> +                                            u64 spe_events_id, u64 data_src)
> +{
> +       struct arm_spe *spe = speq->spe;
> +       struct arm_spe_record *record = &speq->decoder->record;
> +       union perf_event *event = speq->event_buf;
> +       struct perf_sample sample = { .ip = 0, };
> +
> +       /*
> +        * Handles perf instruction sampling period.
> +        */
> +       speq->period_instructions++;
> +       if (speq->period_instructions < spe->instructions_sample_period)
> +               return 0;
> +       speq->period_instructions = 0;
> +
> +       arm_spe_prep_sample(spe, speq, event, &sample);
> +
> +       sample.id = spe_events_id;
> +       sample.stream_id = spe_events_id;
> +       sample.addr = record->virt_addr;
> +       sample.phys_addr = record->phys_addr;
> +       sample.data_src = data_src;

I think it should set sample.period to spe->instructions_sample_period.

Also it can set sample.weight but I think we lost my patch

  https://lore.kernel.org/r/20211201220855.1260688-1-namhyung@kernel.org

Arnaldo, can you please take a look?

> +
> +       return arm_spe_deliver_synth_event(spe, speq, event, &sample);
> +}
> +
>  #define SPE_MEM_TYPE   (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
>                          ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
>                          ARM_SPE_REMOTE_ACCESS)
> @@ -480,6 +512,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
>                         return err;
>         }
>
> +       if (spe->sample_instructions) {
> +               err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
> +               if (err)
> +                       return err;
> +       }
> +
>         return 0;
>  }
>
> @@ -1107,6 +1145,26 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
>                         return err;
>                 spe->memory_id = id;
>                 arm_spe_set_event_name(evlist, id, "memory");
> +               id += 1;
> +       }
> +
> +       if (spe->synth_opts.instructions) {
> +               if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS)
> +                       return -EINVAL;
> +
> +               if (spe->synth_opts.period > 1)
> +                       pr_warning("Arm SPE has a hardware-based sample period.\n"
> +                                  "More instruction events will be discarded by --itrace\n");
> +
> +               spe->sample_instructions = true;
> +               attr.config = PERF_COUNT_HW_INSTRUCTIONS;
> +               attr.sample_period = spe->synth_opts.period;
> +               spe->instructions_sample_period = attr.sample_period;
> +               err = arm_spe_synth_event(session, &attr, id);
> +               if (err)
> +                       return err;
> +               spe->instructions_id = id;
> +               arm_spe_set_event_name(evlist, id, "instructions");

Yeah, I think it's a better name than "all". :)

Thanks,
Namhyung


>         }
>
>         return 0;
> --
> 2.25.1
>

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] perf arm-spe: Synthesize SPE instruction events
  2021-12-16  1:18   ` Namhyung Kim
@ 2021-12-16 11:11     ` German Gomez
  -1 siblings, 0 replies; 10+ messages in thread
From: German Gomez @ 2021-12-16 11:11 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: linux-kernel, linux-perf-users, Arnaldo Carvalho de Melo,
	John Garry, Will Deacon, Mathieu Poirier, Leo Yan, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, linux-arm-kernel

Hi Namhyung, thanks for your comments.

On 16/12/2021 01:18, Namhyung Kim wrote:
> Hello,
>
> On Wed, Dec 15, 2021 at 10:46 AM German Gomez <german.gomez@arm.com> wrote:
>> Synthesize instruction events per every decoded ARM SPE record.
>>
>> Because Arm SPE implements a hardware-based sample period, and perf
>> implements a software-based one that gets applied on top, also add a
>> warning to make the user aware.
>>
>> Signed-off-by: German Gomez <german.gomez@arm.com>
>> ---
>> Changes since v1 [https://lore.kernel.org/all/20211117142833.226629-1-german.gomez@arm.com]
>>   - Generate events with "--itrace=i" instead of "--itrace=o".
>>   - Generate events with virt_addr, phys_addr, and data_src values.
>> ---
>>  tools/perf/util/arm-spe.c | 58 +++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 58 insertions(+)
>>
>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>> index fccac06b573a..879583822c8f 100644
>> --- a/tools/perf/util/arm-spe.c
>> +++ b/tools/perf/util/arm-spe.c
>> @@ -58,6 +58,8 @@ struct arm_spe {
>>         u8                              sample_branch;
>>         u8                              sample_remote_access;
>>         u8                              sample_memory;
>> +       u8                              sample_instructions;
>> +       u64                             instructions_sample_period;
>>
>>         u64                             l1d_miss_id;
>>         u64                             l1d_access_id;
>> @@ -68,6 +70,7 @@ struct arm_spe {
>>         u64                             branch_miss_id;
>>         u64                             remote_access_id;
>>         u64                             memory_id;
>> +       u64                             instructions_id;
>>
>>         u64                             kernel_start;
>>
>> @@ -90,6 +93,7 @@ struct arm_spe_queue {
>>         u64                             time;
>>         u64                             timestamp;
>>         struct thread                   *thread;
>> +       u64                             period_instructions;
>>  };
>>
>>  static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
>> @@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
>>         speq->pid = -1;
>>         speq->tid = -1;
>>         speq->cpu = -1;
>> +       speq->period_instructions = 0;
>>
>>         /* params set */
>>         params.get_trace = arm_spe_get_trace;
>> @@ -351,6 +356,33 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
>>         return arm_spe_deliver_synth_event(spe, speq, event, &sample);
>>  }
>>
>> +static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
>> +                                            u64 spe_events_id, u64 data_src)
>> +{
>> +       struct arm_spe *spe = speq->spe;
>> +       struct arm_spe_record *record = &speq->decoder->record;
>> +       union perf_event *event = speq->event_buf;
>> +       struct perf_sample sample = { .ip = 0, };
>> +
>> +       /*
>> +        * Handles perf instruction sampling period.
>> +        */
>> +       speq->period_instructions++;
>> +       if (speq->period_instructions < spe->instructions_sample_period)
>> +               return 0;
>> +       speq->period_instructions = 0;
>> +
>> +       arm_spe_prep_sample(spe, speq, event, &sample);
>> +
>> +       sample.id = spe_events_id;
>> +       sample.stream_id = spe_events_id;
>> +       sample.addr = record->virt_addr;
>> +       sample.phys_addr = record->phys_addr;
>> +       sample.data_src = data_src;
> I think it should set sample.period to spe->instructions_sample_period.

Ack!

>
> Also it can set sample.weight but I think we lost my patch

I forgot to apply your latency patch first. I will add it.

Thanks,
German

>
>   https://lore.kernel.org/r/20211201220855.1260688-1-namhyung@kernel.org
>
> Arnaldo, can you please take a look?
>
>> +
>> +       return arm_spe_deliver_synth_event(spe, speq, event, &sample);
>> +}
>> +
>>  #define SPE_MEM_TYPE   (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
>>                          ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
>>                          ARM_SPE_REMOTE_ACCESS)
>> @@ -480,6 +512,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
>>                         return err;
>>         }
>>
>> +       if (spe->sample_instructions) {
>> +               err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
>> +               if (err)
>> +                       return err;
>> +       }
>> +
>>         return 0;
>>  }
>>
>> @@ -1107,6 +1145,26 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
>>                         return err;
>>                 spe->memory_id = id;
>>                 arm_spe_set_event_name(evlist, id, "memory");
>> +               id += 1;
>> +       }
>> +
>> +       if (spe->synth_opts.instructions) {
>> +               if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS)
>> +                       return -EINVAL;
>> +
>> +               if (spe->synth_opts.period > 1)
>> +                       pr_warning("Arm SPE has a hardware-based sample period.\n"
>> +                                  "More instruction events will be discarded by --itrace\n");
>> +
>> +               spe->sample_instructions = true;
>> +               attr.config = PERF_COUNT_HW_INSTRUCTIONS;
>> +               attr.sample_period = spe->synth_opts.period;
>> +               spe->instructions_sample_period = attr.sample_period;
>> +               err = arm_spe_synth_event(session, &attr, id);
>> +               if (err)
>> +                       return err;
>> +               spe->instructions_id = id;
>> +               arm_spe_set_event_name(evlist, id, "instructions");
> Yeah, I think it's a better name than "all". :)
>
> Thanks,
> Namhyung
>
>
>>         }
>>
>>         return 0;
>> --
>> 2.25.1
>>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] perf arm-spe: Synthesize SPE instruction events
@ 2021-12-16 11:11     ` German Gomez
  0 siblings, 0 replies; 10+ messages in thread
From: German Gomez @ 2021-12-16 11:11 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: linux-kernel, linux-perf-users, Arnaldo Carvalho de Melo,
	John Garry, Will Deacon, Mathieu Poirier, Leo Yan, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, linux-arm-kernel

Hi Namhyung, thanks for your comments.

On 16/12/2021 01:18, Namhyung Kim wrote:
> Hello,
>
> On Wed, Dec 15, 2021 at 10:46 AM German Gomez <german.gomez@arm.com> wrote:
>> Synthesize instruction events per every decoded ARM SPE record.
>>
>> Because Arm SPE implements a hardware-based sample period, and perf
>> implements a software-based one that gets applied on top, also add a
>> warning to make the user aware.
>>
>> Signed-off-by: German Gomez <german.gomez@arm.com>
>> ---
>> Changes since v1 [https://lore.kernel.org/all/20211117142833.226629-1-german.gomez@arm.com]
>>   - Generate events with "--itrace=i" instead of "--itrace=o".
>>   - Generate events with virt_addr, phys_addr, and data_src values.
>> ---
>>  tools/perf/util/arm-spe.c | 58 +++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 58 insertions(+)
>>
>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>> index fccac06b573a..879583822c8f 100644
>> --- a/tools/perf/util/arm-spe.c
>> +++ b/tools/perf/util/arm-spe.c
>> @@ -58,6 +58,8 @@ struct arm_spe {
>>         u8                              sample_branch;
>>         u8                              sample_remote_access;
>>         u8                              sample_memory;
>> +       u8                              sample_instructions;
>> +       u64                             instructions_sample_period;
>>
>>         u64                             l1d_miss_id;
>>         u64                             l1d_access_id;
>> @@ -68,6 +70,7 @@ struct arm_spe {
>>         u64                             branch_miss_id;
>>         u64                             remote_access_id;
>>         u64                             memory_id;
>> +       u64                             instructions_id;
>>
>>         u64                             kernel_start;
>>
>> @@ -90,6 +93,7 @@ struct arm_spe_queue {
>>         u64                             time;
>>         u64                             timestamp;
>>         struct thread                   *thread;
>> +       u64                             period_instructions;
>>  };
>>
>>  static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
>> @@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
>>         speq->pid = -1;
>>         speq->tid = -1;
>>         speq->cpu = -1;
>> +       speq->period_instructions = 0;
>>
>>         /* params set */
>>         params.get_trace = arm_spe_get_trace;
>> @@ -351,6 +356,33 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
>>         return arm_spe_deliver_synth_event(spe, speq, event, &sample);
>>  }
>>
>> +static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
>> +                                            u64 spe_events_id, u64 data_src)
>> +{
>> +       struct arm_spe *spe = speq->spe;
>> +       struct arm_spe_record *record = &speq->decoder->record;
>> +       union perf_event *event = speq->event_buf;
>> +       struct perf_sample sample = { .ip = 0, };
>> +
>> +       /*
>> +        * Handles perf instruction sampling period.
>> +        */
>> +       speq->period_instructions++;
>> +       if (speq->period_instructions < spe->instructions_sample_period)
>> +               return 0;
>> +       speq->period_instructions = 0;
>> +
>> +       arm_spe_prep_sample(spe, speq, event, &sample);
>> +
>> +       sample.id = spe_events_id;
>> +       sample.stream_id = spe_events_id;
>> +       sample.addr = record->virt_addr;
>> +       sample.phys_addr = record->phys_addr;
>> +       sample.data_src = data_src;
> I think it should set sample.period to spe->instructions_sample_period.

Ack!

>
> Also it can set sample.weight but I think we lost my patch

I forgot to apply your latency patch first. I will add it.

Thanks,
German

>
>   https://lore.kernel.org/r/20211201220855.1260688-1-namhyung@kernel.org
>
> Arnaldo, can you please take a look?
>
>> +
>> +       return arm_spe_deliver_synth_event(spe, speq, event, &sample);
>> +}
>> +
>>  #define SPE_MEM_TYPE   (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
>>                          ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
>>                          ARM_SPE_REMOTE_ACCESS)
>> @@ -480,6 +512,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
>>                         return err;
>>         }
>>
>> +       if (spe->sample_instructions) {
>> +               err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
>> +               if (err)
>> +                       return err;
>> +       }
>> +
>>         return 0;
>>  }
>>
>> @@ -1107,6 +1145,26 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
>>                         return err;
>>                 spe->memory_id = id;
>>                 arm_spe_set_event_name(evlist, id, "memory");
>> +               id += 1;
>> +       }
>> +
>> +       if (spe->synth_opts.instructions) {
>> +               if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS)
>> +                       return -EINVAL;
>> +
>> +               if (spe->synth_opts.period > 1)
>> +                       pr_warning("Arm SPE has a hardware-based sample period.\n"
>> +                                  "More instruction events will be discarded by --itrace\n");
>> +
>> +               spe->sample_instructions = true;
>> +               attr.config = PERF_COUNT_HW_INSTRUCTIONS;
>> +               attr.sample_period = spe->synth_opts.period;
>> +               spe->instructions_sample_period = attr.sample_period;
>> +               err = arm_spe_synth_event(session, &attr, id);
>> +               if (err)
>> +                       return err;
>> +               spe->instructions_id = id;
>> +               arm_spe_set_event_name(evlist, id, "instructions");
> Yeah, I think it's a better name than "all". :)
>
> Thanks,
> Namhyung
>
>
>>         }
>>
>>         return 0;
>> --
>> 2.25.1
>>

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] perf arm-spe: Synthesize SPE instruction events
  2021-12-15 18:46 ` German Gomez
@ 2021-12-16 11:11   ` Leo Yan
  -1 siblings, 0 replies; 10+ messages in thread
From: Leo Yan @ 2021-12-16 11:11 UTC (permalink / raw)
  To: German Gomez
  Cc: linux-kernel, linux-perf-users, acme, John Garry, Will Deacon,
	Mathieu Poirier, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Namhyung Kim, linux-arm-kernel

Hi German,

On Wed, Dec 15, 2021 at 06:46:05PM +0000, German Gomez wrote:
> Synthesize instruction events per every decoded ARM SPE record.
> 
> Because Arm SPE implements a hardware-based sample period, and perf
> implements a software-based one that gets applied on top, also add a
> warning to make the user aware.
> 
> Signed-off-by: German Gomez <german.gomez@arm.com>
> ---
> Changes since v1 [https://lore.kernel.org/all/20211117142833.226629-1-german.gomez@arm.com]
>   - Generate events with "--itrace=i" instead of "--itrace=o".
>   - Generate events with virt_addr, phys_addr, and data_src values.
> ---
>  tools/perf/util/arm-spe.c | 58 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 58 insertions(+)
> 
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index fccac06b573a..879583822c8f 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -58,6 +58,8 @@ struct arm_spe {
>  	u8				sample_branch;
>  	u8				sample_remote_access;
>  	u8				sample_memory;
> +	u8				sample_instructions;
> +	u64				instructions_sample_period;
>  
>  	u64				l1d_miss_id;
>  	u64				l1d_access_id;
> @@ -68,6 +70,7 @@ struct arm_spe {
>  	u64				branch_miss_id;
>  	u64				remote_access_id;
>  	u64				memory_id;
> +	u64				instructions_id;
>  
>  	u64				kernel_start;
>  
> @@ -90,6 +93,7 @@ struct arm_spe_queue {
>  	u64				time;
>  	u64				timestamp;
>  	struct thread			*thread;
> +	u64				period_instructions;
>  };
>  
>  static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
> @@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
>  	speq->pid = -1;
>  	speq->tid = -1;
>  	speq->cpu = -1;
> +	speq->period_instructions = 0;
>  
>  	/* params set */
>  	params.get_trace = arm_spe_get_trace;
> @@ -351,6 +356,33 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
>  	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
>  }
>  
> +static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
> +					     u64 spe_events_id, u64 data_src)
> +{
> +	struct arm_spe *spe = speq->spe;
> +	struct arm_spe_record *record = &speq->decoder->record;
> +	union perf_event *event = speq->event_buf;
> +	struct perf_sample sample = { .ip = 0, };
> +
> +	/*
> +	 * Handles perf instruction sampling period.
> +	 */
> +	speq->period_instructions++;
> +	if (speq->period_instructions < spe->instructions_sample_period)
> +		return 0;
> +	speq->period_instructions = 0;
> +
> +	arm_spe_prep_sample(spe, speq, event, &sample);
> +
> +	sample.id = spe_events_id;
> +	sample.stream_id = spe_events_id;
> +	sample.addr = record->virt_addr;
> +	sample.phys_addr = record->phys_addr;
> +	sample.data_src = data_src;
> +
> +	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
> +}
> +
>  #define SPE_MEM_TYPE	(ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
>  			 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
>  			 ARM_SPE_REMOTE_ACCESS)
> @@ -480,6 +512,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
>  			return err;
>  	}
>  
> +	if (spe->sample_instructions) {
> +		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
> +		if (err)
> +			return err;
> +	}
> +
>  	return 0;
>  }
>  
> @@ -1107,6 +1145,26 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
>  			return err;
>  		spe->memory_id = id;
>  		arm_spe_set_event_name(evlist, id, "memory");
> +		id += 1;
> +	}
> +
> +	if (spe->synth_opts.instructions) {
> +		if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS)
> +			return -EINVAL;

Will this break some perf commands, like "perf report" or other perf
report commands?

See the function arm_spe_process_auxtrace_info(); it invokes [1]:

    itrace_synth_opts__set_default(&spe->synth_opts, false);

So synth_opts.instructions is initialized to true,
synth_opts.period_type is set to PERF_ITRACE_DEFAULT_PERIOD_TYPE (2),
so the perf tool will directly bail out when synth_opts.period_type is
not equal to PERF_ITRACE_PERIOD_INSTRUCTIONS.

If we only support period type PERF_ITRACE_PERIOD_INSTRUCTIONS at the
current stage, I think we should use a check like the one below:

  if (spe->synth_opts.instructions &&
      (spe->synth_opts.period_type == PERF_ITRACE_PERIOD_INSTRUCTIONS)) {

     ...
  }

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/util/arm-spe.c#n1180

> +
> +		if (spe->synth_opts.period > 1)
> +			pr_warning("Arm SPE has a hardware-based sample period.\n"
> +				   "More instruction events will be discarded by --itrace\n");

Okay. Since Arm SPE is statistical profiling, this is the right thing to
do.

Please also address Namhyung's two comments, the rest of this patch looks
good to me.

Thanks,
Leo

> +
> +		spe->sample_instructions = true;
> +		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
> +		attr.sample_period = spe->synth_opts.period;
> +		spe->instructions_sample_period = attr.sample_period;
> +		err = arm_spe_synth_event(session, &attr, id);
> +		if (err)
> +			return err;
> +		spe->instructions_id = id;
> +		arm_spe_set_event_name(evlist, id, "instructions");
>  	}
>  
>  	return 0;
> -- 
> 2.25.1
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] perf arm-spe: Synthesize SPE instruction events
@ 2021-12-16 11:11   ` Leo Yan
  0 siblings, 0 replies; 10+ messages in thread
From: Leo Yan @ 2021-12-16 11:11 UTC (permalink / raw)
  To: German Gomez
  Cc: linux-kernel, linux-perf-users, acme, John Garry, Will Deacon,
	Mathieu Poirier, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Namhyung Kim, linux-arm-kernel

Hi German,

On Wed, Dec 15, 2021 at 06:46:05PM +0000, German Gomez wrote:
> Synthesize instruction events per every decoded ARM SPE record.
> 
> Because Arm SPE implements a hardware-based sample period, and perf
> implements a software-based one that gets applied on top, also add a
> warning to make the user aware.
> 
> Signed-off-by: German Gomez <german.gomez@arm.com>
> ---
> Changes since v1 [https://lore.kernel.org/all/20211117142833.226629-1-german.gomez@arm.com]
>   - Generate events with "--itrace=i" instead of "--itrace=o".
>   - Generate events with virt_addr, phys_addr, and data_src values.
> ---
>  tools/perf/util/arm-spe.c | 58 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 58 insertions(+)
> 
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index fccac06b573a..879583822c8f 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -58,6 +58,8 @@ struct arm_spe {
>  	u8				sample_branch;
>  	u8				sample_remote_access;
>  	u8				sample_memory;
> +	u8				sample_instructions;
> +	u64				instructions_sample_period;
>  
>  	u64				l1d_miss_id;
>  	u64				l1d_access_id;
> @@ -68,6 +70,7 @@ struct arm_spe {
>  	u64				branch_miss_id;
>  	u64				remote_access_id;
>  	u64				memory_id;
> +	u64				instructions_id;
>  
>  	u64				kernel_start;
>  
> @@ -90,6 +93,7 @@ struct arm_spe_queue {
>  	u64				time;
>  	u64				timestamp;
>  	struct thread			*thread;
> +	u64				period_instructions;
>  };
>  
>  static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
> @@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
>  	speq->pid = -1;
>  	speq->tid = -1;
>  	speq->cpu = -1;
> +	speq->period_instructions = 0;
>  
>  	/* params set */
>  	params.get_trace = arm_spe_get_trace;
> @@ -351,6 +356,33 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
>  	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
>  }
>  
> +static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
> +					     u64 spe_events_id, u64 data_src)
> +{
> +	struct arm_spe *spe = speq->spe;
> +	struct arm_spe_record *record = &speq->decoder->record;
> +	union perf_event *event = speq->event_buf;
> +	struct perf_sample sample = { .ip = 0, };
> +
> +	/*
> +	 * Handles perf instruction sampling period.
> +	 */
> +	speq->period_instructions++;
> +	if (speq->period_instructions < spe->instructions_sample_period)
> +		return 0;
> +	speq->period_instructions = 0;
> +
> +	arm_spe_prep_sample(spe, speq, event, &sample);
> +
> +	sample.id = spe_events_id;
> +	sample.stream_id = spe_events_id;
> +	sample.addr = record->virt_addr;
> +	sample.phys_addr = record->phys_addr;
> +	sample.data_src = data_src;
> +
> +	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
> +}
> +
>  #define SPE_MEM_TYPE	(ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
>  			 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
>  			 ARM_SPE_REMOTE_ACCESS)
> @@ -480,6 +512,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
>  			return err;
>  	}
>  
> +	if (spe->sample_instructions) {
> +		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
> +		if (err)
> +			return err;
> +	}
> +
>  	return 0;
>  }
>  
> @@ -1107,6 +1145,26 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
>  			return err;
>  		spe->memory_id = id;
>  		arm_spe_set_event_name(evlist, id, "memory");
> +		id += 1;
> +	}
> +
> +	if (spe->synth_opts.instructions) {
> +		if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS)
> +			return -EINVAL;

Will this break some perf commands, like "perf report" or other perf
report commands?

See the function arm_spe_process_auxtrace_info(); it invokes [1]:

    itrace_synth_opts__set_default(&spe->synth_opts, false);

So synth_opts.instructions is initialized to true,
synth_opts.period_type is set to PERF_ITRACE_DEFAULT_PERIOD_TYPE (2),
so the perf tool will directly bail out when synth_opts.period_type is
not equal to PERF_ITRACE_PERIOD_INSTRUCTIONS.

If we only support period type PERF_ITRACE_PERIOD_INSTRUCTIONS at the
current stage, I think we should use a check like the one below:

  if (spe->synth_opts.instructions &&
      (spe->synth_opts.period_type == PERF_ITRACE_PERIOD_INSTRUCTIONS)) {

     ...
  }

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/util/arm-spe.c#n1180

> +
> +		if (spe->synth_opts.period > 1)
> +			pr_warning("Arm SPE has a hardware-based sample period.\n"
> +				   "More instruction events will be discarded by --itrace\n");

Okay. Since Arm SPE is statistical profiling, this is the right thing to
do.

Please also address Namhyung's two comments, the rest of this patch looks
good to me.

Thanks,
Leo

> +
> +		spe->sample_instructions = true;
> +		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
> +		attr.sample_period = spe->synth_opts.period;
> +		spe->instructions_sample_period = attr.sample_period;
> +		err = arm_spe_synth_event(session, &attr, id);
> +		if (err)
> +			return err;
> +		spe->instructions_id = id;
> +		arm_spe_set_event_name(evlist, id, "instructions");
>  	}
>  
>  	return 0;
> -- 
> 2.25.1
> 

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] perf arm-spe: Synthesize SPE instruction events
  2021-12-16 11:11   ` Leo Yan
@ 2021-12-16 15:00     ` German Gomez
  -1 siblings, 0 replies; 10+ messages in thread
From: German Gomez @ 2021-12-16 15:00 UTC (permalink / raw)
  To: Leo Yan
  Cc: linux-kernel, linux-perf-users, acme, John Garry, Will Deacon,
	Mathieu Poirier, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Namhyung Kim, linux-arm-kernel

Hi Leo,

Thanks for your comments. I'll send a revised patch shortly.

Thanks,
German

On 16/12/2021 11:11, Leo Yan wrote:
> Hi German,
>
> On Wed, Dec 15, 2021 at 06:46:05PM +0000, German Gomez wrote:
>> Synthesize instruction events per every decoded ARM SPE record.
>>
>> Because Arm SPE implements a hardware-based sample period, and perf
>> implements a software-based one that gets applied on top, also add a
>> warning to make the user aware.
>>
>> Signed-off-by: German Gomez <german.gomez@arm.com>
>> ---
>> Changes since v1 [https://lore.kernel.org/all/20211117142833.226629-1-german.gomez@arm.com]
>>   - Generate events with "--itrace=i" instead of "--itrace=o".
>>   - Generate events with virt_addr, phys_addr, and data_src values.
>> ---
>>  tools/perf/util/arm-spe.c | 58 +++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 58 insertions(+)
>>
>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>> index fccac06b573a..879583822c8f 100644
>> --- a/tools/perf/util/arm-spe.c
>> +++ b/tools/perf/util/arm-spe.c
>> @@ -58,6 +58,8 @@ struct arm_spe {
>>  	u8				sample_branch;
>>  	u8				sample_remote_access;
>>  	u8				sample_memory;
>> +	u8				sample_instructions;
>> +	u64				instructions_sample_period;
>>  
>>  	u64				l1d_miss_id;
>>  	u64				l1d_access_id;
>> @@ -68,6 +70,7 @@ struct arm_spe {
>>  	u64				branch_miss_id;
>>  	u64				remote_access_id;
>>  	u64				memory_id;
>> +	u64				instructions_id;
>>  
>>  	u64				kernel_start;
>>  
>> @@ -90,6 +93,7 @@ struct arm_spe_queue {
>>  	u64				time;
>>  	u64				timestamp;
>>  	struct thread			*thread;
>> +	u64				period_instructions;
>>  };
>>  
>>  static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
>> @@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
>>  	speq->pid = -1;
>>  	speq->tid = -1;
>>  	speq->cpu = -1;
>> +	speq->period_instructions = 0;
>>  
>>  	/* params set */
>>  	params.get_trace = arm_spe_get_trace;
>> @@ -351,6 +356,33 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
>>  	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
>>  }
>>  
>> +static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
>> +					     u64 spe_events_id, u64 data_src)
>> +{
>> +	struct arm_spe *spe = speq->spe;
>> +	struct arm_spe_record *record = &speq->decoder->record;
>> +	union perf_event *event = speq->event_buf;
>> +	struct perf_sample sample = { .ip = 0, };
>> +
>> +	/*
>> +	 * Handles perf instruction sampling period.
>> +	 */
>> +	speq->period_instructions++;
>> +	if (speq->period_instructions < spe->instructions_sample_period)
>> +		return 0;
>> +	speq->period_instructions = 0;
>> +
>> +	arm_spe_prep_sample(spe, speq, event, &sample);
>> +
>> +	sample.id = spe_events_id;
>> +	sample.stream_id = spe_events_id;
>> +	sample.addr = record->virt_addr;
>> +	sample.phys_addr = record->phys_addr;
>> +	sample.data_src = data_src;
>> +
>> +	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
>> +}
>> +
>>  #define SPE_MEM_TYPE	(ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
>>  			 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
>>  			 ARM_SPE_REMOTE_ACCESS)
>> @@ -480,6 +512,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
>>  			return err;
>>  	}
>>  
>> +	if (spe->sample_instructions) {
>> +		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
>> +		if (err)
>> +			return err;
>> +	}
>> +
>>  	return 0;
>>  }
>>  
>> @@ -1107,6 +1145,26 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
>>  			return err;
>>  		spe->memory_id = id;
>>  		arm_spe_set_event_name(evlist, id, "memory");
>> +		id += 1;
>> +	}
>> +
>> +	if (spe->synth_opts.instructions) {
>> +		if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS)
>> +			return -EINVAL;
> Will this break some perf commands, like "perf report" or other perf
> report commands?
>
> See the function arm_spe_process_auxtrace_info(); it invokes [1]:
>
>     itrace_synth_opts__set_default(&spe->synth_opts, false);
>
> So synth_opts.instructions is initialized to true,
> synth_opts.period_type is set to PERF_ITRACE_DEFAULT_PERIOD_TYPE (2),
> so the perf tool will directly bail out when synth_opts.period_type is
> not equal to PERF_ITRACE_PERIOD_INSTRUCTIONS.
>
> If we only support period type PERF_ITRACE_PERIOD_INSTRUCTIONS at the
> current stage, I think we should use a check like the one below:
>
>   if (spe->synth_opts.instructions &&
>       (spe->synth_opts.period_type == PERF_ITRACE_PERIOD_INSTRUCTIONS)) {
>
>      ...
>   }
>
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/util/arm-spe.c#n1180
>
>> +
>> +		if (spe->synth_opts.period > 1)
>> +			pr_warning("Arm SPE has a hardware-based sample period.\n"
>> +				   "More instruction events will be discarded by --itrace\n");
> Okay. Since Arm SPE is statistical profiling, this is the right thing to
> do.
>
> Please also address Namhyung's two comments, the rest of this patch looks
> good to me.
>
> Thanks,
> Leo
>
>> +
>> +		spe->sample_instructions = true;
>> +		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
>> +		attr.sample_period = spe->synth_opts.period;
>> +		spe->instructions_sample_period = attr.sample_period;
>> +		err = arm_spe_synth_event(session, &attr, id);
>> +		if (err)
>> +			return err;
>> +		spe->instructions_id = id;
>> +		arm_spe_set_event_name(evlist, id, "instructions");
>>  	}
>>  
>>  	return 0;
>> -- 
>> 2.25.1
>>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] perf arm-spe: Synthesize SPE instruction events
@ 2021-12-16 15:00     ` German Gomez
  0 siblings, 0 replies; 10+ messages in thread
From: German Gomez @ 2021-12-16 15:00 UTC (permalink / raw)
  To: Leo Yan
  Cc: linux-kernel, linux-perf-users, acme, John Garry, Will Deacon,
	Mathieu Poirier, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Namhyung Kim, linux-arm-kernel

Hi Leo,

Thanks for your comments. I'll send a revised patch shortly.

Thanks,
German

On 16/12/2021 11:11, Leo Yan wrote:
> Hi German,
>
> On Wed, Dec 15, 2021 at 06:46:05PM +0000, German Gomez wrote:
>> Synthesize instruction events per every decoded ARM SPE record.
>>
>> Because Arm SPE implements a hardware-based sample period, and perf
>> implements a software-based one that gets applied on top, also add a
>> warning to make the user aware.
>>
>> Signed-off-by: German Gomez <german.gomez@arm.com>
>> ---
>> Changes since v1 [https://lore.kernel.org/all/20211117142833.226629-1-german.gomez@arm.com]
>>   - Generate events with "--itrace=i" instead of "--itrace=o".
>>   - Generate events with virt_addr, phys_addr, and data_src values.
>> ---
>>  tools/perf/util/arm-spe.c | 58 +++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 58 insertions(+)
>>
>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>> index fccac06b573a..879583822c8f 100644
>> --- a/tools/perf/util/arm-spe.c
>> +++ b/tools/perf/util/arm-spe.c
>> @@ -58,6 +58,8 @@ struct arm_spe {
>>  	u8				sample_branch;
>>  	u8				sample_remote_access;
>>  	u8				sample_memory;
>> +	u8				sample_instructions;
>> +	u64				instructions_sample_period;
>>  
>>  	u64				l1d_miss_id;
>>  	u64				l1d_access_id;
>> @@ -68,6 +70,7 @@ struct arm_spe {
>>  	u64				branch_miss_id;
>>  	u64				remote_access_id;
>>  	u64				memory_id;
>> +	u64				instructions_id;
>>  
>>  	u64				kernel_start;
>>  
>> @@ -90,6 +93,7 @@ struct arm_spe_queue {
>>  	u64				time;
>>  	u64				timestamp;
>>  	struct thread			*thread;
>> +	u64				period_instructions;
>>  };
>>  
>>  static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
>> @@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
>>  	speq->pid = -1;
>>  	speq->tid = -1;
>>  	speq->cpu = -1;
>> +	speq->period_instructions = 0;
>>  
>>  	/* params set */
>>  	params.get_trace = arm_spe_get_trace;
>> @@ -351,6 +356,33 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
>>  	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
>>  }
>>  
>> +static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
>> +					     u64 spe_events_id, u64 data_src)
>> +{
>> +	struct arm_spe *spe = speq->spe;
>> +	struct arm_spe_record *record = &speq->decoder->record;
>> +	union perf_event *event = speq->event_buf;
>> +	struct perf_sample sample = { .ip = 0, };
>> +
>> +	/*
>> +	 * Handles perf instruction sampling period.
>> +	 */
>> +	speq->period_instructions++;
>> +	if (speq->period_instructions < spe->instructions_sample_period)
>> +		return 0;
>> +	speq->period_instructions = 0;
>> +
>> +	arm_spe_prep_sample(spe, speq, event, &sample);
>> +
>> +	sample.id = spe_events_id;
>> +	sample.stream_id = spe_events_id;
>> +	sample.addr = record->virt_addr;
>> +	sample.phys_addr = record->phys_addr;
>> +	sample.data_src = data_src;
>> +
>> +	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
>> +}
>> +
>>  #define SPE_MEM_TYPE	(ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
>>  			 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
>>  			 ARM_SPE_REMOTE_ACCESS)
>> @@ -480,6 +512,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
>>  			return err;
>>  	}
>>  
>> +	if (spe->sample_instructions) {
>> +		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
>> +		if (err)
>> +			return err;
>> +	}
>> +
>>  	return 0;
>>  }
>>  
>> @@ -1107,6 +1145,26 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
>>  			return err;
>>  		spe->memory_id = id;
>>  		arm_spe_set_event_name(evlist, id, "memory");
>> +		id += 1;
>> +	}
>> +
>> +	if (spe->synth_opts.instructions) {
>> +		if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS)
>> +			return -EINVAL;
> Will this break some perf commands, like "perf report" or other perf
> report commands?
>
> See the function arm_spe_process_auxtrace_info(); it invokes [1]:
>
>     itrace_synth_opts__set_default(&spe->synth_opts, false);
>
> So synth_opts.instructions is initialized to true,
> synth_opts.period_type is set to PERF_ITRACE_DEFAULT_PERIOD_TYPE (2),
> so the perf tool will directly bail out when synth_opts.period_type is
> not equal to PERF_ITRACE_PERIOD_INSTRUCTIONS.
>
> If we only support period type PERF_ITRACE_PERIOD_INSTRUCTIONS at the
> current stage, I think we should use a check like the one below:
>
>   if (spe->synth_opts.instructions &&
>       (spe->synth_opts.period_type == PERF_ITRACE_PERIOD_INSTRUCTIONS)) {
>
>      ...
>   }
>
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/util/arm-spe.c#n1180
>
>> +
>> +		if (spe->synth_opts.period > 1)
>> +			pr_warning("Arm SPE has a hardware-based sample period.\n"
>> +				   "More instruction events will be discarded by --itrace\n");
> Okay. Since Arm SPE is statistical profiling, this is the right thing to
> do.
>
> Please also address Namhyung's two comments, the rest of this patch looks
> good to me.
>
> Thanks,
> Leo
>
>> +
>> +		spe->sample_instructions = true;
>> +		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
>> +		attr.sample_period = spe->synth_opts.period;
>> +		spe->instructions_sample_period = attr.sample_period;
>> +		err = arm_spe_synth_event(session, &attr, id);
>> +		if (err)
>> +			return err;
>> +		spe->instructions_id = id;
>> +		arm_spe_set_event_name(evlist, id, "instructions");
>>  	}
>>  
>>  	return 0;
>> -- 
>> 2.25.1
>>

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-12-16 15:01 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-15 18:46 [PATCH v2] perf arm-spe: Synthesize SPE instruction events German Gomez
2021-12-15 18:46 ` German Gomez
2021-12-16  1:18 ` Namhyung Kim
2021-12-16  1:18   ` Namhyung Kim
2021-12-16 11:11   ` German Gomez
2021-12-16 11:11     ` German Gomez
2021-12-16 11:11 ` Leo Yan
2021-12-16 11:11   ` Leo Yan
2021-12-16 15:00   ` German Gomez
2021-12-16 15:00     ` German Gomez

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.