All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10
@ 2021-07-07  6:39 Athira Rajeev
  2021-07-08 12:56 ` Nicholas Piggin
  0 siblings, 1 reply; 5+ messages in thread
From: Athira Rajeev @ 2021-07-07  6:39 UTC (permalink / raw)
  To: mpe; +Cc: maddy, linuxppc-dev

From: Athira Rajeev <atrajeev@linux.vnet.ibm.cm>

Power10 performance monitoring unit (PMU) driver uses performance
monitor counter 5 (PMC5) and performance monitor counter 6 (PMC6)
for counting instructions and cycles. Event used for cycles is
PM_RUN_CYC and instructions is PM_RUN_INST_CMPL. But counting of these
events in wait state is controlled by the CC56RUN bit setting in
Monitor Mode Control Register0 (MMCR0). If the CC56RUN bit is not
set, PMC5/6 will not count when CTRL[RUN] is zero.

Patch sets the CC56RUN bit in MMCR0 for power10 which makes PMC5 and
PMC6 count instructions and cycles regardless of the run bit. With this
change, these events are also now renamed to PM_CYC and PM_INST_CMPL
rather than PM_RUN_CYC and PM_RUN_INST_CMPL.

Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support")
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.cm>
Reviewed-by: Madhavan Srinivasan <maddy@linux.ibm.com>
---
Notes on testing done for this change:
 Tested this patch change with a kernel module that
 turns off and turns on the runlatch. kernel module also
 reads the counter values for PMC5 and PMC6 during the
 period when runlatch is off.
 - Started PMU counters via "perf stat" and loaded the
   test module.
 - Checked the counter values captured from module during
   the runlatch off period.
 - Verified that counters were frozen without the patch and
   with the patch, observed counters were incrementing.

 arch/powerpc/perf/power10-events-list.h |  8 +++---
 arch/powerpc/perf/power10-pmu.c         | 44 +++++++++++++++++++++++----------
 2 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h
index 93be719..564f1409 100644
--- a/arch/powerpc/perf/power10-events-list.h
+++ b/arch/powerpc/perf/power10-events-list.h
@@ -9,10 +9,10 @@
 /*
  * Power10 event codes.
  */
-EVENT(PM_RUN_CYC,				0x600f4);
+EVENT(PM_CYC,				0x600f4);
 EVENT(PM_DISP_STALL_CYC,			0x100f8);
 EVENT(PM_EXEC_STALL,				0x30008);
-EVENT(PM_RUN_INST_CMPL,				0x500fa);
+EVENT(PM_INST_CMPL,				0x500fa);
 EVENT(PM_BR_CMPL,                               0x4d05e);
 EVENT(PM_BR_MPRED_CMPL,                         0x400f6);
 EVENT(PM_BR_FIN,				0x2f04a);
@@ -50,8 +50,8 @@
 /* ITLB Reloaded */
 EVENT(PM_ITLB_MISS,				0x400fc);
 
-EVENT(PM_RUN_CYC_ALT,				0x0001e);
-EVENT(PM_RUN_INST_CMPL_ALT,			0x00002);
+EVENT(PM_CYC_ALT,				0x0001e);
+EVENT(PM_INST_CMPL_ALT,				0x00002);
 
 /*
  * Memory Access Events
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index f9d64c6..9dd75f3 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -91,8 +91,8 @@
 
 /* Table of alternatives, sorted by column 0 */
 static const unsigned int power10_event_alternatives[][MAX_ALT] = {
-	{ PM_RUN_CYC_ALT,		PM_RUN_CYC },
-	{ PM_RUN_INST_CMPL_ALT,		PM_RUN_INST_CMPL },
+	{ PM_CYC_ALT,			PM_CYC },
+	{ PM_INST_CMPL_ALT,		PM_INST_CMPL },
 };
 
 static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
@@ -118,8 +118,8 @@ static int power10_check_attr_config(struct perf_event *ev)
 	return 0;
 }
 
-GENERIC_EVENT_ATTR(cpu-cycles,			PM_RUN_CYC);
-GENERIC_EVENT_ATTR(instructions,		PM_RUN_INST_CMPL);
+GENERIC_EVENT_ATTR(cpu-cycles,			PM_CYC);
+GENERIC_EVENT_ATTR(instructions,		PM_INST_CMPL);
 GENERIC_EVENT_ATTR(branch-instructions,		PM_BR_CMPL);
 GENERIC_EVENT_ATTR(branch-misses,		PM_BR_MPRED_CMPL);
 GENERIC_EVENT_ATTR(cache-references,		PM_LD_REF_L1);
@@ -148,8 +148,8 @@ static int power10_check_attr_config(struct perf_event *ev)
 CACHE_EVENT_ATTR(iTLB-load-misses,		PM_ITLB_MISS);
 
 static struct attribute *power10_events_attr_dd1[] = {
-	GENERIC_EVENT_PTR(PM_RUN_CYC),
-	GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
+	GENERIC_EVENT_PTR(PM_CYC),
+	GENERIC_EVENT_PTR(PM_INST_CMPL),
 	GENERIC_EVENT_PTR(PM_BR_CMPL),
 	GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
 	GENERIC_EVENT_PTR(PM_LD_REF_L1),
@@ -173,8 +173,8 @@ static int power10_check_attr_config(struct perf_event *ev)
 };
 
 static struct attribute *power10_events_attr[] = {
-	GENERIC_EVENT_PTR(PM_RUN_CYC),
-	GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
+	GENERIC_EVENT_PTR(PM_CYC),
+	GENERIC_EVENT_PTR(PM_INST_CMPL),
 	GENERIC_EVENT_PTR(PM_BR_FIN),
 	GENERIC_EVENT_PTR(PM_MPRED_BR_FIN),
 	GENERIC_EVENT_PTR(PM_LD_REF_L1),
@@ -271,8 +271,8 @@ static int power10_check_attr_config(struct perf_event *ev)
 };
 
 static int power10_generic_events_dd1[] = {
-	[PERF_COUNT_HW_CPU_CYCLES] =			PM_RUN_CYC,
-	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_RUN_INST_CMPL,
+	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
+	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_CMPL,
 	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_CMPL,
 	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
@@ -280,8 +280,8 @@ static int power10_check_attr_config(struct perf_event *ev)
 };
 
 static int power10_generic_events[] = {
-	[PERF_COUNT_HW_CPU_CYCLES] =			PM_RUN_CYC,
-	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_RUN_INST_CMPL,
+	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
+	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_FIN,
 	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_MPRED_BR_FIN,
 	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
@@ -548,6 +548,24 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter)
 
 #undef C
 
+/*
+ * Set the MMCR0[CC56RUN] bit to enable counting for
+ * PMC5 and PMC6 regardless of the state of CTRL[RUN],
+ * so that we can use counters 5 and 6 as PM_INST_CMPL and
+ * PM_CYC.
+ */
+static int power10_compute_mmcr(u64 event[], int n_ev,
+				unsigned int hwc[], struct mmcr_regs *mmcr,
+				struct perf_event *pevents[], u32 flags)
+{
+	int ret;
+
+	ret = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags);
+	if (!ret)
+		mmcr->mmcr0 |= MMCR0_C56RUN;
+	return ret;
+}
+
 static struct power_pmu power10_pmu = {
 	.name			= "POWER10",
 	.n_counter		= MAX_PMU_COUNTERS,
@@ -555,7 +573,7 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter)
 	.test_adder		= ISA207_TEST_ADDER,
 	.group_constraint_mask	= CNST_CACHE_PMC4_MASK,
 	.group_constraint_val	= CNST_CACHE_PMC4_VAL,
-	.compute_mmcr		= isa207_compute_mmcr,
+	.compute_mmcr		= power10_compute_mmcr,
 	.config_bhrb		= power10_config_bhrb,
 	.bhrb_filter_map	= power10_bhrb_filter_map,
 	.get_constraint		= isa207_get_constraint,
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10
  2021-07-07  6:39 [PATCH] powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10 Athira Rajeev
@ 2021-07-08 12:56 ` Nicholas Piggin
  2021-07-08 15:43   ` Paul A. Clarke
  2021-07-12  9:31   ` Athira Rajeev
  0 siblings, 2 replies; 5+ messages in thread
From: Nicholas Piggin @ 2021-07-08 12:56 UTC (permalink / raw)
  To: Athira Rajeev, mpe; +Cc: maddy, linuxppc-dev

Excerpts from Athira Rajeev's message of July 7, 2021 4:39 pm:
> From: Athira Rajeev <atrajeev@linux.vnet.ibm.cm>
> 
> Power10 performance monitoring unit (PMU) driver uses performance
> monitor counter 5 (PMC5) and performance monitor counter 6 (PMC6)
> for counting instructions and cycles. Event used for cycles is
> PM_RUN_CYC and instructions is PM_RUN_INST_CMPL. But counting of these
> events in wait state is controlled by the CC56RUN bit setting in
> Monitor Mode Control Register0 (MMCR0). If the CC56RUN bit is not
> set, PMC5/6 will not count when CTRL[RUN] is zero.

What's the acutal bug here, can you explain a bit more? I thought
PM_RUN_CYC is supposed to be gated by the runlatch.

POWER9 code looks similar, it doesn't have the same problem?

Thanks,
Nick

> 
> Patch sets the CC56RUN bit in MMCR0 for power10 which makes PMC5 and
> PMC6 count instructions and cycles regardless of the run bit. With this
> change, these events are also now renamed to PM_CYC and PM_INST_CMPL
> rather than PM_RUN_CYC and PM_RUN_INST_CMPL.
> 
> Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support")
> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.cm>
> Reviewed-by: Madhavan Srinivasan <maddy@linux.ibm.com>
> ---
> Notes on testing done for this change:
>  Tested this patch change with a kernel module that
>  turns off and turns on the runlatch. kernel module also
>  reads the counter values for PMC5 and PMC6 during the
>  period when runlatch is off.
>  - Started PMU counters via "perf stat" and loaded the
>    test module.
>  - Checked the counter values captured from module during
>    the runlatch off period.
>  - Verified that counters were frozen without the patch and
>    with the patch, observed counters were incrementing.
> 
>  arch/powerpc/perf/power10-events-list.h |  8 +++---
>  arch/powerpc/perf/power10-pmu.c         | 44 +++++++++++++++++++++++----------
>  2 files changed, 35 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h
> index 93be719..564f1409 100644
> --- a/arch/powerpc/perf/power10-events-list.h
> +++ b/arch/powerpc/perf/power10-events-list.h
> @@ -9,10 +9,10 @@
>  /*
>   * Power10 event codes.
>   */
> -EVENT(PM_RUN_CYC,				0x600f4);
> +EVENT(PM_CYC,				0x600f4);
>  EVENT(PM_DISP_STALL_CYC,			0x100f8);
>  EVENT(PM_EXEC_STALL,				0x30008);
> -EVENT(PM_RUN_INST_CMPL,				0x500fa);
> +EVENT(PM_INST_CMPL,				0x500fa);
>  EVENT(PM_BR_CMPL,                               0x4d05e);
>  EVENT(PM_BR_MPRED_CMPL,                         0x400f6);
>  EVENT(PM_BR_FIN,				0x2f04a);
> @@ -50,8 +50,8 @@
>  /* ITLB Reloaded */
>  EVENT(PM_ITLB_MISS,				0x400fc);
>  
> -EVENT(PM_RUN_CYC_ALT,				0x0001e);
> -EVENT(PM_RUN_INST_CMPL_ALT,			0x00002);
> +EVENT(PM_CYC_ALT,				0x0001e);
> +EVENT(PM_INST_CMPL_ALT,				0x00002);
>  
>  /*
>   * Memory Access Events
> diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
> index f9d64c6..9dd75f3 100644
> --- a/arch/powerpc/perf/power10-pmu.c
> +++ b/arch/powerpc/perf/power10-pmu.c
> @@ -91,8 +91,8 @@
>  
>  /* Table of alternatives, sorted by column 0 */
>  static const unsigned int power10_event_alternatives[][MAX_ALT] = {
> -	{ PM_RUN_CYC_ALT,		PM_RUN_CYC },
> -	{ PM_RUN_INST_CMPL_ALT,		PM_RUN_INST_CMPL },
> +	{ PM_CYC_ALT,			PM_CYC },
> +	{ PM_INST_CMPL_ALT,		PM_INST_CMPL },
>  };
>  
>  static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
> @@ -118,8 +118,8 @@ static int power10_check_attr_config(struct perf_event *ev)
>  	return 0;
>  }
>  
> -GENERIC_EVENT_ATTR(cpu-cycles,			PM_RUN_CYC);
> -GENERIC_EVENT_ATTR(instructions,		PM_RUN_INST_CMPL);
> +GENERIC_EVENT_ATTR(cpu-cycles,			PM_CYC);
> +GENERIC_EVENT_ATTR(instructions,		PM_INST_CMPL);
>  GENERIC_EVENT_ATTR(branch-instructions,		PM_BR_CMPL);
>  GENERIC_EVENT_ATTR(branch-misses,		PM_BR_MPRED_CMPL);
>  GENERIC_EVENT_ATTR(cache-references,		PM_LD_REF_L1);
> @@ -148,8 +148,8 @@ static int power10_check_attr_config(struct perf_event *ev)
>  CACHE_EVENT_ATTR(iTLB-load-misses,		PM_ITLB_MISS);
>  
>  static struct attribute *power10_events_attr_dd1[] = {
> -	GENERIC_EVENT_PTR(PM_RUN_CYC),
> -	GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
> +	GENERIC_EVENT_PTR(PM_CYC),
> +	GENERIC_EVENT_PTR(PM_INST_CMPL),
>  	GENERIC_EVENT_PTR(PM_BR_CMPL),
>  	GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
>  	GENERIC_EVENT_PTR(PM_LD_REF_L1),
> @@ -173,8 +173,8 @@ static int power10_check_attr_config(struct perf_event *ev)
>  };
>  
>  static struct attribute *power10_events_attr[] = {
> -	GENERIC_EVENT_PTR(PM_RUN_CYC),
> -	GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
> +	GENERIC_EVENT_PTR(PM_CYC),
> +	GENERIC_EVENT_PTR(PM_INST_CMPL),
>  	GENERIC_EVENT_PTR(PM_BR_FIN),
>  	GENERIC_EVENT_PTR(PM_MPRED_BR_FIN),
>  	GENERIC_EVENT_PTR(PM_LD_REF_L1),
> @@ -271,8 +271,8 @@ static int power10_check_attr_config(struct perf_event *ev)
>  };
>  
>  static int power10_generic_events_dd1[] = {
> -	[PERF_COUNT_HW_CPU_CYCLES] =			PM_RUN_CYC,
> -	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_RUN_INST_CMPL,
> +	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
> +	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
>  	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_CMPL,
>  	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_CMPL,
>  	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
> @@ -280,8 +280,8 @@ static int power10_check_attr_config(struct perf_event *ev)
>  };
>  
>  static int power10_generic_events[] = {
> -	[PERF_COUNT_HW_CPU_CYCLES] =			PM_RUN_CYC,
> -	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_RUN_INST_CMPL,
> +	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
> +	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
>  	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_FIN,
>  	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_MPRED_BR_FIN,
>  	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
> @@ -548,6 +548,24 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter)
>  
>  #undef C
>  
> +/*
> + * Set the MMCR0[CC56RUN] bit to enable counting for
> + * PMC5 and PMC6 regardless of the state of CTRL[RUN],
> + * so that we can use counters 5 and 6 as PM_INST_CMPL and
> + * PM_CYC.
> + */
> +static int power10_compute_mmcr(u64 event[], int n_ev,
> +				unsigned int hwc[], struct mmcr_regs *mmcr,
> +				struct perf_event *pevents[], u32 flags)
> +{
> +	int ret;
> +
> +	ret = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags);
> +	if (!ret)
> +		mmcr->mmcr0 |= MMCR0_C56RUN;
> +	return ret;
> +}
> +
>  static struct power_pmu power10_pmu = {
>  	.name			= "POWER10",
>  	.n_counter		= MAX_PMU_COUNTERS,
> @@ -555,7 +573,7 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter)
>  	.test_adder		= ISA207_TEST_ADDER,
>  	.group_constraint_mask	= CNST_CACHE_PMC4_MASK,
>  	.group_constraint_val	= CNST_CACHE_PMC4_VAL,
> -	.compute_mmcr		= isa207_compute_mmcr,
> +	.compute_mmcr		= power10_compute_mmcr,
>  	.config_bhrb		= power10_config_bhrb,
>  	.bhrb_filter_map	= power10_bhrb_filter_map,
>  	.get_constraint		= isa207_get_constraint,
> -- 
> 1.8.3.1
> 
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10
  2021-07-08 12:56 ` Nicholas Piggin
@ 2021-07-08 15:43   ` Paul A. Clarke
  2021-07-12  9:40     ` Athira Rajeev
  2021-07-12  9:31   ` Athira Rajeev
  1 sibling, 1 reply; 5+ messages in thread
From: Paul A. Clarke @ 2021-07-08 15:43 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: Athira Rajeev, linuxppc-dev, maddy

On Thu, Jul 08, 2021 at 10:56:57PM +1000, Nicholas Piggin wrote:
> Excerpts from Athira Rajeev's message of July 7, 2021 4:39 pm:
> > From: Athira Rajeev <atrajeev@linux.vnet.ibm.cm>
> > 
> > Power10 performance monitoring unit (PMU) driver uses performance
> > monitor counter 5 (PMC5) and performance monitor counter 6 (PMC6)
> > for counting instructions and cycles. Event used for cycles is
> > PM_RUN_CYC and instructions is PM_RUN_INST_CMPL. But counting of these
> > events in wait state is controlled by the CC56RUN bit setting in
> > Monitor Mode Control Register0 (MMCR0). If the CC56RUN bit is not
> > set, PMC5/6 will not count when CTRL[RUN] is zero.
> 
> What's the acutal bug here, can you explain a bit more? I thought
> PM_RUN_CYC is supposed to be gated by the runlatch.

Would this renaming break compatibility with existing tools that
presume PM_RUN_CYC and PM_RUN_INST_CMPL exist generically?

PC

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10
  2021-07-08 12:56 ` Nicholas Piggin
  2021-07-08 15:43   ` Paul A. Clarke
@ 2021-07-12  9:31   ` Athira Rajeev
  1 sibling, 0 replies; 5+ messages in thread
From: Athira Rajeev @ 2021-07-12  9:31 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: Madhavan Srinivasan, linuxppc-dev



> On 08-Jul-2021, at 6:26 PM, Nicholas Piggin <npiggin@gmail.com> wrote:
> 
> Excerpts from Athira Rajeev's message of July 7, 2021 4:39 pm:
>> From: Athira Rajeev <atrajeev@linux.vnet.ibm.cm>
>> 
>> Power10 performance monitoring unit (PMU) driver uses performance
>> monitor counter 5 (PMC5) and performance monitor counter 6 (PMC6)
>> for counting instructions and cycles. Event used for cycles is
>> PM_RUN_CYC and instructions is PM_RUN_INST_CMPL. But counting of these
>> events in wait state is controlled by the CC56RUN bit setting in
>> Monitor Mode Control Register0 (MMCR0). If the CC56RUN bit is not
>> set, PMC5/6 will not count when CTRL[RUN] is zero.
> 
> What's the acutal bug here, can you explain a bit more? I thought
> PM_RUN_CYC is supposed to be gated by the runlatch.
> 
> POWER9 code looks similar, it doesn't have the same problem?
> 
> Thanks,
> Nick

Hi Nick,

Thanks for the review.

In power9 and before, the default event used for counting "cycles" - PM_CYC (0x0001e) and for counting instruction - PM_INST_CMPL (0x00002) . 
These events, uses two programmable PMC to count cycles and instructions (this causes multiplexing for basic set of events supported by perf stat). 
And PM_CYC/PM_INST_CMPL both by default count irrespective of the run latch state.

So in power10, we decided to use PMC5 and PMC6 for counting instructions and cycles respectively. But PMC5 and PMC6 by default counts only when runlatch is enabled, this can cause issues in case of system wide profiling. So in order to make PMC5/6 behave as PM_CYC and PM_INST_CMPL, we are enabling MMCR0[CC56RUN]] bit.

Thanks
Athira
> 
>> 
>> Patch sets the CC56RUN bit in MMCR0 for power10 which makes PMC5 and
>> PMC6 count instructions and cycles regardless of the run bit. With this
>> change, these events are also now renamed to PM_CYC and PM_INST_CMPL
>> rather than PM_RUN_CYC and PM_RUN_INST_CMPL.
>> 
>> Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support")
>> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.cm>
>> Reviewed-by: Madhavan Srinivasan <maddy@linux.ibm.com>
>> ---
>> Notes on testing done for this change:
>> Tested this patch change with a kernel module that
>> turns off and turns on the runlatch. kernel module also
>> reads the counter values for PMC5 and PMC6 during the
>> period when runlatch is off.
>> - Started PMU counters via "perf stat" and loaded the
>>   test module.
>> - Checked the counter values captured from module during
>>   the runlatch off period.
>> - Verified that counters were frozen without the patch and
>>   with the patch, observed counters were incrementing.
>> 
>> arch/powerpc/perf/power10-events-list.h |  8 +++---
>> arch/powerpc/perf/power10-pmu.c         | 44 +++++++++++++++++++++++----------
>> 2 files changed, 35 insertions(+), 17 deletions(-)
>> 
>> diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h
>> index 93be719..564f1409 100644
>> --- a/arch/powerpc/perf/power10-events-list.h
>> +++ b/arch/powerpc/perf/power10-events-list.h
>> @@ -9,10 +9,10 @@
>> /*
>>  * Power10 event codes.
>>  */
>> -EVENT(PM_RUN_CYC,				0x600f4);
>> +EVENT(PM_CYC,				0x600f4);
>> EVENT(PM_DISP_STALL_CYC,			0x100f8);
>> EVENT(PM_EXEC_STALL,				0x30008);
>> -EVENT(PM_RUN_INST_CMPL,				0x500fa);
>> +EVENT(PM_INST_CMPL,				0x500fa);
>> EVENT(PM_BR_CMPL,                               0x4d05e);
>> EVENT(PM_BR_MPRED_CMPL,                         0x400f6);
>> EVENT(PM_BR_FIN,				0x2f04a);
>> @@ -50,8 +50,8 @@
>> /* ITLB Reloaded */
>> EVENT(PM_ITLB_MISS,				0x400fc);
>> 
>> -EVENT(PM_RUN_CYC_ALT,				0x0001e);
>> -EVENT(PM_RUN_INST_CMPL_ALT,			0x00002);
>> +EVENT(PM_CYC_ALT,				0x0001e);
>> +EVENT(PM_INST_CMPL_ALT,				0x00002);
>> 
>> /*
>>  * Memory Access Events
>> diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
>> index f9d64c6..9dd75f3 100644
>> --- a/arch/powerpc/perf/power10-pmu.c
>> +++ b/arch/powerpc/perf/power10-pmu.c
>> @@ -91,8 +91,8 @@
>> 
>> /* Table of alternatives, sorted by column 0 */
>> static const unsigned int power10_event_alternatives[][MAX_ALT] = {
>> -	{ PM_RUN_CYC_ALT,		PM_RUN_CYC },
>> -	{ PM_RUN_INST_CMPL_ALT,		PM_RUN_INST_CMPL },
>> +	{ PM_CYC_ALT,			PM_CYC },
>> +	{ PM_INST_CMPL_ALT,		PM_INST_CMPL },
>> };
>> 
>> static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
>> @@ -118,8 +118,8 @@ static int power10_check_attr_config(struct perf_event *ev)
>> 	return 0;
>> }
>> 
>> -GENERIC_EVENT_ATTR(cpu-cycles,			PM_RUN_CYC);
>> -GENERIC_EVENT_ATTR(instructions,		PM_RUN_INST_CMPL);
>> +GENERIC_EVENT_ATTR(cpu-cycles,			PM_CYC);
>> +GENERIC_EVENT_ATTR(instructions,		PM_INST_CMPL);
>> GENERIC_EVENT_ATTR(branch-instructions,		PM_BR_CMPL);
>> GENERIC_EVENT_ATTR(branch-misses,		PM_BR_MPRED_CMPL);
>> GENERIC_EVENT_ATTR(cache-references,		PM_LD_REF_L1);
>> @@ -148,8 +148,8 @@ static int power10_check_attr_config(struct perf_event *ev)
>> CACHE_EVENT_ATTR(iTLB-load-misses,		PM_ITLB_MISS);
>> 
>> static struct attribute *power10_events_attr_dd1[] = {
>> -	GENERIC_EVENT_PTR(PM_RUN_CYC),
>> -	GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
>> +	GENERIC_EVENT_PTR(PM_CYC),
>> +	GENERIC_EVENT_PTR(PM_INST_CMPL),
>> 	GENERIC_EVENT_PTR(PM_BR_CMPL),
>> 	GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
>> 	GENERIC_EVENT_PTR(PM_LD_REF_L1),
>> @@ -173,8 +173,8 @@ static int power10_check_attr_config(struct perf_event *ev)
>> };
>> 
>> static struct attribute *power10_events_attr[] = {
>> -	GENERIC_EVENT_PTR(PM_RUN_CYC),
>> -	GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
>> +	GENERIC_EVENT_PTR(PM_CYC),
>> +	GENERIC_EVENT_PTR(PM_INST_CMPL),
>> 	GENERIC_EVENT_PTR(PM_BR_FIN),
>> 	GENERIC_EVENT_PTR(PM_MPRED_BR_FIN),
>> 	GENERIC_EVENT_PTR(PM_LD_REF_L1),
>> @@ -271,8 +271,8 @@ static int power10_check_attr_config(struct perf_event *ev)
>> };
>> 
>> static int power10_generic_events_dd1[] = {
>> -	[PERF_COUNT_HW_CPU_CYCLES] =			PM_RUN_CYC,
>> -	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_RUN_INST_CMPL,
>> +	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
>> +	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
>> 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_CMPL,
>> 	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_CMPL,
>> 	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
>> @@ -280,8 +280,8 @@ static int power10_check_attr_config(struct perf_event *ev)
>> };
>> 
>> static int power10_generic_events[] = {
>> -	[PERF_COUNT_HW_CPU_CYCLES] =			PM_RUN_CYC,
>> -	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_RUN_INST_CMPL,
>> +	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
>> +	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
>> 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_FIN,
>> 	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_MPRED_BR_FIN,
>> 	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
>> @@ -548,6 +548,24 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter)
>> 
>> #undef C
>> 
>> +/*
>> + * Set the MMCR0[CC56RUN] bit to enable counting for
>> + * PMC5 and PMC6 regardless of the state of CTRL[RUN],
>> + * so that we can use counters 5 and 6 as PM_INST_CMPL and
>> + * PM_CYC.
>> + */
>> +static int power10_compute_mmcr(u64 event[], int n_ev,
>> +				unsigned int hwc[], struct mmcr_regs *mmcr,
>> +				struct perf_event *pevents[], u32 flags)
>> +{
>> +	int ret;
>> +
>> +	ret = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags);
>> +	if (!ret)
>> +		mmcr->mmcr0 |= MMCR0_C56RUN;
>> +	return ret;
>> +}
>> +
>> static struct power_pmu power10_pmu = {
>> 	.name			= "POWER10",
>> 	.n_counter		= MAX_PMU_COUNTERS,
>> @@ -555,7 +573,7 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter)
>> 	.test_adder		= ISA207_TEST_ADDER,
>> 	.group_constraint_mask	= CNST_CACHE_PMC4_MASK,
>> 	.group_constraint_val	= CNST_CACHE_PMC4_VAL,
>> -	.compute_mmcr		= isa207_compute_mmcr,
>> +	.compute_mmcr		= power10_compute_mmcr,
>> 	.config_bhrb		= power10_config_bhrb,
>> 	.bhrb_filter_map	= power10_bhrb_filter_map,
>> 	.get_constraint		= isa207_get_constraint,
>> -- 
>> 1.8.3.1
>> 
>> 


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10
  2021-07-08 15:43   ` Paul A. Clarke
@ 2021-07-12  9:40     ` Athira Rajeev
  0 siblings, 0 replies; 5+ messages in thread
From: Athira Rajeev @ 2021-07-12  9:40 UTC (permalink / raw)
  To: Paul A. Clarke; +Cc: Madhavan Srinivasan, linuxppc-dev, Nicholas Piggin



> On 08-Jul-2021, at 9:13 PM, Paul A. Clarke <pc@us.ibm.com> wrote:
> 
> On Thu, Jul 08, 2021 at 10:56:57PM +1000, Nicholas Piggin wrote:
>> Excerpts from Athira Rajeev's message of July 7, 2021 4:39 pm:
>>> From: Athira Rajeev <atrajeev@linux.vnet.ibm.cm>
>>> 
>>> Power10 performance monitoring unit (PMU) driver uses performance
>>> monitor counter 5 (PMC5) and performance monitor counter 6 (PMC6)
>>> for counting instructions and cycles. Event used for cycles is
>>> PM_RUN_CYC and instructions is PM_RUN_INST_CMPL. But counting of these
>>> events in wait state is controlled by the CC56RUN bit setting in
>>> Monitor Mode Control Register0 (MMCR0). If the CC56RUN bit is not
>>> set, PMC5/6 will not count when CTRL[RUN] is zero.
>> 
>> What's the acutal bug here, can you explain a bit more? I thought
>> PM_RUN_CYC is supposed to be gated by the runlatch.
> 
> Would this renaming break compatibility with existing tools that
> presume PM_RUN_CYC and PM_RUN_INST_CMPL exist generically?


Hi Paul,

Thanks for checking the patch.

No, this does not break compatibility with existing tools. Since the change is only for PMC5 and PMC6. Events PM_RUN_CYC and PM_RUN_INST_CMPL still behaves the same way since they are programmed in different PMC’s.

Thanks
Athira
> 
> PC


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-07-12  9:41 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-07  6:39 [PATCH] powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10 Athira Rajeev
2021-07-08 12:56 ` Nicholas Piggin
2021-07-08 15:43   ` Paul A. Clarke
2021-07-12  9:40     ` Athira Rajeev
2021-07-12  9:31   ` Athira Rajeev

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.