From: Marc Zyngier
Subject: Re: [PATCH v3 03/20] arm64: Use the physical counter when available for read_cycles
Date: Mon, 9 Oct 2017 17:21:24 +0100
Message-ID: <9b06425f-7c2c-d44a-cd6c-aeaa4b76849c@arm.com>
In-Reply-To: <20170923004207.22356-4-cdall@linaro.org>
References: <20170923004207.22356-1-cdall@linaro.org> <20170923004207.22356-4-cdall@linaro.org>
To: Christoffer Dall, kvmarm@lists.cs.columbia.edu, linux-arm-kernel@lists.infradead.org
Cc: kvm@vger.kernel.org, Will Deacon, Catalin Marinas, Mark Rutland

On 23/09/17 01:41, Christoffer Dall wrote:
> Currently get_cycles() is hardwired to arch_counter_get_cntvct() on
> arm64, but as we move to using the physical timer for the in-kernel
> time-keeping, we need to make that more flexible.
>
> First, we need to make sure the physical counter can be read on equal
> terms to the virtual counter, which includes adding physical counter
> read functions for timers that require errata.
>
> Second, we need to make a choice between reading the physical vs virtual
> counter, depending on which timer is used for time keeping in the kernel
> otherwise. We can do this using a static key to avoid a performance
> penalty during runtime when reading the counter.
>
> Cc: Catalin Marinas
> Cc: Will Deacon
> Cc: Mark Rutland
> Cc: Marc Zyngier
> Signed-off-by: Christoffer Dall

Right. I should have read patch #3. I'm an idiot.
> ---
>  arch/arm64/include/asm/arch_timer.h  | 15 ++++++++++++---
>  arch/arm64/include/asm/timex.h       |  2 +-
>  drivers/clocksource/arm_arch_timer.c | 32 ++++++++++++++++++++++++++++++--
>  3 files changed, 43 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
> index 1859a1c..c56d8cd 100644
> --- a/arch/arm64/include/asm/arch_timer.h
> +++ b/arch/arm64/include/asm/arch_timer.h
> @@ -30,6 +30,8 @@
>
>  #include
>
> +extern struct static_key_false arch_timer_phys_counter_available;
> +
>  #if IS_ENABLED(CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND)
>  extern struct static_key_false arch_timer_read_ool_enabled;
>  #define needs_unstable_timer_counter_workaround() \
> @@ -52,6 +54,7 @@ struct arch_timer_erratum_workaround {
>  	const char *desc;
>  	u32 (*read_cntp_tval_el0)(void);
>  	u32 (*read_cntv_tval_el0)(void);
> +	u64 (*read_cntpct_el0)(void);
>  	u64 (*read_cntvct_el0)(void);
>  	int (*set_next_event_phys)(unsigned long, struct clock_event_device *);
>  	int (*set_next_event_virt)(unsigned long, struct clock_event_device *);
> @@ -148,10 +151,8 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl)
>
>  static inline u64 arch_counter_get_cntpct(void)
>  {
> -	u64 cval;
>  	isb();
> -	asm volatile("mrs %0, cntpct_el0" : "=r" (cval));
> -	return cval;
> +	return arch_timer_reg_read_stable(cntpct_el0);
>  }
>
>  static inline u64 arch_counter_get_cntvct(void)
> @@ -160,6 +161,14 @@ static inline u64 arch_counter_get_cntvct(void)
>  	return arch_timer_reg_read_stable(cntvct_el0);
>  }
>
> +static inline u64 arch_counter_get_cycles(void)
> +{
> +	if (static_branch_unlikely(&arch_timer_phys_counter_available))
> +		return arch_counter_get_cntpct();
> +	else
> +		return arch_counter_get_cntvct();
> +}
> +
>  static inline int arch_timer_arch_init(void)
>  {
>  	return 0;
> diff --git a/arch/arm64/include/asm/timex.h b/arch/arm64/include/asm/timex.h
> index 81a076e..c0d214c 100644
> --- a/arch/arm64/include/asm/timex.h
> +++ b/arch/arm64/include/asm/timex.h
> @@ -22,7 +22,7 @@
>   * Use the current timer as a cycle counter since this is what we use for
>   * the delay loop.
>   */
> -#define get_cycles()	arch_counter_get_cntvct()
> +#define get_cycles()	arch_counter_get_cycles()

Why can't this be arch_timer_read_counter() instead? Is there any
measurable advantage in using a static key compared to a memory
indirection?
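Just to spell out the comparison I have in mind, here is a minimal
sketch of the two shapes, reusing the symbols from this patch and the
existing arch_timer_read_counter pointer (the extern declarations below
only mirror the real headers, so treat this as illustrative rather than
a drop-in):

#include <linux/types.h>
#include <linux/jump_label.h>

/* Option A: go through the existing per-boot function pointer. */
extern u64 (*arch_timer_read_counter)(void);

static inline u64 get_cycles_via_pointer(void)
{
	/* One pointer load plus an indirect branch on every call. */
	return arch_timer_read_counter();
}

/* Option B (this patch): a static key patched into a direct branch. */
extern struct static_key_false arch_timer_phys_counter_available;
extern u64 arch_counter_get_cntpct(void);	/* static inline in the real header */
extern u64 arch_counter_get_cntvct(void);

static inline u64 get_cycles_via_static_key(void)
{
	/* Compiles down to a nop/branch, no data dependency on a pointer load. */
	if (static_branch_unlikely(&arch_timer_phys_counter_available))
		return arch_counter_get_cntpct();
	return arch_counter_get_cntvct();
}

The static key saves the pointer load and the indirect branch; whether
that is visible anywhere get_cycles() actually matters is the question.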
>
>  #include
>
> diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
> index 9b3322a..f35da20 100644
> --- a/drivers/clocksource/arm_arch_timer.c
> +++ b/drivers/clocksource/arm_arch_timer.c
> @@ -77,6 +77,9 @@ static bool arch_timer_mem_use_virtual;
>  static bool arch_counter_suspend_stop;
>  static bool vdso_default = true;
>
> +DEFINE_STATIC_KEY_FALSE(arch_timer_phys_counter_available);
> +EXPORT_SYMBOL_GPL(arch_timer_phys_counter_available);
> +
>  static bool evtstrm_enable = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM);
>
>  static int __init early_evtstrm_cfg(char *buf)
> @@ -217,6 +220,11 @@ static u32 notrace fsl_a008585_read_cntv_tval_el0(void)
>  	return __fsl_a008585_read_reg(cntv_tval_el0);
>  }
>
> +static u64 notrace fsl_a008585_read_cntpct_el0(void)
> +{
> +	return __fsl_a008585_read_reg(cntpct_el0);
> +}
> +
>  static u64 notrace fsl_a008585_read_cntvct_el0(void)
>  {
>  	return __fsl_a008585_read_reg(cntvct_el0);
> @@ -258,6 +266,11 @@ static u32 notrace hisi_161010101_read_cntv_tval_el0(void)
>  	return __hisi_161010101_read_reg(cntv_tval_el0);
>  }
>
> +static u64 notrace hisi_161010101_read_cntpct_el0(void)
> +{
> +	return __hisi_161010101_read_reg(cntpct_el0);
> +}
> +
>  static u64 notrace hisi_161010101_read_cntvct_el0(void)
>  {
>  	return __hisi_161010101_read_reg(cntvct_el0);
> @@ -288,6 +301,15 @@ static struct ate_acpi_oem_info hisi_161010101_oem_info[] = {
>  #endif
>
>  #ifdef CONFIG_ARM64_ERRATUM_858921
> +static u64 notrace arm64_858921_read_cntpct_el0(void)
> +{
> +	u64 old, new;
> +
> +	old = read_sysreg(cntpct_el0);
> +	new = read_sysreg(cntpct_el0);
> +	return (((old ^ new) >> 32) & 1) ? old : new;
> +}
> +
>  static u64 notrace arm64_858921_read_cntvct_el0(void)
>  {
>  	u64 old, new;
> @@ -346,6 +368,7 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = {
>  		.desc = "Freescale erratum a005858",
>  		.read_cntp_tval_el0 = fsl_a008585_read_cntp_tval_el0,
>  		.read_cntv_tval_el0 = fsl_a008585_read_cntv_tval_el0,
> +		.read_cntpct_el0 = fsl_a008585_read_cntpct_el0,
>  		.read_cntvct_el0 = fsl_a008585_read_cntvct_el0,
>  		.set_next_event_phys = erratum_set_next_event_tval_phys,
>  		.set_next_event_virt = erratum_set_next_event_tval_virt,
> @@ -358,6 +381,7 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = {
>  		.desc = "HiSilicon erratum 161010101",
>  		.read_cntp_tval_el0 = hisi_161010101_read_cntp_tval_el0,
>  		.read_cntv_tval_el0 = hisi_161010101_read_cntv_tval_el0,
> +		.read_cntpct_el0 = hisi_161010101_read_cntpct_el0,
>  		.read_cntvct_el0 = hisi_161010101_read_cntvct_el0,
>  		.set_next_event_phys = erratum_set_next_event_tval_phys,
>  		.set_next_event_virt = erratum_set_next_event_tval_virt,
> @@ -368,6 +392,7 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = {
>  		.desc = "HiSilicon erratum 161010101",
>  		.read_cntp_tval_el0 = hisi_161010101_read_cntp_tval_el0,
>  		.read_cntv_tval_el0 = hisi_161010101_read_cntv_tval_el0,
> +		.read_cntpct_el0 = hisi_161010101_read_cntpct_el0,
>  		.read_cntvct_el0 = hisi_161010101_read_cntvct_el0,
>  		.set_next_event_phys = erratum_set_next_event_tval_phys,
>  		.set_next_event_virt = erratum_set_next_event_tval_virt,
> @@ -378,6 +403,7 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = {
>  		.match_type = ate_match_local_cap_id,
>  		.id = (void *)ARM64_WORKAROUND_858921,
>  		.desc = "ARM erratum 858921",
> +		.read_cntpct_el0 = arm64_858921_read_cntpct_el0,
>  		.read_cntvct_el0 = arm64_858921_read_cntvct_el0,
>  	},
>  #endif
> @@ -890,10 +916,12 @@ static void __init arch_counter_register(unsigned type)
>
>  	/* Register the CP15 based counter if we have one */
>  	if (type & ARCH_TIMER_TYPE_CP15) {
> -		if (arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI)
> +		if (arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) {
>  			arch_timer_read_counter = arch_counter_get_cntvct;
> -		else
> +		} else {
>  			arch_timer_read_counter = arch_counter_get_cntpct;
> +			static_branch_enable(&arch_timer_phys_counter_available);
> +		}
>
>  		clocksource_counter.archdata.vdso_direct = vdso_default;
>  	} else {
>

In my reply to patch #2, I had the following hunk:

@@ -310,7 +329,7 @@ static void erratum_set_next_event_tval_generic(const int access, unsigned long
 					      struct clock_event_device *clk)
 {
 	unsigned long ctrl;
-	u64 cval = evt + arch_counter_get_cntvct();
+	u64 cval = evt + arch_timer_read_counter();
 
 	ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
 	ctrl |= ARCH_TIMER_CTRL_ENABLE;

Once we start using a different timer, this could well have an effect...
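To make that concrete, a rough sketch of the skew (hypothetical numbers,
and only relevant when the two counters actually differ, i.e. a non-zero
CNTVOFF with CNTVCT = CNTPCT - CNTVOFF):

/*
 * Say CNTPCT = 1000000 and CNTVOFF = 300000, so CNTVCT = 700000.
 */
static void cval_skew_sketch(unsigned long evt)
{
	/* Base taken from the virtual counter: 700000 + evt */
	u64 cval = evt + arch_counter_get_cntvct();

	/*
	 * If 'access' is ARCH_TIMER_PHYS_ACCESS, that value is compared
	 * against CNTPCT (1000000), so the event fires about CNTVOFF
	 * ticks early (immediately, for a small evt).
	 *
	 * Basing it on whichever counter is actually in use keeps the
	 * deadline consistent with the timer being programmed:
	 */
	u64 cval_matched = evt + arch_timer_read_counter();

	(void)cval;
	(void)cval_matched;
}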
Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...