From mboxrd@z Thu Jan 1 00:00:00 1970
From: Christoffer Dall
Subject: Re: [PATCH v2 04/11] arm64: alternatives: use tpidr_el2 on VHE hosts
Date: Sun, 17 Sep 2017 07:43:34 -0700
Message-ID: <20170917144334.GB99021@lvm>
References: <20170808164616.25949-1-james.morse@arm.com>
 <20170808164616.25949-5-james.morse@arm.com>
Mime-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Return-path:
Content-Disposition: inline
In-Reply-To: <20170808164616.25949-5-james.morse-5wv7dgnIgG8@public.gmane.org>
Sender: devicetree-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
To: James Morse
Cc: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org,
 kvmarm-FPEHb7Xf0XXUo1n7N8X6UoWGPAHP3yOg@public.gmane.org,
 devicetree-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
 Will Deacon, Catalin Marinas, Marc Zyngier, Christoffer Dall,
 Mark Rutland, Rob Herring, Loc Ho
List-Id: devicetree@vger.kernel.org

On Tue, Aug 08, 2017 at 05:46:09PM +0100, James Morse wrote:
> Now that KVM uses tpidr_el2 in the same way as Linux's cpu_offset in
> tpidr_el1, merge the two. This saves KVM from save/restoring tpidr_el1
> on VHE hosts, and allows future code to blindly access per-cpu variables
> without triggering world-switch.
>
> Signed-off-by: James Morse
> ---
> Changes since v1:
> * cpu_copy_el2regs()'s 'have I been patched' test now always sets a register,
>   just in case the compiler puts do_copyregs on the stack.

I don't understand this?

>
>  arch/arm64/include/asm/assembler.h | 8 ++++++++
>  arch/arm64/include/asm/percpu.h | 11 +++++++++--
>  arch/arm64/include/asm/processor.h | 1 +
>  arch/arm64/kernel/cpufeature.c | 23 +++++++++++++++++++++++
>  arch/arm64/mm/proc.S | 8 ++++++++
>  5 files changed, 49 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index 1b67c3782d00..06252602901e 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -236,7 +236,11 @@ lr .req x30 // link register
>  */
>  .macro adr_this_cpu, dst, sym, tmp
>  adr_l \dst, \sym
> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
>  mrs \tmp, tpidr_el1
> +alternative_else
> + mrs \tmp, tpidr_el2
> +alternative_endif
>  add \dst, \dst, \tmp
>  .endm
>
> @@ -247,7 +251,11 @@ lr .req x30 // link register
>  */
>  .macro ldr_this_cpu dst, sym, tmp
>  adr_l \dst, \sym
> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
>  mrs \tmp, tpidr_el1
> +alternative_else
> + mrs \tmp, tpidr_el2
> +alternative_endif
>  ldr \dst, [\dst, \tmp]
>  .endm
>
> diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
> index 3bd498e4de4c..43393208229e 100644
> --- a/arch/arm64/include/asm/percpu.h
> +++ b/arch/arm64/include/asm/percpu.h
> @@ -16,11 +16,15 @@
>  #ifndef __ASM_PERCPU_H
>  #define __ASM_PERCPU_H
>
> +#include
>  #include
>
>  static inline void set_my_cpu_offset(unsigned long off)
>  {
> - asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory");
> + asm volatile(ALTERNATIVE("msr tpidr_el1, %0",
> +                          "msr tpidr_el2, %0",
> +                          ARM64_HAS_VIRT_HOST_EXTN)
> +              :: "r" (off) : "memory");
>  }
>
>  static inline unsigned long __my_cpu_offset(void)
> @@ -31,7 +35,10 @@ static inline unsigned long __my_cpu_offset(void)
>  * We want to allow caching the value, so avoid using volatile and
>  * instead use a fake stack read to hazard against barrier().
>  */
> - asm("mrs %0, tpidr_el1" : "=r" (off) :
> + asm(ALTERNATIVE("mrs %0, tpidr_el1",
> +                 "mrs %0, tpidr_el2",
> +                 ARM64_HAS_VIRT_HOST_EXTN)
> +     : "=r" (off) :
>     "Q" (*(const unsigned long *)current_stack_pointer));
>
>  return off;
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 64c9e78f9882..6d4fdfccf869 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -193,5 +193,6 @@ static inline void spin_lock_prefetch(const void *ptr)
>
>  int cpu_enable_pan(void *__unused);
>  int cpu_enable_cache_maint_trap(void *__unused);
> +int cpu_copy_el2regs(void *__unused);
>
>  #endif /* __ASM_PROCESSOR_H */
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 9f9e0064c8c1..1960706d2422 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -864,6 +864,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>  .capability = ARM64_HAS_VIRT_HOST_EXTN,
>  .def_scope = SCOPE_SYSTEM,
>  .matches = runs_at_el2,
> + .enable = cpu_copy_el2regs,
>  },
>  {
>  .desc = "32-bit EL0 Support",
> @@ -1295,3 +1296,25 @@ static int __init enable_mrs_emulation(void)
>  }
>
>  late_initcall(enable_mrs_emulation);
> +
> +int cpu_copy_el2regs(void *__unused)
> +{
> + int do_copyregs = 0;
> +
> + /*
> +  * Copy register values that aren't redirected by hardware.
> +  *
> +  * Before code patching, we only set tpidr_el1, all CPUs need to copy
> +  * this value to tpidr_el2 before we patch the code. Once we've done
> +  * that, freshly-onlined CPUs will set tpidr_el2, so we don't need to
> +  * do anything here.
> +  */
> + asm volatile(ALTERNATIVE("mov %0, #1", "mov %0, #0",
> +                          ARM64_HAS_VIRT_HOST_EXTN)
> +              : "=r" (do_copyregs) : : );
> +
> + if (do_copyregs)
> +   write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);

Could you just use has_vhe() here ?
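Something like the following (completely untested) sketch is what I have
in mind; it assumes has_vhe() from asm/virt.h can already be used at the
point this enable callback runs, and that the callback only runs before
the alternatives are applied, while tpidr_el1 still holds the per-cpu
offset:

	int cpu_copy_el2regs(void *__unused)
	{
		/*
		 * Untested sketch: use has_vhe() instead of the inline-asm
		 * 'have the alternatives been applied yet' test. Only valid
		 * if this callback cannot run after the code has been
		 * patched, i.e. tpidr_el1 still holds the per-cpu offset.
		 */
		if (has_vhe())
			write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);

		return 0;
	}

If that assumption doesn't hold (e.g. for late-onlined CPUs), then I can
see why the explicit 'have I been patched' test is needed.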
> +
> + return 0;
> +}
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 877d42fb0df6..109d43a15aaf 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -70,7 +70,11 @@ ENTRY(cpu_do_suspend)
>  mrs x8, mdscr_el1
>  mrs x9, oslsr_el1
>  mrs x10, sctlr_el1
> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
>  mrs x11, tpidr_el1
> +alternative_else
> + mrs x11, tpidr_el2
> +alternative_endif
>  mrs x12, sp_el0
>  stp x2, x3, [x0]
>  stp x4, xzr, [x0, #16]
> @@ -116,7 +120,11 @@ ENTRY(cpu_do_resume)
>  msr mdscr_el1, x10
>
>  msr sctlr_el1, x12
> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
>  msr tpidr_el1, x13
> +alternative_else
> + msr tpidr_el2, x13
> +alternative_endif
>  msr sp_el0, x14
>  /*
>  * Restore oslsr_el1 by writing oslar_el1
> --
> 2.13.3
>

Otherwise:

Reviewed-by: Christoffer Dall