* [PATCH v2 1/8] powerpc/32: Add VDSO version of getcpu
2019-08-22 16:34 [PATCH v2 0/8] powerpc/vdso32 enhancement and optimisation Christophe Leroy
@ 2019-08-22 16:34 ` Christophe Leroy
2019-09-18 5:51 ` Michael Ellerman
2019-08-22 16:34 ` [PATCH v2 2/8] powerpc/vdso32: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE Christophe Leroy
` (6 subsequent siblings)
7 siblings, 1 reply; 18+ messages in thread
From: Christophe Leroy @ 2019-08-22 16:34 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
Commit 18ad51dd342a ("powerpc: Add VDSO version of getcpu") added
getcpu() for PPC64 only, by making use of a user readable general
purpose SPR.
PPC32 doesn't have any such SPR, a full system call can still be
avoided by implementing a fast system call which reads the CPU id
in the task struct and returns immediately without going back in
virtual mode.
Before the patch, vdsotest reported:
getcpu: syscall: 1572 nsec/call
getcpu: libc: 1787 nsec/call
getcpu: vdso: not tested
Now, vdsotest reports:
getcpu: syscall: 1582 nsec/call
getcpu: libc: 667 nsec/call
getcpu: vdso: 368 nsec/call
For non SMP, just return CPU id 0 from the VDSO directly.
PPC32 doesn't support CONFIG_NUMA so NUMA node is always 0.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
v2: fixed build error in getcpu.S
---
arch/powerpc/include/asm/vdso.h | 2 ++
arch/powerpc/kernel/head_32.h | 13 +++++++++++++
arch/powerpc/kernel/head_booke.h | 11 +++++++++++
arch/powerpc/kernel/vdso32/Makefile | 4 +---
arch/powerpc/kernel/vdso32/getcpu.S | 7 +++++++
arch/powerpc/kernel/vdso32/vdso32.lds.S | 2 --
6 files changed, 34 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h
index b5e1f8f8a05c..adb54782df5f 100644
--- a/arch/powerpc/include/asm/vdso.h
+++ b/arch/powerpc/include/asm/vdso.h
@@ -16,6 +16,8 @@
/* Define if 64 bits VDSO has procedure descriptors */
#undef VDS64_HAS_DESCRIPTORS
+#define NR_MAGIC_FAST_VDSO_SYSCALL 0x789a
+
#ifndef __ASSEMBLY__
/* Offsets relative to thread->vdso_base */
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 4a692553651f..a2e38b59785a 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -3,6 +3,8 @@
#define __HEAD_32_H__
#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
/*
* MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE.
@@ -74,7 +76,13 @@
.endm
.macro SYSCALL_ENTRY trapno
+#ifdef CONFIG_SMP
+ cmplwi cr0, r0, NR_MAGIC_FAST_VDSO_SYSCALL
+#endif
mfspr r12,SPRN_SPRG_THREAD
+#ifdef CONFIG_SMP
+ beq- 1f
+#endif
mfcr r10
lwz r11,TASK_STACK-THREAD(r12)
mflr r9
@@ -152,6 +160,11 @@
mtspr SPRN_SRR0,r11
SYNC
RFI /* jump to handler, enable MMU */
+#ifdef CONFIG_SMP
+1:
+ lwz r5, TASK_CPU - THREAD(r12)
+ RFI
+#endif
.endm
/*
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 2ae635df9026..c534e87cac84 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -3,6 +3,8 @@
#define __HEAD_BOOKE_H__
#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_booke_hv_asm.h>
@@ -104,6 +106,10 @@ FTR_SECTION_ELSE
#ifdef CONFIG_KVM_BOOKE_HV
ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
#endif
+#ifdef CONFIG_SMP
+ cmplwi cr0, r0, NR_MAGIC_FAST_VDSO_SYSCALL
+ beq- 1f
+#endif
BOOKE_CLEAR_BTB(r11)
lwz r11, TASK_STACK - THREAD(r10)
rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */
@@ -176,6 +182,11 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
mtspr SPRN_SRR0,r11
SYNC
RFI /* jump to handler, enable MMU */
+#ifdef CONFIG_SMP
+1:
+ lwz r5, TASK_CPU - THREAD(r10)
+ RFI
+#endif
.endm
/* To handle the additional exception priority levels on 40x and Book-E
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index 06f54d947057..e147bbdc12cd 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -2,9 +2,7 @@
# List of files in the vdso, has to be asm only for now
-obj-vdso32-$(CONFIG_PPC64) = getcpu.o
-obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o \
- $(obj-vdso32-y)
+obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
# Build rules
diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S
index 63e914539e1a..bde226ad904d 100644
--- a/arch/powerpc/kernel/vdso32/getcpu.S
+++ b/arch/powerpc/kernel/vdso32/getcpu.S
@@ -17,7 +17,14 @@
*/
V_FUNCTION_BEGIN(__kernel_getcpu)
.cfi_startproc
+#if defined(CONFIG_PPC64)
mfspr r5,SPRN_SPRG_VDSO_READ
+#elif defined(CONFIG_SMP)
+ li r0, NR_MAGIC_FAST_VDSO_SYSCALL
+ sc /* returns cpuid in r5, clobbers cr0 and r10-r13 */
+#else
+ li r5, 0
+#endif
cmpwi cr0,r3,0
cmpwi cr1,r4,0
clrlwi r6,r5,16
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index 099a6db14e67..663880671e20 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -152,9 +152,7 @@ VERSION
__kernel_sync_dicache_p5;
__kernel_sigtramp32;
__kernel_sigtramp_rt32;
-#ifdef CONFIG_PPC64
__kernel_getcpu;
-#endif
__kernel_time;
local: *;
--
2.13.3
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH v2 1/8] powerpc/32: Add VDSO version of getcpu
2019-08-22 16:34 ` [PATCH v2 1/8] powerpc/32: Add VDSO version of getcpu Christophe Leroy
@ 2019-09-18 5:51 ` Michael Ellerman
2019-10-29 16:10 ` Christophe Leroy
0 siblings, 1 reply; 18+ messages in thread
From: Michael Ellerman @ 2019-09-18 5:51 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras
Cc: linuxppc-dev, linux-kernel
Hi Christophe,
Sorry I'm late replying to this.
Christophe Leroy <christophe.leroy@c-s.fr> writes:
> Commit 18ad51dd342a ("powerpc: Add VDSO version of getcpu") added
> getcpu() for PPC64 only, by making use of a user readable general
> purpose SPR.
>
> PPC32 doesn't have any such SPR, a full system call can still be
> avoided by implementing a fast system call which reads the CPU id
> in the task struct and returns immediately without going back in
> virtual mode.
>
> Before the patch, vdsotest reported:
> getcpu: syscall: 1572 nsec/call
> getcpu: libc: 1787 nsec/call
> getcpu: vdso: not tested
>
> Now, vdsotest reports:
> getcpu: syscall: 1582 nsec/call
> getcpu: libc: 667 nsec/call
> getcpu: vdso: 368 nsec/call
>
> For non SMP, just return CPU id 0 from the VDSO directly.
>
> PPC32 doesn't support CONFIG_NUMA so NUMA node is always 0.
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>
> ---
> v2: fixed build error in getcpu.S
> ---
> arch/powerpc/include/asm/vdso.h | 2 ++
> arch/powerpc/kernel/head_32.h | 13 +++++++++++++
> arch/powerpc/kernel/head_booke.h | 11 +++++++++++
> arch/powerpc/kernel/vdso32/Makefile | 4 +---
> arch/powerpc/kernel/vdso32/getcpu.S | 7 +++++++
> arch/powerpc/kernel/vdso32/vdso32.lds.S | 2 --
> 6 files changed, 34 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h
> index b5e1f8f8a05c..adb54782df5f 100644
> --- a/arch/powerpc/include/asm/vdso.h
> +++ b/arch/powerpc/include/asm/vdso.h
> @@ -16,6 +16,8 @@
> /* Define if 64 bits VDSO has procedure descriptors */
> #undef VDS64_HAS_DESCRIPTORS
>
> +#define NR_MAGIC_FAST_VDSO_SYSCALL 0x789a
We are still in the middle of the years long process of removing the
"magic" syscall on 64-bit:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/powerpc/kernel/exceptions-64s.S?commit=4d856f72c10ecb060868ed10ff1b1453943fc6c8#n1578
Can we not add another one on 32-bit?
Is it really such a fast path that it's worth putting a wart in the
syscall entry like that?
Is there some other method? On s390 they have a per-cpu VDSO page, that
would be a nice option. How we do that would be specific to a particular
MMU, and maybe not even possible with some MMUs. So maybe that's not
feasible.
If you do want to add a fastpath syscall then please just add it as a
regular syscall number, that way it's at least a bit less of a wart.
It's still not visible via tracing/ptrace etc. which is a pain but at
least the number is not "magical" too.
cheers
> /* Offsets relative to thread->vdso_base */
> diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
> index 4a692553651f..a2e38b59785a 100644
> --- a/arch/powerpc/kernel/head_32.h
> +++ b/arch/powerpc/kernel/head_32.h
> @@ -3,6 +3,8 @@
> #define __HEAD_32_H__
>
> #include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
> +#include <asm/vdso.h>
> +#include <asm/asm-offsets.h>
>
> /*
> * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE.
> @@ -74,7 +76,13 @@
> .endm
>
> .macro SYSCALL_ENTRY trapno
> +#ifdef CONFIG_SMP
> + cmplwi cr0, r0, NR_MAGIC_FAST_VDSO_SYSCALL
> +#endif
> mfspr r12,SPRN_SPRG_THREAD
> +#ifdef CONFIG_SMP
> + beq- 1f
> +#endif
> mfcr r10
> lwz r11,TASK_STACK-THREAD(r12)
> mflr r9
> @@ -152,6 +160,11 @@
> mtspr SPRN_SRR0,r11
> SYNC
> RFI /* jump to handler, enable MMU */
> +#ifdef CONFIG_SMP
> +1:
> + lwz r5, TASK_CPU - THREAD(r12)
> + RFI
> +#endif
> .endm
>
> /*
> diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
> index 2ae635df9026..c534e87cac84 100644
> --- a/arch/powerpc/kernel/head_booke.h
> +++ b/arch/powerpc/kernel/head_booke.h
> @@ -3,6 +3,8 @@
> #define __HEAD_BOOKE_H__
>
> #include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
> +#include <asm/vdso.h>
> +#include <asm/asm-offsets.h>
> #include <asm/kvm_asm.h>
> #include <asm/kvm_booke_hv_asm.h>
>
> @@ -104,6 +106,10 @@ FTR_SECTION_ELSE
> #ifdef CONFIG_KVM_BOOKE_HV
> ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
> #endif
> +#ifdef CONFIG_SMP
> + cmplwi cr0, r0, NR_MAGIC_FAST_VDSO_SYSCALL
> + beq- 1f
> +#endif
> BOOKE_CLEAR_BTB(r11)
> lwz r11, TASK_STACK - THREAD(r10)
> rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */
> @@ -176,6 +182,11 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
> mtspr SPRN_SRR0,r11
> SYNC
> RFI /* jump to handler, enable MMU */
> +#ifdef CONFIG_SMP
> +1:
> + lwz r5, TASK_CPU - THREAD(r10)
> + RFI
> +#endif
> .endm
>
> /* To handle the additional exception priority levels on 40x and Book-E
> diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
> index 06f54d947057..e147bbdc12cd 100644
> --- a/arch/powerpc/kernel/vdso32/Makefile
> +++ b/arch/powerpc/kernel/vdso32/Makefile
> @@ -2,9 +2,7 @@
>
> # List of files in the vdso, has to be asm only for now
>
> -obj-vdso32-$(CONFIG_PPC64) = getcpu.o
> -obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o \
> - $(obj-vdso32-y)
> +obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
>
> # Build rules
>
> diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S
> index 63e914539e1a..bde226ad904d 100644
> --- a/arch/powerpc/kernel/vdso32/getcpu.S
> +++ b/arch/powerpc/kernel/vdso32/getcpu.S
> @@ -17,7 +17,14 @@
> */
> V_FUNCTION_BEGIN(__kernel_getcpu)
> .cfi_startproc
> +#if defined(CONFIG_PPC64)
> mfspr r5,SPRN_SPRG_VDSO_READ
> +#elif defined(CONFIG_SMP)
> + li r0, NR_MAGIC_FAST_VDSO_SYSCALL
> + sc /* returns cpuid in r5, clobbers cr0 and r10-r13 */
> +#else
> + li r5, 0
> +#endif
> cmpwi cr0,r3,0
> cmpwi cr1,r4,0
> clrlwi r6,r5,16
> diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
> index 099a6db14e67..663880671e20 100644
> --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
> +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
> @@ -152,9 +152,7 @@ VERSION
> __kernel_sync_dicache_p5;
> __kernel_sigtramp32;
> __kernel_sigtramp_rt32;
> -#ifdef CONFIG_PPC64
> __kernel_getcpu;
> -#endif
> __kernel_time;
>
> local: *;
> --
> 2.13.3
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 2/8] powerpc/vdso32: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
2019-08-22 16:34 [PATCH v2 0/8] powerpc/vdso32 enhancement and optimisation Christophe Leroy
2019-08-22 16:34 ` [PATCH v2 1/8] powerpc/32: Add VDSO version of getcpu Christophe Leroy
@ 2019-08-22 16:34 ` Christophe Leroy
2019-08-22 16:34 ` [PATCH v2 3/8] powerpc: Fix vDSO clock_getres() Christophe Leroy
` (5 subsequent siblings)
7 siblings, 0 replies; 18+ messages in thread
From: Christophe Leroy @ 2019-08-22 16:34 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
This is copied and adapted from commit 5c929885f1bb ("powerpc/vdso64:
Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE")
from Santosh Sivaraj <santosh@fossix.org>
Benchmark from vdsotest-all:
clock-gettime-realtime: syscall: 3601 nsec/call
clock-gettime-realtime: libc: 1072 nsec/call
clock-gettime-realtime: vdso: 931 nsec/call
clock-gettime-monotonic: syscall: 4034 nsec/call
clock-gettime-monotonic: libc: 1213 nsec/call
clock-gettime-monotonic: vdso: 1076 nsec/call
clock-gettime-realtime-coarse: syscall: 2722 nsec/call
clock-gettime-realtime-coarse: libc: 805 nsec/call
clock-gettime-realtime-coarse: vdso: 668 nsec/call
clock-gettime-monotonic-coarse: syscall: 2949 nsec/call
clock-gettime-monotonic-coarse: libc: 882 nsec/call
clock-gettime-monotonic-coarse: vdso: 745 nsec/call
Additional test passed with:
vdsotest -d 30 clock-gettime-monotonic-coarse verify
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Santosh Sivaraj <santosh@fossix.org>
Link: https://github.com/linuxppc/issues/issues/41
---
arch/powerpc/kernel/vdso32/gettimeofday.S | 64 +++++++++++++++++++++++++++----
1 file changed, 57 insertions(+), 7 deletions(-)
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index becd9f8767ed..decd263c16e0 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -71,7 +71,13 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
cmpli cr0,r3,CLOCK_REALTIME
cmpli cr1,r3,CLOCK_MONOTONIC
cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
+
+ cmpli cr5,r3,CLOCK_REALTIME_COARSE
+ cmpli cr6,r3,CLOCK_MONOTONIC_COARSE
+ cror cr5*4+eq,cr5*4+eq,cr6*4+eq
+
+ cror cr0*4+eq,cr0*4+eq,cr5*4+eq
+ bne cr0, .Lgettime_fallback
mflr r12 /* r12 saves lr */
.cfi_register lr,r12
@@ -80,8 +86,10 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
mr r9,r3 /* datapage ptr in r9 */
lis r7,NSEC_PER_SEC@h /* want nanoseconds */
ori r7,r7,NSEC_PER_SEC@l
-50: bl __do_get_tspec@local /* get sec/nsec from tb & kernel */
- bne cr1,80f /* not monotonic -> all done */
+ beq cr5, .Lcoarse_clocks
+.Lprecise_clocks:
+ bl __do_get_tspec@local /* get sec/nsec from tb & kernel */
+ bne cr1, .Lfinish /* not monotonic -> all done */
/*
* CLOCK_MONOTONIC
@@ -105,12 +113,53 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
add r9,r9,r0
lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
cmpl cr0,r8,r0 /* check if updated */
- bne- 50b
+ bne- .Lprecise_clocks
+ b .Lfinish_monotonic
+
+ /*
+ * For coarse clocks we get data directly from the vdso data page, so
+ * we don't need to call __do_get_tspec, but we still need to do the
+ * counter trick.
+ */
+.Lcoarse_clocks:
+ lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
+ andi. r0,r8,1 /* pending update ? loop */
+ bne- .Lcoarse_clocks
+ add r9,r9,r0 /* r0 is already 0 */
+
+ /*
+ * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE
+ * too
+ */
+ lwz r3,STAMP_XTIME+TSPC32_TV_SEC(r9)
+ lwz r4,STAMP_XTIME+TSPC32_TV_NSEC(r9)
+ bne cr6,1f
+
+ /* CLOCK_MONOTONIC_COARSE */
+ lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9)
+ lwz r6,WTOM_CLOCK_NSEC(r9)
+
+ /* check if counter has updated */
+ or r0,r6,r5
+1: or r0,r0,r3
+ or r0,r0,r4
+ xor r0,r0,r0
+ add r3,r3,r0
+ lwz r0,CFG_TB_UPDATE_COUNT+LOPART(r9)
+ cmpl cr0,r0,r8 /* check if updated */
+ bne- .Lcoarse_clocks
+
+ /* Counter has not updated, so continue calculating proper values for
+ * sec and nsec if monotonic coarse, or just return with the proper
+ * values for realtime.
+ */
+ bne cr6, .Lfinish
/* Calculate and store result. Note that this mimics the C code,
* which may cause funny results if nsec goes negative... is that
* possible at all ?
*/
+.Lfinish_monotonic:
add r3,r3,r5
add r4,r4,r6
cmpw cr0,r4,r7
@@ -118,11 +167,12 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
blt 1f
subf r4,r7,r4
addi r3,r3,1
-1: bge cr1,80f
+1: bge cr1, .Lfinish
addi r3,r3,-1
add r4,r4,r7
-80: stw r3,TSPC32_TV_SEC(r11)
+.Lfinish:
+ stw r3,TSPC32_TV_SEC(r11)
stw r4,TSPC32_TV_NSEC(r11)
mtlr r12
@@ -133,7 +183,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
/*
* syscall fallback
*/
-99:
+.Lgettime_fallback:
li r0,__NR_clock_gettime
.cfi_restore lr
sc
--
2.13.3
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 3/8] powerpc: Fix vDSO clock_getres()
2019-08-22 16:34 [PATCH v2 0/8] powerpc/vdso32 enhancement and optimisation Christophe Leroy
2019-08-22 16:34 ` [PATCH v2 1/8] powerpc/32: Add VDSO version of getcpu Christophe Leroy
2019-08-22 16:34 ` [PATCH v2 2/8] powerpc/vdso32: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE Christophe Leroy
@ 2019-08-22 16:34 ` Christophe Leroy
2019-08-22 17:17 ` Sasha Levin
2019-08-22 16:34 ` [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage() Christophe Leroy
` (4 subsequent siblings)
7 siblings, 1 reply; 18+ messages in thread
From: Christophe Leroy @ 2019-08-22 16:34 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
clock_getres in the vDSO library has to preserve the same behaviour
of posix_get_hrtimer_res().
In particular, posix_get_hrtimer_res() does:
sec = 0;
ns = hrtimer_resolution;
and hrtimer_resolution depends on the enablement of the high
resolution timers that can happen either at compile or at run time.
Fix the powerpc vdso implementation of clock_getres keeping a copy of
hrtimer_resolution in vdso data and using that directly.
Fixes: a7f290dad32e ("[PATCH] powerpc: Merge vdso's and add vdso support
to 32 bits kernel")
Cc: stable@vger.kernel.org
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@c-s.fr>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
[chleroy: changed CLOCK_REALTIME_RES to CLOCK_HRTIMER_RES]
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/vdso_datapage.h | 2 ++
arch/powerpc/kernel/asm-offsets.c | 2 +-
arch/powerpc/kernel/time.c | 1 +
arch/powerpc/kernel/vdso32/gettimeofday.S | 7 +++++--
arch/powerpc/kernel/vdso64/gettimeofday.S | 7 +++++--
5 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h
index c61d59ed3b45..2ccb938d8544 100644
--- a/arch/powerpc/include/asm/vdso_datapage.h
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -82,6 +82,7 @@ struct vdso_data {
__s32 wtom_clock_nsec; /* Wall to monotonic clock nsec */
__s64 wtom_clock_sec; /* Wall to monotonic clock sec */
struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */
+ __u32 hrtimer_res; /* hrtimer resolution */
__u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */
__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
};
@@ -103,6 +104,7 @@ struct vdso_data {
__s32 wtom_clock_nsec;
struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */
__u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */
+ __u32 hrtimer_res; /* hrtimer resolution */
__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
__u32 dcache_block_size; /* L1 d-cache block size */
__u32 icache_block_size; /* L1 i-cache block size */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4ccb6b3a7fbd..6279053967fd 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -387,6 +387,7 @@ int main(void)
OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec);
OFFSET(STAMP_XTIME, vdso_data, stamp_xtime);
OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction);
+ OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res);
OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size);
OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size);
OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size);
@@ -417,7 +418,6 @@ int main(void)
DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
- DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
#ifdef CONFIG_BUG
DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 694522308cd5..619447b1b797 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -959,6 +959,7 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec;
vdso_data->stamp_xtime = xt;
vdso_data->stamp_sec_fraction = frac_sec;
+ vdso_data->hrtimer_res = hrtimer_resolution;
smp_wmb();
++(vdso_data->tb_update_count);
}
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index decd263c16e0..355b537d327a 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -206,12 +206,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
cror cr0*4+eq,cr0*4+eq,cr1*4+eq
bne cr0,99f
+ mflr r12
+ .cfi_register lr,r12
+ bl __get_datapage@local /* get data page */
+ lwz r5, CLOCK_HRTIMER_RES(r3)
+ mtlr r12
li r3,0
cmpli cr0,r4,0
crclr cr0*4+so
beqlr
- lis r5,CLOCK_REALTIME_RES@h
- ori r5,r5,CLOCK_REALTIME_RES@l
stw r3,TSPC32_TV_SEC(r4)
stw r5,TSPC32_TV_NSEC(r4)
blr
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 07bfe33fe874..81757f06bbd7 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -186,12 +186,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
cror cr0*4+eq,cr0*4+eq,cr1*4+eq
bne cr0,99f
+ mflr r12
+ .cfi_register lr,r12
+ bl V_LOCAL_FUNC(__get_datapage)
+ lwz r5, CLOCK_HRTIMER_RES(r3)
+ mtlr r12
li r3,0
cmpldi cr0,r4,0
crclr cr0*4+so
beqlr
- lis r5,CLOCK_REALTIME_RES@h
- ori r5,r5,CLOCK_REALTIME_RES@l
std r3,TSPC64_TV_SEC(r4)
std r5,TSPC64_TV_NSEC(r4)
blr
--
2.13.3
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH v2 3/8] powerpc: Fix vDSO clock_getres()
2019-08-22 16:34 ` [PATCH v2 3/8] powerpc: Fix vDSO clock_getres() Christophe Leroy
@ 2019-08-22 17:17 ` Sasha Levin
0 siblings, 0 replies; 18+ messages in thread
From: Sasha Levin @ 2019-08-22 17:17 UTC (permalink / raw)
To: Sasha Levin, Christophe Leroy, Vincenzo Frascino, Benjamin Herrenschmidt
Cc: linux-kernel, stable, Paul Mackerras, linuxppc-dev
Hi,
[This is an automated email]
This commit has been processed because it contains a "Fixes:" tag,
fixing commit: a7f290dad32e [PATCH] powerpc: Merge vdso's and add vdso support to 32 bits kernel.
The bot has tested the following trees: v5.2.9, v4.19.67, v4.14.139, v4.9.189, v4.4.189.
v5.2.9: Build OK!
v4.19.67: Build OK!
v4.14.139: Failed to apply! Possible dependencies:
5c929885f1bb ("powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE")
b5b4453e7912 ("powerpc/vdso64: Fix CLOCK_MONOTONIC inconsistencies across Y2038")
v4.9.189: Failed to apply! Possible dependencies:
454656155110 ("powerpc/asm: Use OFFSET macro in asm-offsets.c")
5c929885f1bb ("powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE")
5d451a87e5eb ("powerpc/64: Retrieve number of L1 cache sets from device-tree")
7c5b06cadf27 ("KVM: PPC: Book3S HV: Adapt TLB invalidations to work on POWER9")
83677f551e0a ("KVM: PPC: Book3S HV: Adjust host/guest context switch for POWER9")
902e06eb86cd ("powerpc/32: Change the stack protector canary value per task")
b5b4453e7912 ("powerpc/vdso64: Fix CLOCK_MONOTONIC inconsistencies across Y2038")
bd067f83b084 ("powerpc/64: Fix naming of cache block vs. cache line")
e2827fe5c156 ("powerpc/64: Clean up ppc64_caches using a struct per cache")
e9cf1e085647 ("KVM: PPC: Book3S HV: Add new POWER9 guest-accessible SPRs")
f4c51f841d2a ("KVM: PPC: Book3S HV: Modify guest entry/exit paths to handle radix guests")
v4.4.189: Failed to apply! Possible dependencies:
153086644fd1 ("powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI")
3eb5d5888dc6 ("powerpc: Add ppc_strict_facility_enable boot option")
454656155110 ("powerpc/asm: Use OFFSET macro in asm-offsets.c")
579e633e764e ("powerpc: create flush_all_to_thread()")
5c929885f1bb ("powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE")
70fe3d980f5f ("powerpc: Restore FPU/VEC/VSX if previously used")
85baa095497f ("powerpc/livepatch: Add live patching support on ppc64le")
902e06eb86cd ("powerpc/32: Change the stack protector canary value per task")
b5b4453e7912 ("powerpc/vdso64: Fix CLOCK_MONOTONIC inconsistencies across Y2038")
bf76f73c5f65 ("powerpc: enable UBSAN support")
c208505900b2 ("powerpc: create giveup_all()")
d1e1cf2e38de ("powerpc: clean up asm/switch_to.h")
dc4fbba11e46 ("powerpc: Create disable_kernel_{fp,altivec,vsx,spe}()")
f17c4e01e906 ("powerpc/module: Mark module stubs with a magic value")
NOTE: The patch will not be queued to stable trees until it is upstream.
How should we proceed with this patch?
--
Thanks,
Sasha
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()
2019-08-22 16:34 [PATCH v2 0/8] powerpc/vdso32 enhancement and optimisation Christophe Leroy
` (2 preceding siblings ...)
2019-08-22 16:34 ` [PATCH v2 3/8] powerpc: Fix vDSO clock_getres() Christophe Leroy
@ 2019-08-22 16:34 ` Christophe Leroy
2019-08-26 5:44 ` Santosh Sivaraj
2019-08-22 16:34 ` [PATCH v2 5/8] powerpc/vdso32: Don't read cache line size from the datapage on PPC32 Christophe Leroy
` (3 subsequent siblings)
7 siblings, 1 reply; 18+ messages in thread
From: Christophe Leroy @ 2019-08-22 16:34 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
__get_datapage() is only a few instructions to retrieve the
address of the page where the kernel stores data to the VDSO.
By inlining this function into its users, a bl/blr pair and
a mflr/mtlr pair is avoided, plus a few reg moves.
The improvement is noticeable (about 55 nsec/call on an 8xx)
vdsotest before the patch:
gettimeofday: vdso: 731 nsec/call
clock-gettime-realtime-coarse: vdso: 668 nsec/call
clock-gettime-monotonic-coarse: vdso: 745 nsec/call
vdsotest after the patch:
gettimeofday: vdso: 677 nsec/call
clock-gettime-realtime-coarse: vdso: 613 nsec/call
clock-gettime-monotonic-coarse: vdso: 690 nsec/call
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-------------------------
arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++
arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
4 files changed, 26 insertions(+), 37 deletions(-)
create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
index 7f882e7b9f43..e9453837e4ee 100644
--- a/arch/powerpc/kernel/vdso32/cacheflush.S
+++ b/arch/powerpc/kernel/vdso32/cacheflush.S
@@ -10,6 +10,8 @@
#include <asm/vdso.h>
#include <asm/asm-offsets.h>
+#include "datapage.h"
+
.text
/*
@@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- mr r11,r3
- bl __get_datapage@local
+ get_datapage r10, r0
mtlr r12
- mr r10,r3
lwz r7,CFG_DCACHE_BLOCKSZ(r10)
addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
+ andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5 /* ensure we get enough */
lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
@@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
lwz r7,CFG_ICACHE_BLOCKSZ(r10)
addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
+ andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5
lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index 6984125b9fc0..d480d2d4a3fe 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -11,34 +11,13 @@
#include <asm/unistd.h>
#include <asm/vdso.h>
+#include "datapage.h"
+
.text
.global __kernel_datapage_offset;
__kernel_datapage_offset:
.long 0
-V_FUNCTION_BEGIN(__get_datapage)
- .cfi_startproc
- /* We don't want that exposed or overridable as we want other objects
- * to be able to bl directly to here
- */
- .protected __get_datapage
- .hidden __get_datapage
-
- mflr r0
- .cfi_register lr,r0
-
- bcl 20,31,data_page_branch
-data_page_branch:
- mflr r3
- mtlr r0
- addi r3, r3, __kernel_datapage_offset-data_page_branch
- lwz r0,0(r3)
- .cfi_restore lr
- add r3,r0,r3
- blr
- .cfi_endproc
-V_FUNCTION_END(__get_datapage)
-
/*
* void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
*
@@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
mflr r12
.cfi_register lr,r12
mr r4,r3
- bl __get_datapage@local
+ get_datapage r3, r0
mtlr r12
addi r3,r3,CFG_SYSCALL_MAP32
cmpli cr0,r4,0
@@ -74,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- bl __get_datapage@local
+ get_datapage r3, r0
lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
lwz r3,CFG_TB_TICKS_PER_SEC(r3)
mtlr r12
diff --git a/arch/powerpc/kernel/vdso32/datapage.h b/arch/powerpc/kernel/vdso32/datapage.h
new file mode 100644
index 000000000000..74f4f57c2da8
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/datapage.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+.macro get_datapage ptr, tmp
+ bcl 20,31,.+4
+ mflr \ptr
+ addi \ptr, \ptr, __kernel_datapage_offset - (.-4)
+ lwz \tmp, 0(\ptr)
+ add \ptr, \tmp, \ptr
+.endm
+
+
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index 355b537d327a..3e55cba19f44 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -12,6 +12,8 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
+#include "datapage.h"
+
/* Offset for the low 32-bit part of a field of long type */
#ifdef CONFIG_PPC64
#define LOPART 4
@@ -35,8 +37,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
mr r10,r3 /* r10 saves tv */
mr r11,r4 /* r11 saves tz */
- bl __get_datapage@local /* get data page */
- mr r9, r3 /* datapage ptr in r9 */
+ get_datapage r9, r0
cmplwi r10,0 /* check if tv is NULL */
beq 3f
lis r7,1000000@ha /* load up USEC_PER_SEC */
@@ -82,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
mflr r12 /* r12 saves lr */
.cfi_register lr,r12
mr r11,r4 /* r11 saves tp */
- bl __get_datapage@local /* get data page */
- mr r9,r3 /* datapage ptr in r9 */
+ get_datapage r9, r0
lis r7,NSEC_PER_SEC@h /* want nanoseconds */
ori r7,r7,NSEC_PER_SEC@l
beq cr5, .Lcoarse_clocks
@@ -208,7 +208,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
mflr r12
.cfi_register lr,r12
- bl __get_datapage@local /* get data page */
+ get_datapage r3, r0
lwz r5, CLOCK_HRTIMER_RES(r3)
mtlr r12
li r3,0
@@ -242,8 +242,7 @@ V_FUNCTION_BEGIN(__kernel_time)
.cfi_register lr,r12
mr r11,r3 /* r11 holds t */
- bl __get_datapage@local
- mr r9, r3 /* datapage ptr in r9 */
+ get_datapage r9, r0
lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
--
2.13.3
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()
2019-08-22 16:34 ` [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage() Christophe Leroy
@ 2019-08-26 5:44 ` Santosh Sivaraj
2019-09-13 10:59 ` Christophe Leroy
2019-10-29 16:12 ` Christophe Leroy
0 siblings, 2 replies; 18+ messages in thread
From: Santosh Sivaraj @ 2019-08-26 5:44 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Michael Ellerman
Cc: linuxppc-dev, linux-kernel
Hi Christophe,
Christophe Leroy <christophe.leroy@c-s.fr> writes:
> __get_datapage() is only a few instructions to retrieve the
> address of the page where the kernel stores data to the VDSO.
>
> By inlining this function into its users, a bl/blr pair and
> a mflr/mtlr pair is avoided, plus a few reg moves.
>
> The improvement is noticeable (about 55 nsec/call on an 8xx)
>
> vdsotest before the patch:
> gettimeofday: vdso: 731 nsec/call
> clock-gettime-realtime-coarse: vdso: 668 nsec/call
> clock-gettime-monotonic-coarse: vdso: 745 nsec/call
>
> vdsotest after the patch:
> gettimeofday: vdso: 677 nsec/call
> clock-gettime-realtime-coarse: vdso: 613 nsec/call
> clock-gettime-monotonic-coarse: vdso: 690 nsec/call
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-------------------------
> arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++
> arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
> 4 files changed, 26 insertions(+), 37 deletions(-)
> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
The datapage.h file should ideally be moved under include/asm, then we can use
the same for powerpc64 too.
Santosh
>
> diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
> index 7f882e7b9f43..e9453837e4ee 100644
> --- a/arch/powerpc/kernel/vdso32/cacheflush.S
> +++ b/arch/powerpc/kernel/vdso32/cacheflush.S
> @@ -10,6 +10,8 @@
> #include <asm/vdso.h>
> #include <asm/asm-offsets.h>
>
> +#include "datapage.h"
> +
> .text
>
> /*
> @@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
> .cfi_startproc
> mflr r12
> .cfi_register lr,r12
> - mr r11,r3
> - bl __get_datapage@local
> + get_datapage r10, r0
> mtlr r12
> - mr r10,r3
>
> lwz r7,CFG_DCACHE_BLOCKSZ(r10)
> addi r5,r7,-1
> - andc r6,r11,r5 /* round low to line bdy */
> + andc r6,r3,r5 /* round low to line bdy */
> subf r8,r6,r4 /* compute length */
> add r8,r8,r5 /* ensure we get enough */
> lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
> @@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>
> lwz r7,CFG_ICACHE_BLOCKSZ(r10)
> addi r5,r7,-1
> - andc r6,r11,r5 /* round low to line bdy */
> + andc r6,r3,r5 /* round low to line bdy */
> subf r8,r6,r4 /* compute length */
> add r8,r8,r5
> lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
> diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
> index 6984125b9fc0..d480d2d4a3fe 100644
> --- a/arch/powerpc/kernel/vdso32/datapage.S
> +++ b/arch/powerpc/kernel/vdso32/datapage.S
> @@ -11,34 +11,13 @@
> #include <asm/unistd.h>
> #include <asm/vdso.h>
>
> +#include "datapage.h"
> +
> .text
> .global __kernel_datapage_offset;
> __kernel_datapage_offset:
> .long 0
>
> -V_FUNCTION_BEGIN(__get_datapage)
> - .cfi_startproc
> - /* We don't want that exposed or overridable as we want other objects
> - * to be able to bl directly to here
> - */
> - .protected __get_datapage
> - .hidden __get_datapage
> -
> - mflr r0
> - .cfi_register lr,r0
> -
> - bcl 20,31,data_page_branch
> -data_page_branch:
> - mflr r3
> - mtlr r0
> - addi r3, r3, __kernel_datapage_offset-data_page_branch
> - lwz r0,0(r3)
> - .cfi_restore lr
> - add r3,r0,r3
> - blr
> - .cfi_endproc
> -V_FUNCTION_END(__get_datapage)
> -
> /*
> * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
> *
> @@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
> mflr r12
> .cfi_register lr,r12
> mr r4,r3
> - bl __get_datapage@local
> + get_datapage r3, r0
> mtlr r12
> addi r3,r3,CFG_SYSCALL_MAP32
> cmpli cr0,r4,0
> @@ -74,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
> .cfi_startproc
> mflr r12
> .cfi_register lr,r12
> - bl __get_datapage@local
> + get_datapage r3, r0
> lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
> lwz r3,CFG_TB_TICKS_PER_SEC(r3)
> mtlr r12
> diff --git a/arch/powerpc/kernel/vdso32/datapage.h b/arch/powerpc/kernel/vdso32/datapage.h
> new file mode 100644
> index 000000000000..74f4f57c2da8
> --- /dev/null
> +++ b/arch/powerpc/kernel/vdso32/datapage.h
> @@ -0,0 +1,11 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +
> +.macro get_datapage ptr, tmp
> + bcl 20,31,.+4
> + mflr \ptr
> + addi \ptr, \ptr, __kernel_datapage_offset - (.-4)
> + lwz \tmp, 0(\ptr)
> + add \ptr, \tmp, \ptr
> +.endm
> +
> +
> diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
> index 355b537d327a..3e55cba19f44 100644
> --- a/arch/powerpc/kernel/vdso32/gettimeofday.S
> +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
> @@ -12,6 +12,8 @@
> #include <asm/asm-offsets.h>
> #include <asm/unistd.h>
>
> +#include "datapage.h"
> +
> /* Offset for the low 32-bit part of a field of long type */
> #ifdef CONFIG_PPC64
> #define LOPART 4
> @@ -35,8 +37,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
>
> mr r10,r3 /* r10 saves tv */
> mr r11,r4 /* r11 saves tz */
> - bl __get_datapage@local /* get data page */
> - mr r9, r3 /* datapage ptr in r9 */
> + get_datapage r9, r0
> cmplwi r10,0 /* check if tv is NULL */
> beq 3f
> lis r7,1000000@ha /* load up USEC_PER_SEC */
> @@ -82,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
> mflr r12 /* r12 saves lr */
> .cfi_register lr,r12
> mr r11,r4 /* r11 saves tp */
> - bl __get_datapage@local /* get data page */
> - mr r9,r3 /* datapage ptr in r9 */
> + get_datapage r9, r0
> lis r7,NSEC_PER_SEC@h /* want nanoseconds */
> ori r7,r7,NSEC_PER_SEC@l
> beq cr5, .Lcoarse_clocks
> @@ -208,7 +208,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
>
> mflr r12
> .cfi_register lr,r12
> - bl __get_datapage@local /* get data page */
> + get_datapage r3, r0
> lwz r5, CLOCK_HRTIMER_RES(r3)
> mtlr r12
> li r3,0
> @@ -242,8 +242,7 @@ V_FUNCTION_BEGIN(__kernel_time)
> .cfi_register lr,r12
>
> mr r11,r3 /* r11 holds t */
> - bl __get_datapage@local
> - mr r9, r3 /* datapage ptr in r9 */
> + get_datapage r9, r0
>
> lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
>
> --
> 2.13.3
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()
2019-08-26 5:44 ` Santosh Sivaraj
@ 2019-09-13 10:59 ` Christophe Leroy
2019-09-13 13:31 ` Santosh Sivaraj
2019-10-29 16:12 ` Christophe Leroy
1 sibling, 1 reply; 18+ messages in thread
From: Christophe Leroy @ 2019-09-13 10:59 UTC (permalink / raw)
To: Santosh Sivaraj, Benjamin Herrenschmidt, Paul Mackerras,
Michael Ellerman
Cc: linuxppc-dev, linux-kernel
Hi Santosh,
Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit :
> Hi Christophe,
>
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>
>> __get_datapage() is only a few instructions to retrieve the
>> address of the page where the kernel stores data to the VDSO.
>>
>> By inlining this function into its users, a bl/blr pair and
>> a mflr/mtlr pair is avoided, plus a few reg moves.
>>
>> The improvement is noticeable (about 55 nsec/call on an 8xx)
>>
>> vdsotest before the patch:
>> gettimeofday: vdso: 731 nsec/call
>> clock-gettime-realtime-coarse: vdso: 668 nsec/call
>> clock-gettime-monotonic-coarse: vdso: 745 nsec/call
>>
>> vdsotest after the patch:
>> gettimeofday: vdso: 677 nsec/call
>> clock-gettime-realtime-coarse: vdso: 613 nsec/call
>> clock-gettime-monotonic-coarse: vdso: 690 nsec/call
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>> ---
>> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
>> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-------------------------
>> arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++
>> arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
>> 4 files changed, 26 insertions(+), 37 deletions(-)
>> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
>
> The datapage.h file should ideally be moved under include/asm, then we can use
> the same for powerpc64 too.
I have a more ambitious project indeed.
Most of the VDSO code is duplicated between vdso32 and vdso64. I'm
aiming at merging everything into a single source code.
This means we would have to generate vdso32.so and vdso64.so out of the
same source files. Any idea on how to do that ? I'm not too good at
creating Makefiles. I guess we would have everything in
arch/powerpc/kernel/vdso/ and would have to build the objects twice,
once in arch/powerpc/kernel/vdso32/ and once in arch/powerpc/kernel/vdso64/
Christophe
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()
2019-09-13 10:59 ` Christophe Leroy
@ 2019-09-13 13:31 ` Santosh Sivaraj
2019-09-13 13:50 ` Christophe Leroy
0 siblings, 1 reply; 18+ messages in thread
From: Santosh Sivaraj @ 2019-09-13 13:31 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Michael Ellerman
Cc: linuxppc-dev, linux-kernel
Christophe Leroy <christophe.leroy@c-s.fr> writes:
> Hi Santosh,
>
> Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit :
>> Hi Christophe,
>>
>> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>>
>>> __get_datapage() is only a few instructions to retrieve the
>>> address of the page where the kernel stores data to the VDSO.
>>>
>>> By inlining this function into its users, a bl/blr pair and
>>> a mflr/mtlr pair is avoided, plus a few reg moves.
>>>
>>> The improvement is noticeable (about 55 nsec/call on an 8xx)
>>>
>>> vdsotest before the patch:
>>> gettimeofday: vdso: 731 nsec/call
>>> clock-gettime-realtime-coarse: vdso: 668 nsec/call
>>> clock-gettime-monotonic-coarse: vdso: 745 nsec/call
>>>
>>> vdsotest after the patch:
>>> gettimeofday: vdso: 677 nsec/call
>>> clock-gettime-realtime-coarse: vdso: 613 nsec/call
>>> clock-gettime-monotonic-coarse: vdso: 690 nsec/call
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>>> ---
>>> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
>>> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-------------------------
>>> arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++
>>> arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
>>> 4 files changed, 26 insertions(+), 37 deletions(-)
>>> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
>>
>> The datapage.h file should ideally be moved under include/asm, then we can use
>> the same for powerpc64 too.
>
> I have a more ambitious project indeed.
>
> Most of the VDSO code is duplicated between vdso32 and vdso64. I'm
> aiming at merging everything into a single source code.
>
> This means we would have to generate vdso32.so and vdso64.so out of the
> same source files. Any idea on how to do that ? I'm not too good at
> creating Makefiles. I guess we would have everything in
> arch/powerpc/kernel/vdso/ and would have to build the objects twice,
> once in arch/powerpc/kernel/vdso32/ and once in arch/powerpc/kernel/vdso64/
Should we need to build the objects twice? For 64 bit config it is going to be
a 64 bit build else a 32 bit build. It should suffice to get the single source
code compile for both, maybe with macros or (!)CONFIG_PPC64 conditional
compilation. Am I missing something when you say build twice?
Thanks,
Santosh
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()
2019-09-13 13:31 ` Santosh Sivaraj
@ 2019-09-13 13:50 ` Christophe Leroy
2019-09-13 14:03 ` Santosh Sivaraj
0 siblings, 1 reply; 18+ messages in thread
From: Christophe Leroy @ 2019-09-13 13:50 UTC (permalink / raw)
To: Santosh Sivaraj, Benjamin Herrenschmidt, Paul Mackerras,
Michael Ellerman
Cc: linuxppc-dev, linux-kernel
Le 13/09/2019 à 15:31, Santosh Sivaraj a écrit :
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>
>> Hi Santosh,
>>
>> Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit :
>>> Hi Christophe,
>>>
>>> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>>>
>>>> __get_datapage() is only a few instructions to retrieve the
>>>> address of the page where the kernel stores data to the VDSO.
>>>>
>>>> By inlining this function into its users, a bl/blr pair and
>>>> a mflr/mtlr pair is avoided, plus a few reg moves.
>>>>
>>>> The improvement is noticeable (about 55 nsec/call on an 8xx)
>>>>
>>>> vdsotest before the patch:
>>>> gettimeofday: vdso: 731 nsec/call
>>>> clock-gettime-realtime-coarse: vdso: 668 nsec/call
>>>> clock-gettime-monotonic-coarse: vdso: 745 nsec/call
>>>>
>>>> vdsotest after the patch:
>>>> gettimeofday: vdso: 677 nsec/call
>>>> clock-gettime-realtime-coarse: vdso: 613 nsec/call
>>>> clock-gettime-monotonic-coarse: vdso: 690 nsec/call
>>>>
>>>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>>>> ---
>>>> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
>>>> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-------------------------
>>>> arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++
>>>> arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
>>>> 4 files changed, 26 insertions(+), 37 deletions(-)
>>>> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
>>>
>>> The datapage.h file should ideally be moved under include/asm, then we can use
>>> the same for powerpc64 too.
>>
>> I have a more ambitious project indeed.
>>
>> Most of the VDSO code is duplicated between vdso32 and vdso64. I'm
>> aiming at merging everything into a single source code.
>>
>> This means we would have to generate vdso32.so and vdso64.so out of the
>> same source files. Any idea on how to do that ? I'm not too good at
>> creating Makefiles. I guess we would have everything in
>> arch/powerpc/kernel/vdso/ and would have to build the objects twice,
>> once in arch/powerpc/kernel/vdso32/ and once in arch/powerpc/kernel/vdso64/
>
> Should we need to build the objects twice? For 64 bit config it is going to be
> a 64 bit build else a 32 bit build. It should suffice to get the single source
> code compile for both, maybe with macros or (!)CONFIG_PPC64 conditional
> compilation. Am I missing something when you say build twice?
>
IIUC, on PPC64 we build vdso64 for 64bits user apps and vdso32 for
32bits user apps.
In arch/powerpc/kernel/Makefile, you have:
obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC64) += vdso64/
And in arch/powerpc/platforms/Kconfig.cputype, you have:
config VDSO32
def_bool y
depends on PPC32 || CPU_BIG_ENDIAN
help
This symbol controls whether we build the 32-bit VDSO. We obviously
want to do that if we're building a 32-bit kernel. If we're building
a 64-bit kernel then we only want a 32-bit VDSO if we're building for
big endian. That is because the only little endian configuration we
support is ppc64le which is 64-bit only.
Christophe
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()
2019-09-13 13:50 ` Christophe Leroy
@ 2019-09-13 14:03 ` Santosh Sivaraj
0 siblings, 0 replies; 18+ messages in thread
From: Santosh Sivaraj @ 2019-09-13 14:03 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
Michael Ellerman
Cc: linuxppc-dev, linux-kernel
Christophe Leroy <christophe.leroy@c-s.fr> writes:
> Le 13/09/2019 à 15:31, Santosh Sivaraj a écrit :
>> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>>
>>> Hi Santosh,
>>>
>>> Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit :
>>>> Hi Christophe,
>>>>
>>>> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>>>>
>>>>> __get_datapage() is only a few instructions to retrieve the
>>>>> address of the page where the kernel stores data to the VDSO.
>>>>>
>>>>> By inlining this function into its users, a bl/blr pair and
>>>>> a mflr/mtlr pair is avoided, plus a few reg moves.
>>>>>
>>>>> The improvement is noticeable (about 55 nsec/call on an 8xx)
>>>>>
>>>>> vdsotest before the patch:
>>>>> gettimeofday: vdso: 731 nsec/call
>>>>> clock-gettime-realtime-coarse: vdso: 668 nsec/call
>>>>> clock-gettime-monotonic-coarse: vdso: 745 nsec/call
>>>>>
>>>>> vdsotest after the patch:
>>>>> gettimeofday: vdso: 677 nsec/call
>>>>> clock-gettime-realtime-coarse: vdso: 613 nsec/call
>>>>> clock-gettime-monotonic-coarse: vdso: 690 nsec/call
>>>>>
>>>>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>>>>> ---
>>>>> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
>>>>> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-------------------------
>>>>> arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++
>>>>> arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
>>>>> 4 files changed, 26 insertions(+), 37 deletions(-)
>>>>> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
>>>>
>>>> The datapage.h file should ideally be moved under include/asm, then we can use
>>>> the same for powerpc64 too.
>>>
>>> I have a more ambitious project indeed.
>>>
>>> Most of the VDSO code is duplicated between vdso32 and vdso64. I'm
>>> aiming at merging everything into a single source code.
>>>
>>> This means we would have to generate vdso32.so and vdso64.so out of the
>>> same source files. Any idea on how to do that ? I'm not too good at
>>> creating Makefiles. I guess we would have everything in
>>> arch/powerpc/kernel/vdso/ and would have to build the objects twice,
>>> once in arch/powerpc/kernel/vdso32/ and once in arch/powerpc/kernel/vdso64/
>>
>> Should we need to build the objects twice? For 64 bit config it is going to be
>> a 64 bit build else a 32 bit build. It should suffice to get the single source
>> code compile for both, maybe with macros or (!)CONFIG_PPC64 conditional
>> compilation. Am I missing something when you say build twice?
>>
>
> IIUC, on PPC64 we build vdso64 for 64bits user apps and vdso32 for
> 32bits user apps.
>
> In arch/powerpc/kernel/Makefile, you have:
>
> obj-$(CONFIG_VDSO32) += vdso32/
> obj-$(CONFIG_PPC64) += vdso64/
>
> And in arch/powerpc/platforms/Kconfig.cputype, you have:
>
> config VDSO32
> def_bool y
> depends on PPC32 || CPU_BIG_ENDIAN
> help
> This symbol controls whether we build the 32-bit VDSO. We obviously
> want to do that if we're building a 32-bit kernel. If we're building
> a 64-bit kernel then we only want a 32-bit VDSO if we're building for
> big endian. That is because the only little endian configuration we
> support is ppc64le which is 64-bit only.
>
I didn't know we build 32 bit vdso for 64 bit big endians. But I don't think
its difficult to do it, might be a bit tricky. We can have two targets from
the same source.
SRC = vdso/*.c
OBJS_32 = $(SRC:.c=vdso32/.o)
OBJS_64 = $(SRC:.c=vdso64/.o)
Something like this would work. Of course, this is out of memory, might have to
do something slightly different for the Makefiles in kernel.
Thanks,
Santosh
>
>
>
> Christophe
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()
2019-08-26 5:44 ` Santosh Sivaraj
2019-09-13 10:59 ` Christophe Leroy
@ 2019-10-29 16:12 ` Christophe Leroy
1 sibling, 0 replies; 18+ messages in thread
From: Christophe Leroy @ 2019-10-29 16:12 UTC (permalink / raw)
To: Santosh Sivaraj, Benjamin Herrenschmidt, Paul Mackerras,
Michael Ellerman
Cc: linuxppc-dev, linux-kernel
Hi Santosh,
Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit :
> Hi Christophe,
>
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>
>> __get_datapage() is only a few instructions to retrieve the
>> address of the page where the kernel stores data to the VDSO.
>>
>> By inlining this function into its users, a bl/blr pair and
>> a mflr/mtlr pair is avoided, plus a few reg moves.
>>
>> The improvement is noticeable (about 55 nsec/call on an 8xx)
>>
>> vdsotest before the patch:
>> gettimeofday: vdso: 731 nsec/call
>> clock-gettime-realtime-coarse: vdso: 668 nsec/call
>> clock-gettime-monotonic-coarse: vdso: 745 nsec/call
>>
>> vdsotest after the patch:
>> gettimeofday: vdso: 677 nsec/call
>> clock-gettime-realtime-coarse: vdso: 613 nsec/call
>> clock-gettime-monotonic-coarse: vdso: 690 nsec/call
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>> ---
>> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
>> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-------------------------
>> arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++
>> arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
>> 4 files changed, 26 insertions(+), 37 deletions(-)
>> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
>
> The datapage.h file should ideally be moved under include/asm, then we can use
> the same for powerpc64 too.
Finally, I added the get_datapage macro to the existing asm/vdso_datapage.h
Christophe
>
> Santosh
>
>>
>> diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
>> index 7f882e7b9f43..e9453837e4ee 100644
>> --- a/arch/powerpc/kernel/vdso32/cacheflush.S
>> +++ b/arch/powerpc/kernel/vdso32/cacheflush.S
>> @@ -10,6 +10,8 @@
>> #include <asm/vdso.h>
>> #include <asm/asm-offsets.h>
>>
>> +#include "datapage.h"
>> +
>> .text
>>
>> /*
>> @@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>> .cfi_startproc
>> mflr r12
>> .cfi_register lr,r12
>> - mr r11,r3
>> - bl __get_datapage@local
>> + get_datapage r10, r0
>> mtlr r12
>> - mr r10,r3
>>
>> lwz r7,CFG_DCACHE_BLOCKSZ(r10)
>> addi r5,r7,-1
>> - andc r6,r11,r5 /* round low to line bdy */
>> + andc r6,r3,r5 /* round low to line bdy */
>> subf r8,r6,r4 /* compute length */
>> add r8,r8,r5 /* ensure we get enough */
>> lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
>> @@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>>
>> lwz r7,CFG_ICACHE_BLOCKSZ(r10)
>> addi r5,r7,-1
>> - andc r6,r11,r5 /* round low to line bdy */
>> + andc r6,r3,r5 /* round low to line bdy */
>> subf r8,r6,r4 /* compute length */
>> add r8,r8,r5
>> lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
>> diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
>> index 6984125b9fc0..d480d2d4a3fe 100644
>> --- a/arch/powerpc/kernel/vdso32/datapage.S
>> +++ b/arch/powerpc/kernel/vdso32/datapage.S
>> @@ -11,34 +11,13 @@
>> #include <asm/unistd.h>
>> #include <asm/vdso.h>
>>
>> +#include "datapage.h"
>> +
>> .text
>> .global __kernel_datapage_offset;
>> __kernel_datapage_offset:
>> .long 0
>>
>> -V_FUNCTION_BEGIN(__get_datapage)
>> - .cfi_startproc
>> - /* We don't want that exposed or overridable as we want other objects
>> - * to be able to bl directly to here
>> - */
>> - .protected __get_datapage
>> - .hidden __get_datapage
>> -
>> - mflr r0
>> - .cfi_register lr,r0
>> -
>> - bcl 20,31,data_page_branch
>> -data_page_branch:
>> - mflr r3
>> - mtlr r0
>> - addi r3, r3, __kernel_datapage_offset-data_page_branch
>> - lwz r0,0(r3)
>> - .cfi_restore lr
>> - add r3,r0,r3
>> - blr
>> - .cfi_endproc
>> -V_FUNCTION_END(__get_datapage)
>> -
>> /*
>> * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
>> *
>> @@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
>> mflr r12
>> .cfi_register lr,r12
>> mr r4,r3
>> - bl __get_datapage@local
>> + get_datapage r3, r0
>> mtlr r12
>> addi r3,r3,CFG_SYSCALL_MAP32
>> cmpli cr0,r4,0
>> @@ -74,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
>> .cfi_startproc
>> mflr r12
>> .cfi_register lr,r12
>> - bl __get_datapage@local
>> + get_datapage r3, r0
>> lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
>> lwz r3,CFG_TB_TICKS_PER_SEC(r3)
>> mtlr r12
>> diff --git a/arch/powerpc/kernel/vdso32/datapage.h b/arch/powerpc/kernel/vdso32/datapage.h
>> new file mode 100644
>> index 000000000000..74f4f57c2da8
>> --- /dev/null
>> +++ b/arch/powerpc/kernel/vdso32/datapage.h
>> @@ -0,0 +1,11 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +
>> +.macro get_datapage ptr, tmp
>> + bcl 20,31,.+4
>> + mflr \ptr
>> + addi \ptr, \ptr, __kernel_datapage_offset - (.-4)
>> + lwz \tmp, 0(\ptr)
>> + add \ptr, \tmp, \ptr
>> +.endm
>> +
>> +
>> diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
>> index 355b537d327a..3e55cba19f44 100644
>> --- a/arch/powerpc/kernel/vdso32/gettimeofday.S
>> +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
>> @@ -12,6 +12,8 @@
>> #include <asm/asm-offsets.h>
>> #include <asm/unistd.h>
>>
>> +#include "datapage.h"
>> +
>> /* Offset for the low 32-bit part of a field of long type */
>> #ifdef CONFIG_PPC64
>> #define LOPART 4
>> @@ -35,8 +37,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
>>
>> mr r10,r3 /* r10 saves tv */
>> mr r11,r4 /* r11 saves tz */
>> - bl __get_datapage@local /* get data page */
>> - mr r9, r3 /* datapage ptr in r9 */
>> + get_datapage r9, r0
>> cmplwi r10,0 /* check if tv is NULL */
>> beq 3f
>> lis r7,1000000@ha /* load up USEC_PER_SEC */
>> @@ -82,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
>> mflr r12 /* r12 saves lr */
>> .cfi_register lr,r12
>> mr r11,r4 /* r11 saves tp */
>> - bl __get_datapage@local /* get data page */
>> - mr r9,r3 /* datapage ptr in r9 */
>> + get_datapage r9, r0
>> lis r7,NSEC_PER_SEC@h /* want nanoseconds */
>> ori r7,r7,NSEC_PER_SEC@l
>> beq cr5, .Lcoarse_clocks
>> @@ -208,7 +208,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
>>
>> mflr r12
>> .cfi_register lr,r12
>> - bl __get_datapage@local /* get data page */
>> + get_datapage r3, r0
>> lwz r5, CLOCK_HRTIMER_RES(r3)
>> mtlr r12
>> li r3,0
>> @@ -242,8 +242,7 @@ V_FUNCTION_BEGIN(__kernel_time)
>> .cfi_register lr,r12
>>
>> mr r11,r3 /* r11 holds t */
>> - bl __get_datapage@local
>> - mr r9, r3 /* datapage ptr in r9 */
>> + get_datapage r9, r0
>>
>> lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
>>
>> --
>> 2.13.3
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 5/8] powerpc/vdso32: Don't read cache line size from the datapage on PPC32.
2019-08-22 16:34 [PATCH v2 0/8] powerpc/vdso32 enhancement and optimisation Christophe Leroy
` (3 preceding siblings ...)
2019-08-22 16:34 ` [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage() Christophe Leroy
@ 2019-08-22 16:34 ` Christophe Leroy
2019-08-22 16:34 ` [PATCH v2 6/8] powerpc/vdso32: use LOAD_REG_IMMEDIATE() Christophe Leroy
` (2 subsequent siblings)
7 siblings, 0 replies; 18+ messages in thread
From: Christophe Leroy @ 2019-08-22 16:34 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
On PPC32, the cache lines have a fixed size known at build time.
Don't read it from the datapage.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/vdso_datapage.h | 4 ----
arch/powerpc/kernel/asm-offsets.c | 2 +-
arch/powerpc/kernel/vdso.c | 5 -----
arch/powerpc/kernel/vdso32/cacheflush.S | 23 +++++++++++++++++++++++
4 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h
index 2ccb938d8544..869df2f43400 100644
--- a/arch/powerpc/include/asm/vdso_datapage.h
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -106,10 +106,6 @@ struct vdso_data {
__u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */
__u32 hrtimer_res; /* hrtimer resolution */
__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
- __u32 dcache_block_size; /* L1 d-cache block size */
- __u32 icache_block_size; /* L1 i-cache block size */
- __u32 dcache_log_block_size; /* L1 d-cache log block size */
- __u32 icache_log_block_size; /* L1 i-cache log block size */
};
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6279053967fd..b6328a90cad7 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -388,11 +388,11 @@ int main(void)
OFFSET(STAMP_XTIME, vdso_data, stamp_xtime);
OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction);
OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res);
+#ifdef CONFIG_PPC64
OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size);
OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size);
OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size);
OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_data, dcache_log_block_size);
-#ifdef CONFIG_PPC64
OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64);
OFFSET(TVAL64_TV_SEC, timeval, tv_sec);
OFFSET(TVAL64_TV_USEC, timeval, tv_usec);
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index d60598113a9f..87d43e43e2e7 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -750,11 +750,6 @@ static int __init vdso_init(void)
*/
vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages);
-#else
- vdso_data->dcache_block_size = L1_CACHE_BYTES;
- vdso_data->dcache_log_block_size = L1_CACHE_SHIFT;
- vdso_data->icache_block_size = L1_CACHE_BYTES;
- vdso_data->icache_log_block_size = L1_CACHE_SHIFT;
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
index e9453837e4ee..b9340a89984e 100644
--- a/arch/powerpc/kernel/vdso32/cacheflush.S
+++ b/arch/powerpc/kernel/vdso32/cacheflush.S
@@ -9,6 +9,7 @@
#include <asm/ppc_asm.h>
#include <asm/vdso.h>
#include <asm/asm-offsets.h>
+#include <asm/cache.h>
#include "datapage.h"
@@ -24,28 +25,44 @@
*/
V_FUNCTION_BEGIN(__kernel_sync_dicache)
.cfi_startproc
+#ifdef CONFIG_PPC64
mflr r12
.cfi_register lr,r12
get_datapage r10, r0
mtlr r12
+#endif
+#ifdef CONFIG_PPC64
lwz r7,CFG_DCACHE_BLOCKSZ(r10)
addi r5,r7,-1
+#else
+ li r5, L1_CACHE_BYTES - 1
+#endif
andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5 /* ensure we get enough */
+#ifdef CONFIG_PPC64
lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
srw. r8,r8,r9 /* compute line count */
+#else
+ srwi. r8, r8, L1_CACHE_SHIFT
+ mr r7, r6
+#endif
crclr cr0*4+so
beqlr /* nothing to do? */
mtctr r8
1: dcbst 0,r6
+#ifdef CONFIG_PPC64
add r6,r6,r7
+#else
+ addi r6, r6, L1_CACHE_BYTES
+#endif
bdnz 1b
sync
/* Now invalidate the instruction cache */
+#ifdef CONFIG_PPC64
lwz r7,CFG_ICACHE_BLOCKSZ(r10)
addi r5,r7,-1
andc r6,r3,r5 /* round low to line bdy */
@@ -55,9 +72,15 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
srw. r8,r8,r9 /* compute line count */
crclr cr0*4+so
beqlr /* nothing to do? */
+#endif
mtctr r8
+#ifdef CONFIG_PPC64
2: icbi 0,r6
add r6,r6,r7
+#else
+2: icbi 0, r7
+ addi r7, r7, L1_CACHE_BYTES
+#endif
bdnz 2b
isync
li r3,0
--
2.13.3
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 6/8] powerpc/vdso32: use LOAD_REG_IMMEDIATE()
2019-08-22 16:34 [PATCH v2 0/8] powerpc/vdso32 enhancement and optimisation Christophe Leroy
` (4 preceding siblings ...)
2019-08-22 16:34 ` [PATCH v2 5/8] powerpc/vdso32: Don't read cache line size from the datapage on PPC32 Christophe Leroy
@ 2019-08-22 16:34 ` Christophe Leroy
2019-08-22 16:34 ` [PATCH v2 7/8] powerpc/vdso32: implement clock_getres entirely Christophe Leroy
2019-08-22 16:34 ` [PATCH v2 8/8] powerpc/vdso32: miscellaneous optimisations Christophe Leroy
7 siblings, 0 replies; 18+ messages in thread
From: Christophe Leroy @ 2019-08-22 16:34 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
Use LOAD_REG_IMMEDIATE() to load registers with immediate value.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/kernel/vdso32/gettimeofday.S | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index 3e55cba19f44..0f87be0ebf7e 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -40,8 +40,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
get_datapage r9, r0
cmplwi r10,0 /* check if tv is NULL */
beq 3f
- lis r7,1000000@ha /* load up USEC_PER_SEC */
- addi r7,r7,1000000@l /* so we get microseconds in r4 */
+ LOAD_REG_IMMEDIATE(r7, 1000000) /* load up USEC_PER_SEC */
bl __do_get_tspec@local /* get sec/usec from tb & kernel */
stw r3,TVAL32_TV_SEC(r10)
stw r4,TVAL32_TV_USEC(r10)
@@ -84,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
.cfi_register lr,r12
mr r11,r4 /* r11 saves tp */
get_datapage r9, r0
- lis r7,NSEC_PER_SEC@h /* want nanoseconds */
- ori r7,r7,NSEC_PER_SEC@l
+ LOAD_REG_IMMEDIATE(r7, NSEC_PER_SEC) /* load up NSEC_PER_SEC */
beq cr5, .Lcoarse_clocks
.Lprecise_clocks:
bl __do_get_tspec@local /* get sec/nsec from tb & kernel */
--
2.13.3
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 7/8] powerpc/vdso32: implement clock_getres entirely
2019-08-22 16:34 [PATCH v2 0/8] powerpc/vdso32 enhancement and optimisation Christophe Leroy
` (5 preceding siblings ...)
2019-08-22 16:34 ` [PATCH v2 6/8] powerpc/vdso32: use LOAD_REG_IMMEDIATE() Christophe Leroy
@ 2019-08-22 16:34 ` Christophe Leroy
2019-08-22 16:34 ` [PATCH v2 8/8] powerpc/vdso32: miscellaneous optimisations Christophe Leroy
7 siblings, 0 replies; 18+ messages in thread
From: Christophe Leroy @ 2019-08-22 16:34 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
clock_getres returns hrtimer_res for all clocks but coarse ones
for which it returns KTIME_LOW_RES.
return EINVAL for unknown clocks.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/kernel/asm-offsets.c | 3 +++
arch/powerpc/kernel/vdso32/gettimeofday.S | 19 +++++++++++--------
2 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index b6328a90cad7..dbfd3ddc85dc 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -417,7 +417,10 @@ int main(void)
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
+ DEFINE(CLOCK_MAX, CLOCK_TAI);
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
+ DEFINE(EINVAL, EINVAL);
+ DEFINE(KTIME_LOW_RES, KTIME_LOW_RES);
#ifdef CONFIG_BUG
DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index 0f87be0ebf7e..c65f41c612f7 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -199,17 +199,20 @@ V_FUNCTION_END(__kernel_clock_gettime)
V_FUNCTION_BEGIN(__kernel_clock_getres)
.cfi_startproc
/* Check for supported clock IDs */
- cmpwi cr0,r3,CLOCK_REALTIME
- cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
+ cmplwi cr0, r3, CLOCK_MAX
+ cmpwi cr1, r3, CLOCK_REALTIME_COARSE
+ cmpwi cr7, r3, CLOCK_MONOTONIC_COARSE
+ bgt cr0, 99f
+ LOAD_REG_IMMEDIATE(r5, KTIME_LOW_RES)
+ beq cr1, 1f
+ beq cr7, 1f
mflr r12
.cfi_register lr,r12
get_datapage r3, r0
lwz r5, CLOCK_HRTIMER_RES(r3)
mtlr r12
- li r3,0
+1: li r3,0
cmpli cr0,r4,0
crclr cr0*4+so
beqlr
@@ -218,11 +221,11 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
blr
/*
- * syscall fallback
+ * invalid clock
*/
99:
- li r0,__NR_clock_getres
- sc
+ li r3, EINVAL
+ crset so
blr
.cfi_endproc
V_FUNCTION_END(__kernel_clock_getres)
--
2.13.3
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 8/8] powerpc/vdso32: miscellaneous optimisations
2019-08-22 16:34 [PATCH v2 0/8] powerpc/vdso32 enhancement and optimisation Christophe Leroy
` (6 preceding siblings ...)
2019-08-22 16:34 ` [PATCH v2 7/8] powerpc/vdso32: implement clock_getres entirely Christophe Leroy
@ 2019-08-22 16:34 ` Christophe Leroy
7 siblings, 0 replies; 18+ messages in thread
From: Christophe Leroy @ 2019-08-22 16:34 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
Various optimisations by inverting branches and removing
redundant instructions.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/kernel/vdso32/datapage.S | 3 +--
arch/powerpc/kernel/vdso32/getcpu.S | 6 +++---
arch/powerpc/kernel/vdso32/gettimeofday.S | 18 +++++++++---------
3 files changed, 13 insertions(+), 14 deletions(-)
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index d480d2d4a3fe..436b88c455d1 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -31,11 +31,10 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- mr r4,r3
+ mr. r4,r3
get_datapage r3, r0
mtlr r12
addi r3,r3,CFG_SYSCALL_MAP32
- cmpli cr0,r4,0
beqlr
li r0,NR_syscalls
stw r0,0(r4)
diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S
index bde226ad904d..ac1faa8a2bfd 100644
--- a/arch/powerpc/kernel/vdso32/getcpu.S
+++ b/arch/powerpc/kernel/vdso32/getcpu.S
@@ -31,10 +31,10 @@ V_FUNCTION_BEGIN(__kernel_getcpu)
rlwinm r7,r5,16,31-15,31-0
beq cr0,1f
stw r6,0(r3)
-1: beq cr1,2f
- stw r7,0(r4)
-2: crclr cr0*4+so
+1: crclr cr0*4+so
li r3,0 /* always success */
+ beqlr cr1
+ stw r7,0(r4)
blr
.cfi_endproc
V_FUNCTION_END(__kernel_getcpu)
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index c65f41c612f7..47aa44ab8bbb 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -35,10 +35,9 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
mflr r12
.cfi_register lr,r12
- mr r10,r3 /* r10 saves tv */
+ mr. r10,r3 /* r10 saves tv */
mr r11,r4 /* r11 saves tz */
get_datapage r9, r0
- cmplwi r10,0 /* check if tv is NULL */
beq 3f
LOAD_REG_IMMEDIATE(r7, 1000000) /* load up USEC_PER_SEC */
bl __do_get_tspec@local /* get sec/usec from tb & kernel */
@@ -46,15 +45,16 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
stw r4,TVAL32_TV_USEC(r10)
3: cmplwi r11,0 /* check if tz is NULL */
- beq 1f
+ mtlr r12
+ crclr cr0*4+so
+ li r3,0
+ beqlr
+
lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */
lwz r5,CFG_TZ_DSTTIME(r9)
stw r4,TZONE_TZ_MINWEST(r11)
stw r5,TZONE_TZ_DSTTIME(r11)
-1: mtlr r12
- crclr cr0*4+so
- li r3,0
blr
.cfi_endproc
V_FUNCTION_END(__kernel_gettimeofday)
@@ -248,10 +248,10 @@ V_FUNCTION_BEGIN(__kernel_time)
lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
cmplwi r11,0 /* check if t is NULL */
- beq 2f
- stw r3,0(r11) /* store result at *t */
-2: mtlr r12
+ mtlr r12
crclr cr0*4+so
+ beqlr
+ stw r3,0(r11) /* store result at *t */
blr
.cfi_endproc
V_FUNCTION_END(__kernel_time)
--
2.13.3
^ permalink raw reply related [flat|nested] 18+ messages in thread