linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/8] powerpc/vdso32 enhancement and optimisation
@ 2019-08-22 16:34 Christophe Leroy
  2019-08-22 16:34 ` [PATCH v2 1/8] powerpc/32: Add VDSO version of getcpu Christophe Leroy
                   ` (7 more replies)
  0 siblings, 8 replies; 17+ messages in thread
From: Christophe Leroy @ 2019-08-22 16:34 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev

This series:
- adds getcpu()
- adds coarse clocks in clock_gettime
- fixes and adds all clocks in clock_getres
- optimises the retrieval of the datapage address
- optimises the cache functions

It puts together the three patches sent out earlier allthought they
were not presented as a series, hence the 'v2' tag for now.

v2:
- Used named labels in patch 2
- Added patch from Vincenzo to fix clock_getres() (patch 3)
- Removed unnecessary label in patch 4 as suggested by Segher
- Added patches 5 to 8

Christophe Leroy (8):
  powerpc/32: Add VDSO version of getcpu
  powerpc/vdso32: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
  powerpc: Fix vDSO clock_getres()
  powerpc/vdso32: inline __get_datapage()
  powerpc/vdso32: Don't read cache line size from the datapage on PPC32.
  powerpc/vdso32: use LOAD_REG_IMMEDIATE()
  powerpc/vdso32: implement clock_getres entirely
  powerpc/vdso32: miscellaneous optimisations

 arch/powerpc/include/asm/vdso.h           |   2 +
 arch/powerpc/include/asm/vdso_datapage.h  |   6 +-
 arch/powerpc/kernel/asm-offsets.c         |   7 +-
 arch/powerpc/kernel/head_32.h             |  13 ++++
 arch/powerpc/kernel/head_booke.h          |  11 +++
 arch/powerpc/kernel/time.c                |   1 +
 arch/powerpc/kernel/vdso.c                |   5 --
 arch/powerpc/kernel/vdso32/Makefile       |   4 +-
 arch/powerpc/kernel/vdso32/cacheflush.S   |  33 ++++++--
 arch/powerpc/kernel/vdso32/datapage.S     |  32 ++------
 arch/powerpc/kernel/vdso32/datapage.h     |  11 +++
 arch/powerpc/kernel/vdso32/getcpu.S       |  13 +++-
 arch/powerpc/kernel/vdso32/gettimeofday.S | 125 +++++++++++++++++++++---------
 arch/powerpc/kernel/vdso32/vdso32.lds.S   |   2 -
 arch/powerpc/kernel/vdso64/gettimeofday.S |   7 +-
 15 files changed, 183 insertions(+), 89 deletions(-)
 create mode 100644 arch/powerpc/kernel/vdso32/datapage.h

-- 
2.13.3


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v2 1/8] powerpc/32: Add VDSO version of getcpu
  2019-08-22 16:34 [PATCH v2 0/8] powerpc/vdso32 enhancement and optimisation Christophe Leroy
@ 2019-08-22 16:34 ` Christophe Leroy
  2019-09-18  5:51   ` Michael Ellerman
  2019-08-22 16:34 ` [PATCH v2 2/8] powerpc/vdso32: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE Christophe Leroy
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 17+ messages in thread
From: Christophe Leroy @ 2019-08-22 16:34 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev

Commit 18ad51dd342a ("powerpc: Add VDSO version of getcpu") added
getcpu() for PPC64 only, by making use of a user readable general
purpose SPR.

PPC32 doesn't have any such SPR, a full system call can still be
avoided by implementing a fast system call which reads the CPU id
in the task struct and returns immediately without going back in
virtual mode.

Before the patch, vdsotest reported:
getcpu: syscall: 1572 nsec/call
getcpu:    libc: 1787 nsec/call
getcpu:    vdso: not tested

Now, vdsotest reports:
getcpu: syscall: 1582 nsec/call
getcpu:    libc: 667 nsec/call
getcpu:    vdso: 368 nsec/call

For non SMP, just return CPU id 0 from the VDSO directly.

PPC32 doesn't support CONFIG_NUMA so NUMA node is always 0.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>

---
v2: fixed build error in getcpu.S
---
 arch/powerpc/include/asm/vdso.h         |  2 ++
 arch/powerpc/kernel/head_32.h           | 13 +++++++++++++
 arch/powerpc/kernel/head_booke.h        | 11 +++++++++++
 arch/powerpc/kernel/vdso32/Makefile     |  4 +---
 arch/powerpc/kernel/vdso32/getcpu.S     |  7 +++++++
 arch/powerpc/kernel/vdso32/vdso32.lds.S |  2 --
 6 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h
index b5e1f8f8a05c..adb54782df5f 100644
--- a/arch/powerpc/include/asm/vdso.h
+++ b/arch/powerpc/include/asm/vdso.h
@@ -16,6 +16,8 @@
 /* Define if 64 bits VDSO has procedure descriptors */
 #undef VDS64_HAS_DESCRIPTORS
 
+#define NR_MAGIC_FAST_VDSO_SYSCALL	0x789a
+
 #ifndef __ASSEMBLY__
 
 /* Offsets relative to thread->vdso_base */
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 4a692553651f..a2e38b59785a 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -3,6 +3,8 @@
 #define __HEAD_32_H__
 
 #include <asm/ptrace.h>	/* for STACK_FRAME_REGS_MARKER */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
 
 /*
  * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE.
@@ -74,7 +76,13 @@
 .endm
 
 .macro SYSCALL_ENTRY trapno
+#ifdef CONFIG_SMP
+	cmplwi	cr0, r0, NR_MAGIC_FAST_VDSO_SYSCALL
+#endif
 	mfspr	r12,SPRN_SPRG_THREAD
+#ifdef CONFIG_SMP
+	beq-	1f
+#endif
 	mfcr	r10
 	lwz	r11,TASK_STACK-THREAD(r12)
 	mflr	r9
@@ -152,6 +160,11 @@
 	mtspr	SPRN_SRR0,r11
 	SYNC
 	RFI				/* jump to handler, enable MMU */
+#ifdef CONFIG_SMP
+1:
+	lwz	r5, TASK_CPU - THREAD(r12)
+	RFI
+#endif
 .endm
 
 /*
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 2ae635df9026..c534e87cac84 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -3,6 +3,8 @@
 #define __HEAD_BOOKE_H__
 
 #include <asm/ptrace.h>	/* for STACK_FRAME_REGS_MARKER */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_booke_hv_asm.h>
 
@@ -104,6 +106,10 @@ FTR_SECTION_ELSE
 #ifdef CONFIG_KVM_BOOKE_HV
 ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
 #endif
+#ifdef CONFIG_SMP
+	cmplwi	cr0, r0, NR_MAGIC_FAST_VDSO_SYSCALL
+	beq-	1f
+#endif
 	BOOKE_CLEAR_BTB(r11)
 	lwz	r11, TASK_STACK - THREAD(r10)
 	rlwinm	r12,r12,0,4,2	/* Clear SO bit in CR */
@@ -176,6 +182,11 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
 	mtspr	SPRN_SRR0,r11
 	SYNC
 	RFI				/* jump to handler, enable MMU */
+#ifdef CONFIG_SMP
+1:
+	lwz	r5, TASK_CPU - THREAD(r10)
+	RFI
+#endif
 .endm
 
 /* To handle the additional exception priority levels on 40x and Book-E
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index 06f54d947057..e147bbdc12cd 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -2,9 +2,7 @@
 
 # List of files in the vdso, has to be asm only for now
 
-obj-vdso32-$(CONFIG_PPC64) = getcpu.o
-obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o \
-		$(obj-vdso32-y)
+obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
 
 # Build rules
 
diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S
index 63e914539e1a..bde226ad904d 100644
--- a/arch/powerpc/kernel/vdso32/getcpu.S
+++ b/arch/powerpc/kernel/vdso32/getcpu.S
@@ -17,7 +17,14 @@
  */
 V_FUNCTION_BEGIN(__kernel_getcpu)
   .cfi_startproc
+#if defined(CONFIG_PPC64)
 	mfspr	r5,SPRN_SPRG_VDSO_READ
+#elif defined(CONFIG_SMP)
+	li	r0, NR_MAGIC_FAST_VDSO_SYSCALL
+	sc	/* returns cpuid in r5, clobbers cr0 and r10-r13 */
+#else
+	li	r5, 0
+#endif
 	cmpwi	cr0,r3,0
 	cmpwi	cr1,r4,0
 	clrlwi  r6,r5,16
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index 099a6db14e67..663880671e20 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -152,9 +152,7 @@ VERSION
 		__kernel_sync_dicache_p5;
 		__kernel_sigtramp32;
 		__kernel_sigtramp_rt32;
-#ifdef CONFIG_PPC64
 		__kernel_getcpu;
-#endif
 		__kernel_time;
 
 	local: *;
-- 
2.13.3


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v2 2/8] powerpc/vdso32: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
  2019-08-22 16:34 [PATCH v2 0/8] powerpc/vdso32 enhancement and optimisation Christophe Leroy
  2019-08-22 16:34 ` [PATCH v2 1/8] powerpc/32: Add VDSO version of getcpu Christophe Leroy
@ 2019-08-22 16:34 ` Christophe Leroy
  2019-08-22 16:34 ` [PATCH v2 3/8] powerpc: Fix vDSO clock_getres() Christophe Leroy
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 17+ messages in thread
From: Christophe Leroy @ 2019-08-22 16:34 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev

This is copied and adapted from commit 5c929885f1bb ("powerpc/vdso64:
Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE")
from Santosh Sivaraj <santosh@fossix.org>

Benchmark from vdsotest-all:
clock-gettime-realtime: syscall: 3601 nsec/call
clock-gettime-realtime:    libc: 1072 nsec/call
clock-gettime-realtime:    vdso: 931 nsec/call
clock-gettime-monotonic: syscall: 4034 nsec/call
clock-gettime-monotonic:    libc: 1213 nsec/call
clock-gettime-monotonic:    vdso: 1076 nsec/call
clock-gettime-realtime-coarse: syscall: 2722 nsec/call
clock-gettime-realtime-coarse:    libc: 805 nsec/call
clock-gettime-realtime-coarse:    vdso: 668 nsec/call
clock-gettime-monotonic-coarse: syscall: 2949 nsec/call
clock-gettime-monotonic-coarse:    libc: 882 nsec/call
clock-gettime-monotonic-coarse:    vdso: 745 nsec/call

Additional test passed with:
	vdsotest -d 30 clock-gettime-monotonic-coarse verify

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Santosh Sivaraj <santosh@fossix.org>
Link: https://github.com/linuxppc/issues/issues/41
---
 arch/powerpc/kernel/vdso32/gettimeofday.S | 64 +++++++++++++++++++++++++++----
 1 file changed, 57 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index becd9f8767ed..decd263c16e0 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -71,7 +71,13 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 	cmpli	cr0,r3,CLOCK_REALTIME
 	cmpli	cr1,r3,CLOCK_MONOTONIC
 	cror	cr0*4+eq,cr0*4+eq,cr1*4+eq
-	bne	cr0,99f
+
+	cmpli	cr5,r3,CLOCK_REALTIME_COARSE
+	cmpli	cr6,r3,CLOCK_MONOTONIC_COARSE
+	cror	cr5*4+eq,cr5*4+eq,cr6*4+eq
+
+	cror	cr0*4+eq,cr0*4+eq,cr5*4+eq
+	bne	cr0, .Lgettime_fallback
 
 	mflr	r12			/* r12 saves lr */
   .cfi_register lr,r12
@@ -80,8 +86,10 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 	mr	r9,r3			/* datapage ptr in r9 */
 	lis	r7,NSEC_PER_SEC@h	/* want nanoseconds */
 	ori	r7,r7,NSEC_PER_SEC@l
-50:	bl	__do_get_tspec@local	/* get sec/nsec from tb & kernel */
-	bne	cr1,80f			/* not monotonic -> all done */
+	beq	cr5, .Lcoarse_clocks
+.Lprecise_clocks:
+	bl	__do_get_tspec@local	/* get sec/nsec from tb & kernel */
+	bne	cr1, .Lfinish		/* not monotonic -> all done */
 
 	/*
 	 * CLOCK_MONOTONIC
@@ -105,12 +113,53 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 	add	r9,r9,r0
 	lwz	r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
         cmpl    cr0,r8,r0		/* check if updated */
-	bne-	50b
+	bne-	.Lprecise_clocks
+	b	.Lfinish_monotonic
+
+	/*
+	 * For coarse clocks we get data directly from the vdso data page, so
+	 * we don't need to call __do_get_tspec, but we still need to do the
+	 * counter trick.
+	 */
+.Lcoarse_clocks:
+	lwz	r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
+	andi.	r0,r8,1                 /* pending update ? loop */
+	bne-	.Lcoarse_clocks
+	add	r9,r9,r0		/* r0 is already 0 */
+
+	/*
+	 * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE
+	 * too
+	 */
+	lwz	r3,STAMP_XTIME+TSPC32_TV_SEC(r9)
+	lwz	r4,STAMP_XTIME+TSPC32_TV_NSEC(r9)
+	bne	cr6,1f
+
+	/* CLOCK_MONOTONIC_COARSE */
+	lwz	r5,(WTOM_CLOCK_SEC+LOPART)(r9)
+	lwz	r6,WTOM_CLOCK_NSEC(r9)
+
+	/* check if counter has updated */
+	or	r0,r6,r5
+1:	or	r0,r0,r3
+	or	r0,r0,r4
+	xor	r0,r0,r0
+	add	r3,r3,r0
+	lwz	r0,CFG_TB_UPDATE_COUNT+LOPART(r9)
+	cmpl	cr0,r0,r8               /* check if updated */
+	bne-	.Lcoarse_clocks
+
+	/* Counter has not updated, so continue calculating proper values for
+	 * sec and nsec if monotonic coarse, or just return with the proper
+	 * values for realtime.
+	 */
+	bne	cr6, .Lfinish
 
 	/* Calculate and store result. Note that this mimics the C code,
 	 * which may cause funny results if nsec goes negative... is that
 	 * possible at all ?
 	 */
+.Lfinish_monotonic:
 	add	r3,r3,r5
 	add	r4,r4,r6
 	cmpw	cr0,r4,r7
@@ -118,11 +167,12 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 	blt	1f
 	subf	r4,r7,r4
 	addi	r3,r3,1
-1:	bge	cr1,80f
+1:	bge	cr1, .Lfinish
 	addi	r3,r3,-1
 	add	r4,r4,r7
 
-80:	stw	r3,TSPC32_TV_SEC(r11)
+.Lfinish:
+	stw	r3,TSPC32_TV_SEC(r11)
 	stw	r4,TSPC32_TV_NSEC(r11)
 
 	mtlr	r12
@@ -133,7 +183,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 	/*
 	 * syscall fallback
 	 */
-99:
+.Lgettime_fallback:
 	li	r0,__NR_clock_gettime
   .cfi_restore lr
 	sc
-- 
2.13.3


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v2 3/8] powerpc: Fix vDSO clock_getres()
  2019-08-22 16:34 [PATCH v2 0/8] powerpc/vdso32 enhancement and optimisation Christophe Leroy
  2019-08-22 16:34 ` [PATCH v2 1/8] powerpc/32: Add VDSO version of getcpu Christophe Leroy
  2019-08-22 16:34 ` [PATCH v2 2/8] powerpc/vdso32: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE Christophe Leroy
@ 2019-08-22 16:34 ` Christophe Leroy
  2019-08-22 16:34 ` [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage() Christophe Leroy
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 17+ messages in thread
From: Christophe Leroy @ 2019-08-22 16:34 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev

From: Vincenzo Frascino <vincenzo.frascino@arm.com>

clock_getres in the vDSO library has to preserve the same behaviour
of posix_get_hrtimer_res().

In particular, posix_get_hrtimer_res() does:
    sec = 0;
    ns = hrtimer_resolution;
and hrtimer_resolution depends on the enablement of the high
resolution timers that can happen either at compile or at run time.

Fix the powerpc vdso implementation of clock_getres keeping a copy of
hrtimer_resolution in vdso data and using that directly.

Fixes: a7f290dad32e ("[PATCH] powerpc: Merge vdso's and add vdso support
to 32 bits kernel")
Cc: stable@vger.kernel.org
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@c-s.fr>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
[chleroy: changed CLOCK_REALTIME_RES to CLOCK_HRTIMER_RES]
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/vdso_datapage.h  | 2 ++
 arch/powerpc/kernel/asm-offsets.c         | 2 +-
 arch/powerpc/kernel/time.c                | 1 +
 arch/powerpc/kernel/vdso32/gettimeofday.S | 7 +++++--
 arch/powerpc/kernel/vdso64/gettimeofday.S | 7 +++++--
 5 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h
index c61d59ed3b45..2ccb938d8544 100644
--- a/arch/powerpc/include/asm/vdso_datapage.h
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -82,6 +82,7 @@ struct vdso_data {
 	__s32 wtom_clock_nsec;			/* Wall to monotonic clock nsec */
 	__s64 wtom_clock_sec;			/* Wall to monotonic clock sec */
 	struct timespec stamp_xtime;		/* xtime as at tb_orig_stamp */
+	__u32 hrtimer_res;			/* hrtimer resolution */
    	__u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls  */
    	__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
 };
@@ -103,6 +104,7 @@ struct vdso_data {
 	__s32 wtom_clock_nsec;
 	struct timespec stamp_xtime;	/* xtime as at tb_orig_stamp */
 	__u32 stamp_sec_fraction;	/* fractional seconds of stamp_xtime */
+	__u32 hrtimer_res;		/* hrtimer resolution */
    	__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
 	__u32 dcache_block_size;	/* L1 d-cache block size     */
 	__u32 icache_block_size;	/* L1 i-cache block size     */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4ccb6b3a7fbd..6279053967fd 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -387,6 +387,7 @@ int main(void)
 	OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec);
 	OFFSET(STAMP_XTIME, vdso_data, stamp_xtime);
 	OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction);
+	OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res);
 	OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size);
 	OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size);
 	OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size);
@@ -417,7 +418,6 @@ int main(void)
 	DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
 	DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
 	DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
-	DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
 
 #ifdef CONFIG_BUG
 	DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 694522308cd5..619447b1b797 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -959,6 +959,7 @@ void update_vsyscall(struct timekeeper *tk)
 	vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec;
 	vdso_data->stamp_xtime = xt;
 	vdso_data->stamp_sec_fraction = frac_sec;
+	vdso_data->hrtimer_res = hrtimer_resolution;
 	smp_wmb();
 	++(vdso_data->tb_update_count);
 }
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index decd263c16e0..355b537d327a 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -206,12 +206,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
 	cror	cr0*4+eq,cr0*4+eq,cr1*4+eq
 	bne	cr0,99f
 
+	mflr	r12
+  .cfi_register lr,r12
+	bl	__get_datapage@local	/* get data page */
+	lwz	r5, CLOCK_HRTIMER_RES(r3)
+	mtlr	r12
 	li	r3,0
 	cmpli	cr0,r4,0
 	crclr	cr0*4+so
 	beqlr
-	lis	r5,CLOCK_REALTIME_RES@h
-	ori	r5,r5,CLOCK_REALTIME_RES@l
 	stw	r3,TSPC32_TV_SEC(r4)
 	stw	r5,TSPC32_TV_NSEC(r4)
 	blr
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 07bfe33fe874..81757f06bbd7 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -186,12 +186,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
 	cror	cr0*4+eq,cr0*4+eq,cr1*4+eq
 	bne	cr0,99f
 
+	mflr	r12
+  .cfi_register lr,r12
+	bl	V_LOCAL_FUNC(__get_datapage)
+	lwz	r5, CLOCK_HRTIMER_RES(r3)
+	mtlr	r12
 	li	r3,0
 	cmpldi	cr0,r4,0
 	crclr	cr0*4+so
 	beqlr
-	lis	r5,CLOCK_REALTIME_RES@h
-	ori	r5,r5,CLOCK_REALTIME_RES@l
 	std	r3,TSPC64_TV_SEC(r4)
 	std	r5,TSPC64_TV_NSEC(r4)
 	blr
-- 
2.13.3


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()
  2019-08-22 16:34 [PATCH v2 0/8] powerpc/vdso32 enhancement and optimisation Christophe Leroy
                   ` (2 preceding siblings ...)
  2019-08-22 16:34 ` [PATCH v2 3/8] powerpc: Fix vDSO clock_getres() Christophe Leroy
@ 2019-08-22 16:34 ` Christophe Leroy
  2019-08-26  5:44   ` Santosh Sivaraj
  2019-08-22 16:34 ` [PATCH v2 5/8] powerpc/vdso32: Don't read cache line size from the datapage on PPC32 Christophe Leroy
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 17+ messages in thread
From: Christophe Leroy @ 2019-08-22 16:34 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev

__get_datapage() is only a few instructions to retrieve the
address of the page where the kernel stores data to the VDSO.

By inlining this function into its users, a bl/blr pair and
a mflr/mtlr pair is avoided, plus a few reg moves.

The improvement is noticeable (about 55 nsec/call on an 8xx)

vdsotest before the patch:
gettimeofday:    vdso: 731 nsec/call
clock-gettime-realtime-coarse:    vdso: 668 nsec/call
clock-gettime-monotonic-coarse:    vdso: 745 nsec/call

vdsotest after the patch:
gettimeofday:    vdso: 677 nsec/call
clock-gettime-realtime-coarse:    vdso: 613 nsec/call
clock-gettime-monotonic-coarse:    vdso: 690 nsec/call

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/kernel/vdso32/cacheflush.S   | 10 +++++-----
 arch/powerpc/kernel/vdso32/datapage.S     | 29 ++++-------------------------
 arch/powerpc/kernel/vdso32/datapage.h     | 11 +++++++++++
 arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
 4 files changed, 26 insertions(+), 37 deletions(-)
 create mode 100644 arch/powerpc/kernel/vdso32/datapage.h

diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
index 7f882e7b9f43..e9453837e4ee 100644
--- a/arch/powerpc/kernel/vdso32/cacheflush.S
+++ b/arch/powerpc/kernel/vdso32/cacheflush.S
@@ -10,6 +10,8 @@
 #include <asm/vdso.h>
 #include <asm/asm-offsets.h>
 
+#include "datapage.h"
+
 	.text
 
 /*
@@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
   .cfi_startproc
 	mflr	r12
   .cfi_register lr,r12
-	mr	r11,r3
-	bl	__get_datapage@local
+	get_datapage	r10, r0
 	mtlr	r12
-	mr	r10,r3
 
 	lwz	r7,CFG_DCACHE_BLOCKSZ(r10)
 	addi	r5,r7,-1
-	andc	r6,r11,r5		/* round low to line bdy */
+	andc	r6,r3,r5		/* round low to line bdy */
 	subf	r8,r6,r4		/* compute length */
 	add	r8,r8,r5		/* ensure we get enough */
 	lwz	r9,CFG_DCACHE_LOGBLOCKSZ(r10)
@@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
 
 	lwz	r7,CFG_ICACHE_BLOCKSZ(r10)
 	addi	r5,r7,-1
-	andc	r6,r11,r5		/* round low to line bdy */
+	andc	r6,r3,r5		/* round low to line bdy */
 	subf	r8,r6,r4		/* compute length */
 	add	r8,r8,r5
 	lwz	r9,CFG_ICACHE_LOGBLOCKSZ(r10)
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index 6984125b9fc0..d480d2d4a3fe 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -11,34 +11,13 @@
 #include <asm/unistd.h>
 #include <asm/vdso.h>
 
+#include "datapage.h"
+
 	.text
 	.global	__kernel_datapage_offset;
 __kernel_datapage_offset:
 	.long	0
 
-V_FUNCTION_BEGIN(__get_datapage)
-  .cfi_startproc
-	/* We don't want that exposed or overridable as we want other objects
-	 * to be able to bl directly to here
-	 */
-	.protected __get_datapage
-	.hidden __get_datapage
-
-	mflr	r0
-  .cfi_register lr,r0
-
-	bcl	20,31,data_page_branch
-data_page_branch:
-	mflr	r3
-	mtlr	r0
-	addi	r3, r3, __kernel_datapage_offset-data_page_branch
-	lwz	r0,0(r3)
-  .cfi_restore lr
-	add	r3,r0,r3
-	blr
-  .cfi_endproc
-V_FUNCTION_END(__get_datapage)
-
 /*
  * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
  *
@@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
 	mflr	r12
   .cfi_register lr,r12
 	mr	r4,r3
-	bl	__get_datapage@local
+	get_datapage	r3, r0
 	mtlr	r12
 	addi	r3,r3,CFG_SYSCALL_MAP32
 	cmpli	cr0,r4,0
@@ -74,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
   .cfi_startproc
 	mflr	r12
   .cfi_register lr,r12
-	bl	__get_datapage@local
+	get_datapage	r3, r0
 	lwz	r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
 	lwz	r3,CFG_TB_TICKS_PER_SEC(r3)
 	mtlr	r12
diff --git a/arch/powerpc/kernel/vdso32/datapage.h b/arch/powerpc/kernel/vdso32/datapage.h
new file mode 100644
index 000000000000..74f4f57c2da8
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/datapage.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+.macro get_datapage ptr, tmp
+	bcl	20,31,.+4
+	mflr	\ptr
+	addi	\ptr, \ptr, __kernel_datapage_offset - (.-4)
+	lwz	\tmp, 0(\ptr)
+	add	\ptr, \tmp, \ptr
+.endm
+
+
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index 355b537d327a..3e55cba19f44 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -12,6 +12,8 @@
 #include <asm/asm-offsets.h>
 #include <asm/unistd.h>
 
+#include "datapage.h"
+
 /* Offset for the low 32-bit part of a field of long type */
 #ifdef CONFIG_PPC64
 #define LOPART	4
@@ -35,8 +37,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
 
 	mr	r10,r3			/* r10 saves tv */
 	mr	r11,r4			/* r11 saves tz */
-	bl	__get_datapage@local	/* get data page */
-	mr	r9, r3			/* datapage ptr in r9 */
+	get_datapage	r9, r0
 	cmplwi	r10,0			/* check if tv is NULL */
 	beq	3f
 	lis	r7,1000000@ha		/* load up USEC_PER_SEC */
@@ -82,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 	mflr	r12			/* r12 saves lr */
   .cfi_register lr,r12
 	mr	r11,r4			/* r11 saves tp */
-	bl	__get_datapage@local	/* get data page */
-	mr	r9,r3			/* datapage ptr in r9 */
+	get_datapage	r9, r0
 	lis	r7,NSEC_PER_SEC@h	/* want nanoseconds */
 	ori	r7,r7,NSEC_PER_SEC@l
 	beq	cr5, .Lcoarse_clocks
@@ -208,7 +208,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
 
 	mflr	r12
   .cfi_register lr,r12
-	bl	__get_datapage@local	/* get data page */
+	get_datapage	r3, r0
 	lwz	r5, CLOCK_HRTIMER_RES(r3)
 	mtlr	r12
 	li	r3,0
@@ -242,8 +242,7 @@ V_FUNCTION_BEGIN(__kernel_time)
   .cfi_register lr,r12
 
 	mr	r11,r3			/* r11 holds t */
-	bl	__get_datapage@local
-	mr	r9, r3			/* datapage ptr in r9 */
+	get_datapage	r9, r0
 
 	lwz	r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
 
-- 
2.13.3


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v2 5/8] powerpc/vdso32: Don't read cache line size from the datapage on PPC32.
  2019-08-22 16:34 [PATCH v2 0/8] powerpc/vdso32 enhancement and optimisation Christophe Leroy
                   ` (3 preceding siblings ...)
  2019-08-22 16:34 ` [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage() Christophe Leroy
@ 2019-08-22 16:34 ` Christophe Leroy
  2019-08-22 16:34 ` [PATCH v2 6/8] powerpc/vdso32: use LOAD_REG_IMMEDIATE() Christophe Leroy
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 17+ messages in thread
From: Christophe Leroy @ 2019-08-22 16:34 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev

On PPC32, the cache lines have a fixed size known at build time.

Don't read it from the datapage.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/vdso_datapage.h |  4 ----
 arch/powerpc/kernel/asm-offsets.c        |  2 +-
 arch/powerpc/kernel/vdso.c               |  5 -----
 arch/powerpc/kernel/vdso32/cacheflush.S  | 23 +++++++++++++++++++++++
 4 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h
index 2ccb938d8544..869df2f43400 100644
--- a/arch/powerpc/include/asm/vdso_datapage.h
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -106,10 +106,6 @@ struct vdso_data {
 	__u32 stamp_sec_fraction;	/* fractional seconds of stamp_xtime */
 	__u32 hrtimer_res;		/* hrtimer resolution */
    	__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
-	__u32 dcache_block_size;	/* L1 d-cache block size     */
-	__u32 icache_block_size;	/* L1 i-cache block size     */
-	__u32 dcache_log_block_size;	/* L1 d-cache log block size */
-	__u32 icache_log_block_size;	/* L1 i-cache log block size */
 };
 
 #endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6279053967fd..b6328a90cad7 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -388,11 +388,11 @@ int main(void)
 	OFFSET(STAMP_XTIME, vdso_data, stamp_xtime);
 	OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction);
 	OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res);
+#ifdef CONFIG_PPC64
 	OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size);
 	OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size);
 	OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size);
 	OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_data, dcache_log_block_size);
-#ifdef CONFIG_PPC64
 	OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64);
 	OFFSET(TVAL64_TV_SEC, timeval, tv_sec);
 	OFFSET(TVAL64_TV_USEC, timeval, tv_usec);
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index d60598113a9f..87d43e43e2e7 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -750,11 +750,6 @@ static int __init vdso_init(void)
 	 */
 	vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
 	DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages);
-#else
-	vdso_data->dcache_block_size = L1_CACHE_BYTES;
-	vdso_data->dcache_log_block_size = L1_CACHE_SHIFT;
-	vdso_data->icache_block_size = L1_CACHE_BYTES;
-	vdso_data->icache_log_block_size = L1_CACHE_SHIFT;
 #endif /* CONFIG_PPC64 */
 
 
diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
index e9453837e4ee..b9340a89984e 100644
--- a/arch/powerpc/kernel/vdso32/cacheflush.S
+++ b/arch/powerpc/kernel/vdso32/cacheflush.S
@@ -9,6 +9,7 @@
 #include <asm/ppc_asm.h>
 #include <asm/vdso.h>
 #include <asm/asm-offsets.h>
+#include <asm/cache.h>
 
 #include "datapage.h"
 
@@ -24,28 +25,44 @@
  */
 V_FUNCTION_BEGIN(__kernel_sync_dicache)
   .cfi_startproc
+#ifdef CONFIG_PPC64
 	mflr	r12
   .cfi_register lr,r12
 	get_datapage	r10, r0
 	mtlr	r12
+#endif
 
+#ifdef CONFIG_PPC64
 	lwz	r7,CFG_DCACHE_BLOCKSZ(r10)
 	addi	r5,r7,-1
+#else
+	li	r5, L1_CACHE_BYTES - 1
+#endif
 	andc	r6,r3,r5		/* round low to line bdy */
 	subf	r8,r6,r4		/* compute length */
 	add	r8,r8,r5		/* ensure we get enough */
+#ifdef CONFIG_PPC64
 	lwz	r9,CFG_DCACHE_LOGBLOCKSZ(r10)
 	srw.	r8,r8,r9		/* compute line count */
+#else
+	srwi.	r8, r8, L1_CACHE_SHIFT
+	mr	r7, r6
+#endif
 	crclr	cr0*4+so
 	beqlr				/* nothing to do? */
 	mtctr	r8
 1:	dcbst	0,r6
+#ifdef CONFIG_PPC64
 	add	r6,r6,r7
+#else
+	addi	r6, r6, L1_CACHE_BYTES
+#endif
 	bdnz	1b
 	sync
 
 /* Now invalidate the instruction cache */
 
+#ifdef CONFIG_PPC64
 	lwz	r7,CFG_ICACHE_BLOCKSZ(r10)
 	addi	r5,r7,-1
 	andc	r6,r3,r5		/* round low to line bdy */
@@ -55,9 +72,15 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
 	srw.	r8,r8,r9		/* compute line count */
 	crclr	cr0*4+so
 	beqlr				/* nothing to do? */
+#endif
 	mtctr	r8
+#ifdef CONFIG_PPC64
 2:	icbi	0,r6
 	add	r6,r6,r7
+#else
+2:	icbi	0, r7
+	addi	r7, r7, L1_CACHE_BYTES
+#endif
 	bdnz	2b
 	isync
 	li	r3,0
-- 
2.13.3


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v2 6/8] powerpc/vdso32: use LOAD_REG_IMMEDIATE()
  2019-08-22 16:34 [PATCH v2 0/8] powerpc/vdso32 enhancement and optimisation Christophe Leroy
                   ` (4 preceding siblings ...)
  2019-08-22 16:34 ` [PATCH v2 5/8] powerpc/vdso32: Don't read cache line size from the datapage on PPC32 Christophe Leroy
@ 2019-08-22 16:34 ` Christophe Leroy
  2019-08-22 16:34 ` [PATCH v2 7/8] powerpc/vdso32: implement clock_getres entirely Christophe Leroy
  2019-08-22 16:34 ` [PATCH v2 8/8] powerpc/vdso32: miscellaneous optimisations Christophe Leroy
  7 siblings, 0 replies; 17+ messages in thread
From: Christophe Leroy @ 2019-08-22 16:34 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev

Use LOAD_REG_IMMEDIATE() to load registers with immediate value.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/kernel/vdso32/gettimeofday.S | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index 3e55cba19f44..0f87be0ebf7e 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -40,8 +40,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
 	get_datapage	r9, r0
 	cmplwi	r10,0			/* check if tv is NULL */
 	beq	3f
-	lis	r7,1000000@ha		/* load up USEC_PER_SEC */
-	addi	r7,r7,1000000@l		/* so we get microseconds in r4 */
+	LOAD_REG_IMMEDIATE(r7, 1000000)	/* load up USEC_PER_SEC */
 	bl	__do_get_tspec@local	/* get sec/usec from tb & kernel */
 	stw	r3,TVAL32_TV_SEC(r10)
 	stw	r4,TVAL32_TV_USEC(r10)
@@ -84,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
   .cfi_register lr,r12
 	mr	r11,r4			/* r11 saves tp */
 	get_datapage	r9, r0
-	lis	r7,NSEC_PER_SEC@h	/* want nanoseconds */
-	ori	r7,r7,NSEC_PER_SEC@l
+	LOAD_REG_IMMEDIATE(r7, NSEC_PER_SEC)	/* load up NSEC_PER_SEC */
 	beq	cr5, .Lcoarse_clocks
 .Lprecise_clocks:
 	bl	__do_get_tspec@local	/* get sec/nsec from tb & kernel */
-- 
2.13.3


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v2 7/8] powerpc/vdso32: implement clock_getres entirely
  2019-08-22 16:34 [PATCH v2 0/8] powerpc/vdso32 enhancement and optimisation Christophe Leroy
                   ` (5 preceding siblings ...)
  2019-08-22 16:34 ` [PATCH v2 6/8] powerpc/vdso32: use LOAD_REG_IMMEDIATE() Christophe Leroy
@ 2019-08-22 16:34 ` Christophe Leroy
  2019-08-22 16:34 ` [PATCH v2 8/8] powerpc/vdso32: miscellaneous optimisations Christophe Leroy
  7 siblings, 0 replies; 17+ messages in thread
From: Christophe Leroy @ 2019-08-22 16:34 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev

clock_getres returns hrtimer_res for all clocks but coarse ones
for which it returns KTIME_LOW_RES.

return EINVAL for unknown clocks.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/kernel/asm-offsets.c         |  3 +++
 arch/powerpc/kernel/vdso32/gettimeofday.S | 19 +++++++++++--------
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index b6328a90cad7..dbfd3ddc85dc 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -417,7 +417,10 @@ int main(void)
 	DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
 	DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
 	DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
+	DEFINE(CLOCK_MAX, CLOCK_TAI);
 	DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
+	DEFINE(EINVAL, EINVAL);
+	DEFINE(KTIME_LOW_RES, KTIME_LOW_RES);
 
 #ifdef CONFIG_BUG
 	DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index 0f87be0ebf7e..c65f41c612f7 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -199,17 +199,20 @@ V_FUNCTION_END(__kernel_clock_gettime)
 V_FUNCTION_BEGIN(__kernel_clock_getres)
   .cfi_startproc
 	/* Check for supported clock IDs */
-	cmpwi	cr0,r3,CLOCK_REALTIME
-	cmpwi	cr1,r3,CLOCK_MONOTONIC
-	cror	cr0*4+eq,cr0*4+eq,cr1*4+eq
-	bne	cr0,99f
+	cmplwi	cr0, r3, CLOCK_MAX
+	cmpwi	cr1, r3, CLOCK_REALTIME_COARSE
+	cmpwi	cr7, r3, CLOCK_MONOTONIC_COARSE
+	bgt	cr0, 99f
+	LOAD_REG_IMMEDIATE(r5, KTIME_LOW_RES)
+	beq	cr1, 1f
+	beq	cr7, 1f
 
 	mflr	r12
   .cfi_register lr,r12
 	get_datapage	r3, r0
 	lwz	r5, CLOCK_HRTIMER_RES(r3)
 	mtlr	r12
-	li	r3,0
+1:	li	r3,0
 	cmpli	cr0,r4,0
 	crclr	cr0*4+so
 	beqlr
@@ -218,11 +221,11 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
 	blr
 
 	/*
-	 * syscall fallback
+	 * invalid clock
 	 */
 99:
-	li	r0,__NR_clock_getres
-	sc
+	li	r3, EINVAL
+	crset	so
 	blr
   .cfi_endproc
 V_FUNCTION_END(__kernel_clock_getres)
-- 
2.13.3


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v2 8/8] powerpc/vdso32: miscellaneous optimisations
  2019-08-22 16:34 [PATCH v2 0/8] powerpc/vdso32 enhancement and optimisation Christophe Leroy
                   ` (6 preceding siblings ...)
  2019-08-22 16:34 ` [PATCH v2 7/8] powerpc/vdso32: implement clock_getres entirely Christophe Leroy
@ 2019-08-22 16:34 ` Christophe Leroy
  7 siblings, 0 replies; 17+ messages in thread
From: Christophe Leroy @ 2019-08-22 16:34 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev

Various optimisations by inverting branches and removing
redundant instructions.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/kernel/vdso32/datapage.S     |  3 +--
 arch/powerpc/kernel/vdso32/getcpu.S       |  6 +++---
 arch/powerpc/kernel/vdso32/gettimeofday.S | 18 +++++++++---------
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index d480d2d4a3fe..436b88c455d1 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -31,11 +31,10 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
   .cfi_startproc
 	mflr	r12
   .cfi_register lr,r12
-	mr	r4,r3
+	mr.	r4,r3
 	get_datapage	r3, r0
 	mtlr	r12
 	addi	r3,r3,CFG_SYSCALL_MAP32
-	cmpli	cr0,r4,0
 	beqlr
 	li	r0,NR_syscalls
 	stw	r0,0(r4)
diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S
index bde226ad904d..ac1faa8a2bfd 100644
--- a/arch/powerpc/kernel/vdso32/getcpu.S
+++ b/arch/powerpc/kernel/vdso32/getcpu.S
@@ -31,10 +31,10 @@ V_FUNCTION_BEGIN(__kernel_getcpu)
 	rlwinm  r7,r5,16,31-15,31-0
 	beq	cr0,1f
 	stw	r6,0(r3)
-1:	beq	cr1,2f
-	stw	r7,0(r4)
-2:	crclr	cr0*4+so
+1:	crclr	cr0*4+so
 	li	r3,0			/* always success */
+	beqlr	cr1
+	stw	r7,0(r4)
 	blr
   .cfi_endproc
 V_FUNCTION_END(__kernel_getcpu)
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index c65f41c612f7..47aa44ab8bbb 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -35,10 +35,9 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
 	mflr	r12
   .cfi_register lr,r12
 
-	mr	r10,r3			/* r10 saves tv */
+	mr.	r10,r3			/* r10 saves tv */
 	mr	r11,r4			/* r11 saves tz */
 	get_datapage	r9, r0
-	cmplwi	r10,0			/* check if tv is NULL */
 	beq	3f
 	LOAD_REG_IMMEDIATE(r7, 1000000)	/* load up USEC_PER_SEC */
 	bl	__do_get_tspec@local	/* get sec/usec from tb & kernel */
@@ -46,15 +45,16 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
 	stw	r4,TVAL32_TV_USEC(r10)
 
 3:	cmplwi	r11,0			/* check if tz is NULL */
-	beq	1f
+	mtlr	r12
+	crclr	cr0*4+so
+	li	r3,0
+	beqlr
+
 	lwz	r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */
 	lwz	r5,CFG_TZ_DSTTIME(r9)
 	stw	r4,TZONE_TZ_MINWEST(r11)
 	stw	r5,TZONE_TZ_DSTTIME(r11)
 
-1:	mtlr	r12
-	crclr	cr0*4+so
-	li	r3,0
 	blr
   .cfi_endproc
 V_FUNCTION_END(__kernel_gettimeofday)
@@ -248,10 +248,10 @@ V_FUNCTION_BEGIN(__kernel_time)
 	lwz	r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
 
 	cmplwi	r11,0			/* check if t is NULL */
-	beq	2f
-	stw	r3,0(r11)		/* store result at *t */
-2:	mtlr	r12
+	mtlr	r12
 	crclr	cr0*4+so
+	beqlr
+	stw	r3,0(r11)		/* store result at *t */
 	blr
   .cfi_endproc
 V_FUNCTION_END(__kernel_time)
-- 
2.13.3


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()
  2019-08-22 16:34 ` [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage() Christophe Leroy
@ 2019-08-26  5:44   ` Santosh Sivaraj
  2019-09-13 10:59     ` Christophe Leroy
  2019-10-29 16:12     ` Christophe Leroy
  0 siblings, 2 replies; 17+ messages in thread
From: Santosh Sivaraj @ 2019-08-26  5:44 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

Hi Christophe,

Christophe Leroy <christophe.leroy@c-s.fr> writes:

> __get_datapage() is only a few instructions to retrieve the
> address of the page where the kernel stores data to the VDSO.
>
> By inlining this function into its users, a bl/blr pair and
> a mflr/mtlr pair is avoided, plus a few reg moves.
>
> The improvement is noticeable (about 55 nsec/call on an 8xx)
>
> vdsotest before the patch:
> gettimeofday:    vdso: 731 nsec/call
> clock-gettime-realtime-coarse:    vdso: 668 nsec/call
> clock-gettime-monotonic-coarse:    vdso: 745 nsec/call
>
> vdsotest after the patch:
> gettimeofday:    vdso: 677 nsec/call
> clock-gettime-realtime-coarse:    vdso: 613 nsec/call
> clock-gettime-monotonic-coarse:    vdso: 690 nsec/call
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
>  arch/powerpc/kernel/vdso32/cacheflush.S   | 10 +++++-----
>  arch/powerpc/kernel/vdso32/datapage.S     | 29 ++++-------------------------
>  arch/powerpc/kernel/vdso32/datapage.h     | 11 +++++++++++
>  arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
>  4 files changed, 26 insertions(+), 37 deletions(-)
>  create mode 100644 arch/powerpc/kernel/vdso32/datapage.h

The datapage.h file should ideally be moved under include/asm, then we can use
the same for powerpc64 too.

Santosh

>
> diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
> index 7f882e7b9f43..e9453837e4ee 100644
> --- a/arch/powerpc/kernel/vdso32/cacheflush.S
> +++ b/arch/powerpc/kernel/vdso32/cacheflush.S
> @@ -10,6 +10,8 @@
>  #include <asm/vdso.h>
>  #include <asm/asm-offsets.h>
>  
> +#include "datapage.h"
> +
>  	.text
>  
>  /*
> @@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>    .cfi_startproc
>  	mflr	r12
>    .cfi_register lr,r12
> -	mr	r11,r3
> -	bl	__get_datapage@local
> +	get_datapage	r10, r0
>  	mtlr	r12
> -	mr	r10,r3
>  
>  	lwz	r7,CFG_DCACHE_BLOCKSZ(r10)
>  	addi	r5,r7,-1
> -	andc	r6,r11,r5		/* round low to line bdy */
> +	andc	r6,r3,r5		/* round low to line bdy */
>  	subf	r8,r6,r4		/* compute length */
>  	add	r8,r8,r5		/* ensure we get enough */
>  	lwz	r9,CFG_DCACHE_LOGBLOCKSZ(r10)
> @@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>  
>  	lwz	r7,CFG_ICACHE_BLOCKSZ(r10)
>  	addi	r5,r7,-1
> -	andc	r6,r11,r5		/* round low to line bdy */
> +	andc	r6,r3,r5		/* round low to line bdy */
>  	subf	r8,r6,r4		/* compute length */
>  	add	r8,r8,r5
>  	lwz	r9,CFG_ICACHE_LOGBLOCKSZ(r10)
> diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
> index 6984125b9fc0..d480d2d4a3fe 100644
> --- a/arch/powerpc/kernel/vdso32/datapage.S
> +++ b/arch/powerpc/kernel/vdso32/datapage.S
> @@ -11,34 +11,13 @@
>  #include <asm/unistd.h>
>  #include <asm/vdso.h>
>  
> +#include "datapage.h"
> +
>  	.text
>  	.global	__kernel_datapage_offset;
>  __kernel_datapage_offset:
>  	.long	0
>  
> -V_FUNCTION_BEGIN(__get_datapage)
> -  .cfi_startproc
> -	/* We don't want that exposed or overridable as we want other objects
> -	 * to be able to bl directly to here
> -	 */
> -	.protected __get_datapage
> -	.hidden __get_datapage
> -
> -	mflr	r0
> -  .cfi_register lr,r0
> -
> -	bcl	20,31,data_page_branch
> -data_page_branch:
> -	mflr	r3
> -	mtlr	r0
> -	addi	r3, r3, __kernel_datapage_offset-data_page_branch
> -	lwz	r0,0(r3)
> -  .cfi_restore lr
> -	add	r3,r0,r3
> -	blr
> -  .cfi_endproc
> -V_FUNCTION_END(__get_datapage)
> -
>  /*
>   * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
>   *
> @@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
>  	mflr	r12
>    .cfi_register lr,r12
>  	mr	r4,r3
> -	bl	__get_datapage@local
> +	get_datapage	r3, r0
>  	mtlr	r12
>  	addi	r3,r3,CFG_SYSCALL_MAP32
>  	cmpli	cr0,r4,0
> @@ -74,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
>    .cfi_startproc
>  	mflr	r12
>    .cfi_register lr,r12
> -	bl	__get_datapage@local
> +	get_datapage	r3, r0
>  	lwz	r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
>  	lwz	r3,CFG_TB_TICKS_PER_SEC(r3)
>  	mtlr	r12
> diff --git a/arch/powerpc/kernel/vdso32/datapage.h b/arch/powerpc/kernel/vdso32/datapage.h
> new file mode 100644
> index 000000000000..74f4f57c2da8
> --- /dev/null
> +++ b/arch/powerpc/kernel/vdso32/datapage.h
> @@ -0,0 +1,11 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +
> +.macro get_datapage ptr, tmp
> +	bcl	20,31,.+4
> +	mflr	\ptr
> +	addi	\ptr, \ptr, __kernel_datapage_offset - (.-4)
> +	lwz	\tmp, 0(\ptr)
> +	add	\ptr, \tmp, \ptr
> +.endm
> +
> +
> diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
> index 355b537d327a..3e55cba19f44 100644
> --- a/arch/powerpc/kernel/vdso32/gettimeofday.S
> +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
> @@ -12,6 +12,8 @@
>  #include <asm/asm-offsets.h>
>  #include <asm/unistd.h>
>  
> +#include "datapage.h"
> +
>  /* Offset for the low 32-bit part of a field of long type */
>  #ifdef CONFIG_PPC64
>  #define LOPART	4
> @@ -35,8 +37,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
>  
>  	mr	r10,r3			/* r10 saves tv */
>  	mr	r11,r4			/* r11 saves tz */
> -	bl	__get_datapage@local	/* get data page */
> -	mr	r9, r3			/* datapage ptr in r9 */
> +	get_datapage	r9, r0
>  	cmplwi	r10,0			/* check if tv is NULL */
>  	beq	3f
>  	lis	r7,1000000@ha		/* load up USEC_PER_SEC */
> @@ -82,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
>  	mflr	r12			/* r12 saves lr */
>    .cfi_register lr,r12
>  	mr	r11,r4			/* r11 saves tp */
> -	bl	__get_datapage@local	/* get data page */
> -	mr	r9,r3			/* datapage ptr in r9 */
> +	get_datapage	r9, r0
>  	lis	r7,NSEC_PER_SEC@h	/* want nanoseconds */
>  	ori	r7,r7,NSEC_PER_SEC@l
>  	beq	cr5, .Lcoarse_clocks
> @@ -208,7 +208,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
>  
>  	mflr	r12
>    .cfi_register lr,r12
> -	bl	__get_datapage@local	/* get data page */
> +	get_datapage	r3, r0
>  	lwz	r5, CLOCK_HRTIMER_RES(r3)
>  	mtlr	r12
>  	li	r3,0
> @@ -242,8 +242,7 @@ V_FUNCTION_BEGIN(__kernel_time)
>    .cfi_register lr,r12
>  
>  	mr	r11,r3			/* r11 holds t */
> -	bl	__get_datapage@local
> -	mr	r9, r3			/* datapage ptr in r9 */
> +	get_datapage	r9, r0
>  
>  	lwz	r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
>  
> -- 
> 2.13.3

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()
  2019-08-26  5:44   ` Santosh Sivaraj
@ 2019-09-13 10:59     ` Christophe Leroy
  2019-09-13 13:31       ` Santosh Sivaraj
  2019-10-29 16:12     ` Christophe Leroy
  1 sibling, 1 reply; 17+ messages in thread
From: Christophe Leroy @ 2019-09-13 10:59 UTC (permalink / raw)
  To: Santosh Sivaraj, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

Hi Santosh,

Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit :
> Hi Christophe,
> 
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
> 
>> __get_datapage() is only a few instructions to retrieve the
>> address of the page where the kernel stores data to the VDSO.
>>
>> By inlining this function into its users, a bl/blr pair and
>> a mflr/mtlr pair is avoided, plus a few reg moves.
>>
>> The improvement is noticeable (about 55 nsec/call on an 8xx)
>>
>> vdsotest before the patch:
>> gettimeofday:    vdso: 731 nsec/call
>> clock-gettime-realtime-coarse:    vdso: 668 nsec/call
>> clock-gettime-monotonic-coarse:    vdso: 745 nsec/call
>>
>> vdsotest after the patch:
>> gettimeofday:    vdso: 677 nsec/call
>> clock-gettime-realtime-coarse:    vdso: 613 nsec/call
>> clock-gettime-monotonic-coarse:    vdso: 690 nsec/call
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>> ---
>>   arch/powerpc/kernel/vdso32/cacheflush.S   | 10 +++++-----
>>   arch/powerpc/kernel/vdso32/datapage.S     | 29 ++++-------------------------
>>   arch/powerpc/kernel/vdso32/datapage.h     | 11 +++++++++++
>>   arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
>>   4 files changed, 26 insertions(+), 37 deletions(-)
>>   create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
> 
> The datapage.h file should ideally be moved under include/asm, then we can use
> the same for powerpc64 too.

I have a more ambitious project indeed.

Most of the VDSO code is duplicated between vdso32 and vdso64. I'm 
aiming at merging everything into a single source code.

This means we would have to generate vdso32.so and vdso64.so out of the 
same source files. Any idea on how to do that ? I'm not too good at 
creating Makefiles. I guess we would have everything in 
arch/powerpc/kernel/vdso/ and would have to build the objects twice, 
once in arch/powerpc/kernel/vdso32/ and once in arch/powerpc/kernel/vdso64/

Christophe

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()
  2019-09-13 10:59     ` Christophe Leroy
@ 2019-09-13 13:31       ` Santosh Sivaraj
  2019-09-13 13:50         ` Christophe Leroy
  0 siblings, 1 reply; 17+ messages in thread
From: Santosh Sivaraj @ 2019-09-13 13:31 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

Christophe Leroy <christophe.leroy@c-s.fr> writes:

> Hi Santosh,
>
> Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit :
>> Hi Christophe,
>> 
>> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>> 
>>> __get_datapage() is only a few instructions to retrieve the
>>> address of the page where the kernel stores data to the VDSO.
>>>
>>> By inlining this function into its users, a bl/blr pair and
>>> a mflr/mtlr pair is avoided, plus a few reg moves.
>>>
>>> The improvement is noticeable (about 55 nsec/call on an 8xx)
>>>
>>> vdsotest before the patch:
>>> gettimeofday:    vdso: 731 nsec/call
>>> clock-gettime-realtime-coarse:    vdso: 668 nsec/call
>>> clock-gettime-monotonic-coarse:    vdso: 745 nsec/call
>>>
>>> vdsotest after the patch:
>>> gettimeofday:    vdso: 677 nsec/call
>>> clock-gettime-realtime-coarse:    vdso: 613 nsec/call
>>> clock-gettime-monotonic-coarse:    vdso: 690 nsec/call
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>>> ---
>>>   arch/powerpc/kernel/vdso32/cacheflush.S   | 10 +++++-----
>>>   arch/powerpc/kernel/vdso32/datapage.S     | 29 ++++-------------------------
>>>   arch/powerpc/kernel/vdso32/datapage.h     | 11 +++++++++++
>>>   arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
>>>   4 files changed, 26 insertions(+), 37 deletions(-)
>>>   create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
>> 
>> The datapage.h file should ideally be moved under include/asm, then we can use
>> the same for powerpc64 too.
>
> I have a more ambitious project indeed.
>
> Most of the VDSO code is duplicated between vdso32 and vdso64. I'm 
> aiming at merging everything into a single source code.
>
> This means we would have to generate vdso32.so and vdso64.so out of the 
> same source files. Any idea on how to do that ? I'm not too good at 
> creating Makefiles. I guess we would have everything in 
> arch/powerpc/kernel/vdso/ and would have to build the objects twice, 
> once in arch/powerpc/kernel/vdso32/ and once in arch/powerpc/kernel/vdso64/

Should we need to build the objects twice? For 64 bit config it is going to be
a 64 bit build else a 32 bit build. It should suffice to get the single source
code compile for both, maybe with macros or (!)CONFIG_PPC64 conditional
compilation. Am I missing something when you say build twice?

Thanks,
Santosh

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()
  2019-09-13 13:31       ` Santosh Sivaraj
@ 2019-09-13 13:50         ` Christophe Leroy
  2019-09-13 14:03           ` Santosh Sivaraj
  0 siblings, 1 reply; 17+ messages in thread
From: Christophe Leroy @ 2019-09-13 13:50 UTC (permalink / raw)
  To: Santosh Sivaraj, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman
  Cc: linuxppc-dev, linux-kernel



Le 13/09/2019 à 15:31, Santosh Sivaraj a écrit :
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
> 
>> Hi Santosh,
>>
>> Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit :
>>> Hi Christophe,
>>>
>>> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>>>
>>>> __get_datapage() is only a few instructions to retrieve the
>>>> address of the page where the kernel stores data to the VDSO.
>>>>
>>>> By inlining this function into its users, a bl/blr pair and
>>>> a mflr/mtlr pair is avoided, plus a few reg moves.
>>>>
>>>> The improvement is noticeable (about 55 nsec/call on an 8xx)
>>>>
>>>> vdsotest before the patch:
>>>> gettimeofday:    vdso: 731 nsec/call
>>>> clock-gettime-realtime-coarse:    vdso: 668 nsec/call
>>>> clock-gettime-monotonic-coarse:    vdso: 745 nsec/call
>>>>
>>>> vdsotest after the patch:
>>>> gettimeofday:    vdso: 677 nsec/call
>>>> clock-gettime-realtime-coarse:    vdso: 613 nsec/call
>>>> clock-gettime-monotonic-coarse:    vdso: 690 nsec/call
>>>>
>>>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>>>> ---
>>>>    arch/powerpc/kernel/vdso32/cacheflush.S   | 10 +++++-----
>>>>    arch/powerpc/kernel/vdso32/datapage.S     | 29 ++++-------------------------
>>>>    arch/powerpc/kernel/vdso32/datapage.h     | 11 +++++++++++
>>>>    arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
>>>>    4 files changed, 26 insertions(+), 37 deletions(-)
>>>>    create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
>>>
>>> The datapage.h file should ideally be moved under include/asm, then we can use
>>> the same for powerpc64 too.
>>
>> I have a more ambitious project indeed.
>>
>> Most of the VDSO code is duplicated between vdso32 and vdso64. I'm
>> aiming at merging everything into a single source code.
>>
>> This means we would have to generate vdso32.so and vdso64.so out of the
>> same source files. Any idea on how to do that ? I'm not too good at
>> creating Makefiles. I guess we would have everything in
>> arch/powerpc/kernel/vdso/ and would have to build the objects twice,
>> once in arch/powerpc/kernel/vdso32/ and once in arch/powerpc/kernel/vdso64/
> 
> Should we need to build the objects twice? For 64 bit config it is going to be
> a 64 bit build else a 32 bit build. It should suffice to get the single source
> code compile for both, maybe with macros or (!)CONFIG_PPC64 conditional
> compilation. Am I missing something when you say build twice?
> 

IIUC, on PPC64 we build vdso64 for 64bits user apps and vdso32 for 
32bits user apps.

In arch/powerpc/kernel/Makefile, you have:

obj-$(CONFIG_VDSO32)		+= vdso32/
obj-$(CONFIG_PPC64)		+= vdso64/

And in arch/powerpc/platforms/Kconfig.cputype, you have:

config VDSO32
	def_bool y
	depends on PPC32 || CPU_BIG_ENDIAN
	help
	  This symbol controls whether we build the 32-bit VDSO. We obviously
	  want to do that if we're building a 32-bit kernel. If we're building
	  a 64-bit kernel then we only want a 32-bit VDSO if we're building for
	  big endian. That is because the only little endian configuration we
	  support is ppc64le which is 64-bit only.




Christophe

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()
  2019-09-13 13:50         ` Christophe Leroy
@ 2019-09-13 14:03           ` Santosh Sivaraj
  0 siblings, 0 replies; 17+ messages in thread
From: Santosh Sivaraj @ 2019-09-13 14:03 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

Christophe Leroy <christophe.leroy@c-s.fr> writes:

> Le 13/09/2019 à 15:31, Santosh Sivaraj a écrit :
>> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>> 
>>> Hi Santosh,
>>>
>>> Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit :
>>>> Hi Christophe,
>>>>
>>>> Christophe Leroy <christophe.leroy@c-s.fr> writes:
>>>>
>>>>> __get_datapage() is only a few instructions to retrieve the
>>>>> address of the page where the kernel stores data to the VDSO.
>>>>>
>>>>> By inlining this function into its users, a bl/blr pair and
>>>>> a mflr/mtlr pair is avoided, plus a few reg moves.
>>>>>
>>>>> The improvement is noticeable (about 55 nsec/call on an 8xx)
>>>>>
>>>>> vdsotest before the patch:
>>>>> gettimeofday:    vdso: 731 nsec/call
>>>>> clock-gettime-realtime-coarse:    vdso: 668 nsec/call
>>>>> clock-gettime-monotonic-coarse:    vdso: 745 nsec/call
>>>>>
>>>>> vdsotest after the patch:
>>>>> gettimeofday:    vdso: 677 nsec/call
>>>>> clock-gettime-realtime-coarse:    vdso: 613 nsec/call
>>>>> clock-gettime-monotonic-coarse:    vdso: 690 nsec/call
>>>>>
>>>>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>>>>> ---
>>>>>    arch/powerpc/kernel/vdso32/cacheflush.S   | 10 +++++-----
>>>>>    arch/powerpc/kernel/vdso32/datapage.S     | 29 ++++-------------------------
>>>>>    arch/powerpc/kernel/vdso32/datapage.h     | 11 +++++++++++
>>>>>    arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
>>>>>    4 files changed, 26 insertions(+), 37 deletions(-)
>>>>>    create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
>>>>
>>>> The datapage.h file should ideally be moved under include/asm, then we can use
>>>> the same for powerpc64 too.
>>>
>>> I have a more ambitious project indeed.
>>>
>>> Most of the VDSO code is duplicated between vdso32 and vdso64. I'm
>>> aiming at merging everything into a single source code.
>>>
>>> This means we would have to generate vdso32.so and vdso64.so out of the
>>> same source files. Any idea on how to do that ? I'm not too good at
>>> creating Makefiles. I guess we would have everything in
>>> arch/powerpc/kernel/vdso/ and would have to build the objects twice,
>>> once in arch/powerpc/kernel/vdso32/ and once in arch/powerpc/kernel/vdso64/
>> 
>> Should we need to build the objects twice? For 64 bit config it is going to be
>> a 64 bit build else a 32 bit build. It should suffice to get the single source
>> code compile for both, maybe with macros or (!)CONFIG_PPC64 conditional
>> compilation. Am I missing something when you say build twice?
>> 
>
> IIUC, on PPC64 we build vdso64 for 64bits user apps and vdso32 for 
> 32bits user apps.
>
> In arch/powerpc/kernel/Makefile, you have:
>
> obj-$(CONFIG_VDSO32)		+= vdso32/
> obj-$(CONFIG_PPC64)		+= vdso64/
>
> And in arch/powerpc/platforms/Kconfig.cputype, you have:
>
> config VDSO32
> 	def_bool y
> 	depends on PPC32 || CPU_BIG_ENDIAN
> 	help
> 	  This symbol controls whether we build the 32-bit VDSO. We obviously
> 	  want to do that if we're building a 32-bit kernel. If we're building
> 	  a 64-bit kernel then we only want a 32-bit VDSO if we're building for
> 	  big endian. That is because the only little endian configuration we
> 	  support is ppc64le which is 64-bit only.
>

I didn't know we build 32 bit vdso for 64 bit big endians. But I don't think
its difficult to do it, might be a bit tricky. We can have two targets from
the same source.

SRC = vdso/*.c
OBJS_32 = $(SRC:.c=vdso32/.o)
OBJS_64 = $(SRC:.c=vdso64/.o)

Something like this would work. Of course, this is out of memory, might have to
do something slightly different for the Makefiles in kernel.

Thanks,
Santosh

>
>
>
> Christophe

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 1/8] powerpc/32: Add VDSO version of getcpu
  2019-08-22 16:34 ` [PATCH v2 1/8] powerpc/32: Add VDSO version of getcpu Christophe Leroy
@ 2019-09-18  5:51   ` Michael Ellerman
  2019-10-29 16:10     ` Christophe Leroy
  0 siblings, 1 reply; 17+ messages in thread
From: Michael Ellerman @ 2019-09-18  5:51 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras
  Cc: linux-kernel, linuxppc-dev

Hi Christophe,

Sorry I'm late replying to this.

Christophe Leroy <christophe.leroy@c-s.fr> writes:
> Commit 18ad51dd342a ("powerpc: Add VDSO version of getcpu") added
> getcpu() for PPC64 only, by making use of a user readable general
> purpose SPR.
>
> PPC32 doesn't have any such SPR, a full system call can still be
> avoided by implementing a fast system call which reads the CPU id
> in the task struct and returns immediately without going back in
> virtual mode.
>
> Before the patch, vdsotest reported:
> getcpu: syscall: 1572 nsec/call
> getcpu:    libc: 1787 nsec/call
> getcpu:    vdso: not tested
>
> Now, vdsotest reports:
> getcpu: syscall: 1582 nsec/call
> getcpu:    libc: 667 nsec/call
> getcpu:    vdso: 368 nsec/call
>
> For non SMP, just return CPU id 0 from the VDSO directly.
>
> PPC32 doesn't support CONFIG_NUMA so NUMA node is always 0.
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>
> ---
> v2: fixed build error in getcpu.S
> ---
>  arch/powerpc/include/asm/vdso.h         |  2 ++
>  arch/powerpc/kernel/head_32.h           | 13 +++++++++++++
>  arch/powerpc/kernel/head_booke.h        | 11 +++++++++++
>  arch/powerpc/kernel/vdso32/Makefile     |  4 +---
>  arch/powerpc/kernel/vdso32/getcpu.S     |  7 +++++++
>  arch/powerpc/kernel/vdso32/vdso32.lds.S |  2 --
>  6 files changed, 34 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h
> index b5e1f8f8a05c..adb54782df5f 100644
> --- a/arch/powerpc/include/asm/vdso.h
> +++ b/arch/powerpc/include/asm/vdso.h
> @@ -16,6 +16,8 @@
>  /* Define if 64 bits VDSO has procedure descriptors */
>  #undef VDS64_HAS_DESCRIPTORS
>  
> +#define NR_MAGIC_FAST_VDSO_SYSCALL	0x789a

We are still in the middle of the years long process of removing the
"magic" syscall on 64-bit:

  https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/powerpc/kernel/exceptions-64s.S?commit=4d856f72c10ecb060868ed10ff1b1453943fc6c8#n1578
 

Can we not add another one on 32-bit?

Is it really such a fast path that it's worth putting a wart in the
syscall entry like that?

Is there some other method? On s390 they have a per-cpu VDSO page, that
would be a nice option. How we do that would be specific to a particular
MMU, and maybe not even possible with some MMUs. So maybe that's not
feasible.

If you do want to add a fastpath syscall then please just add it as a
regular syscall number, that way it's at least a bit less of a wart.
It's still not visible via tracing/ptrace etc. which is a pain but at
least the number is not "magical" too.

cheers


>  /* Offsets relative to thread->vdso_base */
> diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
> index 4a692553651f..a2e38b59785a 100644
> --- a/arch/powerpc/kernel/head_32.h
> +++ b/arch/powerpc/kernel/head_32.h
> @@ -3,6 +3,8 @@
>  #define __HEAD_32_H__
>  
>  #include <asm/ptrace.h>	/* for STACK_FRAME_REGS_MARKER */
> +#include <asm/vdso.h>
> +#include <asm/asm-offsets.h>
>  
>  /*
>   * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE.
> @@ -74,7 +76,13 @@
>  .endm
>  
>  .macro SYSCALL_ENTRY trapno
> +#ifdef CONFIG_SMP
> +	cmplwi	cr0, r0, NR_MAGIC_FAST_VDSO_SYSCALL
> +#endif
>  	mfspr	r12,SPRN_SPRG_THREAD
> +#ifdef CONFIG_SMP
> +	beq-	1f
> +#endif
>  	mfcr	r10
>  	lwz	r11,TASK_STACK-THREAD(r12)
>  	mflr	r9
> @@ -152,6 +160,11 @@
>  	mtspr	SPRN_SRR0,r11
>  	SYNC
>  	RFI				/* jump to handler, enable MMU */
> +#ifdef CONFIG_SMP
> +1:
> +	lwz	r5, TASK_CPU - THREAD(r12)
> +	RFI
> +#endif
>  .endm
>  
>  /*
> diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
> index 2ae635df9026..c534e87cac84 100644
> --- a/arch/powerpc/kernel/head_booke.h
> +++ b/arch/powerpc/kernel/head_booke.h
> @@ -3,6 +3,8 @@
>  #define __HEAD_BOOKE_H__
>  
>  #include <asm/ptrace.h>	/* for STACK_FRAME_REGS_MARKER */
> +#include <asm/vdso.h>
> +#include <asm/asm-offsets.h>
>  #include <asm/kvm_asm.h>
>  #include <asm/kvm_booke_hv_asm.h>
>  
> @@ -104,6 +106,10 @@ FTR_SECTION_ELSE
>  #ifdef CONFIG_KVM_BOOKE_HV
>  ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
>  #endif
> +#ifdef CONFIG_SMP
> +	cmplwi	cr0, r0, NR_MAGIC_FAST_VDSO_SYSCALL
> +	beq-	1f
> +#endif
>  	BOOKE_CLEAR_BTB(r11)
>  	lwz	r11, TASK_STACK - THREAD(r10)
>  	rlwinm	r12,r12,0,4,2	/* Clear SO bit in CR */
> @@ -176,6 +182,11 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
>  	mtspr	SPRN_SRR0,r11
>  	SYNC
>  	RFI				/* jump to handler, enable MMU */
> +#ifdef CONFIG_SMP
> +1:
> +	lwz	r5, TASK_CPU - THREAD(r10)
> +	RFI
> +#endif
>  .endm
>  
>  /* To handle the additional exception priority levels on 40x and Book-E
> diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
> index 06f54d947057..e147bbdc12cd 100644
> --- a/arch/powerpc/kernel/vdso32/Makefile
> +++ b/arch/powerpc/kernel/vdso32/Makefile
> @@ -2,9 +2,7 @@
>  
>  # List of files in the vdso, has to be asm only for now
>  
> -obj-vdso32-$(CONFIG_PPC64) = getcpu.o
> -obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o \
> -		$(obj-vdso32-y)
> +obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
>  
>  # Build rules
>  
> diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S
> index 63e914539e1a..bde226ad904d 100644
> --- a/arch/powerpc/kernel/vdso32/getcpu.S
> +++ b/arch/powerpc/kernel/vdso32/getcpu.S
> @@ -17,7 +17,14 @@
>   */
>  V_FUNCTION_BEGIN(__kernel_getcpu)
>    .cfi_startproc
> +#if defined(CONFIG_PPC64)
>  	mfspr	r5,SPRN_SPRG_VDSO_READ
> +#elif defined(CONFIG_SMP)
> +	li	r0, NR_MAGIC_FAST_VDSO_SYSCALL
> +	sc	/* returns cpuid in r5, clobbers cr0 and r10-r13 */
> +#else
> +	li	r5, 0
> +#endif
>  	cmpwi	cr0,r3,0
>  	cmpwi	cr1,r4,0
>  	clrlwi  r6,r5,16
> diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
> index 099a6db14e67..663880671e20 100644
> --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
> +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
> @@ -152,9 +152,7 @@ VERSION
>  		__kernel_sync_dicache_p5;
>  		__kernel_sigtramp32;
>  		__kernel_sigtramp_rt32;
> -#ifdef CONFIG_PPC64
>  		__kernel_getcpu;
> -#endif
>  		__kernel_time;
>  
>  	local: *;
> -- 
> 2.13.3

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 1/8] powerpc/32: Add VDSO version of getcpu
  2019-09-18  5:51   ` Michael Ellerman
@ 2019-10-29 16:10     ` Christophe Leroy
  0 siblings, 0 replies; 17+ messages in thread
From: Christophe Leroy @ 2019-10-29 16:10 UTC (permalink / raw)
  To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras
  Cc: linux-kernel, linuxppc-dev



Le 18/09/2019 à 07:51, Michael Ellerman a écrit :
> 
> We are still in the middle of the years long process of removing the
> "magic" syscall on 64-bit:
> 
>    https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/powerpc/kernel/exceptions-64s.S?commit=4d856f72c10ecb060868ed10ff1b1453943fc6c8#n1578
>   
> 
> Can we not add another one on 32-bit?
> 
> Is it really such a fast path that it's worth putting a wart in the
> syscall entry like that?
> 
> Is there some other method? On s390 they have a per-cpu VDSO page, that
> would be a nice option. How we do that would be specific to a particular
> MMU, and maybe not even possible with some MMUs. So maybe that's not
> feasible.

Ok, for now I remove the fast syscall and only keep the VDSO getcpu() 
for non SMP.

I think we may in the future implement a per-cpu VDSO page that will be 
mapped to the process based on the CPU it is running on. I need to look 
at that in more details.

Christophe


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()
  2019-08-26  5:44   ` Santosh Sivaraj
  2019-09-13 10:59     ` Christophe Leroy
@ 2019-10-29 16:12     ` Christophe Leroy
  1 sibling, 0 replies; 17+ messages in thread
From: Christophe Leroy @ 2019-10-29 16:12 UTC (permalink / raw)
  To: Santosh Sivaraj, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

Hi Santosh,

Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit :
> Hi Christophe,
> 
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
> 
>> __get_datapage() is only a few instructions to retrieve the
>> address of the page where the kernel stores data to the VDSO.
>>
>> By inlining this function into its users, a bl/blr pair and
>> a mflr/mtlr pair is avoided, plus a few reg moves.
>>
>> The improvement is noticeable (about 55 nsec/call on an 8xx)
>>
>> vdsotest before the patch:
>> gettimeofday:    vdso: 731 nsec/call
>> clock-gettime-realtime-coarse:    vdso: 668 nsec/call
>> clock-gettime-monotonic-coarse:    vdso: 745 nsec/call
>>
>> vdsotest after the patch:
>> gettimeofday:    vdso: 677 nsec/call
>> clock-gettime-realtime-coarse:    vdso: 613 nsec/call
>> clock-gettime-monotonic-coarse:    vdso: 690 nsec/call
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>> ---
>>   arch/powerpc/kernel/vdso32/cacheflush.S   | 10 +++++-----
>>   arch/powerpc/kernel/vdso32/datapage.S     | 29 ++++-------------------------
>>   arch/powerpc/kernel/vdso32/datapage.h     | 11 +++++++++++
>>   arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
>>   4 files changed, 26 insertions(+), 37 deletions(-)
>>   create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
> 
> The datapage.h file should ideally be moved under include/asm, then we can use
> the same for powerpc64 too.

Finally, I added the get_datapage macro to the existing asm/vdso_datapage.h

Christophe

> 
> Santosh
> 
>>
>> diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
>> index 7f882e7b9f43..e9453837e4ee 100644
>> --- a/arch/powerpc/kernel/vdso32/cacheflush.S
>> +++ b/arch/powerpc/kernel/vdso32/cacheflush.S
>> @@ -10,6 +10,8 @@
>>   #include <asm/vdso.h>
>>   #include <asm/asm-offsets.h>
>>   
>> +#include "datapage.h"
>> +
>>   	.text
>>   
>>   /*
>> @@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>>     .cfi_startproc
>>   	mflr	r12
>>     .cfi_register lr,r12
>> -	mr	r11,r3
>> -	bl	__get_datapage@local
>> +	get_datapage	r10, r0
>>   	mtlr	r12
>> -	mr	r10,r3
>>   
>>   	lwz	r7,CFG_DCACHE_BLOCKSZ(r10)
>>   	addi	r5,r7,-1
>> -	andc	r6,r11,r5		/* round low to line bdy */
>> +	andc	r6,r3,r5		/* round low to line bdy */
>>   	subf	r8,r6,r4		/* compute length */
>>   	add	r8,r8,r5		/* ensure we get enough */
>>   	lwz	r9,CFG_DCACHE_LOGBLOCKSZ(r10)
>> @@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>>   
>>   	lwz	r7,CFG_ICACHE_BLOCKSZ(r10)
>>   	addi	r5,r7,-1
>> -	andc	r6,r11,r5		/* round low to line bdy */
>> +	andc	r6,r3,r5		/* round low to line bdy */
>>   	subf	r8,r6,r4		/* compute length */
>>   	add	r8,r8,r5
>>   	lwz	r9,CFG_ICACHE_LOGBLOCKSZ(r10)
>> diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
>> index 6984125b9fc0..d480d2d4a3fe 100644
>> --- a/arch/powerpc/kernel/vdso32/datapage.S
>> +++ b/arch/powerpc/kernel/vdso32/datapage.S
>> @@ -11,34 +11,13 @@
>>   #include <asm/unistd.h>
>>   #include <asm/vdso.h>
>>   
>> +#include "datapage.h"
>> +
>>   	.text
>>   	.global	__kernel_datapage_offset;
>>   __kernel_datapage_offset:
>>   	.long	0
>>   
>> -V_FUNCTION_BEGIN(__get_datapage)
>> -  .cfi_startproc
>> -	/* We don't want that exposed or overridable as we want other objects
>> -	 * to be able to bl directly to here
>> -	 */
>> -	.protected __get_datapage
>> -	.hidden __get_datapage
>> -
>> -	mflr	r0
>> -  .cfi_register lr,r0
>> -
>> -	bcl	20,31,data_page_branch
>> -data_page_branch:
>> -	mflr	r3
>> -	mtlr	r0
>> -	addi	r3, r3, __kernel_datapage_offset-data_page_branch
>> -	lwz	r0,0(r3)
>> -  .cfi_restore lr
>> -	add	r3,r0,r3
>> -	blr
>> -  .cfi_endproc
>> -V_FUNCTION_END(__get_datapage)
>> -
>>   /*
>>    * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
>>    *
>> @@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
>>   	mflr	r12
>>     .cfi_register lr,r12
>>   	mr	r4,r3
>> -	bl	__get_datapage@local
>> +	get_datapage	r3, r0
>>   	mtlr	r12
>>   	addi	r3,r3,CFG_SYSCALL_MAP32
>>   	cmpli	cr0,r4,0
>> @@ -74,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
>>     .cfi_startproc
>>   	mflr	r12
>>     .cfi_register lr,r12
>> -	bl	__get_datapage@local
>> +	get_datapage	r3, r0
>>   	lwz	r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
>>   	lwz	r3,CFG_TB_TICKS_PER_SEC(r3)
>>   	mtlr	r12
>> diff --git a/arch/powerpc/kernel/vdso32/datapage.h b/arch/powerpc/kernel/vdso32/datapage.h
>> new file mode 100644
>> index 000000000000..74f4f57c2da8
>> --- /dev/null
>> +++ b/arch/powerpc/kernel/vdso32/datapage.h
>> @@ -0,0 +1,11 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +
>> +.macro get_datapage ptr, tmp
>> +	bcl	20,31,.+4
>> +	mflr	\ptr
>> +	addi	\ptr, \ptr, __kernel_datapage_offset - (.-4)
>> +	lwz	\tmp, 0(\ptr)
>> +	add	\ptr, \tmp, \ptr
>> +.endm
>> +
>> +
>> diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
>> index 355b537d327a..3e55cba19f44 100644
>> --- a/arch/powerpc/kernel/vdso32/gettimeofday.S
>> +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
>> @@ -12,6 +12,8 @@
>>   #include <asm/asm-offsets.h>
>>   #include <asm/unistd.h>
>>   
>> +#include "datapage.h"
>> +
>>   /* Offset for the low 32-bit part of a field of long type */
>>   #ifdef CONFIG_PPC64
>>   #define LOPART	4
>> @@ -35,8 +37,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
>>   
>>   	mr	r10,r3			/* r10 saves tv */
>>   	mr	r11,r4			/* r11 saves tz */
>> -	bl	__get_datapage@local	/* get data page */
>> -	mr	r9, r3			/* datapage ptr in r9 */
>> +	get_datapage	r9, r0
>>   	cmplwi	r10,0			/* check if tv is NULL */
>>   	beq	3f
>>   	lis	r7,1000000@ha		/* load up USEC_PER_SEC */
>> @@ -82,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
>>   	mflr	r12			/* r12 saves lr */
>>     .cfi_register lr,r12
>>   	mr	r11,r4			/* r11 saves tp */
>> -	bl	__get_datapage@local	/* get data page */
>> -	mr	r9,r3			/* datapage ptr in r9 */
>> +	get_datapage	r9, r0
>>   	lis	r7,NSEC_PER_SEC@h	/* want nanoseconds */
>>   	ori	r7,r7,NSEC_PER_SEC@l
>>   	beq	cr5, .Lcoarse_clocks
>> @@ -208,7 +208,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
>>   
>>   	mflr	r12
>>     .cfi_register lr,r12
>> -	bl	__get_datapage@local	/* get data page */
>> +	get_datapage	r3, r0
>>   	lwz	r5, CLOCK_HRTIMER_RES(r3)
>>   	mtlr	r12
>>   	li	r3,0
>> @@ -242,8 +242,7 @@ V_FUNCTION_BEGIN(__kernel_time)
>>     .cfi_register lr,r12
>>   
>>   	mr	r11,r3			/* r11 holds t */
>> -	bl	__get_datapage@local
>> -	mr	r9, r3			/* datapage ptr in r9 */
>> +	get_datapage	r9, r0
>>   
>>   	lwz	r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
>>   
>> -- 
>> 2.13.3

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2019-10-29 16:12 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-22 16:34 [PATCH v2 0/8] powerpc/vdso32 enhancement and optimisation Christophe Leroy
2019-08-22 16:34 ` [PATCH v2 1/8] powerpc/32: Add VDSO version of getcpu Christophe Leroy
2019-09-18  5:51   ` Michael Ellerman
2019-10-29 16:10     ` Christophe Leroy
2019-08-22 16:34 ` [PATCH v2 2/8] powerpc/vdso32: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE Christophe Leroy
2019-08-22 16:34 ` [PATCH v2 3/8] powerpc: Fix vDSO clock_getres() Christophe Leroy
2019-08-22 16:34 ` [PATCH v2 4/8] powerpc/vdso32: inline __get_datapage() Christophe Leroy
2019-08-26  5:44   ` Santosh Sivaraj
2019-09-13 10:59     ` Christophe Leroy
2019-09-13 13:31       ` Santosh Sivaraj
2019-09-13 13:50         ` Christophe Leroy
2019-09-13 14:03           ` Santosh Sivaraj
2019-10-29 16:12     ` Christophe Leroy
2019-08-22 16:34 ` [PATCH v2 5/8] powerpc/vdso32: Don't read cache line size from the datapage on PPC32 Christophe Leroy
2019-08-22 16:34 ` [PATCH v2 6/8] powerpc/vdso32: use LOAD_REG_IMMEDIATE() Christophe Leroy
2019-08-22 16:34 ` [PATCH v2 7/8] powerpc/vdso32: implement clock_getres entirely Christophe Leroy
2019-08-22 16:34 ` [PATCH v2 8/8] powerpc/vdso32: miscellaneous optimisations Christophe Leroy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).