* [PATCH] powerpc/vdso32: inline __get_datapage()
@ 2019-08-16 14:48 ` Christophe Leroy
0 siblings, 0 replies; 13+ messages in thread
From: Christophe Leroy @ 2019-08-16 14:48 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linux-kernel, linuxppc-dev
__get_datapage() is only a few instructions to retrieve the
address of the page where the kernel stores data to the VDSO.
By inlining this function into its users, a bl/blr pair and
a mflr/mtlr pair is avoided, plus a few reg moves.
The improvement is noticeable (about 55 nsec/call on an 8xx)
vdsotest before the patch:
gettimeofday: vdso: 731 nsec/call
clock-gettime-realtime-coarse: vdso: 668 nsec/call
clock-gettime-monotonic-coarse: vdso: 745 nsec/call
vdsotest after the patch:
gettimeofday: vdso: 677 nsec/call
clock-gettime-realtime-coarse: vdso: 613 nsec/call
clock-gettime-monotonic-coarse: vdso: 690 nsec/call
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-------------------------
arch/powerpc/kernel/vdso32/datapage.h | 12 ++++++++++++
arch/powerpc/kernel/vdso32/gettimeofday.S | 11 +++++------
4 files changed, 26 insertions(+), 36 deletions(-)
create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
index 7f882e7b9f43..e9453837e4ee 100644
--- a/arch/powerpc/kernel/vdso32/cacheflush.S
+++ b/arch/powerpc/kernel/vdso32/cacheflush.S
@@ -10,6 +10,8 @@
#include <asm/vdso.h>
#include <asm/asm-offsets.h>
+#include "datapage.h"
+
.text
/*
@@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- mr r11,r3
- bl __get_datapage@local
+ get_datapage r10, r0
mtlr r12
- mr r10,r3
lwz r7,CFG_DCACHE_BLOCKSZ(r10)
addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
+ andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5 /* ensure we get enough */
lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
@@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
lwz r7,CFG_ICACHE_BLOCKSZ(r10)
addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
+ andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5
lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index 6984125b9fc0..d480d2d4a3fe 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -11,34 +11,13 @@
#include <asm/unistd.h>
#include <asm/vdso.h>
+#include "datapage.h"
+
.text
.global __kernel_datapage_offset;
__kernel_datapage_offset:
.long 0
-V_FUNCTION_BEGIN(__get_datapage)
- .cfi_startproc
- /* We don't want that exposed or overridable as we want other objects
- * to be able to bl directly to here
- */
- .protected __get_datapage
- .hidden __get_datapage
-
- mflr r0
- .cfi_register lr,r0
-
- bcl 20,31,data_page_branch
-data_page_branch:
- mflr r3
- mtlr r0
- addi r3, r3, __kernel_datapage_offset-data_page_branch
- lwz r0,0(r3)
- .cfi_restore lr
- add r3,r0,r3
- blr
- .cfi_endproc
-V_FUNCTION_END(__get_datapage)
-
/*
* void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
*
@@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
mflr r12
.cfi_register lr,r12
mr r4,r3
- bl __get_datapage@local
+ get_datapage r3, r0
mtlr r12
addi r3,r3,CFG_SYSCALL_MAP32
cmpli cr0,r4,0
@@ -74,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- bl __get_datapage@local
+ get_datapage r3, r0
lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
lwz r3,CFG_TB_TICKS_PER_SEC(r3)
mtlr r12
diff --git a/arch/powerpc/kernel/vdso32/datapage.h b/arch/powerpc/kernel/vdso32/datapage.h
new file mode 100644
index 000000000000..ad96256be090
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/datapage.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+.macro get_datapage ptr, tmp
+ bcl 20,31,888f
+888:
+ mflr \ptr
+ addi \ptr, \ptr, __kernel_datapage_offset - 888b
+ lwz \tmp, 0(\ptr)
+ add \ptr, \tmp, \ptr
+.endm
+
+
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index e10098cde89c..91a58f01dcd5 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -12,6 +12,8 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
+#include "datapage.h"
+
/* Offset for the low 32-bit part of a field of long type */
#ifdef CONFIG_PPC64
#define LOPART 4
@@ -35,8 +37,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
mr r10,r3 /* r10 saves tv */
mr r11,r4 /* r11 saves tz */
- bl __get_datapage@local /* get data page */
- mr r9, r3 /* datapage ptr in r9 */
+ get_datapage r9, r0
cmplwi r10,0 /* check if tv is NULL */
beq 3f
lis r7,1000000@ha /* load up USEC_PER_SEC */
@@ -82,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
mflr r12 /* r12 saves lr */
.cfi_register lr,r12
mr r11,r4 /* r11 saves tp */
- bl __get_datapage@local /* get data page */
- mr r9,r3 /* datapage ptr in r9 */
+ get_datapage r9, r0
lis r7,NSEC_PER_SEC@h /* want nanoseconds */
ori r7,r7,NSEC_PER_SEC@l
beq cr5,70f
@@ -235,8 +235,7 @@ V_FUNCTION_BEGIN(__kernel_time)
.cfi_register lr,r12
mr r11,r3 /* r11 holds t */
- bl __get_datapage@local
- mr r9, r3 /* datapage ptr in r9 */
+ get_datapage r9, r0
lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
--
2.13.3
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH] powerpc/vdso32: inline __get_datapage()
@ 2019-08-16 14:48 ` Christophe Leroy
0 siblings, 0 replies; 13+ messages in thread
From: Christophe Leroy @ 2019-08-16 14:48 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
__get_datapage() is only a few instructions to retrieve the
address of the page where the kernel stores data to the VDSO.
By inlining this function into its users, a bl/blr pair and
a mflr/mtlr pair is avoided, plus a few reg moves.
The improvement is noticeable (about 55 nsec/call on an 8xx)
vdsotest before the patch:
gettimeofday: vdso: 731 nsec/call
clock-gettime-realtime-coarse: vdso: 668 nsec/call
clock-gettime-monotonic-coarse: vdso: 745 nsec/call
vdsotest after the patch:
gettimeofday: vdso: 677 nsec/call
clock-gettime-realtime-coarse: vdso: 613 nsec/call
clock-gettime-monotonic-coarse: vdso: 690 nsec/call
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-------------------------
arch/powerpc/kernel/vdso32/datapage.h | 12 ++++++++++++
arch/powerpc/kernel/vdso32/gettimeofday.S | 11 +++++------
4 files changed, 26 insertions(+), 36 deletions(-)
create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
index 7f882e7b9f43..e9453837e4ee 100644
--- a/arch/powerpc/kernel/vdso32/cacheflush.S
+++ b/arch/powerpc/kernel/vdso32/cacheflush.S
@@ -10,6 +10,8 @@
#include <asm/vdso.h>
#include <asm/asm-offsets.h>
+#include "datapage.h"
+
.text
/*
@@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- mr r11,r3
- bl __get_datapage@local
+ get_datapage r10, r0
mtlr r12
- mr r10,r3
lwz r7,CFG_DCACHE_BLOCKSZ(r10)
addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
+ andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5 /* ensure we get enough */
lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
@@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
lwz r7,CFG_ICACHE_BLOCKSZ(r10)
addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
+ andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5
lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index 6984125b9fc0..d480d2d4a3fe 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -11,34 +11,13 @@
#include <asm/unistd.h>
#include <asm/vdso.h>
+#include "datapage.h"
+
.text
.global __kernel_datapage_offset;
__kernel_datapage_offset:
.long 0
-V_FUNCTION_BEGIN(__get_datapage)
- .cfi_startproc
- /* We don't want that exposed or overridable as we want other objects
- * to be able to bl directly to here
- */
- .protected __get_datapage
- .hidden __get_datapage
-
- mflr r0
- .cfi_register lr,r0
-
- bcl 20,31,data_page_branch
-data_page_branch:
- mflr r3
- mtlr r0
- addi r3, r3, __kernel_datapage_offset-data_page_branch
- lwz r0,0(r3)
- .cfi_restore lr
- add r3,r0,r3
- blr
- .cfi_endproc
-V_FUNCTION_END(__get_datapage)
-
/*
* void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
*
@@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
mflr r12
.cfi_register lr,r12
mr r4,r3
- bl __get_datapage@local
+ get_datapage r3, r0
mtlr r12
addi r3,r3,CFG_SYSCALL_MAP32
cmpli cr0,r4,0
@@ -74,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- bl __get_datapage@local
+ get_datapage r3, r0
lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
lwz r3,CFG_TB_TICKS_PER_SEC(r3)
mtlr r12
diff --git a/arch/powerpc/kernel/vdso32/datapage.h b/arch/powerpc/kernel/vdso32/datapage.h
new file mode 100644
index 000000000000..ad96256be090
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/datapage.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+.macro get_datapage ptr, tmp
+ bcl 20,31,888f
+888:
+ mflr \ptr
+ addi \ptr, \ptr, __kernel_datapage_offset - 888b
+ lwz \tmp, 0(\ptr)
+ add \ptr, \tmp, \ptr
+.endm
+
+
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index e10098cde89c..91a58f01dcd5 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -12,6 +12,8 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
+#include "datapage.h"
+
/* Offset for the low 32-bit part of a field of long type */
#ifdef CONFIG_PPC64
#define LOPART 4
@@ -35,8 +37,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
mr r10,r3 /* r10 saves tv */
mr r11,r4 /* r11 saves tz */
- bl __get_datapage@local /* get data page */
- mr r9, r3 /* datapage ptr in r9 */
+ get_datapage r9, r0
cmplwi r10,0 /* check if tv is NULL */
beq 3f
lis r7,1000000@ha /* load up USEC_PER_SEC */
@@ -82,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
mflr r12 /* r12 saves lr */
.cfi_register lr,r12
mr r11,r4 /* r11 saves tp */
- bl __get_datapage@local /* get data page */
- mr r9,r3 /* datapage ptr in r9 */
+ get_datapage r9, r0
lis r7,NSEC_PER_SEC@h /* want nanoseconds */
ori r7,r7,NSEC_PER_SEC@l
beq cr5,70f
@@ -235,8 +235,7 @@ V_FUNCTION_BEGIN(__kernel_time)
.cfi_register lr,r12
mr r11,r3 /* r11 holds t */
- bl __get_datapage@local
- mr r9, r3 /* datapage ptr in r9 */
+ get_datapage r9, r0
lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
--
2.13.3
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH] powerpc/vdso32: inline __get_datapage()
2019-08-16 14:48 ` Christophe Leroy
@ 2019-08-21 4:37 ` Benjamin Herrenschmidt
-1 siblings, 0 replies; 13+ messages in thread
From: Benjamin Herrenschmidt @ 2019-08-21 4:37 UTC (permalink / raw)
To: Christophe Leroy, Paul Mackerras, Michael Ellerman
Cc: linux-kernel, linuxppc-dev
On Fri, 2019-08-16 at 14:48 +0000, Christophe Leroy wrote:
> __get_datapage() is only a few instructions to retrieve the
> address of the page where the kernel stores data to the VDSO.
>
> By inlining this function into its users, a bl/blr pair and
> a mflr/mtlr pair is avoided, plus a few reg moves.
>
> The improvement is noticeable (about 55 nsec/call on an 8xx)
Interesting... would be worth doing the same on vdso64 no ?
> vdsotest before the patch:
> gettimeofday: vdso: 731 nsec/call
> clock-gettime-realtime-coarse: vdso: 668 nsec/call
> clock-gettime-monotonic-coarse: vdso: 745 nsec/call
>
> vdsotest after the patch:
> gettimeofday: vdso: 677 nsec/call
> clock-gettime-realtime-coarse: vdso: 613 nsec/call
> clock-gettime-monotonic-coarse: vdso: 690 nsec/call
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-----------------
> --------
> arch/powerpc/kernel/vdso32/datapage.h | 12 ++++++++++++
> arch/powerpc/kernel/vdso32/gettimeofday.S | 11 +++++------
> 4 files changed, 26 insertions(+), 36 deletions(-)
> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
>
> diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S
> b/arch/powerpc/kernel/vdso32/cacheflush.S
> index 7f882e7b9f43..e9453837e4ee 100644
> --- a/arch/powerpc/kernel/vdso32/cacheflush.S
> +++ b/arch/powerpc/kernel/vdso32/cacheflush.S
> @@ -10,6 +10,8 @@
> #include <asm/vdso.h>
> #include <asm/asm-offsets.h>
>
> +#include "datapage.h"
> +
> .text
>
> /*
> @@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
> .cfi_startproc
> mflr r12
> .cfi_register lr,r12
> - mr r11,r3
> - bl __get_datapage@local
> + get_datapage r10, r0
> mtlr r12
> - mr r10,r3
>
> lwz r7,CFG_DCACHE_BLOCKSZ(r10)
> addi r5,r7,-1
> - andc r6,r11,r5 /* round low to line bdy */
> + andc r6,r3,r5 /* round low to line bdy */
> subf r8,r6,r4 /* compute length */
> add r8,r8,r5 /* ensure we get enough */
> lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
> @@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>
> lwz r7,CFG_ICACHE_BLOCKSZ(r10)
> addi r5,r7,-1
> - andc r6,r11,r5 /* round low to line bdy */
> + andc r6,r3,r5 /* round low to line bdy */
> subf r8,r6,r4 /* compute length */
> add r8,r8,r5
> lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
> diff --git a/arch/powerpc/kernel/vdso32/datapage.S
> b/arch/powerpc/kernel/vdso32/datapage.S
> index 6984125b9fc0..d480d2d4a3fe 100644
> --- a/arch/powerpc/kernel/vdso32/datapage.S
> +++ b/arch/powerpc/kernel/vdso32/datapage.S
> @@ -11,34 +11,13 @@
> #include <asm/unistd.h>
> #include <asm/vdso.h>
>
> +#include "datapage.h"
> +
> .text
> .global __kernel_datapage_offset;
> __kernel_datapage_offset:
> .long 0
>
> -V_FUNCTION_BEGIN(__get_datapage)
> - .cfi_startproc
> - /* We don't want that exposed or overridable as we want other
> objects
> - * to be able to bl directly to here
> - */
> - .protected __get_datapage
> - .hidden __get_datapage
> -
> - mflr r0
> - .cfi_register lr,r0
> -
> - bcl 20,31,data_page_branch
> -data_page_branch:
> - mflr r3
> - mtlr r0
> - addi r3, r3, __kernel_datapage_offset-data_page_branch
> - lwz r0,0(r3)
> - .cfi_restore lr
> - add r3,r0,r3
> - blr
> - .cfi_endproc
> -V_FUNCTION_END(__get_datapage)
> -
> /*
> * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
> *
> @@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
> mflr r12
> .cfi_register lr,r12
> mr r4,r3
> - bl __get_datapage@local
> + get_datapage r3, r0
> mtlr r12
> addi r3,r3,CFG_SYSCALL_MAP32
> cmpli cr0,r4,0
> @@ -74,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
> .cfi_startproc
> mflr r12
> .cfi_register lr,r12
> - bl __get_datapage@local
> + get_datapage r3, r0
> lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
> lwz r3,CFG_TB_TICKS_PER_SEC(r3)
> mtlr r12
> diff --git a/arch/powerpc/kernel/vdso32/datapage.h
> b/arch/powerpc/kernel/vdso32/datapage.h
> new file mode 100644
> index 000000000000..ad96256be090
> --- /dev/null
> +++ b/arch/powerpc/kernel/vdso32/datapage.h
> @@ -0,0 +1,12 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +
> +.macro get_datapage ptr, tmp
> + bcl 20,31,888f
> +888:
> + mflr \ptr
> + addi \ptr, \ptr, __kernel_datapage_offset - 888b
> + lwz \tmp, 0(\ptr)
> + add \ptr, \tmp, \ptr
> +.endm
> +
> +
> diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S
> b/arch/powerpc/kernel/vdso32/gettimeofday.S
> index e10098cde89c..91a58f01dcd5 100644
> --- a/arch/powerpc/kernel/vdso32/gettimeofday.S
> +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
> @@ -12,6 +12,8 @@
> #include <asm/asm-offsets.h>
> #include <asm/unistd.h>
>
> +#include "datapage.h"
> +
> /* Offset for the low 32-bit part of a field of long type */
> #ifdef CONFIG_PPC64
> #define LOPART 4
> @@ -35,8 +37,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
>
> mr r10,r3 /* r10 saves tv */
> mr r11,r4 /* r11 saves tz */
> - bl __get_datapage@local /* get data page */
> - mr r9, r3 /* datapage ptr in r9 */
> + get_datapage r9, r0
> cmplwi r10,0 /* check if tv is NULL */
> beq 3f
> lis r7,1000000@ha /* load up USEC_PER_SEC */
> @@ -82,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
> mflr r12 /* r12 saves lr */
> .cfi_register lr,r12
> mr r11,r4 /* r11 saves tp */
> - bl __get_datapage@local /* get data page */
> - mr r9,r3 /* datapage ptr in r9 */
> + get_datapage r9, r0
> lis r7,NSEC_PER_SEC@h /* want nanoseconds */
> ori r7,r7,NSEC_PER_SEC@l
> beq cr5,70f
> @@ -235,8 +235,7 @@ V_FUNCTION_BEGIN(__kernel_time)
> .cfi_register lr,r12
>
> mr r11,r3 /* r11 holds t */
> - bl __get_datapage@local
> - mr r9, r3 /* datapage ptr in r9 */
> + get_datapage r9, r0
>
> lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] powerpc/vdso32: inline __get_datapage()
@ 2019-08-21 4:37 ` Benjamin Herrenschmidt
0 siblings, 0 replies; 13+ messages in thread
From: Benjamin Herrenschmidt @ 2019-08-21 4:37 UTC (permalink / raw)
To: Christophe Leroy, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
On Fri, 2019-08-16 at 14:48 +0000, Christophe Leroy wrote:
> __get_datapage() is only a few instructions to retrieve the
> address of the page where the kernel stores data to the VDSO.
>
> By inlining this function into its users, a bl/blr pair and
> a mflr/mtlr pair is avoided, plus a few reg moves.
>
> The improvement is noticeable (about 55 nsec/call on an 8xx)
Interesting... would be worth doing the same on vdso64 no ?
> vdsotest before the patch:
> gettimeofday: vdso: 731 nsec/call
> clock-gettime-realtime-coarse: vdso: 668 nsec/call
> clock-gettime-monotonic-coarse: vdso: 745 nsec/call
>
> vdsotest after the patch:
> gettimeofday: vdso: 677 nsec/call
> clock-gettime-realtime-coarse: vdso: 613 nsec/call
> clock-gettime-monotonic-coarse: vdso: 690 nsec/call
>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++-----
> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++-----------------
> --------
> arch/powerpc/kernel/vdso32/datapage.h | 12 ++++++++++++
> arch/powerpc/kernel/vdso32/gettimeofday.S | 11 +++++------
> 4 files changed, 26 insertions(+), 36 deletions(-)
> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
>
> diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S
> b/arch/powerpc/kernel/vdso32/cacheflush.S
> index 7f882e7b9f43..e9453837e4ee 100644
> --- a/arch/powerpc/kernel/vdso32/cacheflush.S
> +++ b/arch/powerpc/kernel/vdso32/cacheflush.S
> @@ -10,6 +10,8 @@
> #include <asm/vdso.h>
> #include <asm/asm-offsets.h>
>
> +#include "datapage.h"
> +
> .text
>
> /*
> @@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
> .cfi_startproc
> mflr r12
> .cfi_register lr,r12
> - mr r11,r3
> - bl __get_datapage@local
> + get_datapage r10, r0
> mtlr r12
> - mr r10,r3
>
> lwz r7,CFG_DCACHE_BLOCKSZ(r10)
> addi r5,r7,-1
> - andc r6,r11,r5 /* round low to line bdy */
> + andc r6,r3,r5 /* round low to line bdy */
> subf r8,r6,r4 /* compute length */
> add r8,r8,r5 /* ensure we get enough */
> lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
> @@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>
> lwz r7,CFG_ICACHE_BLOCKSZ(r10)
> addi r5,r7,-1
> - andc r6,r11,r5 /* round low to line bdy */
> + andc r6,r3,r5 /* round low to line bdy */
> subf r8,r6,r4 /* compute length */
> add r8,r8,r5
> lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
> diff --git a/arch/powerpc/kernel/vdso32/datapage.S
> b/arch/powerpc/kernel/vdso32/datapage.S
> index 6984125b9fc0..d480d2d4a3fe 100644
> --- a/arch/powerpc/kernel/vdso32/datapage.S
> +++ b/arch/powerpc/kernel/vdso32/datapage.S
> @@ -11,34 +11,13 @@
> #include <asm/unistd.h>
> #include <asm/vdso.h>
>
> +#include "datapage.h"
> +
> .text
> .global __kernel_datapage_offset;
> __kernel_datapage_offset:
> .long 0
>
> -V_FUNCTION_BEGIN(__get_datapage)
> - .cfi_startproc
> - /* We don't want that exposed or overridable as we want other
> objects
> - * to be able to bl directly to here
> - */
> - .protected __get_datapage
> - .hidden __get_datapage
> -
> - mflr r0
> - .cfi_register lr,r0
> -
> - bcl 20,31,data_page_branch
> -data_page_branch:
> - mflr r3
> - mtlr r0
> - addi r3, r3, __kernel_datapage_offset-data_page_branch
> - lwz r0,0(r3)
> - .cfi_restore lr
> - add r3,r0,r3
> - blr
> - .cfi_endproc
> -V_FUNCTION_END(__get_datapage)
> -
> /*
> * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
> *
> @@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
> mflr r12
> .cfi_register lr,r12
> mr r4,r3
> - bl __get_datapage@local
> + get_datapage r3, r0
> mtlr r12
> addi r3,r3,CFG_SYSCALL_MAP32
> cmpli cr0,r4,0
> @@ -74,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
> .cfi_startproc
> mflr r12
> .cfi_register lr,r12
> - bl __get_datapage@local
> + get_datapage r3, r0
> lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
> lwz r3,CFG_TB_TICKS_PER_SEC(r3)
> mtlr r12
> diff --git a/arch/powerpc/kernel/vdso32/datapage.h
> b/arch/powerpc/kernel/vdso32/datapage.h
> new file mode 100644
> index 000000000000..ad96256be090
> --- /dev/null
> +++ b/arch/powerpc/kernel/vdso32/datapage.h
> @@ -0,0 +1,12 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +
> +.macro get_datapage ptr, tmp
> + bcl 20,31,888f
> +888:
> + mflr \ptr
> + addi \ptr, \ptr, __kernel_datapage_offset - 888b
> + lwz \tmp, 0(\ptr)
> + add \ptr, \tmp, \ptr
> +.endm
> +
> +
> diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S
> b/arch/powerpc/kernel/vdso32/gettimeofday.S
> index e10098cde89c..91a58f01dcd5 100644
> --- a/arch/powerpc/kernel/vdso32/gettimeofday.S
> +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
> @@ -12,6 +12,8 @@
> #include <asm/asm-offsets.h>
> #include <asm/unistd.h>
>
> +#include "datapage.h"
> +
> /* Offset for the low 32-bit part of a field of long type */
> #ifdef CONFIG_PPC64
> #define LOPART 4
> @@ -35,8 +37,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
>
> mr r10,r3 /* r10 saves tv */
> mr r11,r4 /* r11 saves tz */
> - bl __get_datapage@local /* get data page */
> - mr r9, r3 /* datapage ptr in r9 */
> + get_datapage r9, r0
> cmplwi r10,0 /* check if tv is NULL */
> beq 3f
> lis r7,1000000@ha /* load up USEC_PER_SEC */
> @@ -82,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
> mflr r12 /* r12 saves lr */
> .cfi_register lr,r12
> mr r11,r4 /* r11 saves tp */
> - bl __get_datapage@local /* get data page */
> - mr r9,r3 /* datapage ptr in r9 */
> + get_datapage r9, r0
> lis r7,NSEC_PER_SEC@h /* want nanoseconds */
> ori r7,r7,NSEC_PER_SEC@l
> beq cr5,70f
> @@ -235,8 +235,7 @@ V_FUNCTION_BEGIN(__kernel_time)
> .cfi_register lr,r12
>
> mr r11,r3 /* r11 holds t */
> - bl __get_datapage@local
> - mr r9, r3 /* datapage ptr in r9 */
> + get_datapage r9, r0
>
> lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH] powerpc/vdso64: inline __get_datapage()
2019-08-21 4:37 ` Benjamin Herrenschmidt
(?)
@ 2019-08-21 9:29 ` Santosh Sivaraj
2019-08-21 9:46 ` Christophe Leroy
2019-08-21 11:44 ` Segher Boessenkool
-1 siblings, 2 replies; 13+ messages in thread
From: Santosh Sivaraj @ 2019-08-21 9:29 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: linuxppc-dev
__get_datapage() is only a few instructions to retrieve the
address of the page where the kernel stores data to the VDSO.
By inlining this function into its users, a bl/blr pair and
a mflr/mtlr pair is avoided, plus a few reg moves.
clock-gettime-monotonic: syscall: 514 nsec/call 396 nsec/call
clock-gettime-monotonic: libc: 25 nsec/call 24 nsec/call
clock-gettime-monotonic: vdso: 20 nsec/call 20 nsec/call
clock-getres-monotonic: syscall: 347 nsec/call 372 nsec/call
clock-getres-monotonic: libc: 19 nsec/call 19 nsec/call
clock-getres-monotonic: vdso: 10 nsec/call 10 nsec/call
clock-gettime-monotonic-coarse: syscall: 511 nsec/call 396 nsec/call
clock-gettime-monotonic-coarse: libc: 23 nsec/call 21 nsec/call
clock-gettime-monotonic-coarse: vdso: 15 nsec/call 13 nsec/call
clock-gettime-realtime: syscall: 526 nsec/call 405 nsec/call
clock-gettime-realtime: libc: 24 nsec/call 23 nsec/call
clock-gettime-realtime: vdso: 18 nsec/call 18 nsec/call
clock-getres-realtime: syscall: 342 nsec/call 372 nsec/call
clock-getres-realtime: libc: 19 nsec/call 19 nsec/call
clock-getres-realtime: vdso: 10 nsec/call 10 nsec/call
clock-gettime-realtime-coarse: syscall: 515 nsec/call 373 nsec/call
clock-gettime-realtime-coarse: libc: 23 nsec/call 22 nsec/call
clock-gettime-realtime-coarse: vdso: 14 nsec/call 13 nsec/call
Based on the patch by Christophe Leroy <christophe.leroy@c-s.fr> for vdso32.
Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
---
except for a couple of calls (1 or 2 nsec reduction), there are no
improvements in the call times. Or is 10 nsec the minimum granularity??
So I don't know if its even worth updating vdso64 except to keep vdso32 and
vdso64 equal.
arch/powerpc/kernel/vdso64/cacheflush.S | 10 ++++----
arch/powerpc/kernel/vdso64/datapage.S | 29 ++++-------------------
arch/powerpc/kernel/vdso64/datapage.h | 10 ++++++++
arch/powerpc/kernel/vdso64/gettimeofday.S | 8 ++++---
4 files changed, 24 insertions(+), 33 deletions(-)
create mode 100644 arch/powerpc/kernel/vdso64/datapage.h
diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S
index 3f92561a64c4..30e8b0d29bea 100644
--- a/arch/powerpc/kernel/vdso64/cacheflush.S
+++ b/arch/powerpc/kernel/vdso64/cacheflush.S
@@ -10,6 +10,8 @@
#include <asm/vdso.h>
#include <asm/asm-offsets.h>
+#include "datapage.h"
+
.text
/*
@@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- mr r11,r3
- bl V_LOCAL_FUNC(__get_datapage)
+ get_datapage r11, r0
mtlr r12
- mr r10,r3
lwz r7,CFG_DCACHE_BLOCKSZ(r10)
addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
+ andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5 /* ensure we get enough */
lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
@@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
lwz r7,CFG_ICACHE_BLOCKSZ(r10)
addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
+ andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5
lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
index dc84f5ae3802..8712f57c931c 100644
--- a/arch/powerpc/kernel/vdso64/datapage.S
+++ b/arch/powerpc/kernel/vdso64/datapage.S
@@ -11,34 +11,13 @@
#include <asm/unistd.h>
#include <asm/vdso.h>
+#include "datapage.h"
+
.text
.global __kernel_datapage_offset;
__kernel_datapage_offset:
.long 0
-V_FUNCTION_BEGIN(__get_datapage)
- .cfi_startproc
- /* We don't want that exposed or overridable as we want other objects
- * to be able to bl directly to here
- */
- .protected __get_datapage
- .hidden __get_datapage
-
- mflr r0
- .cfi_register lr,r0
-
- bcl 20,31,data_page_branch
-data_page_branch:
- mflr r3
- mtlr r0
- addi r3, r3, __kernel_datapage_offset-data_page_branch
- lwz r0,0(r3)
- .cfi_restore lr
- add r3,r0,r3
- blr
- .cfi_endproc
-V_FUNCTION_END(__get_datapage)
-
/*
* void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
*
@@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
mflr r12
.cfi_register lr,r12
mr r4,r3
- bl V_LOCAL_FUNC(__get_datapage)
+ get_datapage r3, r0
mtlr r12
addi r3,r3,CFG_SYSCALL_MAP64
cmpldi cr0,r4,0
@@ -75,7 +54,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- bl V_LOCAL_FUNC(__get_datapage)
+ get_datapage r3, r0
ld r3,CFG_TB_TICKS_PER_SEC(r3)
mtlr r12
crclr cr0*4+so
diff --git a/arch/powerpc/kernel/vdso64/datapage.h b/arch/powerpc/kernel/vdso64/datapage.h
new file mode 100644
index 000000000000..f2f0da0f65f3
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/datapage.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+.macro get_datapage ptr, tmp
+ bcl 20,31,888f
+888:
+ mflr \ptr
+ addi \ptr, \ptr, __kernel_datapage_offset - 888b
+ lwz \tmp, 0(\ptr)
+ add \ptr, \tmp, \ptr
+.endm
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 07bfe33fe874..7bcc879392cc 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -12,6 +12,8 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
+#include "datapage.h"
+
.text
/*
* Exact prototype of gettimeofday
@@ -26,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
mr r11,r3 /* r11 holds tv */
mr r10,r4 /* r10 holds tz */
- bl V_LOCAL_FUNC(__get_datapage) /* get data page */
+ get_datapage r3, r0
cmpldi r11,0 /* check if tv is NULL */
beq 2f
lis r7,1000000@ha /* load up USEC_PER_SEC */
@@ -71,7 +73,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
mflr r12 /* r12 saves lr */
.cfi_register lr,r12
mr r11,r4 /* r11 saves tp */
- bl V_LOCAL_FUNC(__get_datapage) /* get data page */
+ get_datapage r3, r0 /* get data page */
lis r7,NSEC_PER_SEC@h /* want nanoseconds */
ori r7,r7,NSEC_PER_SEC@l
beq cr5,70f
@@ -218,7 +220,7 @@ V_FUNCTION_BEGIN(__kernel_time)
.cfi_register lr,r12
mr r11,r3 /* r11 holds t */
- bl V_LOCAL_FUNC(__get_datapage)
+ get_datapage r3, r0
ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
--
2.21.0
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH] powerpc/vdso64: inline __get_datapage()
2019-08-21 9:29 ` [PATCH] powerpc/vdso64: " Santosh Sivaraj
@ 2019-08-21 9:46 ` Christophe Leroy
2019-08-21 11:20 ` Santosh Sivaraj
2019-08-21 15:58 ` Nathan Lynch
2019-08-21 11:44 ` Segher Boessenkool
1 sibling, 2 replies; 13+ messages in thread
From: Christophe Leroy @ 2019-08-21 9:46 UTC (permalink / raw)
To: Santosh Sivaraj, Benjamin Herrenschmidt; +Cc: linuxppc-dev
Le 21/08/2019 à 11:29, Santosh Sivaraj a écrit :
> __get_datapage() is only a few instructions to retrieve the
> address of the page where the kernel stores data to the VDSO.
>
> By inlining this function into its users, a bl/blr pair and
> a mflr/mtlr pair is avoided, plus a few reg moves.
>
> clock-gettime-monotonic: syscall: 514 nsec/call 396 nsec/call
> clock-gettime-monotonic: libc: 25 nsec/call 24 nsec/call
> clock-gettime-monotonic: vdso: 20 nsec/call 20 nsec/call
> clock-getres-monotonic: syscall: 347 nsec/call 372 nsec/call
> clock-getres-monotonic: libc: 19 nsec/call 19 nsec/call
> clock-getres-monotonic: vdso: 10 nsec/call 10 nsec/call
> clock-gettime-monotonic-coarse: syscall: 511 nsec/call 396 nsec/call
> clock-gettime-monotonic-coarse: libc: 23 nsec/call 21 nsec/call
> clock-gettime-monotonic-coarse: vdso: 15 nsec/call 13 nsec/call
> clock-gettime-realtime: syscall: 526 nsec/call 405 nsec/call
> clock-gettime-realtime: libc: 24 nsec/call 23 nsec/call
> clock-gettime-realtime: vdso: 18 nsec/call 18 nsec/call
> clock-getres-realtime: syscall: 342 nsec/call 372 nsec/call
> clock-getres-realtime: libc: 19 nsec/call 19 nsec/call
> clock-getres-realtime: vdso: 10 nsec/call 10 nsec/call
> clock-gettime-realtime-coarse: syscall: 515 nsec/call 373 nsec/call
> clock-gettime-realtime-coarse: libc: 23 nsec/call 22 nsec/call
> clock-gettime-realtime-coarse: vdso: 14 nsec/call 13 nsec/call
I think you should only put the measurements on vdso calls, and only the
ones that are impacted by the change. For exemple, getres function
doesn't use __get_datapage so showing it here is pointless.
gettimeofday should be shown there as it uses __get_datapage()
>
> Based on the patch by Christophe Leroy <christophe.leroy@c-s.fr> for vdso32.
>
> Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
> ---
>
> except for a couple of calls (1 or 2 nsec reduction), there are no
> improvements in the call times. Or is 10 nsec the minimum granularity??
Maybe the ones that show no improvements are the ones that don't use
__get_datapage() at all ...
>
> So I don't know if its even worth updating vdso64 except to keep vdso32 and
> vdso64 equal.
2ns on a 15ns call is 13% so it is worth it I think.
Christophe
>
>
> arch/powerpc/kernel/vdso64/cacheflush.S | 10 ++++----
> arch/powerpc/kernel/vdso64/datapage.S | 29 ++++-------------------
> arch/powerpc/kernel/vdso64/datapage.h | 10 ++++++++
> arch/powerpc/kernel/vdso64/gettimeofday.S | 8 ++++---
> 4 files changed, 24 insertions(+), 33 deletions(-)
> create mode 100644 arch/powerpc/kernel/vdso64/datapage.h
>
> diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S
> index 3f92561a64c4..30e8b0d29bea 100644
> --- a/arch/powerpc/kernel/vdso64/cacheflush.S
> +++ b/arch/powerpc/kernel/vdso64/cacheflush.S
> @@ -10,6 +10,8 @@
> #include <asm/vdso.h>
> #include <asm/asm-offsets.h>
>
> +#include "datapage.h"
> +
> .text
>
> /*
> @@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
> .cfi_startproc
> mflr r12
> .cfi_register lr,r12
> - mr r11,r3
> - bl V_LOCAL_FUNC(__get_datapage)
> + get_datapage r11, r0
> mtlr r12
> - mr r10,r3
>
> lwz r7,CFG_DCACHE_BLOCKSZ(r10)
> addi r5,r7,-1
> - andc r6,r11,r5 /* round low to line bdy */
> + andc r6,r3,r5 /* round low to line bdy */
> subf r8,r6,r4 /* compute length */
> add r8,r8,r5 /* ensure we get enough */
> lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
> @@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>
> lwz r7,CFG_ICACHE_BLOCKSZ(r10)
> addi r5,r7,-1
> - andc r6,r11,r5 /* round low to line bdy */
> + andc r6,r3,r5 /* round low to line bdy */
> subf r8,r6,r4 /* compute length */
> add r8,r8,r5
> lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
> diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
> index dc84f5ae3802..8712f57c931c 100644
> --- a/arch/powerpc/kernel/vdso64/datapage.S
> +++ b/arch/powerpc/kernel/vdso64/datapage.S
> @@ -11,34 +11,13 @@
> #include <asm/unistd.h>
> #include <asm/vdso.h>
>
> +#include "datapage.h"
> +
> .text
> .global __kernel_datapage_offset;
> __kernel_datapage_offset:
> .long 0
>
> -V_FUNCTION_BEGIN(__get_datapage)
> - .cfi_startproc
> - /* We don't want that exposed or overridable as we want other objects
> - * to be able to bl directly to here
> - */
> - .protected __get_datapage
> - .hidden __get_datapage
> -
> - mflr r0
> - .cfi_register lr,r0
> -
> - bcl 20,31,data_page_branch
> -data_page_branch:
> - mflr r3
> - mtlr r0
> - addi r3, r3, __kernel_datapage_offset-data_page_branch
> - lwz r0,0(r3)
> - .cfi_restore lr
> - add r3,r0,r3
> - blr
> - .cfi_endproc
> -V_FUNCTION_END(__get_datapage)
> -
> /*
> * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
> *
> @@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
> mflr r12
> .cfi_register lr,r12
> mr r4,r3
> - bl V_LOCAL_FUNC(__get_datapage)
> + get_datapage r3, r0
> mtlr r12
> addi r3,r3,CFG_SYSCALL_MAP64
> cmpldi cr0,r4,0
> @@ -75,7 +54,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
> .cfi_startproc
> mflr r12
> .cfi_register lr,r12
> - bl V_LOCAL_FUNC(__get_datapage)
> + get_datapage r3, r0
> ld r3,CFG_TB_TICKS_PER_SEC(r3)
> mtlr r12
> crclr cr0*4+so
> diff --git a/arch/powerpc/kernel/vdso64/datapage.h b/arch/powerpc/kernel/vdso64/datapage.h
> new file mode 100644
> index 000000000000..f2f0da0f65f3
> --- /dev/null
> +++ b/arch/powerpc/kernel/vdso64/datapage.h
> @@ -0,0 +1,10 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +
> +.macro get_datapage ptr, tmp
> + bcl 20,31,888f
> +888:
> + mflr \ptr
> + addi \ptr, \ptr, __kernel_datapage_offset - 888b
> + lwz \tmp, 0(\ptr)
> + add \ptr, \tmp, \ptr
> +.endm
> diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
> index 07bfe33fe874..7bcc879392cc 100644
> --- a/arch/powerpc/kernel/vdso64/gettimeofday.S
> +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
> @@ -12,6 +12,8 @@
> #include <asm/asm-offsets.h>
> #include <asm/unistd.h>
>
> +#include "datapage.h"
> +
> .text
> /*
> * Exact prototype of gettimeofday
> @@ -26,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
>
> mr r11,r3 /* r11 holds tv */
> mr r10,r4 /* r10 holds tz */
> - bl V_LOCAL_FUNC(__get_datapage) /* get data page */
> + get_datapage r3, r0
> cmpldi r11,0 /* check if tv is NULL */
> beq 2f
> lis r7,1000000@ha /* load up USEC_PER_SEC */
> @@ -71,7 +73,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
> mflr r12 /* r12 saves lr */
> .cfi_register lr,r12
> mr r11,r4 /* r11 saves tp */
> - bl V_LOCAL_FUNC(__get_datapage) /* get data page */
> + get_datapage r3, r0 /* get data page */
> lis r7,NSEC_PER_SEC@h /* want nanoseconds */
> ori r7,r7,NSEC_PER_SEC@l
> beq cr5,70f
> @@ -218,7 +220,7 @@ V_FUNCTION_BEGIN(__kernel_time)
> .cfi_register lr,r12
>
> mr r11,r3 /* r11 holds t */
> - bl V_LOCAL_FUNC(__get_datapage)
> + get_datapage r3, r0
>
> ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
>
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] powerpc/vdso64: inline __get_datapage()
2019-08-21 9:46 ` Christophe Leroy
@ 2019-08-21 11:20 ` Santosh Sivaraj
2019-08-21 15:58 ` Nathan Lynch
1 sibling, 0 replies; 13+ messages in thread
From: Santosh Sivaraj @ 2019-08-21 11:20 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt; +Cc: linuxppc-dev
Christophe Leroy <christophe.leroy@c-s.fr> writes:
> Le 21/08/2019 à 11:29, Santosh Sivaraj a écrit :
>> __get_datapage() is only a few instructions to retrieve the
>> address of the page where the kernel stores data to the VDSO.
>>
>> By inlining this function into its users, a bl/blr pair and
>> a mflr/mtlr pair is avoided, plus a few reg moves.
>>
>> clock-gettime-monotonic: syscall: 514 nsec/call 396 nsec/call
>> clock-gettime-monotonic: libc: 25 nsec/call 24 nsec/call
>> clock-gettime-monotonic: vdso: 20 nsec/call 20 nsec/call
>> clock-getres-monotonic: syscall: 347 nsec/call 372 nsec/call
>> clock-getres-monotonic: libc: 19 nsec/call 19 nsec/call
>> clock-getres-monotonic: vdso: 10 nsec/call 10 nsec/call
>> clock-gettime-monotonic-coarse: syscall: 511 nsec/call 396 nsec/call
>> clock-gettime-monotonic-coarse: libc: 23 nsec/call 21 nsec/call
>> clock-gettime-monotonic-coarse: vdso: 15 nsec/call 13 nsec/call
>> clock-gettime-realtime: syscall: 526 nsec/call 405 nsec/call
>> clock-gettime-realtime: libc: 24 nsec/call 23 nsec/call
>> clock-gettime-realtime: vdso: 18 nsec/call 18 nsec/call
>> clock-getres-realtime: syscall: 342 nsec/call 372 nsec/call
>> clock-getres-realtime: libc: 19 nsec/call 19 nsec/call
>> clock-getres-realtime: vdso: 10 nsec/call 10 nsec/call
>> clock-gettime-realtime-coarse: syscall: 515 nsec/call 373 nsec/call
>> clock-gettime-realtime-coarse: libc: 23 nsec/call 22 nsec/call
>> clock-gettime-realtime-coarse: vdso: 14 nsec/call 13 nsec/call
>
> I think you should only put the measurements on vdso calls, and only the
> ones that are impacted by the change. For exemple, getres function
> doesn't use __get_datapage so showing it here is pointless.
>
> gettimeofday should be shown there as it uses __get_datapage()
>
>
>>
>> Based on the patch by Christophe Leroy <christophe.leroy@c-s.fr> for vdso32.
>>
>> Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
>> ---
>>
>> except for a couple of calls (1 or 2 nsec reduction), there are no
>> improvements in the call times. Or is 10 nsec the minimum granularity??
>
> Maybe the ones that show no improvements are the ones that don't use
> __get_datapage() at all ...
Yes makes sense.
>
>>
>> So I don't know if its even worth updating vdso64 except to keep vdso32 and
>> vdso64 equal.
>
> 2ns on a 15ns call is 13% so it is worth it I think.
true. Since datapage.h is the same for both 32 and 64, may be we should put
it in include/asm.
Thanks,
Santosh
>
> Christophe
>
>
>>
>>
>> arch/powerpc/kernel/vdso64/cacheflush.S | 10 ++++----
>> arch/powerpc/kernel/vdso64/datapage.S | 29 ++++-------------------
>> arch/powerpc/kernel/vdso64/datapage.h | 10 ++++++++
>> arch/powerpc/kernel/vdso64/gettimeofday.S | 8 ++++---
>> 4 files changed, 24 insertions(+), 33 deletions(-)
>> create mode 100644 arch/powerpc/kernel/vdso64/datapage.h
>>
>> diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S
>> index 3f92561a64c4..30e8b0d29bea 100644
>> --- a/arch/powerpc/kernel/vdso64/cacheflush.S
>> +++ b/arch/powerpc/kernel/vdso64/cacheflush.S
>> @@ -10,6 +10,8 @@
>> #include <asm/vdso.h>
>> #include <asm/asm-offsets.h>
>>
>> +#include "datapage.h"
>> +
>> .text
>>
>> /*
>> @@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>> .cfi_startproc
>> mflr r12
>> .cfi_register lr,r12
>> - mr r11,r3
>> - bl V_LOCAL_FUNC(__get_datapage)
>> + get_datapage r11, r0
>> mtlr r12
>> - mr r10,r3
>>
>> lwz r7,CFG_DCACHE_BLOCKSZ(r10)
>> addi r5,r7,-1
>> - andc r6,r11,r5 /* round low to line bdy */
>> + andc r6,r3,r5 /* round low to line bdy */
>> subf r8,r6,r4 /* compute length */
>> add r8,r8,r5 /* ensure we get enough */
>> lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
>> @@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>>
>> lwz r7,CFG_ICACHE_BLOCKSZ(r10)
>> addi r5,r7,-1
>> - andc r6,r11,r5 /* round low to line bdy */
>> + andc r6,r3,r5 /* round low to line bdy */
>> subf r8,r6,r4 /* compute length */
>> add r8,r8,r5
>> lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
>> diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
>> index dc84f5ae3802..8712f57c931c 100644
>> --- a/arch/powerpc/kernel/vdso64/datapage.S
>> +++ b/arch/powerpc/kernel/vdso64/datapage.S
>> @@ -11,34 +11,13 @@
>> #include <asm/unistd.h>
>> #include <asm/vdso.h>
>>
>> +#include "datapage.h"
>> +
>> .text
>> .global __kernel_datapage_offset;
>> __kernel_datapage_offset:
>> .long 0
>>
>> -V_FUNCTION_BEGIN(__get_datapage)
>> - .cfi_startproc
>> - /* We don't want that exposed or overridable as we want other objects
>> - * to be able to bl directly to here
>> - */
>> - .protected __get_datapage
>> - .hidden __get_datapage
>> -
>> - mflr r0
>> - .cfi_register lr,r0
>> -
>> - bcl 20,31,data_page_branch
>> -data_page_branch:
>> - mflr r3
>> - mtlr r0
>> - addi r3, r3, __kernel_datapage_offset-data_page_branch
>> - lwz r0,0(r3)
>> - .cfi_restore lr
>> - add r3,r0,r3
>> - blr
>> - .cfi_endproc
>> -V_FUNCTION_END(__get_datapage)
>> -
>> /*
>> * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
>> *
>> @@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
>> mflr r12
>> .cfi_register lr,r12
>> mr r4,r3
>> - bl V_LOCAL_FUNC(__get_datapage)
>> + get_datapage r3, r0
>> mtlr r12
>> addi r3,r3,CFG_SYSCALL_MAP64
>> cmpldi cr0,r4,0
>> @@ -75,7 +54,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
>> .cfi_startproc
>> mflr r12
>> .cfi_register lr,r12
>> - bl V_LOCAL_FUNC(__get_datapage)
>> + get_datapage r3, r0
>> ld r3,CFG_TB_TICKS_PER_SEC(r3)
>> mtlr r12
>> crclr cr0*4+so
>> diff --git a/arch/powerpc/kernel/vdso64/datapage.h b/arch/powerpc/kernel/vdso64/datapage.h
>> new file mode 100644
>> index 000000000000..f2f0da0f65f3
>> --- /dev/null
>> +++ b/arch/powerpc/kernel/vdso64/datapage.h
>> @@ -0,0 +1,10 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +
>> +.macro get_datapage ptr, tmp
>> + bcl 20,31,888f
>> +888:
>> + mflr \ptr
>> + addi \ptr, \ptr, __kernel_datapage_offset - 888b
>> + lwz \tmp, 0(\ptr)
>> + add \ptr, \tmp, \ptr
>> +.endm
>> diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
>> index 07bfe33fe874..7bcc879392cc 100644
>> --- a/arch/powerpc/kernel/vdso64/gettimeofday.S
>> +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
>> @@ -12,6 +12,8 @@
>> #include <asm/asm-offsets.h>
>> #include <asm/unistd.h>
>>
>> +#include "datapage.h"
>> +
>> .text
>> /*
>> * Exact prototype of gettimeofday
>> @@ -26,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
>>
>> mr r11,r3 /* r11 holds tv */
>> mr r10,r4 /* r10 holds tz */
>> - bl V_LOCAL_FUNC(__get_datapage) /* get data page */
>> + get_datapage r3, r0
>> cmpldi r11,0 /* check if tv is NULL */
>> beq 2f
>> lis r7,1000000@ha /* load up USEC_PER_SEC */
>> @@ -71,7 +73,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
>> mflr r12 /* r12 saves lr */
>> .cfi_register lr,r12
>> mr r11,r4 /* r11 saves tp */
>> - bl V_LOCAL_FUNC(__get_datapage) /* get data page */
>> + get_datapage r3, r0 /* get data page */
>> lis r7,NSEC_PER_SEC@h /* want nanoseconds */
>> ori r7,r7,NSEC_PER_SEC@l
>> beq cr5,70f
>> @@ -218,7 +220,7 @@ V_FUNCTION_BEGIN(__kernel_time)
>> .cfi_register lr,r12
>>
>> mr r11,r3 /* r11 holds t */
>> - bl V_LOCAL_FUNC(__get_datapage)
>> + get_datapage r3, r0
>>
>> ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
>>
>>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] powerpc/vdso64: inline __get_datapage()
2019-08-21 9:29 ` [PATCH] powerpc/vdso64: " Santosh Sivaraj
2019-08-21 9:46 ` Christophe Leroy
@ 2019-08-21 11:44 ` Segher Boessenkool
2019-08-21 11:50 ` Christophe Leroy
1 sibling, 1 reply; 13+ messages in thread
From: Segher Boessenkool @ 2019-08-21 11:44 UTC (permalink / raw)
To: Santosh Sivaraj; +Cc: linuxppc-dev
Hi!
On Wed, Aug 21, 2019 at 02:59:59PM +0530, Santosh Sivaraj wrote:
> except for a couple of calls (1 or 2 nsec reduction), there are no
> improvements in the call times. Or is 10 nsec the minimum granularity??
>
> So I don't know if its even worth updating vdso64 except to keep vdso32 and
> vdso64 equal.
Calls are cheap, in principle... It is the LR stuff that can make it
slower on some cores, and a lot of calling sequence stuff may have
considerable overhead of course.
> +.macro get_datapage ptr, tmp
> + bcl 20,31,888f
> +888:
> + mflr \ptr
> + addi \ptr, \ptr, __kernel_datapage_offset - 888b
> + lwz \tmp, 0(\ptr)
> + add \ptr, \tmp, \ptr
> +.endm
(You can just write that as
bcl 20,31,$+4
mflr \ptr
etc. Useless labels are useless :-) )
One thing you might want to do to improve performance is to do this without
the bcl etc., because you cannot really hide the LR latency of that. But
that isn't very many ns either... Superscalar helps, OoO helps, but it is
mostly just that >100MHz helps ;-)
Segher
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] powerpc/vdso64: inline __get_datapage()
2019-08-21 11:44 ` Segher Boessenkool
@ 2019-08-21 11:50 ` Christophe Leroy
2019-08-21 12:15 ` Segher Boessenkool
0 siblings, 1 reply; 13+ messages in thread
From: Christophe Leroy @ 2019-08-21 11:50 UTC (permalink / raw)
To: Segher Boessenkool, Santosh Sivaraj; +Cc: linuxppc-dev
Le 21/08/2019 à 13:44, Segher Boessenkool a écrit :
> Hi!
>
> On Wed, Aug 21, 2019 at 02:59:59PM +0530, Santosh Sivaraj wrote:
>> except for a couple of calls (1 or 2 nsec reduction), there are no
>> improvements in the call times. Or is 10 nsec the minimum granularity??
>>
>> So I don't know if its even worth updating vdso64 except to keep vdso32 and
>> vdso64 equal.
>
> Calls are cheap, in principle... It is the LR stuff that can make it
> slower on some cores, and a lot of calling sequence stuff may have
> considerable overhead of course.
On an 8xx, a taken branch is 2 cycles and a non taken branch in 1 cycle
(+ the refetch if that was not the anticipate branch).
>
>> +.macro get_datapage ptr, tmp
>> + bcl 20,31,888f
>> +888:
>> + mflr \ptr
>> + addi \ptr, \ptr, __kernel_datapage_offset - 888b
>> + lwz \tmp, 0(\ptr)
>> + add \ptr, \tmp, \ptr
>> +.endm
>
> (You can just write that as
> bcl 20,31,$+4
> mflr \ptr
> etc. Useless labels are useless :-) )
Nice trick. Will use that.
>
> One thing you might want to do to improve performance is to do this without
> the bcl etc., because you cannot really hide the LR latency of that. But
> that isn't very many ns either... Superscalar helps, OoO helps, but it is
> mostly just that >100MHz helps ;-)
Good idea. Did you have a look at my vdso32 similar patch ?
https://patchwork.ozlabs.org/patch/1148274/
Do you have any idea on how to avoid that bcl/mflr stuff ?
Christophe
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] powerpc/vdso64: inline __get_datapage()
2019-08-21 11:50 ` Christophe Leroy
@ 2019-08-21 12:15 ` Segher Boessenkool
2019-08-21 16:23 ` Christophe Leroy
0 siblings, 1 reply; 13+ messages in thread
From: Segher Boessenkool @ 2019-08-21 12:15 UTC (permalink / raw)
To: Christophe Leroy; +Cc: Santosh Sivaraj, linuxppc-dev
On Wed, Aug 21, 2019 at 01:50:52PM +0200, Christophe Leroy wrote:
> Le 21/08/2019 à 13:44, Segher Boessenkool a écrit :
> >Calls are cheap, in principle... It is the LR stuff that can make it
> >slower on some cores, and a lot of calling sequence stuff may have
> >considerable overhead of course.
>
> On an 8xx, a taken branch is 2 cycles and a non taken branch in 1 cycle
> (+ the refetch if that was not the anticipate branch).
Yup. And on the big cores they are all 0 cycles, if correctly predicted.
(Taken branches end your fetch group, of course, there are small
inefficiencies everywhere, but that's about the gist of it).
> >>+.macro get_datapage ptr, tmp
> >>+ bcl 20,31,888f
> >>+888:
> >>+ mflr \ptr
> >>+ addi \ptr, \ptr, __kernel_datapage_offset - 888b
> >>+ lwz \tmp, 0(\ptr)
> >>+ add \ptr, \tmp, \ptr
> >>+.endm
> >
> >(You can just write that as
> > bcl 20,31,$+4
> > mflr \ptr
> >etc. Useless labels are useless :-) )
>
> Nice trick. Will use that.
Or .+4 if you like that syntax better... It's all the same thing.
> >One thing you might want to do to improve performance is to do this without
> >the bcl etc., because you cannot really hide the LR latency of that. But
> >that isn't very many ns either... Superscalar helps, OoO helps, but it is
> >mostly just that >100MHz helps ;-)
>
> Good idea. Did you have a look at my vdso32 similar patch ?
> https://patchwork.ozlabs.org/patch/1148274/
Yes, I saw it.
> Do you have any idea on how to avoid that bcl/mflr stuff ?
Do a load from some fixed address? Maybe an absolute address, even?
lwz r3,-12344(0) or similar (that address is in kernel space...)
There aren't many options, and certainly not many *good* options!
Segher
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] powerpc/vdso64: inline __get_datapage()
2019-08-21 9:46 ` Christophe Leroy
2019-08-21 11:20 ` Santosh Sivaraj
@ 2019-08-21 15:58 ` Nathan Lynch
1 sibling, 0 replies; 13+ messages in thread
From: Nathan Lynch @ 2019-08-21 15:58 UTC (permalink / raw)
To: Christophe Leroy; +Cc: Santosh Sivaraj, linuxppc-dev
Christophe Leroy <christophe.leroy@c-s.fr> writes:
> Le 21/08/2019 à 11:29, Santosh Sivaraj a écrit :
>> __get_datapage() is only a few instructions to retrieve the
>> address of the page where the kernel stores data to the VDSO.
>>
>> By inlining this function into its users, a bl/blr pair and
>> a mflr/mtlr pair is avoided, plus a few reg moves.
>>
>> clock-gettime-monotonic: syscall: 514 nsec/call 396 nsec/call
>> clock-gettime-monotonic: libc: 25 nsec/call 24 nsec/call
>> clock-gettime-monotonic: vdso: 20 nsec/call 20 nsec/call
>> clock-getres-monotonic: syscall: 347 nsec/call 372 nsec/call
>> clock-getres-monotonic: libc: 19 nsec/call 19 nsec/call
>> clock-getres-monotonic: vdso: 10 nsec/call 10 nsec/call
>> clock-gettime-monotonic-coarse: syscall: 511 nsec/call 396 nsec/call
>> clock-gettime-monotonic-coarse: libc: 23 nsec/call 21 nsec/call
>> clock-gettime-monotonic-coarse: vdso: 15 nsec/call 13 nsec/call
>> clock-gettime-realtime: syscall: 526 nsec/call 405 nsec/call
>> clock-gettime-realtime: libc: 24 nsec/call 23 nsec/call
>> clock-gettime-realtime: vdso: 18 nsec/call 18 nsec/call
>> clock-getres-realtime: syscall: 342 nsec/call 372 nsec/call
>> clock-getres-realtime: libc: 19 nsec/call 19 nsec/call
>> clock-getres-realtime: vdso: 10 nsec/call 10 nsec/call
>> clock-gettime-realtime-coarse: syscall: 515 nsec/call 373 nsec/call
>> clock-gettime-realtime-coarse: libc: 23 nsec/call 22 nsec/call
>> clock-gettime-realtime-coarse: vdso: 14 nsec/call 13 nsec/call
>
> I think you should only put the measurements on vdso calls, and only the
> ones that are impacted by the change. For exemple, getres function
> doesn't use __get_datapage so showing it here is pointless.
I agree with this point, but also, I would caution against using
vdsotest's benchmark function for anything like rigorous performance
analysis. The intention was to roughly confirm the VDSO's relative
performance vs the in-kernel implementations. Not to compare one VDSO
implementation of (say) clock_gettime to another.
I suggest using perf to confirm the expected effects of the change, if
possible.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] powerpc/vdso64: inline __get_datapage()
2019-08-21 12:15 ` Segher Boessenkool
@ 2019-08-21 16:23 ` Christophe Leroy
2019-08-22 16:18 ` Santosh Sivaraj
0 siblings, 1 reply; 13+ messages in thread
From: Christophe Leroy @ 2019-08-21 16:23 UTC (permalink / raw)
To: Segher Boessenkool; +Cc: Santosh Sivaraj, linuxppc-dev
Le 21/08/2019 à 14:15, Segher Boessenkool a écrit :
> On Wed, Aug 21, 2019 at 01:50:52PM +0200, Christophe Leroy wrote:
>> Do you have any idea on how to avoid that bcl/mflr stuff ?
>
> Do a load from some fixed address? Maybe an absolute address, even?
> lwz r3,-12344(0) or similar (that address is in kernel space...)
>
> There aren't many options, and certainly not many *good* options!
>
IIUC, the VDSO is seen by apps the same way as a dynamic lib. Couldn't
the relocation be done only once when the app loads the VDSO as for a
regular .so lib ?
It looks like it is what others do, at least x86 and arm64, unless I
misunderstood their code.
Christophe
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] powerpc/vdso64: inline __get_datapage()
2019-08-21 16:23 ` Christophe Leroy
@ 2019-08-22 16:18 ` Santosh Sivaraj
0 siblings, 0 replies; 13+ messages in thread
From: Santosh Sivaraj @ 2019-08-22 16:18 UTC (permalink / raw)
To: Christophe Leroy, Segher Boessenkool; +Cc: linuxppc-dev
Christophe Leroy <christophe.leroy@c-s.fr> writes:
> Le 21/08/2019 à 14:15, Segher Boessenkool a écrit :
>> On Wed, Aug 21, 2019 at 01:50:52PM +0200, Christophe Leroy wrote:
>>> Do you have any idea on how to avoid that bcl/mflr stuff ?
>>
>> Do a load from some fixed address? Maybe an absolute address, even?
>> lwz r3,-12344(0) or similar (that address is in kernel space...)
>>
>> There aren't many options, and certainly not many *good* options!
>>
>
> IIUC, the VDSO is seen by apps the same way as a dynamic lib. Couldn't
> the relocation be done only once when the app loads the VDSO as for a
> regular .so lib ?
How does address space randomization work for .so libs?
>
> It looks like it is what others do, at least x86 and arm64, unless I
> misunderstood their code.
>
> Christophe
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2019-08-22 16:21 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-16 14:48 [PATCH] powerpc/vdso32: inline __get_datapage() Christophe Leroy
2019-08-16 14:48 ` Christophe Leroy
2019-08-21 4:37 ` Benjamin Herrenschmidt
2019-08-21 4:37 ` Benjamin Herrenschmidt
2019-08-21 9:29 ` [PATCH] powerpc/vdso64: " Santosh Sivaraj
2019-08-21 9:46 ` Christophe Leroy
2019-08-21 11:20 ` Santosh Sivaraj
2019-08-21 15:58 ` Nathan Lynch
2019-08-21 11:44 ` Segher Boessenkool
2019-08-21 11:50 ` Christophe Leroy
2019-08-21 12:15 ` Segher Boessenkool
2019-08-21 16:23 ` Christophe Leroy
2019-08-22 16:18 ` Santosh Sivaraj
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.