* [PATCH v10 0/5] powerpc: switch VDSO to C implementation
@ 2020-08-05 7:09 Christophe Leroy
2020-08-05 7:09 ` [PATCH v10 1/5] powerpc/processor: Move cpu_relax() into asm/vdso/processor.h Christophe Leroy
` (4 more replies)
0 siblings, 5 replies; 18+ messages in thread
From: Christophe Leroy @ 2020-08-05 7:09 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, nathanl, anton
Cc: linux-kernel, linuxppc-dev, arnd, tglx, vincenzo.frascino, luto,
linux-arch
This is the tenth version of a series to switch powerpc VDSO to
generic C implementation.
Changes since v9 are:
- Added a comment explaining the reason for the double stack frame
- Moved back .cfi_register lr next to mflr
Main changes since v8 are:
- Dropped the patches which put the VDSO datapage in front of VDSO text in the mapping
- Adds a second stack frame because the caller doesn't set one, at least on PPC64
- Saving the TOC pointer on PPC64 (is that really needed ?)
This series applies on today's powerpc/merge branch.
See the last patches for details on changes and performance.
Christophe Leroy (5):
powerpc/processor: Move cpu_relax() into asm/vdso/processor.h
powerpc/vdso: Prepare for switching VDSO to generic C implementation.
powerpc/vdso: Save and restore TOC pointer on PPC64
powerpc/vdso: Switch VDSO to generic C implementation.
powerpc/vdso: Provide __kernel_clock_gettime64() on vdso32
arch/powerpc/Kconfig | 2 +
arch/powerpc/include/asm/clocksource.h | 7 +
arch/powerpc/include/asm/processor.h | 13 +-
arch/powerpc/include/asm/vdso/clocksource.h | 7 +
arch/powerpc/include/asm/vdso/gettimeofday.h | 197 ++++++++++++
arch/powerpc/include/asm/vdso/processor.h | 23 ++
arch/powerpc/include/asm/vdso/vsyscall.h | 25 ++
arch/powerpc/include/asm/vdso_datapage.h | 40 +--
arch/powerpc/kernel/asm-offsets.c | 49 +--
arch/powerpc/kernel/time.c | 91 +-----
arch/powerpc/kernel/vdso.c | 5 +-
arch/powerpc/kernel/vdso32/Makefile | 32 +-
arch/powerpc/kernel/vdso32/config-fake32.h | 34 +++
arch/powerpc/kernel/vdso32/gettimeofday.S | 300 +------------------
arch/powerpc/kernel/vdso32/vdso32.lds.S | 1 +
arch/powerpc/kernel/vdso32/vgettimeofday.c | 35 +++
arch/powerpc/kernel/vdso64/Makefile | 23 +-
arch/powerpc/kernel/vdso64/gettimeofday.S | 242 +--------------
arch/powerpc/kernel/vdso64/vgettimeofday.c | 29 ++
19 files changed, 453 insertions(+), 702 deletions(-)
create mode 100644 arch/powerpc/include/asm/clocksource.h
create mode 100644 arch/powerpc/include/asm/vdso/clocksource.h
create mode 100644 arch/powerpc/include/asm/vdso/gettimeofday.h
create mode 100644 arch/powerpc/include/asm/vdso/processor.h
create mode 100644 arch/powerpc/include/asm/vdso/vsyscall.h
create mode 100644 arch/powerpc/kernel/vdso32/config-fake32.h
create mode 100644 arch/powerpc/kernel/vdso32/vgettimeofday.c
create mode 100644 arch/powerpc/kernel/vdso64/vgettimeofday.c
--
2.25.0
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v10 1/5] powerpc/processor: Move cpu_relax() into asm/vdso/processor.h
2020-08-05 7:09 [PATCH v10 0/5] powerpc: switch VDSO to C implementation Christophe Leroy
@ 2020-08-05 7:09 ` Christophe Leroy
2020-08-05 7:09 ` [PATCH v10 2/5] powerpc/vdso: Prepare for switching VDSO to generic C implementation Christophe Leroy
` (3 subsequent siblings)
4 siblings, 0 replies; 18+ messages in thread
From: Christophe Leroy @ 2020-08-05 7:09 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, nathanl, anton
Cc: linux-kernel, linuxppc-dev, arnd, tglx, vincenzo.frascino, luto,
linux-arch
cpu_relax() needs to be in asm/vdso/processor.h to be used by
the C VDSO generic library.
Move it there.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
v9: Forgot to remove cpu_relax() from processor.h in v8
---
arch/powerpc/include/asm/processor.h | 13 ++-----------
arch/powerpc/include/asm/vdso/processor.h | 23 +++++++++++++++++++++++
2 files changed, 25 insertions(+), 11 deletions(-)
create mode 100644 arch/powerpc/include/asm/vdso/processor.h
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index ed0d633ab5aa..c1ba9c8d9b90 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -6,6 +6,8 @@
* Copyright (C) 2001 PPC 64 Team, IBM Corp
*/
+#include <vdso/processor.h>
+
#include <asm/reg.h>
#ifdef CONFIG_VSX
@@ -63,14 +65,6 @@ extern int _chrp_type;
#endif /* defined(__KERNEL__) && defined(CONFIG_PPC32) */
-/* Macros for adjusting thread priority (hardware multi-threading) */
-#define HMT_very_low() asm volatile("or 31,31,31 # very low priority")
-#define HMT_low() asm volatile("or 1,1,1 # low priority")
-#define HMT_medium_low() asm volatile("or 6,6,6 # medium low priority")
-#define HMT_medium() asm volatile("or 2,2,2 # medium priority")
-#define HMT_medium_high() asm volatile("or 5,5,5 # medium high priority")
-#define HMT_high() asm volatile("or 3,3,3 # high priority")
-
#ifdef __KERNEL__
#ifdef CONFIG_PPC64
@@ -350,7 +344,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
}
#ifdef CONFIG_PPC64
-#define cpu_relax() do { HMT_low(); HMT_medium(); barrier(); } while (0)
#define spin_begin() HMT_low()
@@ -369,8 +362,6 @@ do { \
} \
} while (0)
-#else
-#define cpu_relax() barrier()
#endif
/* Check that a certain kernel stack pointer is valid in task_struct p */
diff --git a/arch/powerpc/include/asm/vdso/processor.h b/arch/powerpc/include/asm/vdso/processor.h
new file mode 100644
index 000000000000..39b9beace9ca
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso/processor.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_VDSO_PROCESSOR_H
+#define __ASM_VDSO_PROCESSOR_H
+
+#ifndef __ASSEMBLY__
+
+/* Macros for adjusting thread priority (hardware multi-threading) */
+#define HMT_very_low() asm volatile("or 31, 31, 31 # very low priority")
+#define HMT_low() asm volatile("or 1, 1, 1 # low priority")
+#define HMT_medium_low() asm volatile("or 6, 6, 6 # medium low priority")
+#define HMT_medium() asm volatile("or 2, 2, 2 # medium priority")
+#define HMT_medium_high() asm volatile("or 5, 5, 5 # medium high priority")
+#define HMT_high() asm volatile("or 3, 3, 3 # high priority")
+
+#ifdef CONFIG_PPC64
+#define cpu_relax() do { HMT_low(); HMT_medium(); barrier(); } while (0)
+#else
+#define cpu_relax() barrier()
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_PROCESSOR_H */
--
2.25.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v10 2/5] powerpc/vdso: Prepare for switching VDSO to generic C implementation.
2020-08-05 7:09 [PATCH v10 0/5] powerpc: switch VDSO to C implementation Christophe Leroy
2020-08-05 7:09 ` [PATCH v10 1/5] powerpc/processor: Move cpu_relax() into asm/vdso/processor.h Christophe Leroy
@ 2020-08-05 7:09 ` Christophe Leroy
2020-08-05 14:03 ` Segher Boessenkool
2020-08-05 7:09 ` [PATCH v10 3/5] powerpc/vdso: Save and restore TOC pointer on PPC64 Christophe Leroy
` (2 subsequent siblings)
4 siblings, 1 reply; 18+ messages in thread
From: Christophe Leroy @ 2020-08-05 7:09 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, nathanl, anton
Cc: linux-kernel, linuxppc-dev, arnd, tglx, vincenzo.frascino, luto,
linux-arch
Prepare for switching VDSO to generic C implementation in following
patch. Here, we:
- Prepare the helpers to call the C VDSO functions
- Prepare the required callbacks for the C VDSO functions
- Prepare the clocksource.h files to define VDSO_ARCH_CLOCKMODES
- Add the C trampolines to the generic C VDSO functions
powerpc is a bit special for VDSO as well as system calls in the
way that it requires setting the CR SO bit which cannot be done in C.
Therefore, entry/exit needs to be performed in ASM.
Implementing __arch_get_vdso_data() would clobber the link register,
requiring the caller to save it. As the ASM calling function already
has to set a stack frame and save the link register before calling
the C vdso function, retrieving the vdso data pointer there is lighter.
Implement __arch_vdso_capable() and:
- When the timebase is used, make it always return true.
- When the RTC clock is used, make it always return false.
Provide vdso_shift_ns(), as the generic x >> s gives the following
bad result:
18: 35 25 ff e0 addic. r9,r5,-32
1c: 41 80 00 10 blt 2c <shift+0x14>
20: 7c 64 4c 30 srw r4,r3,r9
24: 38 60 00 00 li r3,0
...
2c: 54 69 08 3c rlwinm r9,r3,1,0,30
30: 21 45 00 1f subfic r10,r5,31
34: 7c 84 2c 30 srw r4,r4,r5
38: 7d 29 50 30 slw r9,r9,r10
3c: 7c 63 2c 30 srw r3,r3,r5
40: 7d 24 23 78 or r4,r9,r4
In our case the shift is always <= 32. In addition, the upper 32 bits
of the result are likely zero. Letting GCC know this also optimises the
following calculations.
With the patch, we get:
0: 21 25 00 20 subfic r9,r5,32
4: 7c 69 48 30 slw r9,r3,r9
8: 7c 84 2c 30 srw r4,r4,r5
c: 7d 24 23 78 or r4,r9,r4
10: 7c 63 2c 30 srw r3,r3,r5
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
v10:
- Added a comment to explain the reason for the two stack frames.
- Moved back the .cfi_register next to mflr
v9:
- No more modification of __get_datapage(). Offset is added after.
- Adding a second stack frame because the PPC VDSO ABI doesn't force
the caller to set one.
v8:
- New, splitted out of last patch of the series
---
arch/powerpc/include/asm/clocksource.h | 7 +
arch/powerpc/include/asm/vdso/clocksource.h | 7 +
arch/powerpc/include/asm/vdso/gettimeofday.h | 185 +++++++++++++++++++
arch/powerpc/kernel/vdso32/vgettimeofday.c | 29 +++
arch/powerpc/kernel/vdso64/vgettimeofday.c | 29 +++
5 files changed, 257 insertions(+)
create mode 100644 arch/powerpc/include/asm/clocksource.h
create mode 100644 arch/powerpc/include/asm/vdso/clocksource.h
create mode 100644 arch/powerpc/include/asm/vdso/gettimeofday.h
create mode 100644 arch/powerpc/kernel/vdso32/vgettimeofday.c
create mode 100644 arch/powerpc/kernel/vdso64/vgettimeofday.c
diff --git a/arch/powerpc/include/asm/clocksource.h b/arch/powerpc/include/asm/clocksource.h
new file mode 100644
index 000000000000..482185566b0c
--- /dev/null
+++ b/arch/powerpc/include/asm/clocksource.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_CLOCKSOURCE_H
+#define _ASM_CLOCKSOURCE_H
+
+#include <asm/vdso/clocksource.h>
+
+#endif
diff --git a/arch/powerpc/include/asm/vdso/clocksource.h b/arch/powerpc/include/asm/vdso/clocksource.h
new file mode 100644
index 000000000000..ec5d672d2569
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso/clocksource.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSOCLOCKSOURCE_H
+#define __ASM_VDSOCLOCKSOURCE_H
+
+#define VDSO_ARCH_CLOCKMODES VDSO_CLOCKMODE_ARCHTIMER
+
+#endif
diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h
new file mode 100644
index 000000000000..e2c462796a22
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#include <asm/ptrace.h>
+
+#ifdef __ASSEMBLY__
+
+/*
+ * The macros set two stack frames, one for the caller and one for the callee
+ * because there is no requirement for the caller to set a stack frame when
+ * calling VDSO so it may have omitted to set one, especially on PPC64
+ */
+
+.macro cvdso_call funct
+ .cfi_startproc
+ PPC_STLU r1, -STACK_FRAME_OVERHEAD(r1)
+ mflr r0
+ .cfi_register lr, r0
+ PPC_STLU r1, -STACK_FRAME_OVERHEAD(r1)
+ PPC_STL r0, STACK_FRAME_OVERHEAD + PPC_LR_STKOFF(r1)
+ get_datapage r5, r0
+ addi r5, r5, VDSO_DATA_OFFSET
+ bl \funct
+ PPC_LL r0, STACK_FRAME_OVERHEAD + PPC_LR_STKOFF(r1)
+ cmpwi r3, 0
+ mtlr r0
+ .cfi_restore lr
+ addi r1, r1, 2 * STACK_FRAME_OVERHEAD
+ crclr so
+ beqlr+
+ crset so
+ neg r3, r3
+ blr
+ .cfi_endproc
+.endm
+
+.macro cvdso_call_time funct
+ .cfi_startproc
+ PPC_STLU r1, -STACK_FRAME_OVERHEAD(r1)
+ mflr r0
+ .cfi_register lr, r0
+ PPC_STLU r1, -STACK_FRAME_OVERHEAD(r1)
+ PPC_STL r0, STACK_FRAME_OVERHEAD + PPC_LR_STKOFF(r1)
+ get_datapage r4, r0
+ addi r4, r4, VDSO_DATA_OFFSET
+ bl \funct
+ PPC_LL r0, STACK_FRAME_OVERHEAD + PPC_LR_STKOFF(r1)
+ crclr so
+ mtlr r0
+ .cfi_restore lr
+ addi r1, r1, 2 * STACK_FRAME_OVERHEAD
+ blr
+ .cfi_endproc
+.endm
+
+#else
+
+#include <asm/time.h>
+#include <asm/unistd.h>
+#include <uapi/linux/time.h>
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+#define VDSO_HAS_TIME 1
+
+static __always_inline int do_syscall_2(const unsigned long _r0, const unsigned long _r3,
+ const unsigned long _r4)
+{
+ register long r0 asm("r0") = _r0;
+ register unsigned long r3 asm("r3") = _r3;
+ register unsigned long r4 asm("r4") = _r4;
+ register int ret asm ("r3");
+
+ asm volatile(
+ " sc\n"
+ " bns+ 1f\n"
+ " neg %0, %0\n"
+ "1:\n"
+ : "=r" (ret), "+r" (r4), "+r" (r0)
+ : "r" (r3)
+ : "memory", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cr0", "ctr");
+
+ return ret;
+}
+
+static __always_inline
+int gettimeofday_fallback(struct __kernel_old_timeval *_tv, struct timezone *_tz)
+{
+ return do_syscall_2(__NR_gettimeofday, (unsigned long)_tv, (unsigned long)_tz);
+}
+
+static __always_inline
+int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ return do_syscall_2(__NR_clock_gettime, _clkid, (unsigned long)_ts);
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts);
+}
+
+#ifdef CONFIG_VDSO32
+
+#define BUILD_VDSO32 1
+
+static __always_inline
+int clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+ return do_syscall_2(__NR_clock_gettime, _clkid, (unsigned long)_ts);
+}
+
+static __always_inline
+int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+ return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts);
+}
+#endif
+
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+{
+ return get_tb();
+}
+
+const struct vdso_data *__arch_get_vdso_data(void);
+
+static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
+{
+ return !__USE_RTC();
+}
+#define vdso_clocksource_ok vdso_clocksource_ok
+
+/*
+ * powerpc specific delta calculation.
+ *
+ * This variant removes the masking of the subtraction because the
+ * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX
+ * which would result in a pointless operation. The compiler cannot
+ * optimize it away as the mask comes from the vdso data and is not compile
+ * time constant.
+ */
+static __always_inline u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+{
+ return (cycles - last) * mult;
+}
+#define vdso_calc_delta vdso_calc_delta
+
+#ifndef __powerpc64__
+static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift)
+{
+ u32 hi = ns >> 32;
+ u32 lo = ns;
+
+ lo >>= shift;
+ lo |= hi << (32 - shift);
+ hi >>= shift;
+
+ if (likely(hi == 0))
+ return lo;
+
+ return ((u64)hi << 32) | lo;
+}
+#define vdso_shift_ns vdso_shift_ns
+#endif
+
+#ifdef __powerpc64__
+int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts,
+ const struct vdso_data *vd);
+int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res,
+ const struct vdso_data *vd);
+#else
+int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts,
+ const struct vdso_data *vd);
+int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res,
+ const struct vdso_data *vd);
+#endif
+int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz,
+ const struct vdso_data *vd);
+__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time,
+ const struct vdso_data *vd);
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/powerpc/kernel/vdso32/vgettimeofday.c b/arch/powerpc/kernel/vdso32/vgettimeofday.c
new file mode 100644
index 000000000000..0b9ab4c22ef2
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/vgettimeofday.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Powerpc userspace implementations of gettimeofday() and similar.
+ */
+#include <linux/time.h>
+#include <linux/types.h>
+
+int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts,
+ const struct vdso_data *vd)
+{
+ return __cvdso_clock_gettime32_data(vd, clock, ts);
+}
+
+int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz,
+ const struct vdso_data *vd)
+{
+ return __cvdso_gettimeofday_data(vd, tv, tz);
+}
+
+int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res,
+ const struct vdso_data *vd)
+{
+ return __cvdso_clock_getres_time32_data(vd, clock_id, res);
+}
+
+__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd)
+{
+ return __cvdso_time_data(vd, time);
+}
diff --git a/arch/powerpc/kernel/vdso64/vgettimeofday.c b/arch/powerpc/kernel/vdso64/vgettimeofday.c
new file mode 100644
index 000000000000..5b5500058344
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/vgettimeofday.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Powerpc userspace implementations of gettimeofday() and similar.
+ */
+#include <linux/time.h>
+#include <linux/types.h>
+
+int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts,
+ const struct vdso_data *vd)
+{
+ return __cvdso_clock_gettime_data(vd, clock, ts);
+}
+
+int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz,
+ const struct vdso_data *vd)
+{
+ return __cvdso_gettimeofday_data(vd, tv, tz);
+}
+
+int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res,
+ const struct vdso_data *vd)
+{
+ return __cvdso_clock_getres_data(vd, clock_id, res);
+}
+
+__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd)
+{
+ return __cvdso_time_data(vd, time);
+}
--
2.25.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v10 3/5] powerpc/vdso: Save and restore TOC pointer on PPC64
2020-08-05 7:09 [PATCH v10 0/5] powerpc: switch VDSO to C implementation Christophe Leroy
2020-08-05 7:09 ` [PATCH v10 1/5] powerpc/processor: Move cpu_relax() into asm/vdso/processor.h Christophe Leroy
2020-08-05 7:09 ` [PATCH v10 2/5] powerpc/vdso: Prepare for switching VDSO to generic C implementation Christophe Leroy
@ 2020-08-05 7:09 ` Christophe Leroy
2020-08-05 7:09 ` [PATCH v10 4/5] powerpc/vdso: Switch VDSO to generic C implementation Christophe Leroy
2020-08-05 7:09 ` [PATCH v10 5/5] powerpc/vdso: Provide __kernel_clock_gettime64() on vdso32 Christophe Leroy
4 siblings, 0 replies; 18+ messages in thread
From: Christophe Leroy @ 2020-08-05 7:09 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, nathanl, anton
Cc: linux-kernel, linuxppc-dev, arnd, tglx, vincenzo.frascino, luto,
linux-arch
On PPC64, the TOC pointer needs to be saved and restored.
Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
v9: New.
I'm not sure this is really needed, I can't see the VDSO C code doing
anything with r2, at least on ppc64_defconfig.
So I let you decide whether you take it or not.
---
arch/powerpc/include/asm/vdso/gettimeofday.h | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h
index e2c462796a22..8ed4329c9fb8 100644
--- a/arch/powerpc/include/asm/vdso/gettimeofday.h
+++ b/arch/powerpc/include/asm/vdso/gettimeofday.h
@@ -19,10 +19,16 @@
.cfi_register lr, r0
PPC_STLU r1, -STACK_FRAME_OVERHEAD(r1)
PPC_STL r0, STACK_FRAME_OVERHEAD + PPC_LR_STKOFF(r1)
+#ifdef CONFIG_PPC64
+ PPC_STL r2, STACK_FRAME_OVERHEAD + STK_GOT(r1)
+#endif
get_datapage r5, r0
addi r5, r5, VDSO_DATA_OFFSET
bl \funct
PPC_LL r0, STACK_FRAME_OVERHEAD + PPC_LR_STKOFF(r1)
+#ifdef CONFIG_PPC64
+ PPC_LL r2, STACK_FRAME_OVERHEAD + STK_GOT(r1)
+#endif
cmpwi r3, 0
mtlr r0
.cfi_restore lr
@@ -42,10 +48,16 @@
.cfi_register lr, r0
PPC_STLU r1, -STACK_FRAME_OVERHEAD(r1)
PPC_STL r0, STACK_FRAME_OVERHEAD + PPC_LR_STKOFF(r1)
+#ifdef CONFIG_PPC64
+ PPC_STL r2, STACK_FRAME_OVERHEAD + STK_GOT(r1)
+#endif
get_datapage r4, r0
addi r4, r4, VDSO_DATA_OFFSET
bl \funct
PPC_LL r0, STACK_FRAME_OVERHEAD + PPC_LR_STKOFF(r1)
+#ifdef CONFIG_PPC64
+ PPC_LL r2, STACK_FRAME_OVERHEAD + STK_GOT(r1)
+#endif
crclr so
mtlr r0
.cfi_restore lr
--
2.25.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v10 4/5] powerpc/vdso: Switch VDSO to generic C implementation.
2020-08-05 7:09 [PATCH v10 0/5] powerpc: switch VDSO to C implementation Christophe Leroy
` (2 preceding siblings ...)
2020-08-05 7:09 ` [PATCH v10 3/5] powerpc/vdso: Save and restore TOC pointer on PPC64 Christophe Leroy
@ 2020-08-05 7:09 ` Christophe Leroy
2020-08-05 7:09 ` Christophe Leroy
2020-08-05 7:09 ` [PATCH v10 5/5] powerpc/vdso: Provide __kernel_clock_gettime64() on vdso32 Christophe Leroy
4 siblings, 1 reply; 18+ messages in thread
From: Christophe Leroy @ 2020-08-05 7:09 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, nathanl, anton
Cc: linux-arch, arnd, linux-kernel, luto, tglx, vincenzo.frascino,
linuxppc-dev
For VDSO32 on PPC64, we create a fake 32-bit config, on the same
principle as the MIPS architecture, in order to get the correct parts of
the different asm header files.
With the C VDSO, the performance is slightly lower, but it is worth
it as it will ease maintenance and evolution, and also brings clocks
that are not supported with the ASM VDSO.
On an 8xx at 132 MHz, vdsotest with the ASM VDSO:
gettimeofday: vdso: 828 nsec/call
clock-getres-realtime-coarse: vdso: 391 nsec/call
clock-gettime-realtime-coarse: vdso: 614 nsec/call
clock-getres-realtime: vdso: 460 nsec/call
clock-gettime-realtime: vdso: 876 nsec/call
clock-getres-monotonic-coarse: vdso: 399 nsec/call
clock-gettime-monotonic-coarse: vdso: 691 nsec/call
clock-getres-monotonic: vdso: 460 nsec/call
clock-gettime-monotonic: vdso: 1026 nsec/call
On an 8xx at 132 MHz, vdsotest with the C VDSO:
gettimeofday: vdso: 955 nsec/call
clock-getres-realtime-coarse: vdso: 545 nsec/call
clock-gettime-realtime-coarse: vdso: 592 nsec/call
clock-getres-realtime: vdso: 545 nsec/call
clock-gettime-realtime: vdso: 941 nsec/call
clock-getres-monotonic-coarse: vdso: 545 nsec/call
clock-gettime-monotonic-coarse: vdso: 591 nsec/call
clock-getres-monotonic: vdso: 545 nsec/call
clock-gettime-monotonic: vdso: 940 nsec/call
It is even better for gettime with monotonic clocks.
Unsupported clocks with ASM VDSO:
clock-gettime-boottime: vdso: 3851 nsec/call
clock-gettime-tai: vdso: 3852 nsec/call
clock-gettime-monotonic-raw: vdso: 3396 nsec/call
Same clocks with C VDSO:
clock-gettime-tai: vdso: 941 nsec/call
clock-gettime-monotonic-raw: vdso: 1001 nsec/call
clock-gettime-monotonic-coarse: vdso: 591 nsec/call
On an 8321E at 333 MHz, vdsotest with the ASM VDSO:
gettimeofday: vdso: 220 nsec/call
clock-getres-realtime-coarse: vdso: 102 nsec/call
clock-gettime-realtime-coarse: vdso: 178 nsec/call
clock-getres-realtime: vdso: 129 nsec/call
clock-gettime-realtime: vdso: 235 nsec/call
clock-getres-monotonic-coarse: vdso: 105 nsec/call
clock-gettime-monotonic-coarse: vdso: 208 nsec/call
clock-getres-monotonic: vdso: 129 nsec/call
clock-gettime-monotonic: vdso: 274 nsec/call
On an 8321E at 333 MHz, vdsotest with the C VDSO:
gettimeofday: vdso: 272 nsec/call
clock-getres-realtime-coarse: vdso: 160 nsec/call
clock-gettime-realtime-coarse: vdso: 184 nsec/call
clock-getres-realtime: vdso: 166 nsec/call
clock-gettime-realtime: vdso: 281 nsec/call
clock-getres-monotonic-coarse: vdso: 160 nsec/call
clock-gettime-monotonic-coarse: vdso: 184 nsec/call
clock-getres-monotonic: vdso: 169 nsec/call
clock-gettime-monotonic: vdso: 275 nsec/call
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
v9:
- Rebased (Impact on arch/powerpc/kernel/vdso??/Makefile)
v7:
- Split out preparatory changes in a new preceding patch
- Added -fasynchronous-unwind-tables to CC flags.
v6:
- Added missing prototypes in asm/vdso/gettimeofday.h for __c_kernel_ functions.
- Using STACK_FRAME_OVERHEAD instead of INT_FRAME_SIZE
- Rebased on powerpc/merge as of 7 Apr 2020
- Fixed build failure with gcc 9
- Added a patch to create asm/vdso/processor.h and more cpu_relax() in it
---
arch/powerpc/Kconfig | 2 +
arch/powerpc/include/asm/vdso/vsyscall.h | 25 ++
arch/powerpc/include/asm/vdso_datapage.h | 40 +--
arch/powerpc/kernel/asm-offsets.c | 49 +---
arch/powerpc/kernel/time.c | 91 +------
arch/powerpc/kernel/vdso.c | 5 +-
arch/powerpc/kernel/vdso32/Makefile | 32 ++-
arch/powerpc/kernel/vdso32/config-fake32.h | 34 +++
arch/powerpc/kernel/vdso32/gettimeofday.S | 291 +--------------------
arch/powerpc/kernel/vdso64/Makefile | 23 +-
arch/powerpc/kernel/vdso64/gettimeofday.S | 242 +----------------
11 files changed, 143 insertions(+), 691 deletions(-)
create mode 100644 arch/powerpc/include/asm/vdso/vsyscall.h
create mode 100644 arch/powerpc/kernel/vdso32/config-fake32.h
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8c7656cc10eb..9977ab939b42 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -171,6 +171,7 @@ config PPC
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
+ select GENERIC_GETTIMEOFDAY
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU
select HAVE_ARCH_JUMP_LABEL
@@ -202,6 +203,7 @@ config PPC
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC
+ select HAVE_GENERIC_VDSO
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
select HAVE_IDE
select HAVE_IOREMAP_PROT
diff --git a/arch/powerpc/include/asm/vdso/vsyscall.h b/arch/powerpc/include/asm/vdso/vsyscall.h
new file mode 100644
index 000000000000..c56a030c0623
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso/vsyscall.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/timekeeper_internal.h>
+#include <asm/vdso_datapage.h>
+
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+static __always_inline
+struct vdso_data *__arch_get_k_vdso_data(void)
+{
+ return vdso_data->data;
+}
+#define __arch_get_k_vdso_data __arch_get_k_vdso_data
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h
index b9ef6cf50ea5..c4d320504d26 100644
--- a/arch/powerpc/include/asm/vdso_datapage.h
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -36,6 +36,7 @@
#include <linux/unistd.h>
#include <linux/time.h>
+#include <vdso/datapage.h>
#define SYSCALL_MAP_SIZE ((NR_syscalls + 31) / 32)
@@ -45,7 +46,7 @@
#ifdef CONFIG_PPC64
-struct vdso_data {
+struct vdso_arch_data {
__u8 eye_catcher[16]; /* Eyecatcher: SYSTEMCFG:PPC64 0x00 */
struct { /* Systemcfg version numbers */
__u32 major; /* Major number 0x10 */
@@ -59,13 +60,13 @@ struct vdso_data {
__u32 processor; /* Processor type 0x1C */
__u64 processorCount; /* # of physical processors 0x20 */
__u64 physicalMemorySize; /* Size of real memory(B) 0x28 */
- __u64 tb_orig_stamp; /* Timebase at boot 0x30 */
+ __u64 tb_orig_stamp; /* (NU) Timebase at boot 0x30 */
__u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */
- __u64 tb_to_xs; /* Inverse of TB to 2^20 0x40 */
- __u64 stamp_xsec; /* 0x48 */
- __u64 tb_update_count; /* Timebase atomicity ctr 0x50 */
- __u32 tz_minuteswest; /* Minutes west of Greenwich 0x58 */
- __u32 tz_dsttime; /* Type of dst correction 0x5C */
+ __u64 tb_to_xs; /* (NU) Inverse of TB to 2^20 0x40 */
+ __u64 stamp_xsec; /* (NU) 0x48 */
+ __u64 tb_update_count; /* (NU) Timebase atomicity ctr 0x50 */
+ __u32 tz_minuteswest; /* (NU) Min. west of Greenwich 0x58 */
+ __u32 tz_dsttime; /* (NU) Type of dst correction 0x5C */
__u32 dcache_size; /* L1 d-cache size 0x60 */
__u32 dcache_line_size; /* L1 d-cache line size 0x64 */
__u32 icache_size; /* L1 i-cache size 0x68 */
@@ -78,14 +79,10 @@ struct vdso_data {
__u32 icache_block_size; /* L1 i-cache block size */
__u32 dcache_log_block_size; /* L1 d-cache log block size */
__u32 icache_log_block_size; /* L1 i-cache log block size */
- __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */
- __s32 wtom_clock_nsec; /* Wall to monotonic clock nsec */
- __s64 wtom_clock_sec; /* Wall to monotonic clock sec */
- __s64 stamp_xtime_sec; /* xtime secs as at tb_orig_stamp */
- __s64 stamp_xtime_nsec; /* xtime nsecs as at tb_orig_stamp */
- __u32 hrtimer_res; /* hrtimer resolution */
__u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */
__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
+
+ struct vdso_data data[CS_BASES];
};
#else /* CONFIG_PPC64 */
@@ -93,26 +90,15 @@ struct vdso_data {
/*
* And here is the simpler 32 bits version
*/
-struct vdso_data {
- __u64 tb_orig_stamp; /* Timebase at boot 0x30 */
+struct vdso_arch_data {
__u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */
- __u64 tb_to_xs; /* Inverse of TB to 2^20 0x40 */
- __u64 stamp_xsec; /* 0x48 */
- __u32 tb_update_count; /* Timebase atomicity ctr 0x50 */
- __u32 tz_minuteswest; /* Minutes west of Greenwich 0x58 */
- __u32 tz_dsttime; /* Type of dst correction 0x5C */
- __s32 wtom_clock_sec; /* Wall to monotonic clock */
- __s32 wtom_clock_nsec;
- __s32 stamp_xtime_sec; /* xtime seconds as at tb_orig_stamp */
- __s32 stamp_xtime_nsec; /* xtime nsecs as at tb_orig_stamp */
- __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */
- __u32 hrtimer_res; /* hrtimer resolution */
__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
+ struct vdso_data data[CS_BASES];
};
#endif /* CONFIG_PPC64 */
-extern struct vdso_data *vdso_data;
+extern struct vdso_arch_data *vdso_data;
#else /* __ASSEMBLY__ */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8711c2164b45..684260186dbf 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -397,47 +397,16 @@ int main(void)
#endif /* ! CONFIG_PPC64 */
/* datapage offsets for use by vdso */
- OFFSET(CFG_TB_ORIG_STAMP, vdso_data, tb_orig_stamp);
- OFFSET(CFG_TB_TICKS_PER_SEC, vdso_data, tb_ticks_per_sec);
- OFFSET(CFG_TB_TO_XS, vdso_data, tb_to_xs);
- OFFSET(CFG_TB_UPDATE_COUNT, vdso_data, tb_update_count);
- OFFSET(CFG_TZ_MINUTEWEST, vdso_data, tz_minuteswest);
- OFFSET(CFG_TZ_DSTTIME, vdso_data, tz_dsttime);
- OFFSET(CFG_SYSCALL_MAP32, vdso_data, syscall_map_32);
- OFFSET(WTOM_CLOCK_SEC, vdso_data, wtom_clock_sec);
- OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec);
- OFFSET(STAMP_XTIME_SEC, vdso_data, stamp_xtime_sec);
- OFFSET(STAMP_XTIME_NSEC, vdso_data, stamp_xtime_nsec);
- OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction);
- OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res);
+ OFFSET(VDSO_DATA_OFFSET, vdso_arch_data, data);
+ OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec);
+ OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, syscall_map_32);
#ifdef CONFIG_PPC64
- OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size);
- OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size);
- OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size);
- OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_data, dcache_log_block_size);
- OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64);
- OFFSET(TVAL64_TV_SEC, __kernel_old_timeval, tv_sec);
- OFFSET(TVAL64_TV_USEC, __kernel_old_timeval, tv_usec);
-#endif
- OFFSET(TSPC64_TV_SEC, __kernel_timespec, tv_sec);
- OFFSET(TSPC64_TV_NSEC, __kernel_timespec, tv_nsec);
- OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec);
- OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec);
- OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec);
- OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec);
- /* timeval/timezone offsets for use by vdso */
- OFFSET(TZONE_TZ_MINWEST, timezone, tz_minuteswest);
- OFFSET(TZONE_TZ_DSTTIME, timezone, tz_dsttime);
-
- /* Other bits used by the vdso */
- DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
- DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
- DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
- DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
- DEFINE(CLOCK_MAX, CLOCK_TAI);
- DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
- DEFINE(EINVAL, EINVAL);
- DEFINE(KTIME_LOW_RES, KTIME_LOW_RES);
+ OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size);
+ OFFSET(CFG_DCACHE_BLOCKSZ, vdso_arch_data, dcache_block_size);
+ OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_arch_data, icache_log_block_size);
+ OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_arch_data, dcache_log_block_size);
+ OFFSET(CFG_SYSCALL_MAP64, vdso_arch_data, syscall_map_64);
+#endif
#ifdef CONFIG_BUG
DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 6fcae436ae51..b63b1f97a1b3 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -91,6 +91,7 @@ static struct clocksource clocksource_timebase = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
.mask = CLOCKSOURCE_MASK(64),
.read = timebase_read,
+ .vdso_clock_mode = VDSO_CLOCKMODE_ARCHTIMER,
};
#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
@@ -855,95 +856,6 @@ static notrace u64 timebase_read(struct clocksource *cs)
return (u64)get_tb();
}
-
-void update_vsyscall(struct timekeeper *tk)
-{
- struct timespec64 xt;
- struct clocksource *clock = tk->tkr_mono.clock;
- u32 mult = tk->tkr_mono.mult;
- u32 shift = tk->tkr_mono.shift;
- u64 cycle_last = tk->tkr_mono.cycle_last;
- u64 new_tb_to_xs, new_stamp_xsec;
- u64 frac_sec;
-
- if (clock != &clocksource_timebase)
- return;
-
- xt.tv_sec = tk->xtime_sec;
- xt.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
-
- /* Make userspace gettimeofday spin until we're done. */
- ++vdso_data->tb_update_count;
- smp_mb();
-
- /*
- * This computes ((2^20 / 1e9) * mult) >> shift as a
- * 0.64 fixed-point fraction.
- * The computation in the else clause below won't overflow
- * (as long as the timebase frequency is >= 1.049 MHz)
- * but loses precision because we lose the low bits of the constant
- * in the shift. Note that 19342813113834067 ~= 2^(20+64) / 1e9.
- * For a shift of 24 the error is about 0.5e-9, or about 0.5ns
- * over a second. (Shift values are usually 22, 23 or 24.)
- * For high frequency clocks such as the 512MHz timebase clock
- * on POWER[6789], the mult value is small (e.g. 32768000)
- * and so we can shift the constant by 16 initially
- * (295147905179 ~= 2^(20+64-16) / 1e9) and then do the
- * remaining shifts after the multiplication, which gives a
- * more accurate result (e.g. with mult = 32768000, shift = 24,
- * the error is only about 1.2e-12, or 0.7ns over 10 minutes).
- */
- if (mult <= 62500000 && clock->shift >= 16)
- new_tb_to_xs = ((u64) mult * 295147905179ULL) >> (clock->shift - 16);
- else
- new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift);
-
- /*
- * Compute the fractional second in units of 2^-32 seconds.
- * The fractional second is tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift
- * in nanoseconds, so multiplying that by 2^32 / 1e9 gives
- * it in units of 2^-32 seconds.
- * We assume shift <= 32 because clocks_calc_mult_shift()
- * generates shift values in the range 0 - 32.
- */
- frac_sec = tk->tkr_mono.xtime_nsec << (32 - shift);
- do_div(frac_sec, NSEC_PER_SEC);
-
- /*
- * Work out new stamp_xsec value for any legacy users of systemcfg.
- * stamp_xsec is in units of 2^-20 seconds.
- */
- new_stamp_xsec = frac_sec >> 12;
- new_stamp_xsec += tk->xtime_sec * XSEC_PER_SEC;
-
- /*
- * tb_update_count is used to allow the userspace gettimeofday code
- * to assure itself that it sees a consistent view of the tb_to_xs and
- * stamp_xsec variables. It reads the tb_update_count, then reads
- * tb_to_xs and stamp_xsec and then reads tb_update_count again. If
- * the two values of tb_update_count match and are even then the
- * tb_to_xs and stamp_xsec values are consistent. If not, then it
- * loops back and reads them again until this criteria is met.
- */
- vdso_data->tb_orig_stamp = cycle_last;
- vdso_data->stamp_xsec = new_stamp_xsec;
- vdso_data->tb_to_xs = new_tb_to_xs;
- vdso_data->wtom_clock_sec = tk->wall_to_monotonic.tv_sec;
- vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec;
- vdso_data->stamp_xtime_sec = xt.tv_sec;
- vdso_data->stamp_xtime_nsec = xt.tv_nsec;
- vdso_data->stamp_sec_fraction = frac_sec;
- vdso_data->hrtimer_res = hrtimer_resolution;
- smp_wmb();
- ++(vdso_data->tb_update_count);
-}
-
-void update_vsyscall_tz(void)
-{
- vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
- vdso_data->tz_dsttime = sys_tz.tz_dsttime;
-}
-
static void __init clocksource_init(void)
{
struct clocksource *clock;
@@ -1113,7 +1025,6 @@ void __init time_init(void)
sys_tz.tz_dsttime = 0;
}
- vdso_data->tb_update_count = 0;
vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
/* initialise and enable the large decrementer (if we have one) */
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 8dad44262e75..23208a051af5 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -17,6 +17,7 @@
#include <linux/elf.h>
#include <linux/security.h>
#include <linux/memblock.h>
+#include <vdso/datapage.h>
#include <asm/processor.h>
#include <asm/mmu.h>
@@ -70,10 +71,10 @@ static int vdso_ready;
* with it, it will become dynamically allocated
*/
static union {
- struct vdso_data data;
+ struct vdso_arch_data data;
u8 page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
+struct vdso_arch_data *vdso_data = &vdso_data_store.data;
/* Format of the patch table */
struct vdso_patch_def
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index 87ab1152d5ce..b922044236dd 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -2,7 +2,23 @@
# List of files in the vdso, has to be asm only for now
-obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
+ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN
+include $(srctree)/lib/vdso/Makefile
+
+obj-vdso32 = sigtramp.o datapage.o cacheflush.o note.o getcpu.o $(obj-vdso32-y)
+obj-vdso32 += gettimeofday.o
+
+ifneq ($(c-gettimeofday-y),)
+ ifdef CONFIG_PPC64
+ CFLAGS_vgettimeofday.o += -include $(srctree)/$(src)/config-fake32.h
+ endif
+ CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
+ CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+ CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector)
+ CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING
+ CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables
+ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE)
+endif
# Build rules
@@ -15,6 +31,7 @@ endif
CC32FLAGS :=
ifdef CONFIG_PPC64
CC32FLAGS += -m32
+KBUILD_CFLAGS := $(filter-out -mcmodel=medium,$(KBUILD_CFLAGS))
endif
targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
@@ -23,6 +40,7 @@ obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
GCOV_PROFILE := n
KCOV_INSTRUMENT := n
UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
-Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both
@@ -36,8 +54,8 @@ CPPFLAGS_vdso32.lds += -P -C -Upowerpc
$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
# link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
- $(call if_changed,vdso32ld)
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday.o FORCE
+ $(call if_changed,vdso32ld_and_check)
# strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
@@ -47,12 +65,16 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
# assembly rules for the .S files
$(obj-vdso32): %.o: %.S FORCE
$(call if_changed_dep,vdso32as)
+$(obj)/vgettimeofday.o: %.o: %.c FORCE
+ $(call if_changed_dep,vdso32cc)
# actual build commands
-quiet_cmd_vdso32ld = VDSO32L $@
- cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn) -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
+quiet_cmd_vdso32ld_and_check = VDSO32L $@
+ cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn) -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check)
quiet_cmd_vdso32as = VDSO32A $@
cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $<
+quiet_cmd_vdso32cc = VDSO32C $@
+ cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $<
# install commands for the unstripped file
quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/powerpc/kernel/vdso32/config-fake32.h b/arch/powerpc/kernel/vdso32/config-fake32.h
new file mode 100644
index 000000000000..e16041fc15c9
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/config-fake32.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * In case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel
+ * configuration.
+ */
+
+#undef CONFIG_PPC64
+#undef CONFIG_64BIT
+#define CONFIG_PPC32
+#define CONFIG_32BIT 1
+#define CONFIG_GENERIC_ATOMIC64 1
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#undef CONFIG_PPC_BOOK3S_64
+#undef CONFIG_PPC_PSERIES
+#define CONFIG_PPC_BOOK3S_32
+#else
+#define CONFIG_PPC_MMU_NOHASH_32
+#define CONFIG_FSL_BOOKE
+#endif
+
+#define CONFIG_TASK_SIZE 0
+#undef CONFIG_MMIOWB
+#undef CONFIG_PPC_SPLPAR
+#undef CONFIG_SPARSEMEM
+#undef CONFIG_PGTABLE_LEVELS
+#define CONFIG_PGTABLE_LEVELS 2
+#undef CONFIG_TRANSPARENT_HUGEPAGE
+#undef CONFIG_SPARSEMEM_VMEMMAP
+#undef CONFIG_FLATMEM
+#define CONFIG_FLATMEM
+#undef CONFIG_PPC_INDIRECT_MMIO
+#undef CONFIG_PPC_INDIRECT_PIO
+#undef CONFIG_EEH
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index e7f8f9f1b3f4..fd7b01c51281 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -12,13 +12,7 @@
#include <asm/vdso_datapage.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
-
-/* Offset for the low 32-bit part of a field of long type */
-#ifdef CONFIG_PPC64
-#define LOPART 4
-#else
-#define LOPART 0
-#endif
+#include <asm/vdso/gettimeofday.h>
.text
/*
@@ -28,32 +22,7 @@
*
*/
V_FUNCTION_BEGIN(__kernel_gettimeofday)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
-
- mr. r10,r3 /* r10 saves tv */
- mr r11,r4 /* r11 saves tz */
- get_datapage r9, r0
- beq 3f
- LOAD_REG_IMMEDIATE(r7, 1000000) /* load up USEC_PER_SEC */
- bl __do_get_tspec@local /* get sec/usec from tb & kernel */
- stw r3,TVAL32_TV_SEC(r10)
- stw r4,TVAL32_TV_USEC(r10)
-
-3: cmplwi r11,0 /* check if tz is NULL */
- mtlr r12
- crclr cr0*4+so
- li r3,0
- beqlr
-
- lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */
- lwz r5,CFG_TZ_DSTTIME(r9)
- stw r4,TZONE_TZ_MINWEST(r11)
- stw r5,TZONE_TZ_DSTTIME(r11)
-
- blr
- .cfi_endproc
+ cvdso_call __c_kernel_gettimeofday
V_FUNCTION_END(__kernel_gettimeofday)
/*
@@ -63,127 +32,7 @@ V_FUNCTION_END(__kernel_gettimeofday)
*
*/
V_FUNCTION_BEGIN(__kernel_clock_gettime)
- .cfi_startproc
- /* Check for supported clock IDs */
- cmpli cr0,r3,CLOCK_REALTIME
- cmpli cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
-
- cmpli cr5,r3,CLOCK_REALTIME_COARSE
- cmpli cr6,r3,CLOCK_MONOTONIC_COARSE
- cror cr5*4+eq,cr5*4+eq,cr6*4+eq
-
- cror cr0*4+eq,cr0*4+eq,cr5*4+eq
- bne cr0, .Lgettime_fallback
-
- mflr r12 /* r12 saves lr */
- .cfi_register lr,r12
- mr r11,r4 /* r11 saves tp */
- get_datapage r9, r0
- LOAD_REG_IMMEDIATE(r7, NSEC_PER_SEC) /* load up NSEC_PER_SEC */
- beq cr5, .Lcoarse_clocks
-.Lprecise_clocks:
- bl __do_get_tspec@local /* get sec/nsec from tb & kernel */
- bne cr1, .Lfinish /* not monotonic -> all done */
-
- /*
- * CLOCK_MONOTONIC
- */
-
- /* now we must fixup using wall to monotonic. We need to snapshot
- * that value and do the counter trick again. Fortunately, we still
- * have the counter value in r8 that was returned by __do_get_xsec.
- * At this point, r3,r4 contain our sec/nsec values, r5 and r6
- * can be used, r7 contains NSEC_PER_SEC.
- */
-
- lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9)
- lwz r6,WTOM_CLOCK_NSEC(r9)
-
- /* We now have our offset in r5,r6. We create a fake dependency
- * on that value and re-check the counter
- */
- or r0,r6,r5
- xor r0,r0,r0
- add r9,r9,r0
- lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
- cmpl cr0,r8,r0 /* check if updated */
- bne- .Lprecise_clocks
- b .Lfinish_monotonic
-
- /*
- * For coarse clocks we get data directly from the vdso data page, so
- * we don't need to call __do_get_tspec, but we still need to do the
- * counter trick.
- */
-.Lcoarse_clocks:
- lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
- andi. r0,r8,1 /* pending update ? loop */
- bne- .Lcoarse_clocks
- add r9,r9,r0 /* r0 is already 0 */
-
- /*
- * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE
- * too
- */
- lwz r3,STAMP_XTIME_SEC+LOPART(r9)
- lwz r4,STAMP_XTIME_NSEC+LOPART(r9)
- bne cr6,1f
-
- /* CLOCK_MONOTONIC_COARSE */
- lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9)
- lwz r6,WTOM_CLOCK_NSEC(r9)
-
- /* check if counter has updated */
- or r0,r6,r5
-1: or r0,r0,r3
- or r0,r0,r4
- xor r0,r0,r0
- add r3,r3,r0
- lwz r0,CFG_TB_UPDATE_COUNT+LOPART(r9)
- cmpl cr0,r0,r8 /* check if updated */
- bne- .Lcoarse_clocks
-
- /* Counter has not updated, so continue calculating proper values for
- * sec and nsec if monotonic coarse, or just return with the proper
- * values for realtime.
- */
- bne cr6, .Lfinish
-
- /* Calculate and store result. Note that this mimics the C code,
- * which may cause funny results if nsec goes negative... is that
- * possible at all ?
- */
-.Lfinish_monotonic:
- add r3,r3,r5
- add r4,r4,r6
- cmpw cr0,r4,r7
- cmpwi cr1,r4,0
- blt 1f
- subf r4,r7,r4
- addi r3,r3,1
-1: bge cr1, .Lfinish
- addi r3,r3,-1
- add r4,r4,r7
-
-.Lfinish:
- stw r3,TSPC32_TV_SEC(r11)
- stw r4,TSPC32_TV_NSEC(r11)
-
- mtlr r12
- crclr cr0*4+so
- li r3,0
- blr
-
- /*
- * syscall fallback
- */
-.Lgettime_fallback:
- li r0,__NR_clock_gettime
- .cfi_restore lr
- sc
- blr
- .cfi_endproc
+ cvdso_call __c_kernel_clock_gettime
V_FUNCTION_END(__kernel_clock_gettime)
@@ -194,37 +43,7 @@ V_FUNCTION_END(__kernel_clock_gettime)
*
*/
V_FUNCTION_BEGIN(__kernel_clock_getres)
- .cfi_startproc
- /* Check for supported clock IDs */
- cmplwi cr0, r3, CLOCK_MAX
- cmpwi cr1, r3, CLOCK_REALTIME_COARSE
- cmpwi cr7, r3, CLOCK_MONOTONIC_COARSE
- bgt cr0, 99f
- LOAD_REG_IMMEDIATE(r5, KTIME_LOW_RES)
- beq cr1, 1f
- beq cr7, 1f
-
- mflr r12
- .cfi_register lr,r12
- get_datapage r3, r0
- lwz r5, CLOCK_HRTIMER_RES(r3)
- mtlr r12
-1: li r3,0
- cmpli cr0,r4,0
- crclr cr0*4+so
- beqlr
- stw r3,TSPC32_TV_SEC(r4)
- stw r5,TSPC32_TV_NSEC(r4)
- blr
-
- /*
- * syscall fallback
- */
-99:
- li r0,__NR_clock_getres
- sc
- blr
- .cfi_endproc
+ cvdso_call __c_kernel_clock_getres
V_FUNCTION_END(__kernel_clock_getres)
@@ -235,105 +54,5 @@ V_FUNCTION_END(__kernel_clock_getres)
*
*/
V_FUNCTION_BEGIN(__kernel_time)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
-
- mr r11,r3 /* r11 holds t */
- get_datapage r9, r0
-
- lwz r3,STAMP_XTIME_SEC+LOPART(r9)
-
- cmplwi r11,0 /* check if t is NULL */
- mtlr r12
- crclr cr0*4+so
- beqlr
- stw r3,0(r11) /* store result at *t */
- blr
- .cfi_endproc
+ cvdso_call_time __c_kernel_time
V_FUNCTION_END(__kernel_time)
-
-/*
- * This is the core of clock_gettime() and gettimeofday(),
- * it returns the current time in r3 (seconds) and r4.
- * On entry, r7 gives the resolution of r4, either USEC_PER_SEC
- * or NSEC_PER_SEC, giving r4 in microseconds or nanoseconds.
- * It expects the datapage ptr in r9 and doesn't clobber it.
- * It clobbers r0, r5 and r6.
- * On return, r8 contains the counter value that can be reused.
- * This clobbers cr0 but not any other cr field.
- */
-__do_get_tspec:
- .cfi_startproc
- /* Check for update count & load values. We use the low
- * order 32 bits of the update count
- */
-1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
- andi. r0,r8,1 /* pending update ? loop */
- bne- 1b
- xor r0,r8,r8 /* create dependency */
- add r9,r9,r0
-
- /* Load orig stamp (offset to TB) */
- lwz r5,CFG_TB_ORIG_STAMP(r9)
- lwz r6,(CFG_TB_ORIG_STAMP+4)(r9)
-
- /* Get a stable TB value */
-2: MFTBU(r3)
- MFTBL(r4)
- MFTBU(r0)
- cmplw cr0,r3,r0
- bne- 2b
-
- /* Subtract tb orig stamp and shift left 12 bits.
- */
- subfc r4,r6,r4
- subfe r0,r5,r3
- slwi r0,r0,12
- rlwimi. r0,r4,12,20,31
- slwi r4,r4,12
-
- /*
- * Load scale factor & do multiplication.
- * We only use the high 32 bits of the tb_to_xs value.
- * Even with a 1GHz timebase clock, the high 32 bits of
- * tb_to_xs will be at least 4 million, so the error from
- * ignoring the low 32 bits will be no more than 0.25ppm.
- * The error will just make the clock run very very slightly
- * slow until the next time the kernel updates the VDSO data,
- * at which point the clock will catch up to the kernel's value,
- * so there is no long-term error accumulation.
- */
- lwz r5,CFG_TB_TO_XS(r9) /* load values */
- mulhwu r4,r4,r5
- li r3,0
-
- beq+ 4f /* skip high part computation if 0 */
- mulhwu r3,r0,r5
- mullw r5,r0,r5
- addc r4,r4,r5
- addze r3,r3
-4:
- /* At this point, we have seconds since the xtime stamp
- * as a 32.32 fixed-point number in r3 and r4.
- * Load & add the xtime stamp.
- */
- lwz r5,STAMP_XTIME_SEC+LOPART(r9)
- lwz r6,STAMP_SEC_FRAC(r9)
- addc r4,r4,r6
- adde r3,r3,r5
-
- /* We create a fake dependency on the result in r3/r4
- * and re-check the counter
- */
- or r6,r4,r3
- xor r0,r6,r6
- add r9,r9,r0
- lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
- cmplw cr0,r8,r0 /* check if updated */
- bne- 1b
-
- mulhwu r4,r4,r7 /* convert to micro or nanoseconds */
-
- blr
- .cfi_endproc
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index 38c317f25141..7890d889f9c5 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -1,8 +1,20 @@
# SPDX-License-Identifier: GPL-2.0
# List of files in the vdso, has to be asm only for now
+ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN
+include $(srctree)/lib/vdso/Makefile
+
obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
+ifneq ($(c-gettimeofday-y),)
+ CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
+ CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+ CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector)
+ CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING
+ CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables
+ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE)
+endif
+
# Build rules
targets := $(obj-vdso64) vdso64.so vdso64.so.dbg
@@ -11,6 +23,7 @@ obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
GCOV_PROFILE := n
KCOV_INSTRUMENT := n
UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
-Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both
@@ -20,12 +33,14 @@ obj-y += vdso64_wrapper.o
extra-y += vdso64.lds
CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
+$(obj)/vgettimeofday.o: %.o: %.c FORCE
+
# Force dependency (incbin is bad)
$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE
- $(call if_changed,vdso64ld)
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday.o FORCE
+ $(call if_changed,vdso64ld_and_check)
# strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
@@ -33,8 +48,8 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
# actual build commands
-quiet_cmd_vdso64ld = VDSO64L $@
- cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn)
+quiet_cmd_vdso64ld_and_check = VDSO64L $@
+ cmd_vdso64ld_and_check = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn); $(cmd_vdso_check)
# install commands for the unstripped file
quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 20f8be40c653..d7a7bfb51081 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -12,6 +12,7 @@
#include <asm/vdso_datapage.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
+#include <asm/vdso/gettimeofday.h>
.text
/*
@@ -21,31 +22,7 @@
*
*/
V_FUNCTION_BEGIN(__kernel_gettimeofday)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
-
- mr r11,r3 /* r11 holds tv */
- mr r10,r4 /* r10 holds tz */
- get_datapage r3, r0
- cmpldi r11,0 /* check if tv is NULL */
- beq 2f
- lis r7,1000000@ha /* load up USEC_PER_SEC */
- addi r7,r7,1000000@l
- bl V_LOCAL_FUNC(__do_get_tspec) /* get sec/us from tb & kernel */
- std r4,TVAL64_TV_SEC(r11) /* store sec in tv */
- std r5,TVAL64_TV_USEC(r11) /* store usec in tv */
-2: cmpldi r10,0 /* check if tz is NULL */
- beq 1f
- lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */
- lwz r5,CFG_TZ_DSTTIME(r3)
- stw r4,TZONE_TZ_MINWEST(r10)
- stw r5,TZONE_TZ_DSTTIME(r10)
-1: mtlr r12
- crclr cr0*4+so
- li r3,0 /* always success */
- blr
- .cfi_endproc
+ cvdso_call __c_kernel_gettimeofday
V_FUNCTION_END(__kernel_gettimeofday)
@@ -56,120 +33,7 @@ V_FUNCTION_END(__kernel_gettimeofday)
*
*/
V_FUNCTION_BEGIN(__kernel_clock_gettime)
- .cfi_startproc
- /* Check for supported clock IDs */
- cmpwi cr0,r3,CLOCK_REALTIME
- cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
-
- cmpwi cr5,r3,CLOCK_REALTIME_COARSE
- cmpwi cr6,r3,CLOCK_MONOTONIC_COARSE
- cror cr5*4+eq,cr5*4+eq,cr6*4+eq
-
- cror cr0*4+eq,cr0*4+eq,cr5*4+eq
- bne cr0,99f
-
- mflr r12 /* r12 saves lr */
- .cfi_register lr,r12
- mr r11,r4 /* r11 saves tp */
- get_datapage r3, r0
- lis r7,NSEC_PER_SEC@h /* want nanoseconds */
- ori r7,r7,NSEC_PER_SEC@l
- beq cr5,70f
-50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
- bne cr1,80f /* if not monotonic, all done */
-
- /*
- * CLOCK_MONOTONIC
- */
-
- /* now we must fixup using wall to monotonic. We need to snapshot
- * that value and do the counter trick again. Fortunately, we still
- * have the counter value in r8 that was returned by __do_get_tspec.
- * At this point, r4,r5 contain our sec/nsec values.
- */
-
- ld r6,WTOM_CLOCK_SEC(r3)
- lwa r9,WTOM_CLOCK_NSEC(r3)
-
- /* We now have our result in r6,r9. We create a fake dependency
- * on that result and re-check the counter
- */
- or r0,r6,r9
- xor r0,r0,r0
- add r3,r3,r0
- ld r0,CFG_TB_UPDATE_COUNT(r3)
- cmpld cr0,r0,r8 /* check if updated */
- bne- 50b
- b 78f
-
- /*
- * For coarse clocks we get data directly from the vdso data page, so
- * we don't need to call __do_get_tspec, but we still need to do the
- * counter trick.
- */
-70: ld r8,CFG_TB_UPDATE_COUNT(r3)
- andi. r0,r8,1 /* pending update ? loop */
- bne- 70b
- add r3,r3,r0 /* r0 is already 0 */
-
- /*
- * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE
- * too
- */
- ld r4,STAMP_XTIME_SEC(r3)
- ld r5,STAMP_XTIME_NSEC(r3)
- bne cr6,75f
-
- /* CLOCK_MONOTONIC_COARSE */
- ld r6,WTOM_CLOCK_SEC(r3)
- lwa r9,WTOM_CLOCK_NSEC(r3)
-
- /* check if counter has updated */
- or r0,r6,r9
-75: or r0,r0,r4
- or r0,r0,r5
- xor r0,r0,r0
- add r3,r3,r0
- ld r0,CFG_TB_UPDATE_COUNT(r3)
- cmpld cr0,r0,r8 /* check if updated */
- bne- 70b
-
- /* Counter has not updated, so continue calculating proper values for
- * sec and nsec if monotonic coarse, or just return with the proper
- * values for realtime.
- */
- bne cr6,80f
-
- /* Add wall->monotonic offset and check for overflow or underflow */
-78: add r4,r4,r6
- add r5,r5,r9
- cmpd cr0,r5,r7
- cmpdi cr1,r5,0
- blt 79f
- subf r5,r7,r5
- addi r4,r4,1
-79: bge cr1,80f
- addi r4,r4,-1
- add r5,r5,r7
-
-80: std r4,TSPC64_TV_SEC(r11)
- std r5,TSPC64_TV_NSEC(r11)
-
- mtlr r12
- crclr cr0*4+so
- li r3,0
- blr
-
- /*
- * syscall fallback
- */
-99:
- li r0,__NR_clock_gettime
- .cfi_restore lr
- sc
- blr
- .cfi_endproc
+ cvdso_call __c_kernel_clock_gettime
V_FUNCTION_END(__kernel_clock_gettime)
@@ -180,34 +44,7 @@ V_FUNCTION_END(__kernel_clock_gettime)
*
*/
V_FUNCTION_BEGIN(__kernel_clock_getres)
- .cfi_startproc
- /* Check for supported clock IDs */
- cmpwi cr0,r3,CLOCK_REALTIME
- cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
-
- mflr r12
- .cfi_register lr,r12
- get_datapage r3, r0
- lwz r5, CLOCK_HRTIMER_RES(r3)
- mtlr r12
- li r3,0
- cmpldi cr0,r4,0
- crclr cr0*4+so
- beqlr
- std r3,TSPC64_TV_SEC(r4)
- std r5,TSPC64_TV_NSEC(r4)
- blr
-
- /*
- * syscall fallback
- */
-99:
- li r0,__NR_clock_getres
- sc
- blr
- .cfi_endproc
+ cvdso_call __c_kernel_clock_getres
V_FUNCTION_END(__kernel_clock_getres)
/*
@@ -217,74 +54,5 @@ V_FUNCTION_END(__kernel_clock_getres)
*
*/
V_FUNCTION_BEGIN(__kernel_time)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
-
- mr r11,r3 /* r11 holds t */
- get_datapage r3, r0
-
- ld r4,STAMP_XTIME_SEC(r3)
-
- cmpldi r11,0 /* check if t is NULL */
- beq 2f
- std r4,0(r11) /* store result at *t */
-2: mtlr r12
- crclr cr0*4+so
- mr r3,r4
- blr
- .cfi_endproc
+ cvdso_call_time __c_kernel_time
V_FUNCTION_END(__kernel_time)
-
-
-/*
- * This is the core of clock_gettime() and gettimeofday(),
- * it returns the current time in r4 (seconds) and r5.
- * On entry, r7 gives the resolution of r5, either USEC_PER_SEC
- * or NSEC_PER_SEC, giving r5 in microseconds or nanoseconds.
- * It expects the datapage ptr in r3 and doesn't clobber it.
- * It clobbers r0, r6 and r9.
- * On return, r8 contains the counter value that can be reused.
- * This clobbers cr0 but not any other cr field.
- */
-V_FUNCTION_BEGIN(__do_get_tspec)
- .cfi_startproc
- /* check for update count & load values */
-1: ld r8,CFG_TB_UPDATE_COUNT(r3)
- andi. r0,r8,1 /* pending update ? loop */
- bne- 1b
- xor r0,r8,r8 /* create dependency */
- add r3,r3,r0
-
- /* Get TB & offset it. We use the MFTB macro which will generate
- * workaround code for Cell.
- */
- MFTB(r6)
- ld r9,CFG_TB_ORIG_STAMP(r3)
- subf r6,r9,r6
-
- /* Scale result */
- ld r5,CFG_TB_TO_XS(r3)
- sldi r6,r6,12 /* compute time since stamp_xtime */
- mulhdu r6,r6,r5 /* in units of 2^-32 seconds */
-
- /* Add stamp since epoch */
- ld r4,STAMP_XTIME_SEC(r3)
- lwz r5,STAMP_SEC_FRAC(r3)
- or r0,r4,r5
- or r0,r0,r6
- xor r0,r0,r0
- add r3,r3,r0
- ld r0,CFG_TB_UPDATE_COUNT(r3)
- cmpld r0,r8 /* check if updated */
- bne- 1b /* reload if so */
-
- /* convert to seconds & nanoseconds and add to stamp */
- add r6,r6,r5 /* add on fractional seconds of xtime */
- mulhwu r5,r6,r7 /* compute micro or nanoseconds and */
- srdi r6,r6,32 /* seconds since stamp_xtime */
- clrldi r5,r5,32
- add r4,r4,r6
- blr
- .cfi_endproc
-V_FUNCTION_END(__do_get_tspec)
--
2.25.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v10 4/5] powerpc/vdso: Switch VDSO to generic C implementation.
2020-08-05 7:09 ` [PATCH v10 4/5] powerpc/vdso: Switch VDSO to generic C implementation Christophe Leroy
@ 2020-08-05 7:09 ` Christophe Leroy
0 siblings, 0 replies; 18+ messages in thread
From: Christophe Leroy @ 2020-08-05 7:09 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, nathanl, anton
Cc: linux-kernel, linuxppc-dev, arnd, tglx, vincenzo.frascino, luto,
linux-arch
For VDSO32 on PPC64, we create a fake 32-bit config, on the same
principle as MIPS architecture, in order to get the correct parts of
the different asm header files.
With the C VDSO, the performance is slightly lower, but it is worth
it as it will ease maintenance and evolution, and also brings clocks
that are not supported with the ASM VDSO.
On an 8xx at 132 MHz, vdsotest with the ASM VDSO:
gettimeofday: vdso: 828 nsec/call
clock-getres-realtime-coarse: vdso: 391 nsec/call
clock-gettime-realtime-coarse: vdso: 614 nsec/call
clock-getres-realtime: vdso: 460 nsec/call
clock-gettime-realtime: vdso: 876 nsec/call
clock-getres-monotonic-coarse: vdso: 399 nsec/call
clock-gettime-monotonic-coarse: vdso: 691 nsec/call
clock-getres-monotonic: vdso: 460 nsec/call
clock-gettime-monotonic: vdso: 1026 nsec/call
On an 8xx at 132 MHz, vdsotest with the C VDSO:
gettimeofday: vdso: 955 nsec/call
clock-getres-realtime-coarse: vdso: 545 nsec/call
clock-gettime-realtime-coarse: vdso: 592 nsec/call
clock-getres-realtime: vdso: 545 nsec/call
clock-gettime-realtime: vdso: 941 nsec/call
clock-getres-monotonic-coarse: vdso: 545 nsec/call
clock-gettime-monotonic-coarse: vdso: 591 nsec/call
clock-getres-monotonic: vdso: 545 nsec/call
clock-gettime-monotonic: vdso: 940 nsec/call
It is even better for gettime with monotonic clocks.
Unsupported clocks with ASM VDSO:
clock-gettime-boottime: vdso: 3851 nsec/call
clock-gettime-tai: vdso: 3852 nsec/call
clock-gettime-monotonic-raw: vdso: 3396 nsec/call
Same clocks with C VDSO:
clock-gettime-tai: vdso: 941 nsec/call
clock-gettime-monotonic-raw: vdso: 1001 nsec/call
clock-gettime-monotonic-coarse: vdso: 591 nsec/call
On an 8321E at 333 MHz, vdsotest with the ASM VDSO:
gettimeofday: vdso: 220 nsec/call
clock-getres-realtime-coarse: vdso: 102 nsec/call
clock-gettime-realtime-coarse: vdso: 178 nsec/call
clock-getres-realtime: vdso: 129 nsec/call
clock-gettime-realtime: vdso: 235 nsec/call
clock-getres-monotonic-coarse: vdso: 105 nsec/call
clock-gettime-monotonic-coarse: vdso: 208 nsec/call
clock-getres-monotonic: vdso: 129 nsec/call
clock-gettime-monotonic: vdso: 274 nsec/call
On an 8321E at 333 MHz, vdsotest with the C VDSO:
gettimeofday: vdso: 272 nsec/call
clock-getres-realtime-coarse: vdso: 160 nsec/call
clock-gettime-realtime-coarse: vdso: 184 nsec/call
clock-getres-realtime: vdso: 166 nsec/call
clock-gettime-realtime: vdso: 281 nsec/call
clock-getres-monotonic-coarse: vdso: 160 nsec/call
clock-gettime-monotonic-coarse: vdso: 184 nsec/call
clock-getres-monotonic: vdso: 169 nsec/call
clock-gettime-monotonic: vdso: 275 nsec/call
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
v9:
- Rebased (impact on arch/powerpc/kernel/vdso??/Makefile)
v7:
- Split out preparatory changes in a new preceding patch
- Added -fasynchronous-unwind-tables to CC flags.
v6:
- Added missing prototypes in asm/vdso/gettimeofday.h for __c_kernel_ functions.
- Using STACK_FRAME_OVERHEAD instead of INT_FRAME_SIZE
- Rebased on powerpc/merge as of 7 Apr 2020
- Fixed build failure with gcc 9
- Added a patch to create asm/vdso/processor.h and move cpu_relax() into it
---
arch/powerpc/Kconfig | 2 +
arch/powerpc/include/asm/vdso/vsyscall.h | 25 ++
arch/powerpc/include/asm/vdso_datapage.h | 40 +--
arch/powerpc/kernel/asm-offsets.c | 49 +---
arch/powerpc/kernel/time.c | 91 +------
arch/powerpc/kernel/vdso.c | 5 +-
arch/powerpc/kernel/vdso32/Makefile | 32 ++-
arch/powerpc/kernel/vdso32/config-fake32.h | 34 +++
arch/powerpc/kernel/vdso32/gettimeofday.S | 291 +--------------------
arch/powerpc/kernel/vdso64/Makefile | 23 +-
arch/powerpc/kernel/vdso64/gettimeofday.S | 242 +----------------
11 files changed, 143 insertions(+), 691 deletions(-)
create mode 100644 arch/powerpc/include/asm/vdso/vsyscall.h
create mode 100644 arch/powerpc/kernel/vdso32/config-fake32.h
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8c7656cc10eb..9977ab939b42 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -171,6 +171,7 @@ config PPC
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
+ select GENERIC_GETTIMEOFDAY
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU
select HAVE_ARCH_JUMP_LABEL
@@ -202,6 +203,7 @@ config PPC
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC
+ select HAVE_GENERIC_VDSO
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
select HAVE_IDE
select HAVE_IOREMAP_PROT
diff --git a/arch/powerpc/include/asm/vdso/vsyscall.h b/arch/powerpc/include/asm/vdso/vsyscall.h
new file mode 100644
index 000000000000..c56a030c0623
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso/vsyscall.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/timekeeper_internal.h>
+#include <asm/vdso_datapage.h>
+
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+static __always_inline
+struct vdso_data *__arch_get_k_vdso_data(void)
+{
+ return vdso_data->data;
+}
+#define __arch_get_k_vdso_data __arch_get_k_vdso_data
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h
index b9ef6cf50ea5..c4d320504d26 100644
--- a/arch/powerpc/include/asm/vdso_datapage.h
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -36,6 +36,7 @@
#include <linux/unistd.h>
#include <linux/time.h>
+#include <vdso/datapage.h>
#define SYSCALL_MAP_SIZE ((NR_syscalls + 31) / 32)
@@ -45,7 +46,7 @@
#ifdef CONFIG_PPC64
-struct vdso_data {
+struct vdso_arch_data {
__u8 eye_catcher[16]; /* Eyecatcher: SYSTEMCFG:PPC64 0x00 */
struct { /* Systemcfg version numbers */
__u32 major; /* Major number 0x10 */
@@ -59,13 +60,13 @@ struct vdso_data {
__u32 processor; /* Processor type 0x1C */
__u64 processorCount; /* # of physical processors 0x20 */
__u64 physicalMemorySize; /* Size of real memory(B) 0x28 */
- __u64 tb_orig_stamp; /* Timebase at boot 0x30 */
+ __u64 tb_orig_stamp; /* (NU) Timebase at boot 0x30 */
__u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */
- __u64 tb_to_xs; /* Inverse of TB to 2^20 0x40 */
- __u64 stamp_xsec; /* 0x48 */
- __u64 tb_update_count; /* Timebase atomicity ctr 0x50 */
- __u32 tz_minuteswest; /* Minutes west of Greenwich 0x58 */
- __u32 tz_dsttime; /* Type of dst correction 0x5C */
+ __u64 tb_to_xs; /* (NU) Inverse of TB to 2^20 0x40 */
+ __u64 stamp_xsec; /* (NU) 0x48 */
+ __u64 tb_update_count; /* (NU) Timebase atomicity ctr 0x50 */
+ __u32 tz_minuteswest; /* (NU) Min. west of Greenwich 0x58 */
+ __u32 tz_dsttime; /* (NU) Type of dst correction 0x5C */
__u32 dcache_size; /* L1 d-cache size 0x60 */
__u32 dcache_line_size; /* L1 d-cache line size 0x64 */
__u32 icache_size; /* L1 i-cache size 0x68 */
@@ -78,14 +79,10 @@ struct vdso_data {
__u32 icache_block_size; /* L1 i-cache block size */
__u32 dcache_log_block_size; /* L1 d-cache log block size */
__u32 icache_log_block_size; /* L1 i-cache log block size */
- __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */
- __s32 wtom_clock_nsec; /* Wall to monotonic clock nsec */
- __s64 wtom_clock_sec; /* Wall to monotonic clock sec */
- __s64 stamp_xtime_sec; /* xtime secs as at tb_orig_stamp */
- __s64 stamp_xtime_nsec; /* xtime nsecs as at tb_orig_stamp */
- __u32 hrtimer_res; /* hrtimer resolution */
__u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */
__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
+
+ struct vdso_data data[CS_BASES];
};
#else /* CONFIG_PPC64 */
@@ -93,26 +90,15 @@ struct vdso_data {
/*
* And here is the simpler 32 bits version
*/
-struct vdso_data {
- __u64 tb_orig_stamp; /* Timebase at boot 0x30 */
+struct vdso_arch_data {
__u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */
- __u64 tb_to_xs; /* Inverse of TB to 2^20 0x40 */
- __u64 stamp_xsec; /* 0x48 */
- __u32 tb_update_count; /* Timebase atomicity ctr 0x50 */
- __u32 tz_minuteswest; /* Minutes west of Greenwich 0x58 */
- __u32 tz_dsttime; /* Type of dst correction 0x5C */
- __s32 wtom_clock_sec; /* Wall to monotonic clock */
- __s32 wtom_clock_nsec;
- __s32 stamp_xtime_sec; /* xtime seconds as at tb_orig_stamp */
- __s32 stamp_xtime_nsec; /* xtime nsecs as at tb_orig_stamp */
- __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */
- __u32 hrtimer_res; /* hrtimer resolution */
__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
+ struct vdso_data data[CS_BASES];
};
#endif /* CONFIG_PPC64 */
-extern struct vdso_data *vdso_data;
+extern struct vdso_arch_data *vdso_data;
#else /* __ASSEMBLY__ */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8711c2164b45..684260186dbf 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -397,47 +397,16 @@ int main(void)
#endif /* ! CONFIG_PPC64 */
/* datapage offsets for use by vdso */
- OFFSET(CFG_TB_ORIG_STAMP, vdso_data, tb_orig_stamp);
- OFFSET(CFG_TB_TICKS_PER_SEC, vdso_data, tb_ticks_per_sec);
- OFFSET(CFG_TB_TO_XS, vdso_data, tb_to_xs);
- OFFSET(CFG_TB_UPDATE_COUNT, vdso_data, tb_update_count);
- OFFSET(CFG_TZ_MINUTEWEST, vdso_data, tz_minuteswest);
- OFFSET(CFG_TZ_DSTTIME, vdso_data, tz_dsttime);
- OFFSET(CFG_SYSCALL_MAP32, vdso_data, syscall_map_32);
- OFFSET(WTOM_CLOCK_SEC, vdso_data, wtom_clock_sec);
- OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec);
- OFFSET(STAMP_XTIME_SEC, vdso_data, stamp_xtime_sec);
- OFFSET(STAMP_XTIME_NSEC, vdso_data, stamp_xtime_nsec);
- OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction);
- OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res);
+ OFFSET(VDSO_DATA_OFFSET, vdso_arch_data, data);
+ OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec);
+ OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, syscall_map_32);
#ifdef CONFIG_PPC64
- OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size);
- OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size);
- OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size);
- OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_data, dcache_log_block_size);
- OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64);
- OFFSET(TVAL64_TV_SEC, __kernel_old_timeval, tv_sec);
- OFFSET(TVAL64_TV_USEC, __kernel_old_timeval, tv_usec);
-#endif
- OFFSET(TSPC64_TV_SEC, __kernel_timespec, tv_sec);
- OFFSET(TSPC64_TV_NSEC, __kernel_timespec, tv_nsec);
- OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec);
- OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec);
- OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec);
- OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec);
- /* timeval/timezone offsets for use by vdso */
- OFFSET(TZONE_TZ_MINWEST, timezone, tz_minuteswest);
- OFFSET(TZONE_TZ_DSTTIME, timezone, tz_dsttime);
-
- /* Other bits used by the vdso */
- DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
- DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
- DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
- DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
- DEFINE(CLOCK_MAX, CLOCK_TAI);
- DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
- DEFINE(EINVAL, EINVAL);
- DEFINE(KTIME_LOW_RES, KTIME_LOW_RES);
+ OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size);
+ OFFSET(CFG_DCACHE_BLOCKSZ, vdso_arch_data, dcache_block_size);
+ OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_arch_data, icache_log_block_size);
+ OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_arch_data, dcache_log_block_size);
+ OFFSET(CFG_SYSCALL_MAP64, vdso_arch_data, syscall_map_64);
+#endif
#ifdef CONFIG_BUG
DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 6fcae436ae51..b63b1f97a1b3 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -91,6 +91,7 @@ static struct clocksource clocksource_timebase = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
.mask = CLOCKSOURCE_MASK(64),
.read = timebase_read,
+ .vdso_clock_mode = VDSO_CLOCKMODE_ARCHTIMER,
};
#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
@@ -855,95 +856,6 @@ static notrace u64 timebase_read(struct clocksource *cs)
return (u64)get_tb();
}
-
-void update_vsyscall(struct timekeeper *tk)
-{
- struct timespec64 xt;
- struct clocksource *clock = tk->tkr_mono.clock;
- u32 mult = tk->tkr_mono.mult;
- u32 shift = tk->tkr_mono.shift;
- u64 cycle_last = tk->tkr_mono.cycle_last;
- u64 new_tb_to_xs, new_stamp_xsec;
- u64 frac_sec;
-
- if (clock != &clocksource_timebase)
- return;
-
- xt.tv_sec = tk->xtime_sec;
- xt.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
-
- /* Make userspace gettimeofday spin until we're done. */
- ++vdso_data->tb_update_count;
- smp_mb();
-
- /*
- * This computes ((2^20 / 1e9) * mult) >> shift as a
- * 0.64 fixed-point fraction.
- * The computation in the else clause below won't overflow
- * (as long as the timebase frequency is >= 1.049 MHz)
- * but loses precision because we lose the low bits of the constant
- * in the shift. Note that 19342813113834067 ~= 2^(20+64) / 1e9.
- * For a shift of 24 the error is about 0.5e-9, or about 0.5ns
- * over a second. (Shift values are usually 22, 23 or 24.)
- * For high frequency clocks such as the 512MHz timebase clock
- * on POWER[6789], the mult value is small (e.g. 32768000)
- * and so we can shift the constant by 16 initially
- * (295147905179 ~= 2^(20+64-16) / 1e9) and then do the
- * remaining shifts after the multiplication, which gives a
- * more accurate result (e.g. with mult = 32768000, shift = 24,
- * the error is only about 1.2e-12, or 0.7ns over 10 minutes).
- */
- if (mult <= 62500000 && clock->shift >= 16)
- new_tb_to_xs = ((u64) mult * 295147905179ULL) >> (clock->shift - 16);
- else
- new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift);
-
- /*
- * Compute the fractional second in units of 2^-32 seconds.
- * The fractional second is tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift
- * in nanoseconds, so multiplying that by 2^32 / 1e9 gives
- * it in units of 2^-32 seconds.
- * We assume shift <= 32 because clocks_calc_mult_shift()
- * generates shift values in the range 0 - 32.
- */
- frac_sec = tk->tkr_mono.xtime_nsec << (32 - shift);
- do_div(frac_sec, NSEC_PER_SEC);
-
- /*
- * Work out new stamp_xsec value for any legacy users of systemcfg.
- * stamp_xsec is in units of 2^-20 seconds.
- */
- new_stamp_xsec = frac_sec >> 12;
- new_stamp_xsec += tk->xtime_sec * XSEC_PER_SEC;
-
- /*
- * tb_update_count is used to allow the userspace gettimeofday code
- * to assure itself that it sees a consistent view of the tb_to_xs and
- * stamp_xsec variables. It reads the tb_update_count, then reads
- * tb_to_xs and stamp_xsec and then reads tb_update_count again. If
- * the two values of tb_update_count match and are even then the
- * tb_to_xs and stamp_xsec values are consistent. If not, then it
- * loops back and reads them again until this criteria is met.
- */
- vdso_data->tb_orig_stamp = cycle_last;
- vdso_data->stamp_xsec = new_stamp_xsec;
- vdso_data->tb_to_xs = new_tb_to_xs;
- vdso_data->wtom_clock_sec = tk->wall_to_monotonic.tv_sec;
- vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec;
- vdso_data->stamp_xtime_sec = xt.tv_sec;
- vdso_data->stamp_xtime_nsec = xt.tv_nsec;
- vdso_data->stamp_sec_fraction = frac_sec;
- vdso_data->hrtimer_res = hrtimer_resolution;
- smp_wmb();
- ++(vdso_data->tb_update_count);
-}
-
-void update_vsyscall_tz(void)
-{
- vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
- vdso_data->tz_dsttime = sys_tz.tz_dsttime;
-}
-
static void __init clocksource_init(void)
{
struct clocksource *clock;
@@ -1113,7 +1025,6 @@ void __init time_init(void)
sys_tz.tz_dsttime = 0;
}
- vdso_data->tb_update_count = 0;
vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
/* initialise and enable the large decrementer (if we have one) */
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 8dad44262e75..23208a051af5 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -17,6 +17,7 @@
#include <linux/elf.h>
#include <linux/security.h>
#include <linux/memblock.h>
+#include <vdso/datapage.h>
#include <asm/processor.h>
#include <asm/mmu.h>
@@ -70,10 +71,10 @@ static int vdso_ready;
* with it, it will become dynamically allocated
*/
static union {
- struct vdso_data data;
+ struct vdso_arch_data data;
u8 page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
+struct vdso_arch_data *vdso_data = &vdso_data_store.data;
/* Format of the patch table */
struct vdso_patch_def
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index 87ab1152d5ce..b922044236dd 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -2,7 +2,23 @@
# List of files in the vdso, has to be asm only for now
-obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
+ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN
+include $(srctree)/lib/vdso/Makefile
+
+obj-vdso32 = sigtramp.o datapage.o cacheflush.o note.o getcpu.o $(obj-vdso32-y)
+obj-vdso32 += gettimeofday.o
+
+ifneq ($(c-gettimeofday-y),)
+ ifdef CONFIG_PPC64
+ CFLAGS_vgettimeofday.o += -include $(srctree)/$(src)/config-fake32.h
+ endif
+ CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
+ CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+ CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector)
+ CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING
+ CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables
+ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE)
+endif
# Build rules
@@ -15,6 +31,7 @@ endif
CC32FLAGS :=
ifdef CONFIG_PPC64
CC32FLAGS += -m32
+KBUILD_CFLAGS := $(filter-out -mcmodel=medium,$(KBUILD_CFLAGS))
endif
targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
@@ -23,6 +40,7 @@ obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
GCOV_PROFILE := n
KCOV_INSTRUMENT := n
UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
-Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both
@@ -36,8 +54,8 @@ CPPFLAGS_vdso32.lds += -P -C -Upowerpc
$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
# link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
- $(call if_changed,vdso32ld)
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday.o FORCE
+ $(call if_changed,vdso32ld_and_check)
# strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
@@ -47,12 +65,16 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
# assembly rules for the .S files
$(obj-vdso32): %.o: %.S FORCE
$(call if_changed_dep,vdso32as)
+$(obj)/vgettimeofday.o: %.o: %.c FORCE
+ $(call if_changed_dep,vdso32cc)
# actual build commands
-quiet_cmd_vdso32ld = VDSO32L $@
- cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn) -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
+quiet_cmd_vdso32ld_and_check = VDSO32L $@
+ cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn) -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check)
quiet_cmd_vdso32as = VDSO32A $@
cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $<
+quiet_cmd_vdso32cc = VDSO32C $@
+ cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $<
# install commands for the unstripped file
quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/powerpc/kernel/vdso32/config-fake32.h b/arch/powerpc/kernel/vdso32/config-fake32.h
new file mode 100644
index 000000000000..e16041fc15c9
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/config-fake32.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * In case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel
+ * configuration.
+ */
+
+#undef CONFIG_PPC64
+#undef CONFIG_64BIT
+#define CONFIG_PPC32
+#define CONFIG_32BIT 1
+#define CONFIG_GENERIC_ATOMIC64 1
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#undef CONFIG_PPC_BOOK3S_64
+#undef CONFIG_PPC_PSERIES
+#define CONFIG_PPC_BOOK3S_32
+#else
+#define CONFIG_PPC_MMU_NOHASH_32
+#define CONFIG_FSL_BOOKE
+#endif
+
+#define CONFIG_TASK_SIZE 0
+#undef CONFIG_MMIOWB
+#undef CONFIG_PPC_SPLPAR
+#undef CONFIG_SPARSEMEM
+#undef CONFIG_PGTABLE_LEVELS
+#define CONFIG_PGTABLE_LEVELS 2
+#undef CONFIG_TRANSPARENT_HUGEPAGE
+#undef CONFIG_SPARSEMEM_VMEMMAP
+#undef CONFIG_FLATMEM
+#define CONFIG_FLATMEM
+#undef CONFIG_PPC_INDIRECT_MMIO
+#undef CONFIG_PPC_INDIRECT_PIO
+#undef CONFIG_EEH
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index e7f8f9f1b3f4..fd7b01c51281 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -12,13 +12,7 @@
#include <asm/vdso_datapage.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
-
-/* Offset for the low 32-bit part of a field of long type */
-#ifdef CONFIG_PPC64
-#define LOPART 4
-#else
-#define LOPART 0
-#endif
+#include <asm/vdso/gettimeofday.h>
.text
/*
@@ -28,32 +22,7 @@
*
*/
V_FUNCTION_BEGIN(__kernel_gettimeofday)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
-
- mr. r10,r3 /* r10 saves tv */
- mr r11,r4 /* r11 saves tz */
- get_datapage r9, r0
- beq 3f
- LOAD_REG_IMMEDIATE(r7, 1000000) /* load up USEC_PER_SEC */
- bl __do_get_tspec@local /* get sec/usec from tb & kernel */
- stw r3,TVAL32_TV_SEC(r10)
- stw r4,TVAL32_TV_USEC(r10)
-
-3: cmplwi r11,0 /* check if tz is NULL */
- mtlr r12
- crclr cr0*4+so
- li r3,0
- beqlr
-
- lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */
- lwz r5,CFG_TZ_DSTTIME(r9)
- stw r4,TZONE_TZ_MINWEST(r11)
- stw r5,TZONE_TZ_DSTTIME(r11)
-
- blr
- .cfi_endproc
+ cvdso_call __c_kernel_gettimeofday
V_FUNCTION_END(__kernel_gettimeofday)
/*
@@ -63,127 +32,7 @@ V_FUNCTION_END(__kernel_gettimeofday)
*
*/
V_FUNCTION_BEGIN(__kernel_clock_gettime)
- .cfi_startproc
- /* Check for supported clock IDs */
- cmpli cr0,r3,CLOCK_REALTIME
- cmpli cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
-
- cmpli cr5,r3,CLOCK_REALTIME_COARSE
- cmpli cr6,r3,CLOCK_MONOTONIC_COARSE
- cror cr5*4+eq,cr5*4+eq,cr6*4+eq
-
- cror cr0*4+eq,cr0*4+eq,cr5*4+eq
- bne cr0, .Lgettime_fallback
-
- mflr r12 /* r12 saves lr */
- .cfi_register lr,r12
- mr r11,r4 /* r11 saves tp */
- get_datapage r9, r0
- LOAD_REG_IMMEDIATE(r7, NSEC_PER_SEC) /* load up NSEC_PER_SEC */
- beq cr5, .Lcoarse_clocks
-.Lprecise_clocks:
- bl __do_get_tspec@local /* get sec/nsec from tb & kernel */
- bne cr1, .Lfinish /* not monotonic -> all done */
-
- /*
- * CLOCK_MONOTONIC
- */
-
- /* now we must fixup using wall to monotonic. We need to snapshot
- * that value and do the counter trick again. Fortunately, we still
- * have the counter value in r8 that was returned by __do_get_xsec.
- * At this point, r3,r4 contain our sec/nsec values, r5 and r6
- * can be used, r7 contains NSEC_PER_SEC.
- */
-
- lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9)
- lwz r6,WTOM_CLOCK_NSEC(r9)
-
- /* We now have our offset in r5,r6. We create a fake dependency
- * on that value and re-check the counter
- */
- or r0,r6,r5
- xor r0,r0,r0
- add r9,r9,r0
- lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
- cmpl cr0,r8,r0 /* check if updated */
- bne- .Lprecise_clocks
- b .Lfinish_monotonic
-
- /*
- * For coarse clocks we get data directly from the vdso data page, so
- * we don't need to call __do_get_tspec, but we still need to do the
- * counter trick.
- */
-.Lcoarse_clocks:
- lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
- andi. r0,r8,1 /* pending update ? loop */
- bne- .Lcoarse_clocks
- add r9,r9,r0 /* r0 is already 0 */
-
- /*
- * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE
- * too
- */
- lwz r3,STAMP_XTIME_SEC+LOPART(r9)
- lwz r4,STAMP_XTIME_NSEC+LOPART(r9)
- bne cr6,1f
-
- /* CLOCK_MONOTONIC_COARSE */
- lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9)
- lwz r6,WTOM_CLOCK_NSEC(r9)
-
- /* check if counter has updated */
- or r0,r6,r5
-1: or r0,r0,r3
- or r0,r0,r4
- xor r0,r0,r0
- add r3,r3,r0
- lwz r0,CFG_TB_UPDATE_COUNT+LOPART(r9)
- cmpl cr0,r0,r8 /* check if updated */
- bne- .Lcoarse_clocks
-
- /* Counter has not updated, so continue calculating proper values for
- * sec and nsec if monotonic coarse, or just return with the proper
- * values for realtime.
- */
- bne cr6, .Lfinish
-
- /* Calculate and store result. Note that this mimics the C code,
- * which may cause funny results if nsec goes negative... is that
- * possible at all ?
- */
-.Lfinish_monotonic:
- add r3,r3,r5
- add r4,r4,r6
- cmpw cr0,r4,r7
- cmpwi cr1,r4,0
- blt 1f
- subf r4,r7,r4
- addi r3,r3,1
-1: bge cr1, .Lfinish
- addi r3,r3,-1
- add r4,r4,r7
-
-.Lfinish:
- stw r3,TSPC32_TV_SEC(r11)
- stw r4,TSPC32_TV_NSEC(r11)
-
- mtlr r12
- crclr cr0*4+so
- li r3,0
- blr
-
- /*
- * syscall fallback
- */
-.Lgettime_fallback:
- li r0,__NR_clock_gettime
- .cfi_restore lr
- sc
- blr
- .cfi_endproc
+ cvdso_call __c_kernel_clock_gettime
V_FUNCTION_END(__kernel_clock_gettime)
@@ -194,37 +43,7 @@ V_FUNCTION_END(__kernel_clock_gettime)
*
*/
V_FUNCTION_BEGIN(__kernel_clock_getres)
- .cfi_startproc
- /* Check for supported clock IDs */
- cmplwi cr0, r3, CLOCK_MAX
- cmpwi cr1, r3, CLOCK_REALTIME_COARSE
- cmpwi cr7, r3, CLOCK_MONOTONIC_COARSE
- bgt cr0, 99f
- LOAD_REG_IMMEDIATE(r5, KTIME_LOW_RES)
- beq cr1, 1f
- beq cr7, 1f
-
- mflr r12
- .cfi_register lr,r12
- get_datapage r3, r0
- lwz r5, CLOCK_HRTIMER_RES(r3)
- mtlr r12
-1: li r3,0
- cmpli cr0,r4,0
- crclr cr0*4+so
- beqlr
- stw r3,TSPC32_TV_SEC(r4)
- stw r5,TSPC32_TV_NSEC(r4)
- blr
-
- /*
- * syscall fallback
- */
-99:
- li r0,__NR_clock_getres
- sc
- blr
- .cfi_endproc
+ cvdso_call __c_kernel_clock_getres
V_FUNCTION_END(__kernel_clock_getres)
@@ -235,105 +54,5 @@ V_FUNCTION_END(__kernel_clock_getres)
*
*/
V_FUNCTION_BEGIN(__kernel_time)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
-
- mr r11,r3 /* r11 holds t */
- get_datapage r9, r0
-
- lwz r3,STAMP_XTIME_SEC+LOPART(r9)
-
- cmplwi r11,0 /* check if t is NULL */
- mtlr r12
- crclr cr0*4+so
- beqlr
- stw r3,0(r11) /* store result at *t */
- blr
- .cfi_endproc
+ cvdso_call_time __c_kernel_time
V_FUNCTION_END(__kernel_time)
-
-/*
- * This is the core of clock_gettime() and gettimeofday(),
- * it returns the current time in r3 (seconds) and r4.
- * On entry, r7 gives the resolution of r4, either USEC_PER_SEC
- * or NSEC_PER_SEC, giving r4 in microseconds or nanoseconds.
- * It expects the datapage ptr in r9 and doesn't clobber it.
- * It clobbers r0, r5 and r6.
- * On return, r8 contains the counter value that can be reused.
- * This clobbers cr0 but not any other cr field.
- */
-__do_get_tspec:
- .cfi_startproc
- /* Check for update count & load values. We use the low
- * order 32 bits of the update count
- */
-1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
- andi. r0,r8,1 /* pending update ? loop */
- bne- 1b
- xor r0,r8,r8 /* create dependency */
- add r9,r9,r0
-
- /* Load orig stamp (offset to TB) */
- lwz r5,CFG_TB_ORIG_STAMP(r9)
- lwz r6,(CFG_TB_ORIG_STAMP+4)(r9)
-
- /* Get a stable TB value */
-2: MFTBU(r3)
- MFTBL(r4)
- MFTBU(r0)
- cmplw cr0,r3,r0
- bne- 2b
-
- /* Subtract tb orig stamp and shift left 12 bits.
- */
- subfc r4,r6,r4
- subfe r0,r5,r3
- slwi r0,r0,12
- rlwimi. r0,r4,12,20,31
- slwi r4,r4,12
-
- /*
- * Load scale factor & do multiplication.
- * We only use the high 32 bits of the tb_to_xs value.
- * Even with a 1GHz timebase clock, the high 32 bits of
- * tb_to_xs will be at least 4 million, so the error from
- * ignoring the low 32 bits will be no more than 0.25ppm.
- * The error will just make the clock run very very slightly
- * slow until the next time the kernel updates the VDSO data,
- * at which point the clock will catch up to the kernel's value,
- * so there is no long-term error accumulation.
- */
- lwz r5,CFG_TB_TO_XS(r9) /* load values */
- mulhwu r4,r4,r5
- li r3,0
-
- beq+ 4f /* skip high part computation if 0 */
- mulhwu r3,r0,r5
- mullw r5,r0,r5
- addc r4,r4,r5
- addze r3,r3
-4:
- /* At this point, we have seconds since the xtime stamp
- * as a 32.32 fixed-point number in r3 and r4.
- * Load & add the xtime stamp.
- */
- lwz r5,STAMP_XTIME_SEC+LOPART(r9)
- lwz r6,STAMP_SEC_FRAC(r9)
- addc r4,r4,r6
- adde r3,r3,r5
-
- /* We create a fake dependency on the result in r3/r4
- * and re-check the counter
- */
- or r6,r4,r3
- xor r0,r6,r6
- add r9,r9,r0
- lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
- cmplw cr0,r8,r0 /* check if updated */
- bne- 1b
-
- mulhwu r4,r4,r7 /* convert to micro or nanoseconds */
-
- blr
- .cfi_endproc
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index 38c317f25141..7890d889f9c5 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -1,8 +1,20 @@
# SPDX-License-Identifier: GPL-2.0
# List of files in the vdso, has to be asm only for now
+ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN
+include $(srctree)/lib/vdso/Makefile
+
obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
+ifneq ($(c-gettimeofday-y),)
+ CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
+ CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+ CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector)
+ CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING
+ CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables
+ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE)
+endif
+
# Build rules
targets := $(obj-vdso64) vdso64.so vdso64.so.dbg
@@ -11,6 +23,7 @@ obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
GCOV_PROFILE := n
KCOV_INSTRUMENT := n
UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
-Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both
@@ -20,12 +33,14 @@ obj-y += vdso64_wrapper.o
extra-y += vdso64.lds
CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
+$(obj)/vgettimeofday.o: %.o: %.c FORCE
+
# Force dependency (incbin is bad)
$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE
- $(call if_changed,vdso64ld)
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday.o FORCE
+ $(call if_changed,vdso64ld_and_check)
# strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
@@ -33,8 +48,8 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
# actual build commands
-quiet_cmd_vdso64ld = VDSO64L $@
- cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn)
+quiet_cmd_vdso64ld_and_check = VDSO64L $@
+ cmd_vdso64ld_and_check = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn); $(cmd_vdso_check)
# install commands for the unstripped file
quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 20f8be40c653..d7a7bfb51081 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -12,6 +12,7 @@
#include <asm/vdso_datapage.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
+#include <asm/vdso/gettimeofday.h>
.text
/*
@@ -21,31 +22,7 @@
*
*/
V_FUNCTION_BEGIN(__kernel_gettimeofday)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
-
- mr r11,r3 /* r11 holds tv */
- mr r10,r4 /* r10 holds tz */
- get_datapage r3, r0
- cmpldi r11,0 /* check if tv is NULL */
- beq 2f
- lis r7,1000000@ha /* load up USEC_PER_SEC */
- addi r7,r7,1000000@l
- bl V_LOCAL_FUNC(__do_get_tspec) /* get sec/us from tb & kernel */
- std r4,TVAL64_TV_SEC(r11) /* store sec in tv */
- std r5,TVAL64_TV_USEC(r11) /* store usec in tv */
-2: cmpldi r10,0 /* check if tz is NULL */
- beq 1f
- lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */
- lwz r5,CFG_TZ_DSTTIME(r3)
- stw r4,TZONE_TZ_MINWEST(r10)
- stw r5,TZONE_TZ_DSTTIME(r10)
-1: mtlr r12
- crclr cr0*4+so
- li r3,0 /* always success */
- blr
- .cfi_endproc
+ cvdso_call __c_kernel_gettimeofday
V_FUNCTION_END(__kernel_gettimeofday)
@@ -56,120 +33,7 @@ V_FUNCTION_END(__kernel_gettimeofday)
*
*/
V_FUNCTION_BEGIN(__kernel_clock_gettime)
- .cfi_startproc
- /* Check for supported clock IDs */
- cmpwi cr0,r3,CLOCK_REALTIME
- cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
-
- cmpwi cr5,r3,CLOCK_REALTIME_COARSE
- cmpwi cr6,r3,CLOCK_MONOTONIC_COARSE
- cror cr5*4+eq,cr5*4+eq,cr6*4+eq
-
- cror cr0*4+eq,cr0*4+eq,cr5*4+eq
- bne cr0,99f
-
- mflr r12 /* r12 saves lr */
- .cfi_register lr,r12
- mr r11,r4 /* r11 saves tp */
- get_datapage r3, r0
- lis r7,NSEC_PER_SEC@h /* want nanoseconds */
- ori r7,r7,NSEC_PER_SEC@l
- beq cr5,70f
-50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
- bne cr1,80f /* if not monotonic, all done */
-
- /*
- * CLOCK_MONOTONIC
- */
-
- /* now we must fixup using wall to monotonic. We need to snapshot
- * that value and do the counter trick again. Fortunately, we still
- * have the counter value in r8 that was returned by __do_get_tspec.
- * At this point, r4,r5 contain our sec/nsec values.
- */
-
- ld r6,WTOM_CLOCK_SEC(r3)
- lwa r9,WTOM_CLOCK_NSEC(r3)
-
- /* We now have our result in r6,r9. We create a fake dependency
- * on that result and re-check the counter
- */
- or r0,r6,r9
- xor r0,r0,r0
- add r3,r3,r0
- ld r0,CFG_TB_UPDATE_COUNT(r3)
- cmpld cr0,r0,r8 /* check if updated */
- bne- 50b
- b 78f
-
- /*
- * For coarse clocks we get data directly from the vdso data page, so
- * we don't need to call __do_get_tspec, but we still need to do the
- * counter trick.
- */
-70: ld r8,CFG_TB_UPDATE_COUNT(r3)
- andi. r0,r8,1 /* pending update ? loop */
- bne- 70b
- add r3,r3,r0 /* r0 is already 0 */
-
- /*
- * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE
- * too
- */
- ld r4,STAMP_XTIME_SEC(r3)
- ld r5,STAMP_XTIME_NSEC(r3)
- bne cr6,75f
-
- /* CLOCK_MONOTONIC_COARSE */
- ld r6,WTOM_CLOCK_SEC(r3)
- lwa r9,WTOM_CLOCK_NSEC(r3)
-
- /* check if counter has updated */
- or r0,r6,r9
-75: or r0,r0,r4
- or r0,r0,r5
- xor r0,r0,r0
- add r3,r3,r0
- ld r0,CFG_TB_UPDATE_COUNT(r3)
- cmpld cr0,r0,r8 /* check if updated */
- bne- 70b
-
- /* Counter has not updated, so continue calculating proper values for
- * sec and nsec if monotonic coarse, or just return with the proper
- * values for realtime.
- */
- bne cr6,80f
-
- /* Add wall->monotonic offset and check for overflow or underflow */
-78: add r4,r4,r6
- add r5,r5,r9
- cmpd cr0,r5,r7
- cmpdi cr1,r5,0
- blt 79f
- subf r5,r7,r5
- addi r4,r4,1
-79: bge cr1,80f
- addi r4,r4,-1
- add r5,r5,r7
-
-80: std r4,TSPC64_TV_SEC(r11)
- std r5,TSPC64_TV_NSEC(r11)
-
- mtlr r12
- crclr cr0*4+so
- li r3,0
- blr
-
- /*
- * syscall fallback
- */
-99:
- li r0,__NR_clock_gettime
- .cfi_restore lr
- sc
- blr
- .cfi_endproc
+ cvdso_call __c_kernel_clock_gettime
V_FUNCTION_END(__kernel_clock_gettime)
@@ -180,34 +44,7 @@ V_FUNCTION_END(__kernel_clock_gettime)
*
*/
V_FUNCTION_BEGIN(__kernel_clock_getres)
- .cfi_startproc
- /* Check for supported clock IDs */
- cmpwi cr0,r3,CLOCK_REALTIME
- cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
-
- mflr r12
- .cfi_register lr,r12
- get_datapage r3, r0
- lwz r5, CLOCK_HRTIMER_RES(r3)
- mtlr r12
- li r3,0
- cmpldi cr0,r4,0
- crclr cr0*4+so
- beqlr
- std r3,TSPC64_TV_SEC(r4)
- std r5,TSPC64_TV_NSEC(r4)
- blr
-
- /*
- * syscall fallback
- */
-99:
- li r0,__NR_clock_getres
- sc
- blr
- .cfi_endproc
+ cvdso_call __c_kernel_clock_getres
V_FUNCTION_END(__kernel_clock_getres)
/*
@@ -217,74 +54,5 @@ V_FUNCTION_END(__kernel_clock_getres)
*
*/
V_FUNCTION_BEGIN(__kernel_time)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
-
- mr r11,r3 /* r11 holds t */
- get_datapage r3, r0
-
- ld r4,STAMP_XTIME_SEC(r3)
-
- cmpldi r11,0 /* check if t is NULL */
- beq 2f
- std r4,0(r11) /* store result at *t */
-2: mtlr r12
- crclr cr0*4+so
- mr r3,r4
- blr
- .cfi_endproc
+ cvdso_call_time __c_kernel_time
V_FUNCTION_END(__kernel_time)
-
-
-/*
- * This is the core of clock_gettime() and gettimeofday(),
- * it returns the current time in r4 (seconds) and r5.
- * On entry, r7 gives the resolution of r5, either USEC_PER_SEC
- * or NSEC_PER_SEC, giving r5 in microseconds or nanoseconds.
- * It expects the datapage ptr in r3 and doesn't clobber it.
- * It clobbers r0, r6 and r9.
- * On return, r8 contains the counter value that can be reused.
- * This clobbers cr0 but not any other cr field.
- */
-V_FUNCTION_BEGIN(__do_get_tspec)
- .cfi_startproc
- /* check for update count & load values */
-1: ld r8,CFG_TB_UPDATE_COUNT(r3)
- andi. r0,r8,1 /* pending update ? loop */
- bne- 1b
- xor r0,r8,r8 /* create dependency */
- add r3,r3,r0
-
- /* Get TB & offset it. We use the MFTB macro which will generate
- * workaround code for Cell.
- */
- MFTB(r6)
- ld r9,CFG_TB_ORIG_STAMP(r3)
- subf r6,r9,r6
-
- /* Scale result */
- ld r5,CFG_TB_TO_XS(r3)
- sldi r6,r6,12 /* compute time since stamp_xtime */
- mulhdu r6,r6,r5 /* in units of 2^-32 seconds */
-
- /* Add stamp since epoch */
- ld r4,STAMP_XTIME_SEC(r3)
- lwz r5,STAMP_SEC_FRAC(r3)
- or r0,r4,r5
- or r0,r0,r6
- xor r0,r0,r0
- add r3,r3,r0
- ld r0,CFG_TB_UPDATE_COUNT(r3)
- cmpld r0,r8 /* check if updated */
- bne- 1b /* reload if so */
-
- /* convert to seconds & nanoseconds and add to stamp */
- add r6,r6,r5 /* add on fractional seconds of xtime */
- mulhwu r5,r6,r7 /* compute micro or nanoseconds and */
- srdi r6,r6,32 /* seconds since stamp_xtime */
- clrldi r5,r5,32
- add r4,r4,r6
- blr
- .cfi_endproc
-V_FUNCTION_END(__do_get_tspec)
--
2.25.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v10 5/5] powerpc/vdso: Provide __kernel_clock_gettime64() on vdso32
2020-08-05 7:09 [PATCH v10 0/5] powerpc: switch VDSO to C implementation Christophe Leroy
` (3 preceding siblings ...)
2020-08-05 7:09 ` [PATCH v10 4/5] powerpc/vdso: Switch VDSO to generic C implementation Christophe Leroy
@ 2020-08-05 7:09 ` Christophe Leroy
2020-08-05 7:09 ` Christophe Leroy
4 siblings, 1 reply; 18+ messages in thread
From: Christophe Leroy @ 2020-08-05 7:09 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, nathanl, anton
Cc: linux-kernel, linuxppc-dev, arnd, tglx, vincenzo.frascino, luto,
linux-arch
Provide __kernel_clock_gettime64() on vdso32. This is the
64-bit version of __kernel_clock_gettime() which is
y2038 compliant.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/kernel/vdso32/gettimeofday.S | 9 +++++++++
arch/powerpc/kernel/vdso32/vdso32.lds.S | 1 +
arch/powerpc/kernel/vdso32/vgettimeofday.c | 6 ++++++
3 files changed, 16 insertions(+)
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index fd7b01c51281..a6e29f880e0e 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -35,6 +35,15 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
cvdso_call __c_kernel_clock_gettime
V_FUNCTION_END(__kernel_clock_gettime)
+/*
+ * Exact prototype of clock_gettime64()
+ *
+ * int __kernel_clock_gettime64(clockid_t clock_id, struct __timespec64 *ts);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_gettime64)
+ cvdso_call __c_kernel_clock_gettime64
+V_FUNCTION_END(__kernel_clock_gettime64)
/*
* Exact prototype of clock_getres()
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index 4c985467a668..582c5b046cc9 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -148,6 +148,7 @@ VERSION
#ifndef CONFIG_PPC_BOOK3S_601
__kernel_gettimeofday;
__kernel_clock_gettime;
+ __kernel_clock_gettime64;
__kernel_clock_getres;
__kernel_time;
__kernel_get_tbfreq;
diff --git a/arch/powerpc/kernel/vdso32/vgettimeofday.c b/arch/powerpc/kernel/vdso32/vgettimeofday.c
index 0b9ab4c22ef2..f7f71fecf4ed 100644
--- a/arch/powerpc/kernel/vdso32/vgettimeofday.c
+++ b/arch/powerpc/kernel/vdso32/vgettimeofday.c
@@ -11,6 +11,12 @@ int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts,
return __cvdso_clock_gettime32_data(vd, clock, ts);
}
+int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts,
+ const struct vdso_data *vd)
+{
+ return __cvdso_clock_gettime_data(vd, clock, ts);
+}
+
int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz,
const struct vdso_data *vd)
{
--
2.25.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v10 5/5] powerpc/vdso: Provide __kernel_clock_gettime64() on vdso32
2020-08-05 7:09 ` [PATCH v10 5/5] powerpc/vdso: Provide __kernel_clock_gettime64() on vdso32 Christophe Leroy
@ 2020-08-05 7:09 ` Christophe Leroy
0 siblings, 0 replies; 18+ messages in thread
From: Christophe Leroy @ 2020-08-05 7:09 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, nathanl, anton
Cc: linux-kernel, linuxppc-dev, arnd, tglx, vincenzo.frascino, luto,
linux-arch
Provide __kernel_clock_gettime64() on vdso32. This is the
64-bit version of __kernel_clock_gettime() which is
y2038 compliant.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/kernel/vdso32/gettimeofday.S | 9 +++++++++
arch/powerpc/kernel/vdso32/vdso32.lds.S | 1 +
arch/powerpc/kernel/vdso32/vgettimeofday.c | 6 ++++++
3 files changed, 16 insertions(+)
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index fd7b01c51281..a6e29f880e0e 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -35,6 +35,15 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
cvdso_call __c_kernel_clock_gettime
V_FUNCTION_END(__kernel_clock_gettime)
+/*
+ * Exact prototype of clock_gettime64()
+ *
+ * int __kernel_clock_gettime64(clockid_t clock_id, struct __timespec64 *ts);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_gettime64)
+ cvdso_call __c_kernel_clock_gettime64
+V_FUNCTION_END(__kernel_clock_gettime64)
/*
* Exact prototype of clock_getres()
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index 4c985467a668..582c5b046cc9 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -148,6 +148,7 @@ VERSION
#ifndef CONFIG_PPC_BOOK3S_601
__kernel_gettimeofday;
__kernel_clock_gettime;
+ __kernel_clock_gettime64;
__kernel_clock_getres;
__kernel_time;
__kernel_get_tbfreq;
diff --git a/arch/powerpc/kernel/vdso32/vgettimeofday.c b/arch/powerpc/kernel/vdso32/vgettimeofday.c
index 0b9ab4c22ef2..f7f71fecf4ed 100644
--- a/arch/powerpc/kernel/vdso32/vgettimeofday.c
+++ b/arch/powerpc/kernel/vdso32/vgettimeofday.c
@@ -11,6 +11,12 @@ int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts,
return __cvdso_clock_gettime32_data(vd, clock, ts);
}
+int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts,
+ const struct vdso_data *vd)
+{
+ return __cvdso_clock_gettime_data(vd, clock, ts);
+}
+
int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz,
const struct vdso_data *vd)
{
--
2.25.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH v10 2/5] powerpc/vdso: Prepare for switching VDSO to generic C implementation.
2020-08-05 7:09 ` [PATCH v10 2/5] powerpc/vdso: Prepare for switching VDSO to generic C implementation Christophe Leroy
@ 2020-08-05 14:03 ` Segher Boessenkool
2020-08-05 14:03 ` Segher Boessenkool
` (2 more replies)
0 siblings, 3 replies; 18+ messages in thread
From: Segher Boessenkool @ 2020-08-05 14:03 UTC (permalink / raw)
To: Christophe Leroy
Cc: nathanl, linux-arch, vincenzo.frascino, arnd, linux-kernel,
Paul Mackerras, luto, tglx, linuxppc-dev
Hi!
On Wed, Aug 05, 2020 at 07:09:23AM +0000, Christophe Leroy wrote:
> Provide vdso_shift_ns(), as the generic x >> s gives the following
> bad result:
>
> 18: 35 25 ff e0 addic. r9,r5,-32
> 1c: 41 80 00 10 blt 2c <shift+0x14>
> 20: 7c 64 4c 30 srw r4,r3,r9
> 24: 38 60 00 00 li r3,0
> ...
> 2c: 54 69 08 3c rlwinm r9,r3,1,0,30
> 30: 21 45 00 1f subfic r10,r5,31
> 34: 7c 84 2c 30 srw r4,r4,r5
> 38: 7d 29 50 30 slw r9,r9,r10
> 3c: 7c 63 2c 30 srw r3,r3,r5
> 40: 7d 24 23 78 or r4,r9,r4
>
> In our case the shift is always <= 32. In addition, the upper 32 bits
> of the result are likely zero. Let GCC know it; it also optimises the
> following calculations.
>
> With the patch, we get:
> 0: 21 25 00 20 subfic r9,r5,32
> 4: 7c 69 48 30 slw r9,r3,r9
> 8: 7c 84 2c 30 srw r4,r4,r5
> c: 7d 24 23 78 or r4,r9,r4
> 10: 7c 63 2c 30 srw r3,r3,r5
See below. Such code is valid on PowerPC for all shift < 64, and a
future version of GCC will do that (it is on various TODO lists, it is
bound to happen *some* day ;-), but it won't help you yet of course).
> +/*
> + * The macro sets two stack frames, one for the caller and one for the callee
> + * because there is no requirement for the caller to set a stack frame when
> + * calling VDSO, so it may have omitted to set one, especially on PPC64
> + */
If the caller follows the ABI, there always is a stack frame. So what
is going on?
> +/*
> + * powerpc specific delta calculation.
> + *
> + * This variant removes the masking of the subtraction because the
> + * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX
> + * which would result in a pointless operation. The compiler cannot
> + * optimize it away as the mask comes from the vdso data and is not compile
> + * time constant.
> + */
It cannot optimise it because it does not know shift < 32. The code
below is incorrect for shift equal to 32, fwiw.
> +static __always_inline u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
> +{
> + return (cycles - last) * mult;
> +}
> +#define vdso_calc_delta vdso_calc_delta
> +
> +#ifndef __powerpc64__
> +static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift)
> +{
> + u32 hi = ns >> 32;
> + u32 lo = ns;
> +
> + lo >>= shift;
> + lo |= hi << (32 - shift);
> + hi >>= shift;
> + if (likely(hi == 0))
> + return lo;
Removing these two lines shouldn't change generated object code? Or not
make it worse, at least.
> + return ((u64)hi << 32) | lo;
> +}
What does the compiler do for just
static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift)
return ns >> (shift & 31);
}
?
Segher
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v10 2/5] powerpc/vdso: Prepare for switching VDSO to generic C implementation.
2020-08-05 14:03 ` Segher Boessenkool
@ 2020-08-05 14:03 ` Segher Boessenkool
2020-08-05 16:40 ` Christophe Leroy
2020-08-05 16:51 ` Christophe Leroy
2 siblings, 0 replies; 18+ messages in thread
From: Segher Boessenkool @ 2020-08-05 14:03 UTC (permalink / raw)
To: Christophe Leroy
Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
nathanl, anton, linux-arch, arnd, linux-kernel, luto, tglx,
vincenzo.frascino, linuxppc-dev
Hi!
On Wed, Aug 05, 2020 at 07:09:23AM +0000, Christophe Leroy wrote:
> Provide vdso_shift_ns(), as the generic x >> s gives the following
> bad result:
>
> 18: 35 25 ff e0 addic. r9,r5,-32
> 1c: 41 80 00 10 blt 2c <shift+0x14>
> 20: 7c 64 4c 30 srw r4,r3,r9
> 24: 38 60 00 00 li r3,0
> ...
> 2c: 54 69 08 3c rlwinm r9,r3,1,0,30
> 30: 21 45 00 1f subfic r10,r5,31
> 34: 7c 84 2c 30 srw r4,r4,r5
> 38: 7d 29 50 30 slw r9,r9,r10
> 3c: 7c 63 2c 30 srw r3,r3,r5
> 40: 7d 24 23 78 or r4,r9,r4
>
> In our case the shift is always <= 32. In addition, the upper 32 bits
> of the result are likely zero. Let GCC know it; it also optimises the
> following calculations.
>
> With the patch, we get:
> 0: 21 25 00 20 subfic r9,r5,32
> 4: 7c 69 48 30 slw r9,r3,r9
> 8: 7c 84 2c 30 srw r4,r4,r5
> c: 7d 24 23 78 or r4,r9,r4
> 10: 7c 63 2c 30 srw r3,r3,r5
See below. Such code is valid on PowerPC for all shift < 64, and a
future version of GCC will do that (it is on various TODO lists, it is
bound to happen *some* day ;-), but it won't help you yet of course).
> +/*
> + * The macro sets two stack frames, one for the caller and one for the callee
> + * because there is no requirement for the caller to set a stack frame when
> + * calling VDSO, so it may have omitted to set one, especially on PPC64
> + */
If the caller follows the ABI, there always is a stack frame. So what
is going on?
> +/*
> + * powerpc specific delta calculation.
> + *
> + * This variant removes the masking of the subtraction because the
> + * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX
> + * which would result in a pointless operation. The compiler cannot
> + * optimize it away as the mask comes from the vdso data and is not compile
> + * time constant.
> + */
It cannot optimise it because it does not know shift < 32. The code
below is incorrect for shift equal to 32, fwiw.
> +static __always_inline u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
> +{
> + return (cycles - last) * mult;
> +}
> +#define vdso_calc_delta vdso_calc_delta
> +
> +#ifndef __powerpc64__
> +static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift)
> +{
> + u32 hi = ns >> 32;
> + u32 lo = ns;
> +
> + lo >>= shift;
> + lo |= hi << (32 - shift);
> + hi >>= shift;
> + if (likely(hi == 0))
> + return lo;
Removing these two lines shouldn't change generated object code? Or not
make it worse, at least.
> + return ((u64)hi << 32) | lo;
> +}
What does the compiler do for just
static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift)
return ns >> (shift & 31);
}
?
Segher
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v10 2/5] powerpc/vdso: Prepare for switching VDSO to generic C implementation.
2020-08-05 14:03 ` Segher Boessenkool
2020-08-05 14:03 ` Segher Boessenkool
@ 2020-08-05 16:40 ` Christophe Leroy
2020-08-05 16:40 ` Christophe Leroy
2020-08-05 18:40 ` Segher Boessenkool
2020-08-05 16:51 ` Christophe Leroy
2 siblings, 2 replies; 18+ messages in thread
From: Christophe Leroy @ 2020-08-05 16:40 UTC (permalink / raw)
To: Segher Boessenkool
Cc: nathanl, linux-arch, vincenzo.frascino, arnd, linux-kernel,
Paul Mackerras, luto, tglx, linuxppc-dev
Hi,
On 08/05/2020 02:03 PM, Segher Boessenkool wrote:
> Hi!
>
> On Wed, Aug 05, 2020 at 07:09:23AM +0000, Christophe Leroy wrote:
>> +/*
>> + * powerpc specific delta calculation.
>> + *
>> + * This variant removes the masking of the subtraction because the
>> + * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX
>> + * which would result in a pointless operation. The compiler cannot
>> + * optimize it away as the mask comes from the vdso data and is not compile
>> + * time constant.
>> + */
>
> It cannot optimise it because it does not know shift < 32. The code
> below is incorrect for shift equal to 32, fwiw.
Is there a way to tell it ?
>
>> +static __always_inline u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
>> +{
>> + return (cycles - last) * mult;
>> +}
>> +#define vdso_calc_delta vdso_calc_delta
>> +
>> +#ifndef __powerpc64__
>> +static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift)
>> +{
>> + u32 hi = ns >> 32;
>> + u32 lo = ns;
>> +
>> + lo >>= shift;
>> + lo |= hi << (32 - shift);
>> + hi >>= shift;
>
>
>> + if (likely(hi == 0))
>> + return lo;
>
> Removing these two lines shouldn't change generated object code? Or not
> make it worse, at least.
I remember it made a noticeable difference although I can't remember the
details. See below with GCC 10.1. At least we see that with those two
lines, GCC only sets a 16-byte stack frame. Without those lines it sets
a 32-byte stack frame and seems to save some values for no reason.
With the two lines:
000006ac <__c_kernel_clock_gettime>:
6ac: 28 03 00 0f cmplwi r3,15
6b0: 41 81 01 04 bgt 7b4 <__c_kernel_clock_gettime+0x108>
6b4: 39 40 00 01 li r10,1
6b8: 7d 4a 18 30 slw r10,r10,r3
6bc: 71 47 08 83 andi. r7,r10,2179
6c0: 41 82 01 2c beq 7ec <__c_kernel_clock_gettime+0x140>
6c4: 94 21 ff f0 stwu r1,-16(r1)
6c8: 54 63 20 36 rlwinm r3,r3,4,0,27
6cc: 93 e1 00 0c stw r31,12(r1)
6d0: 7d 85 1a 14 add r12,r5,r3
6d4: 80 05 00 00 lwz r0,0(r5)
6d8: 70 06 00 01 andi. r6,r0,1
6dc: 40 82 00 d4 bne 7b0 <__c_kernel_clock_gettime+0x104>
6e0: 7d 4d 42 e6 mftbu r10
6e4: 7d 6c 42 e6 mftb r11
6e8: 7c ed 42 e6 mftbu r7
6ec: 7c 0a 38 40 cmplw r10,r7
6f0: 40 82 ff f0 bne 6e0 <__c_kernel_clock_gettime+0x34>
6f4: 80 e5 00 0c lwz r7,12(r5)
6f8: 80 65 00 08 lwz r3,8(r5)
6fc: 7c e7 58 10 subfc r7,r7,r11
700: 81 65 00 18 lwz r11,24(r5)
704: 7d 43 51 10 subfe r10,r3,r10
708: 7f e7 58 16 mulhwu r31,r7,r11
70c: 7d 4a 59 d6 mullw r10,r10,r11
710: 7c e7 59 d6 mullw r7,r7,r11
714: 80 6c 00 2c lwz r3,44(r12)
718: 81 6c 00 28 lwz r11,40(r12)
71c: 7c e7 18 14 addc r7,r7,r3
720: 7d 4a fa 14 add r10,r10,r31
724: 80 65 00 1c lwz r3,28(r5)
728: 7d 4a 59 14 adde r10,r10,r11
72c: 7c e7 1c 30 srw r7,r7,r3
730: 21 63 00 20 subfic r11,r3,32
734: 7d 43 1c 31 srw. r3,r10,r3
738: 7d 4a 58 30 slw r10,r10,r11
73c: 7d 49 3b 78 or r9,r10,r7
740: 39 00 00 00 li r8,0
744: 40 82 00 84 bne 7c8 <__c_kernel_clock_gettime+0x11c>
748: 80 6c 00 24 lwz r3,36(r12)
74c: 81 45 00 00 lwz r10,0(r5)
750: 7c 00 50 40 cmplw r0,r10
754: 40 a2 ff 80 bne 6d4 <__c_kernel_clock_gettime+0x28>
758: 2c 08 00 00 cmpwi r8,0
75c: 41 82 00 7c beq 7d8 <__c_kernel_clock_gettime+0x12c>
760: 3c e0 c4 65 lis r7,-15259
764: 3c 00 3b 9a lis r0,15258
768: 60 e7 36 00 ori r7,r7,13824
76c: 60 00 c9 ff ori r0,r0,51711
770: 7c a9 38 14 addc r5,r9,r7
774: 7d 48 01 d4 addme r10,r8
778: 2c 0a 00 00 cmpwi r10,0
77c: 7d 48 53 78 mr r8,r10
780: 7c a9 2b 78 mr r9,r5
784: 38 c6 00 01 addi r6,r6,1
788: 40 82 ff e8 bne 770 <__c_kernel_clock_gettime+0xc4>
78c: 7c 05 00 40 cmplw r5,r0
790: 41 81 ff e0 bgt 770 <__c_kernel_clock_gettime+0xc4>
794: 7c 66 18 14 addc r3,r6,r3
798: 90 64 00 00 stw r3,0(r4)
79c: 91 24 00 04 stw r9,4(r4)
7a0: 38 60 00 00 li r3,0
7a4: 83 e1 00 0c lwz r31,12(r1)
7a8: 38 21 00 10 addi r1,r1,16
7ac: 4e 80 00 20 blr
7b0: 4b ff ff 24 b 6d4 <__c_kernel_clock_gettime+0x28>
7b4: 38 00 00 f6 li r0,246
7b8: 44 00 00 02 sc
7bc: 40 a3 00 08 bns 7c4 <__c_kernel_clock_gettime+0x118>
7c0: 7c 63 00 d0 neg r3,r3
7c4: 4e 80 00 20 blr
7c8: 7d 2a 4b 78 mr r10,r9
7cc: 7c 68 1b 78 mr r8,r3
7d0: 7d 49 53 78 mr r9,r10
7d4: 4b ff ff 74 b 748 <__c_kernel_clock_gettime+0x9c>
7d8: 3d 40 3b 9a lis r10,15258
7dc: 61 4a c9 ff ori r10,r10,51711
7e0: 7c 09 50 40 cmplw r9,r10
7e4: 41 81 ff 7c bgt 760 <__c_kernel_clock_gettime+0xb4>
7e8: 4b ff ff b0 b 798 <__c_kernel_clock_gettime+0xec>
7ec: 71 47 00 60 andi. r7,r10,96
7f0: 54 69 20 36 rlwinm r9,r3,4,0,27
7f4: 7d 25 4a 14 add r9,r5,r9
7f8: 40 82 00 14 bne 80c <__c_kernel_clock_gettime+0x160>
7fc: 71 4a 00 10 andi. r10,r10,16
800: 41 a2 ff b4 beq 7b4 <__c_kernel_clock_gettime+0x108>
804: 38 a5 00 f0 addi r5,r5,240
808: 4b ff fe bc b 6c4 <__c_kernel_clock_gettime+0x18>
80c: 81 05 00 00 lwz r8,0(r5)
810: 71 0a 00 01 andi. r10,r8,1
814: 40 a2 ff f8 bne 80c <__c_kernel_clock_gettime+0x160>
818: 80 69 00 24 lwz r3,36(r9)
81c: 81 49 00 2c lwz r10,44(r9)
820: 80 e5 00 00 lwz r7,0(r5)
824: 7c 08 38 40 cmplw r8,r7
828: 40 a2 ff e4 bne 80c <__c_kernel_clock_gettime+0x160>
82c: 90 64 00 00 stw r3,0(r4)
830: 91 44 00 04 stw r10,4(r4)
834: 38 60 00 00 li r3,0
838: 4e 80 00 20 blr
Without the two lines:
000006ac <__c_kernel_clock_gettime>:
6ac: 28 03 00 0f cmplwi r3,15
6b0: 41 81 01 14 bgt 7c4 <__c_kernel_clock_gettime+0x118>
6b4: 39 20 00 01 li r9,1
6b8: 7d 29 18 30 slw r9,r9,r3
6bc: 71 2a 08 83 andi. r10,r9,2179
6c0: 41 82 01 2c beq 7ec <__c_kernel_clock_gettime+0x140>
6c4: 94 21 ff e0 stwu r1,-32(r1)
6c8: 54 63 20 36 rlwinm r3,r3,4,0,27
6cc: 93 81 00 10 stw r28,16(r1)
6d0: 93 a1 00 14 stw r29,20(r1)
6d4: 93 c1 00 18 stw r30,24(r1)
6d8: 93 e1 00 1c stw r31,28(r1)
6dc: 7c 65 1a 14 add r3,r5,r3
6e0: 81 85 00 00 lwz r12,0(r5)
6e4: 71 87 00 01 andi. r7,r12,1
6e8: 40 82 00 d8 bne 7c0 <__c_kernel_clock_gettime+0x114>
6ec: 7d 2d 42 e6 mftbu r9
6f0: 7c cc 42 e6 mftb r6
6f4: 7d 4d 42 e6 mftbu r10
6f8: 7c 09 50 40 cmplw r9,r10
6fc: 40 82 ff f0 bne 6ec <__c_kernel_clock_gettime+0x40>
700: 83 83 00 28 lwz r28,40(r3)
704: 83 a3 00 2c lwz r29,44(r3)
708: 81 65 00 08 lwz r11,8(r5)
70c: 81 05 00 0c lwz r8,12(r5)
710: 83 c5 00 18 lwz r30,24(r5)
714: 83 e5 00 1c lwz r31,28(r5)
718: 80 03 00 24 lwz r0,36(r3)
71c: 81 45 00 00 lwz r10,0(r5)
720: 7c 0c 50 40 cmplw r12,r10
724: 40 a2 ff bc bne 6e0 <__c_kernel_clock_gettime+0x34>
728: 7d 48 30 10 subfc r10,r8,r6
72c: 7c cb 49 10 subfe r6,r11,r9
730: 7c c6 f1 d6 mullw r6,r6,r30
734: 7d 2a f0 16 mulhwu r9,r10,r30
738: 7d 4a f1 d6 mullw r10,r10,r30
73c: 7c c6 4a 14 add r6,r6,r9
740: 7d 4a e8 14 addc r10,r10,r29
744: 7c c6 e1 14 adde r6,r6,r28
748: 7c c8 fc 30 srw r8,r6,r31
74c: 2c 08 00 00 cmpwi r8,0
750: 20 bf 00 20 subfic r5,r31,32
754: 7d 4a fc 30 srw r10,r10,r31
758: 7c c5 28 30 slw r5,r6,r5
75c: 7c a9 53 78 or r9,r5,r10
760: 41 82 00 78 beq 7d8 <__c_kernel_clock_gettime+0x12c>
764: 3c c0 c4 65 lis r6,-15259
768: 3c 60 3b 9a lis r3,15258
76c: 60 c6 36 00 ori r6,r6,13824
770: 60 63 c9 ff ori r3,r3,51711
774: 7c a9 30 14 addc r5,r9,r6
778: 7d 48 01 d4 addme r10,r8
77c: 2c 0a 00 00 cmpwi r10,0
780: 7d 48 53 78 mr r8,r10
784: 7c a9 2b 78 mr r9,r5
788: 38 e7 00 01 addi r7,r7,1
78c: 40 82 ff e8 bne 774 <__c_kernel_clock_gettime+0xc8>
790: 7c 05 18 40 cmplw r5,r3
794: 41 81 ff e0 bgt 774 <__c_kernel_clock_gettime+0xc8>
798: 7c 07 00 14 addc r0,r7,r0
79c: 90 04 00 00 stw r0,0(r4)
7a0: 91 24 00 04 stw r9,4(r4)
7a4: 38 60 00 00 li r3,0
7a8: 83 81 00 10 lwz r28,16(r1)
7ac: 83 a1 00 14 lwz r29,20(r1)
7b0: 83 c1 00 18 lwz r30,24(r1)
7b4: 83 e1 00 1c lwz r31,28(r1)
7b8: 38 21 00 20 addi r1,r1,32
7bc: 4e 80 00 20 blr
7c0: 4b ff ff 20 b 6e0 <__c_kernel_clock_gettime+0x34>
7c4: 38 00 00 f6 li r0,246
7c8: 44 00 00 02 sc
7cc: 40 a3 00 08 bns 7d4 <__c_kernel_clock_gettime+0x128>
7d0: 7c 63 00 d0 neg r3,r3
7d4: 4e 80 00 20 blr
7d8: 3d 40 3b 9a lis r10,15258
7dc: 61 4a c9 ff ori r10,r10,51711
7e0: 7c 09 50 40 cmplw r9,r10
7e4: 41 81 ff 80 bgt 764 <__c_kernel_clock_gettime+0xb8>
7e8: 4b ff ff b4 b 79c <__c_kernel_clock_gettime+0xf0>
7ec: 71 2a 00 60 andi. r10,r9,96
7f0: 40 82 00 14 bne 804 <__c_kernel_clock_gettime+0x158>
7f4: 71 29 00 10 andi. r9,r9,16
7f8: 41 a2 ff cc beq 7c4 <__c_kernel_clock_gettime+0x118>
7fc: 38 a5 00 f0 addi r5,r5,240
800: 4b ff fe c4 b 6c4 <__c_kernel_clock_gettime+0x18>
804: 54 69 20 36 rlwinm r9,r3,4,0,27
808: 7d 25 4a 14 add r9,r5,r9
80c: 81 05 00 00 lwz r8,0(r5)
810: 71 0a 00 01 andi. r10,r8,1
814: 40 82 00 28 bne 83c <__c_kernel_clock_gettime+0x190>
818: 80 09 00 24 lwz r0,36(r9)
81c: 81 49 00 2c lwz r10,44(r9)
820: 80 e5 00 00 lwz r7,0(r5)
824: 7c 08 38 40 cmplw r8,r7
828: 40 a2 ff e4 bne 80c <__c_kernel_clock_gettime+0x160>
82c: 90 04 00 00 stw r0,0(r4)
830: 91 44 00 04 stw r10,4(r4)
834: 38 60 00 00 li r3,0
838: 4e 80 00 20 blr
83c: 4b ff ff d0 b 80c <__c_kernel_clock_gettime+0x160>
>
>> + return ((u64)hi << 32) | lo;
>> +}
>
>
> What does the compiler do for just
>
> static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift)
> return ns >> (shift & 31);
> }
>
Worse:
000006ac <__c_kernel_clock_gettime>:
6ac: 28 03 00 0f cmplwi r3,15
6b0: 41 81 01 30 bgt 7e0 <__c_kernel_clock_gettime+0x134>
6b4: 39 20 00 01 li r9,1
6b8: 7d 29 18 30 slw r9,r9,r3
6bc: 71 2a 08 83 andi. r10,r9,2179
6c0: 41 82 01 48 beq 808 <__c_kernel_clock_gettime+0x15c>
6c4: 94 21 ff e0 stwu r1,-32(r1)
6c8: 54 63 20 36 rlwinm r3,r3,4,0,27
6cc: 93 81 00 10 stw r28,16(r1)
6d0: 93 a1 00 14 stw r29,20(r1)
6d4: 93 c1 00 18 stw r30,24(r1)
6d8: 93 e1 00 1c stw r31,28(r1)
6dc: 7c 65 1a 14 add r3,r5,r3
6e0: 80 c5 00 00 lwz r6,0(r5)
6e4: 70 c7 00 01 andi. r7,r6,1
6e8: 40 82 00 f4 bne 7dc <__c_kernel_clock_gettime+0x130>
6ec: 7d 2d 42 e6 mftbu r9
6f0: 7d 0c 42 e6 mftb r8
6f4: 7d 4d 42 e6 mftbu r10
6f8: 7c 09 50 40 cmplw r9,r10
6fc: 40 82 ff f0 bne 6ec <__c_kernel_clock_gettime+0x40>
700: 83 83 00 28 lwz r28,40(r3)
704: 83 c3 00 2c lwz r30,44(r3)
708: 81 65 00 08 lwz r11,8(r5)
70c: 81 45 00 0c lwz r10,12(r5)
710: 83 e5 00 18 lwz r31,24(r5)
714: 81 85 00 1c lwz r12,28(r5)
718: 80 03 00 24 lwz r0,36(r3)
71c: 83 a5 00 00 lwz r29,0(r5)
720: 7c 06 e8 40 cmplw r6,r29
724: 40 a2 ff bc bne 6e0 <__c_kernel_clock_gettime+0x34>
728: 7d 0a 40 10 subfc r8,r10,r8
72c: 7c cb 49 10 subfe r6,r11,r9
730: 7c c6 f9 d6 mullw r6,r6,r31
734: 7d 28 f8 16 mulhwu r9,r8,r31
738: 7d 08 f9 d6 mullw r8,r8,r31
73c: 55 8c 06 fe clrlwi r12,r12,27
740: 7f c8 f0 14 addc r30,r8,r30
744: 7c c6 4a 14 add r6,r6,r9
748: 7c c6 e1 14 adde r6,r6,r28
74c: 34 6c ff e0 addic. r3,r12,-32
750: 41 80 00 70 blt 7c0 <__c_kernel_clock_gettime+0x114>
754: 7c c9 1c 30 srw r9,r6,r3
758: 39 00 00 00 li r8,0
75c: 2c 08 00 00 cmpwi r8,0
760: 41 82 00 94 beq 7f4 <__c_kernel_clock_gettime+0x148>
764: 3c c0 c4 65 lis r6,-15259
768: 3c 60 3b 9a lis r3,15258
76c: 60 c6 36 00 ori r6,r6,13824
770: 60 63 c9 ff ori r3,r3,51711
774: 7c a9 30 14 addc r5,r9,r6
778: 7d 48 01 d4 addme r10,r8
77c: 2c 0a 00 00 cmpwi r10,0
780: 7d 48 53 78 mr r8,r10
784: 7c a9 2b 78 mr r9,r5
788: 38 e7 00 01 addi r7,r7,1
78c: 40 82 ff e8 bne 774 <__c_kernel_clock_gettime+0xc8>
790: 7c 05 18 40 cmplw r5,r3
794: 41 81 ff e0 bgt 774 <__c_kernel_clock_gettime+0xc8>
798: 7c 07 00 14 addc r0,r7,r0
79c: 90 04 00 00 stw r0,0(r4)
7a0: 91 24 00 04 stw r9,4(r4)
7a4: 38 60 00 00 li r3,0
7a8: 83 81 00 10 lwz r28,16(r1)
7ac: 83 a1 00 14 lwz r29,20(r1)
7b0: 83 c1 00 18 lwz r30,24(r1)
7b4: 83 e1 00 1c lwz r31,28(r1)
7b8: 38 21 00 20 addi r1,r1,32
7bc: 4e 80 00 20 blr
7c0: 54 c3 08 3c rlwinm r3,r6,1,0,30
7c4: 21 6c 00 1f subfic r11,r12,31
7c8: 7c 63 58 30 slw r3,r3,r11
7cc: 7f c9 64 30 srw r9,r30,r12
7d0: 7c 69 4b 78 or r9,r3,r9
7d4: 7c c8 64 30 srw r8,r6,r12
7d8: 4b ff ff 84 b 75c <__c_kernel_clock_gettime+0xb0>
7dc: 4b ff ff 04 b 6e0 <__c_kernel_clock_gettime+0x34>
7e0: 38 00 00 f6 li r0,246
7e4: 44 00 00 02 sc
7e8: 40 a3 00 08 bns 7f0 <__c_kernel_clock_gettime+0x144>
7ec: 7c 63 00 d0 neg r3,r3
7f0: 4e 80 00 20 blr
7f4: 3d 40 3b 9a lis r10,15258
7f8: 61 4a c9 ff ori r10,r10,51711
7fc: 7c 09 50 40 cmplw r9,r10
800: 41 81 ff 64 bgt 764 <__c_kernel_clock_gettime+0xb8>
804: 4b ff ff 98 b 79c <__c_kernel_clock_gettime+0xf0>
808: 71 2a 00 60 andi. r10,r9,96
80c: 40 82 00 14 bne 820 <__c_kernel_clock_gettime+0x174>
810: 71 29 00 10 andi. r9,r9,16
814: 41 a2 ff cc beq 7e0 <__c_kernel_clock_gettime+0x134>
818: 38 a5 00 f0 addi r5,r5,240
81c: 4b ff fe a8 b 6c4 <__c_kernel_clock_gettime+0x18>
820: 54 69 20 36 rlwinm r9,r3,4,0,27
824: 7d 25 4a 14 add r9,r5,r9
828: 81 05 00 00 lwz r8,0(r5)
82c: 71 0a 00 01 andi. r10,r8,1
830: 40 82 00 28 bne 858 <__c_kernel_clock_gettime+0x1ac>
834: 80 09 00 24 lwz r0,36(r9)
838: 81 49 00 2c lwz r10,44(r9)
83c: 80 e5 00 00 lwz r7,0(r5)
840: 7c 08 38 40 cmplw r8,r7
844: 40 a2 ff e4 bne 828 <__c_kernel_clock_gettime+0x17c>
848: 90 04 00 00 stw r0,0(r4)
84c: 91 44 00 04 stw r10,4(r4)
850: 38 60 00 00 li r3,0
854: 4e 80 00 20 blr
858: 4b ff ff d0 b 828 <__c_kernel_clock_gettime+0x17c>
Christophe
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v10 2/5] powerpc/vdso: Prepare for switching VDSO to generic C implementation.
2020-08-05 16:40 ` Christophe Leroy
@ 2020-08-05 16:40 ` Christophe Leroy
2020-08-05 18:40 ` Segher Boessenkool
1 sibling, 0 replies; 18+ messages in thread
From: Christophe Leroy @ 2020-08-05 16:40 UTC (permalink / raw)
To: Segher Boessenkool
Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
nathanl, anton, linux-arch, arnd, linux-kernel, luto, tglx,
vincenzo.frascino, linuxppc-dev
Hi,
On 08/05/2020 02:03 PM, Segher Boessenkool wrote:
> Hi!
>
> On Wed, Aug 05, 2020 at 07:09:23AM +0000, Christophe Leroy wrote:
>> +/*
>> + * powerpc specific delta calculation.
>> + *
>> + * This variant removes the masking of the subtraction because the
>> + * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX
>> + * which would result in a pointless operation. The compiler cannot
>> + * optimize it away as the mask comes from the vdso data and is not compile
>> + * time constant.
>> + */
>
> It cannot optimise it because it does not know shift < 32. The code
> below is incorrect for shift equal to 32, fwiw.
Is there a way to tell it?
>
>> +static __always_inline u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
>> +{
>> + return (cycles - last) * mult;
>> +}
>> +#define vdso_calc_delta vdso_calc_delta
>> +
>> +#ifndef __powerpc64__
>> +static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift)
>> +{
>> + u32 hi = ns >> 32;
>> + u32 lo = ns;
>> +
>> + lo >>= shift;
>> + lo |= hi << (32 - shift);
>> + hi >>= shift;
>
>
>> + if (likely(hi == 0))
>> + return lo;
>
> Removing these two lines shouldn't change generated object code? Or not
> make it worse, at least.
I remember it made a noticeable difference, although I can't remember the
details. See below with GCC 10.1. At least we see that with those two
lines, GCC only sets up a 16-byte stack frame. Without those lines it sets
up a 32-byte stack frame and seems to save some values for no reason.
With the two lines:
000006ac <__c_kernel_clock_gettime>:
6ac: 28 03 00 0f cmplwi r3,15
6b0: 41 81 01 04 bgt 7b4 <__c_kernel_clock_gettime+0x108>
6b4: 39 40 00 01 li r10,1
6b8: 7d 4a 18 30 slw r10,r10,r3
6bc: 71 47 08 83 andi. r7,r10,2179
6c0: 41 82 01 2c beq 7ec <__c_kernel_clock_gettime+0x140>
6c4: 94 21 ff f0 stwu r1,-16(r1)
6c8: 54 63 20 36 rlwinm r3,r3,4,0,27
6cc: 93 e1 00 0c stw r31,12(r1)
6d0: 7d 85 1a 14 add r12,r5,r3
6d4: 80 05 00 00 lwz r0,0(r5)
6d8: 70 06 00 01 andi. r6,r0,1
6dc: 40 82 00 d4 bne 7b0 <__c_kernel_clock_gettime+0x104>
6e0: 7d 4d 42 e6 mftbu r10
6e4: 7d 6c 42 e6 mftb r11
6e8: 7c ed 42 e6 mftbu r7
6ec: 7c 0a 38 40 cmplw r10,r7
6f0: 40 82 ff f0 bne 6e0 <__c_kernel_clock_gettime+0x34>
6f4: 80 e5 00 0c lwz r7,12(r5)
6f8: 80 65 00 08 lwz r3,8(r5)
6fc: 7c e7 58 10 subfc r7,r7,r11
700: 81 65 00 18 lwz r11,24(r5)
704: 7d 43 51 10 subfe r10,r3,r10
708: 7f e7 58 16 mulhwu r31,r7,r11
70c: 7d 4a 59 d6 mullw r10,r10,r11
710: 7c e7 59 d6 mullw r7,r7,r11
714: 80 6c 00 2c lwz r3,44(r12)
718: 81 6c 00 28 lwz r11,40(r12)
71c: 7c e7 18 14 addc r7,r7,r3
720: 7d 4a fa 14 add r10,r10,r31
724: 80 65 00 1c lwz r3,28(r5)
728: 7d 4a 59 14 adde r10,r10,r11
72c: 7c e7 1c 30 srw r7,r7,r3
730: 21 63 00 20 subfic r11,r3,32
734: 7d 43 1c 31 srw. r3,r10,r3
738: 7d 4a 58 30 slw r10,r10,r11
73c: 7d 49 3b 78 or r9,r10,r7
740: 39 00 00 00 li r8,0
744: 40 82 00 84 bne 7c8 <__c_kernel_clock_gettime+0x11c>
748: 80 6c 00 24 lwz r3,36(r12)
74c: 81 45 00 00 lwz r10,0(r5)
750: 7c 00 50 40 cmplw r0,r10
754: 40 a2 ff 80 bne 6d4 <__c_kernel_clock_gettime+0x28>
758: 2c 08 00 00 cmpwi r8,0
75c: 41 82 00 7c beq 7d8 <__c_kernel_clock_gettime+0x12c>
760: 3c e0 c4 65 lis r7,-15259
764: 3c 00 3b 9a lis r0,15258
768: 60 e7 36 00 ori r7,r7,13824
76c: 60 00 c9 ff ori r0,r0,51711
770: 7c a9 38 14 addc r5,r9,r7
774: 7d 48 01 d4 addme r10,r8
778: 2c 0a 00 00 cmpwi r10,0
77c: 7d 48 53 78 mr r8,r10
780: 7c a9 2b 78 mr r9,r5
784: 38 c6 00 01 addi r6,r6,1
788: 40 82 ff e8 bne 770 <__c_kernel_clock_gettime+0xc4>
78c: 7c 05 00 40 cmplw r5,r0
790: 41 81 ff e0 bgt 770 <__c_kernel_clock_gettime+0xc4>
794: 7c 66 18 14 addc r3,r6,r3
798: 90 64 00 00 stw r3,0(r4)
79c: 91 24 00 04 stw r9,4(r4)
7a0: 38 60 00 00 li r3,0
7a4: 83 e1 00 0c lwz r31,12(r1)
7a8: 38 21 00 10 addi r1,r1,16
7ac: 4e 80 00 20 blr
7b0: 4b ff ff 24 b 6d4 <__c_kernel_clock_gettime+0x28>
7b4: 38 00 00 f6 li r0,246
7b8: 44 00 00 02 sc
7bc: 40 a3 00 08 bns 7c4 <__c_kernel_clock_gettime+0x118>
7c0: 7c 63 00 d0 neg r3,r3
7c4: 4e 80 00 20 blr
7c8: 7d 2a 4b 78 mr r10,r9
7cc: 7c 68 1b 78 mr r8,r3
7d0: 7d 49 53 78 mr r9,r10
7d4: 4b ff ff 74 b 748 <__c_kernel_clock_gettime+0x9c>
7d8: 3d 40 3b 9a lis r10,15258
7dc: 61 4a c9 ff ori r10,r10,51711
7e0: 7c 09 50 40 cmplw r9,r10
7e4: 41 81 ff 7c bgt 760 <__c_kernel_clock_gettime+0xb4>
7e8: 4b ff ff b0 b 798 <__c_kernel_clock_gettime+0xec>
7ec: 71 47 00 60 andi. r7,r10,96
7f0: 54 69 20 36 rlwinm r9,r3,4,0,27
7f4: 7d 25 4a 14 add r9,r5,r9
7f8: 40 82 00 14 bne 80c <__c_kernel_clock_gettime+0x160>
7fc: 71 4a 00 10 andi. r10,r10,16
800: 41 a2 ff b4 beq 7b4 <__c_kernel_clock_gettime+0x108>
804: 38 a5 00 f0 addi r5,r5,240
808: 4b ff fe bc b 6c4 <__c_kernel_clock_gettime+0x18>
80c: 81 05 00 00 lwz r8,0(r5)
810: 71 0a 00 01 andi. r10,r8,1
814: 40 a2 ff f8 bne 80c <__c_kernel_clock_gettime+0x160>
818: 80 69 00 24 lwz r3,36(r9)
81c: 81 49 00 2c lwz r10,44(r9)
820: 80 e5 00 00 lwz r7,0(r5)
824: 7c 08 38 40 cmplw r8,r7
828: 40 a2 ff e4 bne 80c <__c_kernel_clock_gettime+0x160>
82c: 90 64 00 00 stw r3,0(r4)
830: 91 44 00 04 stw r10,4(r4)
834: 38 60 00 00 li r3,0
838: 4e 80 00 20 blr
Without the two lines:
000006ac <__c_kernel_clock_gettime>:
6ac: 28 03 00 0f cmplwi r3,15
6b0: 41 81 01 14 bgt 7c4 <__c_kernel_clock_gettime+0x118>
6b4: 39 20 00 01 li r9,1
6b8: 7d 29 18 30 slw r9,r9,r3
6bc: 71 2a 08 83 andi. r10,r9,2179
6c0: 41 82 01 2c beq 7ec <__c_kernel_clock_gettime+0x140>
6c4: 94 21 ff e0 stwu r1,-32(r1)
6c8: 54 63 20 36 rlwinm r3,r3,4,0,27
6cc: 93 81 00 10 stw r28,16(r1)
6d0: 93 a1 00 14 stw r29,20(r1)
6d4: 93 c1 00 18 stw r30,24(r1)
6d8: 93 e1 00 1c stw r31,28(r1)
6dc: 7c 65 1a 14 add r3,r5,r3
6e0: 81 85 00 00 lwz r12,0(r5)
6e4: 71 87 00 01 andi. r7,r12,1
6e8: 40 82 00 d8 bne 7c0 <__c_kernel_clock_gettime+0x114>
6ec: 7d 2d 42 e6 mftbu r9
6f0: 7c cc 42 e6 mftb r6
6f4: 7d 4d 42 e6 mftbu r10
6f8: 7c 09 50 40 cmplw r9,r10
6fc: 40 82 ff f0 bne 6ec <__c_kernel_clock_gettime+0x40>
700: 83 83 00 28 lwz r28,40(r3)
704: 83 a3 00 2c lwz r29,44(r3)
708: 81 65 00 08 lwz r11,8(r5)
70c: 81 05 00 0c lwz r8,12(r5)
710: 83 c5 00 18 lwz r30,24(r5)
714: 83 e5 00 1c lwz r31,28(r5)
718: 80 03 00 24 lwz r0,36(r3)
71c: 81 45 00 00 lwz r10,0(r5)
720: 7c 0c 50 40 cmplw r12,r10
724: 40 a2 ff bc bne 6e0 <__c_kernel_clock_gettime+0x34>
728: 7d 48 30 10 subfc r10,r8,r6
72c: 7c cb 49 10 subfe r6,r11,r9
730: 7c c6 f1 d6 mullw r6,r6,r30
734: 7d 2a f0 16 mulhwu r9,r10,r30
738: 7d 4a f1 d6 mullw r10,r10,r30
73c: 7c c6 4a 14 add r6,r6,r9
740: 7d 4a e8 14 addc r10,r10,r29
744: 7c c6 e1 14 adde r6,r6,r28
748: 7c c8 fc 30 srw r8,r6,r31
74c: 2c 08 00 00 cmpwi r8,0
750: 20 bf 00 20 subfic r5,r31,32
754: 7d 4a fc 30 srw r10,r10,r31
758: 7c c5 28 30 slw r5,r6,r5
75c: 7c a9 53 78 or r9,r5,r10
760: 41 82 00 78 beq 7d8 <__c_kernel_clock_gettime+0x12c>
764: 3c c0 c4 65 lis r6,-15259
768: 3c 60 3b 9a lis r3,15258
76c: 60 c6 36 00 ori r6,r6,13824
770: 60 63 c9 ff ori r3,r3,51711
774: 7c a9 30 14 addc r5,r9,r6
778: 7d 48 01 d4 addme r10,r8
77c: 2c 0a 00 00 cmpwi r10,0
780: 7d 48 53 78 mr r8,r10
784: 7c a9 2b 78 mr r9,r5
788: 38 e7 00 01 addi r7,r7,1
78c: 40 82 ff e8 bne 774 <__c_kernel_clock_gettime+0xc8>
790: 7c 05 18 40 cmplw r5,r3
794: 41 81 ff e0 bgt 774 <__c_kernel_clock_gettime+0xc8>
798: 7c 07 00 14 addc r0,r7,r0
79c: 90 04 00 00 stw r0,0(r4)
7a0: 91 24 00 04 stw r9,4(r4)
7a4: 38 60 00 00 li r3,0
7a8: 83 81 00 10 lwz r28,16(r1)
7ac: 83 a1 00 14 lwz r29,20(r1)
7b0: 83 c1 00 18 lwz r30,24(r1)
7b4: 83 e1 00 1c lwz r31,28(r1)
7b8: 38 21 00 20 addi r1,r1,32
7bc: 4e 80 00 20 blr
7c0: 4b ff ff 20 b 6e0 <__c_kernel_clock_gettime+0x34>
7c4: 38 00 00 f6 li r0,246
7c8: 44 00 00 02 sc
7cc: 40 a3 00 08 bns 7d4 <__c_kernel_clock_gettime+0x128>
7d0: 7c 63 00 d0 neg r3,r3
7d4: 4e 80 00 20 blr
7d8: 3d 40 3b 9a lis r10,15258
7dc: 61 4a c9 ff ori r10,r10,51711
7e0: 7c 09 50 40 cmplw r9,r10
7e4: 41 81 ff 80 bgt 764 <__c_kernel_clock_gettime+0xb8>
7e8: 4b ff ff b4 b 79c <__c_kernel_clock_gettime+0xf0>
7ec: 71 2a 00 60 andi. r10,r9,96
7f0: 40 82 00 14 bne 804 <__c_kernel_clock_gettime+0x158>
7f4: 71 29 00 10 andi. r9,r9,16
7f8: 41 a2 ff cc beq 7c4 <__c_kernel_clock_gettime+0x118>
7fc: 38 a5 00 f0 addi r5,r5,240
800: 4b ff fe c4 b 6c4 <__c_kernel_clock_gettime+0x18>
804: 54 69 20 36 rlwinm r9,r3,4,0,27
808: 7d 25 4a 14 add r9,r5,r9
80c: 81 05 00 00 lwz r8,0(r5)
810: 71 0a 00 01 andi. r10,r8,1
814: 40 82 00 28 bne 83c <__c_kernel_clock_gettime+0x190>
818: 80 09 00 24 lwz r0,36(r9)
81c: 81 49 00 2c lwz r10,44(r9)
820: 80 e5 00 00 lwz r7,0(r5)
824: 7c 08 38 40 cmplw r8,r7
828: 40 a2 ff e4 bne 80c <__c_kernel_clock_gettime+0x160>
82c: 90 04 00 00 stw r0,0(r4)
830: 91 44 00 04 stw r10,4(r4)
834: 38 60 00 00 li r3,0
838: 4e 80 00 20 blr
83c: 4b ff ff d0 b 80c <__c_kernel_clock_gettime+0x160>
>
>> + return ((u64)hi << 32) | lo;
>> +}
>
>
> What does the compiler do for just
>
> static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift)
> return ns >> (shift & 31);
> }
>
Worse:
000006ac <__c_kernel_clock_gettime>:
6ac: 28 03 00 0f cmplwi r3,15
6b0: 41 81 01 30 bgt 7e0 <__c_kernel_clock_gettime+0x134>
6b4: 39 20 00 01 li r9,1
6b8: 7d 29 18 30 slw r9,r9,r3
6bc: 71 2a 08 83 andi. r10,r9,2179
6c0: 41 82 01 48 beq 808 <__c_kernel_clock_gettime+0x15c>
6c4: 94 21 ff e0 stwu r1,-32(r1)
6c8: 54 63 20 36 rlwinm r3,r3,4,0,27
6cc: 93 81 00 10 stw r28,16(r1)
6d0: 93 a1 00 14 stw r29,20(r1)
6d4: 93 c1 00 18 stw r30,24(r1)
6d8: 93 e1 00 1c stw r31,28(r1)
6dc: 7c 65 1a 14 add r3,r5,r3
6e0: 80 c5 00 00 lwz r6,0(r5)
6e4: 70 c7 00 01 andi. r7,r6,1
6e8: 40 82 00 f4 bne 7dc <__c_kernel_clock_gettime+0x130>
6ec: 7d 2d 42 e6 mftbu r9
6f0: 7d 0c 42 e6 mftb r8
6f4: 7d 4d 42 e6 mftbu r10
6f8: 7c 09 50 40 cmplw r9,r10
6fc: 40 82 ff f0 bne 6ec <__c_kernel_clock_gettime+0x40>
700: 83 83 00 28 lwz r28,40(r3)
704: 83 c3 00 2c lwz r30,44(r3)
708: 81 65 00 08 lwz r11,8(r5)
70c: 81 45 00 0c lwz r10,12(r5)
710: 83 e5 00 18 lwz r31,24(r5)
714: 81 85 00 1c lwz r12,28(r5)
718: 80 03 00 24 lwz r0,36(r3)
71c: 83 a5 00 00 lwz r29,0(r5)
720: 7c 06 e8 40 cmplw r6,r29
724: 40 a2 ff bc bne 6e0 <__c_kernel_clock_gettime+0x34>
728: 7d 0a 40 10 subfc r8,r10,r8
72c: 7c cb 49 10 subfe r6,r11,r9
730: 7c c6 f9 d6 mullw r6,r6,r31
734: 7d 28 f8 16 mulhwu r9,r8,r31
738: 7d 08 f9 d6 mullw r8,r8,r31
73c: 55 8c 06 fe clrlwi r12,r12,27
740: 7f c8 f0 14 addc r30,r8,r30
744: 7c c6 4a 14 add r6,r6,r9
748: 7c c6 e1 14 adde r6,r6,r28
74c: 34 6c ff e0 addic. r3,r12,-32
750: 41 80 00 70 blt 7c0 <__c_kernel_clock_gettime+0x114>
754: 7c c9 1c 30 srw r9,r6,r3
758: 39 00 00 00 li r8,0
75c: 2c 08 00 00 cmpwi r8,0
760: 41 82 00 94 beq 7f4 <__c_kernel_clock_gettime+0x148>
764: 3c c0 c4 65 lis r6,-15259
768: 3c 60 3b 9a lis r3,15258
76c: 60 c6 36 00 ori r6,r6,13824
770: 60 63 c9 ff ori r3,r3,51711
774: 7c a9 30 14 addc r5,r9,r6
778: 7d 48 01 d4 addme r10,r8
77c: 2c 0a 00 00 cmpwi r10,0
780: 7d 48 53 78 mr r8,r10
784: 7c a9 2b 78 mr r9,r5
788: 38 e7 00 01 addi r7,r7,1
78c: 40 82 ff e8 bne 774 <__c_kernel_clock_gettime+0xc8>
790: 7c 05 18 40 cmplw r5,r3
794: 41 81 ff e0 bgt 774 <__c_kernel_clock_gettime+0xc8>
798: 7c 07 00 14 addc r0,r7,r0
79c: 90 04 00 00 stw r0,0(r4)
7a0: 91 24 00 04 stw r9,4(r4)
7a4: 38 60 00 00 li r3,0
7a8: 83 81 00 10 lwz r28,16(r1)
7ac: 83 a1 00 14 lwz r29,20(r1)
7b0: 83 c1 00 18 lwz r30,24(r1)
7b4: 83 e1 00 1c lwz r31,28(r1)
7b8: 38 21 00 20 addi r1,r1,32
7bc: 4e 80 00 20 blr
7c0: 54 c3 08 3c rlwinm r3,r6,1,0,30
7c4: 21 6c 00 1f subfic r11,r12,31
7c8: 7c 63 58 30 slw r3,r3,r11
7cc: 7f c9 64 30 srw r9,r30,r12
7d0: 7c 69 4b 78 or r9,r3,r9
7d4: 7c c8 64 30 srw r8,r6,r12
7d8: 4b ff ff 84 b 75c <__c_kernel_clock_gettime+0xb0>
7dc: 4b ff ff 04 b 6e0 <__c_kernel_clock_gettime+0x34>
7e0: 38 00 00 f6 li r0,246
7e4: 44 00 00 02 sc
7e8: 40 a3 00 08 bns 7f0 <__c_kernel_clock_gettime+0x144>
7ec: 7c 63 00 d0 neg r3,r3
7f0: 4e 80 00 20 blr
7f4: 3d 40 3b 9a lis r10,15258
7f8: 61 4a c9 ff ori r10,r10,51711
7fc: 7c 09 50 40 cmplw r9,r10
800: 41 81 ff 64 bgt 764 <__c_kernel_clock_gettime+0xb8>
804: 4b ff ff 98 b 79c <__c_kernel_clock_gettime+0xf0>
808: 71 2a 00 60 andi. r10,r9,96
80c: 40 82 00 14 bne 820 <__c_kernel_clock_gettime+0x174>
810: 71 29 00 10 andi. r9,r9,16
814: 41 a2 ff cc beq 7e0 <__c_kernel_clock_gettime+0x134>
818: 38 a5 00 f0 addi r5,r5,240
81c: 4b ff fe a8 b 6c4 <__c_kernel_clock_gettime+0x18>
820: 54 69 20 36 rlwinm r9,r3,4,0,27
824: 7d 25 4a 14 add r9,r5,r9
828: 81 05 00 00 lwz r8,0(r5)
82c: 71 0a 00 01 andi. r10,r8,1
830: 40 82 00 28 bne 858 <__c_kernel_clock_gettime+0x1ac>
834: 80 09 00 24 lwz r0,36(r9)
838: 81 49 00 2c lwz r10,44(r9)
83c: 80 e5 00 00 lwz r7,0(r5)
840: 7c 08 38 40 cmplw r8,r7
844: 40 a2 ff e4 bne 828 <__c_kernel_clock_gettime+0x17c>
848: 90 04 00 00 stw r0,0(r4)
84c: 91 44 00 04 stw r10,4(r4)
850: 38 60 00 00 li r3,0
854: 4e 80 00 20 blr
858: 4b ff ff d0 b 828 <__c_kernel_clock_gettime+0x17c>
Christophe
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v10 2/5] powerpc/vdso: Prepare for switching VDSO to generic C implementation.
2020-08-05 14:03 ` Segher Boessenkool
2020-08-05 14:03 ` Segher Boessenkool
2020-08-05 16:40 ` Christophe Leroy
@ 2020-08-05 16:51 ` Christophe Leroy
2020-08-05 16:51 ` Christophe Leroy
2020-08-05 20:55 ` Segher Boessenkool
2 siblings, 2 replies; 18+ messages in thread
From: Christophe Leroy @ 2020-08-05 16:51 UTC (permalink / raw)
To: Segher Boessenkool
Cc: nathanl, linux-arch, vincenzo.frascino, arnd, linux-kernel,
Paul Mackerras, luto, tglx, linuxppc-dev
Hi Again,
Le 05/08/2020 à 16:03, Segher Boessenkool a écrit :
> Hi!
>
> On Wed, Aug 05, 2020 at 07:09:23AM +0000, Christophe Leroy wrote:
>> +/*
>> + * The macros sets two stack frames, one for the caller and one for the callee
>> + * because there are no requirement for the caller to set a stack frame when
>> + * calling VDSO so it may have omitted to set one, especially on PPC64
>> + */
>
> If the caller follows the ABI, there always is a stack frame. So what
> is going on?
Looks like it is not the case. See discussion at
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/2a67c333893454868bbfda773ba4b01c20272a5d.1588079622.git.christophe.leroy@c-s.fr/
It seems GCC uses the red zone and doesn't set up a stack frame. I guess
it doesn't know that the inline assembly contains a function call, so it
doesn't set up the frame.
Christophe
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v10 2/5] powerpc/vdso: Prepare for switching VDSO to generic C implementation.
2020-08-05 16:51 ` Christophe Leroy
@ 2020-08-05 16:51 ` Christophe Leroy
2020-08-05 20:55 ` Segher Boessenkool
1 sibling, 0 replies; 18+ messages in thread
From: Christophe Leroy @ 2020-08-05 16:51 UTC (permalink / raw)
To: Segher Boessenkool
Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
nathanl, anton, linux-arch, arnd, linux-kernel, luto, tglx,
vincenzo.frascino, linuxppc-dev
Hi Again,
Le 05/08/2020 à 16:03, Segher Boessenkool a écrit :
> Hi!
>
> On Wed, Aug 05, 2020 at 07:09:23AM +0000, Christophe Leroy wrote:
>> +/*
>> + * The macros sets two stack frames, one for the caller and one for the callee
>> + * because there are no requirement for the caller to set a stack frame when
>> + * calling VDSO so it may have omitted to set one, especially on PPC64
>> + */
>
> If the caller follows the ABI, there always is a stack frame. So what
> is going on?
Looks like it is not the case. See discussion at
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/2a67c333893454868bbfda773ba4b01c20272a5d.1588079622.git.christophe.leroy@c-s.fr/
It seems GCC uses the red zone and doesn't set up a stack frame. I guess
it doesn't know that the inline assembly contains a function call, so it
doesn't set up the frame.
Christophe
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v10 2/5] powerpc/vdso: Prepare for switching VDSO to generic C implementation.
2020-08-05 16:40 ` Christophe Leroy
2020-08-05 16:40 ` Christophe Leroy
@ 2020-08-05 18:40 ` Segher Boessenkool
2020-08-05 18:40 ` Segher Boessenkool
2020-08-06 5:46 ` Christophe Leroy
1 sibling, 2 replies; 18+ messages in thread
From: Segher Boessenkool @ 2020-08-05 18:40 UTC (permalink / raw)
To: Christophe Leroy
Cc: nathanl, linux-arch, vincenzo.frascino, arnd, linux-kernel,
Paul Mackerras, luto, tglx, linuxppc-dev
Hi!
On Wed, Aug 05, 2020 at 04:40:16PM +0000, Christophe Leroy wrote:
> >It cannot optimise it because it does not know shift < 32. The code
> >below is incorrect for shift equal to 32, fwiw.
>
> Is there a way to tell it ?
Sure, for example the &31 should work (but it doesn't, with the GCC
version you used -- which version is that?)
> >What does the compiler do for just
> >
> >static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift)
> > return ns >> (shift & 31);
> >}
> >
>
> Worse:
I cannot make heads or tails of all that branch spaghetti, sorry.
> 73c: 55 8c 06 fe clrlwi r12,r12,27
> 740: 7f c8 f0 14 addc r30,r8,r30
> 744: 7c c6 4a 14 add r6,r6,r9
> 748: 7c c6 e1 14 adde r6,r6,r28
> 74c: 34 6c ff e0 addic. r3,r12,-32
> 750: 41 80 00 70 blt 7c0 <__c_kernel_clock_gettime+0x114>
This branch is always true. Hrm.
Segher
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v10 2/5] powerpc/vdso: Prepare for switching VDSO to generic C implementation.
2020-08-05 18:40 ` Segher Boessenkool
@ 2020-08-05 18:40 ` Segher Boessenkool
2020-08-06 5:46 ` Christophe Leroy
1 sibling, 0 replies; 18+ messages in thread
From: Segher Boessenkool @ 2020-08-05 18:40 UTC (permalink / raw)
To: Christophe Leroy
Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
nathanl, anton, linux-arch, arnd, linux-kernel, luto, tglx,
vincenzo.frascino, linuxppc-dev
Hi!
On Wed, Aug 05, 2020 at 04:40:16PM +0000, Christophe Leroy wrote:
> >It cannot optimise it because it does not know shift < 32. The code
> >below is incorrect for shift equal to 32, fwiw.
>
> Is there a way to tell it ?
Sure, for example the &31 should work (but it doesn't, with the GCC
version you used -- which version is that?)
> >What does the compiler do for just
> >
> >static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift)
> > return ns >> (shift & 31);
> >}
> >
>
> Worse:
I cannot make heads or tails of all that branch spaghetti, sorry.
> 73c: 55 8c 06 fe clrlwi r12,r12,27
> 740: 7f c8 f0 14 addc r30,r8,r30
> 744: 7c c6 4a 14 add r6,r6,r9
> 748: 7c c6 e1 14 adde r6,r6,r28
> 74c: 34 6c ff e0 addic. r3,r12,-32
> 750: 41 80 00 70 blt 7c0 <__c_kernel_clock_gettime+0x114>
This branch is always true. Hrm.
Segher
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v10 2/5] powerpc/vdso: Prepare for switching VDSO to generic C implementation.
2020-08-05 16:51 ` Christophe Leroy
2020-08-05 16:51 ` Christophe Leroy
@ 2020-08-05 20:55 ` Segher Boessenkool
1 sibling, 0 replies; 18+ messages in thread
From: Segher Boessenkool @ 2020-08-05 20:55 UTC (permalink / raw)
To: Christophe Leroy
Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
nathanl, anton, linux-arch, arnd, linux-kernel, luto, tglx,
vincenzo.frascino, linuxppc-dev
Hi!
On Wed, Aug 05, 2020 at 06:51:44PM +0200, Christophe Leroy wrote:
> Le 05/08/2020 à 16:03, Segher Boessenkool a écrit :
> >On Wed, Aug 05, 2020 at 07:09:23AM +0000, Christophe Leroy wrote:
> >>+/*
> >>+ * The macros sets two stack frames, one for the caller and one for the
> >>callee
> >>+ * because there are no requirement for the caller to set a stack frame
> >>when
> >>+ * calling VDSO so it may have omitted to set one, especially on PPC64
> >>+ */
> >
> >If the caller follows the ABI, there always is a stack frame. So what
> >is going on?
>
> Looks like it is not the case. See discussion at
> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/2a67c333893454868bbfda773ba4b01c20272a5d.1588079622.git.christophe.leroy@c-s.fr/
>
> Seems like GCC uses the redzone and doesn't set a stack frame. I guess
> it doesn't know that the inline assembly contains a function call so it
> doesn't set the frame.
Yes, that is the problem. See
https://gcc.gnu.org/onlinedocs/gcc-10.2.0/gcc/Extended-Asm.html#AssemblerTemplate
where this is (briefly) discussed:
"Accessing data from C programs without using input/output operands
(such as by using global symbols directly from the assembler
template) may not work as expected. Similarly, calling functions
directly from an assembler template requires a detailed understanding
of the target assembler and ABI."
I don't know of a good way to tell GCC some function needs a frame (that
is, one that doesn't result in extra code other than to set up the
frame). I'll think about it.
Segher
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v10 2/5] powerpc/vdso: Prepare for switching VDSO to generic C implementation.
2020-08-05 18:40 ` Segher Boessenkool
2020-08-05 18:40 ` Segher Boessenkool
@ 2020-08-06 5:46 ` Christophe Leroy
1 sibling, 0 replies; 18+ messages in thread
From: Christophe Leroy @ 2020-08-06 5:46 UTC (permalink / raw)
To: Segher Boessenkool
Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
nathanl, anton, linux-arch, arnd, linux-kernel, luto, tglx,
vincenzo.frascino, linuxppc-dev
Hi,
On 08/05/2020 06:40 PM, Segher Boessenkool wrote:
> Hi!
>
> On Wed, Aug 05, 2020 at 04:40:16PM +0000, Christophe Leroy wrote:
>>> It cannot optimise it because it does not know shift < 32. The code
>>> below is incorrect for shift equal to 32, fwiw.
>>
>> Is there a way to tell it ?
>
> Sure, for example the &31 should work (but it doesn't, with the GCC
> version you used -- which version is that?)
GCC 10.1
>
>>> What does the compiler do for just
>>>
>>> static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift)
>>> return ns >> (shift & 31);
>>> }
>>>
>>
>> Worse:
>
> I cannot make heads or tails of all that branch spaghetti, sorry.
>
>> 73c: 55 8c 06 fe clrlwi r12,r12,27
>> 740: 7f c8 f0 14 addc r30,r8,r30
>> 744: 7c c6 4a 14 add r6,r6,r9
>> 748: 7c c6 e1 14 adde r6,r6,r28
>> 74c: 34 6c ff e0 addic. r3,r12,-32
>> 750: 41 80 00 70 blt 7c0 <__c_kernel_clock_gettime+0x114>
>
> This branch is always true. Hrm.
As a standalone function:
With your suggestion:
000006ac <vdso_shift_ns>:
6ac: 54 a5 06 fe clrlwi r5,r5,27
6b0: 35 25 ff e0 addic. r9,r5,-32
6b4: 41 80 00 10 blt 6c4 <vdso_shift_ns+0x18>
6b8: 7c 64 4c 30 srw r4,r3,r9
6bc: 38 60 00 00 li r3,0
6c0: 4e 80 00 20 blr
6c4: 54 69 08 3c rlwinm r9,r3,1,0,30
6c8: 21 45 00 1f subfic r10,r5,31
6cc: 7c 84 2c 30 srw r4,r4,r5
6d0: 7d 29 50 30 slw r9,r9,r10
6d4: 7c 63 2c 30 srw r3,r3,r5
6d8: 7d 24 23 78 or r4,r9,r4
6dc: 4e 80 00 20 blr
With the version as is in my series:
000006ac <vdso_shift_ns>:
6ac: 21 25 00 20 subfic r9,r5,32
6b0: 7c 69 48 30 slw r9,r3,r9
6b4: 7c 84 2c 30 srw r4,r4,r5
6b8: 7d 24 23 78 or r4,r9,r4
6bc: 7c 63 2c 30 srw r3,r3,r5
6c0: 4e 80 00 20 blr
Christophe
^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2020-08-06 5:46 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-05 7:09 [PATCH v10 0/5] powerpc: switch VDSO to C implementation Christophe Leroy
2020-08-05 7:09 ` [PATCH v10 1/5] powerpc/processor: Move cpu_relax() into asm/vdso/processor.h Christophe Leroy
2020-08-05 7:09 ` [PATCH v10 2/5] powerpc/vdso: Prepare for switching VDSO to generic C implementation Christophe Leroy
2020-08-05 14:03 ` Segher Boessenkool
2020-08-05 14:03 ` Segher Boessenkool
2020-08-05 16:40 ` Christophe Leroy
2020-08-05 16:40 ` Christophe Leroy
2020-08-05 18:40 ` Segher Boessenkool
2020-08-05 18:40 ` Segher Boessenkool
2020-08-06 5:46 ` Christophe Leroy
2020-08-05 16:51 ` Christophe Leroy
2020-08-05 16:51 ` Christophe Leroy
2020-08-05 20:55 ` Segher Boessenkool
2020-08-05 7:09 ` [PATCH v10 3/5] powerpc/vdso: Save and restore TOC pointer on PPC64 Christophe Leroy
2020-08-05 7:09 ` [PATCH v10 4/5] powerpc/vdso: Switch VDSO to generic C implementation Christophe Leroy
2020-08-05 7:09 ` Christophe Leroy
2020-08-05 7:09 ` [PATCH v10 5/5] powerpc/vdso: Provide __kernel_clock_gettime64() on vdso32 Christophe Leroy
2020-08-05 7:09 ` Christophe Leroy
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).