linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4 10/12] arm64: vdso: replace gettimeofday.S with global vgettimeofday.C
@ 2017-10-31 18:32 Mark Salyzyn
  2017-11-01 22:14 ` Mark Salyzyn
  0 siblings, 1 reply; 6+ messages in thread
From: Mark Salyzyn @ 2017-10-31 18:32 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mark Salyzyn, James Morse, Russell King, Catalin Marinas,
	Will Deacon, Andy Lutomirski, Dmitry Safonov, John Stultz,
	Mark Rutland, Laura Abbott, Kees Cook, Ard Biesheuvel,
	Andy Gross, Kevin Brodsky, Andrew Pinski, Thomas Gleixner,
	linux-arm-kernel

Take an effort from the previous 9 patches to recode the arm64 vdso
code from assembler to C previously submitted by
Andrew Pinski <apinski@cavium.com>, rework it for use in both arm and
arm64, overlapping any optimizations for each architecture. But
instead of landing it in arm64, land the result into lib/vdso and
unify both implementations to simplify future maintenance.

apinski@cavium.com makes the following claims in the original patch:

This allows the compiler to optimize the divide by 1000 and remove
the other divides.

On ThunderX, gettimeofday improves by 32%.  On ThunderX 2,
gettimeofday improves by 18%.

Note I noticed a bug in the old (arm64) implementation of
__kernel_clock_getres; it was checking only the lower 32bits of the
pointer; this would work for most cases but could fail in a few.

<end of claim>

Signed-off-by: Mark Salyzyn <salyzyn@android.com>
Cc: James Morse <james.morse@arm.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dmitry Safonov <dsafonov@virtuozzo.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Andy Gross <andy.gross@linaro.org>
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
Cc: Andrew Pinski <apinski@cavium.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org

v2:
- turned off profiling, restored quiet_cmd_vdsoas.

v3:
- move arch/arm/vdso/vgettimeofday.c to lib/vdso/vgettimeofday.c.
- adjust vgettimeofday.c to be a better global candidate, switch to
  using ARCH_PROVIDES_TIMER and __arch_counter_get() as more generic.

v4:
- simplify arch_vdso_read_counter, use read_sysreg.
- Use GENMASK_ULL macro for ARCH_CLOCK_FIXED_MASK.
- update commit message.
- add vdso_wtm_clock_nsec_t, vdso_xtime_clock_sec_t and
  vdso_raw_time_nsec_t.

---
 arch/arm64/include/asm/vdso_datapage.h |  23 ++-
 arch/arm64/kernel/asm-offsets.c        |   2 +-
 arch/arm64/kernel/vdso.c               |   2 +-
 arch/arm64/kernel/vdso/Makefile        |  16 +-
 arch/arm64/kernel/vdso/compiler.h      |  67 +++++++
 arch/arm64/kernel/vdso/datapage.h      |  59 ++++++
 arch/arm64/kernel/vdso/gettimeofday.S  | 328 ---------------------------------
 arch/arm64/kernel/vdso/vgettimeofday.c |   3 +
 8 files changed, 160 insertions(+), 340 deletions(-)
 create mode 100644 arch/arm64/kernel/vdso/compiler.h
 create mode 100644 arch/arm64/kernel/vdso/datapage.h
 delete mode 100644 arch/arm64/kernel/vdso/gettimeofday.S
 create mode 100644 arch/arm64/kernel/vdso/vgettimeofday.c

diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h
index 2b9a63771eda..95f4a7abab80 100644
--- a/arch/arm64/include/asm/vdso_datapage.h
+++ b/arch/arm64/include/asm/vdso_datapage.h
@@ -20,16 +20,31 @@
 
 #ifndef __ASSEMBLY__
 
+#ifndef _VDSO_WTM_CLOCK_SEC_T
+#define _VDSO_WTM_CLOCK_SEC_T
+typedef __u64 vdso_wtm_clock_nsec_t;
+#endif
+
+#ifndef _VDSO_XTIME_CLOCK_SEC_T
+#define _VDSO_XTIME_CLOCK_SEC_T
+typedef __u64 vdso_xtime_clock_sec_t;
+#endif
+
+#ifndef _VDSO_RAW_TIME_SEC_T
+#define _VDSO_RAW_TIME_SEC_T
+typedef __u64 vdso_raw_time_sec_t;
+#endif
+
 struct vdso_data {
 	__u64 cs_cycle_last;	/* Timebase at clocksource init */
-	__u64 raw_time_sec;	/* Raw time */
+	vdso_raw_time_sec_t raw_time_sec;	/* Raw time */
 	__u64 raw_time_nsec;
-	__u64 xtime_clock_sec;	/* Kernel time */
-	__u64 xtime_clock_nsec;
+	vdso_xtime_clock_sec_t xtime_clock_sec;	/* Kernel time */
+	__u64 xtime_clock_snsec;
 	__u64 xtime_coarse_sec;	/* Coarse time */
 	__u64 xtime_coarse_nsec;
 	__u64 wtm_clock_sec;	/* Wall to monotonic time */
-	__u64 wtm_clock_nsec;
+	vdso_wtm_clock_nsec_t wtm_clock_nsec;
 	__u32 tb_seq_count;	/* Timebase sequence counter */
 	/* cs_* members must be adjacent and in this order (ldp accesses) */
 	__u32 cs_mono_mult;	/* NTP-adjusted clocksource multiplier */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 71bf088f1e4b..db5b305806b7 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -104,7 +104,7 @@ int main(void)
   DEFINE(VDSO_RAW_TIME_SEC,	offsetof(struct vdso_data, raw_time_sec));
   DEFINE(VDSO_RAW_TIME_NSEC,	offsetof(struct vdso_data, raw_time_nsec));
   DEFINE(VDSO_XTIME_CLK_SEC,	offsetof(struct vdso_data, xtime_clock_sec));
-  DEFINE(VDSO_XTIME_CLK_NSEC,	offsetof(struct vdso_data, xtime_clock_nsec));
+  DEFINE(VDSO_XTIME_CLK_NSEC,	offsetof(struct vdso_data, xtime_clock_snsec));
   DEFINE(VDSO_XTIME_CRS_SEC,	offsetof(struct vdso_data, xtime_coarse_sec));
   DEFINE(VDSO_XTIME_CRS_NSEC,	offsetof(struct vdso_data, xtime_coarse_nsec));
   DEFINE(VDSO_WTM_CLK_SEC,	offsetof(struct vdso_data, wtm_clock_sec));
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 2d419006ad43..59f150c25889 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -238,7 +238,7 @@ void update_vsyscall(struct timekeeper *tk)
 		vdso_data->raw_time_sec         = tk->raw_sec;
 		vdso_data->raw_time_nsec        = tk->tkr_raw.xtime_nsec;
 		vdso_data->xtime_clock_sec	= tk->xtime_sec;
-		vdso_data->xtime_clock_nsec	= tk->tkr_mono.xtime_nsec;
+		vdso_data->xtime_clock_snsec	= tk->tkr_mono.xtime_nsec;
 		vdso_data->cs_mono_mult		= tk->tkr_mono.mult;
 		vdso_data->cs_raw_mult		= tk->tkr_raw.mult;
 		/* tkr_mono.shift == tkr_raw.shift */
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 62c84f7cb01b..0925f8908dcb 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -5,18 +5,26 @@
 # Heavily based on the vDSO Makefiles for other archs.
 #
 
-obj-vdso := gettimeofday.o note.o sigreturn.o
+obj-vdso := vgettimeofday.o note.o sigreturn.o
 
 # Build rules
 targets := $(obj-vdso) vdso.so vdso.so.dbg
 obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
 
-ccflags-y := -shared -fno-common -fno-builtin
+ccflags-y := -shared -fno-common -fno-builtin -fno-stack-protector
+ccflags-y += -DDISABLE_BRANCH_PROFILING
 ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \
 		$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
 
+# Force -O2 to avoid libgcc dependencies
+CFLAGS_REMOVE_vgettimeofday.o = -pg -Os
+CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny
+
 # Disable gcov profiling for VDSO code
 GCOV_PROFILE := n
+KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
+KCOV_INSTRUMENT := n
 
 # Workaround for bare-metal (ELF) toolchains that neglect to pass -shared
 # down to collect2, resulting in silent corruption of the vDSO image.
@@ -48,10 +56,6 @@ endef
 include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
 	$(call if_changed,vdsosym)
 
-# Assembly rules for the .S files
-$(obj-vdso): %.o: %.S FORCE
-	$(call if_changed_dep,vdsoas)
-
 # Actual build commands
 quiet_cmd_vdsold = VDSOL   $@
       cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@
diff --git a/arch/arm64/kernel/vdso/compiler.h b/arch/arm64/kernel/vdso/compiler.h
new file mode 100644
index 000000000000..fd38451d03ba
--- /dev/null
+++ b/arch/arm64/kernel/vdso/compiler.h
@@ -0,0 +1,67 @@
+/*
+ * Userspace implementations of fallback calls
+ *
+ * Copyright (C) 2017 Cavium, Inc.
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ * Rewriten into C by: Andrew Pinski <apinski@cavium.com>
+ */
+
+#ifndef __VDSO_COMPILER_H
+#define __VDSO_COMPILER_H
+
+#include <asm/processor.h>
+#include <asm/unistd.h>
+#include <linux/compiler.h>
+
+#ifdef CONFIG_ARM_ARCH_TIMER
+#define ARCH_PROVIDES_TIMER
+#endif
+
+#define DEFINE_FALLBACK(name, type_arg1, name_arg1, type_arg2, name_arg2) \
+static notrace long name##_fallback(type_arg1 _##name_arg1,		  \
+				    type_arg2 _##name_arg2)		  \
+{									  \
+	register type_arg1 name_arg1 asm("x0") = _##name_arg1;		  \
+	register type_arg2 name_arg2 asm("x1") = _##name_arg2;		  \
+	register long ret asm ("x0");					  \
+	register long nr asm("x8") = __NR_##name;			  \
+									  \
+	asm volatile(							  \
+	"	svc #0\n"						  \
+	: "=r" (ret)							  \
+	: "r" (name_arg1), "r" (name_arg2), "r" (nr)			  \
+	: "memory");							  \
+									  \
+	return ret;							  \
+}
+
+/*
+ * AArch64 implementation of arch_counter_get_cntvct() suitable for vdso
+ */
+static __always_inline notrace u64 arch_vdso_read_counter(void)
+{
+	/* Read the virtual counter. */
+	isb();
+	return read_sysreg(cntvct_el0);
+}
+
+/* Rename exported vdso functions */
+#define __vdso_clock_gettime __kernel_clock_gettime
+#define __vdso_gettimeofday __kernel_gettimeofday
+#define __vdso_clock_getres __kernel_clock_getres
+
+#endif /* __VDSO_COMPILER_H */
diff --git a/arch/arm64/kernel/vdso/datapage.h b/arch/arm64/kernel/vdso/datapage.h
new file mode 100644
index 000000000000..be86a6074cf8
--- /dev/null
+++ b/arch/arm64/kernel/vdso/datapage.h
@@ -0,0 +1,59 @@
+/*
+ * Userspace implementations of __get_datapage
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __VDSO_DATAPAGE_H
+#define __VDSO_DATAPAGE_H
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <asm/vdso_datapage.h>
+
+/*
+ * We use the hidden visibility to prevent the compiler from generating a GOT
+ * relocation. Not only is going through a GOT useless (the entry couldn't and
+ * mustn't be overridden by another library), it does not even work: the linker
+ * cannot generate an absolute address to the data page.
+ *
+ * With the hidden visibility, the compiler simply generates a PC-relative
+ * relocation (R_ARM_REL32), and this is what we need.
+ */
+extern const struct vdso_data _vdso_data __attribute__((visibility("hidden")));
+
+static inline const struct vdso_data *__get_datapage(void)
+{
+	const struct vdso_data *ret;
+	/*
+	 * This simply puts &_vdso_data into ret. The reason why we don't use
+	 * `ret = &_vdso_data` is that the compiler tends to optimise this in a
+	 * very suboptimal way: instead of keeping &_vdso_data in a register,
+	 * it goes through a relocation almost every time _vdso_data must be
+	 * accessed (even in subfunctions). This is both time and space
+	 * consuming: each relocation uses a word in the code section, and it
+	 * has to be loaded at runtime.
+	 *
+	 * This trick hides the assignment from the compiler. Since it cannot
+	 * track where the pointer comes from, it will only use one relocation
+	 * where __get_datapage() is called, and then keep the result in a
+	 * register.
+	 */
+	asm("" : "=r"(ret) : "0"(&_vdso_data));
+	return ret;
+}
+
+/* We can only guarantee 56 bits of precision. */
+#define ARCH_CLOCK_FIXED_MASK GENMASK_ULL(55, 0)
+
+#endif /* __VDSO_DATAPAGE_H */
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
deleted file mode 100644
index 76320e920965..000000000000
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- * Userspace implementations of gettimeofday() and friends.
- *
- * Copyright (C) 2012 ARM Limited
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Will Deacon <will.deacon@arm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
-#define NSEC_PER_SEC_LO16	0xca00
-#define NSEC_PER_SEC_HI16	0x3b9a
-
-vdso_data	.req	x6
-seqcnt		.req	w7
-w_tmp		.req	w8
-x_tmp		.req	x8
-
-/*
- * Conventions for macro arguments:
- * - An argument is write-only if its name starts with "res".
- * - All other arguments are read-only, unless otherwise specified.
- */
-
-	.macro	seqcnt_acquire
-9999:	ldr	seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
-	tbnz	seqcnt, #0, 9999b
-	dmb	ishld
-	.endm
-
-	.macro	seqcnt_check fail
-	dmb	ishld
-	ldr	w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT]
-	cmp	w_tmp, seqcnt
-	b.ne	\fail
-	.endm
-
-	.macro	syscall_check fail
-	ldr	w_tmp, [vdso_data, #VDSO_USE_SYSCALL]
-	cbnz	w_tmp, \fail
-	.endm
-
-	.macro get_nsec_per_sec res
-	mov	\res, #NSEC_PER_SEC_LO16
-	movk	\res, #NSEC_PER_SEC_HI16, lsl #16
-	.endm
-
-	/*
-	 * Returns the clock delta, in nanoseconds left-shifted by the clock
-	 * shift.
-	 */
-	.macro	get_clock_shifted_nsec res, cycle_last, mult
-	/* Read the virtual counter. */
-	isb
-	mrs	x_tmp, cntvct_el0
-	/* Calculate cycle delta and convert to ns. */
-	sub	\res, x_tmp, \cycle_last
-	/* We can only guarantee 56 bits of precision. */
-	movn	x_tmp, #0xff00, lsl #48
-	and	\res, x_tmp, \res
-	mul	\res, \res, \mult
-	.endm
-
-	/*
-	 * Returns in res_{sec,nsec} the REALTIME timespec, based on the
-	 * "wall time" (xtime) and the clock_mono delta.
-	 */
-	.macro	get_ts_realtime res_sec, res_nsec, \
-			clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec
-	add	\res_nsec, \clock_nsec, \xtime_nsec
-	udiv	x_tmp, \res_nsec, \nsec_to_sec
-	add	\res_sec, \xtime_sec, x_tmp
-	msub	\res_nsec, x_tmp, \nsec_to_sec, \res_nsec
-	.endm
-
-	/*
-	 * Returns in res_{sec,nsec} the timespec based on the clock_raw delta,
-	 * used for CLOCK_MONOTONIC_RAW.
-	 */
-	.macro	get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec
-	udiv	\res_sec, \clock_nsec, \nsec_to_sec
-	msub	\res_nsec, \res_sec, \nsec_to_sec, \clock_nsec
-	.endm
-
-	/* sec and nsec are modified in place. */
-	.macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec
-	/* Add timespec. */
-	add	\sec, \sec, \ts_sec
-	add	\nsec, \nsec, \ts_nsec
-
-	/* Normalise the new timespec. */
-	cmp	\nsec, \nsec_to_sec
-	b.lt	9999f
-	sub	\nsec, \nsec, \nsec_to_sec
-	add	\sec, \sec, #1
-9999:
-	cmp	\nsec, #0
-	b.ge	9998f
-	add	\nsec, \nsec, \nsec_to_sec
-	sub	\sec, \sec, #1
-9998:
-	.endm
-
-	.macro clock_gettime_return, shift=0
-	.if \shift == 1
-	lsr	x11, x11, x12
-	.endif
-	stp	x10, x11, [x1, #TSPEC_TV_SEC]
-	mov	x0, xzr
-	ret
-	.endm
-
-	.macro jump_slot jumptable, index, label
-	.if (. - \jumptable) != 4 * (\index)
-	.error "Jump slot index mismatch"
-	.endif
-	b	\label
-	.endm
-
-	.text
-
-/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */
-ENTRY(__kernel_gettimeofday)
-	.cfi_startproc
-	adr	vdso_data, _vdso_data
-	/* If tv is NULL, skip to the timezone code. */
-	cbz	x0, 2f
-
-	/* Compute the time of day. */
-1:	seqcnt_acquire
-	syscall_check fail=4f
-	ldr	x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
-	/* w11 = cs_mono_mult, w12 = cs_shift */
-	ldp	w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
-	ldp	x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
-	seqcnt_check fail=1b
-
-	get_nsec_per_sec res=x9
-	lsl	x9, x9, x12
-
-	get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
-	get_ts_realtime res_sec=x10, res_nsec=x11, \
-		clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
-
-	/* Convert ns to us. */
-	mov	x13, #1000
-	lsl	x13, x13, x12
-	udiv	x11, x11, x13
-	stp	x10, x11, [x0, #TVAL_TV_SEC]
-2:
-	/* If tz is NULL, return 0. */
-	cbz	x1, 3f
-	ldp	w4, w5, [vdso_data, #VDSO_TZ_MINWEST]
-	stp	w4, w5, [x1, #TZ_MINWEST]
-3:
-	mov	x0, xzr
-	ret
-4:
-	/* Syscall fallback. */
-	mov	x8, #__NR_gettimeofday
-	svc	#0
-	ret
-	.cfi_endproc
-ENDPROC(__kernel_gettimeofday)
-
-#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE
-
-/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */
-ENTRY(__kernel_clock_gettime)
-	.cfi_startproc
-	cmp	w0, #JUMPSLOT_MAX
-	b.hi	syscall
-	adr	vdso_data, _vdso_data
-	adr	x_tmp, jumptable
-	add	x_tmp, x_tmp, w0, uxtw #2
-	br	x_tmp
-
-	ALIGN
-jumptable:
-	jump_slot jumptable, CLOCK_REALTIME, realtime
-	jump_slot jumptable, CLOCK_MONOTONIC, monotonic
-	b	syscall
-	b	syscall
-	jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw
-	jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse
-	jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse
-
-	.if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1)
-	.error	"Wrong jumptable size"
-	.endif
-
-	ALIGN
-realtime:
-	seqcnt_acquire
-	syscall_check fail=syscall
-	ldr	x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
-	/* w11 = cs_mono_mult, w12 = cs_shift */
-	ldp	w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
-	ldp	x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
-	seqcnt_check fail=realtime
-
-	/* All computations are done with left-shifted nsecs. */
-	get_nsec_per_sec res=x9
-	lsl	x9, x9, x12
-
-	get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
-	get_ts_realtime res_sec=x10, res_nsec=x11, \
-		clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
-	clock_gettime_return, shift=1
-
-	ALIGN
-monotonic:
-	seqcnt_acquire
-	syscall_check fail=syscall
-	ldr	x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
-	/* w11 = cs_mono_mult, w12 = cs_shift */
-	ldp	w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
-	ldp	x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
-	ldp	x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC]
-	seqcnt_check fail=monotonic
-
-	/* All computations are done with left-shifted nsecs. */
-	lsl	x4, x4, x12
-	get_nsec_per_sec res=x9
-	lsl	x9, x9, x12
-
-	get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
-	get_ts_realtime res_sec=x10, res_nsec=x11, \
-		clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
-
-	add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9
-	clock_gettime_return, shift=1
-
-	ALIGN
-monotonic_raw:
-	seqcnt_acquire
-	syscall_check fail=syscall
-	ldr	x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
-	/* w11 = cs_raw_mult, w12 = cs_shift */
-	ldp	w12, w11, [vdso_data, #VDSO_CS_SHIFT]
-	ldp	x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC]
-	seqcnt_check fail=monotonic_raw
-
-	/* All computations are done with left-shifted nsecs. */
-	get_nsec_per_sec res=x9
-	lsl	x9, x9, x12
-
-	get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
-	get_ts_clock_raw res_sec=x10, res_nsec=x11, \
-		clock_nsec=x15, nsec_to_sec=x9
-
-	add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
-	clock_gettime_return, shift=1
-
-	ALIGN
-realtime_coarse:
-	seqcnt_acquire
-	ldp	x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
-	seqcnt_check fail=realtime_coarse
-	clock_gettime_return
-
-	ALIGN
-monotonic_coarse:
-	seqcnt_acquire
-	ldp	x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
-	ldp	x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
-	seqcnt_check fail=monotonic_coarse
-
-	/* Computations are done in (non-shifted) nsecs. */
-	get_nsec_per_sec res=x9
-	add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
-	clock_gettime_return
-
-	ALIGN
-syscall: /* Syscall fallback. */
-	mov	x8, #__NR_clock_gettime
-	svc	#0
-	ret
-	.cfi_endproc
-ENDPROC(__kernel_clock_gettime)
-
-/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */
-ENTRY(__kernel_clock_getres)
-	.cfi_startproc
-	cmp	w0, #CLOCK_REALTIME
-	ccmp	w0, #CLOCK_MONOTONIC, #0x4, ne
-	ccmp	w0, #CLOCK_MONOTONIC_RAW, #0x4, ne
-	b.ne	1f
-
-	ldr	x2, 5f
-	b	2f
-1:
-	cmp	w0, #CLOCK_REALTIME_COARSE
-	ccmp	w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
-	b.ne	4f
-	ldr	x2, 6f
-2:
-	cbz	w1, 3f
-	stp	xzr, x2, [x1]
-
-3:	/* res == NULL. */
-	mov	w0, wzr
-	ret
-
-4:	/* Syscall fallback. */
-	mov	x8, #__NR_clock_getres
-	svc	#0
-	ret
-5:
-	.quad	CLOCK_REALTIME_RES
-6:
-	.quad	CLOCK_COARSE_RES
-	.cfi_endproc
-ENDPROC(__kernel_clock_getres)
diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c
new file mode 100644
index 000000000000..b73d4011993d
--- /dev/null
+++ b/arch/arm64/kernel/vdso/vgettimeofday.c
@@ -0,0 +1,3 @@
+#include "compiler.h"
+#include "datapage.h"
+#include "../../../../lib/vdso/vgettimeofday.c"
-- 
2.15.0.rc2.357.g7e34df9404-goog

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v4 10/12] arm64: vdso: replace gettimeofday.S with global vgettimeofday.C
  2017-10-31 18:32 [PATCH v4 10/12] arm64: vdso: replace gettimeofday.S with global vgettimeofday.C Mark Salyzyn
@ 2017-11-01 22:14 ` Mark Salyzyn
       [not found]   ` <61EF90BA-9A6D-4DC4-A1F4-BEAA210D708C@redhat.com>
  0 siblings, 1 reply; 6+ messages in thread
From: Mark Salyzyn @ 2017-11-01 22:14 UTC (permalink / raw)
  To: linux-kernel
  Cc: James Morse, Russell King, Catalin Marinas, Will Deacon,
	Andy Lutomirski, Dmitry Safonov, John Stultz, Mark Rutland,
	Laura Abbott, Kees Cook, Ard Biesheuvel, Andy Gross,
	Kevin Brodsky, Andrew Pinski, Thomas Gleixner, linux-arm-kernel,
	Jon Masters

On 10/31/2017 11:32 AM, Mark Salyzyn wrote:
> . . .
> diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
> index 62c84f7cb01b..0925f8908dcb 100644
> --- a/arch/arm64/kernel/vdso/Makefile
> +++ b/arch/arm64/kernel/vdso/Makefile
> @@ -5,18 +5,26 @@
>   # Heavily based on the vDSO Makefiles for other archs.
>   #
>   
> -obj-vdso := gettimeofday.o note.o sigreturn.o
> +obj-vdso := vgettimeofday.o note.o sigreturn.o
>   
>   # Build rules
>   targets := $(obj-vdso) vdso.so vdso.so.dbg
>   obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
>   
> -ccflags-y := -shared -fno-common -fno-builtin
> +ccflags-y := -shared -fno-common -fno-builtin -fno-stack-protector
> +ccflags-y += -DDISABLE_BRANCH_PROFILING
>   ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \
>   		$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
>   
> +# Force -O2 to avoid libgcc dependencies
> +CFLAGS_REMOVE_vgettimeofday.o = -pg -Os
> +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny
> +
This last stanza breaks clang builds. Suggested fix is:

@@ -18,7 +18,11 @@ ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \

  # Force -O2 to avoid libgcc dependencies
  CFLAGS_REMOVE_vgettimeofday.o = -pg -Os
+ifeq ($(cc-name),clang)
+CFLAGS_vgettimeofday.o = -O2
+else
  CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny
+endif

  # Disable gcov profiling for VDSO code
  GCOV_PROFILE := n

Holding off respin or followup patch until after we get a response from 
Jon Masters <jcm@readhat.com> on their QE tests on this patch series.

-- Mark

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v4 10/12] arm64: vdso: replace gettimeofday.S with global vgettimeofday.C
       [not found]   ` <61EF90BA-9A6D-4DC4-A1F4-BEAA210D708C@redhat.com>
@ 2017-11-21 18:03     ` Jeffrey Bastian
  2017-11-21 21:16       ` Laura Abbott
                         ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Jeffrey Bastian @ 2017-11-21 18:03 UTC (permalink / raw)
  To: Jon Masters
  Cc: Mark Salyzyn, linux-kernel, James Morse, Russell King,
	Catalin Marinas, Will Deacon, Andy Lutomirski, Dmitry Safonov,
	John Stultz, Mark Rutland, Laura Abbott, Kees Cook,
	Ard Biesheuvel, Andy Gross, Kevin Brodsky, Andrew Pinski,
	Thomas Gleixner, linux-arm-kernel

On Thu, Nov 02, 2017 at 12:22:36AM -0400, Jon Masters wrote:
> On Nov 2, 2017, at 06:14, Mark Salyzyn <salyzyn@android.com> wrote:
> > Holding off respin or followup patch until after we get a response
> > from Jon Masters <jcm@readhat.com> on their QE tests on this patch
> > series.
> 
> Copying Jeff Bastian who owns QE. Jeff, can you recall what test was
> failing when we tried the gettimeofday C rewrite VDSO patches?

The LTP clock_getres01 test was failing when we tried the initial
implementation of gettimeofday-in-C.  This would be a good place to
start when testing the new version of the patches.

https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/clock_getres/clock_getres01.c


-- 
Jeff Bastian
Kernel QE - Hardware Enablement
Red Hat

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v4 10/12] arm64: vdso: replace gettimeofday.S with global vgettimeofday.C
  2017-11-21 18:03     ` Jeffrey Bastian
@ 2017-11-21 21:16       ` Laura Abbott
  2017-11-21 21:28       ` Mark Salyzyn
  2017-12-15 22:29       ` Jeffrey Bastian
  2 siblings, 0 replies; 6+ messages in thread
From: Laura Abbott @ 2017-11-21 21:16 UTC (permalink / raw)
  To: Jon Masters, Mark Salyzyn, linux-kernel, James Morse,
	Russell King, Catalin Marinas, Will Deacon, Andy Lutomirski,
	Dmitry Safonov, John Stultz, Mark Rutland, Kees Cook,
	Ard Biesheuvel, Andy Gross, Kevin Brodsky, Andrew Pinski,
	Thomas Gleixner, linux-arm-kernel

On 11/21/2017 10:03 AM, Jeffrey Bastian wrote:
> On Thu, Nov 02, 2017 at 12:22:36AM -0400, Jon Masters wrote:
>> On Nov 2, 2017, at 06:14, Mark Salyzyn <salyzyn@android.com> wrote:
>>> Holding off respin or followup patch until after we get a response
>>> from Jon Masters <jcm@readhat.com> on their QE tests on this patch
>>> series.
>>
>> Copying Jeff Bastian who owns QE. Jeff, can you recall what test was
>> failing when we tried the gettimeofday C rewrite VDSO patches?
> 
> The LTP clock_getres01 test was failing when we tried the initial
> implementation of gettimeofday-in-C.  This would be a good place to
> start when testing the new version of the patches.
> 
> https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/clock_getres/clock_getres01.c
> 
> 

FWIW, I just tried this on my Mustang (4.14 + these patches) with
both DT and acpi=force and it passed.

Thanks,
Laura

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v4 10/12] arm64: vdso: replace gettimeofday.S with global vgettimeofday.C
  2017-11-21 18:03     ` Jeffrey Bastian
  2017-11-21 21:16       ` Laura Abbott
@ 2017-11-21 21:28       ` Mark Salyzyn
  2017-12-15 22:29       ` Jeffrey Bastian
  2 siblings, 0 replies; 6+ messages in thread
From: Mark Salyzyn @ 2017-11-21 21:28 UTC (permalink / raw)
  To: Jon Masters, linux-kernel, James Morse, Russell King,
	Catalin Marinas, Will Deacon, Andy Lutomirski, Dmitry Safonov,
	John Stultz, Mark Rutland, Laura Abbott, Kees Cook,
	Ard Biesheuvel, Andy Gross, Kevin Brodsky, Andrew Pinski,
	Thomas Gleixner, linux-arm-kernel

On 11/21/2017 10:03 AM, Jeffrey Bastian wrote:
> On Thu, Nov 02, 2017 at 12:22:36AM -0400, Jon Masters wrote:
>> On Nov 2, 2017, at 06:14, Mark Salyzyn <salyzyn@android.com> wrote:
>>> Holding off respin or followup patch until after we get a response
>>> from Jon Masters <jcm@readhat.com> on their QE tests on this patch
>>> series.
>> Copying Jeff Bastian who owns QE. Jeff, can you recall what test was
>> failing when we tried the gettimeofday C rewrite VDSO patches?
> The LTP clock_getres01 test was failing when we tried the initial
> implementation of gettimeofday-in-C.  This would be a good place to
> start when testing the new version of the patches.
>
> https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/clock_getres/clock_getres01.c
>
>
Thanks.

I ran the test (32 and 64 bit) on my latest work (but back-ported to 
4.9) and it passes. I did not change __vdso_clock_getres or the fallback 
(syscall) handler in the patch series though, so the failure has me 
wondering. Could you re-run the test with latest to be sure, and I will 
look into setting up to test with a ToT kernel (after the thanksgiving 
break).

Using bionic libc, with advance modifications to utilize 
__vdso_clock_getres; required in order to support the test.

NB: in AOSP _today_ bionic does _not_ propagate the errno correctly on 
the __vdso_clock_gettime vdso path and will understandably fail the 
similar tests. The public rubber stamp of __vdso_clock_gettime code to 
support __vdso_clock_getres will not work. errno propagation issue will 
be fixed in bionic.

-- Mark

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v4 10/12] arm64: vdso: replace gettimeofday.S with global vgettimeofday.C
  2017-11-21 18:03     ` Jeffrey Bastian
  2017-11-21 21:16       ` Laura Abbott
  2017-11-21 21:28       ` Mark Salyzyn
@ 2017-12-15 22:29       ` Jeffrey Bastian
  2 siblings, 0 replies; 6+ messages in thread
From: Jeffrey Bastian @ 2017-12-15 22:29 UTC (permalink / raw)
  To: Jon Masters, Mark Salyzyn, linux-kernel, James Morse,
	Russell King, Catalin Marinas, Will Deacon, Andy Lutomirski,
	Dmitry Safonov, John Stultz, Mark Rutland, Laura Abbott,
	Kees Cook, Ard Biesheuvel, Andy Gross, Kevin Brodsky,
	Andrew Pinski, Thomas Gleixner, linux-arm-kernel

On Tue, Nov 21, 2017 at 13:28:13 PST, Mark Salyzyn wrote:
> I ran the test (32 and 64 bit) on my latest work (but back-ported to
> 4.9) and it passes. I did not change __vdso_clock_getres or the fallback
> (syscall) handler in the patch series though, so the failure has me
> wondering. Could you re-run the test with latest to be sure, and I will
> look into setting up to test with a ToT kernel (after the thanksgiving
> break).


I applied your patch set to a 4.14 kernel and ran LTP clock_getres01
(64-bit) and it still passed:

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[root@localhost ~]# uname -r
4.14.0-vdsogtod.aarch64

[root@localhost ~]# ./ltp-full-20170929/testcases/kernel/syscalls/clock_getres/clock_getres01
tst_test.c:934: INFO: Timeout per run is 0h 05m 00s
clock_getres01.c:79: PASS: clock_getres(REALTIME, ...) succeeded
clock_getres01.c:79: PASS: clock_getres(MONOTONIC, ...) succeeded
clock_getres01.c:79: PASS: clock_getres(PROCESS_CPUTIME_ID, ...) succeeded
clock_getres01.c:79: PASS: clock_getres(THREAD_CPUTIME_ID, ...) succeeded
clock_getres01.c:79: PASS: clock_getres(REALTIME, ...) succeeded
clock_getres01.c:79: PASS: clock_getres(CLOCK_MONOTONIC_RAW, ...) succeeded
clock_getres01.c:79: PASS: clock_getres(CLOCK_REALTIME_COARSE, ...) succeeded
clock_getres01.c:79: PASS: clock_getres(CLOCK_MONOTONIC_COARSE, ...) succeeded
clock_getres01.c:79: PASS: clock_getres(CLOCK_BOOTTIME, ...) succeeded
clock_getres01.c:64: CONF: clock_getres(CLOCK_REALTIME_ALARM, ...) NO SUPPORTED
clock_getres01.c:64: CONF: clock_getres(CLOCK_BOOTTIME_ALARM, ...) NO SUPPORTED
clock_getres01.c:79: PASS: clock_getres(-1, ...) succeeded

Summary:
passed   10
failed   0
skipped  2
warnings 0
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


I also tried another test (see source below) which sleeps for 0.5
seconds and then verifies that approximately 0.5 seconds passed.  With
the original gettimeofday-in-C patches, it would often fail with weird
elapsed times like 0.1 seconds or 1.1 seconds:

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[root@localhost ~]# ./a.out ; echo $?
1: clock_gettime: tv_sec = 1207, tv_nsec = 6403566
Sleeping 0.5 seconds
2: clock_gettime: tv_sec = 1207, tv_nsec = 16512476
Elapsed: 0.10108910
FAIL: elapsed time is not approximately 0.5 seconds
255

[root@localhost ~]# ./a.out ; echo $?
1: clock_gettime: tv_sec = 1215, tv_nsec = 9784545
Sleeping 0.5 seconds
2: clock_gettime: tv_sec = 1216, tv_nsec = 9898595
Elapsed: 1.114050
FAIL: elapsed time is not approximately 0.5 seconds
255

[root@localhost ~]# ./a.out ; echo $?
1: clock_gettime: tv_sec = 1225, tv_nsec = 13710370
Sleeping 0.5 seconds
2: clock_gettime: tv_sec = 1225, tv_nsec = 13819740
Elapsed: 0.109370
FAIL: elapsed time is not approximately 0.5 seconds
255
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


The elapsed time matches the expected 0.5 seconds with the latest patch
version:

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[root@localhost ~]# ./a.out ; echo $?
1: clock_gettime: tv_sec = 2077, tv_nsec = 989623095
Sleeping 0.5 seconds
2: clock_gettime: tv_sec = 2078, tv_nsec = 489728070
Elapsed: 0.500104975
PASS
0
[root@localhost ~]# ./a.out ; echo $?
1: clock_gettime: tv_sec = 2079, tv_nsec = 519625035
Sleeping 0.5 seconds
2: clock_gettime: tv_sec = 2080, tv_nsec = 19726365
Elapsed: 0.500101330
PASS
0
[root@localhost ~]# ./a.out ; echo $?
1: clock_gettime: tv_sec = 2080, tv_nsec = 684894725
Sleeping 0.5 seconds
2: clock_gettime: tv_sec = 2081, tv_nsec = 184994605
Elapsed: 0.500099880
PASS
0
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Source for the elapsed time test:

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <stdio.h>
#include <time.h>

int main(void)
{
    int ret;
    clockid_t clk_id = CLOCK_MONOTONIC_RAW;
    struct timespec tp1, tp2, elapsed;
    struct timespec req, rem;

    ret = clock_gettime(clk_id, &tp1);
    if (ret) {
        perror("ERROR: clock_gettime:");
    }
    printf("1: clock_gettime: tv_sec = %ld, tv_nsec = %lld\n",
           tp1.tv_sec, tp1.tv_nsec);

    printf("Sleeping 0.5 seconds\n");
    req.tv_sec = 0;
    req.tv_nsec = 500000000;
    ret = nanosleep(&req, &rem);
    if (ret) {
        perror("ERROR: nanosleep:");
        printf("       Remaining sleep: tv_sec = %ld, tv_nsec = %lld\n",
               rem.tv_sec, rem.tv_nsec);
    }

    ret = clock_gettime(clk_id, &tp2);
    if (ret) {
        perror("ERROR: clock_gettime:");
    }
    printf("2: clock_gettime: tv_sec = %ld, tv_nsec = %lld\n",
           tp2.tv_sec, tp2.tv_nsec);

    if ((tp2.tv_nsec - tp1.tv_nsec) < 0) {
        elapsed.tv_sec = tp2.tv_sec - tp1.tv_sec - 1;
        elapsed.tv_nsec = 1000000000 + tp2.tv_nsec - tp1.tv_nsec;
    } else {
        elapsed.tv_sec = tp2.tv_sec - tp1.tv_sec;
        elapsed.tv_nsec = tp2.tv_nsec - tp1.tv_nsec;
    }

    printf("Elapsed: %ld.%lld\n", elapsed.tv_sec, elapsed.tv_nsec);

    if (elapsed.tv_sec != 0 ||
        elapsed.tv_nsec < 490000000 ||
        elapsed.tv_nsec > 510000000) {
        printf("FAIL: elapsed time is not approximately 0.5 seconds\n");
        return -1;
    }

    printf("PASS\n");
    return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


-- 
Jeff Bastian
Kernel QE - Hardware Enablement
Red Hat

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-12-15 22:31 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-31 18:32 [PATCH v4 10/12] arm64: vdso: replace gettimeofday.S with global vgettimeofday.C Mark Salyzyn
2017-11-01 22:14 ` Mark Salyzyn
     [not found]   ` <61EF90BA-9A6D-4DC4-A1F4-BEAA210D708C@redhat.com>
2017-11-21 18:03     ` Jeffrey Bastian
2017-11-21 21:16       ` Laura Abbott
2017-11-21 21:28       ` Mark Salyzyn
2017-12-15 22:29       ` Jeffrey Bastian

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).