linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Dmitry Safonov <dima@arista.com>
To: linux-kernel@vger.kernel.org
Cc: Andrei Vagin <avagin@openvz.org>,
	Dmitry Safonov <dima@arista.com>, Adrian Reber <adrian@lisas.de>,
	Andy Lutomirski <luto@kernel.org>, Arnd Bergmann <arnd@arndb.de>,
	Christian Brauner <christian.brauner@ubuntu.com>,
	Cyrill Gorcunov <gorcunov@openvz.org>,
	Dmitry Safonov <0x7f454c46@gmail.com>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	"H. Peter Anvin" <hpa@zytor.com>, Ingo Molnar <mingo@redhat.com>,
	Jann Horn <jannh@google.com>, Jeff Dike <jdike@addtoit.com>,
	Oleg Nesterov <oleg@redhat.com>,
	Pavel Emelyanov <xemul@virtuozzo.com>,
	Shuah Khan <shuah@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Vincenzo Frascino <vincenzo.frascino@arm.com>,
	containers@lists.linux-foundation.org, criu@openvz.org,
	linux-api@vger.kernel.org, x86@kernel.org,
	Andrei Vagin <avagin@gmail.com>
Subject: [PATCHv4 15/28] x86/vdso: Add offsets page in vvar
Date: Wed, 12 Jun 2019 20:26:14 +0100	[thread overview]
Message-ID: <20190612192628.23797-16-dima@arista.com> (raw)
In-Reply-To: <20190612192628.23797-1-dima@arista.com>

From: Andrei Vagin <avagin@openvz.org>

As modern applications fetch time from VDSO without entering the kernel,
it's needed to provide offsets for userspace code inside time namespace.

A page for timens offsets is allocated on time namespace construction.
Put that page into VVAR for tasks inside timens and zero page for
host processes.

As VDSO code is already optimized as much as possible in terms of speed,
any new if-condition in VDSO code is undesirable; the goal is to provide
two .so(s), as was originally suggested by Andy and Thomas:
- for host tasks with optimized-out clk_to_ns() without any penalty
- for processes inside timens with clk_to_ns()
For this purpose, define clk_to_ns() under CONFIG_TIME_NS.

To eliminate any performance regression, clk_to_ns() will be called
under static_branch with follow-up patches, that adds support for
patching vdso.

VDSO mappings are platform-specific, add Kconfig dependency for arch.

Signed-off-by: Andrei Vagin <avagin@gmail.com>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
 arch/Kconfig                          |  5 ++++
 arch/x86/Kconfig                      |  1 +
 arch/x86/entry/vdso/vclock_gettime.c  | 43 +++++++++++++++++++++++++++
 arch/x86/entry/vdso/vdso-layout.lds.S |  9 +++++-
 arch/x86/entry/vdso/vdso2c.c          |  3 ++
 arch/x86/entry/vdso/vma.c             | 12 ++++++++
 arch/x86/include/asm/vdso.h           |  1 +
 init/Kconfig                          |  1 +
 8 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index c47b328eada0..503a4113dc6c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -734,6 +734,11 @@ config HAVE_ARCH_NVRAM_OPS
 config ISA_BUS_API
 	def_bool ISA
 
+config ARCH_HAS_VDSO_TIME_NS
+	bool
+	help
+	 VDSO can add time-ns offsets without entering kernel.
+
 #
 # ABI hall of shame
 #
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2bbbd4d1ba31..da70b320eb09 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -79,6 +79,7 @@ config X86
 	select ARCH_HAS_STRICT_MODULE_RWX
 	select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_HAS_VDSO_TIME_NS
 	select ARCH_HAS_ZONE_DEVICE		if X86_64
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 0f82a70c7682..e2d93628c0dd 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -21,6 +21,7 @@
 #include <linux/math64.h>
 #include <linux/time.h>
 #include <linux/kernel.h>
+#include <linux/timens_offsets.h>
 
 #define gtod (&VVAR(vsyscall_gtod_data))
 
@@ -38,6 +39,11 @@ extern u8 hvclock_page[PAGE_SIZE]
 	__attribute__((visibility("hidden")));
 #endif
 
+#ifdef CONFIG_TIME_NS
+extern u8 timens_page
+	__attribute__((visibility("hidden")));
+#endif
+
 #ifndef BUILD_VDSO32
 
 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
@@ -139,6 +145,39 @@ notrace static inline u64 vgetcyc(int mode)
 	return U64_MAX;
 }
 
+#ifdef CONFIG_TIME_NS
+notrace static __always_inline void clk_to_ns(clockid_t clk, struct timespec *ts)
+{
+	struct timens_offsets *timens = (struct timens_offsets *) &timens_page;
+	struct timespec64 *offset64;
+
+	switch (clk) {
+	case CLOCK_MONOTONIC:
+	case CLOCK_MONOTONIC_COARSE:
+	case CLOCK_MONOTONIC_RAW:
+		offset64 = &timens->monotonic;
+		break;
+	case CLOCK_BOOTTIME:
+		offset64 = &timens->boottime;
+	default:
+		return;
+	}
+
+	ts->tv_nsec += offset64->tv_nsec;
+	ts->tv_sec += offset64->tv_sec;
+	if (ts->tv_nsec >= NSEC_PER_SEC) {
+		ts->tv_nsec -= NSEC_PER_SEC;
+		ts->tv_sec++;
+	}
+	if (ts->tv_nsec < 0) {
+		ts->tv_nsec += NSEC_PER_SEC;
+		ts->tv_sec--;
+	}
+}
+#else
+notrace static __always_inline void clk_to_ns(clockid_t clk, struct timespec *ts) {}
+#endif
+
 notrace static int do_hres(clockid_t clk, struct timespec *ts)
 {
 	struct vgtod_ts *base = &gtod->basetime[clk];
@@ -165,6 +204,8 @@ notrace static int do_hres(clockid_t clk, struct timespec *ts)
 	ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
 	ts->tv_nsec = ns;
 
+	clk_to_ns(clk, ts);
+
 	return 0;
 }
 
@@ -178,6 +219,8 @@ notrace static void do_coarse(clockid_t clk, struct timespec *ts)
 		ts->tv_sec = base->sec;
 		ts->tv_nsec = base->nsec;
 	} while (unlikely(gtod_read_retry(gtod, seq)));
+
+	clk_to_ns(clk, ts);
 }
 
 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index 93c6dc7812d0..ba216527e59f 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -7,6 +7,12 @@
  * This script controls its layout.
  */
 
+#ifdef CONFIG_TIME_NS
+# define TIMENS_SZ	PAGE_SIZE
+#else
+# define TIMENS_SZ	0
+#endif
+
 SECTIONS
 {
 	/*
@@ -16,7 +22,7 @@ SECTIONS
 	 * segment.
 	 */
 
-	vvar_start = . - 3 * PAGE_SIZE;
+	vvar_start = . - (3 * PAGE_SIZE + TIMENS_SZ);
 	vvar_page = vvar_start;
 
 	/* Place all vvars at the offsets in asm/vvar.h. */
@@ -28,6 +34,7 @@ SECTIONS
 
 	pvclock_page = vvar_start + PAGE_SIZE;
 	hvclock_page = vvar_start + 2 * PAGE_SIZE;
+	timens_page = vvar_start + 3 * PAGE_SIZE;
 
 	. = SIZEOF_HEADERS;
 
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index ce67370d14e5..7380908045c7 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -75,12 +75,14 @@ enum {
 	sym_vvar_page,
 	sym_pvclock_page,
 	sym_hvclock_page,
+	sym_timens_page,
 };
 
 const int special_pages[] = {
 	sym_vvar_page,
 	sym_pvclock_page,
 	sym_hvclock_page,
+	sym_timens_page,
 };
 
 struct vdso_sym {
@@ -93,6 +95,7 @@ struct vdso_sym required_syms[] = {
 	[sym_vvar_page] = {"vvar_page", true},
 	[sym_pvclock_page] = {"pvclock_page", true},
 	[sym_hvclock_page] = {"hvclock_page", true},
+	[sym_timens_page] = {"timens_page", true},
 	{"VDSO32_NOTE_MASK", true},
 	{"__kernel_vsyscall", true},
 	{"__kernel_sigreturn", true},
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index c30a33b2963b..8a7f4cfe1fad 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -14,6 +14,7 @@
 #include <linux/elf.h>
 #include <linux/cpu.h>
 #include <linux/ptrace.h>
+#include <linux/time_namespace.h>
 #include <asm/pvclock.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
@@ -23,6 +24,7 @@
 #include <asm/desc.h>
 #include <asm/cpufeature.h>
 #include <asm/mshyperv.h>
+#include <asm/page.h>
 
 #if defined(CONFIG_X86_64)
 unsigned int __read_mostly vdso64_enabled = 1;
@@ -135,6 +137,16 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
 		if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
 			return vmf_insert_pfn(vma, vmf->address,
 					vmalloc_to_pfn(tsc_pg));
+	} else if (sym_offset == image->sym_timens_page) {
+		struct time_namespace *ns = current->nsproxy->time_ns;
+		unsigned long pfn;
+
+		if (!ns->offsets)
+			pfn = page_to_pfn(ZERO_PAGE(0));
+		else
+			pfn = page_to_pfn(virt_to_page(ns->offsets));
+
+		return vmf_insert_pfn(vma, vmf->address, pfn);
 	}
 
 	return VM_FAULT_SIGBUS;
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index dffdc12cc7d6..9d420c545607 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -21,6 +21,7 @@ struct vdso_image {
 	long sym_vvar_page;
 	long sym_pvclock_page;
 	long sym_hvclock_page;
+	long sym_timens_page;
 	long sym_VDSO32_NOTE_MASK;
 	long sym___kernel_sigreturn;
 	long sym___kernel_rt_sigreturn;
diff --git a/init/Kconfig b/init/Kconfig
index 098fe185360c..3d9497241394 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -993,6 +993,7 @@ config UTS_NS
 
 config TIME_NS
 	bool "TIME namespace"
+	depends on ARCH_HAS_VDSO_TIME_NS
 	default y
 	help
 	  In this namespace boottime and monotonic clocks can be set.
-- 
2.22.0


  parent reply	other threads:[~2019-06-12 19:28 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-12 19:25 [PATCHv4 00/28] kernel: Introduce Time Namespace Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 01/28] ns: " Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 02/28] timens: Add timens_offsets Dmitry Safonov
2019-06-14 13:11   ` Thomas Gleixner
2019-06-14 14:32     ` Dmitry Safonov
2019-07-29 22:26     ` Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 03/28] posix-clocks: add another call back to return clock time in ktime_t Dmitry Safonov
2019-06-14 13:32   ` Thomas Gleixner
2019-06-14 14:39     ` Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 04/28] timens: Introduce CLOCK_MONOTONIC offsets Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 05/28] timens: Introduce CLOCK_BOOTTIME offset Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 06/28] timerfd/timens: Take into account ns clock offsets Dmitry Safonov
2019-06-14 13:37   ` Thomas Gleixner
2019-06-16 17:43     ` Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 07/28] posix-timers/timens: Take into account " Dmitry Safonov
2019-06-14 13:42   ` Thomas Gleixner
2019-06-16 17:45     ` Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 08/28] timens/kernel: Take into account timens clock offsets in clock_nanosleep Dmitry Safonov
2019-06-14 13:49   ` Thomas Gleixner
2019-06-12 19:26 ` [PATCHv4 09/28] timens: Shift /proc/uptime Dmitry Safonov
2019-06-14 13:50   ` Thomas Gleixner
2019-06-16 17:48     ` Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 10/28] x86/vdso2c: Correct err messages on file opening Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 11/28] x86/vdso2c: Convert iterator to unsigned Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 12/28] x86/vdso/Makefile: Add vobjs32 Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 13/28] x86/vdso: Restrict splitting VVAR VMA Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 14/28] x86/vdso: Rename vdso_image {.data=>.text} Dmitry Safonov
2019-06-12 19:26 ` Dmitry Safonov [this message]
2019-06-14 13:58   ` [PATCHv4 15/28] x86/vdso: Add offsets page in vvar Thomas Gleixner
2019-06-16 17:49     ` Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 16/28] x86/vdso: Allocate timens vdso Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 17/28] x86/vdso: Switch image on setns()/unshare()/clone() Dmitry Safonov
2019-06-14 14:05   ` Thomas Gleixner
2019-06-16 17:51     ` Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 18/28] vdso: introduce timens_static_branch Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 19/28] timens: Add align for timens_offsets Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 20/28] timens/fs/proc: Introduce /proc/pid/timens_offsets Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 21/28] selftest/timens: Add Time Namespace test for supported clocks Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 22/28] selftest/timens: Add a test for timerfd Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 23/28] selftest/timens: Add a test for clock_nanosleep() Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 24/28] selftest/timens: Add procfs selftest Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 25/28] selftest/timens: Add timer offsets test Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 26/28] x86/vdso: Align VDSO functions by CPU L1 cache line Dmitry Safonov
2019-06-14 14:13   ` Thomas Gleixner
2019-06-23  5:26     ` Andrei Vagin
2019-06-12 19:26 ` [PATCHv4 27/28] selftests: Add a simple perf test for clock_gettime() Dmitry Safonov
2019-06-12 19:26 ` [PATCHv4 28/28] selftest/timens: Check that a right vdso is mapped after fork and exec Dmitry Safonov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190612192628.23797-16-dima@arista.com \
    --to=dima@arista.com \
    --cc=0x7f454c46@gmail.com \
    --cc=adrian@lisas.de \
    --cc=arnd@arndb.de \
    --cc=avagin@gmail.com \
    --cc=avagin@openvz.org \
    --cc=christian.brauner@ubuntu.com \
    --cc=containers@lists.linux-foundation.org \
    --cc=criu@openvz.org \
    --cc=ebiederm@xmission.com \
    --cc=gorcunov@openvz.org \
    --cc=hpa@zytor.com \
    --cc=jannh@google.com \
    --cc=jdike@addtoit.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=oleg@redhat.com \
    --cc=shuah@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=vincenzo.frascino@arm.com \
    --cc=x86@kernel.org \
    --cc=xemul@virtuozzo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).