linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Dmitry Safonov <dima@arista.com>
To: linux-kernel@vger.kernel.org
Cc: Dmitry Safonov <0x7f454c46@gmail.com>,
	Andrei Vagin <avagin@openvz.org>,
	Dmitry Safonov <dima@arista.com>, Adrian Reber <adrian@lisas.de>,
	Andy Lutomirski <luto@kernel.org>,
	Christian Brauner <christian.brauner@ubuntu.com>,
	Cyrill Gorcunov <gorcunov@openvz.org>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	"H. Peter Anvin" <hpa@zytor.com>, Ingo Molnar <mingo@redhat.com>,
	Jeff Dike <jdike@addtoit.com>, Oleg Nesterov <oleg@redhat.com>,
	Pavel Emelyanov <xemul@virtuozzo.com>,
	Shuah Khan <shuah@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	containers@lists.linux-foundation.org, criu@openvz.org,
	linux-api@vger.kernel.org, x86@kernel.org
Subject: [RFC 09/20] x86/vdso/timens: Add offsets page in vvar
Date: Wed, 19 Sep 2018 21:50:26 +0100	[thread overview]
Message-ID: <20180919205037.9574-10-dima@arista.com> (raw)
In-Reply-To: <20180919205037.9574-1-dima@arista.com>

From: Andrei Vagin <avagin@openvz.org>

As modern applications fetch time from vdso without entering the kernel,
it's needed to provide offsets for userspace code.

Allocate a page for timens offsets when constructing time namespace.
As vdso mappings are platform-specific, add Kconfig dependency for arch.

Signed-off-by: Andrei Vagin <avagin@openvz.org>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
 arch/Kconfig                          |  5 +++++
 arch/x86/Kconfig                      |  1 +
 arch/x86/entry/vdso/vclock_gettime.c  | 26 ++++++++++++++++++++++++++
 arch/x86/entry/vdso/vdso-layout.lds.S |  9 ++++++++-
 arch/x86/entry/vdso/vdso2c.c          |  3 +++
 arch/x86/entry/vdso/vma.c             | 12 ++++++++++++
 arch/x86/include/asm/vdso.h           |  1 +
 init/Kconfig                          |  1 +
 8 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 6801123932a5..411df0227a1d 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -681,6 +681,11 @@ config HAVE_ARCH_HASH
 config ISA_BUS_API
 	def_bool ISA
 
+config ARCH_HAS_VDSO_TIME_NS
+	bool
+	help
+	 VDSO can add time-ns offsets without entering kernel.
+
 #
 # ABI hall of shame
 #
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1a0be022f91d..4bcbdd1f1200 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -70,6 +70,7 @@ config X86
 	select ARCH_HAS_STRICT_MODULE_RWX
 	select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_HAS_VDSO_TIME_NS
 	select ARCH_HAS_ZONE_DEVICE		if X86_64
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index f19856d95c60..0594266740b9 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -21,6 +21,7 @@
 #include <linux/math64.h>
 #include <linux/time.h>
 #include <linux/kernel.h>
+#include <linux/timens_offsets.h>
 
 #define gtod (&VVAR(vsyscall_gtod_data))
 
@@ -38,6 +39,11 @@ extern u8 hvclock_page
 	__attribute__((visibility("hidden")));
 #endif
 
+#ifdef CONFIG_TIME_NS
+extern u8 timens_page
+	__attribute__((visibility("hidden")));
+#endif
+
 #ifndef BUILD_VDSO32
 
 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
@@ -225,6 +231,23 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
 	return mode;
 }
 
+notrace static __always_inline void monotonic_to_ns(struct timespec *ts)
+{
+#ifdef CONFIG_TIME_NS
+	struct timens_offsets *timens = (struct timens_offsets *) &timens_page;
+
+	ts->tv_sec += timens->monotonic_time_offset.tv_sec;
+	ts->tv_nsec += timens->monotonic_time_offset.tv_nsec;
+	if (ts->tv_nsec > NSEC_PER_SEC) {
+		ts->tv_nsec -= NSEC_PER_SEC;
+		ts->tv_sec++;
+	} else if (ts->tv_nsec < 0) {
+		ts->tv_nsec += NSEC_PER_SEC;
+		ts->tv_sec--;
+	}
+#endif
+}
+
 notrace static int __always_inline do_monotonic(struct timespec *ts)
 {
 	unsigned long seq;
@@ -243,6 +266,8 @@ notrace static int __always_inline do_monotonic(struct timespec *ts)
 	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
 	ts->tv_nsec = ns;
 
+	monotonic_to_ns(ts);
+
 	return mode;
 }
 
@@ -264,6 +289,7 @@ notrace static void do_monotonic_coarse(struct timespec *ts)
 		ts->tv_sec = gtod->monotonic_time_coarse_sec;
 		ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
 	} while (unlikely(gtod_read_retry(gtod, seq)));
+	monotonic_to_ns(ts);
 }
 
 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index acfd5ba7d943..e5c2e9deca03 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -17,6 +17,12 @@
 
 #define NUM_FAKE_SHDRS 13
 
+#ifdef CONFIG_TIME_NS
+# define TIMENS_SZ	PAGE_SIZE
+#else
+# define TIMENS_SZ	0
+#endif
+
 SECTIONS
 {
 	/*
@@ -26,7 +32,7 @@ SECTIONS
 	 * segment.
 	 */
 
-	vvar_start = . - 3 * PAGE_SIZE;
+	vvar_start = . - (3 * PAGE_SIZE + TIMENS_SZ);
 	vvar_page = vvar_start;
 
 	/* Place all vvars at the offsets in asm/vvar.h. */
@@ -38,6 +44,7 @@ SECTIONS
 
 	pvclock_page = vvar_start + PAGE_SIZE;
 	hvclock_page = vvar_start + 2 * PAGE_SIZE;
+	timens_page = vvar_start + 3 * PAGE_SIZE;
 
 	. = SIZEOF_HEADERS;
 
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 4674f58581a1..6c67cde7fe99 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -76,6 +76,7 @@ enum {
 	sym_hpet_page,
 	sym_pvclock_page,
 	sym_hvclock_page,
+	sym_timens_page,
 	sym_VDSO_FAKE_SECTION_TABLE_START,
 	sym_VDSO_FAKE_SECTION_TABLE_END,
 };
@@ -85,6 +86,7 @@ const int special_pages[] = {
 	sym_hpet_page,
 	sym_pvclock_page,
 	sym_hvclock_page,
+	sym_timens_page,
 };
 
 struct vdso_sym {
@@ -98,6 +100,7 @@ struct vdso_sym required_syms[] = {
 	[sym_hpet_page] = {"hpet_page", true},
 	[sym_pvclock_page] = {"pvclock_page", true},
 	[sym_hvclock_page] = {"hvclock_page", true},
+	[sym_timens_page] = {"timens_page", true},
 	[sym_VDSO_FAKE_SECTION_TABLE_START] = {
 		"VDSO_FAKE_SECTION_TABLE_START", false
 	},
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 8cc0395687b0..0f92227a4a7e 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -14,6 +14,7 @@
 #include <linux/elf.h>
 #include <linux/cpu.h>
 #include <linux/ptrace.h>
+#include <linux/time_namespace.h>
 #include <asm/pvclock.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
@@ -23,6 +24,7 @@
 #include <asm/desc.h>
 #include <asm/cpufeature.h>
 #include <asm/mshyperv.h>
+#include <asm/page.h>
 
 #if defined(CONFIG_X86_64)
 unsigned int __read_mostly vdso64_enabled = 1;
@@ -138,6 +140,16 @@ static int vvar_fault(const struct vm_special_mapping *sm,
 		if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
 			ret = vm_insert_pfn(vma, vmf->address,
 					    vmalloc_to_pfn(tsc_pg));
+	} else if (sym_offset == image->sym_timens_page) {
+		struct time_namespace *ns = current->nsproxy->time_ns;
+		unsigned long pfn;
+
+		if (!ns->offsets)
+			pfn = page_to_pfn(ZERO_PAGE(0));
+		else
+			pfn = page_to_pfn(virt_to_page(ns->offsets));
+
+		ret = vm_insert_pfn(vma, vmf->address, pfn);
 	}
 
 	if (ret == 0 || ret == -EBUSY)
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 27566e57e87d..619322065b8e 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -22,6 +22,7 @@ struct vdso_image {
 	long sym_hpet_page;
 	long sym_pvclock_page;
 	long sym_hvclock_page;
+	long sym_timens_page;
 	long sym_VDSO32_NOTE_MASK;
 	long sym___kernel_sigreturn;
 	long sym___kernel_rt_sigreturn;
diff --git a/init/Kconfig b/init/Kconfig
index dc2b40f7d73f..c9b250475ddb 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -929,6 +929,7 @@ config UTS_NS
 
 config TIME_NS
 	bool "TIME namespace"
+	depends on ARCH_HAS_VDSO_TIME_NS
 	default y
 	help
 	  In this namespace boottime and monotonic clocks can be set.
-- 
2.13.6


  parent reply	other threads:[~2018-09-19 20:50 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-19 20:50 [RFC 00/20] ns: Introduce Time Namespace Dmitry Safonov
2018-09-19 20:50 ` [RFC 01/20] " Dmitry Safonov
2018-09-28 18:20   ` Laurent Vivier
2018-09-19 20:50 ` [RFC 02/20] timens: Add timens_offsets Dmitry Safonov
2018-09-20 18:45   ` Cyrill Gorcunov
2018-09-20 22:14     ` Cyrill Gorcunov
2018-09-19 20:50 ` [RFC 03/20] timens: Introduce CLOCK_MONOTONIC offsets Dmitry Safonov
2018-09-19 20:50 ` [RFC 04/20] timens: Introduce CLOCK_BOOTTIME offset Dmitry Safonov
2018-09-30  3:18   ` [LKP] [timens] 3cc8de9dcb: RIP:posix_get_boottime kernel test robot
2018-09-19 20:50 ` [RFC 05/20] timerfd/timens: Take into account ns clock offsets Dmitry Safonov
2018-09-19 20:50 ` [RFC 06/20] kernel: Take into account timens clock offsets in clock_nanosleep Dmitry Safonov
2018-09-19 20:50 ` [RFC 07/20] timens: Shift /proc/uptime Dmitry Safonov
2018-09-19 20:50 ` [RFC 08/20] x86/vdso: Restrict splitting vvar vma Dmitry Safonov
2018-09-19 20:50 ` Dmitry Safonov [this message]
2018-09-19 20:50 ` [RFC 10/20] x86/vdso: Use set_normalized_timespec() to avoid 32 bit overflow Dmitry Safonov
2018-09-19 20:50 ` [RFC 11/20] x86/vdso: Purge timens page on setns()/unshare()/clone() Dmitry Safonov
2018-09-19 20:50 ` [RFC 12/20] x86/vdso: Look for vvar vma to purge timens page Dmitry Safonov
2018-09-19 20:50 ` [RFC 13/20] posix-timers/timens: Take into account clock offsets Dmitry Safonov
2018-09-30  3:11   ` [LKP] [posix] 25217c6e39: BUG:KASAN:null-ptr-deref_in_c kernel test robot
2018-09-19 20:50 ` [RFC 14/20] timens: Add align for timens_offsets Dmitry Safonov
2018-09-19 20:50 ` [RFC 15/20] timens: Optimize zero-offsets Dmitry Safonov
2018-09-19 20:50 ` [RFC 16/20] selftest: Add Time Namespace test for supported clocks Dmitry Safonov
2018-09-24 21:36   ` Shuah Khan
2018-09-19 20:50 ` [RFC 17/20] selftest/timens: Add test for timerfd Dmitry Safonov
2018-09-19 20:50 ` [RFC 18/20] selftest/timens: Add test for clock_nanosleep Dmitry Safonov
2018-09-19 20:50 ` [RFC 19/20] timens/selftest: Add procfs selftest Dmitry Safonov
2018-09-19 20:50 ` [RFC 20/20] timens/selftest: Add timer offsets test Dmitry Safonov
2018-09-21 12:27 ` [RFC 00/20] ns: Introduce Time Namespace Eric W. Biederman
2018-09-24 20:51   ` Andrey Vagin
2018-09-24 22:02     ` Eric W. Biederman
2018-09-25  1:42       ` Andrey Vagin
2018-09-26 17:36         ` Eric W. Biederman
2018-09-26 17:59           ` Dmitry Safonov
2018-09-27 21:30           ` Thomas Gleixner
2018-09-27 21:41             ` Thomas Gleixner
2018-10-01 23:20               ` Andrey Vagin
2018-10-02  6:15                 ` Thomas Gleixner
2018-10-02 21:05                   ` Dmitry Safonov
2018-10-02 21:26                     ` Thomas Gleixner
2018-09-28 17:03             ` Eric W. Biederman
2018-09-28 19:32               ` Thomas Gleixner
2018-10-01  9:05                 ` Eric W. Biederman
2018-10-01  9:15                 ` Setting monotonic time? Eric W. Biederman
2018-10-01 18:52                   ` Thomas Gleixner
2018-10-02 20:00                     ` Arnd Bergmann
2018-10-02 20:06                       ` Thomas Gleixner
2018-10-03  4:50                         ` Eric W. Biederman
2018-10-03  5:25                           ` Thomas Gleixner
2018-10-03  6:14                             ` Eric W. Biederman
2018-10-03  7:02                               ` Arnd Bergmann
2018-10-03  6:14                             ` Thomas Gleixner
2018-10-01 20:51                   ` Andrey Vagin
2018-10-02  6:16                     ` Thomas Gleixner
2018-10-21  1:41               ` [RFC 00/20] ns: Introduce Time Namespace Andrei Vagin
2018-10-21  3:54                 ` Andrei Vagin
2018-10-29 20:33                 ` Thomas Gleixner
2018-10-29 21:21                   ` Eric W. Biederman
2018-10-29 21:36                     ` Thomas Gleixner
2018-10-31 16:26                   ` Andrei Vagin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180919205037.9574-10-dima@arista.com \
    --to=dima@arista.com \
    --cc=0x7f454c46@gmail.com \
    --cc=adrian@lisas.de \
    --cc=avagin@openvz.org \
    --cc=christian.brauner@ubuntu.com \
    --cc=containers@lists.linux-foundation.org \
    --cc=criu@openvz.org \
    --cc=ebiederm@xmission.com \
    --cc=gorcunov@openvz.org \
    --cc=hpa@zytor.com \
    --cc=jdike@addtoit.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=oleg@redhat.com \
    --cc=shuah@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    --cc=xemul@virtuozzo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).