Linux-api Archive on lore.kernel.org
 help / color / Atom feed
From: Dmitry Safonov <dima@arista.com>
To: linux-kernel@vger.kernel.org
Cc: Dmitry Safonov <0x7f454c46@gmail.com>,
	Dmitry Safonov <dima@arista.com>, Adrian Reber <adrian@lisas.de>,
	Andrei Vagin <avagin@openvz.org>,
	Andy Lutomirski <luto@kernel.org>, Arnd Bergmann <arnd@arndb.de>,
	Christian Brauner <christian.brauner@ubuntu.com>,
	Cyrill Gorcunov <gorcunov@openvz.org>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	"H. Peter Anvin" <hpa@zytor.com>, Ingo Molnar <mingo@redhat.com>,
	Jann Horn <jannh@google.com>, Jeff Dike <jdike@addtoit.com>,
	Oleg Nesterov <oleg@redhat.com>,
	Pavel Emelyanov <xemul@virtuozzo.com>,
	Shuah Khan <shuah@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Vincenzo Frascino <vincenzo.frascino@arm.com>,
	containers@lists.linux-foundation.org, criu@openvz.org,
	linux-api@vger.kernel.org, x86@kernel.org
Subject: [PATCHv8 22/34] x86/vdso: Add timens page
Date: Tue, 12 Nov 2019 01:27:11 +0000
Message-ID: <20191112012724.250792-23-dima@arista.com> (raw)
In-Reply-To: <20191112012724.250792-1-dima@arista.com>

To support time namespaces in the VDSO with a minimal impact on regular non
time namespace affected tasks, the namespace handling needs to be hidden in
a slow path.

The most obvious place is vdso_seq_begin(). If a task belongs to a time
namespace then the VVAR page which contains the system wide VDSO data is
replaced with a namespace specific page which has the same layout as the
VVAR page. That page has vdso_data->seq set to 1 to enforce the slow path
and vdso_data->clock_mode set to VCLOCK_TIMENS to enforce the time
namespace handling path.

The extra check in the case that vdso_data->seq is odd, e.g. a concurrent
update of the VDSO data is in progress, is not really affecting regular
tasks which are not part of a time namespace as the task is spin waiting
for the update to finish and vdso_data->seq to become even again.

If a time namespace task hits that code path, it invokes the corresponding
time getter function which retrieves the real VVAR page, reads host time
and then adds the offset for the requested clock which is stored in the
special VVAR page.

Allocate Timens page among VVAR pages and place vdso_data on it.
Provide __arch_get_timens_vdso_data() helper for VDSO code to get the
code-relative position of VVARs on that special page.

Co-developed-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
 arch/x86/Kconfig                         |  1 +
 arch/x86/entry/vdso/vdso-layout.lds.S    | 11 +++++++++--
 arch/x86/entry/vdso/vdso2c.c             |  3 +++
 arch/x86/include/asm/vdso.h              |  1 +
 arch/x86/include/asm/vdso/gettimeofday.h |  9 +++++++++
 arch/x86/include/asm/vvar.h              |  5 ++++-
 6 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2329f9e5b57c..58dacb97545f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -126,6 +126,7 @@ config X86
 	select GENERIC_STRNLEN_USER
 	select GENERIC_TIME_VSYSCALL
 	select GENERIC_GETTIMEOFDAY
+	select GENERIC_VDSO_TIME_NS
 	select GUP_GET_PTE_LOW_HIGH		if X86_PAE
 	select HARDLOCKUP_CHECK_TIMESTAMP	if X86_64
 	select HAVE_ACPI_APEI			if ACPI
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index 2330daad67c3..ea7e0155c604 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -16,8 +16,8 @@ SECTIONS
 	 * segment.
 	 */
 
-	vvar_start = . - 3 * PAGE_SIZE;
-	vvar_page = vvar_start;
+	vvar_start = . - 4 * PAGE_SIZE;
+	vvar_page  = vvar_start;
 
 	/* Place all vvars at the offsets in asm/vvar.h. */
 #define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
@@ -26,6 +26,13 @@ SECTIONS
 
 	pvclock_page = vvar_start + PAGE_SIZE;
 	hvclock_page = vvar_start + 2 * PAGE_SIZE;
+	timens_page  = vvar_start + 3 * PAGE_SIZE;
+
+#undef _ASM_X86_VVAR_H
+	/* Place all vvars in timens too at the offsets in asm/vvar.h. */
+#define EMIT_VVAR(name, offset) timens_ ## name = timens_page + offset;
+#include <asm/vvar.h>
+#undef EMIT_VVAR
 
 	. = SIZEOF_HEADERS;
 
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 3a4d8d4d39f8..3842873b3ae3 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -75,12 +75,14 @@ enum {
 	sym_vvar_page,
 	sym_pvclock_page,
 	sym_hvclock_page,
+	sym_timens_page,
 };
 
 const int special_pages[] = {
 	sym_vvar_page,
 	sym_pvclock_page,
 	sym_hvclock_page,
+	sym_timens_page,
 };
 
 struct vdso_sym {
@@ -93,6 +95,7 @@ struct vdso_sym required_syms[] = {
 	[sym_vvar_page] = {"vvar_page", true},
 	[sym_pvclock_page] = {"pvclock_page", true},
 	[sym_hvclock_page] = {"hvclock_page", true},
+	[sym_timens_page] = {"timens_page", true},
 	{"VDSO32_NOTE_MASK", true},
 	{"__kernel_vsyscall", true},
 	{"__kernel_sigreturn", true},
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 230474e2ddb5..bbcdc7b8f963 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -21,6 +21,7 @@ struct vdso_image {
 	long sym_vvar_page;
 	long sym_pvclock_page;
 	long sym_hvclock_page;
+	long sym_timens_page;
 	long sym_VDSO32_NOTE_MASK;
 	long sym___kernel_sigreturn;
 	long sym___kernel_rt_sigreturn;
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index e9ee139cf29e..4f938f8e756b 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -21,6 +21,7 @@
 #include <clocksource/hyperv_timer.h>
 
 #define __vdso_data (VVAR(_vdso_data))
+#define __timens_vdso_data (TIMENS(_vdso_data))
 
 #define VDSO_HAS_TIME 1
 
@@ -56,6 +57,14 @@ extern struct ms_hyperv_tsc_page hvclock_page
 	__attribute__((visibility("hidden")));
 #endif
 
+#ifdef CONFIG_TIME_NS
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(void)
+{
+	return __timens_vdso_data;
+}
+#endif
+
 #ifndef BUILD_VDSO32
 
 static __always_inline
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index ff2de3025388..183e98e49ab9 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -33,9 +33,12 @@ extern char __vvar_page;
 
 #define DECLARE_VVAR(offset, type, name)				\
 	extern type vvar_ ## name[CS_BASES]				\
-	__attribute__((visibility("hidden")));
+	__attribute__((visibility("hidden")));				\
+	extern type timens_ ## name[CS_BASES]				\
+	__attribute__((visibility("hidden")));				\
 
 #define VVAR(name) (vvar_ ## name)
+#define TIMENS(name) (timens_ ## name)
 
 #define DEFINE_VVAR(type, name)						\
 	type name[CS_BASES]						\
-- 
2.24.0

  parent reply index

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-12  1:26 [PATCHv8 00/34] kernel: Introduce Time Namespace Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 01/34] lib/vdso: Add unlikely() hint into vdso_read_begin() Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 02/34] lib/vdso: make do_hres and do_coarse as __always_inline Dmitry Safonov
     [not found]   ` <20191112012724.250792-3-dima-nzgTgzXrdUbQT0dZR+AlfA@public.gmane.org>
2020-01-10  9:45     ` Vincenzo Frascino
2020-01-10 11:42       ` Thomas Gleixner
     [not found]         ` <878smfa66i.fsf-ecDvlHI5BZPZikZi3RtOZ1XZhhPuCNm+@public.gmane.org>
2020-01-10 11:47           ` Vincenzo Frascino
2020-01-10 12:02             ` Thomas Gleixner
     [not found]               ` <875zhja59q.fsf-ecDvlHI5BZPZikZi3RtOZ1XZhhPuCNm+@public.gmane.org>
2020-01-10 12:18                 ` Vincenzo Frascino
2020-01-13  5:27               ` Andrei Vagin
2019-11-12  1:26 ` [PATCHv8 03/34] ns: Introduce Time Namespace Dmitry Safonov
     [not found]   ` <20191112012724.250792-4-dima-nzgTgzXrdUbQT0dZR+AlfA@public.gmane.org>
2020-01-27 14:12     ` Dmitry Vyukov
     [not found]       ` <CACT4Y+b70bRRS2XD3yxhBoy4E-LFy_K3wMrjeuPmiEvaPe_c2Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2020-01-27 14:19         ` Dmitry Safonov
2020-02-17 14:20   ` Time Namespaces: CLONE_NEWTIME and clone3()? Michael Kerrisk
2020-02-17 14:59     ` Christian Brauner
2020-02-17 21:47       ` Michael Kerrisk (man-pages)
2020-02-17 23:03         ` Christian Brauner
2020-02-17 23:29           ` Thomas Gleixner
2020-02-18  2:37             ` Eric W. Biederman
2020-02-18 17:11           ` Adrian Reber
2020-02-18 17:26             ` Christian Brauner
2019-11-12  1:26 ` [PATCHv8 04/34] time: Add timens_offsets to be used for tasks in timens Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 05/34] posix-clocks: Rename the clock_get() callback to clock_get_timespec() Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 06/34] posix-clocks: Rename .clock_get_timespec() callbacks accordingly Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 07/34] alarmtimer: Rename gettime() callback to get_ktime() Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 08/34] alarmtimer: Provide get_timespec() callback Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 09/34] posix-clocks: Introduce clock_get_ktime() callback Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 10/34] posix-timers: Use clock_get_ktime() in common_timer_get() Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 11/34] posix-clocks: Wire up clock_gettime() with timens offsets Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 12/34] kernel: Add do_timens_ktime_to_host() helper Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 13/34] timerfd: Make timerfd_settime() time namespace aware Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 14/34] posix-timers: Make timer_settime() " Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 15/34] alarmtimer: Make nanosleep " Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 16/34] hrtimers: Prepare hrtimer_nanosleep() for time namespaces Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 17/34] posix-timers: Make clock_nanosleep() time namespace aware Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 18/34] fs/proc: Respect boottime inside time namespace for /proc/uptime Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 19/34] x86/vdso: Restrict splitting VVAR VMA Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 20/34] lib/vdso: Prepare for time namespace support Dmitry Safonov
     [not found]   ` <20191112012724.250792-21-dima-nzgTgzXrdUbQT0dZR+AlfA@public.gmane.org>
2020-01-12 10:32     ` Thomas Gleixner
2019-11-12  1:27 ` [PATCHv8 21/34] x86/vdso: Provide vdso_data offset on vvar_page Dmitry Safonov
2019-11-12  1:27 ` Dmitry Safonov [this message]
2019-11-12  1:27 ` [PATCHv8 23/34] time: Allocate per-timens vvar page Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 24/34] x86/vdso: Handle faults on timens page Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 25/34] x86/vdso: On timens page fault prefault also VVAR page Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 26/34] x86/vdso: Zap vvar pages on switch a time namspace Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 27/34] fs/proc: Introduce /proc/pid/timens_offsets Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 28/34] selftests/timens: Add Time Namespace test for supported clocks Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 29/34] selftests/timens: Add a test for timerfd Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 30/34] selftests/timens: Add a test for clock_nanosleep() Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 31/34] selftests/timens: Add procfs selftest Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 32/34] selftests/timens: Add timer offsets test Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 33/34] selftests/timens: Add a simple perf test for clock_gettime() Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 34/34] selftests/timens: Check for right timens offsets after fork and exec Dmitry Safonov
2019-11-21 18:05 ` [PATCHv8 00/34] kernel: Introduce Time Namespace Andrei Vagin
2019-12-11 20:38   ` Dmitry Safonov
2020-01-09 21:09     ` Thomas Gleixner
2020-01-10  9:52       ` Vincenzo Frascino

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191112012724.250792-23-dima@arista.com \
    --to=dima@arista.com \
    --cc=0x7f454c46@gmail.com \
    --cc=adrian@lisas.de \
    --cc=arnd@arndb.de \
    --cc=avagin@openvz.org \
    --cc=christian.brauner@ubuntu.com \
    --cc=containers@lists.linux-foundation.org \
    --cc=criu@openvz.org \
    --cc=ebiederm@xmission.com \
    --cc=gorcunov@openvz.org \
    --cc=hpa@zytor.com \
    --cc=jannh@google.com \
    --cc=jdike@addtoit.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=oleg@redhat.com \
    --cc=shuah@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=vincenzo.frascino@arm.com \
    --cc=x86@kernel.org \
    --cc=xemul@virtuozzo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-api Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-api/0 linux-api/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-api linux-api/ https://lore.kernel.org/linux-api \
		linux-api@vger.kernel.org
	public-inbox-index linux-api

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-api


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git