linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Andrei Vagin <avagin@gmail.com>
To: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrei Vagin <avagin@gmail.com>,
	Rasmus Villemoes <linux@rasmusvillemoes.dk>,
	Dmitry Safonov <dima@arista.com>,
	LKML <linux-kernel@vger.kernel.org>,
	Adrian Reber <adrian@lisas.de>, Andrei Vagin <avagin@openvz.org>,
	Andy Lutomirski <luto@kernel.org>,
	Andy Tucker <agtucker@google.com>, Arnd Bergmann <arnd@arndb.de>,
	Christian Brauner <christian.brauner@ubuntu.com>,
	Cyrill Gorcunov <gorcunov@openvz.org>,
	Dmitry Safonov <0x7f454c46@gmail.com>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	"H. Peter Anvin" <hpa@zytor.com>, Ingo Molnar <mingo@redhat.com>,
	Jeff Dike <jdike@addtoit.com>, Oleg Nesterov <oleg@redhat.com>,
	Pavel Emelyanov <xemul@virtuozzo.com>,
	Shuah Khan <shuah@kernel.org>,
	containers@lists.linux-foundation.org, criu@openvz.org,
	linux-api@vger.kernel.org, x86@kernel.org,
	Vincenzo Frascino <vincenzo.frascino@arm.com>,
	Will Deacon <will.deacon@arm.com>
Subject: [PATCH RFC] vdso: introduce timens_static_branch
Date: Wed, 27 Mar 2019 11:06:51 -0700	[thread overview]
Message-ID: <20190327180651.14495-1-avagin@gmail.com> (raw)
In-Reply-To: <20190327175957.GA9309@gmail.com>

As it has been discussed on timens RFC, adding a new conditional branch
`if (inside_time_ns)` on VDSO for all processes is undesirable.

Addressing those problems, there are two versions of VDSO's .so:
for host tasks (without any penalty) and for processes inside of time
namespace with clk_to_ns() that subtracts offsets from host's time.

This patch introduces timens_static_branch(), which is similar with
static_branch_unlikely.

The timens code in vdso looks like this:

       if (timens_static_branch()) {
               clk_to_ns(clk, ts);
       }

The version of vdso which is compiled from sources will never execute
clk_to_ns(). And then we can patch the 'no-op' in the straight-line
codepath with a 'jump' instruction to the out-of-line true branch and
get the timens version of the vdso library.

Cc: Dmitry Safonov <dima@arista.com>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Andrei Vagin <avagin@gmail.com>
---
 arch/x86/entry/vdso/vclock_gettime.c  | 21 ++++++++++++++-------
 arch/x86/entry/vdso/vdso-layout.lds.S |  1 +
 arch/x86/entry/vdso/vdso2c.h          | 11 ++++++++++-
 arch/x86/entry/vdso/vma.c             | 19 +++++++++++++++++++
 arch/x86/include/asm/jump_label.h     | 14 ++++++++++++++
 arch/x86/include/asm/vdso.h           |  1 +
 include/linux/jump_label.h            |  5 +++++
 7 files changed, 64 insertions(+), 8 deletions(-)

diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index cb55bd994497..74de42f1f7d8 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -18,6 +18,7 @@
 #include <asm/msr.h>
 #include <asm/pvclock.h>
 #include <asm/mshyperv.h>
+#include <asm/jump_label.h>
 #include <linux/math64.h>
 #include <linux/time.h>
 #include <linux/kernel.h>
@@ -39,7 +40,7 @@ extern u8 hvclock_page
 	__attribute__((visibility("hidden")));
 #endif
 
-#ifdef BUILD_VDSO_TIME_NS
+#ifdef CONFIG_TIME_NS
 extern u8 timens_page
 	__attribute__((visibility("hidden")));
 #endif
@@ -145,9 +146,9 @@ notrace static inline u64 vgetcyc(int mode)
 	return U64_MAX;
 }
 
+#ifdef CONFIG_TIME_NS
 notrace static __always_inline void clk_to_ns(clockid_t clk, struct timespec *ts)
 {
-#ifdef BUILD_VDSO_TIME_NS
 	struct timens_offsets *timens = (struct timens_offsets *) &timens_page;
 	struct timespec64 *offset64;
 
@@ -173,9 +174,12 @@ notrace static __always_inline void clk_to_ns(clockid_t clk, struct timespec *ts
 		ts->tv_nsec += NSEC_PER_SEC;
 		ts->tv_sec--;
 	}
-
-#endif
 }
+#define _timens_static_branch_unlikely timens_static_branch_unlikely
+#else
+notrace static __always_inline void clk_to_ns(clockid_t clk, struct timespec *ts) {}
+notrace static __always_inline bool _timens_static_branch_unlikely(void) { return false; }
+#endif
 
 notrace static int do_hres(clockid_t clk, struct timespec *ts)
 {
@@ -203,8 +207,9 @@ notrace static int do_hres(clockid_t clk, struct timespec *ts)
 	ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
 	ts->tv_nsec = ns;
 
-	clk_to_ns(clk, ts);
-
+	if (_timens_static_branch_unlikely()) {
+		clk_to_ns(clk, ts);
+	}
 	return 0;
 }
 
@@ -219,7 +224,9 @@ notrace static void do_coarse(clockid_t clk, struct timespec *ts)
 		ts->tv_nsec = base->nsec;
 	} while (unlikely(gtod_read_retry(gtod, seq)));
 
-	clk_to_ns(clk, ts);
+	if (_timens_static_branch_unlikely()) {
+		clk_to_ns(clk, ts);
+	}
 }
 
 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index ba216527e59f..69dbe4821aa5 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -45,6 +45,7 @@ SECTIONS
 	.gnu.version	: { *(.gnu.version) }
 	.gnu.version_d	: { *(.gnu.version_d) }
 	.gnu.version_r	: { *(.gnu.version_r) }
+	__jump_table	: { *(__jump_table) }	:text
 
 	.dynamic	: { *(.dynamic) }		:text	:dynamic
 
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 660f725a02c1..e4eef5e1c6ac 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -16,7 +16,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
 	unsigned int i, syms_nr;
 	unsigned long j;
 	ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
-		*alt_sec = NULL;
+		*alt_sec = NULL, *jump_table_sec = NULL;
 	ELF(Dyn) *dyn = 0, *dyn_end = 0;
 	const char *secstrings;
 	INT_BITS syms[NSYMS] = {};
@@ -78,6 +78,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
 		if (!strcmp(secstrings + GET_LE(&sh->sh_name),
 			    ".altinstructions"))
 			alt_sec = sh;
+		if (!strcmp(secstrings + GET_LE(&sh->sh_name),
+			    "__jump_table"))
+			jump_table_sec  = sh;
 	}
 
 	if (!symtab_hdr)
@@ -165,6 +168,12 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
 		fprintf(outfile, "\t.alt_len = %lu,\n",
 			(unsigned long)GET_LE(&alt_sec->sh_size));
 	}
+	if (jump_table_sec) {
+		fprintf(outfile, "\t.jump_table = %lu,\n",
+			(unsigned long)GET_LE(&jump_table_sec->sh_offset));
+		fprintf(outfile, "\t.jump_table_len = %lu,\n",
+			(unsigned long)GET_LE(&jump_table_sec->sh_size));
+	}
 	for (i = 0; i < NSYMS; i++) {
 		if (required_syms[i].export && syms[i])
 			fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n",
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 0b8d9f6f0ce3..5c0e6491aefb 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -15,6 +15,7 @@
 #include <linux/cpu.h>
 #include <linux/ptrace.h>
 #include <linux/time_namespace.h>
+#include <linux/jump_label.h>
 #include <asm/pvclock.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
@@ -38,6 +39,22 @@ static __init int vdso_setup(char *s)
 __setup("vdso=", vdso_setup);
 #endif
 
+#ifdef CONFIG_TIME_NS
+static __init int apply_jump_tables(struct vdso_jump_entry *ent, unsigned long nr)
+{
+	while (nr--) {
+		void *code_addr	= (void *)ent + ent->code;
+		long target_addr = (long) ent->target - (ent->code + JUMP_LABEL_NOP_SIZE);
+		((char *)code_addr)[0] = 0xe9; /* JMP rel32 */
+		*((long *)(code_addr + 1)) = (long)target_addr;
+
+		ent++;
+	}
+
+	return 0;
+}
+#endif
+
 void __init init_vdso_image(struct vdso_image *image)
 {
 	BUG_ON(image->size % PAGE_SIZE != 0);
@@ -51,6 +68,8 @@ void __init init_vdso_image(struct vdso_image *image)
 		return;
 
 	memcpy(image->text_timens, image->text, image->size);
+	apply_jump_tables((struct vdso_jump_entry *)(image->text_timens + image->jump_table),
+			image->jump_table_len / sizeof(struct vdso_jump_entry));
 #endif
 }
 
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 65191ce8e1cf..1784aa49cc82 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -51,6 +51,20 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool
 	return true;
 }
 
+static __always_inline bool timens_static_branch_unlikely(void)
+{
+	asm_volatile_goto("1:\n\t"
+		".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 "2: .word 1b - 2b, %l[l_yes] - 2b\n\t"
+		 ".popsection\n\t"
+		 : :  :  : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
 #else	/* __ASSEMBLY__ */
 
 .macro STATIC_JUMP_IF_TRUE target, key, def
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 583133446874..883151c3a032 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -16,6 +16,7 @@ struct vdso_image {
 	unsigned long size;   /* Always a multiple of PAGE_SIZE */
 
 	unsigned long alt, alt_len;
+	unsigned long jump_table, jump_table_len;
 
 	long sym_vvar_start;  /* Negative offset to the vvar area */
 
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 3e113a1fa0f1..69854a05d2f2 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -125,6 +125,11 @@ struct jump_entry {
 	long key;	// key may be far away from the core kernel under KASLR
 };
 
+struct vdso_jump_entry {
+	u16 code;
+	u16 target;
+};
+
 static inline unsigned long jump_entry_code(const struct jump_entry *entry)
 {
 	return (unsigned long)&entry->code + entry->code;
-- 
2.20.1


  parent reply	other threads:[~2019-03-27 19:21 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-06  0:10 [PATCH 00/32] kernel: Introduce Time Namespace Dmitry Safonov
2019-02-06  0:10 ` [PATCH 01/32] ns: " Dmitry Safonov
2019-02-06  0:10 ` [PATCH 02/32] timens: Add timens_offsets Dmitry Safonov
2019-02-06  0:10 ` [PATCH 03/32] timens: Introduce CLOCK_MONOTONIC offsets Dmitry Safonov
2019-02-07 21:40   ` Thomas Gleixner
2019-02-08  9:02     ` Andrei Vagin
2019-02-08  9:46     ` Thomas Gleixner
2019-02-06  0:10 ` [PATCH 04/32] timens: Introduce CLOCK_BOOTTIME offset Dmitry Safonov
2019-02-06  0:10 ` [PATCH 05/32] timerfd/timens: Take into account ns clock offsets Dmitry Safonov
2019-02-06  8:52   ` Cyrill Gorcunov
2019-02-06  8:55     ` Cyrill Gorcunov
2019-02-07  6:38     ` Andrei Vagin
2019-02-06  0:10 ` [PATCH 06/32] posix-timers/timens: Take into account " Dmitry Safonov
2019-02-06  0:10 ` [PATCH 07/32] timens/kernel: Take into account timens clock offsets in clock_nanosleep Dmitry Safonov
2019-02-08  7:56   ` Thomas Gleixner
2019-02-06  0:10 ` [PATCH 08/32] timens: Shift /proc/uptime Dmitry Safonov
2019-02-06  0:10 ` [PATCH 09/32] x86/vdso2c: Correct err messages on file opening Dmitry Safonov
2019-02-06  0:10 ` [PATCH 10/32] x86/vdso2c: Convert iterator to unsigned Dmitry Safonov
2019-02-06  0:10 ` [PATCH 11/32] x86/vdso/Makefile: Add vobjs32 Dmitry Safonov
2019-02-06  0:10 ` [PATCH 12/32] x86/vdso/timens: Add offsets page in vvar Dmitry Safonov
2019-02-06  0:10 ` [PATCH 13/32] x86/vdso: Build timens .so(s) Dmitry Safonov
2019-02-06  0:10 ` [PATCH 14/32] x86/VDSO: Build VDSO with -ffunction-sections Dmitry Safonov
2019-02-06  0:10 ` [PATCH 15/32] x86/vdso2c: Optionally produce linker script for vdso entries Dmitry Safonov
2019-02-06  0:10 ` [PATCH 16/32] x86/vdso: Generate vdso{,32}-timens.lds Dmitry Safonov
2019-02-07  8:31   ` Rasmus Villemoes
2019-02-07 16:11     ` Dmitry Safonov
2019-02-08  9:57     ` Thomas Gleixner
2019-02-08 15:18       ` Dmitry Safonov
2019-03-27 18:00       ` Andrei Vagin
2019-03-27 18:06         ` [PATCH RFC] x86/asm: Introduce static_retcall(s) Andrei Vagin
2019-03-27 18:06         ` Andrei Vagin [this message]
2019-02-06  0:10 ` [PATCH 17/32] x86/vdso2c: Sort vdso entries by addresses for linker script Dmitry Safonov
2019-02-06  0:10 ` [PATCH 18/32] x86/vdso.lds: Align !timens (host's) vdso.so entries Dmitry Safonov
2019-02-06  0:10 ` [PATCH 19/32] x86/vdso2c: Align LOCAL symbols between vdso{-timens,}.so Dmitry Safonov
2019-02-06  0:10 ` [PATCH 20/32] x86/vdso: Initialize timens 64-bit vdso Dmitry Safonov
2019-02-06  0:10 ` [PATCH 21/32] x86/vdso: Switch image on setns()/unshare()/clone() Dmitry Safonov
2019-02-06  0:10 ` [PATCH 22/32] timens: Add align for timens_offsets Dmitry Safonov
2019-02-06  0:10 ` [PATCH 23/32] timens/fs/proc: Introduce /proc/pid/timens_offsets Dmitry Safonov
2019-02-06  0:10 ` [PATCH 24/32] selftest/timens: Add Time Namespace test for supported clocks Dmitry Safonov
2019-02-06  0:10 ` [PATCH 25/32] selftest/timens: Add a test for timerfd Dmitry Safonov
2019-02-06  0:11 ` [PATCH 26/32] selftest/timens: Add a test for clock_nanosleep() Dmitry Safonov
2019-02-06  0:11 ` [PATCH 27/32] selftest/timens: Add procfs selftest Dmitry Safonov
2019-02-06  0:11 ` [PATCH 28/32] selftest/timens: Add timer offsets test Dmitry Safonov
2019-02-06  0:11 ` [PATCH 29/32] selftests: Add a simple perf test for clock_gettime() Dmitry Safonov
2019-02-06  0:11 ` [PATCH 30/32] selftest/timens: Check that a right vdso is mapped after fork and exec Dmitry Safonov
2019-02-06  0:11 ` [PATCH 31/32] x86/vdso: Align VDSO functions by CPU L1 cache line Dmitry Safonov
2019-02-06  0:11 ` [PATCH 32/32] x86/vdso: Restrict splitting VVAR VMA Dmitry Safonov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190327180651.14495-1-avagin@gmail.com \
    --to=avagin@gmail.com \
    --cc=0x7f454c46@gmail.com \
    --cc=adrian@lisas.de \
    --cc=agtucker@google.com \
    --cc=arnd@arndb.de \
    --cc=avagin@openvz.org \
    --cc=christian.brauner@ubuntu.com \
    --cc=containers@lists.linux-foundation.org \
    --cc=criu@openvz.org \
    --cc=dima@arista.com \
    --cc=ebiederm@xmission.com \
    --cc=gorcunov@openvz.org \
    --cc=hpa@zytor.com \
    --cc=jdike@addtoit.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@rasmusvillemoes.dk \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=oleg@redhat.com \
    --cc=shuah@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=vincenzo.frascino@arm.com \
    --cc=will.deacon@arm.com \
    --cc=x86@kernel.org \
    --cc=xemul@virtuozzo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).