Linux-api Archive on lore.kernel.org
 help / color / Atom feed
From: Dmitry Safonov <dima@arista.com>
To: linux-kernel@vger.kernel.org
Cc: Dmitry Safonov <0x7f454c46@gmail.com>,
	Dmitry Safonov <dima@arista.com>, Adrian Reber <adrian@lisas.de>,
	Andrei Vagin <avagin@openvz.org>,
	Andy Lutomirski <luto@kernel.org>, Arnd Bergmann <arnd@arndb.de>,
	Christian Brauner <christian.brauner@ubuntu.com>,
	Cyrill Gorcunov <gorcunov@openvz.org>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	"H. Peter Anvin" <hpa@zytor.com>, Ingo Molnar <mingo@redhat.com>,
	Jann Horn <jannh@google.com>, Jeff Dike <jdike@addtoit.com>,
	Oleg Nesterov <oleg@redhat.com>,
	Pavel Emelyanov <xemul@virtuozzo.com>,
	Shuah Khan <shuah@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Vincenzo Frascino <vincenzo.frascino@arm.com>,
	containers@lists.linux-foundation.org, criu@openvz.org,
	linux-api@vger.kernel.org, x86@kernel.org
Subject: [PATCHv8 24/34] x86/vdso: Handle faults on timens page
Date: Tue, 12 Nov 2019 01:27:13 +0000
Message-ID: <20191112012724.250792-25-dima@arista.com> (raw)
In-Reply-To: <20191112012724.250792-1-dima@arista.com>

If a task belongs to a time namespace then the VVAR page which contains
the system wide VDSO data is replaced with a namespace specific page
which has the same layout as the VVAR page.

Co-developed-by: Andrei Vagin <avagin@gmail.com>
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
 arch/x86/entry/vdso/vma.c | 53 +++++++++++++++++++++++++++++++++++++--
 mm/mmap.c                 |  2 ++
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 5dab706aca2e..f6e13ab29d94 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -14,11 +14,13 @@
 #include <linux/elf.h>
 #include <linux/cpu.h>
 #include <linux/ptrace.h>
+#include <linux/time_namespace.h>
 #include <asm/pvclock.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
 #include <asm/vdso.h>
 #include <asm/vvar.h>
+#include <asm/tlb.h>
 #include <asm/page.h>
 #include <asm/desc.h>
 #include <asm/cpufeature.h>
@@ -107,10 +109,36 @@ static int vvar_mremap(const struct vm_special_mapping *sm,
 	return 0;
 }
 
+#ifdef CONFIG_TIME_NS
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	if (likely(vma->vm_mm == current->mm))
+		return current->nsproxy->time_ns->vvar_page;
+
+	/*
+	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
+	 * through interfaces like /proc/$pid/mem or
+	 * process_vm_{readv,writev}() as long as there's no .access()
+	 * in special_mapping_vmops().
+	 * For more details check_vma_flags() and __access_remote_vm()
+	 */
+
+	WARN(1, "vvar_page accessed remotely");
+
+	return NULL;
+}
+#else
+static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	return NULL;
+}
+#endif
+
 static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
 		      struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+	unsigned long pfn;
 	long sym_offset;
 
 	if (!image)
@@ -130,8 +158,21 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
 		return VM_FAULT_SIGBUS;
 
 	if (sym_offset == image->sym_vvar_page) {
-		return vmf_insert_pfn(vma, vmf->address,
-				__pa_symbol(&__vvar_page) >> PAGE_SHIFT);
+		struct page *timens_page = find_timens_vvar_page(vma);
+
+		pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;
+
+		/*
+		 * If a task belongs to a time namespace then a namespace
+		 * specific VVAR is mapped with the sym_vvar_page offset and
+		 * the real VVAR page is mapped with the sym_timens_page
+		 * offset.
+		 * See also the comment near timens_setup_vdso_data().
+		 */
+		if (timens_page)
+			pfn = page_to_pfn(timens_page);
+
+		return vmf_insert_pfn(vma, vmf->address, pfn);
 	} else if (sym_offset == image->sym_pvclock_page) {
 		struct pvclock_vsyscall_time_info *pvti =
 			pvclock_get_pvti_cpu0_va();
@@ -146,6 +187,14 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
 		if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
 			return vmf_insert_pfn(vma, vmf->address,
 					virt_to_phys(tsc_pg) >> PAGE_SHIFT);
+	} else if (sym_offset == image->sym_timens_page) {
+		struct page *timens_page = find_timens_vvar_page(vma);
+
+		if (!timens_page)
+			return VM_FAULT_SIGBUS;
+
+		pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;
+		return vmf_insert_pfn(vma, vmf->address, pfn);
 	}
 
 	return VM_FAULT_SIGBUS;
diff --git a/mm/mmap.c b/mm/mmap.c
index 4d4db76a07da..73ec982f8b82 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3357,6 +3357,8 @@ static const struct vm_operations_struct special_mapping_vmops = {
 	.fault = special_mapping_fault,
 	.mremap = special_mapping_mremap,
 	.name = special_mapping_name,
+	/* vDSO code relies that VVAR can't be accessed remotely */
+	.access = NULL,
 };
 
 static const struct vm_operations_struct legacy_special_mapping_vmops = {
-- 
2.24.0

  parent reply index

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-12  1:26 [PATCHv8 00/34] kernel: Introduce Time Namespace Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 01/34] lib/vdso: Add unlikely() hint into vdso_read_begin() Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 02/34] lib/vdso: make do_hres and do_coarse as __always_inline Dmitry Safonov
     [not found]   ` <20191112012724.250792-3-dima-nzgTgzXrdUbQT0dZR+AlfA@public.gmane.org>
2020-01-10  9:45     ` Vincenzo Frascino
2020-01-10 11:42       ` Thomas Gleixner
     [not found]         ` <878smfa66i.fsf-ecDvlHI5BZPZikZi3RtOZ1XZhhPuCNm+@public.gmane.org>
2020-01-10 11:47           ` Vincenzo Frascino
2020-01-10 12:02             ` Thomas Gleixner
     [not found]               ` <875zhja59q.fsf-ecDvlHI5BZPZikZi3RtOZ1XZhhPuCNm+@public.gmane.org>
2020-01-10 12:18                 ` Vincenzo Frascino
2020-01-13  5:27               ` Andrei Vagin
2019-11-12  1:26 ` [PATCHv8 03/34] ns: Introduce Time Namespace Dmitry Safonov
     [not found]   ` <20191112012724.250792-4-dima-nzgTgzXrdUbQT0dZR+AlfA@public.gmane.org>
2020-01-27 14:12     ` Dmitry Vyukov
     [not found]       ` <CACT4Y+b70bRRS2XD3yxhBoy4E-LFy_K3wMrjeuPmiEvaPe_c2Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2020-01-27 14:19         ` Dmitry Safonov
2020-02-17 14:20   ` Time Namespaces: CLONE_NEWTIME and clone3()? Michael Kerrisk
2020-02-17 14:59     ` Christian Brauner
2020-02-17 21:47       ` Michael Kerrisk (man-pages)
2020-02-17 23:03         ` Christian Brauner
2020-02-17 23:29           ` Thomas Gleixner
2020-02-18  2:37             ` Eric W. Biederman
2020-02-18 17:11           ` Adrian Reber
2020-02-18 17:26             ` Christian Brauner
2019-11-12  1:26 ` [PATCHv8 04/34] time: Add timens_offsets to be used for tasks in timens Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 05/34] posix-clocks: Rename the clock_get() callback to clock_get_timespec() Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 06/34] posix-clocks: Rename .clock_get_timespec() callbacks accordingly Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 07/34] alarmtimer: Rename gettime() callback to get_ktime() Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 08/34] alarmtimer: Provide get_timespec() callback Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 09/34] posix-clocks: Introduce clock_get_ktime() callback Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 10/34] posix-timers: Use clock_get_ktime() in common_timer_get() Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 11/34] posix-clocks: Wire up clock_gettime() with timens offsets Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 12/34] kernel: Add do_timens_ktime_to_host() helper Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 13/34] timerfd: Make timerfd_settime() time namespace aware Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 14/34] posix-timers: Make timer_settime() " Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 15/34] alarmtimer: Make nanosleep " Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 16/34] hrtimers: Prepare hrtimer_nanosleep() for time namespaces Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 17/34] posix-timers: Make clock_nanosleep() time namespace aware Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 18/34] fs/proc: Respect boottime inside time namespace for /proc/uptime Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 19/34] x86/vdso: Restrict splitting VVAR VMA Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 20/34] lib/vdso: Prepare for time namespace support Dmitry Safonov
     [not found]   ` <20191112012724.250792-21-dima-nzgTgzXrdUbQT0dZR+AlfA@public.gmane.org>
2020-01-12 10:32     ` Thomas Gleixner
2019-11-12  1:27 ` [PATCHv8 21/34] x86/vdso: Provide vdso_data offset on vvar_page Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 22/34] x86/vdso: Add timens page Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 23/34] time: Allocate per-timens vvar page Dmitry Safonov
2019-11-12  1:27 ` Dmitry Safonov [this message]
2019-11-12  1:27 ` [PATCHv8 25/34] x86/vdso: On timens page fault prefault also VVAR page Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 26/34] x86/vdso: Zap vvar pages on switch a time namspace Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 27/34] fs/proc: Introduce /proc/pid/timens_offsets Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 28/34] selftests/timens: Add Time Namespace test for supported clocks Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 29/34] selftests/timens: Add a test for timerfd Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 30/34] selftests/timens: Add a test for clock_nanosleep() Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 31/34] selftests/timens: Add procfs selftest Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 32/34] selftests/timens: Add timer offsets test Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 33/34] selftests/timens: Add a simple perf test for clock_gettime() Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 34/34] selftests/timens: Check for right timens offsets after fork and exec Dmitry Safonov
2019-11-21 18:05 ` [PATCHv8 00/34] kernel: Introduce Time Namespace Andrei Vagin
2019-12-11 20:38   ` Dmitry Safonov
2020-01-09 21:09     ` Thomas Gleixner
2020-01-10  9:52       ` Vincenzo Frascino

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191112012724.250792-25-dima@arista.com \
    --to=dima@arista.com \
    --cc=0x7f454c46@gmail.com \
    --cc=adrian@lisas.de \
    --cc=arnd@arndb.de \
    --cc=avagin@openvz.org \
    --cc=christian.brauner@ubuntu.com \
    --cc=containers@lists.linux-foundation.org \
    --cc=criu@openvz.org \
    --cc=ebiederm@xmission.com \
    --cc=gorcunov@openvz.org \
    --cc=hpa@zytor.com \
    --cc=jannh@google.com \
    --cc=jdike@addtoit.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=oleg@redhat.com \
    --cc=shuah@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=vincenzo.frascino@arm.com \
    --cc=x86@kernel.org \
    --cc=xemul@virtuozzo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-api Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-api/0 linux-api/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-api linux-api/ https://lore.kernel.org/linux-api \
		linux-api@vger.kernel.org
	public-inbox-index linux-api

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-api


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git