Linux-api Archive on lore.kernel.org
 help / color / Atom feed
From: Dmitry Safonov <dima@arista.com>
To: linux-kernel@vger.kernel.org
Cc: Dmitry Safonov <0x7f454c46@gmail.com>,
	Andrei Vagin <avagin@gmail.com>, Dmitry Safonov <dima@arista.com>,
	Adrian Reber <adrian@lisas.de>, Andrei Vagin <avagin@openvz.org>,
	Andy Lutomirski <luto@kernel.org>, Arnd Bergmann <arnd@arndb.de>,
	Christian Brauner <christian.brauner@ubuntu.com>,
	Cyrill Gorcunov <gorcunov@openvz.org>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	"H. Peter Anvin" <hpa@zytor.com>, Ingo Molnar <mingo@redhat.com>,
	Jann Horn <jannh@google.com>, Jeff Dike <jdike@addtoit.com>,
	Oleg Nesterov <oleg@redhat.com>,
	Pavel Emelyanov <xemul@virtuozzo.com>,
	Shuah Khan <shuah@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Vincenzo Frascino <vincenzo.frascino@arm.com>,
	containers@lists.linux-foundation.org, criu@openvz.org,
	linux-api
Subject: [PATCHv8 27/34] fs/proc: Introduce /proc/pid/timens_offsets
Date: Tue, 12 Nov 2019 01:27:16 +0000
Message-ID: <20191112012724.250792-28-dima@arista.com> (raw)
In-Reply-To: <20191112012724.250792-1-dima@arista.com>

From: Andrei Vagin <avagin@gmail.com>

API to set time namespace offsets for children processes, i.e.:
echo "clockid off_ses off_nsec" > /proc/self/timens_offsets

Signed-off-by: Andrei Vagin <avagin@gmail.com>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
 fs/proc/base.c                 |  95 +++++++++++++++++++++++++++++++
 include/linux/time_namespace.h |  10 ++++
 kernel/time/namespace.c        | 100 +++++++++++++++++++++++++++++++++
 3 files changed, 205 insertions(+)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index ebea9501afb8..1d2007365e87 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -94,6 +94,7 @@
 #include <linux/sched/debug.h>
 #include <linux/sched/stat.h>
 #include <linux/posix-timers.h>
+#include <linux/time_namespace.h>
 #include <trace/events/oom.h>
 #include "internal.h"
 #include "fd.h"
@@ -1533,6 +1534,97 @@ static const struct file_operations proc_pid_sched_autogroup_operations = {
 
 #endif /* CONFIG_SCHED_AUTOGROUP */
 
+#ifdef CONFIG_TIME_NS
+static int timens_offsets_show(struct seq_file *m, void *v)
+{
+	struct task_struct *p;
+
+	p = get_proc_task(file_inode(m->file));
+	if (!p)
+		return -ESRCH;
+	proc_timens_show_offsets(p, m);
+
+	put_task_struct(p);
+
+	return 0;
+}
+
+static ssize_t
+timens_offsets_write(struct file *file, const char __user *buf,
+	    size_t count, loff_t *ppos)
+{
+	struct inode *inode = file_inode(file);
+	struct proc_timens_offset offsets[2];
+	char *kbuf = NULL, *pos, *next_line;
+	struct task_struct *p;
+	int ret, noffsets;
+
+	/* Only allow < page size writes at the beginning of the file */
+	if ((*ppos != 0) || (count >= PAGE_SIZE))
+		return -EINVAL;
+
+	/* Slurp in the user data */
+	kbuf = memdup_user_nul(buf, count);
+	if (IS_ERR(kbuf))
+		return PTR_ERR(kbuf);
+
+	/* Parse the user data */
+	ret = -EINVAL;
+	noffsets = 0;
+	for (pos = kbuf; pos; pos = next_line) {
+		struct proc_timens_offset *off = &offsets[noffsets];
+		int err;
+
+		/* Find the end of line and ensure we don't look past it */
+		next_line = strchr(pos, '\n');
+		if (next_line) {
+			*next_line = '\0';
+			next_line++;
+			if (*next_line == '\0')
+				next_line = NULL;
+		}
+
+		err = sscanf(pos, "%u %lld %lu", &off->clockid,
+				&off->val.tv_sec, &off->val.tv_nsec);
+		if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC)
+			goto out;
+		noffsets++;
+		if (noffsets == ARRAY_SIZE(offsets)) {
+			if (next_line)
+				count = next_line - kbuf;
+			break;
+		}
+	}
+
+	ret = -ESRCH;
+	p = get_proc_task(inode);
+	if (!p)
+		goto out;
+	ret = proc_timens_set_offset(file, p, offsets, noffsets);
+	put_task_struct(p);
+	if (ret)
+		goto out;
+
+	ret = count;
+out:
+	kfree(kbuf);
+	return ret;
+}
+
+static int timens_offsets_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, timens_offsets_show, inode);
+}
+
+static const struct file_operations proc_timens_offsets_operations = {
+	.open		= timens_offsets_open,
+	.read		= seq_read,
+	.write		= timens_offsets_write,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif /* CONFIG_TIME_NS */
+
 static ssize_t comm_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *offset)
 {
@@ -3015,6 +3107,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 #endif
 #ifdef CONFIG_SCHED_AUTOGROUP
 	REG("autogroup",  S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
+#endif
+#ifdef CONFIG_TIME_NS
+	REG("timens_offsets",  S_IRUGO|S_IWUSR, proc_timens_offsets_operations),
 #endif
 	REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index f1cdd3a6f842..793d2486a87d 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -50,6 +50,16 @@ static inline void put_time_ns(struct time_namespace *ns)
 	kref_put(&ns->kref, free_time_ns);
 }
 
+extern void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m);
+
+struct proc_timens_offset {
+	int clockid;
+	struct timespec64 val;
+};
+
+extern int proc_timens_set_offset(struct file *file, struct task_struct *p,
+				struct proc_timens_offset *offsets, int n);
+
 static inline void timens_add_monotonic(struct timespec64 *ts)
 {
 	struct timens_offsets *ns_offsets = &current->nsproxy->time_ns->offsets;
diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
index 0dc0742ed1ee..267120f31699 100644
--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -8,6 +8,7 @@
 #include <linux/user_namespace.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/task.h>
+#include <linux/seq_file.h>
 #include <linux/proc_ns.h>
 #include <linux/export.h>
 #include <linux/time.h>
@@ -333,6 +334,105 @@ static struct user_namespace *timens_owner(struct ns_common *ns)
 	return to_time_ns(ns)->user_ns;
 }
 
+static void show_offset(struct seq_file *m, int clockid, struct timespec64 *ts)
+{
+	seq_printf(m, "%d %lld %ld\n", clockid, ts->tv_sec, ts->tv_nsec);
+}
+
+void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m)
+{
+	struct ns_common *ns;
+	struct time_namespace *time_ns;
+
+	ns = timens_for_children_get(p);
+	if (!ns)
+		return;
+	time_ns = to_time_ns(ns);
+
+	show_offset(m, CLOCK_MONOTONIC, &time_ns->offsets.monotonic);
+	show_offset(m, CLOCK_BOOTTIME, &time_ns->offsets.boottime);
+	put_time_ns(time_ns);
+}
+
+int proc_timens_set_offset(struct file *file, struct task_struct *p,
+			   struct proc_timens_offset *offsets, int noffsets)
+{
+	struct ns_common *ns;
+	struct time_namespace *time_ns;
+	struct timespec64 tp;
+	int i, err;
+
+	ns = timens_for_children_get(p);
+	if (!ns)
+		return -ESRCH;
+	time_ns = to_time_ns(ns);
+
+	if (!file_ns_capable(file, time_ns->user_ns, CAP_SYS_TIME)) {
+		put_time_ns(time_ns);
+		return -EPERM;
+	}
+
+	for (i = 0; i < noffsets; i++) {
+		struct proc_timens_offset *off = &offsets[i];
+
+		switch (off->clockid) {
+		case CLOCK_MONOTONIC:
+			ktime_get_ts64(&tp);
+			break;
+		case CLOCK_BOOTTIME:
+			ktime_get_boottime_ts64(&tp);
+			break;
+		default:
+			err = -EINVAL;
+			goto out;
+		}
+
+		err = -ERANGE;
+
+		if (off->val.tv_sec > KTIME_SEC_MAX || off->val.tv_sec < -KTIME_SEC_MAX)
+			goto out;
+
+		tp = timespec64_add(tp, off->val);
+		/*
+		 * KTIME_SEC_MAX is divided by 2 to be sure that KTIME_MAX is
+		 * still unreachable.
+		 */
+		if (tp.tv_sec < 0 || tp.tv_sec > KTIME_SEC_MAX / 2)
+			goto out;
+	}
+
+	mutex_lock(&offset_lock);
+	if (time_ns->frozen_offsets) {
+		err = -EACCES;
+		goto out_unlock;
+	}
+
+	err = 0;
+	/* don't report errors after this line */
+	for (i = 0; i < noffsets; i++) {
+		struct proc_timens_offset *off = &offsets[i];
+		struct timespec64 *offset = NULL;
+
+		switch (off->clockid) {
+		case CLOCK_MONOTONIC:
+			offset = &time_ns->offsets.monotonic;
+			break;
+		case CLOCK_BOOTTIME:
+			offset = &time_ns->offsets.boottime;
+			break;
+		}
+
+		*offset = off->val;
+	}
+
+out_unlock:
+	mutex_unlock(&offset_lock);
+out:
+	put_time_ns(time_ns);
+
+	return err;
+}
+
 const struct proc_ns_operations timens_operations = {
 	.name		= "time",
 	.type		= CLONE_NEWTIME,
-- 
2.24.0

  parent reply index

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-12  1:26 [PATCHv8 00/34] kernel: Introduce Time Namespace Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 01/34] lib/vdso: Add unlikely() hint into vdso_read_begin() Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 02/34] lib/vdso: make do_hres and do_coarse as __always_inline Dmitry Safonov
     [not found]   ` <20191112012724.250792-3-dima-nzgTgzXrdUbQT0dZR+AlfA@public.gmane.org>
2020-01-10  9:45     ` Vincenzo Frascino
2020-01-10 11:42       ` Thomas Gleixner
     [not found]         ` <878smfa66i.fsf-ecDvlHI5BZPZikZi3RtOZ1XZhhPuCNm+@public.gmane.org>
2020-01-10 11:47           ` Vincenzo Frascino
2020-01-10 12:02             ` Thomas Gleixner
     [not found]               ` <875zhja59q.fsf-ecDvlHI5BZPZikZi3RtOZ1XZhhPuCNm+@public.gmane.org>
2020-01-10 12:18                 ` Vincenzo Frascino
2020-01-13  5:27               ` Andrei Vagin
2019-11-12  1:26 ` [PATCHv8 03/34] ns: Introduce Time Namespace Dmitry Safonov
     [not found]   ` <20191112012724.250792-4-dima-nzgTgzXrdUbQT0dZR+AlfA@public.gmane.org>
2020-01-27 14:12     ` Dmitry Vyukov
     [not found]       ` <CACT4Y+b70bRRS2XD3yxhBoy4E-LFy_K3wMrjeuPmiEvaPe_c2Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2020-01-27 14:19         ` Dmitry Safonov
2020-02-17 14:20   ` Time Namespaces: CLONE_NEWTIME and clone3()? Michael Kerrisk
2020-02-17 14:59     ` Christian Brauner
2020-02-17 21:47       ` Michael Kerrisk (man-pages)
2020-02-17 23:03         ` Christian Brauner
2020-02-17 23:29           ` Thomas Gleixner
2020-02-18  2:37             ` Eric W. Biederman
2020-02-18 17:11           ` Adrian Reber
2020-02-18 17:26             ` Christian Brauner
2019-11-12  1:26 ` [PATCHv8 04/34] time: Add timens_offsets to be used for tasks in timens Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 05/34] posix-clocks: Rename the clock_get() callback to clock_get_timespec() Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 06/34] posix-clocks: Rename .clock_get_timespec() callbacks accordingly Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 07/34] alarmtimer: Rename gettime() callback to get_ktime() Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 08/34] alarmtimer: Provide get_timespec() callback Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 09/34] posix-clocks: Introduce clock_get_ktime() callback Dmitry Safonov
2019-11-12  1:26 ` [PATCHv8 10/34] posix-timers: Use clock_get_ktime() in common_timer_get() Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 11/34] posix-clocks: Wire up clock_gettime() with timens offsets Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 12/34] kernel: Add do_timens_ktime_to_host() helper Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 13/34] timerfd: Make timerfd_settime() time namespace aware Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 14/34] posix-timers: Make timer_settime() " Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 15/34] alarmtimer: Make nanosleep " Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 16/34] hrtimers: Prepare hrtimer_nanosleep() for time namespaces Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 17/34] posix-timers: Make clock_nanosleep() time namespace aware Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 18/34] fs/proc: Respect boottime inside time namespace for /proc/uptime Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 19/34] x86/vdso: Restrict splitting VVAR VMA Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 20/34] lib/vdso: Prepare for time namespace support Dmitry Safonov
     [not found]   ` <20191112012724.250792-21-dima-nzgTgzXrdUbQT0dZR+AlfA@public.gmane.org>
2020-01-12 10:32     ` Thomas Gleixner
2019-11-12  1:27 ` [PATCHv8 21/34] x86/vdso: Provide vdso_data offset on vvar_page Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 22/34] x86/vdso: Add timens page Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 23/34] time: Allocate per-timens vvar page Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 24/34] x86/vdso: Handle faults on timens page Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 25/34] x86/vdso: On timens page fault prefault also VVAR page Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 26/34] x86/vdso: Zap vvar pages on switch a time namspace Dmitry Safonov
2019-11-12  1:27 ` Dmitry Safonov [this message]
2019-11-12  1:27 ` [PATCHv8 28/34] selftests/timens: Add Time Namespace test for supported clocks Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 29/34] selftests/timens: Add a test for timerfd Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 30/34] selftests/timens: Add a test for clock_nanosleep() Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 31/34] selftests/timens: Add procfs selftest Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 32/34] selftests/timens: Add timer offsets test Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 33/34] selftests/timens: Add a simple perf test for clock_gettime() Dmitry Safonov
2019-11-12  1:27 ` [PATCHv8 34/34] selftests/timens: Check for right timens offsets after fork and exec Dmitry Safonov
2019-11-21 18:05 ` [PATCHv8 00/34] kernel: Introduce Time Namespace Andrei Vagin
2019-12-11 20:38   ` Dmitry Safonov
2020-01-09 21:09     ` Thomas Gleixner
2020-01-10  9:52       ` Vincenzo Frascino

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191112012724.250792-28-dima@arista.com \
    --to=dima@arista.com \
    --cc=0x7f454c46@gmail.com \
    --cc=adrian@lisas.de \
    --cc=arnd@arndb.de \
    --cc=avagin@gmail.com \
    --cc=avagin@openvz.org \
    --cc=christian.brauner@ubuntu.com \
    --cc=containers@lists.linux-foundation.org \
    --cc=criu@openvz.org \
    --cc=ebiederm@xmission.com \
    --cc=gorcunov@openvz.org \
    --cc=hpa@zytor.com \
    --cc=jannh@google.com \
    --cc=jdike@addtoit.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=oleg@redhat.com \
    --cc=shuah@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=vincenzo.frascino@arm.com \
    --cc=xemul@virtuozzo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-api Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-api/0 linux-api/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-api linux-api/ https://lore.kernel.org/linux-api \
		linux-api@vger.kernel.org
	public-inbox-index linux-api

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-api


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git