linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/2] Expose task pid_ns_for_children to userspace
@ 2017-04-12 15:33 Kirill Tkhai
  2017-04-12 15:33 ` [PATCH v2 1/2] ns: Allow ns_entries to have custom symlink content Kirill Tkhai
  2017-04-12 15:34 ` [PATCH v2 2/2] pidns: Expose task pid_ns_for_children to userspace Kirill Tkhai
  0 siblings, 2 replies; 3+ messages in thread
From: Kirill Tkhai @ 2017-04-12 15:33 UTC (permalink / raw)
  To: serge, ebiederm, agruenba, linux-api, oleg, linux-kernel, paul,
	ktkhai, viro, avagin, linux-fsdevel, mtk.manpages, akpm, luto,
	gorcunov, mingo, keescook

pid_ns_for_children set by a task is known only to the task itself,
and it's impossible to identify it from outside.

It's a big problem for checkpoint/restore software like CRIU,
because it can't correctly handle tasks that call setns(CLONE_NEWPID)
in the process of their work. If they have a custom pid_ns_for_children
before dump, they must have the same ns after restore. Otherwise, the
restored task ends up in an environment it does not expect.

This patchset solves the problem. It exposes pid_ns_for_children
in the ns directory in the standard way, under the name "pid_for_children":

~# ls /proc/5531/ns -l | grep pid
lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid -> pid:[4026531836]
lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid_for_children -> pid:[4026532286]

v2: Do not allow taking a pid namespace if its child reaper has not
been created yet. This prevents a race between creation of the child
reaper and other tasks.
---

Kirill Tkhai (2):
      ns: Allow ns_entries to have custom symlink content
      pidns: Expose task pid_ns_for_children to userspace


 fs/nsfs.c               |    4 +++-
 fs/proc/namespaces.c    |    1 +
 include/linux/proc_ns.h |    2 ++
 kernel/pid_namespace.c  |   34 ++++++++++++++++++++++++++++++++++
 4 files changed, 40 insertions(+), 1 deletion(-)

--
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH v2 1/2] ns: Allow ns_entries to have custom symlink content
  2017-04-12 15:33 [PATCH v2 0/2] Expose task pid_ns_for_children to userspace Kirill Tkhai
@ 2017-04-12 15:33 ` Kirill Tkhai
  2017-04-12 15:34 ` [PATCH v2 2/2] pidns: Expose task pid_ns_for_children to userspace Kirill Tkhai
  1 sibling, 0 replies; 3+ messages in thread
From: Kirill Tkhai @ 2017-04-12 15:33 UTC (permalink / raw)
  To: serge, ebiederm, agruenba, linux-api, oleg, linux-kernel, paul,
	ktkhai, viro, avagin, linux-fsdevel, mtk.manpages, akpm, luto,
	gorcunov, mingo, keescook

Patch series "Expose task pid_ns_for_children to userspace".

pid_ns_for_children set by a task is known only to the task itself, and
it's impossible to identify it from outside.

It's a big problem for checkpoint/restore software like CRIU, because it
can't correctly handle tasks that call setns(CLONE_NEWPID) in the process
of their work.  If they have a custom pid_ns_for_children before dump, they
must have the same ns after restore.  Otherwise, the restored task ends up
in an environment it does not expect.

This patchset solves the problem.  It exposes pid_ns_for_children in the
ns directory in the standard way, under the name "pid_for_children":

~# ls /proc/5531/ns -l | grep pid
lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid -> pid:[4026531836]
lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid_for_children -> pid:[4026532286]

This patch (of 2):

Make it possible for the link content prefix yyy
to differ from the link name xxx:

$ readlink /proc/[pid]/ns/xxx
yyy:[4026531838]

This will be used in the next patch.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
---
 fs/nsfs.c               |    4 +++-
 include/linux/proc_ns.h |    1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/nsfs.c b/fs/nsfs.c
index 1656843e87d2..495f12b83a7b 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -195,9 +195,11 @@ int ns_get_name(char *buf, size_t size, struct task_struct *task,
 {
 	struct ns_common *ns;
 	int res = -ENOENT;
+	const char *name;
 	ns = ns_ops->get(task);
 	if (ns) {
-		res = snprintf(buf, size, "%s:[%u]", ns_ops->name, ns->inum);
+		name = ns_ops->real_ns_name ? : ns_ops->name;
+		res = snprintf(buf, size, "%s:[%u]", name, ns->inum);
 		ns_ops->put(ns);
 	}
 	return res;
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 12cb8bd81d2d..88dba3b53375 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -14,6 +14,7 @@ struct inode;
 
 struct proc_ns_operations {
 	const char *name;
+	const char *real_ns_name;
 	int type;
 	struct ns_common *(*get)(struct task_struct *task);
 	void (*put)(struct ns_common *ns);

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH v2 2/2] pidns: Expose task pid_ns_for_children to userspace
  2017-04-12 15:33 [PATCH v2 0/2] Expose task pid_ns_for_children to userspace Kirill Tkhai
  2017-04-12 15:33 ` [PATCH v2 1/2] ns: Allow ns_entries to have custom symlink content Kirill Tkhai
@ 2017-04-12 15:34 ` Kirill Tkhai
  1 sibling, 0 replies; 3+ messages in thread
From: Kirill Tkhai @ 2017-04-12 15:34 UTC (permalink / raw)
  To: serge, ebiederm, agruenba, linux-api, oleg, linux-kernel, paul,
	ktkhai, viro, avagin, linux-fsdevel, mtk.manpages, akpm, luto,
	gorcunov, mingo, keescook

pid_ns_for_children set by a task is known only to the task itself,
and it's impossible to identify it from outside.

It's a big problem for checkpoint/restore software like CRIU,
because it can't correctly handle tasks that call setns(CLONE_NEWPID)
in the process of their work.

This patch solves the problem, and it exposes pid_ns_for_children
to ns directory in standard way with the name "pid_for_children":

~# ls /proc/5531/ns -l | grep pid
lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid -> pid:[4026531836]
lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid_for_children -> pid:[4026532286]

v2: Do not allow getting the namespace if its child reaper has not been
created yet: other tasks rely on the initialization the child reaper
performs (e.g., pid_namespace::proc_mnt), and we don't want them to race
with it.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 fs/proc/namespaces.c    |    1 +
 include/linux/proc_ns.h |    1 +
 kernel/pid_namespace.c  |   34 ++++++++++++++++++++++++++++++++++
 3 files changed, 36 insertions(+)

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 766f0c637ad1..3803b24ca220 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -23,6 +23,7 @@ static const struct proc_ns_operations *ns_entries[] = {
 #endif
 #ifdef CONFIG_PID_NS
 	&pidns_operations,
+	&pidns_for_children_operations,
 #endif
 #ifdef CONFIG_USER_NS
 	&userns_operations,
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 88dba3b53375..58ab28d81fc2 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -27,6 +27,7 @@ extern const struct proc_ns_operations netns_operations;
 extern const struct proc_ns_operations utsns_operations;
 extern const struct proc_ns_operations ipcns_operations;
 extern const struct proc_ns_operations pidns_operations;
+extern const struct proc_ns_operations pidns_for_children_operations;
 extern const struct proc_ns_operations userns_operations;
 extern const struct proc_ns_operations mntns_operations;
 extern const struct proc_ns_operations cgroupns_operations;
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index de461aa0bf9a..d1f3e9f558b8 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -374,6 +374,29 @@ static struct ns_common *pidns_get(struct task_struct *task)
 	return ns ? &ns->ns : NULL;
 }
 
+static struct ns_common *pidns_for_children_get(struct task_struct *task)
+{
+	struct pid_namespace *ns = NULL;
+
+	task_lock(task);
+	if (task->nsproxy) {
+		ns = task->nsproxy->pid_ns_for_children;
+		get_pid_ns(ns);
+	}
+	task_unlock(task);
+
+	if (ns) {
+		read_lock(&tasklist_lock);
+		if (!ns->child_reaper) {
+			put_pid_ns(ns);
+			ns = NULL;
+		}
+		read_unlock(&tasklist_lock);
+	}
+
+	return ns ? &ns->ns : NULL;
+}
+
 static void pidns_put(struct ns_common *ns)
 {
 	put_pid_ns(to_pid_ns(ns));
@@ -443,6 +466,17 @@ const struct proc_ns_operations pidns_operations = {
 	.get_parent	= pidns_get_parent,
 };
 
+const struct proc_ns_operations pidns_for_children_operations = {
+	.name		= "pid_for_children",
+	.real_ns_name	= "pid",
+	.type		= CLONE_NEWPID,
+	.get		= pidns_for_children_get,
+	.put		= pidns_put,
+	.install	= pidns_install,
+	.owner		= pidns_owner,
+	.get_parent	= pidns_get_parent,
+};
+
 static __init int pid_namespaces_init(void)
 {
 	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-04-12 15:34 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-12 15:33 [PATCH v2 0/2] Expose task pid_ns_for_children to userspace Kirill Tkhai
2017-04-12 15:33 ` [PATCH v2 1/2] ns: Allow ns_entries to have custom symlink content Kirill Tkhai
2017-04-12 15:34 ` [PATCH v2 2/2] pidns: Expose task pid_ns_for_children to userspace Kirill Tkhai

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).