* [PATCH 1/4] kernel: add a helper to get an owning user namespace for a namespace
2016-09-06 7:47 [PATCH 0/4 v3] Add an interface to discover relationships between namespaces Andrei Vagin
@ 2016-09-06 7:47 ` Andrei Vagin
2016-09-06 7:47 ` [PATCH 2/4] nsfs: add ioctl to get an owning user namespace for ns file descriptor Andrei Vagin
` (3 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: Andrei Vagin @ 2016-09-06 7:47 UTC (permalink / raw)
To: Eric W. Biederman, containers
Cc: linux-api, linux-kernel, linux-fsdevel, Andrey Vagin,
James Bottomley, Michael Kerrisk (man-pages),
W. Trevor King, Alexander Viro, Serge Hallyn
From: Andrey Vagin <avagin@openvz.org>
Return -EPERM if the owning user namespace is outside of the process's
current user namespace.
v2: In the first version ns_get_owner returned ENOENT for init_user_ns.
This special case was removed from this version. There is nothing
outside of init_user_ns, so we can return EPERM.
v3: rename ns->get_owner() to ns->owner(). get_* usually means that it
grabs a reference.
Acked-by: Serge Hallyn <serge@hallyn.com>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
---
fs/namespace.c | 6 ++++++
include/linux/proc_ns.h | 1 +
include/linux/user_namespace.h | 7 +++++++
ipc/namespace.c | 6 ++++++
kernel/cgroup.c | 6 ++++++
kernel/pid_namespace.c | 6 ++++++
kernel/user_namespace.c | 24 ++++++++++++++++++++++++
kernel/utsname.c | 6 ++++++
net/core/net_namespace.c | 6 ++++++
9 files changed, 68 insertions(+)
diff --git a/fs/namespace.c b/fs/namespace.c
index 491b8f3..dd27ce4 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3368,10 +3368,16 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
return 0;
}
+static struct user_namespace *mntns_owner(struct ns_common *ns)
+{
+ return to_mnt_ns(ns)->user_ns; /* owner of the mount ns; no reference is taken here */
+}
+
const struct proc_ns_operations mntns_operations = {
.name = "mnt",
.type = CLONE_NEWNS,
.get = mntns_get,
.put = mntns_put,
.install = mntns_install,
+ .owner = mntns_owner,
};
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index de0e771..ca85a43 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -18,6 +18,7 @@ struct proc_ns_operations {
struct ns_common *(*get)(struct task_struct *task);
void (*put)(struct ns_common *ns);
int (*install)(struct nsproxy *nsproxy, struct ns_common *ns);
+ struct user_namespace *(*owner)(struct ns_common *ns);
};
extern const struct proc_ns_operations netns_operations;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 30ffe10..eb209d4 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -106,6 +106,8 @@ extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t,
extern int proc_setgroups_show(struct seq_file *m, void *v);
extern bool userns_may_setgroups(const struct user_namespace *ns);
extern bool current_in_userns(const struct user_namespace *target_ns);
+
+struct ns_common *ns_get_owner(struct ns_common *ns);
#else
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -139,6 +141,11 @@ static inline bool current_in_userns(const struct user_namespace *target_ns)
{
return true;
}
+
+static inline struct ns_common *ns_get_owner(struct ns_common *ns)
+{
+ return ERR_PTR(-EPERM); /* stub for the #else branch: @ns is ignored, always -EPERM */
+}
#endif
#endif /* _LINUX_USER_H */
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 7309142..465c981 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -188,10 +188,16 @@ static int ipcns_install(struct nsproxy *nsproxy, struct ns_common *new)
return 0;
}
+static struct user_namespace *ipcns_owner(struct ns_common *ns)
+{
+ return to_ipc_ns(ns)->user_ns; /* owner of the ipc ns; no reference is taken here */
+}
+
const struct proc_ns_operations ipcns_operations = {
.name = "ipc",
.type = CLONE_NEWIPC,
.get = ipcns_get,
.put = ipcns_put,
.install = ipcns_install,
+ .owner = ipcns_owner,
};
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e9e4427..2665b58 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -6421,12 +6421,18 @@ static void cgroupns_put(struct ns_common *ns)
put_cgroup_ns(to_cg_ns(ns));
}
+static struct user_namespace *cgroupns_owner(struct ns_common *ns)
+{
+ return to_cg_ns(ns)->user_ns; /* owner of the cgroup ns; no reference is taken here */
+}
+
const struct proc_ns_operations cgroupns_operations = {
.name = "cgroup",
.type = CLONE_NEWCGROUP,
.get = cgroupns_get,
.put = cgroupns_put,
.install = cgroupns_install,
+ .owner = cgroupns_owner,
};
static __init int cgroup_namespaces_init(void)
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 30a7f33..c18f0f4f 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -405,12 +405,18 @@ static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
return 0;
}
+static struct user_namespace *pidns_owner(struct ns_common *ns)
+{
+ return to_pid_ns(ns)->user_ns; /* owner of the pid ns; no reference is taken here */
+}
+
const struct proc_ns_operations pidns_operations = {
.name = "pid",
.type = CLONE_NEWPID,
.get = pidns_get,
.put = pidns_put,
.install = pidns_install,
+ .owner = pidns_owner,
};
static __init int pid_namespaces_init(void)
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 0edafe3..42a64d5 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -1050,12 +1050,36 @@ static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
return commit_creds(cred);
}
+struct ns_common *ns_get_owner(struct ns_common *ns)
+{
+ struct user_namespace *my_user_ns = current_user_ns();
+ struct user_namespace *owner, *p;
+ /* Look up the owner of @ns via ns->ops->owner() and walk its ->parent chain:
+ * the owner must be the caller's user namespace or one of its descendants. */
+ owner = p = ns->ops->owner(ns);
+ for (;;) {
+ if (!p)
+ return ERR_PTR(-EPERM);
+ if (p == my_user_ns)
+ break;
+ p = p->parent;
+ }
+ /* Owner is visible from here: get_user_ns() it and return its embedded ns_common. */
+ return &get_user_ns(owner)->ns;
+}
+
+static struct user_namespace *userns_owner(struct ns_common *ns)
+{
+ return to_user_ns(ns)->parent; /* a user ns reports its parent as owner; no reference taken */
+}
+
const struct proc_ns_operations userns_operations = {
.name = "user",
.type = CLONE_NEWUSER,
.get = userns_get,
.put = userns_put,
.install = userns_install,
+ .owner = userns_owner,
};
static __init int user_namespaces_init(void)
diff --git a/kernel/utsname.c b/kernel/utsname.c
index f3b0bb4..0795e97 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -154,10 +154,16 @@ static int utsns_install(struct nsproxy *nsproxy, struct ns_common *new)
return 0;
}
+static struct user_namespace *utsns_owner(struct ns_common *ns)
+{
+ return to_uts_ns(ns)->user_ns; /* owner of the uts ns; no reference is taken here */
+}
+
const struct proc_ns_operations utsns_operations = {
.name = "uts",
.type = CLONE_NEWUTS,
.get = utsns_get,
.put = utsns_put,
.install = utsns_install,
+ .owner = utsns_owner,
};
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 3e2812a..8619e89 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -1016,11 +1016,17 @@ static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
return 0;
}
+static struct user_namespace *netns_owner(struct ns_common *ns)
+{
+ return to_net_ns(ns)->user_ns; /* owner of the net ns; no reference is taken here */
+}
+
const struct proc_ns_operations netns_operations = {
.name = "net",
.type = CLONE_NEWNET,
.get = netns_get,
.put = netns_put,
.install = netns_install,
+ .owner = netns_owner,
};
#endif
--
2.5.5
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/4] nsfs: add ioctl to get an owning user namespace for ns file descriptor
2016-09-06 7:47 [PATCH 0/4 v3] Add an interface to discover relationships between namespaces Andrei Vagin
2016-09-06 7:47 ` [PATCH 1/4] kernel: add a helper to get an owning user namespace for a namespace Andrei Vagin
@ 2016-09-06 7:47 ` Andrei Vagin
2016-09-06 15:54 ` Serge E. Hallyn
2016-09-06 7:47 ` [PATCH 3/4] nsfs: add ioctl to get a parent namespace Andrei Vagin
` (2 subsequent siblings)
4 siblings, 1 reply; 9+ messages in thread
From: Andrei Vagin @ 2016-09-06 7:47 UTC (permalink / raw)
To: Eric W. Biederman, containers
Cc: linux-api, linux-kernel, linux-fsdevel, Andrey Vagin,
James Bottomley, Michael Kerrisk (man-pages),
W. Trevor King, Alexander Viro, Serge Hallyn
From: Andrey Vagin <avagin@openvz.org>
Each namespace has an owning user namespace, and currently there is no way
to discover these relationships.
Understanding namespace relationships allows us to answer the question:
what capability does process X have to perform operations on a resource
governed by namespace Y?
After a long discussion, Eric W. Biederman proposed to use ioctl-s for
this purpose.
The NS_GET_USERNS ioctl returns a file descriptor to an owning user
namespace.
It returns EPERM if a target namespace is outside of a current user
namespace.
v2: rename parent to relative
Link: https://lkml.org/lkml/2016/7/6/158
Signed-off-by: Andrei Vagin <avagin@openvz.org>
---
fs/nsfs.c | 95 ++++++++++++++++++++++++++++++++++++++++-------
include/uapi/linux/nsfs.h | 11 ++++++
2 files changed, 93 insertions(+), 13 deletions(-)
create mode 100644 include/uapi/linux/nsfs.h
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 8f20d60..be7d193 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -5,11 +5,16 @@
#include <linux/magic.h>
#include <linux/ktime.h>
#include <linux/seq_file.h>
+#include <linux/user_namespace.h>
+#include <linux/nsfs.h>
static struct vfsmount *nsfs_mnt;
+static long ns_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg);
static const struct file_operations ns_file_operations = {
.llseek = no_llseek,
+ .unlocked_ioctl = ns_ioctl,
};
static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
@@ -44,22 +49,14 @@ static void nsfs_evict(struct inode *inode)
ns->ops->put(ns);
}
-void *ns_get_path(struct path *path, struct task_struct *task,
- const struct proc_ns_operations *ns_ops)
+static void *__ns_get_path(struct path *path, struct ns_common *ns)
{
struct vfsmount *mnt = mntget(nsfs_mnt);
struct qstr qname = { .name = "", };
struct dentry *dentry;
struct inode *inode;
- struct ns_common *ns;
unsigned long d;
-again:
- ns = ns_ops->get(task);
- if (!ns) {
- mntput(mnt);
- return ERR_PTR(-ENOENT);
- }
rcu_read_lock();
d = atomic_long_read(&ns->stashed);
if (!d)
@@ -68,7 +65,7 @@ again:
if (!lockref_get_not_dead(&dentry->d_lockref))
goto slow;
rcu_read_unlock();
- ns_ops->put(ns);
+ ns->ops->put(ns);
got_it:
path->mnt = mnt;
path->dentry = dentry;
@@ -77,7 +74,7 @@ slow:
rcu_read_unlock();
inode = new_inode_pseudo(mnt->mnt_sb);
if (!inode) {
- ns_ops->put(ns);
+ ns->ops->put(ns);
mntput(mnt);
return ERR_PTR(-ENOMEM);
}
@@ -95,17 +92,89 @@ slow:
return ERR_PTR(-ENOMEM);
}
d_instantiate(dentry, inode);
- dentry->d_fsdata = (void *)ns_ops;
+ dentry->d_fsdata = (void *)ns->ops;
d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
if (d) {
d_delete(dentry); /* make sure ->d_prune() does nothing */
dput(dentry);
cpu_relax();
- goto again;
+ return ERR_PTR(-EAGAIN);
}
goto got_it;
}
+void *ns_get_path(struct path *path, struct task_struct *task,
+ const struct proc_ns_operations *ns_ops)
+{
+ struct ns_common *ns;
+ void *ret;
+ /* Fill @path for @task's namespace of type @ns_ops; ERR_PTR(-ENOENT) if ->get() finds none. */
+again:
+ ns = ns_ops->get(task);
+ if (!ns)
+ return ERR_PTR(-ENOENT);
+ /* -EAGAIN: __ns_get_path() lost the race to stash its dentry; redo from ->get(). */
+ ret = __ns_get_path(path, ns);
+ if (IS_ERR(ret) && PTR_ERR(ret) == -EAGAIN)
+ goto again;
+ return ret;
+}
+
+static int open_related_ns(struct ns_common *ns,
+ struct ns_common *(*get_ns)(struct ns_common *ns))
+{
+ struct path path = {};
+ struct file *f;
+ void *err;
+ int fd;
+ /* Open the namespace that get_ns(@ns) yields; returns a new O_CLOEXEC fd or a negative errno. */
+ fd = get_unused_fd_flags(O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+ /* Loop for as long as __ns_get_path() reports -EAGAIN. */
+ while (1) {
+ struct ns_common *relative;
+ /* On failure get_ns() returns an ERR_PTR; release the reserved fd number. */
+ relative = get_ns(ns);
+ if (IS_ERR(relative)) {
+ put_unused_fd(fd);
+ return PTR_ERR(relative);
+ }
+ /* Every retry starts over with a fresh get_ns() call. */
+ err = __ns_get_path(&path, relative);
+ if (IS_ERR(err) && PTR_ERR(err) == -EAGAIN)
+ continue;
+ break;
+ }
+ if (IS_ERR(err)) {
+ put_unused_fd(fd);
+ return PTR_ERR(err);
+ }
+ /* Open the resulting path read-only; the path reference is dropped either way. */
+ f = dentry_open(&path, O_RDONLY, current_cred());
+ path_put(&path);
+ if (IS_ERR(f)) {
+ put_unused_fd(fd);
+ fd = PTR_ERR(f);
+ } else
+ fd_install(fd, f);
+
+ return fd;
+}
+
+static long ns_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg)
+{
+ struct ns_common *ns = get_proc_ns(file_inode(filp));
+ /* @arg is unused here; the result is the return value of open_related_ns(). */
+ switch (ioctl) {
+ case NS_GET_USERNS:
+ return open_related_ns(ns, ns_get_owner);
+ default:
+ return -ENOTTY;
+ }
+}
+
int ns_get_name(char *buf, size_t size, struct task_struct *task,
const struct proc_ns_operations *ns_ops)
{
diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
new file mode 100644
index 0000000..5cacd5c
--- /dev/null
+++ b/include/uapi/linux/nsfs.h
@@ -0,0 +1,11 @@
+#ifndef __LINUX_NSFS_H
+#define __LINUX_NSFS_H
+
+#include <linux/ioctl.h>
+
+#define NSIO 0xb7
+
+/* Returns a file descriptor that refers to an owning user namespace */
+#define NS_GET_USERNS _IO(NSIO, 0x1)
+
+#endif /* __LINUX_NSFS_H */
--
2.5.5
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 2/4] nsfs: add ioctl to get an owning user namespace for ns file descriptor
2016-09-06 7:47 ` [PATCH 2/4] nsfs: add ioctl to get an owning user namespace for ns file descriptor Andrei Vagin
@ 2016-09-06 15:54 ` Serge E. Hallyn
0 siblings, 0 replies; 9+ messages in thread
From: Serge E. Hallyn @ 2016-09-06 15:54 UTC (permalink / raw)
To: Andrei Vagin
Cc: Eric W. Biederman, containers, Serge Hallyn, linux-api,
linux-kernel, James Bottomley, Michael Kerrisk (man-pages),
linux-fsdevel, Alexander Viro
Quoting Andrei Vagin (avagin@openvz.org):
> From: Andrey Vagin <avagin@openvz.org>
>
> Each namespace has an owning user namespace and now there is not way
> to discover these relationships.
>
> Understending namespaces relationships allows to answer the question:
> what capability does process X have to perform operations on a resource
> governed by namespace Y?
>
> After a long discussion, Eric W. Biederman proposed to use ioctl-s for
> this purpose.
>
> The NS_GET_USERNS ioctl returns a file descriptor to an owning user
> namespace.
> It returns EPERM if a target namespace is outside of a current user
> namespace.
>
> v2: rename parent to relative
>
> Link: https://lkml.org/lkml/2016/7/6/158
> Signed-off-by: Andrei Vagin <avagin@openvz.org>
Acked-by: Serge Hallyn <serge@hallyn.com>
> ---
> fs/nsfs.c | 95 ++++++++++++++++++++++++++++++++++++++++-------
> include/uapi/linux/nsfs.h | 11 ++++++
> 2 files changed, 93 insertions(+), 13 deletions(-)
> create mode 100644 include/uapi/linux/nsfs.h
>
> diff --git a/fs/nsfs.c b/fs/nsfs.c
> index 8f20d60..be7d193 100644
> --- a/fs/nsfs.c
> +++ b/fs/nsfs.c
> @@ -5,11 +5,16 @@
> #include <linux/magic.h>
> #include <linux/ktime.h>
> #include <linux/seq_file.h>
> +#include <linux/user_namespace.h>
> +#include <linux/nsfs.h>
>
> static struct vfsmount *nsfs_mnt;
>
> +static long ns_ioctl(struct file *filp, unsigned int ioctl,
> + unsigned long arg);
> static const struct file_operations ns_file_operations = {
> .llseek = no_llseek,
> + .unlocked_ioctl = ns_ioctl,
> };
>
> static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
> @@ -44,22 +49,14 @@ static void nsfs_evict(struct inode *inode)
> ns->ops->put(ns);
> }
>
> -void *ns_get_path(struct path *path, struct task_struct *task,
> - const struct proc_ns_operations *ns_ops)
> +static void *__ns_get_path(struct path *path, struct ns_common *ns)
> {
> struct vfsmount *mnt = mntget(nsfs_mnt);
> struct qstr qname = { .name = "", };
> struct dentry *dentry;
> struct inode *inode;
> - struct ns_common *ns;
> unsigned long d;
>
> -again:
> - ns = ns_ops->get(task);
> - if (!ns) {
> - mntput(mnt);
> - return ERR_PTR(-ENOENT);
> - }
> rcu_read_lock();
> d = atomic_long_read(&ns->stashed);
> if (!d)
> @@ -68,7 +65,7 @@ again:
> if (!lockref_get_not_dead(&dentry->d_lockref))
> goto slow;
> rcu_read_unlock();
> - ns_ops->put(ns);
> + ns->ops->put(ns);
> got_it:
> path->mnt = mnt;
> path->dentry = dentry;
> @@ -77,7 +74,7 @@ slow:
> rcu_read_unlock();
> inode = new_inode_pseudo(mnt->mnt_sb);
> if (!inode) {
> - ns_ops->put(ns);
> + ns->ops->put(ns);
> mntput(mnt);
> return ERR_PTR(-ENOMEM);
> }
> @@ -95,17 +92,89 @@ slow:
> return ERR_PTR(-ENOMEM);
> }
> d_instantiate(dentry, inode);
> - dentry->d_fsdata = (void *)ns_ops;
> + dentry->d_fsdata = (void *)ns->ops;
> d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
> if (d) {
> d_delete(dentry); /* make sure ->d_prune() does nothing */
> dput(dentry);
> cpu_relax();
> - goto again;
> + return ERR_PTR(-EAGAIN);
> }
> goto got_it;
> }
>
> +void *ns_get_path(struct path *path, struct task_struct *task,
> + const struct proc_ns_operations *ns_ops)
> +{
> + struct ns_common *ns;
> + void *ret;
> +
> +again:
> + ns = ns_ops->get(task);
> + if (!ns)
> + return ERR_PTR(-ENOENT);
> +
> + ret = __ns_get_path(path, ns);
> + if (IS_ERR(ret) && PTR_ERR(ret) == -EAGAIN)
> + goto again;
> + return ret;
> +}
> +
> +static int open_related_ns(struct ns_common *ns,
> + struct ns_common *(*get_ns)(struct ns_common *ns))
> +{
> + struct path path = {};
> + struct file *f;
> + void *err;
> + int fd;
> +
> + fd = get_unused_fd_flags(O_CLOEXEC);
> + if (fd < 0)
> + return fd;
> +
> + while (1) {
> + struct ns_common *relative;
> +
> + relative = get_ns(ns);
> + if (IS_ERR(relative)) {
> + put_unused_fd(fd);
> + return PTR_ERR(relative);
> + }
> +
> + err = __ns_get_path(&path, relative);
> + if (IS_ERR(err) && PTR_ERR(err) == -EAGAIN)
> + continue;
> + break;
> + }
> + if (IS_ERR(err)) {
> + put_unused_fd(fd);
> + return PTR_ERR(err);
> + }
> +
> + f = dentry_open(&path, O_RDONLY, current_cred());
> + path_put(&path);
> + if (IS_ERR(f)) {
> + put_unused_fd(fd);
> + fd = PTR_ERR(f);
> + } else
> + fd_install(fd, f);
> +
> + return fd;
> +}
> +
> +static long ns_ioctl(struct file *filp, unsigned int ioctl,
> + unsigned long arg)
> +{
> + struct ns_common *ns = get_proc_ns(file_inode(filp));
> +
> + switch (ioctl) {
> + case NS_GET_USERNS:
> + return open_related_ns(ns, ns_get_owner);
> + default:
> + return -ENOTTY;
> + }
> +}
> +
> int ns_get_name(char *buf, size_t size, struct task_struct *task,
> const struct proc_ns_operations *ns_ops)
> {
> diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
> new file mode 100644
> index 0000000..5cacd5c
> --- /dev/null
> +++ b/include/uapi/linux/nsfs.h
> @@ -0,0 +1,11 @@
> +#ifndef __LINUX_NSFS_H
> +#define __LINUX_NSFS_H
> +
> +#include <linux/ioctl.h>
> +
> +#define NSIO 0xb7
> +
> +/* Returns a file descriptor that refers to an owning user namespace */
> +#define NS_GET_USERNS _IO(NSIO, 0x1)
> +
> +#endif /* __LINUX_NSFS_H */
> --
> 2.5.5
>
> _______________________________________________
> Containers mailing list
> Containers@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/containers
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 3/4] nsfs: add ioctl to get a parent namespace
2016-09-06 7:47 [PATCH 0/4 v3] Add an interface to discover relationships between namespaces Andrei Vagin
2016-09-06 7:47 ` [PATCH 1/4] kernel: add a helper to get an owning user namespace for a namespace Andrei Vagin
2016-09-06 7:47 ` [PATCH 2/4] nsfs: add ioctl to get an owning user namespace for ns file descriptor Andrei Vagin
@ 2016-09-06 7:47 ` Andrei Vagin
2016-09-06 15:51 ` Serge E. Hallyn
2016-09-06 7:47 ` [PATCH 4/4] tools/testing: add a test to check nsfs ioctl-s Andrei Vagin
2016-09-23 1:09 ` [PATCH 0/4 v3] Add an interface to discover relationships between namespaces Eric W. Biederman
4 siblings, 1 reply; 9+ messages in thread
From: Andrei Vagin @ 2016-09-06 7:47 UTC (permalink / raw)
To: Eric W. Biederman, containers
Cc: linux-api, linux-kernel, linux-fsdevel, Andrey Vagin,
James Bottomley, Michael Kerrisk (man-pages),
W. Trevor King, Alexander Viro, Serge Hallyn
From: Andrey Vagin <avagin@openvz.org>
Pid and user namespaces are hierarchical. There is no way to discover
parent-child relationships.
In the future we will use this interface to dump and restore nested
namespaces.
Signed-off-by: Andrei Vagin <avagin@openvz.org>
---
fs/nsfs.c | 4 ++++
include/linux/proc_ns.h | 1 +
include/uapi/linux/nsfs.h | 2 ++
kernel/pid_namespace.c | 19 +++++++++++++++++++
kernel/user_namespace.c | 1 +
5 files changed, 27 insertions(+)
diff --git a/fs/nsfs.c b/fs/nsfs.c
index be7d193..11a4b15 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -170,6 +170,10 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
switch (ioctl) {
case NS_GET_USERNS:
return open_related_ns(ns, ns_get_owner);
+ case NS_GET_PARENT:
+ if (!ns->ops->get_parent)
+ return -EINVAL;
+ return open_related_ns(ns, ns->ops->get_parent);
default:
return -ENOTTY;
}
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index ca85a43..12cb8bd 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -19,6 +19,7 @@ struct proc_ns_operations {
void (*put)(struct ns_common *ns);
int (*install)(struct nsproxy *nsproxy, struct ns_common *ns);
struct user_namespace *(*owner)(struct ns_common *ns);
+ struct ns_common *(*get_parent)(struct ns_common *ns);
};
extern const struct proc_ns_operations netns_operations;
diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
index 5cacd5c..3af6172 100644
--- a/include/uapi/linux/nsfs.h
+++ b/include/uapi/linux/nsfs.h
@@ -7,5 +7,7 @@
/* Returns a file descriptor that refers to an owning user namespace */
#define NS_GET_USERNS _IO(NSIO, 0x1)
+/* Returns a file descriptor that refers to a parent namespace */
+#define NS_GET_PARENT _IO(NSIO, 0x2)
#endif /* __LINUX_NSFS_H */
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index c18f0f4f..aad470b 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -405,6 +405,24 @@ static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
return 0;
}
+static struct ns_common *pidns_get_parent(struct ns_common *ns)
+{
+ struct pid_namespace *active = task_active_pid_ns(current);
+ struct pid_namespace *pid_ns, *p;
+ /* Walk up from the parent of @ns: it must be the caller's active pid
+ * namespace or one of its descendants, otherwise fail with -EPERM. */
+ pid_ns = p = to_pid_ns(ns)->parent;
+ for (;;) {
+ if (!p)
+ return ERR_PTR(-EPERM);
+ if (p == active)
+ break;
+ p = p->parent;
+ }
+ /* Parent is visible from here: get_pid_ns() it and return its embedded ns_common. */
+ return &get_pid_ns(pid_ns)->ns;
+}
+
static struct user_namespace *pidns_owner(struct ns_common *ns)
{
return to_pid_ns(ns)->user_ns;
@@ -417,6 +435,7 @@ const struct proc_ns_operations pidns_operations = {
.put = pidns_put,
.install = pidns_install,
.owner = pidns_owner,
+ .get_parent = pidns_get_parent,
};
static __init int pid_namespaces_init(void)
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 42a64d5..33b523c 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -1080,6 +1080,7 @@ const struct proc_ns_operations userns_operations = {
.put = userns_put,
.install = userns_install,
.owner = userns_owner,
+ .get_parent = ns_get_owner,
};
static __init int user_namespaces_init(void)
--
2.5.5
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 3/4] nsfs: add ioctl to get a parent namespace
2016-09-06 7:47 ` [PATCH 3/4] nsfs: add ioctl to get a parent namespace Andrei Vagin
@ 2016-09-06 15:51 ` Serge E. Hallyn
0 siblings, 0 replies; 9+ messages in thread
From: Serge E. Hallyn @ 2016-09-06 15:51 UTC (permalink / raw)
To: Andrei Vagin
Cc: Eric W. Biederman, containers, linux-api, linux-kernel,
linux-fsdevel, James Bottomley, Michael Kerrisk (man-pages),
W. Trevor King, Alexander Viro, Serge Hallyn
Quoting Andrei Vagin (avagin@openvz.org):
> From: Andrey Vagin <avagin@openvz.org>
>
> Pid and user namepaces are hierarchical. There is no way to discover
> parent-child relationships.
>
> In a future we will use this interface to dump and restore nested
> namespaces.
>
> Signed-off-by: Andrei Vagin <avagin@openvz.org>
Acked-by: Serge Hallyn <serge@hallyn.com>
> ---
> fs/nsfs.c | 4 ++++
> include/linux/proc_ns.h | 1 +
> include/uapi/linux/nsfs.h | 2 ++
> kernel/pid_namespace.c | 19 +++++++++++++++++++
> kernel/user_namespace.c | 1 +
> 5 files changed, 27 insertions(+)
>
> diff --git a/fs/nsfs.c b/fs/nsfs.c
> index be7d193..11a4b15 100644
> --- a/fs/nsfs.c
> +++ b/fs/nsfs.c
> @@ -170,6 +170,10 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
> switch (ioctl) {
> case NS_GET_USERNS:
> return open_related_ns(ns, ns_get_owner);
> + case NS_GET_PARENT:
> + if (!ns->ops->get_parent)
> + return -EINVAL;
> + return open_related_ns(ns, ns->ops->get_parent);
> default:
> return -ENOTTY;
> }
> diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
> index ca85a43..12cb8bd 100644
> --- a/include/linux/proc_ns.h
> +++ b/include/linux/proc_ns.h
> @@ -19,6 +19,7 @@ struct proc_ns_operations {
> void (*put)(struct ns_common *ns);
> int (*install)(struct nsproxy *nsproxy, struct ns_common *ns);
> struct user_namespace *(*owner)(struct ns_common *ns);
> + struct ns_common *(*get_parent)(struct ns_common *ns);
> };
>
> extern const struct proc_ns_operations netns_operations;
> diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
> index 5cacd5c..3af6172 100644
> --- a/include/uapi/linux/nsfs.h
> +++ b/include/uapi/linux/nsfs.h
> @@ -7,5 +7,7 @@
>
> /* Returns a file descriptor that refers to an owning user namespace */
> #define NS_GET_USERNS _IO(NSIO, 0x1)
> +/* Returns a file descriptor that refers to a parent namespace */
> +#define NS_GET_PARENT _IO(NSIO, 0x2)
>
> #endif /* __LINUX_NSFS_H */
> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
> index c18f0f4f..aad470b 100644
> --- a/kernel/pid_namespace.c
> +++ b/kernel/pid_namespace.c
> @@ -405,6 +405,24 @@ static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
> return 0;
> }
>
> +static struct ns_common *pidns_get_parent(struct ns_common *ns)
> +{
> + struct pid_namespace *active = task_active_pid_ns(current);
> + struct pid_namespace *pid_ns, *p;
> +
> + /* See if the parent is in the current namespace */
> + pid_ns = p = to_pid_ns(ns)->parent;
> + for (;;) {
> + if (!p)
> + return ERR_PTR(-EPERM);
> + if (p == active)
> + break;
> + p = p->parent;
> + }
> +
> + return &get_pid_ns(pid_ns)->ns;
> +}
> +
> static struct user_namespace *pidns_owner(struct ns_common *ns)
> {
> return to_pid_ns(ns)->user_ns;
> @@ -417,6 +435,7 @@ const struct proc_ns_operations pidns_operations = {
> .put = pidns_put,
> .install = pidns_install,
> .owner = pidns_owner,
> + .get_parent = pidns_get_parent,
> };
>
> static __init int pid_namespaces_init(void)
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 42a64d5..33b523c 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -1080,6 +1080,7 @@ const struct proc_ns_operations userns_operations = {
> .put = userns_put,
> .install = userns_install,
> .owner = userns_owner,
> + .get_parent = ns_get_owner,
> };
>
> static __init int user_namespaces_init(void)
> --
> 2.5.5
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 4/4] tools/testing: add a test to check nsfs ioctl-s
2016-09-06 7:47 [PATCH 0/4 v3] Add an interface to discover relationships between namespaces Andrei Vagin
` (2 preceding siblings ...)
2016-09-06 7:47 ` [PATCH 3/4] nsfs: add ioctl to get a parent namespace Andrei Vagin
@ 2016-09-06 7:47 ` Andrei Vagin
2016-09-23 1:09 ` [PATCH 0/4 v3] Add an interface to discover relationships between namespaces Eric W. Biederman
4 siblings, 0 replies; 9+ messages in thread
From: Andrei Vagin @ 2016-09-06 7:47 UTC (permalink / raw)
To: Eric W. Biederman, containers
Cc: linux-api, linux-kernel, linux-fsdevel, Andrey Vagin,
James Bottomley, Michael Kerrisk (man-pages),
W. Trevor King, Alexander Viro, Serge Hallyn
From: Andrey Vagin <avagin@openvz.org>
There are two new ioctl-s:
One ioctl returns the user namespace that owns the namespace a file descriptor refers to.
One ioctl returns the parent namespace of a namespace file descriptor.
The test checks that these ioctl-s work and that they handle the case
when a target namespace is outside of the current process namespace.
Signed-off-by: Andrei Vagin <avagin@openvz.org>
---
tools/testing/selftests/Makefile | 1 +
tools/testing/selftests/nsfs/Makefile | 12 +++++
tools/testing/selftests/nsfs/owner.c | 91 +++++++++++++++++++++++++++++++++++
tools/testing/selftests/nsfs/pidns.c | 78 ++++++++++++++++++++++++++++++
4 files changed, 182 insertions(+)
create mode 100644 tools/testing/selftests/nsfs/Makefile
create mode 100644 tools/testing/selftests/nsfs/owner.c
create mode 100644 tools/testing/selftests/nsfs/pidns.c
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index ff9e5f2..f770dba 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -15,6 +15,7 @@ TARGETS += memory-hotplug
TARGETS += mount
TARGETS += mqueue
TARGETS += net
+TARGETS += nsfs
TARGETS += powerpc
TARGETS += pstore
TARGETS += ptrace
diff --git a/tools/testing/selftests/nsfs/Makefile b/tools/testing/selftests/nsfs/Makefile
new file mode 100644
index 0000000..2306054
--- /dev/null
+++ b/tools/testing/selftests/nsfs/Makefile
@@ -0,0 +1,12 @@
+TEST_PROGS := owner pidns
+
+CFLAGS := -Wall -Werror
+
+all: owner pidns
+owner: owner.c
+pidns: pidns.c
+
+clean:
+ $(RM) owner pidns
+
+include ../lib.mk
diff --git a/tools/testing/selftests/nsfs/owner.c b/tools/testing/selftests/nsfs/owner.c
new file mode 100644
index 0000000..437205f
--- /dev/null
+++ b/tools/testing/selftests/nsfs/owner.c
@@ -0,0 +1,91 @@
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+
+#define NSIO 0xb7
+#define NS_GET_USERNS _IO(NSIO, 0x1)
+
+#define pr_err(fmt, ...) \
+ ({ \
+ fprintf(stderr, "%s:%d:" fmt ": %m\n", \
+ __func__, __LINE__, ##__VA_ARGS__); \
+ 1; \
+ })
+
+int main(int argc, char *argvp[])
+{
+ int pfd[2], ns, uns, init_uns;
+ struct stat st1, st2;
+ char path[128];
+ pid_t pid;
+ char c;
+ /* The pipe carries no data: EOF means the child closed its ends, which it does right after unshare(). */
+ if (pipe(pfd))
+ return 1;
+ /* Child: enter new uts and user namespaces, close the pipe, then idle until killed. */
+ pid = fork();
+ if (pid < 0)
+ return pr_err("fork");
+ if (pid == 0) {
+ prctl(PR_SET_PDEATHSIG, SIGKILL);
+ if (unshare(CLONE_NEWUTS | CLONE_NEWUSER))
+ return pr_err("unshare");
+ close(pfd[0]);
+ close(pfd[1]);
+ while (1)
+ sleep(1);
+ return 0;
+ }
+ close(pfd[1]);
+ if (read(pfd[0], &c, 1) != 0)
+ return pr_err("Unable to read from pipe");
+ close(pfd[0]);
+ /* The child's uts namespace must be owned by the child's user namespace. */
+ snprintf(path, sizeof(path), "/proc/%d/ns/uts", pid);
+ ns = open(path, O_RDONLY);
+ if (ns < 0)
+ return pr_err("Unable to open %s", path);
+
+ uns = ioctl(ns, NS_GET_USERNS);
+ if (uns < 0)
+ return pr_err("Unable to get an owning user namespace");
+
+ if (fstat(uns, &st1))
+ return pr_err("fstat");
+
+ snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+ if (stat(path, &st2))
+ return pr_err("stat");
+ /* Equal inode numbers mean both names refer to the same namespace file. */
+ if (st1.st_ino != st2.st_ino)
+ return pr_err("NS_GET_USERNS returned a wrong namespace");
+ /* Owner of the child's user namespace; validate the fd just returned (init_uns), not uns. */
+ init_uns = ioctl(uns, NS_GET_USERNS);
+ if (init_uns < 0)
+ return pr_err("Unable to get an owning user namespace");
+ /* The owner of init_uns is outside of this process's user namespace: expect EPERM. */
+ if (ioctl(init_uns, NS_GET_USERNS) >= 0 || errno != EPERM)
+ return pr_err("Don't get EPERM");
+ /* After moving into a fresh user namespace both owners lie outside of it. */
+ if (unshare(CLONE_NEWUSER))
+ return pr_err("unshare");
+
+ if (ioctl(ns, NS_GET_USERNS) >= 0 || errno != EPERM)
+ return pr_err("Don't get EPERM");
+ if (ioctl(init_uns, NS_GET_USERNS) >= 0 || errno != EPERM)
+ return pr_err("Don't get EPERM");
+
+ kill(pid, SIGKILL);
+ wait(NULL);
+ return 0;
+}
diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/nsfs/pidns.c
new file mode 100644
index 0000000..ae3a0d6
--- /dev/null
+++ b/tools/testing/selftests/nsfs/pidns.c
@@ -0,0 +1,78 @@
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+
+#define pr_err(fmt, ...) /* print "func:line:msg: <errno text>" to stderr */ \
+ ({ \
+ fprintf(stderr, "%s:%d:" fmt ": %m\n", \
+ __func__, __LINE__, ##__VA_ARGS__); \
+ 1; /* value of the whole expression, so callers can "return pr_err(...)" */ \
+ })
+
+#define NSIO 0xb7 /* ioctl type byte shared by the nsfs requests; NOTE(review): presumably mirrors the kernel header from this series - confirm */
+#define NS_GET_USERNS _IO(NSIO, 0x1) /* fd for the owning user namespace; not used by this test */
+#define NS_GET_PARENT _IO(NSIO, 0x2) /* on success returns an fd referring to the parent namespace */
+
+#define __stack_aligned__ __attribute__((aligned(16))) /* 16-byte alignment for the buffer used as a clone() stack */
+struct cr_clone_arg {
+ char stack[128] __stack_aligned__; /* NOTE(review): only 128 bytes of child stack; presumably tolerable because clone() is called without CLONE_VM - confirm */
+ char stack_ptr[0]; /* zero-length member placed right after stack[]; its address is passed to clone() as the child stack */
+};
+
+static int child(void *args)
+{
+	prctl(PR_SET_PDEATHSIG, SIGKILL);	/* get SIGKILL when the parent dies */
+	for (;;)				/* idle until killed */
+		sleep(1);
+	return 0;				/* not reached */
+}
+
+/* Selftest for the NS_GET_PARENT ioctl: 0 if every check passes, else 1. */
+int main(int argc, char *argv[])
+{
+	const char *ns_strs[] = {"pid", "user"};
+	struct cr_clone_arg ca;
+	struct stat st1, st2;
+	char path[128];	/* roomy enough for any "/proc/<pid>/ns/<name>" used below */
+	int ns, pns, i;
+	pid_t pid;
+
+	pid = clone(child, ca.stack_ptr, CLONE_NEWUSER | CLONE_NEWPID | SIGCHLD, NULL);
+	if (pid < 0)
+		return pr_err("clone");
+	for (i = 0; i < 2; i++) {
+		snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns_strs[i]);
+		ns = open(path, O_RDONLY);
+		if (ns < 0)
+			return pr_err("Unable to open %s", path);
+		pns = ioctl(ns, NS_GET_PARENT);
+		if (pns < 0)
+			return pr_err("Unable to get a parent %sns", ns_strs[i]);
+		/* The parent of the child's namespace must be our own namespace. */
+		snprintf(path, sizeof(path), "/proc/self/ns/%s", ns_strs[i]);
+		if (stat(path, &st2))
+			return pr_err("Unable to stat %s", path);
+		if (fstat(pns, &st1))
+			return pr_err("Unable to stat the parent %sns", ns_strs[i]);
+		if (st1.st_ino != st2.st_ino)
+			return pr_err("NS_GET_PARENT returned a wrong namespace");
+		/* The parent of our own namespace must be out of reach (EPERM). */
+		if (ioctl(pns, NS_GET_PARENT) >= 0 || errno != EPERM)
+			return pr_err("Don't get EPERM");
+		close(pns);	/* do not carry descriptors into the next iteration */
+		close(ns);
+	}
+	kill(pid, SIGKILL);
+	wait(NULL);
+	return 0;
+}
--
2.5.5
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 0/4 v3] Add an interface to discover relationships between namespaces
2016-09-06 7:47 [PATCH 0/4 v3] Add an interface to discover relationships between namespaces Andrei Vagin
` (3 preceding siblings ...)
2016-09-06 7:47 ` [PATCH 4/4] tools/testing: add a test to check nsfs ioctl-s Andrei Vagin
@ 2016-09-23 1:09 ` Eric W. Biederman
4 siblings, 0 replies; 9+ messages in thread
From: Eric W. Biederman @ 2016-09-23 1:09 UTC (permalink / raw)
To: Andrei Vagin
Cc: containers, linux-api, linux-kernel, linux-fsdevel,
James Bottomley, Michael Kerrisk (man-pages),
W. Trevor King, Alexander Viro, Serge Hallyn
Andrei Vagin <avagin@openvz.org> writes:
> From: Andrey Vagin <avagin@openvz.org>
>
> Each namespace has an owning user namespace, and currently there is no way
> to discover these relationships.
>
> Pid and user namespaces are hierarchical. There is no way to discover
> parent-child relationships either.
>
> Why might we want to know the relationships between namespaces?
>
> One use would be visualization, in order to understand the running
> system. Another would be to answer the question: what capability does
> process X have to perform operations on a resource governed by namespace
> Y?
>
> One more use case (which is usually called abnormal) is checkpoint/restart.
> In CRIU we are going to dump and restore nested namespaces.
>
> There [1] was a discussion about which interface to choose to determine
> relationships between namespaces.
>
> Eric suggested to add two ioctl-s [2]:
>> Grumble, Grumble. I think this may actually be a case for creating ioctls
>> for these two cases. Now that random nsfs file descriptors are bind
>> mountable the original reason for using proc files is not as pressing.
>>
>> One ioctl for the user namespace that owns a file descriptor.
>> One ioctl for the parent namespace of a namespace file descriptor.
>
> Here is an implementation of these ioctl-s.
>
> $ man man7/namespaces.7
> ...
> Since Linux 4.X, the following ioctl(2) calls are supported for
> namespace file descriptors. The correct syntax is:
>
> fd = ioctl(ns_fd, ioctl_type);
>
> where ioctl_type is one of the following:
>
> NS_GET_USERNS
> Returns a file descriptor that refers to an owning user names‐
> pace.
>
> NS_GET_PARENT
> Returns a file descriptor that refers to a parent namespace.
> This ioctl(2) can be used for pid and user namespaces. For
> user namespaces, NS_GET_PARENT and NS_GET_USERNS have the same
> meaning.
>
> In addition to generic ioctl(2) errors, the following specific ones
> can occur:
>
> EINVAL NS_GET_PARENT was called for a nonhierarchical namespace.
>
> EPERM The requested namespace is outside of the current namespace
> scope.
>
> [1] https://lkml.org/lkml/2016/7/6/158
> [2] https://lkml.org/lkml/2016/7/9/101
>
> Changes for v2:
> * don't return ENOENT for init_user_ns and init_pid_ns. There is nothing
> outside of the init namespace, so we can return EPERM in this case too.
>> The fewer special cases the easier the code is to get
>> correct, and the easier it is to read. // Eric
>
> Changes for v3:
> * rename ns->get_owner() to ns->owner(). get_* usually means that it
> grabs a reference.
>
> Cc: "Eric W. Biederman" <ebiederm@xmission.com>
> Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
> Cc: "Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com>
> Cc: "W. Trevor King" <wking@tremily.us>
> Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> Cc: Serge Hallyn <serge.hallyn@canonical.com>
>
Applied thanks.
I didn't see any issues except your patch __ns_get_path was missing a
mntput in the retry case. So I just fixed that.
Eric
^ permalink raw reply [flat|nested] 9+ messages in thread