* [PATCH 1/7] ns: proc files for namespace naming policy.
@ 2011-05-07 2:24 ` Eric W. Biederman
0 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 2:24 UTC (permalink / raw)
To: linux-arch
Cc: linux-kernel, netdev, linux-fsdevel, jamal, Daniel Lezcano,
Linux Containers, Renato Westphal, Eric W. Biederman
Create files under /proc/<pid>/ns/ to allow controlling the
namespaces of a process.
This addresses three specific problems that can make namespaces hard to
work with.
- Namespaces require a dedicated process to pin them in memory.
- It is not possible to use a namespace unless you are the child
of the original creator.
- Namespaces don't have names that userspace can use to talk about
them.
The namespace files under /proc/<pid>/ns/ can be opened and the
file descriptor can be used to talk about a specific namespace, and
to keep the specified namespace alive.
A namespace can be kept alive by either holding the file descriptor
open or bind mounting the file someplace else, e.g.:
mount --bind /proc/self/ns/net /some/filesystem/path
mount --bind /proc/self/fd/<N> /some/filesystem/path
This allows namespaces to be named with userspace policy.
It requires additional support to make use of these file descriptors
and that will be coming in the following patches.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/proc/Makefile | 1 +
fs/proc/base.c | 20 ++---
fs/proc/inode.c | 7 ++
fs/proc/internal.h | 18 +++++
fs/proc/namespaces.c | 188 +++++++++++++++++++++++++++++++++++++++++++++++
include/linux/proc_fs.h | 18 +++++
6 files changed, 241 insertions(+), 11 deletions(-)
create mode 100644 fs/proc/namespaces.c
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index df434c5..c1c7293 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -20,6 +20,7 @@ proc-y += stat.o
proc-y += uptime.o
proc-y += version.o
proc-y += softirqs.o
+proc-y += namespaces.o
proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
proc-$(CONFIG_NET) += proc_net.o
proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dfa5327..dc8bca7 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -600,7 +600,7 @@ static int proc_fd_access_allowed(struct inode *inode)
return allowed;
}
-static int proc_setattr(struct dentry *dentry, struct iattr *attr)
+int proc_setattr(struct dentry *dentry, struct iattr *attr)
{
int error;
struct inode *inode = dentry->d_inode;
@@ -1736,8 +1736,7 @@ static int task_dumpable(struct task_struct *task)
return 0;
}
-
-static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
+struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
{
struct inode * inode;
struct proc_inode *ei;
@@ -1779,7 +1778,7 @@ out_unlock:
return NULL;
}
-static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
struct inode *inode = dentry->d_inode;
struct task_struct *task;
@@ -1820,7 +1819,7 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
* made this apply to all per process world readable and executable
* directories.
*/
-static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
+int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode;
struct task_struct *task;
@@ -1862,7 +1861,7 @@ static int pid_delete_dentry(const struct dentry * dentry)
return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
}
-static const struct dentry_operations pid_dentry_operations =
+const struct dentry_operations pid_dentry_operations =
{
.d_revalidate = pid_revalidate,
.d_delete = pid_delete_dentry,
@@ -1870,9 +1869,6 @@ static const struct dentry_operations pid_dentry_operations =
/* Lookups */
-typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
- struct task_struct *, const void *);
-
/*
* Fill a directory entry.
*
@@ -1885,8 +1881,8 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
* reported by readdir in sync with the inode numbers reported
* by stat.
*/
-static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
- char *name, int len,
+int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+ const char *name, int len,
instantiate_t instantiate, struct task_struct *task, const void *ptr)
{
struct dentry *child, *dir = filp->f_path.dentry;
@@ -2820,6 +2816,7 @@ static const struct pid_entry tgid_base_stuff[] = {
DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
+ DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
#ifdef CONFIG_NET
DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
#endif
@@ -3168,6 +3165,7 @@ out_no_task:
static const struct pid_entry tid_base_stuff[] = {
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
+ DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
REG("environ", S_IRUSR, proc_environ_operations),
INF("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d15aa1b..74b48cf 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -28,6 +28,7 @@ static void proc_evict_inode(struct inode *inode)
{
struct proc_dir_entry *de;
struct ctl_table_header *head;
+ const struct proc_ns_operations *ns_ops;
truncate_inode_pages(&inode->i_data, 0);
end_writeback(inode);
@@ -44,6 +45,10 @@ static void proc_evict_inode(struct inode *inode)
rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
sysctl_head_put(head);
}
+ /* Release any associated namespace */
+ ns_ops = PROC_I(inode)->ns_ops;
+ if (ns_ops && ns_ops->put)
+ ns_ops->put(PROC_I(inode)->ns);
}
static struct kmem_cache * proc_inode_cachep;
@@ -62,6 +67,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
ei->pde = NULL;
ei->sysctl = NULL;
ei->sysctl_entry = NULL;
+ ei->ns = NULL;
+ ei->ns_ops = NULL;
inode = &ei->vfs_inode;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
return inode;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c03e8d3..96245a1 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -119,3 +119,21 @@ struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
*/
int proc_readdir(struct file *, void *, filldir_t);
struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
+
+
+
+/* Lookups */
+typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
+ struct task_struct *, const void *);
+int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+ const char *name, int len,
+ instantiate_t instantiate, struct task_struct *task, const void *ptr);
+int pid_revalidate(struct dentry *dentry, struct nameidata *nd);
+struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task);
+extern const struct dentry_operations pid_dentry_operations;
+int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
+int proc_setattr(struct dentry *dentry, struct iattr *attr);
+
+extern const struct inode_operations proc_ns_dir_inode_operations;
+extern const struct file_operations proc_ns_dir_operations;
+
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
new file mode 100644
index 0000000..6ae9f07
--- /dev/null
+++ b/fs/proc/namespaces.c
@@ -0,0 +1,188 @@
+#include <linux/proc_fs.h>
+#include <linux/nsproxy.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/fs_struct.h>
+#include <linux/mount.h>
+#include <linux/path.h>
+#include <linux/namei.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+#include <net/net_namespace.h>
+#include <linux/mnt_namespace.h>
+#include <linux/ipc_namespace.h>
+#include <linux/pid_namespace.h>
+#include "internal.h"
+
+
+static const struct proc_ns_operations *ns_entries[] = {
+};
+
+static const struct file_operations ns_file_operations = {
+ .llseek = no_llseek,
+};
+
+static struct dentry *proc_ns_instantiate(struct inode *dir,
+ struct dentry *dentry, struct task_struct *task, const void *ptr)
+{
+ const struct proc_ns_operations *ns_ops = ptr;
+ struct inode *inode;
+ struct proc_inode *ei;
+ struct dentry *error = ERR_PTR(-ENOENT);
+
+ inode = proc_pid_make_inode(dir->i_sb, task);
+ if (!inode)
+ goto out;
+
+ ei = PROC_I(inode);
+ inode->i_mode = S_IFREG|S_IRUSR;
+ inode->i_fop = &ns_file_operations;
+ ei->ns_ops = ns_ops;
+ ei->ns = ns_ops->get(task);
+ if (!ei->ns)
+ goto out_iput;
+
+ dentry->d_op = &pid_dentry_operations;
+ d_add(dentry, inode);
+ /* Close the race of the process dying before we return the dentry */
+ if (pid_revalidate(dentry, NULL))
+ error = NULL;
+out:
+ return error;
+out_iput:
+ iput(inode);
+ goto out;
+}
+
+static int proc_ns_fill_cache(struct file *filp, void *dirent,
+ filldir_t filldir, struct task_struct *task,
+ const struct proc_ns_operations *ops)
+{
+ return proc_fill_cache(filp, dirent, filldir,
+ ops->name, strlen(ops->name),
+ proc_ns_instantiate, task, ops);
+}
+
+static int proc_ns_dir_readdir(struct file *filp, void *dirent,
+ filldir_t filldir)
+{
+ int i;
+ struct dentry *dentry = filp->f_path.dentry;
+ struct inode *inode = dentry->d_inode;
+ struct task_struct *task = get_proc_task(inode);
+ const struct proc_ns_operations **entry, **last;
+ ino_t ino;
+ int ret;
+
+ ret = -ENOENT;
+ if (!task)
+ goto out_no_task;
+
+ ret = -EPERM;
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ goto out;
+
+ ret = 0;
+ i = filp->f_pos;
+ switch (i) {
+ case 0:
+ ino = inode->i_ino;
+ if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
+ goto out;
+ i++;
+ filp->f_pos++;
+ /* fall through */
+ case 1:
+ ino = parent_ino(dentry);
+ if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
+ goto out;
+ i++;
+ filp->f_pos++;
+ /* fall through */
+ default:
+ i -= 2;
+ if (i >= ARRAY_SIZE(ns_entries)) {
+ ret = 1;
+ goto out;
+ }
+ entry = ns_entries + i;
+ last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
+ while (entry <= last) {
+ if (proc_ns_fill_cache(filp, dirent, filldir,
+ task, *entry) < 0)
+ goto out;
+ filp->f_pos++;
+ entry++;
+ }
+ }
+
+ ret = 1;
+out:
+ put_task_struct(task);
+out_no_task:
+ return ret;
+}
+
+const struct file_operations proc_ns_dir_operations = {
+ .read = generic_read_dir,
+ .readdir = proc_ns_dir_readdir,
+};
+
+static struct dentry *proc_ns_dir_lookup(struct inode *dir,
+ struct dentry *dentry, struct nameidata *nd)
+{
+ struct dentry *error;
+ struct task_struct *task = get_proc_task(dir);
+ const struct proc_ns_operations **entry, **last;
+ unsigned int len = dentry->d_name.len;
+
+ error = ERR_PTR(-ENOENT);
+
+ if (!task)
+ goto out_no_task;
+
+ error = ERR_PTR(-EPERM);
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ goto out;
+
+ last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
+ for (entry = ns_entries; entry <= last; entry++) {
+ if (strlen((*entry)->name) != len)
+ continue;
+ if (!memcmp(dentry->d_name.name, (*entry)->name, len))
+ break;
+ }
+ if (entry > last)
+ goto out;
+
+ error = proc_ns_instantiate(dir, dentry, task, *entry);
+out:
+ put_task_struct(task);
+out_no_task:
+ return error;
+}
+
+const struct inode_operations proc_ns_dir_inode_operations = {
+ .lookup = proc_ns_dir_lookup,
+ .getattr = pid_getattr,
+ .setattr = proc_setattr,
+};
+
+struct file *proc_ns_fget(int fd)
+{
+ struct file *file;
+
+ file = fget(fd);
+ if (!file)
+ return ERR_PTR(-EBADF);
+
+ if (file->f_op != &ns_file_operations)
+ goto out_invalid;
+
+ return file;
+
+out_invalid:
+ fput(file);
+ return ERR_PTR(-EINVAL);
+}
+
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 838c114..a6d2c6d 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -179,6 +179,8 @@ extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
extern struct file *get_mm_exe_file(struct mm_struct *mm);
extern void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm);
+extern struct file *proc_ns_fget(int fd);
+
#else
#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; })
@@ -239,6 +241,11 @@ static inline void dup_mm_exe_file(struct mm_struct *oldmm,
struct mm_struct *newmm)
{}
+static inline struct file *proc_ns_fget(int fd)
+{
+ return ERR_PTR(-EINVAL);
+}
+
#endif /* CONFIG_PROC_FS */
#if !defined(CONFIG_PROC_KCORE)
@@ -250,6 +257,15 @@ kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
extern void kclist_add(struct kcore_list *, void *, size_t, int type);
#endif
+struct nsproxy;
+struct proc_ns_operations {
+ const char *name;
+ int type;
+ void *(*get)(struct task_struct *task);
+ void (*put)(void *ns);
+ int (*install)(struct nsproxy *nsproxy, void *ns);
+};
+
union proc_op {
int (*proc_get_link)(struct inode *, struct path *);
int (*proc_read)(struct task_struct *task, char *page);
@@ -268,6 +284,8 @@ struct proc_inode {
struct proc_dir_entry *pde;
struct ctl_table_header *sysctl;
struct ctl_table *sysctl_entry;
+ void *ns;
+ const struct proc_ns_operations *ns_ops;
struct inode vfs_inode;
};
--
1.6.5.2.143.g8cc62
^ permalink raw reply related [flat|nested] 70+ messages in thread
[parent not found: <1304735101-1824-1-git-send-email-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>]
* [PATCH 2/7] ns: Introduce the setns syscall
2011-05-07 2:24 ` Eric W. Biederman
(?)
(?)
@ 2011-05-07 2:24 ` Eric W. Biederman
-1 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 2:24 UTC (permalink / raw)
To: linux-arch-u79uwXL29TY76Z2rM5mHXA
Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
Eric W. Biederman, linux-fsdevel-u79uwXL29TY76Z2rM5mHXA
With the networking stack today there is demand to handle
multiple network stacks at a time. Not in the context
of containers but in the context of people doing interesting
things with routing.
There is also demand in the context of containers to have
an efficient way to execute some code in the container itself.
If nothing else it is very useful as a debugging technique.
Both problems can be solved by starting some form of login
daemon in the namespaces people want access to, or you
can play games by ptracing a process and getting the
traced process to do things you want it to do. However
it turns out that a login daemon or a ptrace puppet
controller are more code, they are more prone to
failure, and generally they are less efficient than
simply changing the namespace of a process to a
specified one.
Pieces of this puzzle can also be solved by coming up with
targeted system calls (perhaps socketat) that each solve
a subset of the larger problem, instead of coming up with
a general purpose system call. Overall that appears
to be more work for less reward.
int setns(int fd, int nstype);
The fd argument is a file descriptor referring to a proc
file of the namespace you want to switch the process to.
In the setns system call the nstype is either 0 or the
clone flag of the namespace you intend to change; specifying
it prevents changing a namespace unintentionally.
v2: Most of the architecture support added by Daniel Lezcano <dlezcano-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
v3: ported to v2.6.36-rc4 by: Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
v4: Moved wiring up of the system call to another patch
Signed-off-by: Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
kernel/nsproxy.c | 37 +++++++++++++++++++++++++++++++++++++
1 files changed, 37 insertions(+), 0 deletions(-)
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index a05d191..96059d8 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -22,6 +22,9 @@
#include <linux/pid_namespace.h>
#include <net/net_namespace.h>
#include <linux/ipc_namespace.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
+#include <linux/syscalls.h>
static struct kmem_cache *nsproxy_cachep;
@@ -233,6 +236,40 @@ void exit_task_namespaces(struct task_struct *p)
switch_task_namespaces(p, NULL);
}
+SYSCALL_DEFINE2(setns, int, fd, int, nstype)
+{
+ const struct proc_ns_operations *ops;
+ struct task_struct *tsk = current;
+ struct nsproxy *new_nsproxy;
+ struct proc_inode *ei;
+ struct file *file;
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ file = proc_ns_fget(fd);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ err = -EINVAL;
+ ei = PROC_I(file->f_dentry->d_inode);
+ ops = ei->ns_ops;
+ if (nstype && (ops->type != nstype))
+ goto out;
+
+ new_nsproxy = create_new_namespaces(0, tsk, tsk->fs);
+ err = ops->install(new_nsproxy, ei->ns);
+ if (err) {
+ free_nsproxy(new_nsproxy);
+ goto out;
+ }
+ switch_task_namespaces(tsk, new_nsproxy);
+out:
+ fput(file);
+ return err;
+}
+
static int __init nsproxy_cache_init(void)
{
nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
--
1.6.5.2.143.g8cc62
^ permalink raw reply related [flat|nested] 70+ messages in thread
* [PATCH 2/7] ns: Introduce the setns syscall
@ 2011-05-07 2:24 ` Eric W. Biederman
0 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 2:24 UTC (permalink / raw)
To: linux-arch
Cc: linux-kernel, netdev, linux-fsdevel, jamal, Daniel Lezcano,
Linux Containers, Renato Westphal, Eric W. Biederman
With the networking stack today there is demand to handle
multiple network stacks at a time. Not in the context
of containers but in the context of people doing interesting
things with routing.
There is also demand in the context of containers to have
an efficient way to execute some code in the container itself.
If nothing else it is very useful as a debugging technique.
Both problems can be solved by starting some form of login
daemon in the namespaces people want access to, or you
can play games by ptracing a process and getting the
traced process to do things you want it to do. However
it turns out that a login daemon or a ptrace puppet
controller are more code, they are more prone to
failure, and generally they are less efficient than
simply changing the namespace of a process to a
specified one.
Pieces of this puzzle can also be solved by coming up with
targeted system calls (perhaps socketat) that each solve
a subset of the larger problem, instead of coming up with
a general purpose system call. Overall that appears
to be more work for less reward.
int setns(int fd, int nstype);
The fd argument is a file descriptor referring to a proc
file of the namespace you want to switch the process to.
In the setns system call the nstype is either 0 or the
clone flag of the namespace you intend to change; specifying
it prevents changing a namespace unintentionally.
v2: Most of the architecture support added by Daniel Lezcano <dlezcano@fr.ibm.com>
v3: ported to v2.6.36-rc4 by: Eric W. Biederman <ebiederm@xmission.com>
v4: Moved wiring up of the system call to another patch
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
kernel/nsproxy.c | 37 +++++++++++++++++++++++++++++++++++++
1 files changed, 37 insertions(+), 0 deletions(-)
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index a05d191..96059d8 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -22,6 +22,9 @@
#include <linux/pid_namespace.h>
#include <net/net_namespace.h>
#include <linux/ipc_namespace.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
+#include <linux/syscalls.h>
static struct kmem_cache *nsproxy_cachep;
@@ -233,6 +236,40 @@ void exit_task_namespaces(struct task_struct *p)
switch_task_namespaces(p, NULL);
}
+SYSCALL_DEFINE2(setns, int, fd, int, nstype)
+{
+ const struct proc_ns_operations *ops;
+ struct task_struct *tsk = current;
+ struct nsproxy *new_nsproxy;
+ struct proc_inode *ei;
+ struct file *file;
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ file = proc_ns_fget(fd);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ err = -EINVAL;
+ ei = PROC_I(file->f_dentry->d_inode);
+ ops = ei->ns_ops;
+ if (nstype && (ops->type != nstype))
+ goto out;
+
+ new_nsproxy = create_new_namespaces(0, tsk, tsk->fs);
+ err = ops->install(new_nsproxy, ei->ns);
+ if (err) {
+ free_nsproxy(new_nsproxy);
+ goto out;
+ }
+ switch_task_namespaces(tsk, new_nsproxy);
+out:
+ fput(file);
+ return err;
+}
+
static int __init nsproxy_cache_init(void)
{
nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
--
1.6.5.2.143.g8cc62
^ permalink raw reply related [flat|nested] 70+ messages in thread
* [PATCH 2/7] ns: Introduce the setns syscall
@ 2011-05-07 2:24 ` Eric W. Biederman
0 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 2:24 UTC (permalink / raw)
To: linux-arch-u79uwXL29TY76Z2rM5mHXA
Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
Eric W. Biederman, linux-fsdevel-u79uwXL29TY76Z2rM5mHXA
With the networking stack today there is demand to handle
multiple network stacks at a time. Not in the context
of containers but in the context of people doing interesting
things with routing.
There is also demand in the context of containers to have
an efficient way to execute some code in the container itself.
If nothing else it is very useful as a debugging technique.
Both problems can be solved by starting some form of login
daemon in the namespaces people want access to, or you
can play games by ptracing a process and getting the
traced process to do things you want it to do. However
it turns out that a login daemon or a ptrace puppet
controller are more code, they are more prone to
failure, and generally they are less efficient than
simply changing the namespace of a process to a
specified one.
Pieces of this puzzle can also be solved by coming up with
targeted system calls (perhaps socketat) that each solve
a subset of the larger problem, instead of coming up with
a general purpose system call. Overall that appears
to be more work for less reward.
int setns(int fd, int nstype);
The fd argument is a file descriptor referring to a proc
file of the namespace you want to switch the process to.
In the setns system call the nstype is either 0 or the
clone flag of the namespace you intend to change; specifying
it prevents changing a namespace unintentionally.
v2: Most of the architecture support added by Daniel Lezcano <dlezcano-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
v3: ported to v2.6.36-rc4 by: Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
v4: Moved wiring up of the system call to another patch
Signed-off-by: Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
kernel/nsproxy.c | 37 +++++++++++++++++++++++++++++++++++++
1 files changed, 37 insertions(+), 0 deletions(-)
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index a05d191..96059d8 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -22,6 +22,9 @@
#include <linux/pid_namespace.h>
#include <net/net_namespace.h>
#include <linux/ipc_namespace.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
+#include <linux/syscalls.h>
static struct kmem_cache *nsproxy_cachep;
@@ -233,6 +236,40 @@ void exit_task_namespaces(struct task_struct *p)
switch_task_namespaces(p, NULL);
}
+SYSCALL_DEFINE2(setns, int, fd, int, nstype)
+{
+ const struct proc_ns_operations *ops;
+ struct task_struct *tsk = current;
+ struct nsproxy *new_nsproxy;
+ struct proc_inode *ei;
+ struct file *file;
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ file = proc_ns_fget(fd);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ err = -EINVAL;
+ ei = PROC_I(file->f_dentry->d_inode);
+ ops = ei->ns_ops;
+ if (nstype && (ops->type != nstype))
+ goto out;
+
+ new_nsproxy = create_new_namespaces(0, tsk, tsk->fs);
+ err = ops->install(new_nsproxy, ei->ns);
+ if (err) {
+ free_nsproxy(new_nsproxy);
+ goto out;
+ }
+ switch_task_namespaces(tsk, new_nsproxy);
+out:
+ fput(file);
+ return err;
+}
+
static int __init nsproxy_cache_init(void)
{
nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
--
1.6.5.2.143.g8cc62
^ permalink raw reply related [flat|nested] 70+ messages in thread
* [PATCH 2/7] ns: Introduce the setns syscall
@ 2011-05-07 2:24 ` Eric W. Biederman
0 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 2:24 UTC (permalink / raw)
To: linux-arch
Cc: linux-kernel, netdev, linux-fsdevel, jamal, Daniel Lezcano,
Linux Containers, Renato Westphal, Eric W. Biederman
With the networking stack today there is demand to handle
multiple network stacks at a time. Not in the context
of containers but in the context of people doing interesting
things with routing.
There is also demand in the context of containers to have
an efficient way to execute some code in the container itself.
If nothing else it is very useful as a debugging technique.
Both problems can be solved by starting some form of login
daemon in the namespaces people want access to, or you
can play games by ptracing a process and getting the
traced process to do things you want it to do. However
it turns out that a login daemon or a ptrace puppet
controller are more code, they are more prone to
failure, and generally they are less efficient than
simply changing the namespace of a process to a
specified one.
Pieces of this puzzle can also be solved by coming up with
targeted system calls (perhaps socketat) that each solve
a subset of the larger problem, instead of coming up with
a general purpose system call. Overall that appears
to be more work for less reward.
int setns(int fd, int nstype);
The fd argument is a file descriptor referring to a proc
file of the namespace you want to switch the process to.
In the setns system call the nstype is either 0 or the
clone flag of the namespace you intend to change; specifying
it prevents changing a namespace unintentionally.
v2: Most of the architecture support added by Daniel Lezcano <dlezcano@fr.ibm.com>
v3: ported to v2.6.36-rc4 by: Eric W. Biederman <ebiederm@xmission.com>
v4: Moved wiring up of the system call to another patch
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
kernel/nsproxy.c | 37 +++++++++++++++++++++++++++++++++++++
1 files changed, 37 insertions(+), 0 deletions(-)
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index a05d191..96059d8 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -22,6 +22,9 @@
#include <linux/pid_namespace.h>
#include <net/net_namespace.h>
#include <linux/ipc_namespace.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
+#include <linux/syscalls.h>
static struct kmem_cache *nsproxy_cachep;
@@ -233,6 +236,40 @@ void exit_task_namespaces(struct task_struct *p)
switch_task_namespaces(p, NULL);
}
+SYSCALL_DEFINE2(setns, int, fd, int, nstype)
+{
+ const struct proc_ns_operations *ops;
+ struct task_struct *tsk = current;
+ struct nsproxy *new_nsproxy;
+ struct proc_inode *ei;
+ struct file *file;
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ file = proc_ns_fget(fd);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ err = -EINVAL;
+ ei = PROC_I(file->f_dentry->d_inode);
+ ops = ei->ns_ops;
+ if (nstype && (ops->type != nstype))
+ goto out;
+
+ new_nsproxy = create_new_namespaces(0, tsk, tsk->fs);
+ err = ops->install(new_nsproxy, ei->ns);
+ if (err) {
+ free_nsproxy(new_nsproxy);
+ goto out;
+ }
+ switch_task_namespaces(tsk, new_nsproxy);
+out:
+ fput(file);
+ return err;
+}
+
static int __init nsproxy_cache_init(void)
{
nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
--
1.6.5.2.143.g8cc62
^ permalink raw reply related [flat|nested] 70+ messages in thread
[parent not found: <1304735101-1824-2-git-send-email-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>]
* Re: [PATCH 2/7] ns: Introduce the setns syscall
2011-05-07 2:24 ` Eric W. Biederman
@ 2011-05-07 8:01 ` Rémi Denis-Courmont
-1 siblings, 0 replies; 70+ messages in thread
From: Rémi Denis-Courmont @ 2011-05-07 8:01 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA
Le samedi 7 mai 2011 05:24:56 Eric W. Biederman, vous avez écrit :
> Pieces of this puzzle can also be solved by instead of
> coming up with a general purpose system call coming up
> with targed system calls perhaps socketat that solve
> a subset of the larger problem. Overall that appears
> to be more work for less reward.
socketat() is still required for multithreaded namespace-aware userspace, I
believe.
--
Rémi Denis-Courmont
http://www.remlab.info/
http://fi.linkedin.com/in/remidenis
^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH 2/7] ns: Introduce the setns syscall
@ 2011-05-07 8:01 ` Rémi Denis-Courmont
0 siblings, 0 replies; 70+ messages in thread
From: Rémi Denis-Courmont @ 2011-05-07 8:01 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Daniel Lezcano, Linux Containers, Renato Westphal
Le samedi 7 mai 2011 05:24:56 Eric W. Biederman, vous avez écrit :
> Pieces of this puzzle can also be solved by instead of
> coming up with a general purpose system call coming up
> with targed system calls perhaps socketat that solve
> a subset of the larger problem. Overall that appears
> to be more work for less reward.
socketat() is still required for multithreaded namespace-aware userspace, I
believe.
--
Rémi Denis-Courmont
http://www.remlab.info/
http://fi.linkedin.com/in/remidenis
^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH 2/7] ns: Introduce the setns syscall
2011-05-07 8:01 ` Rémi Denis-Courmont
@ 2011-05-07 13:57 ` Eric W. Biederman
-1 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 13:57 UTC (permalink / raw)
To: Rémi Denis-Courmont
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Daniel Lezcano, Linux Containers, Renato Westphal
"Rémi Denis-Courmont" <remi@remlab.net> writes:
> Le samedi 7 mai 2011 05:24:56 Eric W. Biederman, vous avez écrit :
>> Pieces of this puzzle can also be solved by instead of
>> coming up with a general purpose system call coming up
>> with targed system calls perhaps socketat that solve
>> a subset of the larger problem. Overall that appears
>> to be more work for less reward.
>
> socketat() is still required for multithreaded namespace-aware userspace, I
> believe.
The network namespace is a per task property so there are no problems
with multithreaded network namespace aware userspace applications. The
implementation of a userspace socketat will still need to disable signal
handling around the network namespace switch to be signal safe. Which
means that ultimately a kernel version of socketat may be desirable
for performance reasons, but I know of no correctness reasons to need
it.
For the time being I have simply removed socketat from what I plan to
merge because it is not strictly needed, I don't yet have a test case
for socketat, and I don't have as much time to work on this as I
would like.
There is one bug a multi-threaded network namespace aware user space
application might run into, and that is that /proc/net is a symlink to
/proc/self/net. Which means that if you open /proc/net/foo from a task with
a different network namespace than the task whose tid equals your
tgid, /proc/net will return the wrong file. Still, you can
avoid even that silliness by opening /proc/<tid>/net.
Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH 2/7] ns: Introduce the setns syscall
@ 2011-05-07 13:57 ` Eric W. Biederman
0 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 13:57 UTC (permalink / raw)
To: Rémi Denis-Courmont
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Daniel Lezcano, Linux Containers, Renato Westphal
"Rémi Denis-Courmont" <remi@remlab.net> writes:
> Le samedi 7 mai 2011 05:24:56 Eric W. Biederman, vous avez écrit :
>> Pieces of this puzzle can also be solved by instead of
>> coming up with a general purpose system call coming up
>> with targed system calls perhaps socketat that solve
>> a subset of the larger problem. Overall that appears
>> to be more work for less reward.
>
> socketat() is still required for multithreaded namespace-aware userspace, I
> believe.
The network namespace is a per task property so there are no problems
with multithreaded network namespace aware userspace applications. The
implementation of a userspace socketat will still need to disable signal
handling around the network namespace switch to be signal safe. Which
means that ultimately a kernel version of socketat may be desirable,
for performance reasons but I know of no correctness reasons to need
it.
For the time being I have simply removed socketat from what I plan to
merge because it is not strictly needed, I don't yet have a test case
for socketat, and I don't have as much time to work on this as I
would like.
There is one bug a multi-threaded network namespace aware user space
application might run into, and that is /proc/net is a symlink to
/proc/self/net. Which means that if you open /proc/net/foo from a task with
a different network namespace than the task whose tid equals your
tgid, /proc/net will return the wrong file. Still you can
avoid even that silliness by opening /proc/<tid>/net.
Eric
^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH 2/7] ns: Introduce the setns syscall
2011-05-07 2:24 ` Eric W. Biederman
` (3 preceding siblings ...)
(?)
@ 2011-05-07 22:39 ` Daniel Lezcano
-1 siblings, 0 replies; 70+ messages in thread
From: Daniel Lezcano @ 2011-05-07 22:39 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Linux Containers, Renato Westphal
On 05/07/2011 04:24 AM, Eric W. Biederman wrote:
> With the networking stack today there is demand to handle
> multiple network stacks at a time. Not in the context
> of containers but in the context of people doing interesting
> things with routing.
>
> There is also demand in the context of containers to have
> an efficient way to execute some code in the container itself.
> If nothing else it is very useful ad a debugging technique.
>
> Both problems can be solved by starting some form of login
> daemon in the namespaces people want access to, or you
> can play games by ptracing a process and getting the
> traced process to do things you want it to do. However
> it turns out that a login daemon or a ptrace puppet
> controller are more code, they are more prone to
> failure, and generally they are less efficient than
> simply changing the namespace of a process to a
> specified one.
>
> Pieces of this puzzle can also be solved by instead of
> coming up with a general purpose system call coming up
> with targed system calls perhaps socketat that solve
> a subset of the larger problem. Overall that appears
> to be more work for less reward.
>
> int setns(int fd, int nstype);
>
> The fd argument is a file descriptor referring to a proc
> file of the namespace you want to switch the process to.
>
> In the setns system call the nstype is 0 or specifies
> an clone flag of the namespace you intend to change
> to prevent changing a namespace unintentionally.
>
> v2: Most of the architecture support added by Daniel Lezcano<dlezcano@fr.ibm.com>
> v3: ported to v2.6.36-rc4 by: Eric W. Biederman<ebiederm@xmission.com>
> v4: Moved wiring up of the system call to another patch
>
> Signed-off-by: Eric W. Biederman<ebiederm@xmission.com>
> ---
Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH 2/7] ns: Introduce the setns syscall
2011-05-07 2:24 ` Eric W. Biederman
` (4 preceding siblings ...)
(?)
@ 2011-05-08 3:51 ` Matt Helsley
-1 siblings, 0 replies; 70+ messages in thread
From: Matt Helsley @ 2011-05-08 3:51 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch, netdev, linux-kernel, Linux Containers, linux-fsdevel
On Fri, May 06, 2011 at 07:24:56PM -0700, Eric W. Biederman wrote:
> With the networking stack today there is demand to handle
> multiple network stacks at a time. Not in the context
> of containers but in the context of people doing interesting
> things with routing.
>
> There is also demand in the context of containers to have
> an efficient way to execute some code in the container itself.
> If nothing else it is very useful ad a debugging technique.
>
> Both problems can be solved by starting some form of login
> daemon in the namespaces people want access to, or you
> can play games by ptracing a process and getting the
> traced process to do things you want it to do. However
> it turns out that a login daemon or a ptrace puppet
> controller are more code, they are more prone to
> failure, and generally they are less efficient than
> simply changing the namespace of a process to a
> specified one.
>
> Pieces of this puzzle can also be solved by instead of
> coming up with a general purpose system call coming up
> with targed system calls perhaps socketat that solve
> a subset of the larger problem. Overall that appears
> to be more work for less reward.
>
> int setns(int fd, int nstype);
>
> The fd argument is a file descriptor referring to a proc
> file of the namespace you want to switch the process to.
>
> In the setns system call the nstype is 0 or specifies
> an clone flag of the namespace you intend to change
> to prevent changing a namespace unintentionally.
>
> v2: Most of the architecture support added by Daniel Lezcano <dlezcano@fr.ibm.com>
> v3: ported to v2.6.36-rc4 by: Eric W. Biederman <ebiederm@xmission.com>
> v4: Moved wiring up of the system call to another patch
>
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> ---
> kernel/nsproxy.c | 37 +++++++++++++++++++++++++++++++++++++
> 1 files changed, 37 insertions(+), 0 deletions(-)
>
> diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
> index a05d191..96059d8 100644
> --- a/kernel/nsproxy.c
> +++ b/kernel/nsproxy.c
> @@ -22,6 +22,9 @@
> #include <linux/pid_namespace.h>
> #include <net/net_namespace.h>
> #include <linux/ipc_namespace.h>
> +#include <linux/proc_fs.h>
> +#include <linux/file.h>
> +#include <linux/syscalls.h>
>
> static struct kmem_cache *nsproxy_cachep;
>
> @@ -233,6 +236,40 @@ void exit_task_namespaces(struct task_struct *p)
> switch_task_namespaces(p, NULL);
> }
>
> +SYSCALL_DEFINE2(setns, int, fd, int, nstype)
> +{
> + const struct proc_ns_operations *ops;
> + struct task_struct *tsk = current;
> + struct nsproxy *new_nsproxy;
> + struct proc_inode *ei;
> + struct file *file;
> + int err;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + file = proc_ns_fget(fd);
> + if (IS_ERR(file))
> + return PTR_ERR(file);
> +
> + err = -EINVAL;
> + ei = PROC_I(file->f_dentry->d_inode);
> + ops = ei->ns_ops;
> + if (nstype && (ops->type != nstype))
> + goto out;
> +
> + new_nsproxy = create_new_namespaces(0, tsk, tsk->fs);
Doesn't this need some error checking like:
if (IS_ERR(new_nsproxy)) {
err = PTR_ERR(new_nsproxy);
goto out;
}
> + err = ops->install(new_nsproxy, ei->ns);
> + if (err) {
> + free_nsproxy(new_nsproxy);
> + goto out;
> + }
> + switch_task_namespaces(tsk, new_nsproxy);
> +out:
> + fput(file);
> + return err;
> +}
> +
> static int __init nsproxy_cache_init(void)
> {
> nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
> --
> 1.6.5.2.143.g8cc62
>
> _______________________________________________
> Containers mailing list
> Containers@lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/containers
^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH 2/7] ns: Introduce the setns syscall
2011-05-07 2:24 ` Eric W. Biederman
` (5 preceding siblings ...)
(?)
@ 2011-05-11 19:21 ` Nathan Lynch
2011-05-11 20:33 ` Eric W. Biederman
-1 siblings, 1 reply; 70+ messages in thread
From: Nathan Lynch @ 2011-05-11 19:21 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch, netdev, linux-kernel, Linux Containers, linux-fsdevel
Hi Eric,
On Fri, 2011-05-06 at 19:24 -0700, Eric W. Biederman wrote:
> With the networking stack today there is demand to handle
> multiple network stacks at a time. Not in the context
> of containers but in the context of people doing interesting
> things with routing.
>
> There is also demand in the context of containers to have
> an efficient way to execute some code in the container itself.
> If nothing else it is very useful ad a debugging technique.
>
> Both problems can be solved by starting some form of login
> daemon in the namespaces people want access to, or you
> can play games by ptracing a process and getting the
> traced process to do things you want it to do. However
> it turns out that a login daemon or a ptrace puppet
> controller are more code, they are more prone to
> failure, and generally they are less efficient than
> simply changing the namespace of a process to a
> specified one.
>
> Pieces of this puzzle can also be solved by instead of
> coming up with a general purpose system call coming up
> with targed system calls perhaps socketat that solve
> a subset of the larger problem. Overall that appears
> to be more work for less reward.
>
> int setns(int fd, int nstype);
>
> The fd argument is a file descriptor referring to a proc
> file of the namespace you want to switch the process to.
>
> In the setns system call the nstype is 0 or specifies
> an clone flag of the namespace you intend to change
> to prevent changing a namespace unintentionally.
I don't understand exactly what the nstype argument buys us - why would
correct code ever need to specify a value other than 0? And reusing the
CLONE_NEW* values in this interface is kind of ugly when setns is
precisely _not_ creating new namespaces.
Is there some fundamental reason it couldn't be
int setns(int fd);
or is there a use case I'm missing?
> +SYSCALL_DEFINE2(setns, int, fd, int, nstype)
> +{
> + const struct proc_ns_operations *ops;
> + struct task_struct *tsk = current;
> + struct nsproxy *new_nsproxy;
> + struct proc_inode *ei;
> + struct file *file;
> + int err;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + file = proc_ns_fget(fd);
> + if (IS_ERR(file))
> + return PTR_ERR(file);
> +
> + err = -EINVAL;
> + ei = PROC_I(file->f_dentry->d_inode);
> + ops = ei->ns_ops;
> + if (nstype && (ops->type != nstype))
> + goto out;
> +
> + new_nsproxy = create_new_namespaces(0, tsk, tsk->fs);
create_new_namespaces() can fail; shouldn't this be checked?
> + err = ops->install(new_nsproxy, ei->ns);
> + if (err) {
> + free_nsproxy(new_nsproxy);
> + goto out;
> + }
> + switch_task_namespaces(tsk, new_nsproxy);
> +out:
> + fput(file);
> + return err;
> +}
> +
^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH 2/7] ns: Introduce the setns syscall
2011-05-11 19:21 ` Nathan Lynch
@ 2011-05-11 20:33 ` Eric W. Biederman
0 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-11 20:33 UTC (permalink / raw)
To: Nathan Lynch
Cc: linux-arch, netdev, linux-kernel, Linux Containers, linux-fsdevel
Nathan Lynch <ntl@pobox.com> writes:
> Hi Eric,
>
> On Fri, 2011-05-06 at 19:24 -0700, Eric W. Biederman wrote:
>> With the networking stack today there is demand to handle
>> multiple network stacks at a time. Not in the context
>> of containers but in the context of people doing interesting
>> things with routing.
>>
>> There is also demand in the context of containers to have
>> an efficient way to execute some code in the container itself.
>> If nothing else it is very useful ad a debugging technique.
>>
>> Both problems can be solved by starting some form of login
>> daemon in the namespaces people want access to, or you
>> can play games by ptracing a process and getting the
>> traced process to do things you want it to do. However
>> it turns out that a login daemon or a ptrace puppet
>> controller are more code, they are more prone to
>> failure, and generally they are less efficient than
>> simply changing the namespace of a process to a
>> specified one.
>>
>> Pieces of this puzzle can also be solved by instead of
>> coming up with a general purpose system call coming up
>> with targed system calls perhaps socketat that solve
>> a subset of the larger problem. Overall that appears
>> to be more work for less reward.
>>
>> int setns(int fd, int nstype);
>>
>> The fd argument is a file descriptor referring to a proc
>> file of the namespace you want to switch the process to.
>>
>> In the setns system call the nstype is 0 or specifies
>> an clone flag of the namespace you intend to change
>> to prevent changing a namespace unintentionally.
>
> I don't understand exactly what the nstype argument buys us - why would
> correct code ever need to specify a value other than 0? And reusing the
> CLONE_NEW* values in this interface is kind of ugly when setns is
> precisely _not_ creating new namespaces.
No but it is setting a new namespace. I do agree it is a bit ugly. But
the worst case at this point is I introduce a new set of beautiful
defines with the same values.
> Is there some fundamental reason it couldn't be
>
> int setns(int fd);
>
> or is there a use case I'm missing?
When someone else opens the file descriptor and passes it to us
and we don't completely trust them. Or equally when someone
else does the bind mount into the filesystem namespace and we
don't completely trust them.
Plus having a flags field is useful in general.
>> +SYSCALL_DEFINE2(setns, int, fd, int, nstype)
>> +{
>> + const struct proc_ns_operations *ops;
>> + struct task_struct *tsk = current;
>> + struct nsproxy *new_nsproxy;
>> + struct proc_inode *ei;
>> + struct file *file;
>> + int err;
>> +
>> + if (!capable(CAP_SYS_ADMIN))
>> + return -EPERM;
>> +
>> + file = proc_ns_fget(fd);
>> + if (IS_ERR(file))
>> + return PTR_ERR(file);
>> +
>> + err = -EINVAL;
>> + ei = PROC_I(file->f_dentry->d_inode);
>> + ops = ei->ns_ops;
>> + if (nstype && (ops->type != nstype))
>> + goto out;
>> +
>> + new_nsproxy = create_new_namespaces(0, tsk, tsk->fs);
>
> create_new_namespaces() can fail; shouldn't this be checked?
Yes. This was pointed out a little earlier and has been fixed
in my tree.
>> + err = ops->install(new_nsproxy, ei->ns);
>> + if (err) {
>> + free_nsproxy(new_nsproxy);
>> + goto out;
>> + }
>> + switch_task_namespaces(tsk, new_nsproxy);
>> +out:
>> + fput(file);
>> + return err;
>> +}
>> +
Eric
^ permalink raw reply [flat|nested] 70+ messages in thread
* [PATCH 6/7] net: Allow setting the network namespace by fd
2011-05-07 2:24 ` Eric W. Biederman
(?)
(?)
@ 2011-05-07 2:25 ` Eric W. Biederman
-1 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 2:25 UTC (permalink / raw)
To: linux-arch-u79uwXL29TY76Z2rM5mHXA
Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
Eric W. Biederman, linux-fsdevel-u79uwXL29TY76Z2rM5mHXA
Take advantage of the new abstraction and allow network devices
to be placed in any network namespace that we have a fd to talk
about.
Signed-off-by: Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
include/linux/if_link.h | 1 +
include/net/net_namespace.h | 1 +
net/core/net_namespace.c | 33 +++++++++++++++++++++++++++++++--
net/core/rtnetlink.c | 5 ++++-
4 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index f4a2e6b..0ee969a 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -136,6 +136,7 @@ enum {
IFLA_PORT_SELF,
IFLA_AF_SPEC,
IFLA_GROUP, /* Group the device belongs to */
+ IFLA_NET_NS_FD,
__IFLA_MAX
};
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 3ae4919..dcc8f57 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -119,6 +119,7 @@ static inline struct net *copy_net_ns(unsigned long flags, struct net *net_ns)
extern struct list_head net_namespace_list;
extern struct net *get_net_ns_by_pid(pid_t pid);
+extern struct net *get_net_ns_by_fd(int pid);
#ifdef CONFIG_NET_NS
extern void __put_net(struct net *net);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index bf7707e..b7403ff 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -8,6 +8,8 @@
#include <linux/idr.h>
#include <linux/rculist.h>
#include <linux/nsproxy.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -343,6 +345,28 @@ struct net *get_net_ns_by_pid(pid_t pid)
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
+struct net *get_net_ns_by_fd(int fd)
+{
+ struct proc_inode *ei;
+ struct file *file;
+ struct net *net;
+
+ net = ERR_PTR(-EINVAL);
+ file = proc_ns_fget(fd);
+ if (!file)
+ goto out;
+
+ ei = PROC_I(file->f_dentry->d_inode);
+ if (ei->ns_ops != &netns_operations)
+ goto out;
+
+ net = get_net(ei->ns);
+out:
+ if (file)
+ fput(file);
+ return net;
+}
+
static int __init net_ns_init(void)
{
struct net_generic *ng;
@@ -577,10 +601,15 @@ EXPORT_SYMBOL_GPL(unregister_pernet_device);
#ifdef CONFIG_NET_NS
static void *netns_get(struct task_struct *task)
{
- struct net *net;
+ struct net *net = NULL;
+ struct nsproxy *nsproxy;
+
rcu_read_lock();
- net = get_net(task->nsproxy->net_ns);
+ nsproxy = task_nsproxy(task);
+ if (nsproxy)
+ net = get_net(nsproxy->net_ns);
rcu_read_unlock();
+
return net;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d7c4bb4..dca9602 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1043,6 +1043,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_LINKMODE] = { .type = NLA_U8 },
[IFLA_LINKINFO] = { .type = NLA_NESTED },
[IFLA_NET_NS_PID] = { .type = NLA_U32 },
+ [IFLA_NET_NS_FD] = { .type = NLA_U32 },
[IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 },
[IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
[IFLA_VF_PORTS] = { .type = NLA_NESTED },
@@ -1091,6 +1092,8 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
*/
if (tb[IFLA_NET_NS_PID])
net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
+ else if (tb[IFLA_NET_NS_FD])
+ net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD]));
else
net = get_net(src_net);
return net;
@@ -1221,7 +1224,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
int send_addr_notify = 0;
int err;
- if (tb[IFLA_NET_NS_PID]) {
+ if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) {
struct net *net = rtnl_link_get_net(dev_net(dev), tb);
if (IS_ERR(net)) {
err = PTR_ERR(net);
--
1.6.5.2.143.g8cc62
^ permalink raw reply related [flat|nested] 70+ messages in thread
* [PATCH 6/7] net: Allow setting the network namespace by fd
@ 2011-05-07 2:25 ` Eric W. Biederman
0 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 2:25 UTC (permalink / raw)
To: linux-arch
Cc: linux-kernel, netdev, linux-fsdevel, jamal, Daniel Lezcano,
Linux Containers, Renato Westphal, Eric W. Biederman
Take advantage of the new abstraction and allow network devices
to be placed in any network namespace that we have a fd to talk
about.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
include/linux/if_link.h | 1 +
include/net/net_namespace.h | 1 +
net/core/net_namespace.c | 33 +++++++++++++++++++++++++++++++--
net/core/rtnetlink.c | 5 ++++-
4 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index f4a2e6b..0ee969a 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -136,6 +136,7 @@ enum {
IFLA_PORT_SELF,
IFLA_AF_SPEC,
IFLA_GROUP, /* Group the device belongs to */
+ IFLA_NET_NS_FD,
__IFLA_MAX
};
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 3ae4919..dcc8f57 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -119,6 +119,7 @@ static inline struct net *copy_net_ns(unsigned long flags, struct net *net_ns)
extern struct list_head net_namespace_list;
extern struct net *get_net_ns_by_pid(pid_t pid);
+extern struct net *get_net_ns_by_fd(int pid);
#ifdef CONFIG_NET_NS
extern void __put_net(struct net *net);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index bf7707e..b7403ff 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -8,6 +8,8 @@
#include <linux/idr.h>
#include <linux/rculist.h>
#include <linux/nsproxy.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -343,6 +345,28 @@ struct net *get_net_ns_by_pid(pid_t pid)
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
+struct net *get_net_ns_by_fd(int fd)
+{
+ struct proc_inode *ei;
+ struct file *file;
+ struct net *net;
+
+ net = ERR_PTR(-EINVAL);
+ file = proc_ns_fget(fd);
+ if (!file)
+ goto out;
+
+ ei = PROC_I(file->f_dentry->d_inode);
+ if (ei->ns_ops != &netns_operations)
+ goto out;
+
+ net = get_net(ei->ns);
+out:
+ if (file)
+ fput(file);
+ return net;
+}
+
static int __init net_ns_init(void)
{
struct net_generic *ng;
@@ -577,10 +601,15 @@ EXPORT_SYMBOL_GPL(unregister_pernet_device);
#ifdef CONFIG_NET_NS
static void *netns_get(struct task_struct *task)
{
- struct net *net;
+ struct net *net = NULL;
+ struct nsproxy *nsproxy;
+
rcu_read_lock();
- net = get_net(task->nsproxy->net_ns);
+ nsproxy = task_nsproxy(task);
+ if (nsproxy)
+ net = get_net(nsproxy->net_ns);
rcu_read_unlock();
+
return net;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d7c4bb4..dca9602 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1043,6 +1043,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_LINKMODE] = { .type = NLA_U8 },
[IFLA_LINKINFO] = { .type = NLA_NESTED },
[IFLA_NET_NS_PID] = { .type = NLA_U32 },
+ [IFLA_NET_NS_FD] = { .type = NLA_U32 },
[IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 },
[IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
[IFLA_VF_PORTS] = { .type = NLA_NESTED },
@@ -1091,6 +1092,8 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
*/
if (tb[IFLA_NET_NS_PID])
net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
+ else if (tb[IFLA_NET_NS_FD])
+ net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD]));
else
net = get_net(src_net);
return net;
@@ -1221,7 +1224,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
int send_addr_notify = 0;
int err;
- if (tb[IFLA_NET_NS_PID]) {
+ if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) {
struct net *net = rtnl_link_get_net(dev_net(dev), tb);
if (IS_ERR(net)) {
err = PTR_ERR(net);
--
1.6.5.2.143.g8cc62
^ permalink raw reply related [flat|nested] 70+ messages in thread
* [PATCH 6/7] net: Allow setting the network namespace by fd
@ 2011-05-07 2:25 ` Eric W. Biederman
0 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 2:25 UTC (permalink / raw)
To: linux-arch-u79uwXL29TY76Z2rM5mHXA
Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
Eric W. Biederman, linux-fsdevel-u79uwXL29TY76Z2rM5mHXA
Take advantage of the new abstraction and allow network devices
to be placed in any network namespace that we have a fd to talk
about.
Signed-off-by: Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
include/linux/if_link.h | 1 +
include/net/net_namespace.h | 1 +
net/core/net_namespace.c | 33 +++++++++++++++++++++++++++++++--
net/core/rtnetlink.c | 5 ++++-
4 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index f4a2e6b..0ee969a 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -136,6 +136,7 @@ enum {
IFLA_PORT_SELF,
IFLA_AF_SPEC,
IFLA_GROUP, /* Group the device belongs to */
+ IFLA_NET_NS_FD,
__IFLA_MAX
};
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 3ae4919..dcc8f57 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -119,6 +119,7 @@ static inline struct net *copy_net_ns(unsigned long flags, struct net *net_ns)
extern struct list_head net_namespace_list;
extern struct net *get_net_ns_by_pid(pid_t pid);
+extern struct net *get_net_ns_by_fd(int pid);
#ifdef CONFIG_NET_NS
extern void __put_net(struct net *net);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index bf7707e..b7403ff 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -8,6 +8,8 @@
#include <linux/idr.h>
#include <linux/rculist.h>
#include <linux/nsproxy.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -343,6 +345,28 @@ struct net *get_net_ns_by_pid(pid_t pid)
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
+struct net *get_net_ns_by_fd(int fd)
+{
+ struct proc_inode *ei;
+ struct file *file;
+ struct net *net;
+
+ net = ERR_PTR(-EINVAL);
+ file = proc_ns_fget(fd);
+ if (!file)
+ goto out;
+
+ ei = PROC_I(file->f_dentry->d_inode);
+ if (ei->ns_ops != &netns_operations)
+ goto out;
+
+ net = get_net(ei->ns);
+out:
+ if (file)
+ fput(file);
+ return net;
+}
+
static int __init net_ns_init(void)
{
struct net_generic *ng;
@@ -577,10 +601,15 @@ EXPORT_SYMBOL_GPL(unregister_pernet_device);
#ifdef CONFIG_NET_NS
static void *netns_get(struct task_struct *task)
{
- struct net *net;
+ struct net *net = NULL;
+ struct nsproxy *nsproxy;
+
rcu_read_lock();
- net = get_net(task->nsproxy->net_ns);
+ nsproxy = task_nsproxy(task);
+ if (nsproxy)
+ net = get_net(nsproxy->net_ns);
rcu_read_unlock();
+
return net;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d7c4bb4..dca9602 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1043,6 +1043,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_LINKMODE] = { .type = NLA_U8 },
[IFLA_LINKINFO] = { .type = NLA_NESTED },
[IFLA_NET_NS_PID] = { .type = NLA_U32 },
+ [IFLA_NET_NS_FD] = { .type = NLA_U32 },
[IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 },
[IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
[IFLA_VF_PORTS] = { .type = NLA_NESTED },
@@ -1091,6 +1092,8 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
*/
if (tb[IFLA_NET_NS_PID])
net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
+ else if (tb[IFLA_NET_NS_FD])
+ net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD]));
else
net = get_net(src_net);
return net;
@@ -1221,7 +1224,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
int send_addr_notify = 0;
int err;
- if (tb[IFLA_NET_NS_PID]) {
+ if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) {
struct net *net = rtnl_link_get_net(dev_net(dev), tb);
if (IS_ERR(net)) {
err = PTR_ERR(net);
--
1.6.5.2.143.g8cc62
^ permalink raw reply related [flat|nested] 70+ messages in thread
* [PATCH 6/7] net: Allow setting the network namespace by fd
@ 2011-05-07 2:25 ` Eric W. Biederman
0 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 2:25 UTC (permalink / raw)
To: linux-arch
Cc: linux-kernel, netdev, linux-fsdevel, jamal, Daniel Lezcano,
Linux Containers, Renato Westphal, Eric W. Biederman
Take advantage of the new abstraction and allow network devices
to be placed in any network namespace that we have a fd to talk
about.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
include/linux/if_link.h | 1 +
include/net/net_namespace.h | 1 +
net/core/net_namespace.c | 33 +++++++++++++++++++++++++++++++--
net/core/rtnetlink.c | 5 ++++-
4 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index f4a2e6b..0ee969a 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -136,6 +136,7 @@ enum {
IFLA_PORT_SELF,
IFLA_AF_SPEC,
IFLA_GROUP, /* Group the device belongs to */
+ IFLA_NET_NS_FD,
__IFLA_MAX
};
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 3ae4919..dcc8f57 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -119,6 +119,7 @@ static inline struct net *copy_net_ns(unsigned long flags, struct net *net_ns)
extern struct list_head net_namespace_list;
extern struct net *get_net_ns_by_pid(pid_t pid);
+extern struct net *get_net_ns_by_fd(int pid);
#ifdef CONFIG_NET_NS
extern void __put_net(struct net *net);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index bf7707e..b7403ff 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -8,6 +8,8 @@
#include <linux/idr.h>
#include <linux/rculist.h>
#include <linux/nsproxy.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -343,6 +345,28 @@ struct net *get_net_ns_by_pid(pid_t pid)
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
+struct net *get_net_ns_by_fd(int fd)
+{
+ struct proc_inode *ei;
+ struct file *file;
+ struct net *net;
+
+ net = ERR_PTR(-EINVAL);
+ file = proc_ns_fget(fd);
+ if (!file)
+ goto out;
+
+ ei = PROC_I(file->f_dentry->d_inode);
+ if (ei->ns_ops != &netns_operations)
+ goto out;
+
+ net = get_net(ei->ns);
+out:
+ if (file)
+ fput(file);
+ return net;
+}
+
static int __init net_ns_init(void)
{
struct net_generic *ng;
@@ -577,10 +601,15 @@ EXPORT_SYMBOL_GPL(unregister_pernet_device);
#ifdef CONFIG_NET_NS
static void *netns_get(struct task_struct *task)
{
- struct net *net;
+ struct net *net = NULL;
+ struct nsproxy *nsproxy;
+
rcu_read_lock();
- net = get_net(task->nsproxy->net_ns);
+ nsproxy = task_nsproxy(task);
+ if (nsproxy)
+ net = get_net(nsproxy->net_ns);
rcu_read_unlock();
+
return net;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d7c4bb4..dca9602 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1043,6 +1043,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_LINKMODE] = { .type = NLA_U8 },
[IFLA_LINKINFO] = { .type = NLA_NESTED },
[IFLA_NET_NS_PID] = { .type = NLA_U32 },
+ [IFLA_NET_NS_FD] = { .type = NLA_U32 },
[IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 },
[IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
[IFLA_VF_PORTS] = { .type = NLA_NESTED },
@@ -1091,6 +1092,8 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
*/
if (tb[IFLA_NET_NS_PID])
net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
+ else if (tb[IFLA_NET_NS_FD])
+ net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD]));
else
net = get_net(src_net);
return net;
@@ -1221,7 +1224,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
int send_addr_notify = 0;
int err;
- if (tb[IFLA_NET_NS_PID]) {
+ if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) {
struct net *net = rtnl_link_get_net(dev_net(dev), tb);
if (IS_ERR(net)) {
err = PTR_ERR(net);
--
1.6.5.2.143.g8cc62
^ permalink raw reply related [flat|nested] 70+ messages in thread
* Re: [PATCH 6/7] net: Allow setting the network namespace by fd
2011-05-07 2:25 ` Eric W. Biederman
` (2 preceding siblings ...)
(?)
@ 2011-05-07 22:46 ` Daniel Lezcano
-1 siblings, 0 replies; 70+ messages in thread
From: Daniel Lezcano @ 2011-05-07 22:46 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Linux Containers, Renato Westphal
On 05/07/2011 04:25 AM, Eric W. Biederman wrote:
> Take advantage of the new abstraction and allow network devices
> to be placed in any network namespace that we have a fd to talk
> about.
>
> Signed-off-by: Eric W. Biederman<ebiederm@xmission.com>
> ---
Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
^ permalink raw reply [flat|nested] 70+ messages in thread
* [PATCH 3/7] ns proc: Add support for the network namespace.
2011-05-07 2:24 ` Eric W. Biederman
@ 2011-05-07 2:24 ` Eric W. Biederman
-1 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 2:24 UTC (permalink / raw)
To: linux-arch
Cc: linux-kernel, netdev, linux-fsdevel, jamal, Daniel Lezcano,
Linux Containers, Renato Westphal, Eric W. Biederman
Implementing file descriptors for the network namespace
is simple and straightforward.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/proc/namespaces.c | 3 +++
include/linux/proc_fs.h | 1 +
net/core/net_namespace.c | 31 +++++++++++++++++++++++++++++++
3 files changed, 35 insertions(+), 0 deletions(-)
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 6ae9f07..dcbd483 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -16,6 +16,9 @@
static const struct proc_ns_operations *ns_entries[] = {
+#ifdef CONFIG_NET_NS
+ &netns_operations,
+#endif
};
static const struct file_operations ns_file_operations = {
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index a6d2c6d..62126ec 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -265,6 +265,7 @@ struct proc_ns_operations {
void (*put)(void *ns);
int (*install)(struct nsproxy *nsproxy, void *ns);
};
+extern const struct proc_ns_operations netns_operations;
union proc_op {
int (*proc_get_link)(struct inode *, struct path *);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 3f86026..bf7707e 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -573,3 +573,34 @@ void unregister_pernet_device(struct pernet_operations *ops)
mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
+
+#ifdef CONFIG_NET_NS
+static void *netns_get(struct task_struct *task)
+{
+ struct net *net;
+ rcu_read_lock();
+ net = get_net(task->nsproxy->net_ns);
+ rcu_read_unlock();
+ return net;
+}
+
+static void netns_put(void *ns)
+{
+ put_net(ns);
+}
+
+static int netns_install(struct nsproxy *nsproxy, void *ns)
+{
+ put_net(nsproxy->net_ns);
+ nsproxy->net_ns = get_net(ns);
+ return 0;
+}
+
+const struct proc_ns_operations netns_operations = {
+ .name = "net",
+ .type = CLONE_NEWNET,
+ .get = netns_get,
+ .put = netns_put,
+ .install = netns_install,
+};
+#endif
--
1.6.5.2.143.g8cc62
^ permalink raw reply related [flat|nested] 70+ messages in thread
* [PATCH 3/7] ns proc: Add support for the network namespace.
@ 2011-05-07 2:24 ` Eric W. Biederman
0 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 2:24 UTC (permalink / raw)
To: linux-arch
Cc: linux-kernel, netdev, linux-fsdevel, jamal, Daniel Lezcano,
Linux Containers, Renato Westphal, Eric W. Biederman
Implementing file descriptors for the network namespace
is simple and straightforward.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/proc/namespaces.c | 3 +++
include/linux/proc_fs.h | 1 +
net/core/net_namespace.c | 31 +++++++++++++++++++++++++++++++
3 files changed, 35 insertions(+), 0 deletions(-)
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 6ae9f07..dcbd483 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -16,6 +16,9 @@
static const struct proc_ns_operations *ns_entries[] = {
+#ifdef CONFIG_NET_NS
+ &netns_operations,
+#endif
};
static const struct file_operations ns_file_operations = {
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index a6d2c6d..62126ec 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -265,6 +265,7 @@ struct proc_ns_operations {
void (*put)(void *ns);
int (*install)(struct nsproxy *nsproxy, void *ns);
};
+extern const struct proc_ns_operations netns_operations;
union proc_op {
int (*proc_get_link)(struct inode *, struct path *);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 3f86026..bf7707e 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -573,3 +573,34 @@ void unregister_pernet_device(struct pernet_operations *ops)
mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
+
+#ifdef CONFIG_NET_NS
+static void *netns_get(struct task_struct *task)
+{
+ struct net *net;
+ rcu_read_lock();
+ net = get_net(task->nsproxy->net_ns);
+ rcu_read_unlock();
+ return net;
+}
+
+static void netns_put(void *ns)
+{
+ put_net(ns);
+}
+
+static int netns_install(struct nsproxy *nsproxy, void *ns)
+{
+ put_net(nsproxy->net_ns);
+ nsproxy->net_ns = get_net(ns);
+ return 0;
+}
+
+const struct proc_ns_operations netns_operations = {
+ .name = "net",
+ .type = CLONE_NEWNET,
+ .get = netns_get,
+ .put = netns_put,
+ .install = netns_install,
+};
+#endif
--
1.6.5.2.143.g8cc62
^ permalink raw reply related [flat|nested] 70+ messages in thread
* Re: [PATCH 3/7] ns proc: Add support for the network namespace.
2011-05-07 2:24 ` Eric W. Biederman
(?)
@ 2011-05-07 22:41 ` Daniel Lezcano
-1 siblings, 0 replies; 70+ messages in thread
From: Daniel Lezcano @ 2011-05-07 22:41 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Linux Containers, Renato Westphal
On 05/07/2011 04:24 AM, Eric W. Biederman wrote:
> Implementing file descriptors for the network namespace
> is simple and straightforward.
>
> Signed-off-by: Eric W. Biederman<ebiederm@xmission.com>
> ---
Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH 3/7] ns proc: Add support for the network namespace.
2011-05-07 2:24 ` Eric W. Biederman
(?)
(?)
@ 2011-05-11 19:21 ` Nathan Lynch
2011-05-11 21:34 ` Eric W. Biederman
-1 siblings, 1 reply; 70+ messages in thread
From: Nathan Lynch @ 2011-05-11 19:21 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Daniel Lezcano, Linux Containers, Renato Westphal
On Fri, 2011-05-06 at 19:24 -0700, Eric W. Biederman wrote:
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index 3f86026..bf7707e 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -573,3 +573,34 @@ void unregister_pernet_device(struct pernet_operations *ops)
> mutex_unlock(&net_mutex);
> }
> EXPORT_SYMBOL_GPL(unregister_pernet_device);
> +
> +#ifdef CONFIG_NET_NS
> +static void *netns_get(struct task_struct *task)
> +{
> + struct net *net;
> + rcu_read_lock();
> + net = get_net(task->nsproxy->net_ns);
This should use task_nsproxy() and check the result before grabbing the
net_ns, but I think you fix that in a later patch.
Regardless, it looks as if all the proc_ns_ops->get() implementations
really just want the nsproxy, so maybe the get() methods should take
that instead of the task_struct, and proc_ns_instantiate() should do
something like:
struct nsproxy *nsproxy;
...
ei->ns_ops = ns_ops;
error = -ESRCH;
rcu_read_lock();
nsproxy = task_nsproxy(task);
rcu_read_unlock();
if (!nsproxy)
goto out;
ei->ns = ns_ops->get(nsproxy);
So then the zombie check is consolidated in one place instead of having
to do it in every get() method.
> + rcu_read_unlock();
> + return net;
> +}
> +
> +static void netns_put(void *ns)
> +{
> + put_net(ns);
> +}
> +
> +static int netns_install(struct nsproxy *nsproxy, void *ns)
> +{
> + put_net(nsproxy->net_ns);
> + nsproxy->net_ns = get_net(ns);
> + return 0;
> +}
This introduces a window where, potentially, nsproxy->net_ns is stale
before it is updated with the namespace which is being attached, no?
(Same concern applies to other install methods in the patch set). It
seems possible to oops the kernel in this window by looking up
/proc/$PID/ns/net while $PID is in the midst of setns().
^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH 3/7] ns proc: Add support for the network namespace.
2011-05-11 19:21 ` Nathan Lynch
@ 2011-05-11 21:34 ` Eric W. Biederman
2011-05-11 21:42 ` Nathan Lynch
0 siblings, 1 reply; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-11 21:34 UTC (permalink / raw)
To: Nathan Lynch
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Daniel Lezcano, Linux Containers, Renato Westphal
Nathan Lynch <ntl@pobox.com> writes:
> On Fri, 2011-05-06 at 19:24 -0700, Eric W. Biederman wrote:
>> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
>> index 3f86026..bf7707e 100644
>> --- a/net/core/net_namespace.c
>> +++ b/net/core/net_namespace.c
>> @@ -573,3 +573,34 @@ void unregister_pernet_device(struct pernet_operations *ops)
>> mutex_unlock(&net_mutex);
>> }
>> EXPORT_SYMBOL_GPL(unregister_pernet_device);
>> +
>> +#ifdef CONFIG_NET_NS
>> +static void *netns_get(struct task_struct *task)
>> +{
>> + struct net *net;
>> + rcu_read_lock();
>> + net = get_net(task->nsproxy->net_ns);
>
> This should use task_nsproxy() and check the result before grabbing the
> net_ns, but I think you fix that in a later patch.
>
> Regardless, it looks as if all the proc_ns_ops->get() implementations
> really just want the nsproxy, so maybe the get() methods should take
> that instead of the task_struct, and proc_ns_instantiate() should do
> something like:
>
> struct nsproxy *nsproxy;
> ...
>
> ei->ns_ops = ns_ops;
> error = -ESRCH;
> rcu_read_lock();
> nsproxy = task_nsproxy(task);
> rcu_read_unlock();
> if (!nsproxy)
> goto out;
> ei->ns = ns_ops->get(nsproxy);
>
>
> So then the zombie check is consolidated in one place instead of having
> to do it in every get() method.
For the pid namespace at least I want the task not the nsproxy,
so I can use task_active_pid_namespace().
I admit that is a little asymmetrical with the install, but at
least until the details of getting the pid namespace working in
this context are worked out I don't want to reconsider the
current design.
There is also the user namespace that does not even exist in
nsproxy to consider. I will worry about that namespace when
it happens.
Ultimately nsproxy is a space/time optimization that not all
namespaces use so forcing it in the design is probably not
what we want.
>> + rcu_read_unlock();
>> + return net;
>> +}
>> +
>> +static void netns_put(void *ns)
>> +{
>> + put_net(ns);
>> +}
>> +
>> +static int netns_install(struct nsproxy *nsproxy, void *ns)
>> +{
>> + put_net(nsproxy->net_ns);
>> + nsproxy->net_ns = get_net(ns);
>> + return 0;
>> +}
>
> This introduces a window where, potentially, nsproxy->net_ns is stale
> before it is updated with the namespace which is being attached, no?
> (Same concern applies to other install methods in the patch set). It
> seems possible to oops the kernel in this window by looking up
> /proc/$PID/ns/net while $PID is in the midst of setns().
Except the nsproxy being referred to is a brand new nsproxy, with an
extra reference count on every namespace. current->nsproxy still
contains the reference counts of the current process.
Eric
^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH 3/7] ns proc: Add support for the network namespace.
2011-05-11 21:34 ` Eric W. Biederman
@ 2011-05-11 21:42 ` Nathan Lynch
0 siblings, 0 replies; 70+ messages in thread
From: Nathan Lynch @ 2011-05-11 21:42 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Daniel Lezcano, Linux Containers, Renato Westphal
On Wed, 2011-05-11 at 14:34 -0700, Eric W. Biederman wrote:
> Nathan Lynch <ntl@pobox.com> writes:
>
> > On Fri, 2011-05-06 at 19:24 -0700, Eric W. Biederman wrote:
> >> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> >> index 3f86026..bf7707e 100644
> >> --- a/net/core/net_namespace.c
> >> +++ b/net/core/net_namespace.c
> >> @@ -573,3 +573,34 @@ void unregister_pernet_device(struct pernet_operations *ops)
> >> mutex_unlock(&net_mutex);
> >> }
> >> EXPORT_SYMBOL_GPL(unregister_pernet_device);
> >> +
> >> +#ifdef CONFIG_NET_NS
> >> +static void *netns_get(struct task_struct *task)
> >> +{
> >> + struct net *net;
> >> + rcu_read_lock();
> >> + net = get_net(task->nsproxy->net_ns);
> >
> > This should use task_nsproxy() and check the result before grabbing the
> > net_ns, but I think you fix that in a later patch.
> >
> > Regardless, it looks as if all the proc_ns_ops->get() implementations
> > really just want the nsproxy, so maybe the get() methods should take
> > that instead of the task_struct, and proc_ns_instantiate() should do
> > something like:
> >
> > struct nsproxy *nsproxy;
> > ...
> >
> > ei->ns_ops = ns_ops;
> > error = -ESRCH;
> > rcu_read_lock();
> > nsproxy = task_nsproxy(task);
> > rcu_read_unlock();
> > if (!nsproxy)
> > got out;
> > ei->ns = ns_ops->get(nsproxy);
> >
> >
> > So then the zombie check is consolidated in one place instead of having
> > to do it in every get() method.
>
> For the pid namespace at least I want the task not the nsproxy,
> so I can use task_active_pid_namespace().
>
> I admit that is a little asymmetrical with the install, but at
> least until the details of getting the pid namespace working in
> this context are worked out I don't want to reconsider the
> current design.
>
> There is also the user namespace that does not even exist in
> nsproxy to consider. I will worry about that namespace when
> it happens.
>
> Ultimately nsproxy is a space/time optimization that not all
> namespaces use so forcing it in the design is probably not
> what we want.
Okay.
> >> + rcu_read_unlock();
> >> + return net;
> >> +}
> >> +
> >> +static void netns_put(void *ns)
> >> +{
> >> + put_net(ns);
> >> +}
> >> +
> >> +static int netns_install(struct nsproxy *nsproxy, void *ns)
> >> +{
> >> + put_net(nsproxy->net_ns);
> >> + nsproxy->net_ns = get_net(ns);
> >> + return 0;
> >> +}
> >
> > This introduces a window where, potentially, nsproxy->net_ns is stale
> > before it is updated with the namespace which is being attached, no?
> > (Same concern applies to other install methods in the patch set). It
> > seems possible to oops the kernel in this window by looking up
> > /proc/$PID/ns/net while $PID is in the midst of setns().
>
> Except the nsproxy being referred to is a brand new nsproxy, with an
> extra reference count on every namespace. current->nsproxy still
> contains the reference counts of the current process.
Ahh, yeah. Got it. Thanks.
^ permalink raw reply [flat|nested] 70+ messages in thread
* [PATCH 4/7] ns proc: Add support for the uts namespace
2011-05-07 2:24 ` Eric W. Biederman
@ 2011-05-07 2:24 ` Eric W. Biederman
-1 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 2:24 UTC (permalink / raw)
To: linux-arch
Cc: linux-kernel, netdev, linux-fsdevel, jamal, Daniel Lezcano,
Linux Containers, Renato Westphal, Eric W. Biederman
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/proc/namespaces.c | 3 +++
include/linux/proc_fs.h | 1 +
kernel/utsname.c | 39 +++++++++++++++++++++++++++++++++++++++
3 files changed, 43 insertions(+), 0 deletions(-)
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index dcbd483..b017181 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -19,6 +19,9 @@ static const struct proc_ns_operations *ns_entries[] = {
#ifdef CONFIG_NET_NS
&netns_operations,
#endif
+#ifdef CONFIG_UTS_NS
+ &utsns_operations,
+#endif
};
static const struct file_operations ns_file_operations = {
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 62126ec..52aa89d 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -266,6 +266,7 @@ struct proc_ns_operations {
int (*install)(struct nsproxy *nsproxy, void *ns);
};
extern const struct proc_ns_operations netns_operations;
+extern const struct proc_ns_operations utsns_operations;
union proc_op {
int (*proc_get_link)(struct inode *, struct path *);
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 4464617..bff131b 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -15,6 +15,7 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/user_namespace.h>
+#include <linux/proc_fs.h>
static struct uts_namespace *create_uts_ns(void)
{
@@ -79,3 +80,41 @@ void free_uts_ns(struct kref *kref)
put_user_ns(ns->user_ns);
kfree(ns);
}
+
+static void *utsns_get(struct task_struct *task)
+{
+ struct uts_namespace *ns = NULL;
+ struct nsproxy *nsproxy;
+
+ rcu_read_lock();
+ nsproxy = task_nsproxy(task);
+ if (nsproxy) {
+ ns = nsproxy->uts_ns;
+ get_uts_ns(ns);
+ }
+ rcu_read_unlock();
+
+ return ns;
+}
+
+static void utsns_put(void *ns)
+{
+ put_uts_ns(ns);
+}
+
+static int utsns_install(struct nsproxy *nsproxy, void *ns)
+{
+ get_uts_ns(ns);
+ put_uts_ns(nsproxy->uts_ns);
+ nsproxy->uts_ns = ns;
+ return 0;
+}
+
+const struct proc_ns_operations utsns_operations = {
+ .name = "uts",
+ .type = CLONE_NEWUTS,
+ .get = utsns_get,
+ .put = utsns_put,
+ .install = utsns_install,
+};
+
--
1.6.5.2.143.g8cc62
^ permalink raw reply related [flat|nested] 70+ messages in thread
* [PATCH 4/7] ns proc: Add support for the uts namespace
@ 2011-05-07 2:24 ` Eric W. Biederman
0 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 2:24 UTC (permalink / raw)
To: linux-arch
Cc: linux-kernel, netdev, linux-fsdevel, jamal, Daniel Lezcano,
Linux Containers, Renato Westphal, Eric W. Biederman
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/proc/namespaces.c | 3 +++
include/linux/proc_fs.h | 1 +
kernel/utsname.c | 39 +++++++++++++++++++++++++++++++++++++++
3 files changed, 43 insertions(+), 0 deletions(-)
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index dcbd483..b017181 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -19,6 +19,9 @@ static const struct proc_ns_operations *ns_entries[] = {
#ifdef CONFIG_NET_NS
&netns_operations,
#endif
+#ifdef CONFIG_UTS_NS
+ &utsns_operations,
+#endif
};
static const struct file_operations ns_file_operations = {
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 62126ec..52aa89d 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -266,6 +266,7 @@ struct proc_ns_operations {
int (*install)(struct nsproxy *nsproxy, void *ns);
};
extern const struct proc_ns_operations netns_operations;
+extern const struct proc_ns_operations utsns_operations;
union proc_op {
int (*proc_get_link)(struct inode *, struct path *);
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 4464617..bff131b 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -15,6 +15,7 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/user_namespace.h>
+#include <linux/proc_fs.h>
static struct uts_namespace *create_uts_ns(void)
{
@@ -79,3 +80,41 @@ void free_uts_ns(struct kref *kref)
put_user_ns(ns->user_ns);
kfree(ns);
}
+
+static void *utsns_get(struct task_struct *task)
+{
+ struct uts_namespace *ns = NULL;
+ struct nsproxy *nsproxy;
+
+ rcu_read_lock();
+ nsproxy = task_nsproxy(task);
+ if (nsproxy) {
+ ns = nsproxy->uts_ns;
+ get_uts_ns(ns);
+ }
+ rcu_read_unlock();
+
+ return ns;
+}
+
+static void utsns_put(void *ns)
+{
+ put_uts_ns(ns);
+}
+
+static int utsns_install(struct nsproxy *nsproxy, void *ns)
+{
+ get_uts_ns(ns);
+ put_uts_ns(nsproxy->uts_ns);
+ nsproxy->uts_ns = ns;
+ return 0;
+}
+
+const struct proc_ns_operations utsns_operations = {
+ .name = "uts",
+ .type = CLONE_NEWUTS,
+ .get = utsns_get,
+ .put = utsns_put,
+ .install = utsns_install,
+};
+
--
1.6.5.2.143.g8cc62
^ permalink raw reply related [flat|nested] 70+ messages in thread
* Re: [PATCH 4/7] ns proc: Add support for the uts namespace
2011-05-07 2:24 ` Eric W. Biederman
(?)
@ 2011-05-07 22:42 ` Daniel Lezcano
-1 siblings, 0 replies; 70+ messages in thread
From: Daniel Lezcano @ 2011-05-07 22:42 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Linux Containers, Renato Westphal
On 05/07/2011 04:24 AM, Eric W. Biederman wrote:
> Signed-off-by: Eric W. Biederman<ebiederm@xmission.com>
> ---
Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
^ permalink raw reply [flat|nested] 70+ messages in thread
* [PATCH 5/7] ns proc: Add support for the ipc namespace
2011-05-07 2:24 ` Eric W. Biederman
@ 2011-05-07 2:24 ` Eric W. Biederman
-1 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 2:24 UTC (permalink / raw)
To: linux-arch
Cc: linux-kernel, netdev, linux-fsdevel, jamal, Daniel Lezcano,
Linux Containers, Renato Westphal, Eric W. Biederman
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/proc/namespaces.c | 3 +++
include/linux/proc_fs.h | 1 +
ipc/namespace.c | 37 +++++++++++++++++++++++++++++++++++++
3 files changed, 41 insertions(+), 0 deletions(-)
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index b017181..f18d6d5 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -22,6 +22,9 @@ static const struct proc_ns_operations *ns_entries[] = {
#ifdef CONFIG_UTS_NS
&utsns_operations,
#endif
+#ifdef CONFIG_IPC_NS
+ &ipcns_operations,
+#endif
};
static const struct file_operations ns_file_operations = {
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 52aa89d..a23f0b7 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -267,6 +267,7 @@ struct proc_ns_operations {
};
extern const struct proc_ns_operations netns_operations;
extern const struct proc_ns_operations utsns_operations;
+extern const struct proc_ns_operations ipcns_operations;
union proc_op {
int (*proc_get_link)(struct inode *, struct path *);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 8054c8e..ce0a647 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -12,6 +12,7 @@
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/user_namespace.h>
+#include <linux/proc_fs.h>
#include "util.h"
@@ -140,3 +141,39 @@ void put_ipc_ns(struct ipc_namespace *ns)
free_ipc_ns(ns);
}
}
+
+static void *ipcns_get(struct task_struct *task)
+{
+ struct ipc_namespace *ns = NULL;
+ struct nsproxy *nsproxy;
+
+ rcu_read_lock();
+ nsproxy = task_nsproxy(task);
+ if (nsproxy)
+ ns = get_ipc_ns(nsproxy->ipc_ns);
+ rcu_read_unlock();
+
+ return ns;
+}
+
+static void ipcns_put(void *ns)
+{
+ return put_ipc_ns(ns);
+}
+
+static int ipcns_install(struct nsproxy *nsproxy, void *ns)
+{
+ /* Ditch state from the old ipc namespace */
+ exit_sem(current);
+ put_ipc_ns(nsproxy->ipc_ns);
+ nsproxy->ipc_ns = get_ipc_ns(ns);
+ return 0;
+}
+
+const struct proc_ns_operations ipcns_operations = {
+ .name = "ipc",
+ .type = CLONE_NEWIPC,
+ .get = ipcns_get,
+ .put = ipcns_put,
+ .install = ipcns_install,
+};
--
1.6.5.2.143.g8cc62
^ permalink raw reply related [flat|nested] 70+ messages in thread
* [PATCH 5/7] ns proc: Add support for the ipc namespace
@ 2011-05-07 2:24 ` Eric W. Biederman
0 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 2:24 UTC (permalink / raw)
To: linux-arch
Cc: linux-kernel, netdev, linux-fsdevel, jamal, Daniel Lezcano,
Linux Containers, Renato Westphal, Eric W. Biederman
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/proc/namespaces.c | 3 +++
include/linux/proc_fs.h | 1 +
ipc/namespace.c | 37 +++++++++++++++++++++++++++++++++++++
3 files changed, 41 insertions(+), 0 deletions(-)
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index b017181..f18d6d5 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -22,6 +22,9 @@ static const struct proc_ns_operations *ns_entries[] = {
#ifdef CONFIG_UTS_NS
&utsns_operations,
#endif
+#ifdef CONFIG_IPC_NS
+ &ipcns_operations,
+#endif
};
static const struct file_operations ns_file_operations = {
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 52aa89d..a23f0b7 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -267,6 +267,7 @@ struct proc_ns_operations {
};
extern const struct proc_ns_operations netns_operations;
extern const struct proc_ns_operations utsns_operations;
+extern const struct proc_ns_operations ipcns_operations;
union proc_op {
int (*proc_get_link)(struct inode *, struct path *);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 8054c8e..ce0a647 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -12,6 +12,7 @@
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/user_namespace.h>
+#include <linux/proc_fs.h>
#include "util.h"
@@ -140,3 +141,39 @@ void put_ipc_ns(struct ipc_namespace *ns)
free_ipc_ns(ns);
}
}
+
+static void *ipcns_get(struct task_struct *task)
+{
+ struct ipc_namespace *ns = NULL;
+ struct nsproxy *nsproxy;
+
+ rcu_read_lock();
+ nsproxy = task_nsproxy(task);
+ if (nsproxy)
+ ns = get_ipc_ns(nsproxy->ipc_ns);
+ rcu_read_unlock();
+
+ return ns;
+}
+
+static void ipcns_put(void *ns)
+{
+ return put_ipc_ns(ns);
+}
+
+static int ipcns_install(struct nsproxy *nsproxy, void *ns)
+{
+ /* Ditch state from the old ipc namespace */
+ exit_sem(current);
+ put_ipc_ns(nsproxy->ipc_ns);
+ nsproxy->ipc_ns = get_ipc_ns(ns);
+ return 0;
+}
+
+const struct proc_ns_operations ipcns_operations = {
+ .name = "ipc",
+ .type = CLONE_NEWIPC,
+ .get = ipcns_get,
+ .put = ipcns_put,
+ .install = ipcns_install,
+};
--
1.6.5.2.143.g8cc62
^ permalink raw reply related [flat|nested] 70+ messages in thread
* Re: [PATCH 5/7] ns proc: Add support for the ipc namespace
2011-05-07 2:24 ` Eric W. Biederman
(?)
@ 2011-05-07 22:44 ` Daniel Lezcano
-1 siblings, 0 replies; 70+ messages in thread
From: Daniel Lezcano @ 2011-05-07 22:44 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Linux Containers, Renato Westphal
On 05/07/2011 04:24 AM, Eric W. Biederman wrote:
> Signed-off-by: Eric W. Biederman<ebiederm@xmission.com>
> ---
Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
^ permalink raw reply [flat|nested] 70+ messages in thread
* [PATCH 7/7] ns: Wire up the setns system call
2011-05-07 2:24 ` Eric W. Biederman
@ 2011-05-07 2:25 ` Eric W. Biederman
-1 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 2:25 UTC (permalink / raw)
To: linux-arch
Cc: linux-kernel, netdev, linux-fsdevel, jamal, Daniel Lezcano,
Linux Containers, Renato Westphal, Eric W. Biederman
v2: Most of the architecture support added by Daniel Lezcano <dlezcano@fr.ibm.com>
v3: ported to v2.6.36-rc4 by: Eric W. Biederman <ebiederm@xmission.com>
v4: Moved wiring up of the system call to another patch
v5: ported to v2.6.39-rc6
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
arch/alpha/include/asm/unistd.h | 3 ++-
arch/alpha/kernel/systbls.S | 1 +
arch/arm/include/asm/unistd.h | 1 +
arch/arm/kernel/calls.S | 1 +
arch/avr32/include/asm/unistd.h | 3 ++-
arch/avr32/kernel/syscall_table.S | 1 +
arch/blackfin/include/asm/unistd.h | 3 ++-
arch/blackfin/mach-common/entry.S | 1 +
arch/cris/arch-v10/kernel/entry.S | 1 +
arch/cris/arch-v32/kernel/entry.S | 1 +
arch/cris/include/asm/unistd.h | 3 ++-
arch/frv/include/asm/unistd.h | 3 ++-
arch/frv/kernel/entry.S | 1 +
arch/h8300/include/asm/unistd.h | 3 ++-
arch/h8300/kernel/syscalls.S | 1 +
arch/ia64/include/asm/unistd.h | 3 ++-
arch/ia64/kernel/entry.S | 1 +
arch/m32r/include/asm/unistd.h | 3 ++-
arch/m32r/kernel/syscall_table.S | 1 +
arch/m68k/include/asm/unistd.h | 3 ++-
arch/m68k/kernel/syscalltable.S | 1 +
arch/microblaze/include/asm/unistd.h | 3 ++-
arch/microblaze/kernel/syscall_table.S | 1 +
arch/mips/include/asm/unistd.h | 15 +++++++++------
arch/mips/kernel/scall32-o32.S | 1 +
arch/mips/kernel/scall64-64.S | 1 +
arch/mips/kernel/scall64-n32.S | 1 +
arch/mips/kernel/scall64-o32.S | 1 +
arch/mn10300/include/asm/unistd.h | 3 ++-
arch/mn10300/kernel/entry.S | 1 +
arch/parisc/include/asm/unistd.h | 4 ++--
arch/parisc/kernel/syscall_table.S | 1 +
arch/powerpc/include/asm/systbl.h | 1 +
arch/powerpc/include/asm/unistd.h | 3 ++-
arch/s390/include/asm/unistd.h | 3 ++-
arch/s390/kernel/syscalls.S | 1 +
arch/sh/include/asm/unistd_32.h | 3 ++-
arch/sh/include/asm/unistd_64.h | 3 ++-
arch/sh/kernel/syscalls_32.S | 1 +
arch/sh/kernel/syscalls_64.S | 1 +
arch/sparc/include/asm/unistd.h | 3 ++-
arch/sparc/kernel/systbls_32.S | 2 +-
arch/sparc/kernel/systbls_64.S | 4 ++--
arch/x86/ia32/ia32entry.S | 1 +
arch/x86/include/asm/unistd_32.h | 3 ++-
arch/x86/include/asm/unistd_64.h | 2 ++
arch/x86/kernel/syscall_table_32.S | 1 +
arch/xtensa/include/asm/unistd.h | 4 +++-
include/asm-generic/unistd.h | 4 +++-
49 files changed, 81 insertions(+), 30 deletions(-)
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index 058937b..664383d 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -452,10 +452,11 @@
#define __NR_fanotify_init 494
#define __NR_fanotify_mark 495
#define __NR_prlimit64 496
+#define __NR_setns 497
#ifdef __KERNEL__
-#define NR_SYSCALLS 497
+#define NR_SYSCALLS 498
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index a6a1de9..4663fd5 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -515,6 +515,7 @@ sys_call_table:
.quad sys_fanotify_init
.quad sys_fanotify_mark /* 495 */
.quad sys_prlimit64
+ .quad sys_setns
.size sys_call_table, . - sys_call_table
.type sys_call_table, @object
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 87dbe3e..41afe2e 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -400,6 +400,7 @@
#define __NR_open_by_handle_at (__NR_SYSCALL_BASE+371)
#define __NR_clock_adjtime (__NR_SYSCALL_BASE+372)
#define __NR_syncfs (__NR_SYSCALL_BASE+373)
+#define __NR_setns (__NR_SYSCALL_BASE+374)
/*
* The following SWIs are ARM private.
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 7fbf28c..a05f759 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -383,6 +383,7 @@
CALL(sys_open_by_handle_at)
CALL(sys_clock_adjtime)
CALL(sys_syncfs)
+ CALL(sys_setns)
#ifndef syscalls_counted
.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
#define syscalls_counted
diff --git a/arch/avr32/include/asm/unistd.h b/arch/avr32/include/asm/unistd.h
index 89861a2..800ea91 100644
--- a/arch/avr32/include/asm/unistd.h
+++ b/arch/avr32/include/asm/unistd.h
@@ -299,9 +299,10 @@
#define __NR_signalfd 279
/* 280 was __NR_timerfd */
#define __NR_eventfd 281
+#define __NR_setns 282
#ifdef __KERNEL__
-#define NR_syscalls 282
+#define NR_syscalls 283
/* Old stuff */
#define __IGNORE_uselib
diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S
index e76bad1..c7fd394 100644
--- a/arch/avr32/kernel/syscall_table.S
+++ b/arch/avr32/kernel/syscall_table.S
@@ -296,4 +296,5 @@ sys_call_table:
.long sys_ni_syscall /* 280, was sys_timerfd */
.long sys_eventfd
.long sys_recvmmsg
+ .long sys_setns
.long sys_ni_syscall /* r8 is saturated at nr_syscalls */
diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h
index ff9a9f3..b638e6b 100644
--- a/arch/blackfin/include/asm/unistd.h
+++ b/arch/blackfin/include/asm/unistd.h
@@ -397,8 +397,9 @@
#define __NR_open_by_handle_at 376
#define __NR_clock_adjtime 377
#define __NR_syncfs 378
+#define __NR_setns 379
-#define __NR_syscall 379
+#define __NR_syscall 380
#define NR_syscalls __NR_syscall
/* Old optional stuff no one actually uses */
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index f96933f..dda11ef 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -1753,6 +1753,7 @@ ENTRY(_sys_call_table)
.long _sys_open_by_handle_at
.long _sys_clock_adjtime
.long _sys_syncfs
+ .long _sys_setns
.rept NR_syscalls-(.-_sys_call_table)/4
.long _sys_ni_syscall
diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S
index 0d6420d..1161883 100644
--- a/arch/cris/arch-v10/kernel/entry.S
+++ b/arch/cris/arch-v10/kernel/entry.S
@@ -937,6 +937,7 @@ sys_call_table:
.long sys_inotify_init1
.long sys_preadv
.long sys_pwritev
+ .long sys_setns /* 335 */
/*
* NOTE!! This doesn't have to be exact - we just have
diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S
index 3abf12c..84fed7e 100644
--- a/arch/cris/arch-v32/kernel/entry.S
+++ b/arch/cris/arch-v32/kernel/entry.S
@@ -880,6 +880,7 @@ sys_call_table:
.long sys_inotify_init1
.long sys_preadv
.long sys_pwritev
+ .long sys_setns /* 335 */
/*
* NOTE!! This doesn't have to be exact - we just have
diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h
index f6fad83..c59a107 100644
--- a/arch/cris/include/asm/unistd.h
+++ b/arch/cris/include/asm/unistd.h
@@ -339,10 +339,11 @@
#define __NR_inotify_init1 332
#define __NR_preadv 333
#define __NR_pwritev 334
+#define __NR_setns 335
#ifdef __KERNEL__
-#define NR_syscalls 335
+#define NR_syscalls 336
#include <arch/unistd.h>
diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h
index b28da49..6470f11 100644
--- a/arch/frv/include/asm/unistd.h
+++ b/arch/frv/include/asm/unistd.h
@@ -343,10 +343,11 @@
#define __NR_pwritev 334
#define __NR_rt_tgsigqueueinfo 335
#define __NR_perf_event_open 336
+#define __NR_setns 337
#ifdef __KERNEL__
-#define NR_syscalls 337
+#define NR_syscalls 338
#define __ARCH_WANT_IPC_PARSE_VERSION
/* #define __ARCH_WANT_OLD_READDIR */
diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S
index 63d579b..017d6d7 100644
--- a/arch/frv/kernel/entry.S
+++ b/arch/frv/kernel/entry.S
@@ -1526,5 +1526,6 @@ sys_call_table:
.long sys_pwritev
.long sys_rt_tgsigqueueinfo /* 335 */
.long sys_perf_event_open
+ .long sys_setns
syscall_table_size = (. - sys_call_table)
diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h
index 50f2c5a..85c9552 100644
--- a/arch/h8300/include/asm/unistd.h
+++ b/arch/h8300/include/asm/unistd.h
@@ -325,10 +325,11 @@
#define __NR_move_pages 317
#define __NR_getcpu 318
#define __NR_epoll_pwait 319
+#define __NR_setns 320
#ifdef __KERNEL__
-#define NR_syscalls 320
+#define NR_syscalls 321
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S
index faefaff..f4b2e67 100644
--- a/arch/h8300/kernel/syscalls.S
+++ b/arch/h8300/kernel/syscalls.S
@@ -333,6 +333,7 @@ SYMBOL_NAME_LABEL(sys_call_table)
.long SYMBOL_NAME(sys_ni_syscall) /* sys_move_pages */
.long SYMBOL_NAME(sys_getcpu)
.long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_pwait */
+ .long SYMBOL_NAME(sys_setns) /* 320 */
.macro call_sp addr
mov.l #SYMBOL_NAME(\addr),er6
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index 404d037..2fbda93 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -319,11 +319,12 @@
#define __NR_open_by_handle_at 1327
#define __NR_clock_adjtime 1328
#define __NR_syncfs 1329
+#define __NR_setns 1330
#ifdef __KERNEL__
-#define NR_syscalls 306 /* length of syscall table */
+#define NR_syscalls 307 /* length of syscall table */
/*
* The following defines stop scripts/checksyscalls.sh from complaining about
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 6de2e23..9ca8019 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1775,6 +1775,7 @@ sys_call_table:
data8 sys_open_by_handle_at
data8 sys_clock_adjtime
data8 sys_syncfs
+ data8 sys_setns // 1330
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
diff --git a/arch/m32r/include/asm/unistd.h b/arch/m32r/include/asm/unistd.h
index c705456..3c7dafc 100644
--- a/arch/m32r/include/asm/unistd.h
+++ b/arch/m32r/include/asm/unistd.h
@@ -330,10 +330,11 @@
/* #define __NR_timerfd 322 removed */
#define __NR_eventfd 323
#define __NR_fallocate 324
+#define __NR_setns 325
#ifdef __KERNEL__
-#define NR_syscalls 325
+#define NR_syscalls 326
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_STAT64
diff --git a/arch/m32r/kernel/syscall_table.S b/arch/m32r/kernel/syscall_table.S
index 60536e2..528f2e6 100644
--- a/arch/m32r/kernel/syscall_table.S
+++ b/arch/m32r/kernel/syscall_table.S
@@ -324,3 +324,4 @@ ENTRY(sys_call_table)
.long sys_ni_syscall
.long sys_eventfd
.long sys_fallocate
+ .long sys_setns /* 325 */
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 29e1790..c11a093 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -347,10 +347,11 @@
#define __NR_open_by_handle_at 341
#define __NR_clock_adjtime 342
#define __NR_syncfs 343
+#define __NR_setns 344
#ifdef __KERNEL__
-#define NR_syscalls 344
+#define NR_syscalls 345
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index 9b8393d..b5a7bbf 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -362,6 +362,7 @@ ENTRY(sys_call_table)
.long sys_open_by_handle_at
.long sys_clock_adjtime
.long sys_syncfs
+ .long sys_setns
.rept NR_syscalls-(.-sys_call_table)/4
.long sys_ni_syscall
diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h
index 30edd61..dcebe59 100644
--- a/arch/microblaze/include/asm/unistd.h
+++ b/arch/microblaze/include/asm/unistd.h
@@ -390,8 +390,9 @@
#define __NR_open_by_handle_at 372
#define __NR_clock_adjtime 373
#define __NR_syncfs 374
+#define __NR_setns 375
-#define __NR_syscalls 375
+#define __NR_syscalls 376
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S
index 85cea81..d915a12 100644
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S
@@ -379,3 +379,4 @@ ENTRY(sys_call_table)
.long sys_open_by_handle_at
.long sys_clock_adjtime
.long sys_syncfs
+ .long sys_setns /* 375 */
diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index fa2e37e..d0d84eb 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h
@@ -363,16 +363,17 @@
#define __NR_open_by_handle_at (__NR_Linux + 340)
#define __NR_clock_adjtime (__NR_Linux + 341)
#define __NR_syncfs (__NR_Linux + 342)
+#define __NR_setns (__NR_Linux + 343)
/*
* Offset of the last Linux o32 flavoured syscall
*/
-#define __NR_Linux_syscalls 342
+#define __NR_Linux_syscalls 343
#endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
#define __NR_O32_Linux 4000
-#define __NR_O32_Linux_syscalls 342
+#define __NR_O32_Linux_syscalls 343
#if _MIPS_SIM == _MIPS_SIM_ABI64
@@ -682,16 +683,17 @@
#define __NR_open_by_handle_at (__NR_Linux + 299)
#define __NR_clock_adjtime (__NR_Linux + 300)
#define __NR_syncfs (__NR_Linux + 301)
+#define __NR_setns (__NR_Linux + 302)
/*
* Offset of the last Linux 64-bit flavoured syscall
*/
-#define __NR_Linux_syscalls 301
+#define __NR_Linux_syscalls 302
#endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
#define __NR_64_Linux 5000
-#define __NR_64_Linux_syscalls 301
+#define __NR_64_Linux_syscalls 302
#if _MIPS_SIM == _MIPS_SIM_NABI32
@@ -1006,16 +1008,17 @@
#define __NR_open_by_handle_at (__NR_Linux + 304)
#define __NR_clock_adjtime (__NR_Linux + 305)
#define __NR_syncfs (__NR_Linux + 306)
+#define __NR_setns (__NR_Linux + 307)
/*
* Offset of the last N32 flavoured syscall
*/
-#define __NR_Linux_syscalls 306
+#define __NR_Linux_syscalls 307
#endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
#define __NR_N32_Linux 6000
-#define __NR_N32_Linux_syscalls 306
+#define __NR_N32_Linux_syscalls 307
#ifdef __KERNEL__
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 7f5468b..c6a2b58 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -590,6 +590,7 @@ einval: li v0, -ENOSYS
sys sys_open_by_handle_at 3 /* 4340 */
sys sys_clock_adjtime 2
sys sys_syncfs 1
+ sys sys_setns 2
.endm
/* We pre-compute the number of _instruction_ bytes needed to
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index a2e1fcb..57e4184 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -429,4 +429,5 @@ sys_call_table:
PTR sys_open_by_handle_at
PTR sys_clock_adjtime /* 5300 */
PTR sys_syncfs
+ PTR sys_setns
.size sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index b2c7624..91ae887 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -429,4 +429,5 @@ EXPORT(sysn32_call_table)
PTR sys_open_by_handle_at
PTR compat_sys_clock_adjtime /* 6305 */
PTR sys_syncfs
+ PTR sys_setns
.size sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 049a9c8..02e6ae5 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -547,4 +547,5 @@ sys_call_table:
PTR compat_sys_open_by_handle_at /* 4340 */
PTR compat_sys_clock_adjtime
PTR sys_syncfs
+ PTR sys_setns
.size sys_call_table,.-sys_call_table
diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h
index 9d056f5..faf57bb 100644
--- a/arch/mn10300/include/asm/unistd.h
+++ b/arch/mn10300/include/asm/unistd.h
@@ -349,10 +349,11 @@
#define __NR_rt_tgsigqueueinfo 336
#define __NR_perf_event_open 337
#define __NR_recvmmsg 338
+#define __NR_setns 339
#ifdef __KERNEL__
-#define NR_syscalls 339
+#define NR_syscalls 340
/*
* specify the deprecated syscalls we want to support on this arch
diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S
index fb93ad7..ae435e1 100644
--- a/arch/mn10300/kernel/entry.S
+++ b/arch/mn10300/kernel/entry.S
@@ -759,6 +759,7 @@ ENTRY(sys_call_table)
.long sys_rt_tgsigqueueinfo
.long sys_perf_event_open
.long sys_recvmmsg
+ .long sys_setns
nr_syscalls=(.-sys_call_table)/4
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 3eb82c2..bbaed57 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -814,9 +814,9 @@
#define __NR_recvmmsg (__NR_Linux + 319)
#define __NR_accept4 (__NR_Linux + 320)
#define __NR_prlimit64 (__NR_Linux + 321)
+#define __NR_setns (__NR_Linux + 322)
-#define __NR_Linux_syscalls (__NR_prlimit64 + 1)
-
+#define __NR_Linux_syscalls (__NR_setns + 1)
#define __IGNORE_select /* newselect */
#define __IGNORE_fadvise64 /* fadvise64_64 */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 4be85ee..151349c 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -420,6 +420,7 @@
ENTRY_COMP(recvmmsg)
ENTRY_SAME(accept4) /* 320 */
ENTRY_SAME(prlimit64)
+ ENTRY_SAME(setns)
/* Nothing yet */
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 60f64b1..2a5f60c 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -352,3 +352,4 @@ SYSCALL_SPU(name_to_handle_at)
COMPAT_SYS_SPU(open_by_handle_at)
COMPAT_SYS_SPU(clock_adjtime)
SYSCALL_SPU(syncfs)
+SYSCALL_SPU(setns)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 3c21564..2780f7a 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -371,10 +371,11 @@
#define __NR_open_by_handle_at 346
#define __NR_clock_adjtime 347
#define __NR_syncfs 348
+#define __NR_setns 349
#ifdef __KERNEL__
-#define __NR_syscalls 349
+#define __NR_syscalls 350
#define __NR__exit __NR_exit
#define NR_syscalls __NR_syscalls
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index e821525..3399bed 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -276,7 +276,8 @@
#define __NR_open_by_handle_at 336
#define __NR_clock_adjtime 337
#define __NR_syncfs 338
-#define NR_syscalls 339
+#define __NR_setns 339
+#define NR_syscalls 340
/*
* There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 9c65fd4..3ec04ba 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -347,3 +347,4 @@ SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,sys_name_to_handle_at_wrappe
SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at_wrapper)
SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper)
SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper)
+SYSCALL(sys_setns,sys_setns,sys_setns)
diff --git a/arch/sh/include/asm/unistd_32.h b/arch/sh/include/asm/unistd_32.h
index ca7765e..26c697c 100644
--- a/arch/sh/include/asm/unistd_32.h
+++ b/arch/sh/include/asm/unistd_32.h
@@ -373,8 +373,9 @@
#define __NR_open_by_handle_at 360
#define __NR_clock_adjtime 361
#define __NR_syncfs 362
+#define __NR_setns 363
-#define NR_syscalls 363
+#define NR_syscalls 364
#ifdef __KERNEL__
diff --git a/arch/sh/include/asm/unistd_64.h b/arch/sh/include/asm/unistd_64.h
index a694009..330668a 100644
--- a/arch/sh/include/asm/unistd_64.h
+++ b/arch/sh/include/asm/unistd_64.h
@@ -394,10 +394,11 @@
#define __NR_open_by_handle_at 371
#define __NR_clock_adjtime 372
#define __NR_syncfs 373
+#define __NR_setns 374
#ifdef __KERNEL__
-#define NR_syscalls 374
+#define NR_syscalls 375
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S
index 030966a..67dcf66 100644
--- a/arch/sh/kernel/syscalls_32.S
+++ b/arch/sh/kernel/syscalls_32.S
@@ -380,3 +380,4 @@ ENTRY(sys_call_table)
.long sys_open_by_handle_at /* 360 */
.long sys_clock_adjtime
.long sys_syncfs
+ .long sys_setns
diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S
index ca0a614..dade54e 100644
--- a/arch/sh/kernel/syscalls_64.S
+++ b/arch/sh/kernel/syscalls_64.S
@@ -400,3 +400,4 @@ sys_call_table:
.long sys_open_by_handle_at
.long sys_clock_adjtime
.long sys_syncfs
+ .long sys_setns
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index 9d897b6..10b3105 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -404,8 +404,9 @@
#define __NR_open_by_handle_at 333
#define __NR_clock_adjtime 334
#define __NR_syncfs 335
+#define __NR_setns 336
-#define NR_syscalls 336
+#define NR_syscalls 337
#ifdef __32bit_syscall_numbers__
/* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 47ac73c..88d0a93 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -84,4 +84,4 @@ sys_call_table:
/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
/*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
-/*335*/ .long sys_syncfs
+/*335*/ .long sys_syncfs, sys_setns
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 4f3170c..19cdb12 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -85,7 +85,7 @@ sys_call_table32:
/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init
/*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime
- .word sys_syncfs
+ .word sys_syncfs, sys_setns
#endif /* CONFIG_COMPAT */
@@ -162,4 +162,4 @@ sys_call_table:
/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
/*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
- .word sys_syncfs
+ .word sys_syncfs, sys_setns
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 849a9d2..58bef77 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -848,4 +848,5 @@ ia32_sys_call_table:
.quad compat_sys_open_by_handle_at
.quad compat_sys_clock_adjtime
.quad sys_syncfs
+ .quad sys_setns /* 345 */
ia32_syscall_end:
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index a755ef5..be6c177 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -350,10 +350,11 @@
#define __NR_open_by_handle_at 342
#define __NR_clock_adjtime 343
#define __NR_syncfs 344
+#define __NR_setns 345
#ifdef __KERNEL__
-#define NR_syscalls 345
+#define NR_syscalls 346
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 160fa76..c46f833 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -677,6 +677,8 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
#define __NR_syncfs 306
__SYSCALL(__NR_syncfs, sys_syncfs)
+#define __NR_setns 307
+__SYSCALL(__NR_setns, sys_setns)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index abce34d..3d77b82 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -344,3 +344,4 @@ ENTRY(sys_call_table)
.long sys_open_by_handle_at
.long sys_clock_adjtime
.long sys_syncfs
+ .long sys_setns /* 345 */
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index 528042c..a6f934f 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -683,8 +683,10 @@ __SYSCALL(305, sys_ni_syscall, 0)
__SYSCALL(306, sys_eventfd, 1)
#define __NR_recvmmsg 307
__SYSCALL(307, sys_recvmmsg, 5)
+#define __NR_setns 308
+__SYSCALL(308, sys_setns, 2)
-#define __NR_syscall_count 308
+#define __NR_syscall_count 309
/*
* sysxtensa syscall handler
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index 07c40d5..5fcd304 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -654,9 +654,11 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
#define __NR_syncfs 267
__SYSCALL(__NR_syncfs, sys_syncfs)
+#define __NR_setns 268
+__SYSCALL(__NR_setns, sys_setns)
#undef __NR_syscalls
-#define __NR_syscalls 268
+#define __NR_syscalls 269
/*
* All syscalls below here should go away really,
--
1.6.5.2.143.g8cc62
^ permalink raw reply related [flat|nested] 70+ messages in thread
* [PATCH 7/7] ns: Wire up the setns system call
@ 2011-05-07 2:25 ` Eric W. Biederman
0 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 2:25 UTC (permalink / raw)
To: linux-arch
Cc: linux-kernel, netdev, linux-fsdevel, jamal, Daniel Lezcano,
Linux Containers, Renato Westphal, Eric W. Biederman
v2: Most of the architecture support added by Daniel Lezcano <dlezcano@fr.ibm.com>
v3: ported to v2.6.36-rc4 by: Eric W. Biederman <ebiederm@xmission.com>
v4: Moved wiring up of the system call to another patch
v5: ported to v2.6.39-rc6
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
arch/alpha/include/asm/unistd.h | 3 ++-
arch/alpha/kernel/systbls.S | 1 +
arch/arm/include/asm/unistd.h | 1 +
arch/arm/kernel/calls.S | 1 +
arch/avr32/include/asm/unistd.h | 3 ++-
arch/avr32/kernel/syscall_table.S | 1 +
arch/blackfin/include/asm/unistd.h | 3 ++-
arch/blackfin/mach-common/entry.S | 1 +
arch/cris/arch-v10/kernel/entry.S | 1 +
arch/cris/arch-v32/kernel/entry.S | 1 +
arch/cris/include/asm/unistd.h | 3 ++-
arch/frv/include/asm/unistd.h | 3 ++-
arch/frv/kernel/entry.S | 1 +
arch/h8300/include/asm/unistd.h | 3 ++-
arch/h8300/kernel/syscalls.S | 1 +
arch/ia64/include/asm/unistd.h | 3 ++-
arch/ia64/kernel/entry.S | 1 +
arch/m32r/include/asm/unistd.h | 3 ++-
arch/m32r/kernel/syscall_table.S | 1 +
arch/m68k/include/asm/unistd.h | 3 ++-
arch/m68k/kernel/syscalltable.S | 1 +
arch/microblaze/include/asm/unistd.h | 3 ++-
arch/microblaze/kernel/syscall_table.S | 1 +
arch/mips/include/asm/unistd.h | 15 +++++++++------
arch/mips/kernel/scall32-o32.S | 1 +
arch/mips/kernel/scall64-64.S | 1 +
arch/mips/kernel/scall64-n32.S | 1 +
arch/mips/kernel/scall64-o32.S | 1 +
arch/mn10300/include/asm/unistd.h | 3 ++-
arch/mn10300/kernel/entry.S | 1 +
arch/parisc/include/asm/unistd.h | 4 ++--
arch/parisc/kernel/syscall_table.S | 1 +
arch/powerpc/include/asm/systbl.h | 1 +
arch/powerpc/include/asm/unistd.h | 3 ++-
arch/s390/include/asm/unistd.h | 3 ++-
arch/s390/kernel/syscalls.S | 1 +
arch/sh/include/asm/unistd_32.h | 3 ++-
arch/sh/include/asm/unistd_64.h | 3 ++-
arch/sh/kernel/syscalls_32.S | 1 +
arch/sh/kernel/syscalls_64.S | 1 +
arch/sparc/include/asm/unistd.h | 3 ++-
arch/sparc/kernel/systbls_32.S | 2 +-
arch/sparc/kernel/systbls_64.S | 4 ++--
arch/x86/ia32/ia32entry.S | 1 +
arch/x86/include/asm/unistd_32.h | 3 ++-
arch/x86/include/asm/unistd_64.h | 2 ++
arch/x86/kernel/syscall_table_32.S | 1 +
arch/xtensa/include/asm/unistd.h | 4 +++-
include/asm-generic/unistd.h | 4 +++-
49 files changed, 81 insertions(+), 30 deletions(-)
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index 058937b..664383d 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -452,10 +452,11 @@
#define __NR_fanotify_init 494
#define __NR_fanotify_mark 495
#define __NR_prlimit64 496
+#define __NR_setns 497
#ifdef __KERNEL__
-#define NR_SYSCALLS 497
+#define NR_SYSCALLS 498
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index a6a1de9..4663fd5 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -515,6 +515,7 @@ sys_call_table:
.quad sys_fanotify_init
.quad sys_fanotify_mark /* 495 */
.quad sys_prlimit64
+ .quad sys_setns
.size sys_call_table, . - sys_call_table
.type sys_call_table, @object
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 87dbe3e..41afe2e 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -400,6 +400,7 @@
#define __NR_open_by_handle_at (__NR_SYSCALL_BASE+371)
#define __NR_clock_adjtime (__NR_SYSCALL_BASE+372)
#define __NR_syncfs (__NR_SYSCALL_BASE+373)
+#define __NR_setns (__NR_SYSCALL_BASE+374)
/*
* The following SWIs are ARM private.
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 7fbf28c..a05f759 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -383,6 +383,7 @@
CALL(sys_open_by_handle_at)
CALL(sys_clock_adjtime)
CALL(sys_syncfs)
+ CALL(sys_setns)
#ifndef syscalls_counted
.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
#define syscalls_counted
diff --git a/arch/avr32/include/asm/unistd.h b/arch/avr32/include/asm/unistd.h
index 89861a2..800ea91 100644
--- a/arch/avr32/include/asm/unistd.h
+++ b/arch/avr32/include/asm/unistd.h
@@ -299,9 +299,10 @@
#define __NR_signalfd 279
/* 280 was __NR_timerfd */
#define __NR_eventfd 281
+#define __NR_setns 283
#ifdef __KERNEL__
-#define NR_syscalls 282
+#define NR_syscalls 284
/* Old stuff */
#define __IGNORE_uselib
diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S
index e76bad1..c7fd394 100644
--- a/arch/avr32/kernel/syscall_table.S
+++ b/arch/avr32/kernel/syscall_table.S
@@ -296,4 +296,5 @@ sys_call_table:
.long sys_ni_syscall /* 280, was sys_timerfd */
.long sys_eventfd
.long sys_recvmmsg
+ .long sys_setns
.long sys_ni_syscall /* r8 is saturated at nr_syscalls */
diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h
index ff9a9f3..b638e6b 100644
--- a/arch/blackfin/include/asm/unistd.h
+++ b/arch/blackfin/include/asm/unistd.h
@@ -397,8 +397,9 @@
#define __NR_open_by_handle_at 376
#define __NR_clock_adjtime 377
#define __NR_syncfs 378
+#define __NR_setns 379
-#define __NR_syscall 379
+#define __NR_syscall 380
#define NR_syscalls __NR_syscall
/* Old optional stuff no one actually uses */
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index f96933f..dda11ef 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -1753,6 +1753,7 @@ ENTRY(_sys_call_table)
.long _sys_open_by_handle_at
.long _sys_clock_adjtime
.long _sys_syncfs
+ .long _sys_setns
.rept NR_syscalls-(.-_sys_call_table)/4
.long _sys_ni_syscall
diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S
index 0d6420d..1161883 100644
--- a/arch/cris/arch-v10/kernel/entry.S
+++ b/arch/cris/arch-v10/kernel/entry.S
@@ -937,6 +937,7 @@ sys_call_table:
.long sys_inotify_init1
.long sys_preadv
.long sys_pwritev
+ .long sys_setns /* 335 */
/*
* NOTE!! This doesn't have to be exact - we just have
diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S
index 3abf12c..84fed7e 100644
--- a/arch/cris/arch-v32/kernel/entry.S
+++ b/arch/cris/arch-v32/kernel/entry.S
@@ -880,6 +880,7 @@ sys_call_table:
.long sys_inotify_init1
.long sys_preadv
.long sys_pwritev
+ .long sys_setns /* 335 */
/*
* NOTE!! This doesn't have to be exact - we just have
diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h
index f6fad83..c59a107 100644
--- a/arch/cris/include/asm/unistd.h
+++ b/arch/cris/include/asm/unistd.h
@@ -339,10 +339,11 @@
#define __NR_inotify_init1 332
#define __NR_preadv 333
#define __NR_pwritev 334
+#define __NR_setns 335
#ifdef __KERNEL__
-#define NR_syscalls 335
+#define NR_syscalls 336
#include <arch/unistd.h>
diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h
index b28da49..6470f11 100644
--- a/arch/frv/include/asm/unistd.h
+++ b/arch/frv/include/asm/unistd.h
@@ -343,10 +343,11 @@
#define __NR_pwritev 334
#define __NR_rt_tgsigqueueinfo 335
#define __NR_perf_event_open 336
+#define __NR_setns 337
#ifdef __KERNEL__
-#define NR_syscalls 337
+#define NR_syscalls 338
#define __ARCH_WANT_IPC_PARSE_VERSION
/* #define __ARCH_WANT_OLD_READDIR */
diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S
index 63d579b..017d6d7 100644
--- a/arch/frv/kernel/entry.S
+++ b/arch/frv/kernel/entry.S
@@ -1526,5 +1526,6 @@ sys_call_table:
.long sys_pwritev
.long sys_rt_tgsigqueueinfo /* 335 */
.long sys_perf_event_open
+ .long sys_setns
syscall_table_size = (. - sys_call_table)
diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h
index 50f2c5a..85c9552 100644
--- a/arch/h8300/include/asm/unistd.h
+++ b/arch/h8300/include/asm/unistd.h
@@ -325,10 +325,11 @@
#define __NR_move_pages 317
#define __NR_getcpu 318
#define __NR_epoll_pwait 319
+#define __NR_setns 320
#ifdef __KERNEL__
-#define NR_syscalls 320
+#define NR_syscalls 321
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S
index faefaff..f4b2e67 100644
--- a/arch/h8300/kernel/syscalls.S
+++ b/arch/h8300/kernel/syscalls.S
@@ -333,6 +333,7 @@ SYMBOL_NAME_LABEL(sys_call_table)
.long SYMBOL_NAME(sys_ni_syscall) /* sys_move_pages */
.long SYMBOL_NAME(sys_getcpu)
.long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_pwait */
+ .long SYMBOL_NAME(sys_setns) /* 320 */
.macro call_sp addr
mov.l #SYMBOL_NAME(\addr),er6
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index 404d037..2fbda93 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -319,11 +319,12 @@
#define __NR_open_by_handle_at 1327
#define __NR_clock_adjtime 1328
#define __NR_syncfs 1329
+#define __NR_setns 1330
#ifdef __KERNEL__
-#define NR_syscalls 306 /* length of syscall table */
+#define NR_syscalls 307 /* length of syscall table */
/*
* The following defines stop scripts/checksyscalls.sh from complaining about
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 6de2e23..9ca8019 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1775,6 +1775,7 @@ sys_call_table:
data8 sys_open_by_handle_at
data8 sys_clock_adjtime
data8 sys_syncfs
+ data8 sys_setns // 1330
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
diff --git a/arch/m32r/include/asm/unistd.h b/arch/m32r/include/asm/unistd.h
index c705456..3c7dafc 100644
--- a/arch/m32r/include/asm/unistd.h
+++ b/arch/m32r/include/asm/unistd.h
@@ -330,10 +330,11 @@
/* #define __NR_timerfd 322 removed */
#define __NR_eventfd 323
#define __NR_fallocate 324
+#define __NR_setns 325
#ifdef __KERNEL__
-#define NR_syscalls 325
+#define NR_syscalls 326
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_STAT64
diff --git a/arch/m32r/kernel/syscall_table.S b/arch/m32r/kernel/syscall_table.S
index 60536e2..528f2e6 100644
--- a/arch/m32r/kernel/syscall_table.S
+++ b/arch/m32r/kernel/syscall_table.S
@@ -324,3 +324,4 @@ ENTRY(sys_call_table)
.long sys_ni_syscall
.long sys_eventfd
.long sys_fallocate
+ .long sys_setns /* 325 */
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 29e1790..c11a093 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -347,10 +347,11 @@
#define __NR_open_by_handle_at 341
#define __NR_clock_adjtime 342
#define __NR_syncfs 343
+#define __NR_setns 344
#ifdef __KERNEL__
-#define NR_syscalls 344
+#define NR_syscalls 345
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index 9b8393d..b5a7bbf 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -362,6 +362,7 @@ ENTRY(sys_call_table)
.long sys_open_by_handle_at
.long sys_clock_adjtime
.long sys_syncfs
+ .long sys_setns
.rept NR_syscalls-(.-sys_call_table)/4
.long sys_ni_syscall
diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h
index 30edd61..dcebe59 100644
--- a/arch/microblaze/include/asm/unistd.h
+++ b/arch/microblaze/include/asm/unistd.h
@@ -390,8 +390,9 @@
#define __NR_open_by_handle_at 372
#define __NR_clock_adjtime 373
#define __NR_syncfs 374
+#define __NR_setns 375
-#define __NR_syscalls 375
+#define __NR_syscalls 376
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S
index 85cea81..d915a12 100644
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S
@@ -379,3 +379,4 @@ ENTRY(sys_call_table)
.long sys_open_by_handle_at
.long sys_clock_adjtime
.long sys_syncfs
+ .long sys_setns /* 375 */
diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index fa2e37e..d0d84eb 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h
@@ -363,16 +363,17 @@
#define __NR_open_by_handle_at (__NR_Linux + 340)
#define __NR_clock_adjtime (__NR_Linux + 341)
#define __NR_syncfs (__NR_Linux + 342)
+#define __NR_setns (__NR_Linux + 343)
/*
* Offset of the last Linux o32 flavoured syscall
*/
-#define __NR_Linux_syscalls 342
+#define __NR_Linux_syscalls 343
#endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
#define __NR_O32_Linux 4000
-#define __NR_O32_Linux_syscalls 342
+#define __NR_O32_Linux_syscalls 343
#if _MIPS_SIM == _MIPS_SIM_ABI64
@@ -682,16 +683,17 @@
#define __NR_open_by_handle_at (__NR_Linux + 299)
#define __NR_clock_adjtime (__NR_Linux + 300)
#define __NR_syncfs (__NR_Linux + 301)
+#define __NR_setns (__NR_Linux + 302)
/*
* Offset of the last Linux 64-bit flavoured syscall
*/
-#define __NR_Linux_syscalls 301
+#define __NR_Linux_syscalls 302
#endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
#define __NR_64_Linux 5000
-#define __NR_64_Linux_syscalls 301
+#define __NR_64_Linux_syscalls 302
#if _MIPS_SIM == _MIPS_SIM_NABI32
@@ -1006,16 +1008,17 @@
#define __NR_open_by_handle_at (__NR_Linux + 304)
#define __NR_clock_adjtime (__NR_Linux + 305)
#define __NR_syncfs (__NR_Linux + 306)
+#define __NR_setns (__NR_Linux + 307)
/*
* Offset of the last N32 flavoured syscall
*/
-#define __NR_Linux_syscalls 306
+#define __NR_Linux_syscalls 307
#endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
#define __NR_N32_Linux 6000
-#define __NR_N32_Linux_syscalls 306
+#define __NR_N32_Linux_syscalls 307
#ifdef __KERNEL__
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 7f5468b..c6a2b58 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -590,6 +590,7 @@ einval: li v0, -ENOSYS
sys sys_open_by_handle_at 3 /* 4340 */
sys sys_clock_adjtime 2
sys sys_syncfs 1
+ sys sys_setns 2
.endm
/* We pre-compute the number of _instruction_ bytes needed to
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index a2e1fcb..57e4184 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -429,4 +429,5 @@ sys_call_table:
PTR sys_open_by_handle_at
PTR sys_clock_adjtime /* 5300 */
PTR sys_syncfs
+ PTR sys_setns
.size sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index b2c7624..91ae887 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -429,4 +429,5 @@ EXPORT(sysn32_call_table)
PTR sys_open_by_handle_at
PTR compat_sys_clock_adjtime /* 6305 */
PTR sys_syncfs
+ PTR sys_setns
.size sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 049a9c8..02e6ae5 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -547,4 +547,5 @@ sys_call_table:
PTR compat_sys_open_by_handle_at /* 4340 */
PTR compat_sys_clock_adjtime
PTR sys_syncfs
+ PTR sys_setns
.size sys_call_table,.-sys_call_table
diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h
index 9d056f5..faf57bb 100644
--- a/arch/mn10300/include/asm/unistd.h
+++ b/arch/mn10300/include/asm/unistd.h
@@ -349,10 +349,11 @@
#define __NR_rt_tgsigqueueinfo 336
#define __NR_perf_event_open 337
#define __NR_recvmmsg 338
+#define __NR_setns 339
#ifdef __KERNEL__
-#define NR_syscalls 339
+#define NR_syscalls 340
/*
* specify the deprecated syscalls we want to support on this arch
diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S
index fb93ad7..ae435e1 100644
--- a/arch/mn10300/kernel/entry.S
+++ b/arch/mn10300/kernel/entry.S
@@ -759,6 +759,7 @@ ENTRY(sys_call_table)
.long sys_rt_tgsigqueueinfo
.long sys_perf_event_open
.long sys_recvmmsg
+ .long sys_setns
nr_syscalls=(.-sys_call_table)/4
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 3eb82c2..bbaed57 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -814,9 +814,9 @@
#define __NR_recvmmsg (__NR_Linux + 319)
#define __NR_accept4 (__NR_Linux + 320)
#define __NR_prlimit64 (__NR_Linux + 321)
+#define __NR_setns (__NR_Linux + 322)
-#define __NR_Linux_syscalls (__NR_prlimit64 + 1)
-
+#define __NR_Linux_syscalls (__NR_setns + 1)
#define __IGNORE_select /* newselect */
#define __IGNORE_fadvise64 /* fadvise64_64 */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 4be85ee..151349c 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -420,6 +420,7 @@
ENTRY_COMP(recvmmsg)
ENTRY_SAME(accept4) /* 320 */
ENTRY_SAME(prlimit64)
+ ENTRY_SAME(setns)
/* Nothing yet */
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 60f64b1..2a5f60c 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -352,3 +352,4 @@ SYSCALL_SPU(name_to_handle_at)
COMPAT_SYS_SPU(open_by_handle_at)
COMPAT_SYS_SPU(clock_adjtime)
SYSCALL_SPU(syncfs)
+SYSCALL_SPU(setns)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 3c21564..2780f7a 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -371,10 +371,11 @@
#define __NR_open_by_handle_at 346
#define __NR_clock_adjtime 347
#define __NR_syncfs 348
+#define __NR_setns 349
#ifdef __KERNEL__
-#define __NR_syscalls 349
+#define __NR_syscalls 350
#define __NR__exit __NR_exit
#define NR_syscalls __NR_syscalls
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index e821525..3399bed 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -276,7 +276,8 @@
#define __NR_open_by_handle_at 336
#define __NR_clock_adjtime 337
#define __NR_syncfs 338
-#define NR_syscalls 339
+#define __NR_setns 339
+#define NR_syscalls 340
/*
* There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 9c65fd4..3ec04ba 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -347,3 +347,4 @@ SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,sys_name_to_handle_at_wrappe
SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at_wrapper)
SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper)
SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper)
+SYSCALL(sys_setns,sys_setns,sys_setns)
diff --git a/arch/sh/include/asm/unistd_32.h b/arch/sh/include/asm/unistd_32.h
index ca7765e..26c697c 100644
--- a/arch/sh/include/asm/unistd_32.h
+++ b/arch/sh/include/asm/unistd_32.h
@@ -373,8 +373,9 @@
#define __NR_open_by_handle_at 360
#define __NR_clock_adjtime 361
#define __NR_syncfs 362
+#define __NR_setns 363
-#define NR_syscalls 363
+#define NR_syscalls 364
#ifdef __KERNEL__
diff --git a/arch/sh/include/asm/unistd_64.h b/arch/sh/include/asm/unistd_64.h
index a694009..330668a 100644
--- a/arch/sh/include/asm/unistd_64.h
+++ b/arch/sh/include/asm/unistd_64.h
@@ -394,10 +394,11 @@
#define __NR_open_by_handle_at 371
#define __NR_clock_adjtime 372
#define __NR_syncfs 373
+#define __NR_setns 374
#ifdef __KERNEL__
-#define NR_syscalls 374
+#define NR_syscalls 375
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S
index 030966a..67dcf66 100644
--- a/arch/sh/kernel/syscalls_32.S
+++ b/arch/sh/kernel/syscalls_32.S
@@ -380,3 +380,4 @@ ENTRY(sys_call_table)
.long sys_open_by_handle_at /* 360 */
.long sys_clock_adjtime
.long sys_syncfs
+ .long sys_setns
diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S
index ca0a614..dade54e 100644
--- a/arch/sh/kernel/syscalls_64.S
+++ b/arch/sh/kernel/syscalls_64.S
@@ -400,3 +400,4 @@ sys_call_table:
.long sys_open_by_handle_at
.long sys_clock_adjtime
.long sys_syncfs
+ .long sys_setns
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index 9d897b6..10b3105 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -404,8 +404,9 @@
#define __NR_open_by_handle_at 333
#define __NR_clock_adjtime 334
#define __NR_syncfs 335
+#define __NR_setns 336
-#define NR_syscalls 336
+#define NR_syscalls 337
#ifdef __32bit_syscall_numbers__
/* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 47ac73c..88d0a93 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -84,4 +84,4 @@ sys_call_table:
/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
/*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
-/*335*/ .long sys_syncfs
+/*335*/ .long sys_syncfs, sys_setns
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 4f3170c..19cdb12 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -85,7 +85,7 @@ sys_call_table32:
/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init
/*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime
- .word sys_syncfs
+ .word sys_syncfs, sys_setns
#endif /* CONFIG_COMPAT */
@@ -162,4 +162,4 @@ sys_call_table:
/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
/*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
- .word sys_syncfs
+ .word sys_syncfs, sys_setns
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 849a9d2..58bef77 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -848,4 +848,5 @@ ia32_sys_call_table:
.quad compat_sys_open_by_handle_at
.quad compat_sys_clock_adjtime
.quad sys_syncfs
+ .quad sys_setns /* 345 */
ia32_syscall_end:
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index a755ef5..be6c177 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -350,10 +350,11 @@
#define __NR_open_by_handle_at 342
#define __NR_clock_adjtime 343
#define __NR_syncfs 344
+#define __NR_setns 345
#ifdef __KERNEL__
-#define NR_syscalls 345
+#define NR_syscalls 346
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 160fa76..c46f833 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -677,6 +677,8 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
#define __NR_syncfs 306
__SYSCALL(__NR_syncfs, sys_syncfs)
+#define __NR_setns 307
+__SYSCALL(__NR_setns, sys_setns)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index abce34d..3d77b82 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -344,3 +344,4 @@ ENTRY(sys_call_table)
.long sys_open_by_handle_at
.long sys_clock_adjtime
.long sys_syncfs
+ .long sys_setns /* 345 */
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index 528042c..a6f934f 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -683,8 +683,10 @@ __SYSCALL(305, sys_ni_syscall, 0)
__SYSCALL(306, sys_eventfd, 1)
#define __NR_recvmmsg 307
__SYSCALL(307, sys_recvmmsg, 5)
+#define __NR_setns 308
+__SYSCALL(308, sys_setns, 2)
-#define __NR_syscall_count 308
+#define __NR_syscall_count 309
/*
* sysxtensa syscall handler
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index 07c40d5..5fcd304 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -654,9 +654,11 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
#define __NR_syncfs 267
__SYSCALL(__NR_syncfs, sys_syncfs)
+#define __NR_setns 268
+__SYSCALL(__NR_setns, sys_setns)
#undef __NR_syscalls
-#define __NR_syscalls 268
+#define __NR_syscalls 269
/*
* All syscalls below here should go away really,
--
1.6.5.2.143.g8cc62
^ permalink raw reply related [flat|nested] 70+ messages in thread
* Re: [PATCH 7/7] ns: Wire up the setns system call
2011-05-07 2:25 ` Eric W. Biederman
(?)
@ 2011-05-07 8:27 ` Geert Uytterhoeven
2011-05-07 14:09 ` Eric W. Biederman
-1 siblings, 1 reply; 70+ messages in thread
From: Geert Uytterhoeven @ 2011-05-07 8:27 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Daniel Lezcano, Linux Containers, Renato Westphal
On Sat, May 7, 2011 at 04:25, Eric W. Biederman <ebiederm@xmission.com> wrote:
> arch/m68k/include/asm/unistd.h | 3 ++-
> arch/m68k/kernel/syscalltable.S | 1 +
As the unified syscalltable for m68k/m68knommu is not yet in mainline (planned
for 2.6.40), you should also add it to arch/m68k/kernel/entry_mm.S.
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH 7/7] ns: Wire up the setns system call
2011-05-07 8:27 ` Geert Uytterhoeven
2011-05-07 14:09 ` Eric W. Biederman
@ 2011-05-07 14:09 ` Eric W. Biederman
0 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 14:09 UTC (permalink / raw)
To: Geert Uytterhoeven
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Daniel Lezcano, Linux Containers, Renato Westphal
Geert Uytterhoeven <geert@linux-m68k.org> writes:
> On Sat, May 7, 2011 at 04:25, Eric W. Biederman <ebiederm@xmission.com> wrote:
>> arch/m68k/include/asm/unistd.h | 3 ++-
>> arch/m68k/kernel/syscalltable.S | 1 +
>
> As the unified syscalltable for m68k/m68knommu is not yet in mainline
> (planned for
> 2.6.40), you should also add it to arch/m68k/kernel/entry_mm.S.
>
> Gr{oetje,eeting}s,
Like so?
From c06a03281d944ed36e2da02f5374ec6c650e4988 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 7 May 2011 07:00:24 -0700
Subject: [PATCH] m68knommu: Wire up the setns system call
It seems I overlooked m68knommu where I wired up this syscall.
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
arch/m68k/kernel/entry_mm.S | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/arch/m68k/kernel/entry_mm.S b/arch/m68k/kernel/entry_mm.S
index 1359ee6..e048015 100644
--- a/arch/m68k/kernel/entry_mm.S
+++ b/arch/m68k/kernel/entry_mm.S
@@ -754,4 +754,5 @@ sys_call_table:
.long sys_open_by_handle_at
.long sys_clock_adjtime
.long sys_syncfs
+ .long sys_setns
--
1.7.5.1.217.g4e3aa
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 70+ messages in thread
* Re: [PATCH 7/7] ns: Wire up the setns system call
@ 2011-05-07 14:09 ` Eric W. Biederman
0 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 14:09 UTC (permalink / raw)
To: Geert Uytterhoeven
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Daniel Lezcano, Linux Containers, Renato Westphal
Geert Uytterhoeven <geert@linux-m68k.org> writes:
> On Sat, May 7, 2011 at 04:25, Eric W. Biederman <ebiederm@xmission.com> wrote:
>> arch/m68k/include/asm/unistd.h | 3 ++-
>> arch/m68k/kernel/syscalltable.S | 1 +
>
> As the unified syscalltable for m68k/m68knommu is not yet in mainline
> (planned for
> 2.6.40), you should also add it to arch/m68k/kernel/entry_mm.S.
>
> Gr{oetje,eeting}s,
Like so?
From c06a03281d944ed36e2da02f5374ec6c650e4988 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 7 May 2011 07:00:24 -0700
Subject: [PATCH] m68knommu: Wire up the setns system call
It seems I overlooked m68knommu where I wired up this syscall.
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
arch/m68k/kernel/entry_mm.S | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/arch/m68k/kernel/entry_mm.S b/arch/m68k/kernel/entry_mm.S
index 1359ee6..e048015 100644
--- a/arch/m68k/kernel/entry_mm.S
+++ b/arch/m68k/kernel/entry_mm.S
@@ -754,4 +754,5 @@ sys_call_table:
.long sys_open_by_handle_at
.long sys_clock_adjtime
.long sys_syncfs
+ .long sys_setns
--
1.7.5.1.217.g4e3aa
^ permalink raw reply related [flat|nested] 70+ messages in thread
* Re: [PATCH 7/7] ns: Wire up the setns system call
@ 2011-05-07 14:09 ` Eric W. Biederman
0 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-07 14:09 UTC (permalink / raw)
To: Geert Uytterhoeven
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Daniel Lezcano, Linux Containers, Renato Westphal
Geert Uytterhoeven <geert@linux-m68k.org> writes:
> On Sat, May 7, 2011 at 04:25, Eric W. Biederman <ebiederm@xmission.com> wrote:
>> arch/m68k/include/asm/unistd.h | 3 ++-
>> arch/m68k/kernel/syscalltable.S | 1 +
>
> As the unified syscalltable for m68k/m68knommu is not yet in mainline
> (planned for
> 2.6.40), you should also add it to arch/m68k/kernel/entry_mm.S.
>
> Gr{oetje,eeting}s,
Like so?
From c06a03281d944ed36e2da02f5374ec6c650e4988 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 7 May 2011 07:00:24 -0700
Subject: [PATCH] m68knommu: Wire up the setns system call
It seems I overlooked m68knommu where I wired up this syscall.
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
arch/m68k/kernel/entry_mm.S | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/arch/m68k/kernel/entry_mm.S b/arch/m68k/kernel/entry_mm.S
index 1359ee6..e048015 100644
--- a/arch/m68k/kernel/entry_mm.S
+++ b/arch/m68k/kernel/entry_mm.S
@@ -754,4 +754,5 @@ sys_call_table:
.long sys_open_by_handle_at
.long sys_clock_adjtime
.long sys_syncfs
+ .long sys_setns
--
1.7.5.1.217.g4e3aa
^ permalink raw reply related [flat|nested] 70+ messages in thread
* Re: [PATCH 7/7] ns: Wire up the setns system call
2011-05-07 14:09 ` Eric W. Biederman
(?)
(?)
@ 2011-05-07 18:22 ` Geert Uytterhoeven
-1 siblings, 0 replies; 70+ messages in thread
From: Geert Uytterhoeven @ 2011-05-07 18:22 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Daniel Lezcano, Linux Containers, Renato Westphal
On Sat, May 7, 2011 at 16:09, Eric W. Biederman <ebiederm@xmission.com> wrote:
> Geert Uytterhoeven <geert@linux-m68k.org> writes:
>
>> On Sat, May 7, 2011 at 04:25, Eric W. Biederman <ebiederm@xmission.com> wrote:
>>> arch/m68k/include/asm/unistd.h | 3 ++-
>>> arch/m68k/kernel/syscalltable.S | 1 +
>>
>> As the unified syscalltable for m68k/m68knommu is not yet in mainline
>> (planned for
>> 2.6.40), you should also add it to arch/m68k/kernel/entry_mm.S.
>>
>> Gr{oetje,eeting}s,
>
> Like so?
>
> From c06a03281d944ed36e2da02f5374ec6c650e4988 Mon Sep 17 00:00:00 2001
> From: "Eric W. Biederman" <ebiederm@xmission.com>
> Date: Sat, 7 May 2011 07:00:24 -0700
> Subject: [PATCH] m68knommu: Wire up the setns system call
>
> It seems I overlooked m68knommu where I wired up this syscall.
You overlooked m68k with MMU. syscalltable.S is used by m68knommu.
> Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> ---
> arch/m68k/kernel/entry_mm.S | 1 +
> 1 files changed, 1 insertions(+), 0 deletions(-)
>
> diff --git a/arch/m68k/kernel/entry_mm.S b/arch/m68k/kernel/entry_mm.S
> index 1359ee6..e048015 100644
> --- a/arch/m68k/kernel/entry_mm.S
> +++ b/arch/m68k/kernel/entry_mm.S
> @@ -754,4 +754,5 @@ sys_call_table:
> .long sys_open_by_handle_at
> .long sys_clock_adjtime
> .long sys_syncfs
> + .long sys_setns
Yep.
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH 7/7] ns: Wire up the setns system call
2011-05-07 2:25 ` Eric W. Biederman
(?)
(?)
@ 2011-05-07 13:59 ` Mike Frysinger
-1 siblings, 0 replies; 70+ messages in thread
From: Mike Frysinger @ 2011-05-07 13:59 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Daniel Lezcano, Linux Containers, Renato Westphal
On Fri, May 6, 2011 at 22:25, Eric W. Biederman wrote:
> v2: Most of the architecture support added by Daniel Lezcano <dlezcano@fr.ibm.com>
> v3: ported to v2.6.36-rc4 by: Eric W. Biederman <ebiederm@xmission.com>
> v4: Moved wiring up of the system call to another patch
> v5: ported to v2.6.39-rc6
>
> arch/blackfin/include/asm/unistd.h | 3 ++-
> arch/blackfin/mach-common/entry.S | 1 +
Acked-by: Mike Frysinger <vapier@gentoo.org>
-mike
^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH 7/7] ns: Wire up the setns system call
2011-05-07 2:25 ` Eric W. Biederman
` (2 preceding siblings ...)
(?)
@ 2011-05-07 20:06 ` James Bottomley
2011-05-08 2:19 ` Eric W. Biederman
-1 siblings, 1 reply; 70+ messages in thread
From: James Bottomley @ 2011-05-07 20:06 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Daniel Lezcano, Linux Containers, Renato Westphal
On Fri, 2011-05-06 at 19:25 -0700, Eric W. Biederman wrote:
> v2: Most of the architecture support added by Daniel Lezcano <dlezcano@fr.ibm.com>
> v3: ported to v2.6.36-rc4 by: Eric W. Biederman <ebiederm@xmission.com>
> v4: Moved wiring up of the system call to another patch
> v5: ported to v2.6.39-rc6
>
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
We already have several syscalls queued up for parisc:
http://git.kernel.org/?p=linux/kernel/git/jejb/parisc-2.6.git;a=shortlog;h=refs/heads/misc
So if you could make this patch over them (or over linux-next), that
would help the merge process.
Thanks,
James
^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH 7/7] ns: Wire up the setns system call
2011-05-07 20:06 ` James Bottomley
@ 2011-05-08 2:19 ` Eric W. Biederman
2011-05-08 4:02 ` James Bottomley
0 siblings, 1 reply; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-08 2:19 UTC (permalink / raw)
To: James Bottomley
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Daniel Lezcano, Linux Containers, Renato Westphal
James Bottomley <James.Bottomley@HansenPartnership.com> writes:
> On Fri, 2011-05-06 at 19:25 -0700, Eric W. Biederman wrote:
>> v2: Most of the architecture support added by Daniel Lezcano <dlezcano@fr.ibm.com>
>> v3: ported to v2.6.36-rc4 by: Eric W. Biederman <ebiederm@xmission.com>
>> v4: Moved wiring up of the system call to another patch
>> v5: ported to v2.6.39-rc6
>>
>> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
>
> We already have several syscalls queued up for parisc:
>
> http://git.kernel.org/?p=linux/kernel/git/jejb/parisc-2.6.git;a=shortlog;h=refs/heads/misc
>
> So if you could make this patch over them (or over linux-next), that
> would help the merge process.
I will take a look. I was rather pleasantly surprised that no other
system call conflicts had shown up before now.
This is unfortunately one of those areas where it is almost impossible
to avoid conflicts.
Do you know if there is any chance that the parisc tree might get
rebased or anything horrible like that?
If not I think I will just pull the hunk of the parisc tree with the syscalls
e38f5b745075828ac51b12c8c95c85a7be4a3ec7...2e7bad5f34b5beed47542490c760ed26574e38ba
into my tree so I don't have to worry about merge order.
Eric
^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH 7/7] ns: Wire up the setns system call
2011-05-08 2:19 ` Eric W. Biederman
@ 2011-05-08 4:02 ` James Bottomley
0 siblings, 0 replies; 70+ messages in thread
From: James Bottomley @ 2011-05-08 4:02 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Daniel Lezcano, Linux Containers, Renato Westphal
On Sat, 2011-05-07 at 19:19 -0700, Eric W. Biederman wrote:
> James Bottomley <James.Bottomley@HansenPartnership.com> writes:
>
> > On Fri, 2011-05-06 at 19:25 -0700, Eric W. Biederman wrote:
> >> v2: Most of the architecture support added by Daniel Lezcano <dlezcano@fr.ibm.com>
> >> v3: ported to v2.6.36-rc4 by: Eric W. Biederman <ebiederm@xmission.com>
> >> v4: Moved wiring up of the system call to another patch
> >> v5: ported to v2.6.39-rc6
> >>
> >> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> >
> > We already have several syscalls queued up for parisc:
> >
> > http://git.kernel.org/?p=linux/kernel/git/jejb/parisc-2.6.git;a=shortlog;h=refs/heads/misc
> >
> > So if you could make this patch over them (or over linux-next), that
> > would help the merge process.
>
> I will take a look. I was rather pleasantly surprised that no other
> system call conflicts had shown up before now.
>
> This is unfortunately one of those areas where it is almost impossible
> to avoid conflicts.
>
> Do you know if there is any chance that the parisc tree might get
> rebased or anything horrible like that?
It shouldn't unless one of the wire ups is wrong and I have to replace
it, but I think the possibility of that will be minute.
> If not I think I will just pull the hunk of the parisc tree with the syscalls
> e38f5b745075828ac51b12c8c95c85a7be4a3ec7...2e7bad5f34b5beed47542490c760ed26574e38ba
> into my tree so I don't have to worry about merge order.
Yes, that should work fine.
James
^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH 1/7] ns: proc files for namespace naming policy.
2011-05-07 2:24 ` Eric W. Biederman
` (5 preceding siblings ...)
(?)
@ 2011-05-07 22:37 ` Daniel Lezcano
-1 siblings, 0 replies; 70+ messages in thread
From: Daniel Lezcano @ 2011-05-07 22:37 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Linux Containers, Renato Westphal
On 05/07/2011 04:24 AM, Eric W. Biederman wrote:
> Create files under /proc/<pid>/ns/ to allow controlling the
> namespaces of a process.
>
> This addresses three specific problems that can make namespaces hard to
> work with.
> - Namespaces require a dedicated process to pin them in memory.
> - It is not possible to use a namespace unless you are the child
> of the original creator.
> - Namespaces don't have names that userspace can use to talk about
> them.
>
> The namespace files under /proc/<pid>/ns/ can be opened and the
> file descriptor can be used to talk about a specific namespace, and
> to keep the specified namespace alive.
>
> A namespace can be kept alive by either holding the file descriptor
> open or bind mounting the file someplace else. aka:
> mount --bind /proc/self/ns/net /some/filesystem/path
> mount --bind /proc/self/fd/<N> /some/filesystem/path
>
> This allows namespaces to be named with userspace policy.
>
> It requires additional support to make use of these file descriptors
> and that will be coming in the following patches.
>
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH 1/7] ns: proc files for namespace naming policy.
2011-05-07 2:24 ` Eric W. Biederman
` (6 preceding siblings ...)
(?)
@ 2011-05-11 19:20 ` Nathan Lynch
2011-05-11 22:52 ` Eric W. Biederman
-1 siblings, 1 reply; 70+ messages in thread
From: Nathan Lynch @ 2011-05-11 19:20 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Daniel Lezcano, Linux Containers, Renato Westphal
Hi Eric,
A few comments on your patch set.
On Fri, 2011-05-06 at 19:24 -0700, Eric W. Biederman wrote:
> diff --git a/fs/proc/inode.c b/fs/proc/inode.c
> index d15aa1b..74b48cf 100644
> --- a/fs/proc/inode.c
> +++ b/fs/proc/inode.c
> @@ -28,6 +28,7 @@ static void proc_evict_inode(struct inode *inode)
> {
> struct proc_dir_entry *de;
> struct ctl_table_header *head;
> + const struct proc_ns_operations *ns_ops;
>
> truncate_inode_pages(&inode->i_data, 0);
> end_writeback(inode);
> @@ -44,6 +45,10 @@ static void proc_evict_inode(struct inode *inode)
> rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
> sysctl_head_put(head);
> }
> + /* Release any associated namespace */
> + ns_ops = PROC_I(inode)->ns_ops;
> + if (ns_ops && ns_ops->put)
> + ns_ops->put(PROC_I(inode)->ns);
Is it ever valid for ns_ops->put to be null? If not, I suggest removing
the check.
> diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
> new file mode 100644
> index 0000000..6ae9f07
> --- /dev/null
> +++ b/fs/proc/namespaces.c
...
> +static struct dentry *proc_ns_dir_lookup(struct inode *dir,
> + struct dentry *dentry, struct nameidata *nd)
> +{
> + struct dentry *error;
> + struct task_struct *task = get_proc_task(dir);
> + const struct proc_ns_operations **entry, **last;
> + unsigned int len = dentry->d_name.len;
> +
> + error = ERR_PTR(-ENOENT);
> +
> + if (!task)
> + goto out_no_task;
> +
> + error = ERR_PTR(-EPERM);
> + if (!ptrace_may_access(task, PTRACE_MODE_READ))
> + goto out;
> +
> + last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
> + for (entry = ns_entries; entry <= last; entry++) {
> + if (strlen((*entry)->name) != len)
> + continue;
> + if (!memcmp(dentry->d_name.name, (*entry)->name, len))
> + break;
> + }
> + if (entry > last)
> + goto out;
This returns EPERM when it should return ENOENT?
> +
> + error = proc_ns_instantiate(dir, dentry, task, *entry);
> +out:
> + put_task_struct(task);
> +out_no_task:
> + return error;
> +}
...
> --- a/include/linux/proc_fs.h
> +++ b/include/linux/proc_fs.h
> @@ -250,6 +257,15 @@ kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
> extern void kclist_add(struct kcore_list *, void *, size_t, int type);
> #endif
>
> +struct nsproxy;
> +struct proc_ns_operations {
> + const char *name;
> + int type;
> + void *(*get)(struct task_struct *task);
> + void (*put)(void *ns);
> + int (*install)(struct nsproxy *nsproxy, void *ns);
> +};
> +
> union proc_op {
> int (*proc_get_link)(struct inode *, struct path *);
> int (*proc_read)(struct task_struct *task, char *page);
> @@ -268,6 +284,8 @@ struct proc_inode {
> struct proc_dir_entry *pde;
> struct ctl_table_header *sysctl;
> struct ctl_table *sysctl_entry;
> + void *ns;
> + const struct proc_ns_operations *ns_ops;
> struct inode vfs_inode;
> };
Not that I have any better ideas, but it seems a bit undesirable to
increase the size of proc_inode for this one purpose.
^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH 1/7] ns: proc files for namespace naming policy.
2011-05-11 19:20 ` Nathan Lynch
@ 2011-05-11 22:52 ` Eric W. Biederman
0 siblings, 0 replies; 70+ messages in thread
From: Eric W. Biederman @ 2011-05-11 22:52 UTC (permalink / raw)
To: Nathan Lynch
Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
Daniel Lezcano, Linux Containers, Renato Westphal
Nathan Lynch <ntl@pobox.com> writes:
> Hi Eric,
>
> A few comments on your patch set.
>
>
> On Fri, 2011-05-06 at 19:24 -0700, Eric W. Biederman wrote:
>> diff --git a/fs/proc/inode.c b/fs/proc/inode.c
>> index d15aa1b..74b48cf 100644
>> --- a/fs/proc/inode.c
>> +++ b/fs/proc/inode.c
>> @@ -28,6 +28,7 @@ static void proc_evict_inode(struct inode *inode)
>> {
>> struct proc_dir_entry *de;
>> struct ctl_table_header *head;
>> + const struct proc_ns_operations *ns_ops;
>>
>> truncate_inode_pages(&inode->i_data, 0);
>> end_writeback(inode);
>> @@ -44,6 +45,10 @@ static void proc_evict_inode(struct inode *inode)
>> rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
>> sysctl_head_put(head);
>> }
>> + /* Release any associated namespace */
>> + ns_ops = PROC_I(inode)->ns_ops;
>> + if (ns_ops && ns_ops->put)
>> + ns_ops->put(PROC_I(inode)->ns);
>
> Is it ever valid for ns_ops->put to be null? If not, I suggest removing
> the check.
>
>
>> diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
>> new file mode 100644
>> index 0000000..6ae9f07
>> --- /dev/null
>> +++ b/fs/proc/namespaces.c
>
> ...
>
>> +static struct dentry *proc_ns_dir_lookup(struct inode *dir,
>> + struct dentry *dentry, struct nameidata *nd)
>> +{
>> + struct dentry *error;
>> + struct task_struct *task = get_proc_task(dir);
>> + const struct proc_ns_operations **entry, **last;
>> + unsigned int len = dentry->d_name.len;
>> +
>> + error = ERR_PTR(-ENOENT);
>> +
>> + if (!task)
>> + goto out_no_task;
>> +
>> + error = ERR_PTR(-EPERM);
>> + if (!ptrace_may_access(task, PTRACE_MODE_READ))
>> + goto out;
>> +
>> + last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
>> + for (entry = ns_entries; entry <= last; entry++) {
>> + if (strlen((*entry)->name) != len)
>> + continue;
>> + if (!memcmp(dentry->d_name.name, (*entry)->name, len))
>> + break;
>> + }
>> + if (entry > last)
>> + goto out;
>
> This returns EPERM when it should return ENOENT?
Good catch.
And fixed now.
>> union proc_op {
>> int (*proc_get_link)(struct inode *, struct path *);
>> int (*proc_read)(struct task_struct *task, char *page);
>> @@ -268,6 +284,8 @@ struct proc_inode {
>> struct proc_dir_entry *pde;
>> struct ctl_table_header *sysctl;
>> struct ctl_table *sysctl_entry;
>> + void *ns;
>> + const struct proc_ns_operations *ns_ops;
>> struct inode vfs_inode;
>> };
>
> Not that I have any better ideas, but it seems a bit undesirable to
> increase the size of proc_inode for this one purpose.
Of the options I could think of, this was the cleanest, and proc_inode
is just a caching data structure, which means that the effect should be
comparatively minimal.
That said, I won't oppose a change at some point to reduce the size of
proc_inode; there are a lot of fields that are not used for most proc
entries.
Eric
^ permalink raw reply [flat|nested] 70+ messages in thread