All of lore.kernel.org
 help / color / mirror / Atom feed
From: Louis Rilling <Louis.Rilling@kerlabs.com>
To: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Linux Containers <containers@lists.osdl.org>,
	Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>,
	linux-kernel@vger.kernel.org, Pavel Emelyanov <xemul@openvz.org>,
	Oleg Nesterov <oleg@redhat.com>
Subject: Re: [PATCH 01/24] pidns: Remove races by stopping the caching of proc_mnt
Date: Sun, 11 Jul 2010 16:14:07 +0200	[thread overview]
Message-ID: <20100711141406.GD18586@hawkmoon.kerlabs.com> (raw)
In-Reply-To: <m1aaq04ph2.fsf_-_@fess.ebiederm.org>

[-- Attachment #1: Type: text/plain, Size: 7705 bytes --]

On 09/07/10  8:58 -0700, Eric W. Biederman wrote:
> 
> Having proc reference the pid_namespace and the pid_namespace
> reference proc is a serious reference counting problem, which has
> resulted in both leaks and use after free problems.  Mount already
> knows how to go from a pid_namespace to a mount of proc, so we don't
> need to cache the proc mount.
> 
> To do this I introduce get_proc_mnt and replace pid_ns->proc_mnt users
> with it. Additionally I remove pid_ns_(prepare|release)_proc as they
> are now unneeded.
> 
> This is slightly less efficient but it is much easier to avoid the
> races.  If efficiency winds up being a problem we can revisit our data
> structures.

IIUC, the difference between this solution and the first one I proposed is that
instead of pinning proc_mnt with mntget() at copy_process()-time, proc_mnt is
looked for and, if possible, mntget() at release_task()-time.

Could you elaborate on the trade-off, that is accessing proc_mnt at
copy_process()-time vs looking up proc_mnt at release_task()-time?

Thanks,

Louis

> 
> Reported-by: Louis Rilling <Louis.Rilling@kerlabs.com>
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> ---
>  arch/um/drivers/mconsole_kern.c |    4 +++-
>  fs/hppfs/hppfs.c                |    2 +-
>  fs/proc/base.c                  |   13 +++++++------
>  fs/proc/root.c                  |   15 ++-------------
>  include/linux/pid_namespace.h   |    3 ---
>  include/linux/proc_fs.h         |   13 +------------
>  kernel/fork.c                   |    6 ------
>  kernel/sysctl_binary.c          |    3 ++-
>  8 files changed, 16 insertions(+), 43 deletions(-)
> 
> diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
> index de317d0..e89f72c 100644
> --- a/arch/um/drivers/mconsole_kern.c
> +++ b/arch/um/drivers/mconsole_kern.c
> @@ -125,7 +125,7 @@ void mconsole_log(struct mc_request *req)
>  void mconsole_proc(struct mc_request *req)
>  {
>  	struct nameidata nd;
> -	struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt;
> +	struct vfsmount *mnt;
>  	struct file *file;
>  	int n, err;
>  	char *ptr = req->request.data, *buf;
> @@ -134,7 +134,9 @@ void mconsole_proc(struct mc_request *req)
>  	ptr += strlen("proc");
>  	ptr = skip_spaces(ptr);
>  
> +	mnt = get_proc_mnt(task_active_pid_ns(current));
>  	err = vfs_path_lookup(mnt->mnt_root, mnt, ptr, LOOKUP_FOLLOW, &nd);
> +	mntput(mnt);
>  	if (err) {
>  		mconsole_reply(req, "Failed to look up file", 1, 0);
>  		goto out;
> diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
> index 826c3f9..79e61ab 100644
> --- a/fs/hppfs/hppfs.c
> +++ b/fs/hppfs/hppfs.c
> @@ -718,7 +718,7 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
>  	struct vfsmount *proc_mnt;
>  	int err = -ENOENT;
>  
> -	proc_mnt = mntget(current->nsproxy->pid_ns->proc_mnt);
> +	proc_mnt = get_proc_mnt(task_active_pid_ns(current));
>  	if (IS_ERR(proc_mnt))
>  		goto out;
>  
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index acb7ef8..93a3ee9 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -2735,6 +2735,7 @@ void proc_flush_task(struct task_struct *task)
>  {
>  	int i;
>  	struct pid *pid, *tgid;
> +	struct vfsmount *mnt;
>  	struct upid *upid;
>  
>  	pid = task_pid(task);
> @@ -2742,13 +2743,13 @@ void proc_flush_task(struct task_struct *task)
>  
>  	for (i = 0; i <= pid->level; i++) {
>  		upid = &pid->numbers[i];
> -		proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
> -					tgid->numbers[i].nr);
> -	}
> +		mnt = get_proc_mnt(upid->ns);
> +		if (IS_ERR(mnt))
> +			continue;
>  
> -	upid = &pid->numbers[pid->level];
> -	if (upid->nr == 1)
> -		pid_ns_release_proc(upid->ns);
> +		proc_flush_task_mnt(mnt, upid->nr, tgid->numbers[i].nr);
> +		mntput(mnt);
> +	}
>  }
>  
>  static struct dentry *proc_pid_instantiate(struct inode *dir,
> diff --git a/fs/proc/root.c b/fs/proc/root.c
> index 4258384..c042015 100644
> --- a/fs/proc/root.c
> +++ b/fs/proc/root.c
> @@ -79,7 +79,6 @@ static int proc_get_sb(struct file_system_type *fs_type,
>  		}
>  
>  		sb->s_flags |= MS_ACTIVE;
> -		ns->proc_mnt = mnt;
>  	}
>  
>  	simple_set_mnt(mnt, sb);
> @@ -204,18 +203,8 @@ struct proc_dir_entry proc_root = {
>  	.parent		= &proc_root,
>  };
>  
> -int pid_ns_prepare_proc(struct pid_namespace *ns)
> +struct vfsmount *get_proc_mnt(struct pid_namespace *ns)
>  {
> -	struct vfsmount *mnt;
> -
> -	mnt = kern_mount_data(&proc_fs_type, ns);
> -	if (IS_ERR(mnt))
> -		return PTR_ERR(mnt);
> -
> -	return 0;
> +	return kern_mount_data(&proc_fs_type, ns);
>  }
>  
> -void pid_ns_release_proc(struct pid_namespace *ns)
> -{
> -	mntput(ns->proc_mnt);
> -}
> diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
> index 38d1032..3feb917 100644
> --- a/include/linux/pid_namespace.h
> +++ b/include/linux/pid_namespace.h
> @@ -24,9 +24,6 @@ struct pid_namespace {
>  	struct kmem_cache *pid_cachep;
>  	unsigned int level;
>  	struct pid_namespace *parent;
> -#ifdef CONFIG_PROC_FS
> -	struct vfsmount *proc_mnt;
> -#endif
>  #ifdef CONFIG_BSD_PROCESS_ACCT
>  	struct bsd_acct_struct *bacct;
>  #endif
> diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
> index 379eaed..6b7a416 100644
> --- a/include/linux/proc_fs.h
> +++ b/include/linux/proc_fs.h
> @@ -103,6 +103,7 @@ struct vmcore {
>  #ifdef CONFIG_PROC_FS
>  
>  extern void proc_root_init(void);
> +struct vfsmount *get_proc_mnt(struct pid_namespace *ns);
>  
>  void proc_flush_task(struct task_struct *task);
>  
> @@ -116,9 +117,6 @@ extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
>  
>  struct pid_namespace;
>  
> -extern int pid_ns_prepare_proc(struct pid_namespace *ns);
> -extern void pid_ns_release_proc(struct pid_namespace *ns);
> -
>  /*
>   * proc_tty.c
>   */
> @@ -217,15 +215,6 @@ struct tty_driver;
>  static inline void proc_tty_register_driver(struct tty_driver *driver) {};
>  static inline void proc_tty_unregister_driver(struct tty_driver *driver) {};
>  
> -static inline int pid_ns_prepare_proc(struct pid_namespace *ns)
> -{
> -	return 0;
> -}
> -
> -static inline void pid_ns_release_proc(struct pid_namespace *ns)
> -{
> -}
> -
>  static inline void set_mm_exe_file(struct mm_struct *mm,
>  				   struct file *new_exe_file)
>  {}
> diff --git a/kernel/fork.c b/kernel/fork.c
> index b6cce14..b063a9c 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1154,12 +1154,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
>  		pid = alloc_pid(p->nsproxy->pid_ns);
>  		if (!pid)
>  			goto bad_fork_cleanup_io;
> -
> -		if (clone_flags & CLONE_NEWPID) {
> -			retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
> -			if (retval < 0)
> -				goto bad_fork_free_pid;
> -		}
>  	}
>  
>  	p->pid = pid_nr(pid);
> diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
> index 1357c57..68f302a 100644
> --- a/kernel/sysctl_binary.c
> +++ b/kernel/sysctl_binary.c
> @@ -1350,8 +1350,9 @@ static ssize_t binary_sysctl(const int *name, int nlen,
>  		goto out_putname;
>  	}
>  
> -	mnt = current->nsproxy->pid_ns->proc_mnt;
> +	mnt = get_proc_mnt(task_active_pid_ns(current));
>  	result = vfs_path_lookup(mnt->mnt_root, mnt, pathname, 0, &nd);
> +	mntput(mnt);
>  	if (result)
>  		goto out_putname;
>  
> -- 
> 1.6.5.2.143.g8cc62
> 

-- 
Dr Louis Rilling			Kerlabs
Skype: louis.rilling			Batiment Germanium
Phone: (+33|0) 6 80 89 08 23		80 avenue des Buttes de Coesmes
http://www.kerlabs.com/			35700 Rennes

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

  parent reply	other threads:[~2010-07-11 14:14 UTC|newest]

Thread overview: 93+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-06-16 16:34 [PATCH] procfs: Do not release pid_ns->proc_mnt too early Louis Rilling
     [not found] ` <1276706068-18567-1-git-send-email-louis.rilling-aw0BnHfMbSpBDgjK7y7TUQ@public.gmane.org>
2010-06-17  9:53   ` Pavel Emelyanov
2010-06-17  9:53     ` Pavel Emelyanov
2010-06-17 13:41     ` Eric W. Biederman
2010-06-17 14:20       ` Louis Rilling
2010-06-17 21:36       ` Oleg Nesterov
2010-06-18  8:27         ` Louis Rilling
2010-06-18 16:27           ` Oleg Nesterov
2010-06-21 11:11             ` Louis Rilling
2010-06-21 12:58               ` Eric W. Biederman
2010-06-21 14:15                 ` Louis Rilling
2010-06-21 14:26                   ` Eric W. Biederman
     [not found]           ` <20100618082738.GE16877-Hu8+6S1rdjywhHL9vcZdMVaTQe2KTcn/@public.gmane.org>
2010-06-18 16:27             ` Oleg Nesterov
2010-06-17 21:20 ` Oleg Nesterov
2010-06-18  8:20   ` Louis Rilling
2010-06-18 11:15     ` Oleg Nesterov
2010-06-18 16:08       ` Oleg Nesterov
2010-06-18 16:08         ` Oleg Nesterov
2010-06-18 17:33         ` Louis Rilling
2010-06-18 17:55           ` Oleg Nesterov
2010-06-18 17:55             ` Oleg Nesterov
2010-06-18 21:23             ` Oleg Nesterov
2010-06-18 21:23               ` Oleg Nesterov
2010-06-19 19:08               ` [PATCH 0/4] pid_ns_prepare_proc/unshare cleanups Oleg Nesterov
2010-06-19 19:09                 ` [PATCH 1/4] procfs: proc_get_sb: consolidate/cleanup root_inode->pid logic Oleg Nesterov
2010-06-19 19:10                 ` [PATCH 2/4] procfs: kill the global proc_mnt variable Oleg Nesterov
2010-06-19 19:10                 ` [PATCH 3/4] procfs: move pid_ns_prepare_proc() from copy_process() to create_pid_namespace() Oleg Nesterov
2010-06-19 19:11                 ` [PATCH RESEND 4/4] sys_unshare: simplify the not-really-implemented CLONE_THREAD/SIGHAND/VM code Oleg Nesterov
2010-06-20  8:42                 ` [PATCH 0/6] Unshare support for the pid namespace Eric W. Biederman
2010-06-20  8:44                   ` [PATCH 1/6] pid: Remove the child_reaper special case in init/main.c Eric W. Biederman
     [not found]                     ` <m1ljaaqejm.fsf_-_-+imSwln9KH6u2/kzUuoCbdi2O/JbrIOy@public.gmane.org>
2010-06-20 18:29                       ` Oleg Nesterov
2010-06-20 18:29                         ` Oleg Nesterov
2010-06-20 20:27                         ` Oleg Nesterov
2010-06-20  8:45                   ` [PATCH 2/6] pidns: Call pid_ns_prepare_proc from create_pid_namespace Eric W. Biederman
     [not found]                     ` <m1hbkyqeib.fsf_-_-+imSwln9KH6u2/kzUuoCbdi2O/JbrIOy@public.gmane.org>
2010-06-20 18:19                       ` Oleg Nesterov
2010-06-20 18:19                         ` Oleg Nesterov
2010-06-20  8:45                   ` [PATCH 3/6] procfs: kill the global proc_mnt variable Eric W. Biederman
2010-06-20  8:47                   ` [PATCH 4/6] pidns: Don't allow new pids after the namespace is dead Eric W. Biederman
2010-06-20 18:44                     ` Oleg Nesterov
2010-06-20  8:48                   ` [PATCH 5/6] pidns: Use task_active_pid_ns where appropriate Eric W. Biederman
2010-06-20  8:49                   ` [PATCH 6/6] pidns: Support unsharing the pid namespace Eric W. Biederman
2010-06-20 20:14                     ` Oleg Nesterov
2010-06-20 20:42                       ` Oleg Nesterov
2010-06-21  1:53                       ` Eric W. Biederman
2010-06-20 18:03                   ` [PATCH 0/6] Unshare support for " Oleg Nesterov
2010-06-20 18:05                     ` [PATCH 0/2] pid_ns_release_proc() fixes Oleg Nesterov
2010-06-20 18:06                       ` [PATCH 1/2] pid_ns: move destroy_pid_namespace() into workqueue context Oleg Nesterov
2010-06-20 18:06                       ` [PATCH 2/2] pid_ns: refactor the buggy pid_ns_release_proc() logic Oleg Nesterov
2010-06-20 21:00                     ` [PATCH 0/6] Unshare support for the pid namespace Eric W. Biederman
2010-06-20 21:48                       ` Oleg Nesterov
     [not found]                       ` <m14ogxctd6.fsf-+imSwln9KH6u2/kzUuoCbdi2O/JbrIOy@public.gmane.org>
2010-06-20 21:56                         ` Oleg Nesterov
2010-06-20 21:56                           ` Oleg Nesterov
2011-01-26 15:57                   ` Daniel Lezcano
2010-06-23 20:36                 ` [PATCH 0/1] pid_ns: move pid_ns_release_proc() from proc_flush_task() to zap_pid_ns_processes() Oleg Nesterov
     [not found]                   ` <20100623203652.GA25298-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2010-06-23 20:37                     ` [PATCH 1/1] " Oleg Nesterov
2010-06-23 20:37                       ` Oleg Nesterov
2010-06-24  6:36                       ` Sukadev Bhattiprolu
2010-06-24 12:59                         ` Oleg Nesterov
2010-06-24  7:06                       ` Eric W. Biederman
2010-06-24 13:01                         ` Oleg Nesterov
2010-06-24  8:37                   ` [PATCH] pid_ns: Fix proc_flush_task() accessing freed proc_mnt Louis Rilling
2010-06-24 17:08                   ` [RESEND PATCH] " Louis Rilling
2010-06-24 19:18                     ` Oleg Nesterov
2010-06-25 10:23                       ` Louis Rilling
     [not found]                         ` <20100625102303.GG3773-Hu8+6S1rdjywhHL9vcZdMVaTQe2KTcn/@public.gmane.org>
2010-06-25 12:21                           ` Oleg Nesterov
2010-06-25 12:21                             ` Oleg Nesterov
2010-06-25 18:37                           ` Sukadev Bhattiprolu
2010-06-25 18:37                         ` Sukadev Bhattiprolu
2010-06-25 19:29                           ` Oleg Nesterov
2010-06-25 21:26                             ` Sukadev Bhattiprolu
2010-06-25 21:27                               ` Oleg Nesterov
2010-06-25 22:07                                 ` Sukadev Bhattiprolu
2010-07-09  4:36                                   ` [RFC][PATCH 1/2] pidns: Add a flag to indicate a pid namespace is dead Eric W. Biederman
2010-07-09  4:39                                     ` [RFC][PATCH 2/2] pidns: Remove proc flush races when a pid namespaces are exiting Eric W. Biederman
2010-07-09 12:14                                       ` Louis Rilling
2010-07-09 13:05                                         ` Eric W. Biederman
2010-07-09 14:13                                           ` Louis Rilling
2010-07-09 15:58                                             ` [PATCH 01/24] pidns: Remove races by stopping the caching of proc_mnt Eric W. Biederman
2010-07-09 22:13                                               ` Serge E. Hallyn
2010-07-11 14:14                                               ` Louis Rilling [this message]
2010-07-11 14:25                                                 ` Eric W. Biederman
2010-07-12 18:09                                                 ` [PATCH] pidns: Fix wait for zombies to be reaped in zap_pid_ns_processes Eric W. Biederman
2010-07-13 21:42                                                   ` Louis Rilling
     [not found]                                                     ` <20100713214234.GA21042-Hu8+6S1rdjywhHL9vcZdMVaTQe2KTcn/@public.gmane.org>
2010-07-13 22:34                                                       ` Serge E. Hallyn
2010-07-13 22:34                                                     ` Serge E. Hallyn
2010-07-14  1:47                                                     ` Eric W. Biederman
     [not found]                                                       ` <m1oceakf5x.fsf-+imSwln9KH6u2/kzUuoCbdi2O/JbrIOy@public.gmane.org>
2010-10-30  7:07                                                         ` Sukadev Bhattiprolu
2010-10-30  7:07                                                           ` Sukadev Bhattiprolu
2010-07-14 20:53                                                   ` Sukadev Bhattiprolu
2010-07-14 21:35                                                     ` Eric W. Biederman
2010-06-21 11:09             ` [PATCH] procfs: Do not release pid_ns->proc_mnt too early Louis Rilling
2010-06-21 11:15             ` Louis Rilling
2010-06-21 14:38               ` Oleg Nesterov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100711141406.GD18586@hawkmoon.kerlabs.com \
    --to=louis.rilling@kerlabs.com \
    --cc=akpm@linux-foundation.org \
    --cc=containers@lists.osdl.org \
    --cc=ebiederm@xmission.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=oleg@redhat.com \
    --cc=sukadev@linux.vnet.ibm.com \
    --cc=xemul@openvz.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.