From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <linux-kernel-owner@vger.kernel.org>
Date: Wed, 4 May 2016 23:19:04 +0000
From: Serge Hallyn <serge.hallyn@ubuntu.com>
To: Djalal Harouni <tixxdz@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>, Chris Mason <clm@fb.com>,
	tytso@mit.edu, Serge Hallyn <serge.hallyn@canonical.com>,
	Josh Triplett <josh@joshtriplett.org>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	Andy Lutomirski <luto@kernel.org>,
	Seth Forshee <seth.forshee@canonical.com>,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-security-module@vger.kernel.org,
	Dongsu Park <dongsu@endocode.com>,
	David Herrmann <dh.herrmann@googlemail.com>,
	Miklos Szeredi <mszeredi@redhat.com>,
	Alban Crequy <alban.crequy@gmail.com>,
	Djalal Harouni <tixxdz@opendz.org>
Subject: Re: [RFC v2 PATCH 3/8] fs: Treat foreign mounts as nosuid
Message-ID: <20160504231904.GA17801@ubuntumail>
References: <1462372014-3786-1-git-send-email-tixxdz@gmail.com>
 <1462372014-3786-4-git-send-email-tixxdz@gmail.com>
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
In-Reply-To: <1462372014-3786-4-git-send-email-tixxdz@gmail.com>
Sender: linux-kernel-owner@vger.kernel.org
List-ID: <linux-fsdevel.vger.kernel.org>

Quoting Djalal Harouni (tixxdz@gmail.com):
> If a process gets access to a mount from a different user
> namespace, that process should not be able to take advantage of
> setuid files or selinux entrypoints from that filesystem.  Prevent
> this by treating mounts from other mount namespaces and those not
> owned by current_user_ns() or an ancestor as nosuid.
> 
> This patch was just adapted from the original one that was written
> by Andy Lutomirski <luto@amacapital.net>
> https://www.redhat.com/archives/dm-devel/2016-April/msg00374.html

I'm not sure that this makes sense given what you're doing.  In the
case of Seth's set, a filesystem is mounted specifically (and privately)
in a user namespace.  We don't want for instance the initial user ns
to find a link to a setuid-root exploit left in the container-mounted
filesystem.

But you are having a parent user namespace mount the fs so that its
children can all access the fs, uid-shifted for convenience.  Not
allowing the child namespaces to make use of setuid-root does not
seem applicable here.

> Signed-off-by: Djalal Harouni <tixxdz@opendz.org>
> ---
>  fs/exec.c                      |  2 +-
>  fs/namespace.c                 | 15 +++++++++++++++
>  include/linux/mount.h          |  1 +
>  include/linux/user_namespace.h |  8 ++++++++
>  kernel/user_namespace.c        | 13 +++++++++++++
>  security/commoncap.c           |  2 +-
>  security/selinux/hooks.c       |  2 +-
>  7 files changed, 40 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index c4010b8..706088d 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1391,7 +1391,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm)
>  	bprm->cred->euid = current_euid();
>  	bprm->cred->egid = current_egid();
>  
> -	if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
> +	if (!mnt_may_suid(bprm->file->f_path.mnt))
>  		return;
>  
>  	if (task_no_new_privs(current))
> diff --git a/fs/namespace.c b/fs/namespace.c
> index de02b39..a8820fb 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -3374,6 +3374,21 @@ found:
>  	return visible;
>  }
>  
> +bool mnt_may_suid(struct vfsmount *mnt)
> +{
> +	struct mount *m = real_mount(mnt);
> +
> +	/*
> +	 * Foreign mounts (accessed via fchdir or through /proc
> +	 * symlinks) are always treated as if they are nosuid. This
> +	 * prevents namespaces from trusting potentially unsafe
> +	 * suid/sgid bits, file caps, or security labels that originate
> +	 * in other namespaces.
> +	 */
> +	return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(m) &&
> +		 in_userns(current_user_ns(), m->mnt_ns->user_ns);
> +}
> +
>  static struct ns_common *mntns_get(struct task_struct *task)
>  {
>  	struct ns_common *ns = NULL;
> diff --git a/include/linux/mount.h b/include/linux/mount.h
> index f822c3c..54a594d 100644
> --- a/include/linux/mount.h
> +++ b/include/linux/mount.h
> @@ -81,6 +81,7 @@ extern void mntput(struct vfsmount *mnt);
>  extern struct vfsmount *mntget(struct vfsmount *mnt);
>  extern struct vfsmount *mnt_clone_internal(struct path *path);
>  extern int __mnt_is_readonly(struct vfsmount *mnt);
> +extern bool mnt_may_suid(struct vfsmount *mnt);
>  
>  struct path;
>  extern struct vfsmount *clone_private_mount(struct path *path);
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index 8297e5b..a43faa7 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -72,6 +72,8 @@ extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t,
>  extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *);
>  extern int proc_setgroups_show(struct seq_file *m, void *v);
>  extern bool userns_may_setgroups(const struct user_namespace *ns);
> +extern bool in_userns(const struct user_namespace *ns,
> +		      const struct user_namespace *target_ns);
>  #else
>  
>  static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
> @@ -100,6 +102,12 @@ static inline bool userns_may_setgroups(const struct user_namespace *ns)
>  {
>  	return true;
>  }
> +
> +static inline bool in_userns(const struct user_namespace *ns,
> +			     const struct user_namespace *target_ns)
> +{
> +	return true;
> +}
>  #endif
>  
>  #endif /* _LINUX_USER_H */
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 9bafc21..9a496a8 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -938,6 +938,19 @@ bool userns_may_setgroups(const struct user_namespace *ns)
>  	return allowed;
>  }
>  
> +/*
> + * Returns true if @ns is the same namespace as or a descendant of
> + * @target_ns.
> + */
> +bool in_userns(const struct user_namespace *ns,
> +	       const struct user_namespace *target_ns)
> +{
> +	for (; ns; ns = ns->parent) {
> +		if (ns == target_ns)
> +			return true;
> +	}
> +}
> +
>  static inline struct user_namespace *to_user_ns(struct ns_common *ns)
>  {
>  	return container_of(ns, struct user_namespace, ns);
> diff --git a/security/commoncap.c b/security/commoncap.c
> index 48071ed..6c082d2 100644
> --- a/security/commoncap.c
> +++ b/security/commoncap.c
> @@ -453,7 +453,7 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c
>  	if (!file_caps_enabled)
>  		return 0;
>  
> -	if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
> +	if (!mnt_may_suid(bprm->file->f_path.mnt))
>  		return 0;
>  
>  	rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps);
> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index 912deee..1350167 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -2234,7 +2234,7 @@ static int check_nnp_nosuid(const struct linux_binprm *bprm,
>  			    const struct task_security_struct *new_tsec)
>  {
>  	int nnp = (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS);
> -	int nosuid = (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID);
> +	int nosuid = !mnt_may_suid(bprm->file->f_path.mnt);
>  	int rc;
>  
>  	if (!nnp && !nosuid)
> -- 
> 2.5.5
>