On Mon, Oct 17, 2016 at 11:39:49AM -0500, Eric W. Biederman wrote: > > During exec dumpable is cleared if the file that is being executed is > not readable by the user executing the file. A bug in > ptrace_may_access allows reading the file if the executable happens to > enter into a subordinate user namespace (aka clone(CLONE_NEWUSER), > unshare(CLONE_NEWUSER), or setns(fd, CLONE_NEWUSER). > > This problem is fixed with only necessary userspace breakage by adding > a user namespace owner to mm_struct, captured at the time of exec, > so it is clear in which user namespace CAP_SYS_PTRACE must be present > in to be able to safely give read permission to the executable. > > The function ptrace_may_access is modified to verify that the ptracer > has CAP_SYS_ADMIN in task->mm->user_ns instead of task->cred->user_ns. > This ensures that if the task changes it's cred into a subordinate > user namespace it does not become ptraceable. This looks good! Basically applies the same rules that already apply to EUID/... changes to namespace changes, and anyone entering a user namespace can now safely drop UIDs and GIDs to namespace root. This integrates better in the existing security concept than my old patch "ptrace: being capable wrt a process requires mapped uids/gids", and it has less issues in cases where e.g. the extra privileges of an entering process are the filesystem root or so. FWIW, if you want, you can add "Reviewed-by: Jann Horn ". > Cc: stable@vger.kernel.org > Fixes: 8409cca70561 ("userns: allow ptrace from non-init user namespaces") > Signed-off-by: "Eric W. Biederman" > --- > > It turns out that dumpable needs to be fixed to be user namespace > aware to fix this issue. When this patch is ready I plan to place it in > my userns tree and send it to Linus, hopefully for -rc2. > > include/linux/mm_types.h | 1 + > kernel/fork.c | 9 ++++++--- > kernel/ptrace.c | 17 ++++++----------- > mm/init-mm.c | 2 ++ > 4 files changed, 15 insertions(+), 14 deletions(-) > > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h > index 4a8acedf4b7d..08d947fc4c59 100644 > --- a/include/linux/mm_types.h > +++ b/include/linux/mm_types.h > @@ -473,6 +473,7 @@ struct mm_struct { > */ > struct task_struct __rcu *owner; > #endif > + struct user_namespace *user_ns; > > /* store ref to file /proc//exe symlink points to */ > struct file __rcu *exe_file; > diff --git a/kernel/fork.c b/kernel/fork.c > index 623259fc794d..fd85c68c2791 100644 > --- a/kernel/fork.c > +++ b/kernel/fork.c > @@ -742,7 +742,8 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) > #endif > } > > -static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) > +static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, > + struct user_namespace *user_ns) > { > mm->mmap = NULL; > mm->mm_rb = RB_ROOT; > @@ -782,6 +783,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) > if (init_new_context(p, mm)) > goto fail_nocontext; > > + mm->user_ns = get_user_ns(user_ns); > return mm; > > fail_nocontext: > @@ -827,7 +829,7 @@ struct mm_struct *mm_alloc(void) > return NULL; > > memset(mm, 0, sizeof(*mm)); > - return mm_init(mm, current); > + return mm_init(mm, current, current_user_ns()); > } > > /* > @@ -842,6 +844,7 @@ void __mmdrop(struct mm_struct *mm) > destroy_context(mm); > mmu_notifier_mm_destroy(mm); > check_mm(mm); > + put_user_ns(mm->user_ns); > free_mm(mm); > } > EXPORT_SYMBOL_GPL(__mmdrop); > @@ -1123,7 +1126,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk) > > memcpy(mm, oldmm, sizeof(*mm)); > > - if (!mm_init(mm, tsk)) > + if (!mm_init(mm, tsk, mm->user_ns)) > goto fail_nomem; > > err = dup_mmap(mm, oldmm); > diff --git a/kernel/ptrace.c b/kernel/ptrace.c > index 2a99027312a6..f2d1b9afb3f8 100644 > --- a/kernel/ptrace.c > +++ b/kernel/ptrace.c > @@ -220,7 +220,7 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) > static int __ptrace_may_access(struct task_struct *task, unsigned int mode) > { > const struct cred *cred = current_cred(), *tcred; > - int dumpable = 0; > + struct mm_struct *mm; > kuid_t caller_uid; > kgid_t caller_gid; > > @@ -271,16 +271,11 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) > return -EPERM; > ok: > rcu_read_unlock(); > - smp_rmb(); > - if (task->mm) > - dumpable = get_dumpable(task->mm); > - rcu_read_lock(); > - if (dumpable != SUID_DUMP_USER && > - !ptrace_has_cap(__task_cred(task)->user_ns, mode)) { > - rcu_read_unlock(); > - return -EPERM; > - } > - rcu_read_unlock(); > + mm = task->mm; > + if (!mm || > + ((get_dumpable(mm) != SUID_DUMP_USER) && > + !ptrace_has_cap(mm->user_ns, mode))) > + return -EPERM; > > return security_ptrace_access_check(task, mode); > } > diff --git a/mm/init-mm.c b/mm/init-mm.c > index a56a851908d2..975e49f00f34 100644 > --- a/mm/init-mm.c > +++ b/mm/init-mm.c > @@ -6,6 +6,7 @@ > #include > > #include > +#include > #include > #include > > @@ -21,5 +22,6 @@ struct mm_struct init_mm = { > .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), > .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), > .mmlist = LIST_HEAD_INIT(init_mm.mmlist), > + .user_ns = &init_user_ns, > INIT_MM_CONTEXT(init_mm) > }; > -- > 2.8.3