All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jann Horn <jannh@google.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	linux-mm@kvack.org, Eric Biederman <ebiederm@xmission.com>,
	Oleg Nesterov <oleg@redhat.com>
Cc: linux-kernel@vger.kernel.org, Will Deacon <will@kernel.org>,
	Kees Cook <keescook@chromium.org>, Ingo Molnar <mingo@kernel.org>
Subject: [RFC PATCH resend 1/6] ptrace: Keep mm around after exit_mm() for __ptrace_may_access()
Date: Sat, 17 Oct 2020 01:09:10 +0200	[thread overview]
Message-ID: <20201016230915.1972840-2-jannh@google.com> (raw)
In-Reply-To: <20201016230915.1972840-1-jannh@google.com>

__ptrace_may_access() checks can happen on target tasks that are in the
middle of do_exit(), past exit_mm(). At that point, the ->mm pointer has
been NULLed out, and the mm_struct has been mmput().

Unfortunately, the mm_struct contains the dumpability and the user_ns in
which the task last went through execve(), and we need those for
__ptrace_may_access(). Currently, that problem is handled by failing open:
If the ->mm is gone, we assume that the task was dumpable. In some edge
cases, this could potentially expose access to things like
/proc/$pid/fd/$fd of originally non-dumpable processes.
(exit_files() comes after exit_mm(), so the file descriptor table is still
there when we've gone through exit_mm().)

One way to fix this would be to move mm->user_ns and the dumpability state
over into the task_struct. However, that gets quite ugly if we want to
preserve existing semantics because e.g. PR_SET_DUMPABLE and commit_creds()
would then have to scan through all tasks sharing the mm_struct and keep
them in sync manually - that'd be a bit error-prone and overcomplicated.

(Moving these things into the signal_struct is not an option because that
is kept across executions, and pre-execve co-threads will share the
signal_struct that is also used by the task that has gone through
execve().)

I believe that this patch may be the least bad option to fix this - keep
the mm_struct (but not process memory) around with an mmgrab() reference
from exit_mm() until the task goes away completely.

Note that this moves free_task() down in order to make mmdrop_async()
available without a forward declaration.

Cc: stable@vger.kernel.org
Fixes: bfedb589252c ("mm: Add a user_ns owner to mm_struct and fix ptrace permission checks")
Signed-off-by: Jann Horn <jannh@google.com>
---
 include/linux/sched.h |  8 +++++++
 kernel/exit.c         |  2 ++
 kernel/fork.c         | 54 ++++++++++++++++++++++---------------------
 kernel/ptrace.c       | 10 ++++++++
 4 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index afe01e232935..55bec6ff5626 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -747,6 +747,14 @@ struct task_struct {
 
 	struct mm_struct		*mm;
 	struct mm_struct		*active_mm;
+	/*
+	 * When we exit and ->mm (the reference pinning ->mm's address space)
+	 * goes away, we stash a reference to the mm_struct itself (counted via
+	 * exit_mm->mm_count) in this member.
+	 * This allows us to continue using the mm_struct for security checks
+	 * and such even after the task has started exiting.
+	 */
+	struct mm_struct		*exit_mm;
 
 	/* Per-thread vma caching: */
 	struct vmacache			vmacache;
diff --git a/kernel/exit.c b/kernel/exit.c
index 733e80f334e7..97253ef33486 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -476,6 +476,8 @@ static void exit_mm(void)
 	/* more a memory barrier than a real lock */
 	task_lock(current);
 	current->mm = NULL;
+	mmgrab(mm); /* for current->exit_mm */
+	current->exit_mm = mm;
 	mmap_read_unlock(mm);
 	enter_lazy_tlb(mm, current);
 	task_unlock(current);
diff --git a/kernel/fork.c b/kernel/fork.c
index da8d360fb032..4942428a217c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -438,32 +438,6 @@ void put_task_stack(struct task_struct *tsk)
 }
 #endif
 
-void free_task(struct task_struct *tsk)
-{
-	scs_release(tsk);
-
-#ifndef CONFIG_THREAD_INFO_IN_TASK
-	/*
-	 * The task is finally done with both the stack and thread_info,
-	 * so free both.
-	 */
-	release_task_stack(tsk);
-#else
-	/*
-	 * If the task had a separate stack allocation, it should be gone
-	 * by now.
-	 */
-	WARN_ON_ONCE(refcount_read(&tsk->stack_refcount) != 0);
-#endif
-	rt_mutex_debug_task_free(tsk);
-	ftrace_graph_exit_task(tsk);
-	arch_release_task_struct(tsk);
-	if (tsk->flags & PF_KTHREAD)
-		free_kthread_struct(tsk);
-	free_task_struct(tsk);
-}
-EXPORT_SYMBOL(free_task);
-
 #ifdef CONFIG_MMU
 static __latent_entropy int dup_mmap(struct mm_struct *mm,
 					struct mm_struct *oldmm)
@@ -722,6 +696,34 @@ static inline void put_signal_struct(struct signal_struct *sig)
 		free_signal_struct(sig);
 }
 
+void free_task(struct task_struct *tsk)
+{
+	scs_release(tsk);
+
+#ifndef CONFIG_THREAD_INFO_IN_TASK
+	/*
+	 * The task is finally done with both the stack and thread_info,
+	 * so free both.
+	 */
+	release_task_stack(tsk);
+#else
+	/*
+	 * If the task had a separate stack allocation, it should be gone
+	 * by now.
+	 */
+	WARN_ON_ONCE(refcount_read(&tsk->stack_refcount) != 0);
+#endif
+	rt_mutex_debug_task_free(tsk);
+	ftrace_graph_exit_task(tsk);
+	arch_release_task_struct(tsk);
+	if (tsk->flags & PF_KTHREAD)
+		free_kthread_struct(tsk);
+	if (tsk->exit_mm)
+		mmdrop_async(tsk->exit_mm);
+	free_task_struct(tsk);
+}
+EXPORT_SYMBOL(free_task);
+
 void __put_task_struct(struct task_struct *tsk)
 {
 	WARN_ON(!tsk->exit_state);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 43d6179508d6..0aedc6cf5bdc 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -342,7 +342,17 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 	 * Pairs with a write barrier in commit_creds().
 	 */
 	smp_rmb();
+	/*
+	 * Look up the target task's mm_struct. If it fails because the task is
+	 * exiting and has gone through exit_mm(), we can instead use ->exit_mm
+	 * as long as we only use members that are preserved by an mmgrab()
+	 * reference.
+	 * The only case in which both ->mm and ->exit_mm can be NULL should be
+	 * kernel threads.
+	 */
 	mm = task->mm;
+	if (!mm)
+		mm = task->exit_mm;
 	if (mm &&
 	    ((get_dumpable(mm) != SUID_DUMP_USER) &&
 	     !ptrace_has_cap(cred, mm->user_ns, mode)))
-- 
2.29.0.rc1.297.gfa9743e501-goog


  reply	other threads:[~2020-10-16 23:09 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-16 23:09 [RFC PATCH resend 0/6] mm and ptrace: Track dumpability until task is freed Jann Horn
2020-10-16 23:09 ` Jann Horn [this message]
2020-10-16 23:09 ` [RFC PATCH resend 2/6] refcount: Move refcount_t definition into linux/types.h Jann Horn
2020-10-16 23:09 ` [RFC PATCH resend 3/6] mm: Add refcount for preserving mm_struct without pgd Jann Horn
2020-10-16 23:21   ` Jason Gunthorpe
2020-10-17  0:30     ` Jann Horn
2020-10-17  0:30       ` Jann Horn
2020-11-03  2:11       ` Jann Horn
2020-11-03  2:11         ` Jann Horn
2020-11-03  3:19         ` Jann Horn
2020-11-03  3:19           ` Jann Horn
2020-11-03 13:21           ` Jason Gunthorpe
2020-11-03 19:18             ` John Hubbard
2020-10-16 23:09 ` [RFC PATCH resend 4/6] mm, oom: Use mm_ref()/mm_unref() and avoid mmdrop_async() Jann Horn
2020-10-16 23:09 ` [RFC PATCH resend 5/6] ptrace: Use mm_ref() for ->exit_mm Jann Horn
2020-10-16 23:09 ` [RFC PATCH resend 6/6] mm: remove now-unused mmdrop_async() Jann Horn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201016230915.1972840-2-jannh@google.com \
    --to=jannh@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=ebiederm@xmission.com \
    --cc=keescook@chromium.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@kernel.org \
    --cc=oleg@redhat.com \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.