LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Bernd Edlinger <bernd.edlinger@hotmail.de>
To: "gregkh@linuxfoundation.org" <gregkh@linuxfoundation.org>,
	Kirill Tkhai <ktkhai@virtuozzo.com>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	Christian Brauner <christian.brauner@ubuntu.com>,
	Kees Cook <keescook@chromium.org>,
	"jannh@google.com" <jannh@google.com>,
	Jonathan Corbet <corbet@lwn.net>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Andrew Morton <akpm@linux-foundation.org>,
	"adobriyan@gmail.com" <adobriyan@gmail.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Oleg Nesterov <oleg@redhat.com>,
	Frederic Weisbecker <frederic@kernel.org>,
	"avagin@gmail.com" <avagin@gmail.com>,
	Ingo Molnar <mingo@kernel.org>,
	"Peter Zijlstra (Intel)" <peterz@infradead.org>,
	"duyuyang@gmail.com" <duyuyang@gmail.com>,
	David Hildenbrand <david@redhat.com>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	Anshuman Khandual <anshuman.khandual@arm.com>,
	David Howells <dhowells@redhat.com>,
	James Morris <jamorris@linux.microsoft.com>,
	Shakeel Butt <shakeelb@google.com>,
	Jason Gunthorpe <jgg@ziepe.ca>,
	"christian@kellner.me" <christian@kellner.me>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Aleksa Sarai <cyphar@cyphar.com>,
	"Dmitry V. Levin" <ldv@altlinux.org>,
	"linux-doc@vger.kernel.org" <linux-doc@vger.kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"linux-fsdevel@vger.kernel.org" <linux-fsdevel@vger.kernel.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	"stable@vger.kernel.org" <stable@vger.kernel.org>,
	"linux-api@vger.kernel.org" <linux-api@vger.kernel.org>
Subject: [PATCH v6 15/16] exec: Fix dead-lock in de_thread with ptrace_attach
Date: Sat, 21 Mar 2020 02:46:39 +0000
Message-ID: <b6537ae6-31b1-5c50-f32b-8b8332ace882@hotmail.de> (raw)
In-Reply-To: <077b63b7-6f5e-aa8e-bf96-a586b481cc46@hotmail.de>

This removes the last users of cred_guard_mutex
and replaces it with a new mutex exec_guard_mutex,
and a boolean unsafe_execve_in_progress.

This addresses the case when at least one of the
sibling threads is traced, and therefore the trace
process may dead-lock in ptrace_attach, but de_thread
will need to wait for the tracer to continue execution.

The solution is to detect this situation and make
ptrace_attach and similar functions return -EAGAIN,
but only in a situation where a dead-lock is imminent.

This means this is an API change, but only when the
process is traced while execve happens in a
multi-threaded application.

See tools/testing/selftests/ptrace/vmaccess.c
for a test case that gets fixed by this change.

Signed-off-by: Bernd Edlinger <bernd.edlinger@hotmail.de>
---
 fs/exec.c                    | 44 +++++++++++++++++++++++++++++++++++---------
 fs/proc/base.c               | 13 ++++++++-----
 include/linux/sched/signal.h | 14 +++++++++-----
 init/init_task.c             |  2 +-
 kernel/cred.c                |  2 +-
 kernel/fork.c                |  2 +-
 kernel/ptrace.c              | 20 +++++++++++++++++---
 kernel/seccomp.c             | 15 +++++++++------
 8 files changed, 81 insertions(+), 31 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 0e46ec5..2056562 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1078,14 +1078,26 @@ static int de_thread(struct task_struct *tsk)
 	struct signal_struct *sig = tsk->signal;
 	struct sighand_struct *oldsighand = tsk->sighand;
 	spinlock_t *lock = &oldsighand->siglock;
+	struct task_struct *t = tsk;
 
 	if (thread_group_empty(tsk))
 		goto no_thread_group;
 
+	spin_lock_irq(lock);
+	while_each_thread(tsk, t) {
+		if (unlikely(t->ptrace))
+			sig->unsafe_execve_in_progress = true;
+	}
+
+	if (unlikely(sig->unsafe_execve_in_progress)) {
+		spin_unlock_irq(lock);
+		mutex_unlock(&sig->exec_guard_mutex);
+		spin_lock_irq(lock);
+	}
+
 	/*
 	 * Kill all other threads in the thread group.
 	 */
-	spin_lock_irq(lock);
 	if (signal_group_exit(sig)) {
 		/*
 		 * Another group action in progress, just
@@ -1429,22 +1441,30 @@ void finalize_exec(struct linux_binprm *bprm)
 EXPORT_SYMBOL(finalize_exec);
 
 /*
- * Prepare credentials and lock ->cred_guard_mutex.
+ * Prepare credentials and lock ->exec_guard_mutex.
  * install_exec_creds() commits the new creds and drops the lock.
  * Or, if exec fails before, free_bprm() should release ->cred and
  * and unlock.
  */
 static int prepare_bprm_creds(struct linux_binprm *bprm)
 {
-	if (mutex_lock_interruptible(&current->signal->cred_guard_mutex))
+	int ret;
+
+	if (mutex_lock_interruptible(&current->signal->exec_guard_mutex))
 		return -ERESTARTNOINTR;
 
+	ret = -EAGAIN;
+	if (unlikely(current->signal->unsafe_execve_in_progress))
+		goto out;
+
 	bprm->cred = prepare_exec_creds();
 	if (likely(bprm->cred))
 		return 0;
 
-	mutex_unlock(&current->signal->cred_guard_mutex);
-	return -ENOMEM;
+	ret = -ENOMEM;
+out:
+	mutex_unlock(&current->signal->exec_guard_mutex);
+	return ret;
 }
 
 static void free_bprm(struct linux_binprm *bprm)
@@ -1453,7 +1473,10 @@ static void free_bprm(struct linux_binprm *bprm)
 	if (bprm->cred) {
 		if (bprm->called_exec_mmap)
 			mutex_unlock(&current->signal->exec_update_mutex);
-		mutex_unlock(&current->signal->cred_guard_mutex);
+		if (unlikely(current->signal->unsafe_execve_in_progress))
+			mutex_lock(&current->signal->exec_guard_mutex);
+		current->signal->unsafe_execve_in_progress = false;
+		mutex_unlock(&current->signal->exec_guard_mutex);
 		abort_creds(bprm->cred);
 	}
 	if (bprm->file) {
@@ -1497,19 +1520,22 @@ void install_exec_creds(struct linux_binprm *bprm)
 	if (get_dumpable(current->mm) != SUID_DUMP_USER)
 		perf_event_exit_task(current);
 	/*
-	 * cred_guard_mutex must be held at least to this point to prevent
+	 * exec_guard_mutex must be held at least to this point to prevent
 	 * ptrace_attach() from altering our determination of the task's
 	 * credentials; any time after this it may be unlocked.
 	 */
 	security_bprm_committed_creds(bprm);
 	mutex_unlock(&current->signal->exec_update_mutex);
-	mutex_unlock(&current->signal->cred_guard_mutex);
+	if (unlikely(current->signal->unsafe_execve_in_progress))
+		mutex_lock(&current->signal->exec_guard_mutex);
+	current->signal->unsafe_execve_in_progress = false;
+	mutex_unlock(&current->signal->exec_guard_mutex);
 }
 EXPORT_SYMBOL(install_exec_creds);
 
 /*
  * determine how safe it is to execute the proposed program
- * - the caller must hold ->cred_guard_mutex to protect against
+ * - the caller must hold ->exec_guard_mutex to protect against
  *   PTRACE_ATTACH or seccomp thread-sync
  */
 static void check_unsafe_exec(struct linux_binprm *bprm)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6b13fc4..a428536 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2680,14 +2680,17 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
 	}
 
 	/* Guard against adverse ptrace interaction */
-	rv = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
+	rv = mutex_lock_interruptible(&current->signal->exec_guard_mutex);
 	if (rv < 0)
 		goto out_free;
 
-	rv = security_setprocattr(PROC_I(inode)->op.lsm,
-				  file->f_path.dentry->d_name.name, page,
-				  count);
-	mutex_unlock(&current->signal->cred_guard_mutex);
+	if (unlikely(current->signal->unsafe_execve_in_progress))
+		rv = -EAGAIN;
+	else
+		rv = security_setprocattr(PROC_I(inode)->op.lsm,
+					  file->f_path.dentry->d_name.name,
+					  page, count);
+	mutex_unlock(&current->signal->exec_guard_mutex);
 out_free:
 	kfree(page);
 out:
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index a29df79..e83cef2 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -212,6 +212,13 @@ struct signal_struct {
 #endif
 
 	/*
+	 * Set while execve is executing but is *not* holding
+	 * exec_guard_mutex to avoid possible dead-locks.
+	 * Only valid when exec_guard_mutex is held.
+	 */
+	bool unsafe_execve_in_progress;
+
+	/*
 	 * Thread is the potential origin of an oom condition; kill first on
 	 * oom
 	 */
@@ -222,11 +229,8 @@ struct signal_struct {
 	struct mm_struct *oom_mm;	/* recorded mm when the thread group got
 					 * killed by the oom killer */
 
-	struct mutex cred_guard_mutex;	/* guard against foreign influences on
-					 * credential calculations
-					 * (notably. ptrace)
-					 * Deprecated do not use in new code.
-					 * Use exec_update_mutex instead.
+	struct mutex exec_guard_mutex;	/* Held while execve runs, except when
+					 * a sibling thread is being traced.
 					 */
 	struct mutex exec_update_mutex;	/* Held while task_struct is being
 					 * updated during exec, and may have
diff --git a/init/init_task.c b/init/init_task.c
index bd403ed..6f96327 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -25,7 +25,7 @@
 	},
 	.multiprocess	= HLIST_HEAD_INIT,
 	.rlim		= INIT_RLIMITS,
-	.cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex),
+	.exec_guard_mutex = __MUTEX_INITIALIZER(init_signals.exec_guard_mutex),
 	.exec_update_mutex = __MUTEX_INITIALIZER(init_signals.exec_update_mutex),
 #ifdef CONFIG_POSIX_TIMERS
 	.posix_timers = LIST_HEAD_INIT(init_signals.posix_timers),
diff --git a/kernel/cred.c b/kernel/cred.c
index 71a7926..341ca59 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -295,7 +295,7 @@ struct cred *prepare_creds(void)
 
 /*
  * Prepare credentials for current to perform an execve()
- * - The caller must hold ->cred_guard_mutex
+ * - The caller must hold ->exec_guard_mutex
  */
 struct cred *prepare_exec_creds(void)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index e23ccac..98012f7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1593,7 +1593,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->oom_score_adj = current->signal->oom_score_adj;
 	sig->oom_score_adj_min = current->signal->oom_score_adj_min;
 
-	mutex_init(&sig->cred_guard_mutex);
+	mutex_init(&sig->exec_guard_mutex);
 	mutex_init(&sig->exec_update_mutex);
 
 	return 0;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 43d6179..221759e 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -392,9 +392,13 @@ static int ptrace_attach(struct task_struct *task, long request,
 	 * under ptrace.
 	 */
 	retval = -ERESTARTNOINTR;
-	if (mutex_lock_interruptible(&task->signal->cred_guard_mutex))
+	if (mutex_lock_interruptible(&task->signal->exec_guard_mutex))
 		goto out;
 
+	retval = -EAGAIN;
+	if (unlikely(task->signal->unsafe_execve_in_progress))
+		goto unlock_creds;
+
 	task_lock(task);
 	retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS);
 	task_unlock(task);
@@ -447,7 +451,7 @@ static int ptrace_attach(struct task_struct *task, long request,
 unlock_tasklist:
 	write_unlock_irq(&tasklist_lock);
 unlock_creds:
-	mutex_unlock(&task->signal->cred_guard_mutex);
+	mutex_unlock(&task->signal->exec_guard_mutex);
 out:
 	if (!retval) {
 		/*
@@ -472,10 +476,18 @@ static int ptrace_attach(struct task_struct *task, long request,
  */
 static int ptrace_traceme(void)
 {
-	int ret = -EPERM;
+	int ret;
+
+	if (mutex_lock_interruptible(&current->signal->exec_guard_mutex))
+		return -ERESTARTNOINTR;
+
+	ret = -EAGAIN;
+	if (unlikely(current->signal->unsafe_execve_in_progress))
+		goto unlock_creds;
 
 	write_lock_irq(&tasklist_lock);
 	/* Are we already being traced? */
+	ret = -EPERM;
 	if (!current->ptrace) {
 		ret = security_ptrace_traceme(current->parent);
 		/*
@@ -490,6 +502,8 @@ static int ptrace_traceme(void)
 	}
 	write_unlock_irq(&tasklist_lock);
 
+unlock_creds:
+	mutex_unlock(&current->signal->exec_guard_mutex);
 	return ret;
 }
 
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index b6ea3dc..acd6960 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -329,7 +329,7 @@ static int is_ancestor(struct seccomp_filter *parent,
 /**
  * seccomp_can_sync_threads: checks if all threads can be synchronized
  *
- * Expects sighand and cred_guard_mutex locks to be held.
+ * Expects sighand and exec_guard_mutex locks to be held.
  *
  * Returns 0 on success, -ve on error, or the pid of a thread which was
  * either not in the correct seccomp mode or did not have an ancestral
@@ -339,9 +339,12 @@ static inline pid_t seccomp_can_sync_threads(void)
 {
 	struct task_struct *thread, *caller;
 
-	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
+	BUG_ON(!mutex_is_locked(&current->signal->exec_guard_mutex));
 	assert_spin_locked(&current->sighand->siglock);
 
+	if (unlikely(current->signal->unsafe_execve_in_progress))
+		return -EAGAIN;
+
 	/* Validate all threads being eligible for synchronization. */
 	caller = current;
 	for_each_thread(caller, thread) {
@@ -371,7 +374,7 @@ static inline pid_t seccomp_can_sync_threads(void)
 /**
  * seccomp_sync_threads: sets all threads to use current's filter
  *
- * Expects sighand and cred_guard_mutex locks to be held, and for
+ * Expects sighand and exec_guard_mutex locks to be held, and for
  * seccomp_can_sync_threads() to have returned success already
  * without dropping the locks.
  *
@@ -380,7 +383,7 @@ static inline void seccomp_sync_threads(unsigned long flags)
 {
 	struct task_struct *thread, *caller;
 
-	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
+	BUG_ON(!mutex_is_locked(&current->signal->exec_guard_mutex));
 	assert_spin_locked(&current->sighand->siglock);
 
 	/* Synchronize all threads. */
@@ -1319,7 +1322,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
 	 * while another thread is in the middle of calling exec.
 	 */
 	if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
-	    mutex_lock_killable(&current->signal->cred_guard_mutex))
+	    mutex_lock_killable(&current->signal->exec_guard_mutex))
 		goto out_put_fd;
 
 	spin_lock_irq(&current->sighand->siglock);
@@ -1337,7 +1340,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
 out:
 	spin_unlock_irq(&current->sighand->siglock);
 	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
-		mutex_unlock(&current->signal->cred_guard_mutex);
+		mutex_unlock(&current->signal->exec_guard_mutex);
 out_put_fd:
 	if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
 		if (ret) {
-- 
1.9.1

  parent reply index

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <077b63b7-6f5e-aa8e-bf96-a586b481cc46@hotmail.de>
2020-03-20 20:24 ` [PATCH v6 01/16] exec: Only compute current once in flush_old_exec Bernd Edlinger
2020-03-20 20:24 ` [PATCH v6 02/16] exec: Factor unshare_sighand out of de_thread and call it separately Bernd Edlinger
2020-03-20 20:25 ` [PATCH v6 03/16] exec: Move cleanup of posix timers on exec out of de_thread Bernd Edlinger
2020-03-20 20:25 ` [PATCH v6 04/16] exec: Move exec_mmap right after de_thread in flush_old_exec Bernd Edlinger
2020-03-20 20:25 ` [PATCH v6 05/16] exec: Add exec_update_mutex to replace cred_guard_mutex Bernd Edlinger
2020-03-23 10:51   ` Kirill Tkhai
2020-03-20 20:26 ` [PATCH v6 06/16] exec: Fix a deadlock in strace Bernd Edlinger
2020-03-20 20:26 ` [PATCH v6 07/16] selftests/ptrace: add test cases for dead-locks Bernd Edlinger
2020-03-20 20:26 ` [PATCH v6 08/16] mm: docs: Fix a comment in process_vm_rw_core Bernd Edlinger
2020-03-20 20:26 ` [PATCH v6 09/16] kernel: doc: remove outdated comment cred.c Bernd Edlinger
2020-03-20 20:27 ` [PATCH v6 10/16] kernel/kcmp.c: Use new infrastructure to fix deadlocks in execve Bernd Edlinger
2020-03-25 15:41   ` Christian Brauner
2020-03-20 20:27 ` [PATCH v6 11/16] proc: " Bernd Edlinger
2020-03-20 20:27 ` [PATCH v6 12/16] proc: io_accounting: " Bernd Edlinger
2020-03-20 20:27 ` [PATCH v6 13/16] perf: " Bernd Edlinger
2020-03-21  2:46 ` [PATCH v6 14/16] pidfd: " Bernd Edlinger
2020-03-25 15:40   ` Christian Brauner
2020-03-21  2:46 ` Bernd Edlinger [this message]
2020-03-25 14:27   ` [PATCH v6 15/16] exec: Fix dead-lock in de_thread with ptrace_attach Eric W. Biederman
2020-03-29  4:31     ` Bernd Edlinger
2020-03-29  6:36       ` Bernd Edlinger
2020-03-30 18:26         ` [PATCH v7 " Bernd Edlinger
2020-03-21  2:47 ` [PATCH v6 16/16] doc: Update documentation of ->exec_*_mutex Bernd Edlinger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b6537ae6-31b1-5c50-f32b-8b8332ace882@hotmail.de \
    --to=bernd.edlinger@hotmail.de \
    --cc=aarcange@redhat.com \
    --cc=adobriyan@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=anshuman.khandual@arm.com \
    --cc=avagin@gmail.com \
    --cc=bigeasy@linutronix.de \
    --cc=christian.brauner@ubuntu.com \
    --cc=christian@kellner.me \
    --cc=corbet@lwn.net \
    --cc=cyphar@cyphar.com \
    --cc=david@redhat.com \
    --cc=dhowells@redhat.com \
    --cc=duyuyang@gmail.com \
    --cc=ebiederm@xmission.com \
    --cc=frederic@kernel.org \
    --cc=gregkh@linuxfoundation.org \
    --cc=jamorris@linux.microsoft.com \
    --cc=jannh@google.com \
    --cc=jgg@ziepe.ca \
    --cc=keescook@chromium.org \
    --cc=ktkhai@virtuozzo.com \
    --cc=ldv@altlinux.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@kernel.org \
    --cc=oleg@redhat.com \
    --cc=peterz@infradead.org \
    --cc=shakeelb@google.com \
    --cc=stable@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git