linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: ebiederm@xmission.com (Eric W. Biederman)
To: <linux-kernel@vger.kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	Oleg Nesterov <oleg@redhat.com>,
	Al Viro <viro@ZenIV.linux.org.uk>, <linux-api@vger.kernel.org>,
	Kees Cook <keescook@chromium.org>
Subject: [PATCH 5/6] coredump:  Don't perform any cleanups before dumping core
Date: Thu, 23 Sep 2021 19:11:39 -0500	[thread overview]
Message-ID: <874kaax26c.fsf@disp2133> (raw)
In-Reply-To: <87v92qx2c6.fsf@disp2133> (Eric W. Biederman's message of "Thu, 23 Sep 2021 19:08:09 -0500")


Rename coredump_exit_mm to coredump_task_exit and call it from do_exit
before PTRACE_EVENT_EXIT, and before any cleanup work for a task
happens.  This ensures that an accurate copy of the process can be
captured in the coredump as no cleanup for the process happens before
the coredump completes.  This also ensures that PTRACE_EVENT_EXIT
will not be visited by any thread until the coredump is complete.

Add a new flag PF_POSTCOREDUMP so that tasks that have passed through
coredump_task_exit can be recognized and ignored in zap_process.

Now that all of the coredumping happens before exit_mm remove code to
test for a coredump in progress from mm_release.

Replace "may_ptrace_stop()" with a simple test of "current->ptrace".
The other tests in may_ptrace_stop all concern avoiding stopping
during a coredump.  These tests are no longer necessary as it is now
guaranteed that fatal_signal_pending will be set if the code enters
ptrace_stop during a coredump.  The code in ptrace_stop is guaranteed
not to stop if fatal_signal_pending returns true.

Until this change "ptrace_event(PTRACE_EVENT_EXIT)" could call
ptrace_stop without fatal_signal_pending being true, as signals are
dequeued in get_signal before calling do_exit.  This is no longer
an issue as "ptrace_event(PTRACE_EVENT_EXIT)" is no longer reached
until after the coredump completes.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/coredump.c         |  8 ++++----
 include/linux/sched.h |  1 +
 kernel/exit.c         | 19 ++++++++++++-------
 kernel/fork.c         |  3 +--
 kernel/signal.c       | 27 +--------------------------
 mm/oom_kill.c         |  2 +-
 6 files changed, 20 insertions(+), 40 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 5e0e08a7fb9b..d576287fb88b 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -359,7 +359,7 @@ static int zap_process(struct task_struct *start, int exit_code, int flags)
 
 	for_each_thread(start, t) {
 		task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
-		if (t != current && t->mm) {
+		if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
 			sigaddset(&t->pending.signal, SIGKILL);
 			signal_wake_up(t, 1);
 			nr++;
@@ -404,8 +404,8 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
 	 *
 	 * do_exit:
 	 *	The caller holds mm->mmap_lock. This means that the task which
-	 *	uses this mm can't pass coredump_exit_mm(), so it can't exit or
-	 *	clear its ->mm.
+	 *	uses this mm can't pass coredump_task_exit(), so it can't exit
+	 *	or clear its ->mm.
 	 *
 	 * de_thread:
 	 *	It does list_replace_rcu(&leader->tasks, &current->tasks),
@@ -500,7 +500,7 @@ static void coredump_finish(struct mm_struct *mm, bool core_dumped)
 		next = curr->next;
 		task = curr->task;
 		/*
-		 * see coredump_exit_mm(), curr->task must not see
+		 * see coredump_task_exit(), curr->task must not see
 		 * ->task == NULL before we read ->next.
 		 */
 		smp_mb();
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e12b524426b0..f3741f23935e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1664,6 +1664,7 @@ extern struct pid *cad_pid;
 #define PF_VCPU			0x00000001	/* I'm a virtual CPU */
 #define PF_IDLE			0x00000002	/* I am an IDLE thread */
 #define PF_EXITING		0x00000004	/* Getting shut down */
+#define PF_POSTCOREDUMP		0x00000008	/* Coredumps should ignore this task */
 #define PF_IO_WORKER		0x00000010	/* Task is an IO worker */
 #define PF_WQ_WORKER		0x00000020	/* I'm a workqueue worker */
 #define PF_FORKNOEXEC		0x00000040	/* Forked but didn't exec */
diff --git a/kernel/exit.c b/kernel/exit.c
index cb1619d8fd64..774e6b5061b8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -339,23 +339,29 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
 	}
 }
 
-static void coredump_exit_mm(struct mm_struct *mm)
+static void coredump_task_exit(struct task_struct *tsk)
 {
 	struct core_state *core_state;
+	struct mm_struct *mm;
+
+	mm = tsk->mm;
+	if (!mm)
+		return;
 
 	/*
 	 * Serialize with any possible pending coredump.
 	 * We must hold mmap_lock around checking core_state
-	 * and clearing tsk->mm.  The core-inducing thread
+	 * and setting PF_POSTCOREDUMP.  The core-inducing thread
 	 * will increment ->nr_threads for each thread in the
-	 * group with ->mm != NULL.
+	 * group without PF_POSTCOREDUMP set.
 	 */
+	mmap_read_lock(mm);
+	tsk->flags |= PF_POSTCOREDUMP;
 	core_state = mm->core_state;
+	mmap_read_unlock(mm);
 	if (core_state) {
 		struct core_thread self;
 
-		mmap_read_unlock(mm);
-
 		self.task = current;
 		if (self.task->flags & PF_SIGNALED)
 			self.next = xchg(&core_state->dumper.next, &self);
@@ -375,7 +381,6 @@ static void coredump_exit_mm(struct mm_struct *mm)
 			freezable_schedule();
 		}
 		__set_current_state(TASK_RUNNING);
-		mmap_read_lock(mm);
 	}
 }
 
@@ -480,7 +485,6 @@ static void exit_mm(void)
 		return;
 	sync_mm_rss(mm);
 	mmap_read_lock(mm);
-	coredump_exit_mm(mm);
 	mmgrab(mm);
 	BUG_ON(mm != current->active_mm);
 	/* more a memory barrier than a real lock */
@@ -768,6 +772,7 @@ void __noreturn do_exit(long code)
 	profile_task_exit(tsk);
 	kcov_task_exit(tsk);
 
+	coredump_task_exit(tsk);
 	ptrace_event(PTRACE_EVENT_EXIT, code);
 
 	validate_creds_for_do_exit(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index 38681ad44c76..9bd9f2da9e41 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1392,8 +1392,7 @@ static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 	 * purposes.
 	 */
 	if (tsk->clear_child_tid) {
-		if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
-		    atomic_read(&mm->mm_users) > 1) {
+		if (atomic_read(&mm->mm_users) > 1) {
 			/*
 			 * We don't check the error code - if userspace has
 			 * not set up a proper pointer then tough luck.
diff --git a/kernel/signal.c b/kernel/signal.c
index c9759ff2cb43..b0db80acc6ef 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2158,31 +2158,6 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
 	spin_unlock_irqrestore(&sighand->siglock, flags);
 }
 
-static inline bool may_ptrace_stop(void)
-{
-	if (!likely(current->ptrace))
-		return false;
-	/*
-	 * Are we in the middle of do_coredump?
-	 * If so and our tracer is also part of the coredump stopping
-	 * is a deadlock situation, and pointless because our tracer
-	 * is dead so don't allow us to stop.
-	 * If SIGKILL was already sent before the caller unlocked
-	 * ->siglock we must see ->core_state != NULL. Otherwise it
-	 * is safe to enter schedule().
-	 *
-	 * This is almost outdated, a task with the pending SIGKILL can't
-	 * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported
-	 * after SIGKILL was already dequeued.
-	 */
-	if (unlikely(current->mm->core_state) &&
-	    unlikely(current->mm == current->parent->mm))
-		return false;
-
-	return true;
-}
-
-
 /*
  * This must be called with current->sighand->siglock held.
  *
@@ -2263,7 +2238,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
 
 	spin_unlock_irq(&current->sighand->siglock);
 	read_lock(&tasklist_lock);
-	if (may_ptrace_stop()) {
+	if (likely(current->ptrace)) {
 		/*
 		 * Notify parents of the stop.
 		 *
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 295c8bdfd6c8..7877c755ab37 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -788,7 +788,7 @@ static inline bool __task_will_free_mem(struct task_struct *task)
 
 	/*
 	 * A coredumping process may sleep for an extended period in
-	 * coredump_exit_mm(), so the oom killer cannot assume that
+	 * coredump_task_exit(), so the oom killer cannot assume that
 	 * the process will promptly exit and release memory.
 	 */
 	if (sig->flags & SIGNAL_GROUP_COREDUMP)
-- 
2.20.1


  parent reply	other threads:[~2021-09-24  0:11 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-24  0:08 [PATCH 0/6] per signal_struct coredumps Eric W. Biederman
2021-09-24  0:09 ` [PATCH 1/6] signal: Remove the bogus sigkill_pending in ptrace_stop Eric W. Biederman
2021-09-24 15:22   ` Kees Cook
2021-09-24 15:48     ` Eric W. Biederman
2021-09-24 19:06       ` Kees Cook
2021-09-24  0:10 ` [PATCH 2/6] ptrace: Remove the unnecessary arguments from arch_ptrace_stop Eric W. Biederman
2021-09-24 15:26   ` Kees Cook
2021-09-24  0:10 ` [PATCH 3/6] exec: Check for a pending fatal signal instead of core_state Eric W. Biederman
2021-09-24 15:38   ` Kees Cook
2021-09-24  0:11 ` [PATCH 4/6] exit: Factor coredump_exit_mm out of exit_mm Eric W. Biederman
2021-09-24 18:28   ` Kees Cook
2021-09-24  0:11 ` Eric W. Biederman [this message]
2021-09-24 18:51   ` [PATCH 5/6] coredump: Don't perform any cleanups before dumping core Kees Cook
2021-09-24 21:28     ` Eric W. Biederman
2021-09-24 21:41       ` Kees Cook
2021-09-24  0:12 ` [PATCH 6/6] coredump: Limit coredumps to a single thread group Eric W. Biederman
2021-09-24 18:56   ` Kees Cook
2021-10-06 17:03     ` Eric W. Biederman
2021-11-19 16:03   ` Kyle Huey
2021-11-19 17:38     ` Eric W. Biederman
2021-09-24  5:59 ` [PATCH 0/6] per signal_struct coredumps Kees Cook
2021-09-24 14:00   ` Eric W. Biederman
2021-09-24 15:22 ` [PATCH 2/6] ptrace: Remove the unnecessary arguments from arch_ptrace_stop Eric W. Biederman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=874kaax26c.fsf@disp2133 \
    --to=ebiederm@xmission.com \
    --cc=keescook@chromium.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=oleg@redhat.com \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@ZenIV.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).