linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Suren Baghdasaryan <surenb@google.com>
To: akpm@linux-foundation.org
Cc: mhocko@suse.com, rientjes@google.com, willy@infradead.org,
	yuzhoujian@didichuxing.com, jrdr.linux@gmail.com, guro@fb.com,
	hannes@cmpxchg.org, penguin-kernel@I-love.SAKURA.ne.jp,
	ebiederm@xmission.com, shakeelb@google.com, christian@brauner.io,
	minchan@kernel.org, timmurray@google.com, dancol@google.com,
	joel@joelfernandes.org, jannh@google.com, surenb@google.com,
	linux-mm@kvack.org, lsf-pc@lists.linux-foundation.org,
	linux-kernel@vger.kernel.org, kernel-team@android.com
Subject: [RFC 2/2] signal: extend pidfd_send_signal() to allow expedited process killing
Date: Wed, 10 Apr 2019 18:43:53 -0700	[thread overview]
Message-ID: <20190411014353.113252-3-surenb@google.com> (raw)
In-Reply-To: <20190411014353.113252-1-surenb@google.com>

Add new SS_EXPEDITE flag to be used when sending SIGKILL via
pidfd_send_signal() syscall to allow expedited memory reclaim of the
victim process. The usage of this flag is currently limited to SIGKILL
signal and only to privileged users.

Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
 include/linux/sched/signal.h |  3 ++-
 include/linux/signal.h       | 11 ++++++++++-
 ipc/mqueue.c                 |  2 +-
 kernel/signal.c              | 37 ++++++++++++++++++++++++++++--------
 kernel/time/itimer.c         |  2 +-
 5 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index e412c092c1e8..8a227633a058 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -327,7 +327,8 @@ extern int send_sig_info(int, struct kernel_siginfo *, struct task_struct *);
 extern void force_sigsegv(int sig, struct task_struct *p);
 extern int force_sig_info(int, struct kernel_siginfo *, struct task_struct *);
 extern int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp);
-extern int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid);
+extern int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid,
+				bool expedite);
 extern int kill_pid_info_as_cred(int, struct kernel_siginfo *, struct pid *,
 				const struct cred *);
 extern int kill_pgrp(struct pid *pid, int sig, int priv);
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 9702016734b1..34b7852aa4a0 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -446,8 +446,17 @@ int __save_altstack(stack_t __user *, unsigned long);
 } while (0);
 
 #ifdef CONFIG_PROC_FS
+
+/*
+ * SS_FLAGS values used in pidfd_send_signal:
+ *
+ * SS_EXPEDITE indicates desire to expedite the operation.
+ */
+#define SS_EXPEDITE	0x00000001
+
 struct seq_file;
 extern void render_sigset_t(struct seq_file *, const char *, sigset_t *);
-#endif
+
+#endif /* CONFIG_PROC_FS */
 
 #endif /* _LINUX_SIGNAL_H */
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index aea30530c472..27c66296e08e 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -720,7 +720,7 @@ static void __do_notify(struct mqueue_inode_info *info)
 			rcu_read_unlock();
 
 			kill_pid_info(info->notify.sigev_signo,
-				      &sig_i, info->notify_owner);
+				      &sig_i, info->notify_owner, false);
 			break;
 		case SIGEV_THREAD:
 			set_cookie(info->notify_cookie, NOTIFY_WOKENUP);
diff --git a/kernel/signal.c b/kernel/signal.c
index f98448cf2def..02ed4332d17c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -43,6 +43,7 @@
 #include <linux/compiler.h>
 #include <linux/posix-timers.h>
 #include <linux/livepatch.h>
+#include <linux/oom.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/signal.h>
@@ -1394,7 +1395,8 @@ int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp)
 	return success ? 0 : retval;
 }
 
-int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
+int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid,
+				  bool expedite)
 {
 	int error = -ESRCH;
 	struct task_struct *p;
@@ -1402,8 +1404,17 @@ int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
 	for (;;) {
 		rcu_read_lock();
 		p = pid_task(pid, PIDTYPE_PID);
-		if (p)
+		if (p) {
 			error = group_send_sig_info(sig, info, p, PIDTYPE_TGID);
+
+			/*
+			 * Ignore expedite_reclaim return value, it is best
+			 * effort only.
+			 */
+			if (!error && expedite)
+				expedite_reclaim(p);
+		}
+
 		rcu_read_unlock();
 		if (likely(!p || error != -ESRCH))
 			return error;
@@ -1420,7 +1431,7 @@ static int kill_proc_info(int sig, struct kernel_siginfo *info, pid_t pid)
 {
 	int error;
 	rcu_read_lock();
-	error = kill_pid_info(sig, info, find_vpid(pid));
+	error = kill_pid_info(sig, info, find_vpid(pid), false);
 	rcu_read_unlock();
 	return error;
 }
@@ -1487,7 +1498,7 @@ static int kill_something_info(int sig, struct kernel_siginfo *info, pid_t pid)
 
 	if (pid > 0) {
 		rcu_read_lock();
-		ret = kill_pid_info(sig, info, find_vpid(pid));
+		ret = kill_pid_info(sig, info, find_vpid(pid), false);
 		rcu_read_unlock();
 		return ret;
 	}
@@ -1704,7 +1715,7 @@ EXPORT_SYMBOL(kill_pgrp);
 
 int kill_pid(struct pid *pid, int sig, int priv)
 {
-	return kill_pid_info(sig, __si_special(priv), pid);
+	return kill_pid_info(sig, __si_special(priv), pid, false);
 }
 EXPORT_SYMBOL(kill_pid);
 
@@ -3577,10 +3588,20 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
 	struct pid *pid;
 	kernel_siginfo_t kinfo;
 
-	/* Enforce flags be set to 0 until we add an extension. */
-	if (flags)
+	/* Enforce no unknown flags. */
+	if (flags & ~SS_EXPEDITE)
 		return -EINVAL;
 
+	if (flags & SS_EXPEDITE) {
+		/* Enforce SS_EXPEDITE to be used with SIGKILL only. */
+		if (sig != SIGKILL)
+			return -EINVAL;
+
+		/* Limit expedited killing to privileged users only. */
+		if (!capable(CAP_SYS_NICE))
+			return -EPERM;
+	}
+
 	f = fdget_raw(pidfd);
 	if (!f.file)
 		return -EBADF;
@@ -3614,7 +3635,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
 		prepare_kill_siginfo(sig, &kinfo);
 	}
 
-	ret = kill_pid_info(sig, &kinfo, pid);
+	ret = kill_pid_info(sig, &kinfo, pid, (flags & SS_EXPEDITE) != 0);
 
 err:
 	fdput(f);
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 02068b2d5862..c926483cdb53 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -140,7 +140,7 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer)
 	struct pid *leader_pid = sig->pids[PIDTYPE_TGID];
 
 	trace_itimer_expire(ITIMER_REAL, leader_pid, 0);
-	kill_pid_info(SIGALRM, SEND_SIG_PRIV, leader_pid);
+	kill_pid_info(SIGALRM, SEND_SIG_PRIV, leader_pid, false);
 
 	return HRTIMER_NORESTART;
 }
-- 
2.21.0.392.gf8f6787159e-goog


  parent reply	other threads:[~2019-04-11  1:44 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-11  1:43 [RFC 0/2] opportunistic memory reclaim of a killed process Suren Baghdasaryan
2019-04-11  1:43 ` [RFC 1/2] mm: oom: expose expedite_reclaim to use oom_reaper outside of oom_kill.c Suren Baghdasaryan
2019-04-25 21:12   ` Tetsuo Handa
2019-04-25 21:56     ` Suren Baghdasaryan
2019-04-11  1:43 ` Suren Baghdasaryan [this message]
2019-04-11 10:30   ` [RFC 2/2] signal: extend pidfd_send_signal() to allow expedited process killing Christian Brauner
2019-04-11 10:34     ` Christian Brauner
2019-04-11 15:18     ` Suren Baghdasaryan
2019-04-11 15:23       ` Suren Baghdasaryan
2019-04-11 16:25         ` Daniel Colascione
2019-04-11 15:33   ` Matthew Wilcox
2019-04-11 17:05     ` Johannes Weiner
2019-04-11 17:09     ` Suren Baghdasaryan
2019-04-11 17:33       ` Daniel Colascione
2019-04-11 17:36         ` Matthew Wilcox
2019-04-11 17:47           ` Daniel Colascione
2019-04-12  6:49             ` Michal Hocko
2019-04-12 14:15               ` Suren Baghdasaryan
2019-04-12 14:20                 ` Daniel Colascione
2019-04-12 21:03             ` Matthew Wilcox
2019-04-11 17:52           ` Suren Baghdasaryan
2019-04-11 21:45       ` Roman Gushchin
2019-04-11 21:59         ` Suren Baghdasaryan
2019-04-12  6:53     ` Michal Hocko
2019-04-12 14:10       ` Suren Baghdasaryan
2019-04-12 14:14       ` Daniel Colascione
2019-04-12 15:30         ` Daniel Colascione
2019-04-25 16:09         ` Suren Baghdasaryan
2019-04-11 10:51 ` [RFC 0/2] opportunistic memory reclaim of a killed process Michal Hocko
2019-04-11 16:18   ` Joel Fernandes
2019-04-11 18:12     ` Michal Hocko
2019-04-11 19:14       ` Joel Fernandes
2019-04-11 20:11         ` Michal Hocko
2019-04-11 21:11           ` Joel Fernandes
2019-04-11 16:20   ` Sandeep Patil
2019-04-11 16:47   ` Suren Baghdasaryan
2019-04-11 18:19     ` Michal Hocko
2019-04-11 19:56       ` Suren Baghdasaryan
2019-04-11 20:17         ` Michal Hocko
2019-04-11 17:19   ` Johannes Weiner
2019-04-11 11:51 ` [Lsf-pc] " Rik van Riel
2019-04-11 12:16   ` Michal Hocko
2019-04-11 16:54     ` Suren Baghdasaryan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190411014353.113252-3-surenb@google.com \
    --to=surenb@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=christian@brauner.io \
    --cc=dancol@google.com \
    --cc=ebiederm@xmission.com \
    --cc=guro@fb.com \
    --cc=hannes@cmpxchg.org \
    --cc=jannh@google.com \
    --cc=joel@joelfernandes.org \
    --cc=jrdr.linux@gmail.com \
    --cc=kernel-team@android.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lsf-pc@lists.linux-foundation.org \
    --cc=mhocko@suse.com \
    --cc=minchan@kernel.org \
    --cc=penguin-kernel@I-love.SAKURA.ne.jp \
    --cc=rientjes@google.com \
    --cc=shakeelb@google.com \
    --cc=timmurray@google.com \
    --cc=willy@infradead.org \
    --cc=yuzhoujian@didichuxing.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).