linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Michal Hocko <mhocko@suse.com>
To: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Suren Baghdasaryan <surenb@google.com>,
	christian.brauner@ubuntu.com, mingo@kernel.org,
	peterz@infradead.org, tglx@linutronix.de, esyr@redhat.com,
	christian@kellner.me, areber@redhat.com, shakeelb@google.com,
	cyphar@cyphar.com, oleg@redhat.com, adobriyan@gmail.com,
	akpm@linux-foundation.org, gladkov.alexey@gmail.com,
	walken@google.com, daniel.m.jordan@oracle.com, avagin@gmail.com,
	bernd.edlinger@hotmail.de, john.johansen@canonical.com,
	laoar.shao@gmail.com, timmurray@google.com, minchan@kernel.org,
	kernel-team@android.com, linux-kernel@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, linux-mm@kvack.org
Subject: Re: [PATCH 1/1] mm, oom_adj: don't loop through tasks in __set_oom_adj when not necessary
Date: Thu, 20 Aug 2020 16:12:22 +0200	[thread overview]
Message-ID: <20200820141222.GL5033@dhcp22.suse.cz> (raw)
In-Reply-To: <874koxxwn5.fsf@x220.int.ebiederm.org>

On Thu 20-08-20 08:41:18, Eric W. Biederman wrote:
[...]
> I expect something like this completley untested patch will work.

Neat. I think you will need to duplicate oom_score_adj_min as well.
I am not familiar with vfork specifics to review this in depth but if
this is correct then it is much more effective than the existing
implementation which has focused more on not spreading the oom specifics
to the core code. So if the state duplication is deemed ok then no
objections from me.

Thanks!

> 
> Eric
> 
> 
>  fs/exec.c                    |    4 ++++
>  fs/proc/base.c               |   30 ++++++------------------------
>  include/linux/mm_types.h     |    4 ++++
>  include/linux/sched/signal.h |    4 +---
>  kernel/fork.c                |    3 +--
>  mm/oom_kill.c                |   12 ++++++------
>  6 files changed, 22 insertions(+), 35 deletions(-)
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index 9b723d2560d1..e7eed5212c6c 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1139,6 +1139,10 @@ static int exec_mmap(struct mm_struct *mm)
>  	vmacache_flush(tsk);
>  	task_unlock(tsk);
>  	if (old_mm) {
> +		mm->oom_score_adj = old_mm->oom_score_adj;
> +		mm->oom_score_adj_min = old_mm->oom_score_adj_min;
> +		if (tsk->vfork_done)
> +			mm->oom_score_adj = tsk->vfork_oom_score_adj;
>  		mmap_read_unlock(old_mm);
>  		BUG_ON(active_mm != old_mm);
>  		setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index 617db4e0faa0..795fa0a8db52 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -1103,33 +1103,15 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
>  		}
>  	}
>  
> -	task->signal->oom_score_adj = oom_adj;
> -	if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
> -		task->signal->oom_score_adj_min = (short)oom_adj;
> -	trace_oom_score_adj_update(task);
> -
>  	if (mm) {
>  		struct task_struct *p;
>  
> -		rcu_read_lock();
> -		for_each_process(p) {
> -			if (same_thread_group(task, p))
> -				continue;
> -
> -			/* do not touch kernel threads or the global init */
> -			if (p->flags & PF_KTHREAD || is_global_init(p))
> -				continue;
> -
> -			task_lock(p);
> -			if (!p->vfork_done && process_shares_mm(p, mm)) {
> -				p->signal->oom_score_adj = oom_adj;
> -				if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
> -					p->signal->oom_score_adj_min = (short)oom_adj;
> -			}
> -			task_unlock(p);
> -		}
> -		rcu_read_unlock();
> -		mmdrop(mm);
> +		mm->oom_score_adj = oom_adj;
> +		if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
> +			mm->oom_score_adj_min = (short)oom_adj;
> +		trace_oom_score_adj_update(task);
> +	} else {
> +		task->signal->vfork_oom_score_adj = oom_adj;
>  	}
>  err_unlock:
>  	mutex_unlock(&oom_adj_mutex);
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 496c3ff97cce..b865048ab25a 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -542,6 +542,10 @@ struct mm_struct {
>  		atomic_long_t hugetlb_usage;
>  #endif
>  		struct work_struct async_put_work;
> +
> +		short oom_score_adj;		/* OOM kill score adjustment */
> +		short oom_score_adj_min;	/* OOM kill score adjustment min value.
> +					 * Only settable by CAP_SYS_RESOURCE. */
>  	} __randomize_layout;
>  
>  	/*
> diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
> index 1bad18a1d8ba..a69eb9e0d247 100644
> --- a/include/linux/sched/signal.h
> +++ b/include/linux/sched/signal.h
> @@ -218,9 +218,7 @@ struct signal_struct {
>  	 * oom
>  	 */
>  	bool oom_flag_origin;
> -	short oom_score_adj;		/* OOM kill score adjustment */
> -	short oom_score_adj_min;	/* OOM kill score adjustment min value.
> -					 * Only settable by CAP_SYS_RESOURCE. */
> +	short vfork_oom_score_adj;		/* OOM kill score adjustment */
>  	struct mm_struct *oom_mm;	/* recorded mm when the thread group got
>  					 * killed by the oom killer */
>  
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 3049a41076f3..1ba4deaa2f98 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1584,8 +1584,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
>  	tty_audit_fork(sig);
>  	sched_autogroup_fork(sig);
>  
> -	sig->oom_score_adj = current->signal->oom_score_adj;
> -	sig->oom_score_adj_min = current->signal->oom_score_adj_min;
> +	sig->vfork_oom_score_adj = current->mm->oom_score_adj;
>  
>  	mutex_init(&sig->cred_guard_mutex);
>  	mutex_init(&sig->exec_update_mutex);
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index e90f25d6385d..0412f64e74c1 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -213,7 +213,7 @@ long oom_badness(struct task_struct *p, unsigned long totalpages)
>  	 * unkillable or have been already oom reaped or the are in
>  	 * the middle of vfork
>  	 */
> -	adj = (long)p->signal->oom_score_adj;
> +	adj = (long)p->mm->oom_score_adj;
>  	if (adj == OOM_SCORE_ADJ_MIN ||
>  			test_bit(MMF_OOM_SKIP, &p->mm->flags) ||
>  			in_vfork(p)) {
> @@ -403,7 +403,7 @@ static int dump_task(struct task_struct *p, void *arg)
>  		task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
>  		mm_pgtables_bytes(task->mm),
>  		get_mm_counter(task->mm, MM_SWAPENTS),
> -		task->signal->oom_score_adj, task->comm);
> +		task->mm->oom_score_adj, task->comm);
>  	task_unlock(task);
>  
>  	return 0;
> @@ -452,7 +452,7 @@ static void dump_header(struct oom_control *oc, struct task_struct *p)
>  {
>  	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n",
>  		current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
> -			current->signal->oom_score_adj);
> +			current->mm->oom_score_adj);
>  	if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order)
>  		pr_warn("COMPACTION is disabled!!!\n");
>  
> @@ -892,7 +892,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
>  		K(get_mm_counter(mm, MM_FILEPAGES)),
>  		K(get_mm_counter(mm, MM_SHMEMPAGES)),
>  		from_kuid(&init_user_ns, task_uid(victim)),
> -		mm_pgtables_bytes(mm) >> 10, victim->signal->oom_score_adj);
> +		mm_pgtables_bytes(mm) >> 10, victim->mm->oom_score_adj);
>  	task_unlock(victim);
>  
>  	/*
> @@ -942,7 +942,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
>   */
>  static int oom_kill_memcg_member(struct task_struct *task, void *message)
>  {
> -	if (task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN &&
> +	if (task->mm->oom_score_adj != OOM_SCORE_ADJ_MIN &&
>  	    !is_global_init(task)) {
>  		get_task_struct(task);
>  		__oom_kill_process(task, message);
> @@ -1089,7 +1089,7 @@ bool out_of_memory(struct oom_control *oc)
>  	if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
>  	    current->mm && !oom_unkillable_task(current) &&
>  	    oom_cpuset_eligible(current, oc) &&
> -	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
> +	    current->mm->oom_score_adj != OOM_SCORE_ADJ_MIN) {
>  		get_task_struct(current);
>  		oc->chosen = current;
>  		oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");

-- 
Michal Hocko
SUSE Labs

      parent reply	other threads:[~2020-08-20 14:12 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-20  0:20 [PATCH 1/1] mm, oom_adj: don't loop through tasks in __set_oom_adj when not necessary Suren Baghdasaryan
2020-08-20  5:56 ` Michal Hocko
2020-08-20  8:46 ` Christian Brauner
2020-08-20  9:09   ` Michal Hocko
2020-08-20 10:32     ` Christian Brauner
2020-08-20 11:14       ` Michal Hocko
2020-08-20 10:55 ` Oleg Nesterov
2020-08-20 11:13   ` Michal Hocko
2020-08-20 11:29     ` Michal Hocko
2020-08-20 11:41       ` Oleg Nesterov
2020-08-20 11:47       ` Christian Brauner
2020-08-20 11:30     ` Christian Brauner
2020-08-20 11:42       ` Michal Hocko
2020-08-20 12:41         ` Michal Hocko
2020-08-20 13:43           ` Christian Brauner
2020-08-20 12:34 ` Eric W. Biederman
2020-08-20 12:42   ` Michal Hocko
2020-08-20 12:45     ` Eric W. Biederman
2020-08-20 12:54       ` Eric W. Biederman
2020-08-20 13:26         ` Michal Hocko
2020-08-20 13:34           ` Christian Brauner
2020-08-20 13:48             ` Tetsuo Handa
2020-08-20 14:00               ` Christian Brauner
2020-08-20 14:15                 ` Michal Hocko
2020-08-20 14:26                   ` Tetsuo Handa
2020-08-20 14:34                     ` Michal Hocko
2020-08-20 14:18                 ` Tetsuo Handa
2020-08-20 14:49                   ` Eric W. Biederman
2020-08-20 15:06                     ` Christian Brauner
2020-08-20 15:56                     ` Suren Baghdasaryan
2020-08-20 16:26                       ` Michal Hocko
2020-08-20 16:29                         ` Christian Brauner
2020-08-20 16:47                           ` Suren Baghdasaryan
2020-08-21  4:39                         ` Eric W. Biederman
2020-08-21  7:17                           ` Michal Hocko
2020-08-21 11:15                           ` Oleg Nesterov
2020-08-21 15:28                             ` Suren Baghdasaryan
2020-08-21 16:06                               ` Suren Baghdasaryan
2020-08-21 16:37                                 ` Oleg Nesterov
2020-08-21 17:22                                   ` Suren Baghdasaryan
2020-08-21 16:33                               ` Oleg Nesterov
2020-08-21 17:59                                 ` Oleg Nesterov
2020-08-21 18:53                                   ` Suren Baghdasaryan
2020-08-24 20:03                                     ` Suren Baghdasaryan
2020-08-20 13:41           ` Eric W. Biederman
2020-08-20 14:04             ` Oleg Nesterov
2020-08-20 14:36               ` Oleg Nesterov
2020-08-20 15:06                 ` Eric W. Biederman
2020-08-20 14:43               ` Eric W. Biederman
2020-08-20 14:12             ` Michal Hocko [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200820141222.GL5033@dhcp22.suse.cz \
    --to=mhocko@suse.com \
    --cc=adobriyan@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=areber@redhat.com \
    --cc=avagin@gmail.com \
    --cc=bernd.edlinger@hotmail.de \
    --cc=christian.brauner@ubuntu.com \
    --cc=christian@kellner.me \
    --cc=cyphar@cyphar.com \
    --cc=daniel.m.jordan@oracle.com \
    --cc=ebiederm@xmission.com \
    --cc=esyr@redhat.com \
    --cc=gladkov.alexey@gmail.com \
    --cc=john.johansen@canonical.com \
    --cc=kernel-team@android.com \
    --cc=laoar.shao@gmail.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=minchan@kernel.org \
    --cc=mingo@kernel.org \
    --cc=oleg@redhat.com \
    --cc=peterz@infradead.org \
    --cc=shakeelb@google.com \
    --cc=surenb@google.com \
    --cc=tglx@linutronix.de \
    --cc=timmurray@google.com \
    --cc=walken@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).