All of lore.kernel.org
 help / color / mirror / Atom feed
From: Johannes Weiner <hannes@cmpxchg.org>
To: Roman Gushchin <guro@fb.com>
Cc: linux-mm@kvack.org, Michal Hocko <mhocko@kernel.org>,
	Vladimir Davydov <vdavydov.dev@gmail.com>,
	Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>,
	David Rientjes <rientjes@google.com>, Tejun Heo <tj@kernel.org>,
	kernel-team@fb.com, cgroups@vger.kernel.org,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [v5 2/4] mm, oom: cgroup-aware OOM killer
Date: Tue, 22 Aug 2017 13:03:44 -0400	[thread overview]
Message-ID: <20170822170344.GA13547@cmpxchg.org> (raw)
In-Reply-To: <20170814183213.12319-3-guro@fb.com>

Hi Roman,

great work! This looks mostly good to me now. Below are some nitpicks
concerning naming and code layout, but nothing major.

On Mon, Aug 14, 2017 at 07:32:11PM +0100, Roman Gushchin wrote:
> @@ -39,6 +39,7 @@ struct oom_control {
>  	unsigned long totalpages;
>  	struct task_struct *chosen;
>  	unsigned long chosen_points;
> +	struct mem_cgroup *chosen_memcg;
>  };

Please rename 'chosen' to 'chosen_task' to make the distinction to
chosen_memcg clearer.

The ordering is a little weird too, with chosen_points in between.

	chosen_task
	chosen_memcg
	chosen_points

?

> @@ -2639,6 +2639,181 @@ static inline bool memcg_has_children(struct mem_cgroup *memcg)
>  	return ret;
>  }
>  
> +static long memcg_oom_badness(struct mem_cgroup *memcg,
> +			      const nodemask_t *nodemask)
> +{
> +	long points = 0;
> +	int nid;
> +
> +	for_each_node_state(nid, N_MEMORY) {
> +		if (nodemask && !node_isset(nid, *nodemask))
> +			continue;
> +
> +		points += mem_cgroup_node_nr_lru_pages(memcg, nid,
> +				LRU_ALL_ANON | BIT(LRU_UNEVICTABLE));
> +	}
> +
> +	points += memcg_page_state(memcg, MEMCG_KERNEL_STACK_KB) /
> +		(PAGE_SIZE / 1024);
> +	points += memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE);

NR_SLAB_UNRECLAIMABLE is now accounted per-lruvec, which takes
nodeness into account, and so would be more accurate here.

You can get it with mem_cgroup_lruvec() and lruvec_page_state().

> +	points += memcg_page_state(memcg, MEMCG_SOCK);
> +	points += memcg_page_state(memcg, MEMCG_SWAP);
> +
> +	return points;
> +}
> +
> +static long oom_evaluate_memcg(struct mem_cgroup *memcg,
> +			       const nodemask_t *nodemask)
> +{
> +	struct css_task_iter it;
> +	struct task_struct *task;
> +	int elegible = 0;

eligible

> +
> +	css_task_iter_start(&memcg->css, 0, &it);
> +	while ((task = css_task_iter_next(&it))) {
> +		/*
> +		 * If there are no tasks, or all tasks have oom_score_adj set
> +		 * to OOM_SCORE_ADJ_MIN and oom_kill_all_tasks is not set,
> +		 * don't select this memory cgroup.
> +		 */
> +		if (!elegible &&
> +		    (memcg->oom_kill_all_tasks ||
> +		     task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN))
> +			elegible = 1;

This is a little awkward to read. How about something like this:

	/*
	 * When killing individual tasks, we respect OOM score adjustments:
	 * at least one task in the group needs to be killable for the group
	 * to be oomable.
	 *
	 * Also check that previous OOM kills have finished, and abort if
	 * there are any pending OOM victims.
	 */
	oomable = memcg->oom_kill_all_tasks;
	while ((task = css_task_iter_next(&it))) {
		if (!oomable && task->signal_oom_score_adj != OOM_SCORE_ADJ_MIN)
			oomable = 1;

> +		if (tsk_is_oom_victim(task) &&
> +		    !test_bit(MMF_OOM_SKIP, &task->signal->oom_mm->flags)) {
> +			elegible = -1;
> +			break;
> +		}
> +	}
> +	css_task_iter_end(&it);

etc.

> +
> +	return elegible > 0 ? memcg_oom_badness(memcg, nodemask) : elegible;

I find these much easier to read if broken up, even if it's more LOC:

	if (eligible <= 0)
		return eligible;

	return memcg_oom_badness(memcg, nodemask);

> +static void select_victim_memcg(struct mem_cgroup *root, struct oom_control *oc)
> +{
> +	struct mem_cgroup *iter, *parent;
> +
> +	for_each_mem_cgroup_tree(iter, root) {
> +		if (memcg_has_children(iter)) {
> +			iter->oom_score = 0;
> +			continue;
> +		}
> +
> +		iter->oom_score = oom_evaluate_memcg(iter, oc->nodemask);
> +		if (iter->oom_score == -1) {

Please add comments to document the special returns. Maybe #defines
would be clearer, too.

> +			oc->chosen_memcg = (void *)-1UL;
> +			mem_cgroup_iter_break(root, iter);
> +			return;
> +		}
> +
> +		if (!iter->oom_score)
> +			continue;

Same here.

Maybe a switch would be suitable to handle the abort/no-score cases.

> +		for (parent = parent_mem_cgroup(iter); parent && parent != root;
> +		     parent = parent_mem_cgroup(parent))
> +			parent->oom_score += iter->oom_score;
> +	}
> +
> +	for (;;) {
> +		struct cgroup_subsys_state *css;
> +		struct mem_cgroup *memcg = NULL;
> +		long score = LONG_MIN;
> +
> +		css_for_each_child(css, &root->css) {
> +			struct mem_cgroup *iter = mem_cgroup_from_css(css);
> +
> +			if (iter->oom_score > score) {
> +				memcg = iter;
> +				score = iter->oom_score;
> +			}
> +		}
> +
> +		if (!memcg) {
> +			if (oc->memcg && root == oc->memcg) {
> +				oc->chosen_memcg = oc->memcg;
> +				css_get(&oc->chosen_memcg->css);
> +				oc->chosen_points = oc->memcg->oom_score;
> +			}
> +			break;
> +		}
> +
> +		if (memcg->oom_kill_all_tasks || !memcg_has_children(memcg)) {
> +			oc->chosen_memcg = memcg;
> +			css_get(&oc->chosen_memcg->css);
> +			oc->chosen_points = score;
> +			break;
> +		}
> +
> +		root = memcg;
> +	}
> +}
> +
> +static void select_victim_root_cgroup_task(struct oom_control *oc)
> +{
> +	struct css_task_iter it;
> +	struct task_struct *task;
> +	int ret = 0;
> +
> +	css_task_iter_start(&root_mem_cgroup->css, 0, &it);
> +	while (!ret && (task = css_task_iter_next(&it)))
> +		ret = oom_evaluate_task(task, oc);
> +	css_task_iter_end(&it);
> +}
> +
> +bool mem_cgroup_select_oom_victim(struct oom_control *oc)
> +{
> +	struct mem_cgroup *root = root_mem_cgroup;
> +
> +	oc->chosen = NULL;
> +	oc->chosen_memcg = NULL;
> +
> +	if (mem_cgroup_disabled())
> +		return false;
> +
> +	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
> +		return false;
> +
> +	if (oc->memcg)
> +		root = oc->memcg;
> +
> +	rcu_read_lock();
> +
> +	select_victim_memcg(root, oc);
> +	if (oc->chosen_memcg == (void *)-1UL) {
> +		/* Existing OOM victims are found. */
> +		rcu_read_unlock();
> +		return true;
> +	}

It would be good to format this branch like the block below, with a
newline and the comment before the branch block rather than inside.

That would also set apart the call to select_victim_memcg(), which is
the main workhorse of this function.

> +	/*
> +	 * For system-wide OOMs we should consider tasks in the root cgroup
> +	 * with oom_score larger than oc->chosen_points.
> +	 */
> +	if (!oc->memcg) {
> +		select_victim_root_cgroup_task(oc);
> +
> +		if (oc->chosen && oc->chosen_memcg) {
> +			/*
> +			 * If we've decided to kill a task in the root memcg,
> +			 * release chosen_memcg.
> +			 */
> +			css_put(&oc->chosen_memcg->css);
> +			oc->chosen_memcg = NULL;
> +		}
> +	}

^^ like this one.

> +
> +	rcu_read_unlock();
> +
> +	return !!oc->chosen || !!oc->chosen_memcg;

The !! are detrimental to readability and shouldn't be necessary.

> @@ -5190,6 +5365,33 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
>  	return nbytes;
>  }
>  
> +static int memory_oom_kill_all_tasks_show(struct seq_file *m, void *v)
> +{
> +	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
> +	bool oom_kill_all_tasks = memcg->oom_kill_all_tasks;
> +
> +	seq_printf(m, "%d\n", oom_kill_all_tasks);
> +
> +	return 0;
> +}
> +
> +static ssize_t memory_oom_kill_all_tasks_write(struct kernfs_open_file *of,
> +					       char *buf, size_t nbytes,
> +					       loff_t off)
> +{
> +	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
> +	int oom_kill_all_tasks;
> +	int err;
> +
> +	err = kstrtoint(strstrip(buf), 0, &oom_kill_all_tasks);
> +	if (err)
> +		return err;
> +
> +	memcg->oom_kill_all_tasks = !!oom_kill_all_tasks;
> +
> +	return nbytes;
> +}
> +
>  static int memory_events_show(struct seq_file *m, void *v)
>  {
>  	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
> @@ -5310,6 +5512,12 @@ static struct cftype memory_files[] = {
>  		.write = memory_max_write,
>  	},
>  	{
> +		.name = "oom_kill_all_tasks",
> +		.flags = CFTYPE_NOT_ON_ROOT,
> +		.seq_show = memory_oom_kill_all_tasks_show,
> +		.write = memory_oom_kill_all_tasks_write,
> +	},

This name is quite a mouthful and reminiscent of the awkward v1
interface names. It doesn't really go well with the v2 names.

How about memory.oom_group?

> +	{
>  		.name = "events",
>  		.flags = CFTYPE_NOT_ON_ROOT,
>  		.file_offset = offsetof(struct mem_cgroup, events_file),
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 5c29a3dd591b..28e42a0d5eee 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -288,7 +288,7 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc)
>  	return CONSTRAINT_NONE;
>  }
>  
> -static int oom_evaluate_task(struct task_struct *task, void *arg)
> +int oom_evaluate_task(struct task_struct *task, void *arg)
>  {
>  	struct oom_control *oc = arg;
>  	unsigned long points;
> @@ -823,6 +823,9 @@ static void __oom_kill_process(struct task_struct *victim)
>  	struct mm_struct *mm;
>  	bool can_oom_reap = true;
>  
> +	if (is_global_init(victim) || (victim->flags & PF_KTHREAD))
> +		return;
> +
>  	p = find_lock_task_mm(victim);
>  	if (!p) {
>  		put_task_struct(victim);
> @@ -958,6 +961,60 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
>  	put_task_struct(victim);
>  }
>  
> +static int oom_kill_memcg_member(struct task_struct *task, void *unused)
> +{
> +	if (!tsk_is_oom_victim(task))
> +		__oom_kill_process(task);
> +	return 0;
> +}
> +
> +static bool oom_kill_memcg_victim(struct oom_control *oc)
> +{
> +	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
> +				      DEFAULT_RATELIMIT_BURST);
> +
> +	if (oc->chosen) {
> +		if (oc->chosen != (void *)-1UL) {
> +			if (__ratelimit(&oom_rs))
> +				dump_header(oc, oc->chosen);
> +
> +			__oom_kill_process(oc->chosen);
> +			put_task_struct(oc->chosen);
> +			schedule_timeout_killable(1);
> +		}
> +		return true;
> +
> +	} else if (oc->chosen_memcg) {
> +		if (oc->chosen_memcg == (void *)-1UL)
> +			return true;

Can you format the above chosen == (void *)-1UL the same way? That
makes it easier to see that it's checking the same thing.

Thanks

WARNING: multiple messages have this Message-ID (diff)
From: Johannes Weiner <hannes@cmpxchg.org>
To: Roman Gushchin <guro@fb.com>
Cc: linux-mm@kvack.org, Michal Hocko <mhocko@kernel.org>,
	Vladimir Davydov <vdavydov.dev@gmail.com>,
	Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>,
	David Rientjes <rientjes@google.com>, Tejun Heo <tj@kernel.org>,
	kernel-team@fb.com, cgroups@vger.kernel.org,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [v5 2/4] mm, oom: cgroup-aware OOM killer
Date: Tue, 22 Aug 2017 13:03:44 -0400	[thread overview]
Message-ID: <20170822170344.GA13547@cmpxchg.org> (raw)
In-Reply-To: <20170814183213.12319-3-guro@fb.com>

Hi Roman,

great work! This looks mostly good to me now. Below are some nitpicks
concerning naming and code layout, but nothing major.

On Mon, Aug 14, 2017 at 07:32:11PM +0100, Roman Gushchin wrote:
> @@ -39,6 +39,7 @@ struct oom_control {
>  	unsigned long totalpages;
>  	struct task_struct *chosen;
>  	unsigned long chosen_points;
> +	struct mem_cgroup *chosen_memcg;
>  };

Please rename 'chosen' to 'chosen_task' to make the distinction to
chosen_memcg clearer.

The ordering is a little weird too, with chosen_points in between.

	chosen_task
	chosen_memcg
	chosen_points

?

> @@ -2639,6 +2639,181 @@ static inline bool memcg_has_children(struct mem_cgroup *memcg)
>  	return ret;
>  }
>  
> +static long memcg_oom_badness(struct mem_cgroup *memcg,
> +			      const nodemask_t *nodemask)
> +{
> +	long points = 0;
> +	int nid;
> +
> +	for_each_node_state(nid, N_MEMORY) {
> +		if (nodemask && !node_isset(nid, *nodemask))
> +			continue;
> +
> +		points += mem_cgroup_node_nr_lru_pages(memcg, nid,
> +				LRU_ALL_ANON | BIT(LRU_UNEVICTABLE));
> +	}
> +
> +	points += memcg_page_state(memcg, MEMCG_KERNEL_STACK_KB) /
> +		(PAGE_SIZE / 1024);
> +	points += memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE);

NR_SLAB_UNRECLAIMABLE is now accounted per-lruvec, which takes
nodeness into account, and so would be more accurate here.

You can get it with mem_cgroup_lruvec() and lruvec_page_state().

> +	points += memcg_page_state(memcg, MEMCG_SOCK);
> +	points += memcg_page_state(memcg, MEMCG_SWAP);
> +
> +	return points;
> +}
> +
> +static long oom_evaluate_memcg(struct mem_cgroup *memcg,
> +			       const nodemask_t *nodemask)
> +{
> +	struct css_task_iter it;
> +	struct task_struct *task;
> +	int elegible = 0;

eligible

> +
> +	css_task_iter_start(&memcg->css, 0, &it);
> +	while ((task = css_task_iter_next(&it))) {
> +		/*
> +		 * If there are no tasks, or all tasks have oom_score_adj set
> +		 * to OOM_SCORE_ADJ_MIN and oom_kill_all_tasks is not set,
> +		 * don't select this memory cgroup.
> +		 */
> +		if (!elegible &&
> +		    (memcg->oom_kill_all_tasks ||
> +		     task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN))
> +			elegible = 1;

This is a little awkward to read. How about something like this:

	/*
	 * When killing individual tasks, we respect OOM score adjustments:
	 * at least one task in the group needs to be killable for the group
	 * to be oomable.
	 *
	 * Also check that previous OOM kills have finished, and abort if
	 * there are any pending OOM victims.
	 */
	oomable = memcg->oom_kill_all_tasks;
	while ((task = css_task_iter_next(&it))) {
		if (!oomable && task->signal_oom_score_adj != OOM_SCORE_ADJ_MIN)
			oomable = 1;

> +		if (tsk_is_oom_victim(task) &&
> +		    !test_bit(MMF_OOM_SKIP, &task->signal->oom_mm->flags)) {
> +			elegible = -1;
> +			break;
> +		}
> +	}
> +	css_task_iter_end(&it);

etc.

> +
> +	return elegible > 0 ? memcg_oom_badness(memcg, nodemask) : elegible;

I find these much easier to read if broken up, even if it's more LOC:

	if (eligible <= 0)
		return eligible;

	return memcg_oom_badness(memcg, nodemask);

> +static void select_victim_memcg(struct mem_cgroup *root, struct oom_control *oc)
> +{
> +	struct mem_cgroup *iter, *parent;
> +
> +	for_each_mem_cgroup_tree(iter, root) {
> +		if (memcg_has_children(iter)) {
> +			iter->oom_score = 0;
> +			continue;
> +		}
> +
> +		iter->oom_score = oom_evaluate_memcg(iter, oc->nodemask);
> +		if (iter->oom_score == -1) {

Please add comments to document the special returns. Maybe #defines
would be clearer, too.

> +			oc->chosen_memcg = (void *)-1UL;
> +			mem_cgroup_iter_break(root, iter);
> +			return;
> +		}
> +
> +		if (!iter->oom_score)
> +			continue;

Same here.

Maybe a switch would be suitable to handle the abort/no-score cases.

> +		for (parent = parent_mem_cgroup(iter); parent && parent != root;
> +		     parent = parent_mem_cgroup(parent))
> +			parent->oom_score += iter->oom_score;
> +	}
> +
> +	for (;;) {
> +		struct cgroup_subsys_state *css;
> +		struct mem_cgroup *memcg = NULL;
> +		long score = LONG_MIN;
> +
> +		css_for_each_child(css, &root->css) {
> +			struct mem_cgroup *iter = mem_cgroup_from_css(css);
> +
> +			if (iter->oom_score > score) {
> +				memcg = iter;
> +				score = iter->oom_score;
> +			}
> +		}
> +
> +		if (!memcg) {
> +			if (oc->memcg && root == oc->memcg) {
> +				oc->chosen_memcg = oc->memcg;
> +				css_get(&oc->chosen_memcg->css);
> +				oc->chosen_points = oc->memcg->oom_score;
> +			}
> +			break;
> +		}
> +
> +		if (memcg->oom_kill_all_tasks || !memcg_has_children(memcg)) {
> +			oc->chosen_memcg = memcg;
> +			css_get(&oc->chosen_memcg->css);
> +			oc->chosen_points = score;
> +			break;
> +		}
> +
> +		root = memcg;
> +	}
> +}
> +
> +static void select_victim_root_cgroup_task(struct oom_control *oc)
> +{
> +	struct css_task_iter it;
> +	struct task_struct *task;
> +	int ret = 0;
> +
> +	css_task_iter_start(&root_mem_cgroup->css, 0, &it);
> +	while (!ret && (task = css_task_iter_next(&it)))
> +		ret = oom_evaluate_task(task, oc);
> +	css_task_iter_end(&it);
> +}
> +
> +bool mem_cgroup_select_oom_victim(struct oom_control *oc)
> +{
> +	struct mem_cgroup *root = root_mem_cgroup;
> +
> +	oc->chosen = NULL;
> +	oc->chosen_memcg = NULL;
> +
> +	if (mem_cgroup_disabled())
> +		return false;
> +
> +	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
> +		return false;
> +
> +	if (oc->memcg)
> +		root = oc->memcg;
> +
> +	rcu_read_lock();
> +
> +	select_victim_memcg(root, oc);
> +	if (oc->chosen_memcg == (void *)-1UL) {
> +		/* Existing OOM victims are found. */
> +		rcu_read_unlock();
> +		return true;
> +	}

It would be good to format this branch like the block below, with a
newline and the comment before the branch block rather than inside.

That would also set apart the call to select_victim_memcg(), which is
the main workhorse of this function.

> +	/*
> +	 * For system-wide OOMs we should consider tasks in the root cgroup
> +	 * with oom_score larger than oc->chosen_points.
> +	 */
> +	if (!oc->memcg) {
> +		select_victim_root_cgroup_task(oc);
> +
> +		if (oc->chosen && oc->chosen_memcg) {
> +			/*
> +			 * If we've decided to kill a task in the root memcg,
> +			 * release chosen_memcg.
> +			 */
> +			css_put(&oc->chosen_memcg->css);
> +			oc->chosen_memcg = NULL;
> +		}
> +	}

^^ like this one.

> +
> +	rcu_read_unlock();
> +
> +	return !!oc->chosen || !!oc->chosen_memcg;

The !! are detrimental to readability and shouldn't be necessary.

> @@ -5190,6 +5365,33 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
>  	return nbytes;
>  }
>  
> +static int memory_oom_kill_all_tasks_show(struct seq_file *m, void *v)
> +{
> +	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
> +	bool oom_kill_all_tasks = memcg->oom_kill_all_tasks;
> +
> +	seq_printf(m, "%d\n", oom_kill_all_tasks);
> +
> +	return 0;
> +}
> +
> +static ssize_t memory_oom_kill_all_tasks_write(struct kernfs_open_file *of,
> +					       char *buf, size_t nbytes,
> +					       loff_t off)
> +{
> +	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
> +	int oom_kill_all_tasks;
> +	int err;
> +
> +	err = kstrtoint(strstrip(buf), 0, &oom_kill_all_tasks);
> +	if (err)
> +		return err;
> +
> +	memcg->oom_kill_all_tasks = !!oom_kill_all_tasks;
> +
> +	return nbytes;
> +}
> +
>  static int memory_events_show(struct seq_file *m, void *v)
>  {
>  	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
> @@ -5310,6 +5512,12 @@ static struct cftype memory_files[] = {
>  		.write = memory_max_write,
>  	},
>  	{
> +		.name = "oom_kill_all_tasks",
> +		.flags = CFTYPE_NOT_ON_ROOT,
> +		.seq_show = memory_oom_kill_all_tasks_show,
> +		.write = memory_oom_kill_all_tasks_write,
> +	},

This name is quite a mouthful and reminiscent of the awkward v1
interface names. It doesn't really go well with the v2 names.

How about memory.oom_group?

> +	{
>  		.name = "events",
>  		.flags = CFTYPE_NOT_ON_ROOT,
>  		.file_offset = offsetof(struct mem_cgroup, events_file),
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 5c29a3dd591b..28e42a0d5eee 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -288,7 +288,7 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc)
>  	return CONSTRAINT_NONE;
>  }
>  
> -static int oom_evaluate_task(struct task_struct *task, void *arg)
> +int oom_evaluate_task(struct task_struct *task, void *arg)
>  {
>  	struct oom_control *oc = arg;
>  	unsigned long points;
> @@ -823,6 +823,9 @@ static void __oom_kill_process(struct task_struct *victim)
>  	struct mm_struct *mm;
>  	bool can_oom_reap = true;
>  
> +	if (is_global_init(victim) || (victim->flags & PF_KTHREAD))
> +		return;
> +
>  	p = find_lock_task_mm(victim);
>  	if (!p) {
>  		put_task_struct(victim);
> @@ -958,6 +961,60 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
>  	put_task_struct(victim);
>  }
>  
> +static int oom_kill_memcg_member(struct task_struct *task, void *unused)
> +{
> +	if (!tsk_is_oom_victim(task))
> +		__oom_kill_process(task);
> +	return 0;
> +}
> +
> +static bool oom_kill_memcg_victim(struct oom_control *oc)
> +{
> +	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
> +				      DEFAULT_RATELIMIT_BURST);
> +
> +	if (oc->chosen) {
> +		if (oc->chosen != (void *)-1UL) {
> +			if (__ratelimit(&oom_rs))
> +				dump_header(oc, oc->chosen);
> +
> +			__oom_kill_process(oc->chosen);
> +			put_task_struct(oc->chosen);
> +			schedule_timeout_killable(1);
> +		}
> +		return true;
> +
> +	} else if (oc->chosen_memcg) {
> +		if (oc->chosen_memcg == (void *)-1UL)
> +			return true;

Can you format the above chosen == (void *)-1UL the same way? That
makes it easier to see that it's checking the same thing.

Thanks

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2017-08-22 17:04 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-14 18:32 [v5 1/4] mm, oom: refactor the oom_kill_process() function Roman Gushchin
2017-08-14 18:32 ` Roman Gushchin
2017-08-14 18:32 ` [v5 0/4] cgroup-aware OOM killer Roman Gushchin
2017-08-14 18:32   ` Roman Gushchin
2017-08-14 18:32   ` Roman Gushchin
2017-08-14 18:32 ` [v5 2/4] mm, oom: " Roman Gushchin
2017-08-14 18:32   ` Roman Gushchin
2017-08-14 22:42   ` David Rientjes
2017-08-14 22:42     ` David Rientjes
2017-08-15 12:15     ` Roman Gushchin
2017-08-15 12:15       ` Roman Gushchin
2017-08-15 12:20       ` Aleksa Sarai
2017-08-15 12:20         ` Aleksa Sarai
2017-08-15 12:57         ` Roman Gushchin
2017-08-15 12:57           ` Roman Gushchin
2017-08-15 21:47       ` David Rientjes
2017-08-15 21:47         ` David Rientjes
2017-08-15 21:47         ` David Rientjes
2017-08-16 15:43         ` Roman Gushchin
2017-08-16 15:43           ` Roman Gushchin
2017-08-16 15:43           ` Roman Gushchin
2017-08-21  0:50           ` David Rientjes
2017-08-21  0:50             ` David Rientjes
2017-08-21  9:46             ` Roman Gushchin
2017-08-21  9:46               ` Roman Gushchin
2017-08-21  9:46               ` Roman Gushchin
2017-08-22 17:03   ` Johannes Weiner [this message]
2017-08-22 17:03     ` Johannes Weiner
2017-08-23 16:20     ` Roman Gushchin
2017-08-23 16:20       ` Roman Gushchin
2017-08-23 17:24       ` Johannes Weiner
2017-08-23 17:24         ` Johannes Weiner
2017-08-23 18:04         ` Roman Gushchin
2017-08-23 18:04           ` Roman Gushchin
2017-08-23 18:04           ` Roman Gushchin
2017-08-23 23:13           ` David Rientjes
2017-08-23 23:13             ` David Rientjes
2017-08-14 18:32 ` [v5 3/4] mm, oom: introduce oom_priority for memory cgroups Roman Gushchin
2017-08-14 18:32   ` Roman Gushchin
2017-08-14 22:44   ` David Rientjes
2017-08-14 22:44     ` David Rientjes
2017-08-14 18:32 ` [v5 4/4] mm, oom, docs: describe the cgroup-aware OOM killer Roman Gushchin
2017-08-14 18:32   ` Roman Gushchin
2017-08-14 18:32   ` Roman Gushchin
2017-08-14 22:52   ` David Rientjes
2017-08-14 22:52     ` David Rientjes
2017-08-15 14:13     ` Roman Gushchin
2017-08-15 14:13       ` Roman Gushchin
2017-08-15 20:56       ` David Rientjes
2017-08-15 20:56         ` David Rientjes
2017-08-16 14:43         ` Roman Gushchin
2017-08-16 14:43           ` Roman Gushchin
2017-08-17 12:16         ` Roman Gushchin
2017-08-17 12:16           ` Roman Gushchin
2017-08-17 12:16           ` Roman Gushchin
2017-08-21  0:41           ` David Rientjes
2017-08-21  0:41             ` David Rientjes
2017-08-14 22:00 ` [v5 1/4] mm, oom: refactor the oom_kill_process() function David Rientjes
2017-08-14 22:00   ` David Rientjes
2017-08-14 22:00   ` David Rientjes
2017-08-22 17:06 ` Johannes Weiner
2017-08-22 17:06   ` Johannes Weiner
2017-08-23 12:30   ` Roman Gushchin
2017-08-23 12:30     ` Roman Gushchin
2017-08-23 12:30     ` Roman Gushchin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170822170344.GA13547@cmpxchg.org \
    --to=hannes@cmpxchg.org \
    --cc=cgroups@vger.kernel.org \
    --cc=guro@fb.com \
    --cc=kernel-team@fb.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=penguin-kernel@I-love.SAKURA.ne.jp \
    --cc=rientjes@google.com \
    --cc=tj@kernel.org \
    --cc=vdavydov.dev@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.