* [patch 1/3] mm, oom: organize oom context into struct
@ 2015-06-18 23:00 David Rientjes
2015-06-18 23:00 ` [patch 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq David Rientjes
` (4 more replies)
0 siblings, 5 replies; 21+ messages in thread
From: David Rientjes @ 2015-06-18 23:00 UTC (permalink / raw)
To: Andrew Morton; +Cc: Michal Hocko, linux-kernel, linux-mm
There are essential elements to an oom context that are passed around to
multiple functions.
Organize these elements into a new struct, struct oom_control, that
specifies the context for an oom condition.
This patch introduces no functional change.
Signed-off-by: David Rientjes <rientjes@google.com>
---
drivers/tty/sysrq.c | 12 +++++-
include/linux/oom.h | 25 +++++++-----
mm/memcontrol.c | 16 +++++---
mm/oom_kill.c | 115 ++++++++++++++++++++++++----------------------------
mm/page_alloc.c | 10 ++++-
5 files changed, 98 insertions(+), 80 deletions(-)
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -353,9 +353,17 @@ static struct sysrq_key_op sysrq_term_op = {
static void moom_callback(struct work_struct *ignored)
{
+ const gfp_t gfp_mask = GFP_KERNEL;
+ struct oom_control oc = {
+ .zonelist = node_zonelist(first_memory_node, gfp_mask),
+ .nodemask = NULL,
+ .gfp_mask = gfp_mask,
+ .order = 0,
+ .force_kill = true,
+ };
+
mutex_lock(&oom_lock);
- if (!out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL),
- GFP_KERNEL, 0, NULL, true))
+ if (!out_of_memory(&oc))
pr_info("OOM request ignored because killer is disabled\n");
mutex_unlock(&oom_lock);
}
diff --git a/include/linux/oom.h b/include/linux/oom.h
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -12,6 +12,14 @@ struct notifier_block;
struct mem_cgroup;
struct task_struct;
+struct oom_control {
+ struct zonelist *zonelist;
+ nodemask_t *nodemask;
+ gfp_t gfp_mask;
+ int order;
+ bool force_kill;
+};
+
/*
* Types of limitations to the nodes from which allocations may occur
*/
@@ -57,21 +65,18 @@ extern unsigned long oom_badness(struct task_struct *p,
extern int oom_kills_count(void);
extern void note_oom_kill(void);
-extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
+extern void oom_kill_process(struct oom_control *oc, struct task_struct *p,
unsigned int points, unsigned long totalpages,
- struct mem_cgroup *memcg, nodemask_t *nodemask,
- const char *message);
+ struct mem_cgroup *memcg, const char *message);
-extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
- int order, const nodemask_t *nodemask,
+extern void check_panic_on_oom(struct oom_control *oc,
+ enum oom_constraint constraint,
struct mem_cgroup *memcg);
-extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
- unsigned long totalpages, const nodemask_t *nodemask,
- bool force_kill);
+extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
+ struct task_struct *task, unsigned long totalpages);
-extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
- int order, nodemask_t *mask, bool force_kill);
+extern bool out_of_memory(struct oom_control *oc);
extern void exit_oom_victim(void);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1545,6 +1545,13 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
int order)
{
+ struct oom_control oc = {
+ .zonelist = NULL,
+ .nodemask = NULL,
+ .gfp_mask = gfp_mask,
+ .order = order,
+ .force_kill = false,
+ };
struct mem_cgroup *iter;
unsigned long chosen_points = 0;
unsigned long totalpages;
@@ -1563,7 +1570,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
goto unlock;
}
- check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg);
+ check_panic_on_oom(&oc, CONSTRAINT_MEMCG, memcg);
totalpages = mem_cgroup_get_limit(memcg) ? : 1;
for_each_mem_cgroup_tree(iter, memcg) {
struct css_task_iter it;
@@ -1571,8 +1578,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
css_task_iter_start(&iter->css, &it);
while ((task = css_task_iter_next(&it))) {
- switch (oom_scan_process_thread(task, totalpages, NULL,
- false)) {
+ switch (oom_scan_process_thread(&oc, task, totalpages)) {
case OOM_SCAN_SELECT:
if (chosen)
put_task_struct(chosen);
@@ -1610,8 +1616,8 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
if (chosen) {
points = chosen_points * 1000 / totalpages;
- oom_kill_process(chosen, gfp_mask, order, points, totalpages,
- memcg, NULL, "Memory cgroup out of memory");
+ oom_kill_process(&oc, chosen, points, totalpages, memcg,
+ "Memory cgroup out of memory");
}
unlock:
mutex_unlock(&oom_lock);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -196,27 +196,26 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
* Determine the type of allocation constraint.
*/
#ifdef CONFIG_NUMA
-static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
- gfp_t gfp_mask, nodemask_t *nodemask,
- unsigned long *totalpages)
+static enum oom_constraint constrained_alloc(struct oom_control *oc,
+ unsigned long *totalpages)
{
struct zone *zone;
struct zoneref *z;
- enum zone_type high_zoneidx = gfp_zone(gfp_mask);
+ enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask);
bool cpuset_limited = false;
int nid;
/* Default to all available memory */
*totalpages = totalram_pages + total_swap_pages;
- if (!zonelist)
+ if (!oc->zonelist)
return CONSTRAINT_NONE;
/*
* Reach here only when __GFP_NOFAIL is used. So, we should avoid
* to kill current.We have to random task kill in this case.
* Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now.
*/
- if (gfp_mask & __GFP_THISNODE)
+ if (oc->gfp_mask & __GFP_THISNODE)
return CONSTRAINT_NONE;
/*
@@ -224,17 +223,18 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
* the page allocator means a mempolicy is in effect. Cpuset policy
* is enforced in get_page_from_freelist().
*/
- if (nodemask && !nodes_subset(node_states[N_MEMORY], *nodemask)) {
+ if (oc->nodemask &&
+ !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
*totalpages = total_swap_pages;
- for_each_node_mask(nid, *nodemask)
+ for_each_node_mask(nid, *oc->nodemask)
*totalpages += node_spanned_pages(nid);
return CONSTRAINT_MEMORY_POLICY;
}
/* Check this allocation failure is caused by cpuset's wall function */
- for_each_zone_zonelist_nodemask(zone, z, zonelist,
- high_zoneidx, nodemask)
- if (!cpuset_zone_allowed(zone, gfp_mask))
+ for_each_zone_zonelist_nodemask(zone, z, oc->zonelist,
+ high_zoneidx, oc->nodemask)
+ if (!cpuset_zone_allowed(zone, oc->gfp_mask))
cpuset_limited = true;
if (cpuset_limited) {
@@ -246,20 +246,18 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
return CONSTRAINT_NONE;
}
#else
-static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
- gfp_t gfp_mask, nodemask_t *nodemask,
- unsigned long *totalpages)
+static enum oom_constraint constrained_alloc(struct oom_control *oc,
+ unsigned long *totalpages)
{
*totalpages = totalram_pages + total_swap_pages;
return CONSTRAINT_NONE;
}
#endif
-enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
- unsigned long totalpages, const nodemask_t *nodemask,
- bool force_kill)
+enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
+ struct task_struct *task, unsigned long totalpages)
{
- if (oom_unkillable_task(task, NULL, nodemask))
+ if (oom_unkillable_task(task, NULL, oc->nodemask))
return OOM_SCAN_CONTINUE;
/*
@@ -267,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
* Don't allow any other task to have access to the reserves.
*/
if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
- if (!force_kill)
+ if (!oc->force_kill)
return OOM_SCAN_ABORT;
}
if (!task->mm)
@@ -280,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
if (oom_task_origin(task))
return OOM_SCAN_SELECT;
- if (task_will_free_mem(task) && !force_kill)
+ if (task_will_free_mem(task) && !oc->force_kill)
return OOM_SCAN_ABORT;
return OOM_SCAN_OK;
@@ -289,12 +287,9 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
/*
* Simple selection loop. We chose the process with the highest
* number of 'points'. Returns -1 on scan abort.
- *
- * (not docbooked, we don't want this one cluttering up the manual)
*/
-static struct task_struct *select_bad_process(unsigned int *ppoints,
- unsigned long totalpages, const nodemask_t *nodemask,
- bool force_kill)
+static struct task_struct *select_bad_process(struct oom_control *oc,
+ unsigned int *ppoints, unsigned long totalpages)
{
struct task_struct *g, *p;
struct task_struct *chosen = NULL;
@@ -304,8 +299,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
for_each_process_thread(g, p) {
unsigned int points;
- switch (oom_scan_process_thread(p, totalpages, nodemask,
- force_kill)) {
+ switch (oom_scan_process_thread(oc, p, totalpages)) {
case OOM_SCAN_SELECT:
chosen = p;
chosen_points = ULONG_MAX;
@@ -318,7 +312,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
case OOM_SCAN_OK:
break;
};
- points = oom_badness(p, NULL, nodemask, totalpages);
+ points = oom_badness(p, NULL, oc->nodemask, totalpages);
if (!points || points < chosen_points)
continue;
/* Prefer thread group leaders for display purposes */
@@ -380,13 +374,13 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
rcu_read_unlock();
}
-static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
- struct mem_cgroup *memcg, const nodemask_t *nodemask)
+static void dump_header(struct oom_control *oc, struct task_struct *p,
+ struct mem_cgroup *memcg)
{
task_lock(current);
pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
"oom_score_adj=%hd\n",
- current->comm, gfp_mask, order,
+ current->comm, oc->gfp_mask, oc->order,
current->signal->oom_score_adj);
cpuset_print_task_mems_allowed(current);
task_unlock(current);
@@ -396,7 +390,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
else
show_mem(SHOW_MEM_FILTER_NODES);
if (sysctl_oom_dump_tasks)
- dump_tasks(memcg, nodemask);
+ dump_tasks(memcg, oc->nodemask);
}
/*
@@ -487,10 +481,9 @@ void oom_killer_enable(void)
* Must be called while holding a reference to p, which will be released upon
* returning.
*/
-void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
+void oom_kill_process(struct oom_control *oc, struct task_struct *p,
unsigned int points, unsigned long totalpages,
- struct mem_cgroup *memcg, nodemask_t *nodemask,
- const char *message)
+ struct mem_cgroup *memcg, const char *message)
{
struct task_struct *victim = p;
struct task_struct *child;
@@ -514,7 +507,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
task_unlock(p);
if (__ratelimit(&oom_rs))
- dump_header(p, gfp_mask, order, memcg, nodemask);
+ dump_header(oc, p, memcg);
task_lock(p);
pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
@@ -537,7 +530,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
/*
* oom_badness() returns 0 if the thread is unkillable
*/
- child_points = oom_badness(child, memcg, nodemask,
+ child_points = oom_badness(child, memcg, oc->nodemask,
totalpages);
if (child_points > victim_points) {
put_task_struct(victim);
@@ -600,8 +593,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
/*
* Determines whether the kernel must panic because of the panic_on_oom sysctl.
*/
-void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
- int order, const nodemask_t *nodemask,
+void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
struct mem_cgroup *memcg)
{
if (likely(!sysctl_panic_on_oom))
@@ -615,7 +607,7 @@ void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
if (constraint != CONSTRAINT_NONE)
return;
}
- dump_header(NULL, gfp_mask, order, memcg, nodemask);
+ dump_header(oc, NULL, memcg);
panic("Out of memory: %s panic_on_oom is enabled\n",
sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
}
@@ -635,22 +627,16 @@ int unregister_oom_notifier(struct notifier_block *nb)
EXPORT_SYMBOL_GPL(unregister_oom_notifier);
/**
- * __out_of_memory - kill the "best" process when we run out of memory
- * @zonelist: zonelist pointer
- * @gfp_mask: memory allocation flags
- * @order: amount of memory being requested as a power of 2
- * @nodemask: nodemask passed to page allocator
- * @force_kill: true if a task must be killed, even if others are exiting
+ * out_of_memory - kill the "best" process when we run out of memory
+ * @oc: pointer to struct oom_control
*
* If we run out of memory, we have the choice between either
* killing a random task (bad), letting the system crash (worse)
* OR try to be smart about which process to kill. Note that we
* don't have to be perfect here, we just have to be good.
*/
-bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
- int order, nodemask_t *nodemask, bool force_kill)
+bool out_of_memory(struct oom_control *oc)
{
- const nodemask_t *mpol_mask;
struct task_struct *p;
unsigned long totalpages;
unsigned long freed = 0;
@@ -684,30 +670,29 @@ bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
* Check if there were limitations on the allocation (only relevant for
* NUMA) that may require different handling.
*/
- constraint = constrained_alloc(zonelist, gfp_mask, nodemask,
- &totalpages);
- mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL;
- check_panic_on_oom(constraint, gfp_mask, order, mpol_mask, NULL);
+ constraint = constrained_alloc(oc, &totalpages);
+ if (constraint != CONSTRAINT_MEMORY_POLICY)
+ oc->nodemask = NULL;
+ check_panic_on_oom(oc, constraint, NULL);
if (sysctl_oom_kill_allocating_task && current->mm &&
- !oom_unkillable_task(current, NULL, nodemask) &&
+ !oom_unkillable_task(current, NULL, oc->nodemask) &&
current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
get_task_struct(current);
- oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL,
- nodemask,
+ oom_kill_process(oc, current, 0, totalpages, NULL,
"Out of memory (oom_kill_allocating_task)");
goto out;
}
- p = select_bad_process(&points, totalpages, mpol_mask, force_kill);
+ p = select_bad_process(oc, &points, totalpages);
/* Found nothing?!?! Either we hang forever, or we panic. */
if (!p) {
- dump_header(NULL, gfp_mask, order, NULL, mpol_mask);
+ dump_header(oc, NULL, NULL);
panic("Out of memory and no killable processes...\n");
}
if (p != (void *)-1UL) {
- oom_kill_process(p, gfp_mask, order, points, totalpages, NULL,
- nodemask, "Out of memory");
+ oom_kill_process(oc, p, points, totalpages, NULL,
+ "Out of memory");
killed = 1;
}
out:
@@ -728,13 +713,21 @@ out:
*/
void pagefault_out_of_memory(void)
{
+ struct oom_control oc = {
+ .zonelist = NULL,
+ .nodemask = NULL,
+ .gfp_mask = 0,
+ .order = 0,
+ .force_kill = false,
+ };
+
if (mem_cgroup_oom_synchronize(true))
return;
if (!mutex_trylock(&oom_lock))
return;
- if (!out_of_memory(NULL, 0, 0, NULL, false)) {
+ if (!out_of_memory(&oc)) {
/*
* There shouldn't be any user tasks runnable while the
* OOM killer is disabled, so the current task has to
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2680,6 +2680,13 @@ static inline struct page *
__alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
const struct alloc_context *ac, unsigned long *did_some_progress)
{
+ struct oom_control oc = {
+ .zonelist = ac->zonelist,
+ .nodemask = ac->nodemask,
+ .gfp_mask = gfp_mask,
+ .order = order,
+ .force_kill = false,
+ };
struct page *page;
*did_some_progress = 0;
@@ -2731,8 +2738,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
goto out;
}
/* Exhausted what can be done so it's blamo time */
- if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false)
- || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
+ if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
*did_some_progress = 1;
out:
mutex_unlock(&oom_lock);
^ permalink raw reply [flat|nested] 21+ messages in thread
* [patch 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq
2015-06-18 23:00 [patch 1/3] mm, oom: organize oom context into struct David Rientjes
@ 2015-06-18 23:00 ` David Rientjes
2015-06-19 7:32 ` Michal Hocko
2015-06-18 23:00 ` [patch 3/3] mm, oom: do not panic for oom kills triggered from sysrq David Rientjes
` (3 subsequent siblings)
4 siblings, 1 reply; 21+ messages in thread
From: David Rientjes @ 2015-06-18 23:00 UTC (permalink / raw)
To: Andrew Morton; +Cc: Michal Hocko, linux-kernel, linux-mm
The force_kill member of struct oom_control isn't needed if an order of
-1 is used instead.
This patch introduces no functional change.
Signed-off-by: David Rientjes <rientjes@google.com>
---
drivers/tty/sysrq.c | 3 +--
include/linux/oom.h | 1 -
mm/memcontrol.c | 1 -
mm/oom_kill.c | 5 ++---
mm/page_alloc.c | 1 -
5 files changed, 3 insertions(+), 8 deletions(-)
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -358,8 +358,7 @@ static void moom_callback(struct work_struct *ignored)
.zonelist = node_zonelist(first_memory_node, gfp_mask),
.nodemask = NULL,
.gfp_mask = gfp_mask,
- .order = 0,
- .force_kill = true,
+ .order = -1,
};
mutex_lock(&oom_lock);
diff --git a/include/linux/oom.h b/include/linux/oom.h
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -17,7 +17,6 @@ struct oom_control {
nodemask_t *nodemask;
gfp_t gfp_mask;
int order;
- bool force_kill;
};
/*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1550,7 +1550,6 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
.nodemask = NULL,
.gfp_mask = gfp_mask,
.order = order,
- .force_kill = false,
};
struct mem_cgroup *iter;
unsigned long chosen_points = 0;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -265,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
* Don't allow any other task to have access to the reserves.
*/
if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
- if (!oc->force_kill)
+ if (oc->order != -1)
return OOM_SCAN_ABORT;
}
if (!task->mm)
@@ -278,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
if (oom_task_origin(task))
return OOM_SCAN_SELECT;
- if (task_will_free_mem(task) && !oc->force_kill)
+ if (task_will_free_mem(task) && oc->order != -1)
return OOM_SCAN_ABORT;
return OOM_SCAN_OK;
@@ -718,7 +718,6 @@ void pagefault_out_of_memory(void)
.nodemask = NULL,
.gfp_mask = 0,
.order = 0,
- .force_kill = false,
};
if (mem_cgroup_oom_synchronize(true))
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2685,7 +2685,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
.nodemask = ac->nodemask,
.gfp_mask = gfp_mask,
.order = order,
- .force_kill = false,
};
struct page *page;
^ permalink raw reply [flat|nested] 21+ messages in thread
* [patch 3/3] mm, oom: do not panic for oom kills triggered from sysrq
2015-06-18 23:00 [patch 1/3] mm, oom: organize oom context into struct David Rientjes
2015-06-18 23:00 ` [patch 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq David Rientjes
@ 2015-06-18 23:00 ` David Rientjes
2015-06-19 0:14 ` [patch 1/3] mm, oom: organize oom context into struct Sergey Senozhatsky
` (2 subsequent siblings)
4 siblings, 0 replies; 21+ messages in thread
From: David Rientjes @ 2015-06-18 23:00 UTC (permalink / raw)
To: Andrew Morton; +Cc: Michal Hocko, linux-kernel, linux-mm
Sysrq+f is used to kill a process either for debug or when the VM is
otherwise unresponsive.
It is not intended to trigger a panic when no process may be killed.
Avoid panicking the system for sysrq+f when no processes are killed.
Suggested-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: David Rientjes <rientjes@google.com>
---
Documentation/sysrq.txt | 3 ++-
mm/oom_kill.c | 7 +++++--
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -75,7 +75,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
'e' - Send a SIGTERM to all processes, except for init.
-'f' - Will call oom_kill to kill a memory hog process.
+'f' - Will call the oom killer to kill a memory hog process, but do not
+ panic if nothing can be killed.
'g' - Used by kgdb (kernel debugger)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -607,6 +607,9 @@ void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
if (constraint != CONSTRAINT_NONE)
return;
}
+ /* Do not panic for oom kills triggered by sysrq */
+ if (oc->order == -1)
+ return;
dump_header(oc, NULL, memcg);
panic("Out of memory: %s panic_on_oom is enabled\n",
sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
@@ -686,11 +689,11 @@ bool out_of_memory(struct oom_control *oc)
p = select_bad_process(oc, &points, totalpages);
/* Found nothing?!?! Either we hang forever, or we panic. */
- if (!p) {
+ if (!p && oc->order != -1) {
dump_header(oc, NULL, NULL);
panic("Out of memory and no killable processes...\n");
}
- if (p != (void *)-1UL) {
+ if (p && p != (void *)-1UL) {
oom_kill_process(oc, p, points, totalpages, NULL,
"Out of memory");
killed = 1;
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch 1/3] mm, oom: organize oom context into struct
2015-06-18 23:00 [patch 1/3] mm, oom: organize oom context into struct David Rientjes
2015-06-18 23:00 ` [patch 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq David Rientjes
2015-06-18 23:00 ` [patch 3/3] mm, oom: do not panic for oom kills triggered from sysrq David Rientjes
@ 2015-06-19 0:14 ` Sergey Senozhatsky
2015-06-30 22:46 ` David Rientjes
2015-06-19 7:30 ` Michal Hocko
2015-07-01 21:37 ` [patch v2 " David Rientjes
4 siblings, 1 reply; 21+ messages in thread
From: Sergey Senozhatsky @ 2015-06-19 0:14 UTC (permalink / raw)
To: David Rientjes; +Cc: Andrew Morton, Michal Hocko, linux-kernel, linux-mm
Hello,
On (06/18/15 16:00), David Rientjes wrote:
> There are essential elements to an oom context that are passed around to
> multiple functions.
>
> Organize these elements into a new struct, struct oom_context, that
> specifies the context for an oom condition.
>
s/oom_context/oom_control/ ?
[..]
>
> +struct oom_control {
> + struct zonelist *zonelist;
> + nodemask_t *nodemask;
> + gfp_t gfp_mask;
> + int order;
> + bool force_kill;
> +};
> +
> -extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
> - int order, const nodemask_t *nodemask,
> +extern void check_panic_on_oom(struct oom_control *oc,
> + enum oom_constraint constraint,
> struct mem_cgroup *memcg);
>
> -extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
> - unsigned long totalpages, const nodemask_t *nodemask,
> - bool force_kill);
> +extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
> + struct task_struct *task, unsigned long totalpages);
>
> -extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
> - int order, nodemask_t *mask, bool force_kill);
> +extern bool out_of_memory(struct oom_control *oc);
>
> extern void exit_oom_victim(void);
>
[..]
-ss
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch 1/3] mm, oom: organize oom context into struct
2015-06-18 23:00 [patch 1/3] mm, oom: organize oom context into struct David Rientjes
` (2 preceding siblings ...)
2015-06-19 0:14 ` [patch 1/3] mm, oom: organize oom context into struct Sergey Senozhatsky
@ 2015-06-19 7:30 ` Michal Hocko
2015-07-01 21:37 ` [patch v2 " David Rientjes
4 siblings, 0 replies; 21+ messages in thread
From: Michal Hocko @ 2015-06-19 7:30 UTC (permalink / raw)
To: David Rientjes; +Cc: Andrew Morton, linux-kernel, linux-mm
On Thu 18-06-15 16:00:02, David Rientjes wrote:
> There are essential elements to an oom context that are passed around to
> multiple functions.
>
> Organize these elements into a new struct, struct oom_context, that
> specifies the context for an oom condition.
Yes this makes sense to me.
> This patch introduces no functional change.
>
> Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
> ---
> drivers/tty/sysrq.c | 12 +++++-
> include/linux/oom.h | 25 +++++++-----
> mm/memcontrol.c | 16 +++++---
> mm/oom_kill.c | 115 ++++++++++++++++++++++++----------------------------
> mm/page_alloc.c | 10 ++++-
> 5 files changed, 98 insertions(+), 80 deletions(-)
>
> diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
> --- a/drivers/tty/sysrq.c
> +++ b/drivers/tty/sysrq.c
> @@ -353,9 +353,17 @@ static struct sysrq_key_op sysrq_term_op = {
>
> static void moom_callback(struct work_struct *ignored)
> {
> + const gfp_t gfp_mask = GFP_KERNEL;
> + struct oom_control oc = {
> + .zonelist = node_zonelist(first_memory_node, gfp_mask),
> + .nodemask = NULL,
> + .gfp_mask = gfp_mask,
> + .order = 0,
> + .force_kill = true,
> + };
> +
> mutex_lock(&oom_lock);
> - if (!out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL),
> - GFP_KERNEL, 0, NULL, true))
> + if (!out_of_memory(&oc))
> pr_info("OOM request ignored because killer is disabled\n");
> mutex_unlock(&oom_lock);
> }
> diff --git a/include/linux/oom.h b/include/linux/oom.h
> --- a/include/linux/oom.h
> +++ b/include/linux/oom.h
> @@ -12,6 +12,14 @@ struct notifier_block;
> struct mem_cgroup;
> struct task_struct;
>
> +struct oom_control {
> + struct zonelist *zonelist;
> + nodemask_t *nodemask;
> + gfp_t gfp_mask;
> + int order;
> + bool force_kill;
> +};
> +
> /*
> * Types of limitations to the nodes from which allocations may occur
> */
> @@ -57,21 +65,18 @@ extern unsigned long oom_badness(struct task_struct *p,
>
> extern int oom_kills_count(void);
> extern void note_oom_kill(void);
> -extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
> +extern void oom_kill_process(struct oom_control *oc, struct task_struct *p,
> unsigned int points, unsigned long totalpages,
> - struct mem_cgroup *memcg, nodemask_t *nodemask,
> - const char *message);
> + struct mem_cgroup *memcg, const char *message);
>
> -extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
> - int order, const nodemask_t *nodemask,
> +extern void check_panic_on_oom(struct oom_control *oc,
> + enum oom_constraint constraint,
> struct mem_cgroup *memcg);
>
> -extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
> - unsigned long totalpages, const nodemask_t *nodemask,
> - bool force_kill);
> +extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
> + struct task_struct *task, unsigned long totalpages);
>
> -extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
> - int order, nodemask_t *mask, bool force_kill);
> +extern bool out_of_memory(struct oom_control *oc);
>
> extern void exit_oom_victim(void);
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1545,6 +1545,13 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
> static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
> int order)
> {
> + struct oom_control oc = {
> + .zonelist = NULL,
> + .nodemask = NULL,
> + .gfp_mask = gfp_mask,
> + .order = order,
> + .force_kill = false,
> + };
> struct mem_cgroup *iter;
> unsigned long chosen_points = 0;
> unsigned long totalpages;
> @@ -1563,7 +1570,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
> goto unlock;
> }
>
> - check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg);
> + check_panic_on_oom(&oc, CONSTRAINT_MEMCG, memcg);
> totalpages = mem_cgroup_get_limit(memcg) ? : 1;
> for_each_mem_cgroup_tree(iter, memcg) {
> struct css_task_iter it;
> @@ -1571,8 +1578,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
>
> css_task_iter_start(&iter->css, &it);
> while ((task = css_task_iter_next(&it))) {
> - switch (oom_scan_process_thread(task, totalpages, NULL,
> - false)) {
> + switch (oom_scan_process_thread(&oc, task, totalpages)) {
> case OOM_SCAN_SELECT:
> if (chosen)
> put_task_struct(chosen);
> @@ -1610,8 +1616,8 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
>
> if (chosen) {
> points = chosen_points * 1000 / totalpages;
> - oom_kill_process(chosen, gfp_mask, order, points, totalpages,
> - memcg, NULL, "Memory cgroup out of memory");
> + oom_kill_process(&oc, chosen, points, totalpages, memcg,
> + "Memory cgroup out of memory");
> }
> unlock:
> mutex_unlock(&oom_lock);
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -196,27 +196,26 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
> * Determine the type of allocation constraint.
> */
> #ifdef CONFIG_NUMA
> -static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
> - gfp_t gfp_mask, nodemask_t *nodemask,
> - unsigned long *totalpages)
> +static enum oom_constraint constrained_alloc(struct oom_control *oc,
> + unsigned long *totalpages)
> {
> struct zone *zone;
> struct zoneref *z;
> - enum zone_type high_zoneidx = gfp_zone(gfp_mask);
> + enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask);
> bool cpuset_limited = false;
> int nid;
>
> /* Default to all available memory */
> *totalpages = totalram_pages + total_swap_pages;
>
> - if (!zonelist)
> + if (!oc->zonelist)
> return CONSTRAINT_NONE;
> /*
> * Reach here only when __GFP_NOFAIL is used. So, we should avoid
> * to kill current.We have to random task kill in this case.
> * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now.
> */
> - if (gfp_mask & __GFP_THISNODE)
> + if (oc->gfp_mask & __GFP_THISNODE)
> return CONSTRAINT_NONE;
>
> /*
> @@ -224,17 +223,18 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
> * the page allocator means a mempolicy is in effect. Cpuset policy
> * is enforced in get_page_from_freelist().
> */
> - if (nodemask && !nodes_subset(node_states[N_MEMORY], *nodemask)) {
> + if (oc->nodemask &&
> + !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
> *totalpages = total_swap_pages;
> - for_each_node_mask(nid, *nodemask)
> + for_each_node_mask(nid, *oc->nodemask)
> *totalpages += node_spanned_pages(nid);
> return CONSTRAINT_MEMORY_POLICY;
> }
>
> /* Check this allocation failure is caused by cpuset's wall function */
> - for_each_zone_zonelist_nodemask(zone, z, zonelist,
> - high_zoneidx, nodemask)
> - if (!cpuset_zone_allowed(zone, gfp_mask))
> + for_each_zone_zonelist_nodemask(zone, z, oc->zonelist,
> + high_zoneidx, oc->nodemask)
> + if (!cpuset_zone_allowed(zone, oc->gfp_mask))
> cpuset_limited = true;
>
> if (cpuset_limited) {
> @@ -246,20 +246,18 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
> return CONSTRAINT_NONE;
> }
> #else
> -static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
> - gfp_t gfp_mask, nodemask_t *nodemask,
> - unsigned long *totalpages)
> +static enum oom_constraint constrained_alloc(struct oom_control *oc,
> + unsigned long *totalpages)
> {
> *totalpages = totalram_pages + total_swap_pages;
> return CONSTRAINT_NONE;
> }
> #endif
>
> -enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
> - unsigned long totalpages, const nodemask_t *nodemask,
> - bool force_kill)
> +enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
> + struct task_struct *task, unsigned long totalpages)
> {
> - if (oom_unkillable_task(task, NULL, nodemask))
> + if (oom_unkillable_task(task, NULL, oc->nodemask))
> return OOM_SCAN_CONTINUE;
>
> /*
> @@ -267,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
> * Don't allow any other task to have access to the reserves.
> */
> if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
> - if (!force_kill)
> + if (!oc->force_kill)
> return OOM_SCAN_ABORT;
> }
> if (!task->mm)
> @@ -280,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
> if (oom_task_origin(task))
> return OOM_SCAN_SELECT;
>
> - if (task_will_free_mem(task) && !force_kill)
> + if (task_will_free_mem(task) && !oc->force_kill)
> return OOM_SCAN_ABORT;
>
> return OOM_SCAN_OK;
> @@ -289,12 +287,9 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
> /*
> * Simple selection loop. We chose the process with the highest
> * number of 'points'. Returns -1 on scan abort.
> - *
> - * (not docbooked, we don't want this one cluttering up the manual)
> */
> -static struct task_struct *select_bad_process(unsigned int *ppoints,
> - unsigned long totalpages, const nodemask_t *nodemask,
> - bool force_kill)
> +static struct task_struct *select_bad_process(struct oom_control *oc,
> + unsigned int *ppoints, unsigned long totalpages)
> {
> struct task_struct *g, *p;
> struct task_struct *chosen = NULL;
> @@ -304,8 +299,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
> for_each_process_thread(g, p) {
> unsigned int points;
>
> - switch (oom_scan_process_thread(p, totalpages, nodemask,
> - force_kill)) {
> + switch (oom_scan_process_thread(oc, p, totalpages)) {
> case OOM_SCAN_SELECT:
> chosen = p;
> chosen_points = ULONG_MAX;
> @@ -318,7 +312,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
> case OOM_SCAN_OK:
> break;
> };
> - points = oom_badness(p, NULL, nodemask, totalpages);
> + points = oom_badness(p, NULL, oc->nodemask, totalpages);
> if (!points || points < chosen_points)
> continue;
> /* Prefer thread group leaders for display purposes */
> @@ -380,13 +374,13 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
> rcu_read_unlock();
> }
>
> -static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
> - struct mem_cgroup *memcg, const nodemask_t *nodemask)
> +static void dump_header(struct oom_control *oc, struct task_struct *p,
> + struct mem_cgroup *memcg)
> {
> task_lock(current);
> pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
> "oom_score_adj=%hd\n",
> - current->comm, gfp_mask, order,
> + current->comm, oc->gfp_mask, oc->order,
> current->signal->oom_score_adj);
> cpuset_print_task_mems_allowed(current);
> task_unlock(current);
> @@ -396,7 +390,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
> else
> show_mem(SHOW_MEM_FILTER_NODES);
> if (sysctl_oom_dump_tasks)
> - dump_tasks(memcg, nodemask);
> + dump_tasks(memcg, oc->nodemask);
> }
>
> /*
> @@ -487,10 +481,9 @@ void oom_killer_enable(void)
> * Must be called while holding a reference to p, which will be released upon
> * returning.
> */
> -void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
> +void oom_kill_process(struct oom_control *oc, struct task_struct *p,
> unsigned int points, unsigned long totalpages,
> - struct mem_cgroup *memcg, nodemask_t *nodemask,
> - const char *message)
> + struct mem_cgroup *memcg, const char *message)
> {
> struct task_struct *victim = p;
> struct task_struct *child;
> @@ -514,7 +507,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
> task_unlock(p);
>
> if (__ratelimit(&oom_rs))
> - dump_header(p, gfp_mask, order, memcg, nodemask);
> + dump_header(oc, p, memcg);
>
> task_lock(p);
> pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
> @@ -537,7 +530,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
> /*
> * oom_badness() returns 0 if the thread is unkillable
> */
> - child_points = oom_badness(child, memcg, nodemask,
> + child_points = oom_badness(child, memcg, oc->nodemask,
> totalpages);
> if (child_points > victim_points) {
> put_task_struct(victim);
> @@ -600,8 +593,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
> /*
> * Determines whether the kernel must panic because of the panic_on_oom sysctl.
> */
> -void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
> - int order, const nodemask_t *nodemask,
> +void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
> struct mem_cgroup *memcg)
> {
> if (likely(!sysctl_panic_on_oom))
> @@ -615,7 +607,7 @@ void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
> if (constraint != CONSTRAINT_NONE)
> return;
> }
> - dump_header(NULL, gfp_mask, order, memcg, nodemask);
> + dump_header(oc, NULL, memcg);
> panic("Out of memory: %s panic_on_oom is enabled\n",
> sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
> }
> @@ -635,22 +627,16 @@ int unregister_oom_notifier(struct notifier_block *nb)
> EXPORT_SYMBOL_GPL(unregister_oom_notifier);
>
> /**
> - * __out_of_memory - kill the "best" process when we run out of memory
> - * @zonelist: zonelist pointer
> - * @gfp_mask: memory allocation flags
> - * @order: amount of memory being requested as a power of 2
> - * @nodemask: nodemask passed to page allocator
> - * @force_kill: true if a task must be killed, even if others are exiting
> + * out_of_memory - kill the "best" process when we run out of memory
> + * @oc: pointer to struct oom_control
> *
> * If we run out of memory, we have the choice between either
> * killing a random task (bad), letting the system crash (worse)
> * OR try to be smart about which process to kill. Note that we
> * don't have to be perfect here, we just have to be good.
> */
> -bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
> - int order, nodemask_t *nodemask, bool force_kill)
> +bool out_of_memory(struct oom_control *oc)
> {
> - const nodemask_t *mpol_mask;
> struct task_struct *p;
> unsigned long totalpages;
> unsigned long freed = 0;
> @@ -684,30 +670,29 @@ bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
> * Check if there were limitations on the allocation (only relevant for
> * NUMA) that may require different handling.
> */
> - constraint = constrained_alloc(zonelist, gfp_mask, nodemask,
> - &totalpages);
> - mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL;
> - check_panic_on_oom(constraint, gfp_mask, order, mpol_mask, NULL);
> + constraint = constrained_alloc(oc, &totalpages);
> + if (constraint != CONSTRAINT_MEMORY_POLICY)
> + oc->nodemask = NULL;
> + check_panic_on_oom(oc, constraint, NULL);
>
> if (sysctl_oom_kill_allocating_task && current->mm &&
> - !oom_unkillable_task(current, NULL, nodemask) &&
> + !oom_unkillable_task(current, NULL, oc->nodemask) &&
> current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
> get_task_struct(current);
> - oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL,
> - nodemask,
> + oom_kill_process(oc, current, 0, totalpages, NULL,
> "Out of memory (oom_kill_allocating_task)");
> goto out;
> }
>
> - p = select_bad_process(&points, totalpages, mpol_mask, force_kill);
> + p = select_bad_process(oc, &points, totalpages);
> /* Found nothing?!?! Either we hang forever, or we panic. */
> if (!p) {
> - dump_header(NULL, gfp_mask, order, NULL, mpol_mask);
> + dump_header(oc, NULL, NULL);
> panic("Out of memory and no killable processes...\n");
> }
> if (p != (void *)-1UL) {
> - oom_kill_process(p, gfp_mask, order, points, totalpages, NULL,
> - nodemask, "Out of memory");
> + oom_kill_process(oc, p, points, totalpages, NULL,
> + "Out of memory");
> killed = 1;
> }
> out:
> @@ -728,13 +713,21 @@ out:
> */
> void pagefault_out_of_memory(void)
> {
> + struct oom_control oc = {
> + .zonelist = NULL,
> + .nodemask = NULL,
> + .gfp_mask = 0,
> + .order = 0,
> + .force_kill = false,
> + };
> +
> if (mem_cgroup_oom_synchronize(true))
> return;
>
> if (!mutex_trylock(&oom_lock))
> return;
>
> - if (!out_of_memory(NULL, 0, 0, NULL, false)) {
> + if (!out_of_memory(&oc)) {
> /*
> * There shouldn't be any user tasks runnable while the
> * OOM killer is disabled, so the current task has to
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2680,6 +2680,13 @@ static inline struct page *
> __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
> const struct alloc_context *ac, unsigned long *did_some_progress)
> {
> + struct oom_control oc = {
> + .zonelist = ac->zonelist,
> + .nodemask = ac->nodemask,
> + .gfp_mask = gfp_mask,
> + .order = order,
> + .force_kill = false,
> + };
> struct page *page;
>
> *did_some_progress = 0;
> @@ -2731,8 +2738,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
> goto out;
> }
> /* Exhausted what can be done so it's blamo time */
> - if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false)
> - || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
> + if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
> *did_some_progress = 1;
> out:
> mutex_unlock(&oom_lock);
--
Michal Hocko
SUSE Labs
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq
2015-06-18 23:00 ` [patch 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq David Rientjes
@ 2015-06-19 7:32 ` Michal Hocko
2015-06-30 22:50 ` David Rientjes
0 siblings, 1 reply; 21+ messages in thread
From: Michal Hocko @ 2015-06-19 7:32 UTC (permalink / raw)
To: David Rientjes; +Cc: Andrew Morton, linux-kernel, linux-mm
On Thu 18-06-15 16:00:07, David Rientjes wrote:
> The force_kill member of struct oom_control isn't needed if an order of
> -1 is used instead.
But this doesn't make much sense to me. It is not like we would _have_
to spare a few bytes here. The meaning of force_kill is clear while order
with a weird value is a hack. It is harder to follow without any good
reason.
> This patch introduces no functional change.
>
> Signed-off-by: David Rientjes <rientjes@google.com>
> ---
> drivers/tty/sysrq.c | 3 +--
> include/linux/oom.h | 1 -
> mm/memcontrol.c | 1 -
> mm/oom_kill.c | 5 ++---
> mm/page_alloc.c | 1 -
> 5 files changed, 3 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
> --- a/drivers/tty/sysrq.c
> +++ b/drivers/tty/sysrq.c
> @@ -358,8 +358,7 @@ static void moom_callback(struct work_struct *ignored)
> .zonelist = node_zonelist(first_memory_node, gfp_mask),
> .nodemask = NULL,
> .gfp_mask = gfp_mask,
> - .order = 0,
> - .force_kill = true,
> + .order = -1,
> };
>
> mutex_lock(&oom_lock);
> diff --git a/include/linux/oom.h b/include/linux/oom.h
> --- a/include/linux/oom.h
> +++ b/include/linux/oom.h
> @@ -17,7 +17,6 @@ struct oom_control {
> nodemask_t *nodemask;
> gfp_t gfp_mask;
> int order;
> - bool force_kill;
> };
>
> /*
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1550,7 +1550,6 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
> .nodemask = NULL,
> .gfp_mask = gfp_mask,
> .order = order,
> - .force_kill = false,
> };
> struct mem_cgroup *iter;
> unsigned long chosen_points = 0;
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -265,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
> * Don't allow any other task to have access to the reserves.
> */
> if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
> - if (!oc->force_kill)
> + if (oc->order != -1)
> return OOM_SCAN_ABORT;
> }
> if (!task->mm)
> @@ -278,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
> if (oom_task_origin(task))
> return OOM_SCAN_SELECT;
>
> - if (task_will_free_mem(task) && !oc->force_kill)
> + if (task_will_free_mem(task) && oc->order != -1)
> return OOM_SCAN_ABORT;
>
> return OOM_SCAN_OK;
> @@ -718,7 +718,6 @@ void pagefault_out_of_memory(void)
> .nodemask = NULL,
> .gfp_mask = 0,
> .order = 0,
> - .force_kill = false,
> };
>
> if (mem_cgroup_oom_synchronize(true))
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2685,7 +2685,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
> .nodemask = ac->nodemask,
> .gfp_mask = gfp_mask,
> .order = order,
> - .force_kill = false,
> };
> struct page *page;
>
--
Michal Hocko
SUSE Labs
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch 1/3] mm, oom: organize oom context into struct
2015-06-19 0:14 ` [patch 1/3] mm, oom: organize oom context into struct Sergey Senozhatsky
@ 2015-06-30 22:46 ` David Rientjes
2015-07-01 0:11 ` Sergey Senozhatsky
0 siblings, 1 reply; 21+ messages in thread
From: David Rientjes @ 2015-06-30 22:46 UTC (permalink / raw)
To: Sergey Senozhatsky; +Cc: Andrew Morton, Michal Hocko, linux-kernel, linux-mm
On Fri, 19 Jun 2015, Sergey Senozhatsky wrote:
> > There are essential elements to an oom context that are passed around to
> > multiple functions.
> >
> > Organize these elements into a new struct, struct oom_context, that
> > specifies the context for an oom condition.
> >
>
> s/oom_context/oom_control/ ?
>
I think it would be confused with the existing memory.oom_control for
memcg.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq
2015-06-19 7:32 ` Michal Hocko
@ 2015-06-30 22:50 ` David Rientjes
0 siblings, 0 replies; 21+ messages in thread
From: David Rientjes @ 2015-06-30 22:50 UTC (permalink / raw)
To: Michal Hocko; +Cc: Andrew Morton, linux-kernel, linux-mm
On Fri, 19 Jun 2015, Michal Hocko wrote:
> > The force_kill member of struct oom_control isn't needed if an order of
> > -1 is used instead.
>
> But this doesn't make much sense to me. It is not like we would _have_
> to spare few bytes here. The meaning of force_kill is clear while order
> with a weird value is a hack. It is harder to follow without any good
> reason.
>
To me, this is the same as treating order == -1 as special in
struct compact_control meaning that it was triggered from the command line
and we really want to fully compact memory. It seems to have a nice
symmetry.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch 1/3] mm, oom: organize oom context into struct
2015-06-30 22:46 ` David Rientjes
@ 2015-07-01 0:11 ` Sergey Senozhatsky
2015-07-01 21:29 ` David Rientjes
0 siblings, 1 reply; 21+ messages in thread
From: Sergey Senozhatsky @ 2015-07-01 0:11 UTC (permalink / raw)
To: David Rientjes
Cc: Sergey Senozhatsky, Andrew Morton, Michal Hocko, linux-kernel, linux-mm
On (06/30/15 15:46), David Rientjes wrote:
> > > There are essential elements to an oom context that are passed around to
> > > multiple functions.
> > >
> > > Organize these elements into a new struct, struct oom_context, that
> > > specifies the context for an oom condition.
> > >
> >
> > s/oom_context/oom_control/ ?
> >
>
> I think it would be confused with the existing memory.oom_control for
> memcg.
>
Hello David,
Sorry, I meant that in the commit message you say
:Organize these elements into a new struct, struct oom_context, that
:specifies the context for an oom condition.
but define and use `struct oom_control' (not `struct oom_context')
[..]
+ const gfp_t gfp_mask = GFP_KERNEL;
+ struct oom_control oc = {
+ .zonelist = node_zonelist(first_memory_node, gfp_mask),
+ .nodemask = NULL,
+ .gfp_mask = gfp_mask,
+ .order = 0,
+ .force_kill = true,
+ };
+
[..]
+struct oom_control {
+ struct zonelist *zonelist;
+ nodemask_t *nodemask;
+ gfp_t gfp_mask;
+ int order;
+ bool force_kill;
+};
[..]
etc.
-ss
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch 1/3] mm, oom: organize oom context into struct
2015-07-01 0:11 ` Sergey Senozhatsky
@ 2015-07-01 21:29 ` David Rientjes
0 siblings, 0 replies; 21+ messages in thread
From: David Rientjes @ 2015-07-01 21:29 UTC (permalink / raw)
To: Sergey Senozhatsky; +Cc: Andrew Morton, Michal Hocko, linux-kernel, linux-mm
On Wed, 1 Jul 2015, Sergey Senozhatsky wrote:
> On (06/30/15 15:46), David Rientjes wrote:
> > > > There are essential elements to an oom context that are passed around to
> > > > multiple functions.
> > > >
> > > > Organize these elements into a new struct, struct oom_context, that
> > > > specifies the context for an oom condition.
> > > >
> > >
> > > s/oom_context/oom_control/ ?
> > >
> >
> > I think it would be confused with the existing memory.oom_control for
> > memcg.
> >
>
> Hello David,
>
> Sorry, I meant that in commit message you say
>
> :Organize these elements into a new struct, struct oom_context, that
> :specifies the context for an oom condition.
>
> but define and use `struct oom_control' (not `struct oom_context')
>
Oh, point very well taken, thank you.
^ permalink raw reply [flat|nested] 21+ messages in thread
* [patch v2 1/3] mm, oom: organize oom context into struct
2015-06-18 23:00 [patch 1/3] mm, oom: organize oom context into struct David Rientjes
` (3 preceding siblings ...)
2015-06-19 7:30 ` Michal Hocko
@ 2015-07-01 21:37 ` David Rientjes
2015-07-01 21:37 ` [patch v2 2/3] " David Rientjes
` (3 more replies)
4 siblings, 4 replies; 21+ messages in thread
From: David Rientjes @ 2015-07-01 21:37 UTC (permalink / raw)
To: Andrew Morton; +Cc: Sergey Senozhatsky, Michal Hocko, linux-kernel, linux-mm
There are essential elements to an oom context that are passed around to
multiple functions.
Organize these elements into a new struct, struct oom_control, that
specifies the context for an oom condition.
This patch introduces no functional change.
Acked-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: David Rientjes <rientjes@google.com>
---
v2: fix changelog typo per Sergey
drivers/tty/sysrq.c | 12 +++++-
include/linux/oom.h | 25 +++++++-----
mm/memcontrol.c | 16 +++++---
mm/oom_kill.c | 115 ++++++++++++++++++++++++----------------------------
mm/page_alloc.c | 10 ++++-
5 files changed, 98 insertions(+), 80 deletions(-)
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -353,9 +353,17 @@ static struct sysrq_key_op sysrq_term_op = {
static void moom_callback(struct work_struct *ignored)
{
+ const gfp_t gfp_mask = GFP_KERNEL;
+ struct oom_control oc = {
+ .zonelist = node_zonelist(first_memory_node, gfp_mask),
+ .nodemask = NULL,
+ .gfp_mask = gfp_mask,
+ .order = 0,
+ .force_kill = true,
+ };
+
mutex_lock(&oom_lock);
- if (!out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL),
- GFP_KERNEL, 0, NULL, true))
+ if (!out_of_memory(&oc))
pr_info("OOM request ignored because killer is disabled\n");
mutex_unlock(&oom_lock);
}
diff --git a/include/linux/oom.h b/include/linux/oom.h
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -12,6 +12,14 @@ struct notifier_block;
struct mem_cgroup;
struct task_struct;
+struct oom_control {
+ struct zonelist *zonelist;
+ nodemask_t *nodemask;
+ gfp_t gfp_mask;
+ int order;
+ bool force_kill;
+};
+
/*
* Types of limitations to the nodes from which allocations may occur
*/
@@ -57,21 +65,18 @@ extern unsigned long oom_badness(struct task_struct *p,
extern int oom_kills_count(void);
extern void note_oom_kill(void);
-extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
+extern void oom_kill_process(struct oom_control *oc, struct task_struct *p,
unsigned int points, unsigned long totalpages,
- struct mem_cgroup *memcg, nodemask_t *nodemask,
- const char *message);
+ struct mem_cgroup *memcg, const char *message);
-extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
- int order, const nodemask_t *nodemask,
+extern void check_panic_on_oom(struct oom_control *oc,
+ enum oom_constraint constraint,
struct mem_cgroup *memcg);
-extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
- unsigned long totalpages, const nodemask_t *nodemask,
- bool force_kill);
+extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
+ struct task_struct *task, unsigned long totalpages);
-extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
- int order, nodemask_t *mask, bool force_kill);
+extern bool out_of_memory(struct oom_control *oc);
extern void exit_oom_victim(void);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1545,6 +1545,13 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
int order)
{
+ struct oom_control oc = {
+ .zonelist = NULL,
+ .nodemask = NULL,
+ .gfp_mask = gfp_mask,
+ .order = order,
+ .force_kill = false,
+ };
struct mem_cgroup *iter;
unsigned long chosen_points = 0;
unsigned long totalpages;
@@ -1563,7 +1570,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
goto unlock;
}
- check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg);
+ check_panic_on_oom(&oc, CONSTRAINT_MEMCG, memcg);
totalpages = mem_cgroup_get_limit(memcg) ? : 1;
for_each_mem_cgroup_tree(iter, memcg) {
struct css_task_iter it;
@@ -1571,8 +1578,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
css_task_iter_start(&iter->css, &it);
while ((task = css_task_iter_next(&it))) {
- switch (oom_scan_process_thread(task, totalpages, NULL,
- false)) {
+ switch (oom_scan_process_thread(&oc, task, totalpages)) {
case OOM_SCAN_SELECT:
if (chosen)
put_task_struct(chosen);
@@ -1610,8 +1616,8 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
if (chosen) {
points = chosen_points * 1000 / totalpages;
- oom_kill_process(chosen, gfp_mask, order, points, totalpages,
- memcg, NULL, "Memory cgroup out of memory");
+ oom_kill_process(&oc, chosen, points, totalpages, memcg,
+ "Memory cgroup out of memory");
}
unlock:
mutex_unlock(&oom_lock);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -196,27 +196,26 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
* Determine the type of allocation constraint.
*/
#ifdef CONFIG_NUMA
-static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
- gfp_t gfp_mask, nodemask_t *nodemask,
- unsigned long *totalpages)
+static enum oom_constraint constrained_alloc(struct oom_control *oc,
+ unsigned long *totalpages)
{
struct zone *zone;
struct zoneref *z;
- enum zone_type high_zoneidx = gfp_zone(gfp_mask);
+ enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask);
bool cpuset_limited = false;
int nid;
/* Default to all available memory */
*totalpages = totalram_pages + total_swap_pages;
- if (!zonelist)
+ if (!oc->zonelist)
return CONSTRAINT_NONE;
/*
* Reach here only when __GFP_NOFAIL is used. So, we should avoid
* to kill current.We have to random task kill in this case.
* Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now.
*/
- if (gfp_mask & __GFP_THISNODE)
+ if (oc->gfp_mask & __GFP_THISNODE)
return CONSTRAINT_NONE;
/*
@@ -224,17 +223,18 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
* the page allocator means a mempolicy is in effect. Cpuset policy
* is enforced in get_page_from_freelist().
*/
- if (nodemask && !nodes_subset(node_states[N_MEMORY], *nodemask)) {
+ if (oc->nodemask &&
+ !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
*totalpages = total_swap_pages;
- for_each_node_mask(nid, *nodemask)
+ for_each_node_mask(nid, *oc->nodemask)
*totalpages += node_spanned_pages(nid);
return CONSTRAINT_MEMORY_POLICY;
}
/* Check this allocation failure is caused by cpuset's wall function */
- for_each_zone_zonelist_nodemask(zone, z, zonelist,
- high_zoneidx, nodemask)
- if (!cpuset_zone_allowed(zone, gfp_mask))
+ for_each_zone_zonelist_nodemask(zone, z, oc->zonelist,
+ high_zoneidx, oc->nodemask)
+ if (!cpuset_zone_allowed(zone, oc->gfp_mask))
cpuset_limited = true;
if (cpuset_limited) {
@@ -246,20 +246,18 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
return CONSTRAINT_NONE;
}
#else
-static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
- gfp_t gfp_mask, nodemask_t *nodemask,
- unsigned long *totalpages)
+static enum oom_constraint constrained_alloc(struct oom_control *oc,
+ unsigned long *totalpages)
{
*totalpages = totalram_pages + total_swap_pages;
return CONSTRAINT_NONE;
}
#endif
-enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
- unsigned long totalpages, const nodemask_t *nodemask,
- bool force_kill)
+enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
+ struct task_struct *task, unsigned long totalpages)
{
- if (oom_unkillable_task(task, NULL, nodemask))
+ if (oom_unkillable_task(task, NULL, oc->nodemask))
return OOM_SCAN_CONTINUE;
/*
@@ -267,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
* Don't allow any other task to have access to the reserves.
*/
if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
- if (!force_kill)
+ if (!oc->force_kill)
return OOM_SCAN_ABORT;
}
if (!task->mm)
@@ -280,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
if (oom_task_origin(task))
return OOM_SCAN_SELECT;
- if (task_will_free_mem(task) && !force_kill)
+ if (task_will_free_mem(task) && !oc->force_kill)
return OOM_SCAN_ABORT;
return OOM_SCAN_OK;
@@ -289,12 +287,9 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
/*
* Simple selection loop. We chose the process with the highest
* number of 'points'. Returns -1 on scan abort.
- *
- * (not docbooked, we don't want this one cluttering up the manual)
*/
-static struct task_struct *select_bad_process(unsigned int *ppoints,
- unsigned long totalpages, const nodemask_t *nodemask,
- bool force_kill)
+static struct task_struct *select_bad_process(struct oom_control *oc,
+ unsigned int *ppoints, unsigned long totalpages)
{
struct task_struct *g, *p;
struct task_struct *chosen = NULL;
@@ -304,8 +299,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
for_each_process_thread(g, p) {
unsigned int points;
- switch (oom_scan_process_thread(p, totalpages, nodemask,
- force_kill)) {
+ switch (oom_scan_process_thread(oc, p, totalpages)) {
case OOM_SCAN_SELECT:
chosen = p;
chosen_points = ULONG_MAX;
@@ -318,7 +312,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
case OOM_SCAN_OK:
break;
};
- points = oom_badness(p, NULL, nodemask, totalpages);
+ points = oom_badness(p, NULL, oc->nodemask, totalpages);
if (!points || points < chosen_points)
continue;
/* Prefer thread group leaders for display purposes */
@@ -380,13 +374,13 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
rcu_read_unlock();
}
-static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
- struct mem_cgroup *memcg, const nodemask_t *nodemask)
+static void dump_header(struct oom_control *oc, struct task_struct *p,
+ struct mem_cgroup *memcg)
{
task_lock(current);
pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
"oom_score_adj=%hd\n",
- current->comm, gfp_mask, order,
+ current->comm, oc->gfp_mask, oc->order,
current->signal->oom_score_adj);
cpuset_print_task_mems_allowed(current);
task_unlock(current);
@@ -396,7 +390,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
else
show_mem(SHOW_MEM_FILTER_NODES);
if (sysctl_oom_dump_tasks)
- dump_tasks(memcg, nodemask);
+ dump_tasks(memcg, oc->nodemask);
}
/*
@@ -487,10 +481,9 @@ void oom_killer_enable(void)
* Must be called while holding a reference to p, which will be released upon
* returning.
*/
-void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
+void oom_kill_process(struct oom_control *oc, struct task_struct *p,
unsigned int points, unsigned long totalpages,
- struct mem_cgroup *memcg, nodemask_t *nodemask,
- const char *message)
+ struct mem_cgroup *memcg, const char *message)
{
struct task_struct *victim = p;
struct task_struct *child;
@@ -514,7 +507,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
task_unlock(p);
if (__ratelimit(&oom_rs))
- dump_header(p, gfp_mask, order, memcg, nodemask);
+ dump_header(oc, p, memcg);
task_lock(p);
pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
@@ -537,7 +530,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
/*
* oom_badness() returns 0 if the thread is unkillable
*/
- child_points = oom_badness(child, memcg, nodemask,
+ child_points = oom_badness(child, memcg, oc->nodemask,
totalpages);
if (child_points > victim_points) {
put_task_struct(victim);
@@ -600,8 +593,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
/*
* Determines whether the kernel must panic because of the panic_on_oom sysctl.
*/
-void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
- int order, const nodemask_t *nodemask,
+void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
struct mem_cgroup *memcg)
{
if (likely(!sysctl_panic_on_oom))
@@ -615,7 +607,7 @@ void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
if (constraint != CONSTRAINT_NONE)
return;
}
- dump_header(NULL, gfp_mask, order, memcg, nodemask);
+ dump_header(oc, NULL, memcg);
panic("Out of memory: %s panic_on_oom is enabled\n",
sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
}
@@ -635,22 +627,16 @@ int unregister_oom_notifier(struct notifier_block *nb)
EXPORT_SYMBOL_GPL(unregister_oom_notifier);
/**
- * __out_of_memory - kill the "best" process when we run out of memory
- * @zonelist: zonelist pointer
- * @gfp_mask: memory allocation flags
- * @order: amount of memory being requested as a power of 2
- * @nodemask: nodemask passed to page allocator
- * @force_kill: true if a task must be killed, even if others are exiting
+ * out_of_memory - kill the "best" process when we run out of memory
+ * @oc: pointer to struct oom_control
*
* If we run out of memory, we have the choice between either
* killing a random task (bad), letting the system crash (worse)
* OR try to be smart about which process to kill. Note that we
* don't have to be perfect here, we just have to be good.
*/
-bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
- int order, nodemask_t *nodemask, bool force_kill)
+bool out_of_memory(struct oom_control *oc)
{
- const nodemask_t *mpol_mask;
struct task_struct *p;
unsigned long totalpages;
unsigned long freed = 0;
@@ -684,30 +670,29 @@ bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
* Check if there were limitations on the allocation (only relevant for
* NUMA) that may require different handling.
*/
- constraint = constrained_alloc(zonelist, gfp_mask, nodemask,
- &totalpages);
- mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL;
- check_panic_on_oom(constraint, gfp_mask, order, mpol_mask, NULL);
+ constraint = constrained_alloc(oc, &totalpages);
+ if (constraint != CONSTRAINT_MEMORY_POLICY)
+ oc->nodemask = NULL;
+ check_panic_on_oom(oc, constraint, NULL);
if (sysctl_oom_kill_allocating_task && current->mm &&
- !oom_unkillable_task(current, NULL, nodemask) &&
+ !oom_unkillable_task(current, NULL, oc->nodemask) &&
current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
get_task_struct(current);
- oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL,
- nodemask,
+ oom_kill_process(oc, current, 0, totalpages, NULL,
"Out of memory (oom_kill_allocating_task)");
goto out;
}
- p = select_bad_process(&points, totalpages, mpol_mask, force_kill);
+ p = select_bad_process(oc, &points, totalpages);
/* Found nothing?!?! Either we hang forever, or we panic. */
if (!p) {
- dump_header(NULL, gfp_mask, order, NULL, mpol_mask);
+ dump_header(oc, NULL, NULL);
panic("Out of memory and no killable processes...\n");
}
if (p != (void *)-1UL) {
- oom_kill_process(p, gfp_mask, order, points, totalpages, NULL,
- nodemask, "Out of memory");
+ oom_kill_process(oc, p, points, totalpages, NULL,
+ "Out of memory");
killed = 1;
}
out:
@@ -728,13 +713,21 @@ out:
*/
void pagefault_out_of_memory(void)
{
+ struct oom_control oc = {
+ .zonelist = NULL,
+ .nodemask = NULL,
+ .gfp_mask = 0,
+ .order = 0,
+ .force_kill = false,
+ };
+
if (mem_cgroup_oom_synchronize(true))
return;
if (!mutex_trylock(&oom_lock))
return;
- if (!out_of_memory(NULL, 0, 0, NULL, false)) {
+ if (!out_of_memory(&oc)) {
/*
* There shouldn't be any user tasks runnable while the
* OOM killer is disabled, so the current task has to
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2680,6 +2680,13 @@ static inline struct page *
__alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
const struct alloc_context *ac, unsigned long *did_some_progress)
{
+ struct oom_control oc = {
+ .zonelist = ac->zonelist,
+ .nodemask = ac->nodemask,
+ .gfp_mask = gfp_mask,
+ .order = order,
+ .force_kill = false,
+ };
struct page *page;
*did_some_progress = 0;
@@ -2731,8 +2738,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
goto out;
}
/* Exhausted what can be done so it's blamo time */
- if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false)
- || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
+ if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
*did_some_progress = 1;
out:
mutex_unlock(&oom_lock);
^ permalink raw reply [flat|nested] 21+ messages in thread
* [patch v2 2/3] mm, oom: organize oom context into struct
2015-07-01 21:37 ` [patch v2 " David Rientjes
@ 2015-07-01 21:37 ` David Rientjes
2015-07-02 6:01 ` Michal Hocko
2015-07-01 21:37 ` [patch v2 3/3] " David Rientjes
` (2 subsequent siblings)
3 siblings, 1 reply; 21+ messages in thread
From: David Rientjes @ 2015-07-01 21:37 UTC (permalink / raw)
To: Andrew Morton; +Cc: Sergey Senozhatsky, Michal Hocko, linux-kernel, linux-mm
The force_kill member of struct oom_control isn't needed if an order of
-1 is used instead. This is the same as order == -1 in
struct compact_control which requires full memory compaction.
This patch introduces no functional change.
Signed-off-by: David Rientjes <rientjes@google.com>
---
v2: fix changelog typo per Sergey
drivers/tty/sysrq.c | 3 +--
include/linux/oom.h | 1 -
mm/memcontrol.c | 1 -
mm/oom_kill.c | 5 ++---
mm/page_alloc.c | 1 -
5 files changed, 3 insertions(+), 8 deletions(-)
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -358,8 +358,7 @@ static void moom_callback(struct work_struct *ignored)
.zonelist = node_zonelist(first_memory_node, gfp_mask),
.nodemask = NULL,
.gfp_mask = gfp_mask,
- .order = 0,
- .force_kill = true,
+ .order = -1,
};
mutex_lock(&oom_lock);
diff --git a/include/linux/oom.h b/include/linux/oom.h
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -17,7 +17,6 @@ struct oom_control {
nodemask_t *nodemask;
gfp_t gfp_mask;
int order;
- bool force_kill;
};
/*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1550,7 +1550,6 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
.nodemask = NULL,
.gfp_mask = gfp_mask,
.order = order,
- .force_kill = false,
};
struct mem_cgroup *iter;
unsigned long chosen_points = 0;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -265,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
* Don't allow any other task to have access to the reserves.
*/
if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
- if (!oc->force_kill)
+ if (oc->order != -1)
return OOM_SCAN_ABORT;
}
if (!task->mm)
@@ -278,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
if (oom_task_origin(task))
return OOM_SCAN_SELECT;
- if (task_will_free_mem(task) && !oc->force_kill)
+ if (task_will_free_mem(task) && oc->order != -1)
return OOM_SCAN_ABORT;
return OOM_SCAN_OK;
@@ -718,7 +718,6 @@ void pagefault_out_of_memory(void)
.nodemask = NULL,
.gfp_mask = 0,
.order = 0,
- .force_kill = false,
};
if (mem_cgroup_oom_synchronize(true))
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2685,7 +2685,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
.nodemask = ac->nodemask,
.gfp_mask = gfp_mask,
.order = order,
- .force_kill = false,
};
struct page *page;
^ permalink raw reply [flat|nested] 21+ messages in thread
* [patch v2 3/3] mm, oom: organize oom context into struct
2015-07-01 21:37 ` [patch v2 " David Rientjes
2015-07-01 21:37 ` [patch v2 2/3] " David Rientjes
@ 2015-07-01 21:37 ` David Rientjes
2015-07-02 6:00 ` [patch v2 1/3] " Michal Hocko
2015-07-08 23:42 ` [patch v3 " David Rientjes
3 siblings, 0 replies; 21+ messages in thread
From: David Rientjes @ 2015-07-01 21:37 UTC (permalink / raw)
To: Andrew Morton; +Cc: Sergey Senozhatsky, Michal Hocko, linux-kernel, linux-mm
Sysrq+f is used to kill a process either for debug or when the VM is
otherwise unresponsive.
It is not intended to trigger a panic when no process may be killed.
Avoid panicking the system for sysrq+f when no processes are killed.
Suggested-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: David Rientjes <rientjes@google.com>
---
v2: no change
Documentation/sysrq.txt | 3 ++-
mm/oom_kill.c | 7 +++++--
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -75,7 +75,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
'e' - Send a SIGTERM to all processes, except for init.
-'f' - Will call oom_kill to kill a memory hog process.
+'f' - Will call the oom killer to kill a memory hog process, but do not
+ panic if nothing can be killed.
'g' - Used by kgdb (kernel debugger)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -607,6 +607,9 @@ void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
if (constraint != CONSTRAINT_NONE)
return;
}
+ /* Do not panic for oom kills triggered by sysrq */
+ if (oc->order == -1)
+ return;
dump_header(oc, NULL, memcg);
panic("Out of memory: %s panic_on_oom is enabled\n",
sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
@@ -686,11 +689,11 @@ bool out_of_memory(struct oom_control *oc)
p = select_bad_process(oc, &points, totalpages);
/* Found nothing?!?! Either we hang forever, or we panic. */
- if (!p) {
+ if (!p && oc->order != -1) {
dump_header(oc, NULL, NULL);
panic("Out of memory and no killable processes...\n");
}
- if (p != (void *)-1UL) {
+ if (p && p != (void *)-1UL) {
oom_kill_process(oc, p, points, totalpages, NULL,
"Out of memory");
killed = 1;
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch v2 1/3] mm, oom: organize oom context into struct
2015-07-01 21:37 ` [patch v2 " David Rientjes
2015-07-01 21:37 ` [patch v2 2/3] " David Rientjes
2015-07-01 21:37 ` [patch v2 3/3] " David Rientjes
@ 2015-07-02 6:00 ` Michal Hocko
2015-07-08 23:42 ` [patch v3 " David Rientjes
3 siblings, 0 replies; 21+ messages in thread
From: Michal Hocko @ 2015-07-02 6:00 UTC (permalink / raw)
To: David Rientjes; +Cc: Andrew Morton, Sergey Senozhatsky, linux-kernel, linux-mm
JFYI. I have added this patch of yours to my series, which fixes
another sysrq+f issue; it is waiting for the merge window to close.
On Wed 01-07-15 14:37:07, David Rientjes wrote:
> There are essential elements to an oom context that are passed around to
> multiple functions.
>
> Organize these elements into a new struct, struct oom_control, that
> specifies the context for an oom condition.
>
> This patch introduces no functional change.
>
> Acked-by: Michal Hocko <mhocko@suse.cz>
> Signed-off-by: David Rientjes <rientjes@google.com>
> ---
> v2: fix changelog typo per Sergey
>
> drivers/tty/sysrq.c | 12 +++++-
> include/linux/oom.h | 25 +++++++-----
> mm/memcontrol.c | 16 +++++---
> mm/oom_kill.c | 115 ++++++++++++++++++++++++----------------------------
> mm/page_alloc.c | 10 ++++-
> 5 files changed, 98 insertions(+), 80 deletions(-)
>
> diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
> --- a/drivers/tty/sysrq.c
> +++ b/drivers/tty/sysrq.c
> @@ -353,9 +353,17 @@ static struct sysrq_key_op sysrq_term_op = {
>
> static void moom_callback(struct work_struct *ignored)
> {
> + const gfp_t gfp_mask = GFP_KERNEL;
> + struct oom_control oc = {
> + .zonelist = node_zonelist(first_memory_node, gfp_mask),
> + .nodemask = NULL,
> + .gfp_mask = gfp_mask,
> + .order = 0,
> + .force_kill = true,
> + };
> +
> mutex_lock(&oom_lock);
> - if (!out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL),
> - GFP_KERNEL, 0, NULL, true))
> + if (!out_of_memory(&oc))
> pr_info("OOM request ignored because killer is disabled\n");
> mutex_unlock(&oom_lock);
> }
> diff --git a/include/linux/oom.h b/include/linux/oom.h
> --- a/include/linux/oom.h
> +++ b/include/linux/oom.h
> @@ -12,6 +12,14 @@ struct notifier_block;
> struct mem_cgroup;
> struct task_struct;
>
> +struct oom_control {
> + struct zonelist *zonelist;
> + nodemask_t *nodemask;
> + gfp_t gfp_mask;
> + int order;
> + bool force_kill;
> +};
> +
> /*
> * Types of limitations to the nodes from which allocations may occur
> */
> @@ -57,21 +65,18 @@ extern unsigned long oom_badness(struct task_struct *p,
>
> extern int oom_kills_count(void);
> extern void note_oom_kill(void);
> -extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
> +extern void oom_kill_process(struct oom_control *oc, struct task_struct *p,
> unsigned int points, unsigned long totalpages,
> - struct mem_cgroup *memcg, nodemask_t *nodemask,
> - const char *message);
> + struct mem_cgroup *memcg, const char *message);
>
> -extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
> - int order, const nodemask_t *nodemask,
> +extern void check_panic_on_oom(struct oom_control *oc,
> + enum oom_constraint constraint,
> struct mem_cgroup *memcg);
>
> -extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
> - unsigned long totalpages, const nodemask_t *nodemask,
> - bool force_kill);
> +extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
> + struct task_struct *task, unsigned long totalpages);
>
> -extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
> - int order, nodemask_t *mask, bool force_kill);
> +extern bool out_of_memory(struct oom_control *oc);
>
> extern void exit_oom_victim(void);
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1545,6 +1545,13 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
> static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
> int order)
> {
> + struct oom_control oc = {
> + .zonelist = NULL,
> + .nodemask = NULL,
> + .gfp_mask = gfp_mask,
> + .order = order,
> + .force_kill = false,
> + };
> struct mem_cgroup *iter;
> unsigned long chosen_points = 0;
> unsigned long totalpages;
> @@ -1563,7 +1570,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
> goto unlock;
> }
>
> - check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg);
> + check_panic_on_oom(&oc, CONSTRAINT_MEMCG, memcg);
> totalpages = mem_cgroup_get_limit(memcg) ? : 1;
> for_each_mem_cgroup_tree(iter, memcg) {
> struct css_task_iter it;
> @@ -1571,8 +1578,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
>
> css_task_iter_start(&iter->css, &it);
> while ((task = css_task_iter_next(&it))) {
> - switch (oom_scan_process_thread(task, totalpages, NULL,
> - false)) {
> + switch (oom_scan_process_thread(&oc, task, totalpages)) {
> case OOM_SCAN_SELECT:
> if (chosen)
> put_task_struct(chosen);
> @@ -1610,8 +1616,8 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
>
> if (chosen) {
> points = chosen_points * 1000 / totalpages;
> - oom_kill_process(chosen, gfp_mask, order, points, totalpages,
> - memcg, NULL, "Memory cgroup out of memory");
> + oom_kill_process(&oc, chosen, points, totalpages, memcg,
> + "Memory cgroup out of memory");
> }
> unlock:
> mutex_unlock(&oom_lock);
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -196,27 +196,26 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
> * Determine the type of allocation constraint.
> */
> #ifdef CONFIG_NUMA
> -static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
> - gfp_t gfp_mask, nodemask_t *nodemask,
> - unsigned long *totalpages)
> +static enum oom_constraint constrained_alloc(struct oom_control *oc,
> + unsigned long *totalpages)
> {
> struct zone *zone;
> struct zoneref *z;
> - enum zone_type high_zoneidx = gfp_zone(gfp_mask);
> + enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask);
> bool cpuset_limited = false;
> int nid;
>
> /* Default to all available memory */
> *totalpages = totalram_pages + total_swap_pages;
>
> - if (!zonelist)
> + if (!oc->zonelist)
> return CONSTRAINT_NONE;
> /*
> * Reach here only when __GFP_NOFAIL is used. So, we should avoid
> * to kill current.We have to random task kill in this case.
> * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now.
> */
> - if (gfp_mask & __GFP_THISNODE)
> + if (oc->gfp_mask & __GFP_THISNODE)
> return CONSTRAINT_NONE;
>
> /*
> @@ -224,17 +223,18 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
> * the page allocator means a mempolicy is in effect. Cpuset policy
> * is enforced in get_page_from_freelist().
> */
> - if (nodemask && !nodes_subset(node_states[N_MEMORY], *nodemask)) {
> + if (oc->nodemask &&
> + !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
> *totalpages = total_swap_pages;
> - for_each_node_mask(nid, *nodemask)
> + for_each_node_mask(nid, *oc->nodemask)
> *totalpages += node_spanned_pages(nid);
> return CONSTRAINT_MEMORY_POLICY;
> }
>
> /* Check this allocation failure is caused by cpuset's wall function */
> - for_each_zone_zonelist_nodemask(zone, z, zonelist,
> - high_zoneidx, nodemask)
> - if (!cpuset_zone_allowed(zone, gfp_mask))
> + for_each_zone_zonelist_nodemask(zone, z, oc->zonelist,
> + high_zoneidx, oc->nodemask)
> + if (!cpuset_zone_allowed(zone, oc->gfp_mask))
> cpuset_limited = true;
>
> if (cpuset_limited) {
> @@ -246,20 +246,18 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
> return CONSTRAINT_NONE;
> }
> #else
> -static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
> - gfp_t gfp_mask, nodemask_t *nodemask,
> - unsigned long *totalpages)
> +static enum oom_constraint constrained_alloc(struct oom_control *oc,
> + unsigned long *totalpages)
> {
> *totalpages = totalram_pages + total_swap_pages;
> return CONSTRAINT_NONE;
> }
> #endif
>
> -enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
> - unsigned long totalpages, const nodemask_t *nodemask,
> - bool force_kill)
> +enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
> + struct task_struct *task, unsigned long totalpages)
> {
> - if (oom_unkillable_task(task, NULL, nodemask))
> + if (oom_unkillable_task(task, NULL, oc->nodemask))
> return OOM_SCAN_CONTINUE;
>
> /*
> @@ -267,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
> * Don't allow any other task to have access to the reserves.
> */
> if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
> - if (!force_kill)
> + if (!oc->force_kill)
> return OOM_SCAN_ABORT;
> }
> if (!task->mm)
> @@ -280,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
> if (oom_task_origin(task))
> return OOM_SCAN_SELECT;
>
> - if (task_will_free_mem(task) && !force_kill)
> + if (task_will_free_mem(task) && !oc->force_kill)
> return OOM_SCAN_ABORT;
>
> return OOM_SCAN_OK;
> @@ -289,12 +287,9 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
> /*
> * Simple selection loop. We chose the process with the highest
> * number of 'points'. Returns -1 on scan abort.
> - *
> - * (not docbooked, we don't want this one cluttering up the manual)
> */
> -static struct task_struct *select_bad_process(unsigned int *ppoints,
> - unsigned long totalpages, const nodemask_t *nodemask,
> - bool force_kill)
> +static struct task_struct *select_bad_process(struct oom_control *oc,
> + unsigned int *ppoints, unsigned long totalpages)
> {
> struct task_struct *g, *p;
> struct task_struct *chosen = NULL;
> @@ -304,8 +299,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
> for_each_process_thread(g, p) {
> unsigned int points;
>
> - switch (oom_scan_process_thread(p, totalpages, nodemask,
> - force_kill)) {
> + switch (oom_scan_process_thread(oc, p, totalpages)) {
> case OOM_SCAN_SELECT:
> chosen = p;
> chosen_points = ULONG_MAX;
> @@ -318,7 +312,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
> case OOM_SCAN_OK:
> break;
> };
> - points = oom_badness(p, NULL, nodemask, totalpages);
> + points = oom_badness(p, NULL, oc->nodemask, totalpages);
> if (!points || points < chosen_points)
> continue;
> /* Prefer thread group leaders for display purposes */
> @@ -380,13 +374,13 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
> rcu_read_unlock();
> }
>
> -static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
> - struct mem_cgroup *memcg, const nodemask_t *nodemask)
> +static void dump_header(struct oom_control *oc, struct task_struct *p,
> + struct mem_cgroup *memcg)
> {
> task_lock(current);
> pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
> "oom_score_adj=%hd\n",
> - current->comm, gfp_mask, order,
> + current->comm, oc->gfp_mask, oc->order,
> current->signal->oom_score_adj);
> cpuset_print_task_mems_allowed(current);
> task_unlock(current);
> @@ -396,7 +390,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
> else
> show_mem(SHOW_MEM_FILTER_NODES);
> if (sysctl_oom_dump_tasks)
> - dump_tasks(memcg, nodemask);
> + dump_tasks(memcg, oc->nodemask);
> }
>
> /*
> @@ -487,10 +481,9 @@ void oom_killer_enable(void)
> * Must be called while holding a reference to p, which will be released upon
> * returning.
> */
> -void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
> +void oom_kill_process(struct oom_control *oc, struct task_struct *p,
> unsigned int points, unsigned long totalpages,
> - struct mem_cgroup *memcg, nodemask_t *nodemask,
> - const char *message)
> + struct mem_cgroup *memcg, const char *message)
> {
> struct task_struct *victim = p;
> struct task_struct *child;
> @@ -514,7 +507,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
> task_unlock(p);
>
> if (__ratelimit(&oom_rs))
> - dump_header(p, gfp_mask, order, memcg, nodemask);
> + dump_header(oc, p, memcg);
>
> task_lock(p);
> pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
> @@ -537,7 +530,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
> /*
> * oom_badness() returns 0 if the thread is unkillable
> */
> - child_points = oom_badness(child, memcg, nodemask,
> + child_points = oom_badness(child, memcg, oc->nodemask,
> totalpages);
> if (child_points > victim_points) {
> put_task_struct(victim);
> @@ -600,8 +593,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
> /*
> * Determines whether the kernel must panic because of the panic_on_oom sysctl.
> */
> -void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
> - int order, const nodemask_t *nodemask,
> +void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
> struct mem_cgroup *memcg)
> {
> if (likely(!sysctl_panic_on_oom))
> @@ -615,7 +607,7 @@ void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
> if (constraint != CONSTRAINT_NONE)
> return;
> }
> - dump_header(NULL, gfp_mask, order, memcg, nodemask);
> + dump_header(oc, NULL, memcg);
> panic("Out of memory: %s panic_on_oom is enabled\n",
> sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
> }
> @@ -635,22 +627,16 @@ int unregister_oom_notifier(struct notifier_block *nb)
> EXPORT_SYMBOL_GPL(unregister_oom_notifier);
>
> /**
> - * __out_of_memory - kill the "best" process when we run out of memory
> - * @zonelist: zonelist pointer
> - * @gfp_mask: memory allocation flags
> - * @order: amount of memory being requested as a power of 2
> - * @nodemask: nodemask passed to page allocator
> - * @force_kill: true if a task must be killed, even if others are exiting
> + * out_of_memory - kill the "best" process when we run out of memory
> + * @oc: pointer to struct oom_control
> *
> * If we run out of memory, we have the choice between either
> * killing a random task (bad), letting the system crash (worse)
> * OR try to be smart about which process to kill. Note that we
> * don't have to be perfect here, we just have to be good.
> */
> -bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
> - int order, nodemask_t *nodemask, bool force_kill)
> +bool out_of_memory(struct oom_control *oc)
> {
> - const nodemask_t *mpol_mask;
> struct task_struct *p;
> unsigned long totalpages;
> unsigned long freed = 0;
> @@ -684,30 +670,29 @@ bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
> * Check if there were limitations on the allocation (only relevant for
> * NUMA) that may require different handling.
> */
> - constraint = constrained_alloc(zonelist, gfp_mask, nodemask,
> - &totalpages);
> - mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL;
> - check_panic_on_oom(constraint, gfp_mask, order, mpol_mask, NULL);
> + constraint = constrained_alloc(oc, &totalpages);
> + if (constraint != CONSTRAINT_MEMORY_POLICY)
> + oc->nodemask = NULL;
> + check_panic_on_oom(oc, constraint, NULL);
>
> if (sysctl_oom_kill_allocating_task && current->mm &&
> - !oom_unkillable_task(current, NULL, nodemask) &&
> + !oom_unkillable_task(current, NULL, oc->nodemask) &&
> current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
> get_task_struct(current);
> - oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL,
> - nodemask,
> + oom_kill_process(oc, current, 0, totalpages, NULL,
> "Out of memory (oom_kill_allocating_task)");
> goto out;
> }
>
> - p = select_bad_process(&points, totalpages, mpol_mask, force_kill);
> + p = select_bad_process(oc, &points, totalpages);
> /* Found nothing?!?! Either we hang forever, or we panic. */
> if (!p) {
> - dump_header(NULL, gfp_mask, order, NULL, mpol_mask);
> + dump_header(oc, NULL, NULL);
> panic("Out of memory and no killable processes...\n");
> }
> if (p != (void *)-1UL) {
> - oom_kill_process(p, gfp_mask, order, points, totalpages, NULL,
> - nodemask, "Out of memory");
> + oom_kill_process(oc, p, points, totalpages, NULL,
> + "Out of memory");
> killed = 1;
> }
> out:
> @@ -728,13 +713,21 @@ out:
> */
> void pagefault_out_of_memory(void)
> {
> + struct oom_control oc = {
> + .zonelist = NULL,
> + .nodemask = NULL,
> + .gfp_mask = 0,
> + .order = 0,
> + .force_kill = false,
> + };
> +
> if (mem_cgroup_oom_synchronize(true))
> return;
>
> if (!mutex_trylock(&oom_lock))
> return;
>
> - if (!out_of_memory(NULL, 0, 0, NULL, false)) {
> + if (!out_of_memory(&oc)) {
> /*
> * There shouldn't be any user tasks runnable while the
> * OOM killer is disabled, so the current task has to
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2680,6 +2680,13 @@ static inline struct page *
> __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
> const struct alloc_context *ac, unsigned long *did_some_progress)
> {
> + struct oom_control oc = {
> + .zonelist = ac->zonelist,
> + .nodemask = ac->nodemask,
> + .gfp_mask = gfp_mask,
> + .order = order,
> + .force_kill = false,
> + };
> struct page *page;
>
> *did_some_progress = 0;
> @@ -2731,8 +2738,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
> goto out;
> }
> /* Exhausted what can be done so it's blamo time */
> - if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false)
> - || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
> + if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
> *did_some_progress = 1;
> out:
> mutex_unlock(&oom_lock);
--
Michal Hocko
SUSE Labs
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch v2 2/3] mm, oom: organize oom context into struct
2015-07-01 21:37 ` [patch v2 2/3] " David Rientjes
@ 2015-07-02 6:01 ` Michal Hocko
2015-07-08 23:28 ` David Rientjes
0 siblings, 1 reply; 21+ messages in thread
From: Michal Hocko @ 2015-07-02 6:01 UTC (permalink / raw)
To: David Rientjes; +Cc: Andrew Morton, Sergey Senozhatsky, linux-kernel, linux-mm
On Wed 01-07-15 14:37:14, David Rientjes wrote:
> The force_kill member of struct oom_control isn't needed if an order of
> -1 is used instead. This is the same as order == -1 in
> struct compact_control which requires full memory compaction.
>
> This patch introduces no functional change.
But it obscures the code and I really dislike this change as pointed out
previously.
> Signed-off-by: David Rientjes <rientjes@google.com>
> ---
> v2: fix changelog typo per Sergey
>
> drivers/tty/sysrq.c | 3 +--
> include/linux/oom.h | 1 -
> mm/memcontrol.c | 1 -
> mm/oom_kill.c | 5 ++---
> mm/page_alloc.c | 1 -
> 5 files changed, 3 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
> --- a/drivers/tty/sysrq.c
> +++ b/drivers/tty/sysrq.c
> @@ -358,8 +358,7 @@ static void moom_callback(struct work_struct *ignored)
> .zonelist = node_zonelist(first_memory_node, gfp_mask),
> .nodemask = NULL,
> .gfp_mask = gfp_mask,
> - .order = 0,
> - .force_kill = true,
> + .order = -1,
> };
>
> mutex_lock(&oom_lock);
> diff --git a/include/linux/oom.h b/include/linux/oom.h
> --- a/include/linux/oom.h
> +++ b/include/linux/oom.h
> @@ -17,7 +17,6 @@ struct oom_control {
> nodemask_t *nodemask;
> gfp_t gfp_mask;
> int order;
> - bool force_kill;
> };
>
> /*
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1550,7 +1550,6 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
> .nodemask = NULL,
> .gfp_mask = gfp_mask,
> .order = order,
> - .force_kill = false,
> };
> struct mem_cgroup *iter;
> unsigned long chosen_points = 0;
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -265,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
> * Don't allow any other task to have access to the reserves.
> */
> if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
> - if (!oc->force_kill)
> + if (oc->order != -1)
> return OOM_SCAN_ABORT;
> }
> if (!task->mm)
> @@ -278,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
> if (oom_task_origin(task))
> return OOM_SCAN_SELECT;
>
> - if (task_will_free_mem(task) && !oc->force_kill)
> + if (task_will_free_mem(task) && oc->order != -1)
> return OOM_SCAN_ABORT;
>
> return OOM_SCAN_OK;
> @@ -718,7 +718,6 @@ void pagefault_out_of_memory(void)
> .nodemask = NULL,
> .gfp_mask = 0,
> .order = 0,
> - .force_kill = false,
> };
>
> if (mem_cgroup_oom_synchronize(true))
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2685,7 +2685,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
> .nodemask = ac->nodemask,
> .gfp_mask = gfp_mask,
> .order = order,
> - .force_kill = false,
> };
> struct page *page;
>
--
Michal Hocko
SUSE Labs
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch v2 2/3] mm, oom: organize oom context into struct
2015-07-02 6:01 ` Michal Hocko
@ 2015-07-08 23:28 ` David Rientjes
0 siblings, 0 replies; 21+ messages in thread
From: David Rientjes @ 2015-07-08 23:28 UTC (permalink / raw)
To: Michal Hocko; +Cc: Andrew Morton, Sergey Senozhatsky, linux-kernel, linux-mm
On Thu, 2 Jul 2015, Michal Hocko wrote:
> On Wed 01-07-15 14:37:14, David Rientjes wrote:
> > The force_kill member of struct oom_control isn't needed if an order of
> > -1 is used instead. This is the same as order == -1 in
> > struct compact_control which requires full memory compaction.
> >
> > This patch introduces no functional change.
>
> But it obscures the code and I really dislike this change as pointed out
> previously.
>
The oom killer is often called at the end of a very lengthy stack since
memory allocation itself can be called deep in the stack. Thus, reducing
the amount of stack memory used, even by a small lil bool, is helpful. This is
especially true when another such struct, struct compact_control, does the
exact same thing by using order == -1 to mean explicit compaction.
I'm personally tired of fixing stack overflows and you're arguing against
"obscurity" that even occurs in other parts of the mm code.
oc->force_kill has no reason to exist, and thus it's removed in this patch
and for good reason.
^ permalink raw reply [flat|nested] 21+ messages in thread
* [patch v3 1/3] mm, oom: organize oom context into struct
2015-07-01 21:37 ` [patch v2 " David Rientjes
` (2 preceding siblings ...)
2015-07-02 6:00 ` [patch v2 1/3] " Michal Hocko
@ 2015-07-08 23:42 ` David Rientjes
2015-07-08 23:42 ` [patch v3 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq David Rientjes
` (2 more replies)
3 siblings, 3 replies; 21+ messages in thread
From: David Rientjes @ 2015-07-08 23:42 UTC (permalink / raw)
To: Andrew Morton; +Cc: Sergey Senozhatsky, Michal Hocko, linux-kernel, linux-mm
There are essential elements to an oom context that are passed around to
multiple functions.
Organize these elements into a new struct, struct oom_control, that
specifies the context for an oom condition.
This patch introduces no functional change.
Acked-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: David Rientjes <rientjes@google.com>
---
v2: fix changelog typo per Sergey
v3: no change
drivers/tty/sysrq.c | 12 +++++-
include/linux/oom.h | 25 +++++++-----
mm/memcontrol.c | 16 +++++---
mm/oom_kill.c | 115 ++++++++++++++++++++++++----------------------------
mm/page_alloc.c | 10 ++++-
5 files changed, 98 insertions(+), 80 deletions(-)
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -353,9 +353,17 @@ static struct sysrq_key_op sysrq_term_op = {
static void moom_callback(struct work_struct *ignored)
{
+ const gfp_t gfp_mask = GFP_KERNEL;
+ struct oom_control oc = {
+ .zonelist = node_zonelist(first_memory_node, gfp_mask),
+ .nodemask = NULL,
+ .gfp_mask = gfp_mask,
+ .order = 0,
+ .force_kill = true,
+ };
+
mutex_lock(&oom_lock);
- if (!out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL),
- GFP_KERNEL, 0, NULL, true))
+ if (!out_of_memory(&oc))
pr_info("OOM request ignored because killer is disabled\n");
mutex_unlock(&oom_lock);
}
diff --git a/include/linux/oom.h b/include/linux/oom.h
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -12,6 +12,14 @@ struct notifier_block;
struct mem_cgroup;
struct task_struct;
+struct oom_control {
+ struct zonelist *zonelist;
+ nodemask_t *nodemask;
+ gfp_t gfp_mask;
+ int order;
+ bool force_kill;
+};
+
/*
* Types of limitations to the nodes from which allocations may occur
*/
@@ -57,21 +65,18 @@ extern unsigned long oom_badness(struct task_struct *p,
extern int oom_kills_count(void);
extern void note_oom_kill(void);
-extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
+extern void oom_kill_process(struct oom_control *oc, struct task_struct *p,
unsigned int points, unsigned long totalpages,
- struct mem_cgroup *memcg, nodemask_t *nodemask,
- const char *message);
+ struct mem_cgroup *memcg, const char *message);
-extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
- int order, const nodemask_t *nodemask,
+extern void check_panic_on_oom(struct oom_control *oc,
+ enum oom_constraint constraint,
struct mem_cgroup *memcg);
-extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
- unsigned long totalpages, const nodemask_t *nodemask,
- bool force_kill);
+extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
+ struct task_struct *task, unsigned long totalpages);
-extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
- int order, nodemask_t *mask, bool force_kill);
+extern bool out_of_memory(struct oom_control *oc);
extern void exit_oom_victim(void);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1545,6 +1545,13 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
int order)
{
+ struct oom_control oc = {
+ .zonelist = NULL,
+ .nodemask = NULL,
+ .gfp_mask = gfp_mask,
+ .order = order,
+ .force_kill = false,
+ };
struct mem_cgroup *iter;
unsigned long chosen_points = 0;
unsigned long totalpages;
@@ -1563,7 +1570,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
goto unlock;
}
- check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg);
+ check_panic_on_oom(&oc, CONSTRAINT_MEMCG, memcg);
totalpages = mem_cgroup_get_limit(memcg) ? : 1;
for_each_mem_cgroup_tree(iter, memcg) {
struct css_task_iter it;
@@ -1571,8 +1578,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
css_task_iter_start(&iter->css, &it);
while ((task = css_task_iter_next(&it))) {
- switch (oom_scan_process_thread(task, totalpages, NULL,
- false)) {
+ switch (oom_scan_process_thread(&oc, task, totalpages)) {
case OOM_SCAN_SELECT:
if (chosen)
put_task_struct(chosen);
@@ -1610,8 +1616,8 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
if (chosen) {
points = chosen_points * 1000 / totalpages;
- oom_kill_process(chosen, gfp_mask, order, points, totalpages,
- memcg, NULL, "Memory cgroup out of memory");
+ oom_kill_process(&oc, chosen, points, totalpages, memcg,
+ "Memory cgroup out of memory");
}
unlock:
mutex_unlock(&oom_lock);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -196,27 +196,26 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
* Determine the type of allocation constraint.
*/
#ifdef CONFIG_NUMA
-static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
- gfp_t gfp_mask, nodemask_t *nodemask,
- unsigned long *totalpages)
+static enum oom_constraint constrained_alloc(struct oom_control *oc,
+ unsigned long *totalpages)
{
struct zone *zone;
struct zoneref *z;
- enum zone_type high_zoneidx = gfp_zone(gfp_mask);
+ enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask);
bool cpuset_limited = false;
int nid;
/* Default to all available memory */
*totalpages = totalram_pages + total_swap_pages;
- if (!zonelist)
+ if (!oc->zonelist)
return CONSTRAINT_NONE;
/*
* Reach here only when __GFP_NOFAIL is used. So, we should avoid
* to kill current.We have to random task kill in this case.
* Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now.
*/
- if (gfp_mask & __GFP_THISNODE)
+ if (oc->gfp_mask & __GFP_THISNODE)
return CONSTRAINT_NONE;
/*
@@ -224,17 +223,18 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
* the page allocator means a mempolicy is in effect. Cpuset policy
* is enforced in get_page_from_freelist().
*/
- if (nodemask && !nodes_subset(node_states[N_MEMORY], *nodemask)) {
+ if (oc->nodemask &&
+ !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
*totalpages = total_swap_pages;
- for_each_node_mask(nid, *nodemask)
+ for_each_node_mask(nid, *oc->nodemask)
*totalpages += node_spanned_pages(nid);
return CONSTRAINT_MEMORY_POLICY;
}
/* Check this allocation failure is caused by cpuset's wall function */
- for_each_zone_zonelist_nodemask(zone, z, zonelist,
- high_zoneidx, nodemask)
- if (!cpuset_zone_allowed(zone, gfp_mask))
+ for_each_zone_zonelist_nodemask(zone, z, oc->zonelist,
+ high_zoneidx, oc->nodemask)
+ if (!cpuset_zone_allowed(zone, oc->gfp_mask))
cpuset_limited = true;
if (cpuset_limited) {
@@ -246,20 +246,18 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
return CONSTRAINT_NONE;
}
#else
-static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
- gfp_t gfp_mask, nodemask_t *nodemask,
- unsigned long *totalpages)
+static enum oom_constraint constrained_alloc(struct oom_control *oc,
+ unsigned long *totalpages)
{
*totalpages = totalram_pages + total_swap_pages;
return CONSTRAINT_NONE;
}
#endif
-enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
- unsigned long totalpages, const nodemask_t *nodemask,
- bool force_kill)
+enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
+ struct task_struct *task, unsigned long totalpages)
{
- if (oom_unkillable_task(task, NULL, nodemask))
+ if (oom_unkillable_task(task, NULL, oc->nodemask))
return OOM_SCAN_CONTINUE;
/*
@@ -267,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
* Don't allow any other task to have access to the reserves.
*/
if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
- if (!force_kill)
+ if (!oc->force_kill)
return OOM_SCAN_ABORT;
}
if (!task->mm)
@@ -280,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
if (oom_task_origin(task))
return OOM_SCAN_SELECT;
- if (task_will_free_mem(task) && !force_kill)
+ if (task_will_free_mem(task) && !oc->force_kill)
return OOM_SCAN_ABORT;
return OOM_SCAN_OK;
@@ -289,12 +287,9 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
/*
* Simple selection loop. We chose the process with the highest
* number of 'points'. Returns -1 on scan abort.
- *
- * (not docbooked, we don't want this one cluttering up the manual)
*/
-static struct task_struct *select_bad_process(unsigned int *ppoints,
- unsigned long totalpages, const nodemask_t *nodemask,
- bool force_kill)
+static struct task_struct *select_bad_process(struct oom_control *oc,
+ unsigned int *ppoints, unsigned long totalpages)
{
struct task_struct *g, *p;
struct task_struct *chosen = NULL;
@@ -304,8 +299,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
for_each_process_thread(g, p) {
unsigned int points;
- switch (oom_scan_process_thread(p, totalpages, nodemask,
- force_kill)) {
+ switch (oom_scan_process_thread(oc, p, totalpages)) {
case OOM_SCAN_SELECT:
chosen = p;
chosen_points = ULONG_MAX;
@@ -318,7 +312,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
case OOM_SCAN_OK:
break;
};
- points = oom_badness(p, NULL, nodemask, totalpages);
+ points = oom_badness(p, NULL, oc->nodemask, totalpages);
if (!points || points < chosen_points)
continue;
/* Prefer thread group leaders for display purposes */
@@ -380,13 +374,13 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
rcu_read_unlock();
}
-static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
- struct mem_cgroup *memcg, const nodemask_t *nodemask)
+static void dump_header(struct oom_control *oc, struct task_struct *p,
+ struct mem_cgroup *memcg)
{
task_lock(current);
pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
"oom_score_adj=%hd\n",
- current->comm, gfp_mask, order,
+ current->comm, oc->gfp_mask, oc->order,
current->signal->oom_score_adj);
cpuset_print_task_mems_allowed(current);
task_unlock(current);
@@ -396,7 +390,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
else
show_mem(SHOW_MEM_FILTER_NODES);
if (sysctl_oom_dump_tasks)
- dump_tasks(memcg, nodemask);
+ dump_tasks(memcg, oc->nodemask);
}
/*
@@ -487,10 +481,9 @@ void oom_killer_enable(void)
* Must be called while holding a reference to p, which will be released upon
* returning.
*/
-void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
+void oom_kill_process(struct oom_control *oc, struct task_struct *p,
unsigned int points, unsigned long totalpages,
- struct mem_cgroup *memcg, nodemask_t *nodemask,
- const char *message)
+ struct mem_cgroup *memcg, const char *message)
{
struct task_struct *victim = p;
struct task_struct *child;
@@ -514,7 +507,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
task_unlock(p);
if (__ratelimit(&oom_rs))
- dump_header(p, gfp_mask, order, memcg, nodemask);
+ dump_header(oc, p, memcg);
task_lock(p);
pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
@@ -537,7 +530,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
/*
* oom_badness() returns 0 if the thread is unkillable
*/
- child_points = oom_badness(child, memcg, nodemask,
+ child_points = oom_badness(child, memcg, oc->nodemask,
totalpages);
if (child_points > victim_points) {
put_task_struct(victim);
@@ -600,8 +593,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
/*
* Determines whether the kernel must panic because of the panic_on_oom sysctl.
*/
-void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
- int order, const nodemask_t *nodemask,
+void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
struct mem_cgroup *memcg)
{
if (likely(!sysctl_panic_on_oom))
@@ -615,7 +607,7 @@ void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
if (constraint != CONSTRAINT_NONE)
return;
}
- dump_header(NULL, gfp_mask, order, memcg, nodemask);
+ dump_header(oc, NULL, memcg);
panic("Out of memory: %s panic_on_oom is enabled\n",
sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
}
@@ -635,22 +627,16 @@ int unregister_oom_notifier(struct notifier_block *nb)
EXPORT_SYMBOL_GPL(unregister_oom_notifier);
/**
- * __out_of_memory - kill the "best" process when we run out of memory
- * @zonelist: zonelist pointer
- * @gfp_mask: memory allocation flags
- * @order: amount of memory being requested as a power of 2
- * @nodemask: nodemask passed to page allocator
- * @force_kill: true if a task must be killed, even if others are exiting
+ * out_of_memory - kill the "best" process when we run out of memory
+ * @oc: pointer to struct oom_control
*
* If we run out of memory, we have the choice between either
* killing a random task (bad), letting the system crash (worse)
* OR try to be smart about which process to kill. Note that we
* don't have to be perfect here, we just have to be good.
*/
-bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
- int order, nodemask_t *nodemask, bool force_kill)
+bool out_of_memory(struct oom_control *oc)
{
- const nodemask_t *mpol_mask;
struct task_struct *p;
unsigned long totalpages;
unsigned long freed = 0;
@@ -684,30 +670,29 @@ bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
* Check if there were limitations on the allocation (only relevant for
* NUMA) that may require different handling.
*/
- constraint = constrained_alloc(zonelist, gfp_mask, nodemask,
- &totalpages);
- mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL;
- check_panic_on_oom(constraint, gfp_mask, order, mpol_mask, NULL);
+ constraint = constrained_alloc(oc, &totalpages);
+ if (constraint != CONSTRAINT_MEMORY_POLICY)
+ oc->nodemask = NULL;
+ check_panic_on_oom(oc, constraint, NULL);
if (sysctl_oom_kill_allocating_task && current->mm &&
- !oom_unkillable_task(current, NULL, nodemask) &&
+ !oom_unkillable_task(current, NULL, oc->nodemask) &&
current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
get_task_struct(current);
- oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL,
- nodemask,
+ oom_kill_process(oc, current, 0, totalpages, NULL,
"Out of memory (oom_kill_allocating_task)");
goto out;
}
- p = select_bad_process(&points, totalpages, mpol_mask, force_kill);
+ p = select_bad_process(oc, &points, totalpages);
/* Found nothing?!?! Either we hang forever, or we panic. */
if (!p) {
- dump_header(NULL, gfp_mask, order, NULL, mpol_mask);
+ dump_header(oc, NULL, NULL);
panic("Out of memory and no killable processes...\n");
}
if (p != (void *)-1UL) {
- oom_kill_process(p, gfp_mask, order, points, totalpages, NULL,
- nodemask, "Out of memory");
+ oom_kill_process(oc, p, points, totalpages, NULL,
+ "Out of memory");
killed = 1;
}
out:
@@ -728,13 +713,21 @@ out:
*/
void pagefault_out_of_memory(void)
{
+ struct oom_control oc = {
+ .zonelist = NULL,
+ .nodemask = NULL,
+ .gfp_mask = 0,
+ .order = 0,
+ .force_kill = false,
+ };
+
if (mem_cgroup_oom_synchronize(true))
return;
if (!mutex_trylock(&oom_lock))
return;
- if (!out_of_memory(NULL, 0, 0, NULL, false)) {
+ if (!out_of_memory(&oc)) {
/*
* There shouldn't be any user tasks runnable while the
* OOM killer is disabled, so the current task has to
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2680,6 +2680,13 @@ static inline struct page *
__alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
const struct alloc_context *ac, unsigned long *did_some_progress)
{
+ struct oom_control oc = {
+ .zonelist = ac->zonelist,
+ .nodemask = ac->nodemask,
+ .gfp_mask = gfp_mask,
+ .order = order,
+ .force_kill = false,
+ };
struct page *page;
*did_some_progress = 0;
@@ -2731,8 +2738,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
goto out;
}
/* Exhausted what can be done so it's blamo time */
- if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false)
- || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
+ if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
*did_some_progress = 1;
out:
mutex_unlock(&oom_lock);
^ permalink raw reply [flat|nested] 21+ messages in thread
* [patch v3 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq
2015-07-08 23:42 ` [patch v3 " David Rientjes
@ 2015-07-08 23:42 ` David Rientjes
2015-07-08 23:42 ` [patch v3 3/3] mm, oom: do not panic for oom kills triggered from sysrq David Rientjes
2015-07-14 22:52 ` [patch v3 1/3] mm, oom: organize oom context into struct Andrew Morton
2 siblings, 0 replies; 21+ messages in thread
From: David Rientjes @ 2015-07-08 23:42 UTC (permalink / raw)
To: Andrew Morton; +Cc: Sergey Senozhatsky, Michal Hocko, linux-kernel, linux-mm
The force_kill member of struct oom_control isn't needed if an order of
-1 is used instead to signal a forced kill. This is the same as order == -1
in struct compact_control, which requires full memory compaction.
This patch introduces no functional change.
Signed-off-by: David Rientjes <rientjes@google.com>
---
v2: fix changelog typo per Sergey
v3: fix title per Hillf
drivers/tty/sysrq.c | 3 +--
include/linux/oom.h | 1 -
mm/memcontrol.c | 1 -
mm/oom_kill.c | 5 ++---
mm/page_alloc.c | 1 -
5 files changed, 3 insertions(+), 8 deletions(-)
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -358,8 +358,7 @@ static void moom_callback(struct work_struct *ignored)
.zonelist = node_zonelist(first_memory_node, gfp_mask),
.nodemask = NULL,
.gfp_mask = gfp_mask,
- .order = 0,
- .force_kill = true,
+ .order = -1,
};
mutex_lock(&oom_lock);
diff --git a/include/linux/oom.h b/include/linux/oom.h
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -17,7 +17,6 @@ struct oom_control {
nodemask_t *nodemask;
gfp_t gfp_mask;
int order;
- bool force_kill;
};
/*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1550,7 +1550,6 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
.nodemask = NULL,
.gfp_mask = gfp_mask,
.order = order,
- .force_kill = false,
};
struct mem_cgroup *iter;
unsigned long chosen_points = 0;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -265,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
* Don't allow any other task to have access to the reserves.
*/
if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
- if (!oc->force_kill)
+ if (oc->order != -1)
return OOM_SCAN_ABORT;
}
if (!task->mm)
@@ -278,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
if (oom_task_origin(task))
return OOM_SCAN_SELECT;
- if (task_will_free_mem(task) && !oc->force_kill)
+ if (task_will_free_mem(task) && oc->order != -1)
return OOM_SCAN_ABORT;
return OOM_SCAN_OK;
@@ -718,7 +718,6 @@ void pagefault_out_of_memory(void)
.nodemask = NULL,
.gfp_mask = 0,
.order = 0,
- .force_kill = false,
};
if (mem_cgroup_oom_synchronize(true))
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2685,7 +2685,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
.nodemask = ac->nodemask,
.gfp_mask = gfp_mask,
.order = order,
- .force_kill = false,
};
struct page *page;
^ permalink raw reply [flat|nested] 21+ messages in thread
* [patch v3 3/3] mm, oom: do not panic for oom kills triggered from sysrq
2015-07-08 23:42 ` [patch v3 " David Rientjes
2015-07-08 23:42 ` [patch v3 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq David Rientjes
@ 2015-07-08 23:42 ` David Rientjes
2015-07-14 22:52 ` [patch v3 1/3] mm, oom: organize oom context into struct Andrew Morton
2 siblings, 0 replies; 21+ messages in thread
From: David Rientjes @ 2015-07-08 23:42 UTC (permalink / raw)
To: Andrew Morton; +Cc: Sergey Senozhatsky, Michal Hocko, linux-kernel, linux-mm
Sysrq+f is used to kill a process either for debug or when the VM is
otherwise unresponsive.
It is not intended to trigger a panic when no process may be killed.
Avoid panicking the system for sysrq+f when no processes are killed.
Suggested-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: David Rientjes <rientjes@google.com>
---
v2: no change
v3: fix title per Hillf
Documentation/sysrq.txt | 3 ++-
mm/oom_kill.c | 7 +++++--
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -75,7 +75,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
'e' - Send a SIGTERM to all processes, except for init.
-'f' - Will call oom_kill to kill a memory hog process.
+'f' - Will call the oom killer to kill a memory hog process, but do not
+ panic if nothing can be killed.
'g' - Used by kgdb (kernel debugger)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -607,6 +607,9 @@ void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
if (constraint != CONSTRAINT_NONE)
return;
}
+ /* Do not panic for oom kills triggered by sysrq */
+ if (oc->order == -1)
+ return;
dump_header(oc, NULL, memcg);
panic("Out of memory: %s panic_on_oom is enabled\n",
sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
@@ -686,11 +689,11 @@ bool out_of_memory(struct oom_control *oc)
p = select_bad_process(oc, &points, totalpages);
/* Found nothing?!?! Either we hang forever, or we panic. */
- if (!p) {
+ if (!p && oc->order != -1) {
dump_header(oc, NULL, NULL);
panic("Out of memory and no killable processes...\n");
}
- if (p != (void *)-1UL) {
+ if (p && p != (void *)-1UL) {
oom_kill_process(oc, p, points, totalpages, NULL,
"Out of memory");
killed = 1;
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch v3 1/3] mm, oom: organize oom context into struct
2015-07-08 23:42 ` [patch v3 " David Rientjes
2015-07-08 23:42 ` [patch v3 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq David Rientjes
2015-07-08 23:42 ` [patch v3 3/3] mm, oom: do not panic for oom kills triggered from sysrq David Rientjes
@ 2015-07-14 22:52 ` Andrew Morton
2015-07-14 23:44 ` David Rientjes
2 siblings, 1 reply; 21+ messages in thread
From: Andrew Morton @ 2015-07-14 22:52 UTC (permalink / raw)
To: David Rientjes; +Cc: Sergey Senozhatsky, Michal Hocko, linux-kernel, linux-mm
On Wed, 8 Jul 2015 16:42:42 -0700 (PDT) David Rientjes <rientjes@google.com> wrote:
> There are essential elements to an oom context that are passed around to
> multiple functions.
>
> Organize these elements into a new struct, struct oom_control, that
> specifies the context for an oom condition.
>
> This patch introduces no functional change.
>
> ...
>
> --- a/include/linux/oom.h
> +++ b/include/linux/oom.h
> @@ -12,6 +12,14 @@ struct notifier_block;
> struct mem_cgroup;
> struct task_struct;
>
> +struct oom_control {
> + struct zonelist *zonelist;
> + nodemask_t *nodemask;
> + gfp_t gfp_mask;
> + int order;
> + bool force_kill;
> +};
Some docs would be nice.
gfp_mask and order are what the page-allocating caller originally asked
for, I think? They haven't been mucked with?
It's somewhat obvious what force_kill does, but why is it provided, why
is it set? And what does it actually kill? A process which was
selected based on the other fields...
Also, it's a bit odd that zonelist and nodemask are here. They're
low-level implementation details whereas the other three fields are
high-level caller control stuff.
Anyway, please have a think about it. The definition site for a controlling
structure can be a great place to reveal the overall design and operation.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch v3 1/3] mm, oom: organize oom context into struct
2015-07-14 22:52 ` [patch v3 1/3] mm, oom: organize oom context into struct Andrew Morton
@ 2015-07-14 23:44 ` David Rientjes
0 siblings, 0 replies; 21+ messages in thread
From: David Rientjes @ 2015-07-14 23:44 UTC (permalink / raw)
To: Andrew Morton; +Cc: Sergey Senozhatsky, Michal Hocko, linux-kernel, linux-mm
On Tue, 14 Jul 2015, Andrew Morton wrote:
> > --- a/include/linux/oom.h
> > +++ b/include/linux/oom.h
> > @@ -12,6 +12,14 @@ struct notifier_block;
> > struct mem_cgroup;
> > struct task_struct;
> >
> > +struct oom_control {
> > + struct zonelist *zonelist;
> > + nodemask_t *nodemask;
> > + gfp_t gfp_mask;
> > + int order;
> > + bool force_kill;
> > +};
>
> Some docs would be nice.
>
Ok!
> gfp_mask and order are what the page-allocating caller originally asked
> for, I think? They haven't been mucked with?
>
Yes, it's a good opportunity to make them const.
> It's somewhat obvious what force_kill does, but why is it provided, why
> is it set? And what does it actually kill? A process which was
> selected based on the other fields...
>
It's removed in the next patch since it's unneeded, so I'll define what
order == -1 means.
> Also, it's a bit odd that zonelist and nodemask are here. They're
> low-level implementation details whereas the other three fields are
> high-level caller control stuff.
>
Zonelist and nodemask are indeed pretty weird here. We use them to
determine whether the oom kill is constrained by cpuset and/or mempolicy,
respectively, so that we don't kill things unnecessarily and still leave a
cpuset oom, for example. We could determine that before actually calling
the oom killer and pass the enum oom_constraint in, but its purpose is
for the oom killer, so it's just a part of that logical unit.
^ permalink raw reply [flat|nested] 21+ messages in thread
end of thread, other threads:[~2015-07-14 23:44 UTC | newest]
Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-18 23:00 [patch 1/3] mm, oom: organize oom context into struct David Rientjes
2015-06-18 23:00 ` [patch 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq David Rientjes
2015-06-19 7:32 ` Michal Hocko
2015-06-30 22:50 ` David Rientjes
2015-06-18 23:00 ` [patch 3/3] mm, oom: do not panic for oom kills triggered from sysrq David Rientjes
2015-06-19 0:14 ` [patch 1/3] mm, oom: organize oom context into struct Sergey Senozhatsky
2015-06-30 22:46 ` David Rientjes
2015-07-01 0:11 ` Sergey Senozhatsky
2015-07-01 21:29 ` David Rientjes
2015-06-19 7:30 ` Michal Hocko
2015-07-01 21:37 ` [patch v2 " David Rientjes
2015-07-01 21:37 ` [patch v2 2/3] " David Rientjes
2015-07-02 6:01 ` Michal Hocko
2015-07-08 23:28 ` David Rientjes
2015-07-01 21:37 ` [patch v2 3/3] " David Rientjes
2015-07-02 6:00 ` [patch v2 1/3] " Michal Hocko
2015-07-08 23:42 ` [patch v3 " David Rientjes
2015-07-08 23:42 ` [patch v3 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq David Rientjes
2015-07-08 23:42 ` [patch v3 3/3] mm, oom: do not panic for oom kills triggered from sysrq David Rientjes
2015-07-14 22:52 ` [patch v3 1/3] mm, oom: organize oom context into struct Andrew Morton
2015-07-14 23:44 ` David Rientjes
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).