linux-kernel.vger.kernel.org archive mirror
* [patch 1/3] mm, oom: organize oom context into struct
From: David Rientjes @ 2015-06-18 23:00 UTC
  To: Andrew Morton; +Cc: Michal Hocko, linux-kernel, linux-mm

There are essential elements to an oom context that are passed around to
multiple functions.

Organize these elements into a new struct, struct oom_context, that
specifies the context for an oom condition.

This patch introduces no functional change.
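
As a minimal illustration of the pattern (a standalone userspace
sketch with hypothetical names, not the kernel API), the refactor
replaces a long argument list with one context struct passed by
pointer:

#include <stdio.h>

/* Hypothetical context struct, analogous to struct oom_control:
 * it bundles the elements that describe one condition. */
struct work_control {
	int order;
	int force;
};

/* Before: every element travels as a separate parameter. */
static void do_work_args(int order, int force)
{
	printf("order=%d force=%d\n", order, force);
}

/* After: one pointer carries the whole context; adding a field
 * later does not change any function signature. */
static void do_work_ctx(const struct work_control *wc)
{
	printf("order=%d force=%d\n", wc->order, wc->force);
}

int main(void)
{
	struct work_control wc = {
		.order = 0,
		.force = 1,
	};

	do_work_args(0, 1);
	do_work_ctx(&wc);
	return 0;
}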

Signed-off-by: David Rientjes <rientjes@google.com>
---
 drivers/tty/sysrq.c |  12 +++++-
 include/linux/oom.h |  25 +++++++-----
 mm/memcontrol.c     |  16 +++++---
 mm/oom_kill.c       | 115 ++++++++++++++++++++++++----------------------------
 mm/page_alloc.c     |  10 ++++-
 5 files changed, 98 insertions(+), 80 deletions(-)

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -353,9 +353,17 @@ static struct sysrq_key_op sysrq_term_op = {
 
 static void moom_callback(struct work_struct *ignored)
 {
+	const gfp_t gfp_mask = GFP_KERNEL;
+	struct oom_control oc = {
+		.zonelist = node_zonelist(first_memory_node, gfp_mask),
+		.nodemask = NULL,
+		.gfp_mask = gfp_mask,
+		.order = 0,
+		.force_kill = true,
+	};
+
 	mutex_lock(&oom_lock);
-	if (!out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL),
-			   GFP_KERNEL, 0, NULL, true))
+	if (!out_of_memory(&oc))
 		pr_info("OOM request ignored because killer is disabled\n");
 	mutex_unlock(&oom_lock);
 }
diff --git a/include/linux/oom.h b/include/linux/oom.h
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -12,6 +12,14 @@ struct notifier_block;
 struct mem_cgroup;
 struct task_struct;
 
+struct oom_control {
+	struct zonelist *zonelist;
+	nodemask_t	*nodemask;
+	gfp_t		gfp_mask;
+	int		order;
+	bool		force_kill;
+};
+
 /*
  * Types of limitations to the nodes from which allocations may occur
  */
@@ -57,21 +65,18 @@ extern unsigned long oom_badness(struct task_struct *p,
 
 extern int oom_kills_count(void);
 extern void note_oom_kill(void);
-extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
+extern void oom_kill_process(struct oom_control *oc, struct task_struct *p,
 			     unsigned int points, unsigned long totalpages,
-			     struct mem_cgroup *memcg, nodemask_t *nodemask,
-			     const char *message);
+			     struct mem_cgroup *memcg, const char *message);
 
-extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
-			       int order, const nodemask_t *nodemask,
+extern void check_panic_on_oom(struct oom_control *oc,
+			       enum oom_constraint constraint,
 			       struct mem_cgroup *memcg);
 
-extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
-		unsigned long totalpages, const nodemask_t *nodemask,
-		bool force_kill);
+extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
+		struct task_struct *task, unsigned long totalpages);
 
-extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
-		int order, nodemask_t *mask, bool force_kill);
+extern bool out_of_memory(struct oom_control *oc);
 
 extern void exit_oom_victim(void);
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1545,6 +1545,13 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
 static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 				     int order)
 {
+	struct oom_control oc = {
+		.zonelist = NULL,
+		.nodemask = NULL,
+		.gfp_mask = gfp_mask,
+		.order = order,
+		.force_kill = false,
+	};
 	struct mem_cgroup *iter;
 	unsigned long chosen_points = 0;
 	unsigned long totalpages;
@@ -1563,7 +1570,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		goto unlock;
 	}
 
-	check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg);
+	check_panic_on_oom(&oc, CONSTRAINT_MEMCG, memcg);
 	totalpages = mem_cgroup_get_limit(memcg) ? : 1;
 	for_each_mem_cgroup_tree(iter, memcg) {
 		struct css_task_iter it;
@@ -1571,8 +1578,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 
 		css_task_iter_start(&iter->css, &it);
 		while ((task = css_task_iter_next(&it))) {
-			switch (oom_scan_process_thread(task, totalpages, NULL,
-							false)) {
+			switch (oom_scan_process_thread(&oc, task, totalpages)) {
 			case OOM_SCAN_SELECT:
 				if (chosen)
 					put_task_struct(chosen);
@@ -1610,8 +1616,8 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 
 	if (chosen) {
 		points = chosen_points * 1000 / totalpages;
-		oom_kill_process(chosen, gfp_mask, order, points, totalpages,
-				 memcg, NULL, "Memory cgroup out of memory");
+		oom_kill_process(&oc, chosen, points, totalpages, memcg,
+				 "Memory cgroup out of memory");
 	}
 unlock:
 	mutex_unlock(&oom_lock);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -196,27 +196,26 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
  * Determine the type of allocation constraint.
  */
 #ifdef CONFIG_NUMA
-static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
-				gfp_t gfp_mask, nodemask_t *nodemask,
-				unsigned long *totalpages)
+static enum oom_constraint constrained_alloc(struct oom_control *oc,
+					     unsigned long *totalpages)
 {
 	struct zone *zone;
 	struct zoneref *z;
-	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
+	enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask);
 	bool cpuset_limited = false;
 	int nid;
 
 	/* Default to all available memory */
 	*totalpages = totalram_pages + total_swap_pages;
 
-	if (!zonelist)
+	if (!oc->zonelist)
 		return CONSTRAINT_NONE;
 	/*
 	 * Reach here only when __GFP_NOFAIL is used. So, we should avoid
 	 * to kill current.We have to random task kill in this case.
 	 * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now.
 	 */
-	if (gfp_mask & __GFP_THISNODE)
+	if (oc->gfp_mask & __GFP_THISNODE)
 		return CONSTRAINT_NONE;
 
 	/*
@@ -224,17 +223,18 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
 	 * the page allocator means a mempolicy is in effect.  Cpuset policy
 	 * is enforced in get_page_from_freelist().
 	 */
-	if (nodemask && !nodes_subset(node_states[N_MEMORY], *nodemask)) {
+	if (oc->nodemask &&
+	    !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
 		*totalpages = total_swap_pages;
-		for_each_node_mask(nid, *nodemask)
+		for_each_node_mask(nid, *oc->nodemask)
 			*totalpages += node_spanned_pages(nid);
 		return CONSTRAINT_MEMORY_POLICY;
 	}
 
 	/* Check this allocation failure is caused by cpuset's wall function */
-	for_each_zone_zonelist_nodemask(zone, z, zonelist,
-			high_zoneidx, nodemask)
-		if (!cpuset_zone_allowed(zone, gfp_mask))
+	for_each_zone_zonelist_nodemask(zone, z, oc->zonelist,
+			high_zoneidx, oc->nodemask)
+		if (!cpuset_zone_allowed(zone, oc->gfp_mask))
 			cpuset_limited = true;
 
 	if (cpuset_limited) {
@@ -246,20 +246,18 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
 	return CONSTRAINT_NONE;
 }
 #else
-static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
-				gfp_t gfp_mask, nodemask_t *nodemask,
-				unsigned long *totalpages)
+static enum oom_constraint constrained_alloc(struct oom_control *oc,
+					     unsigned long *totalpages)
 {
 	*totalpages = totalram_pages + total_swap_pages;
 	return CONSTRAINT_NONE;
 }
 #endif
 
-enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
-		unsigned long totalpages, const nodemask_t *nodemask,
-		bool force_kill)
+enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
+			struct task_struct *task, unsigned long totalpages)
 {
-	if (oom_unkillable_task(task, NULL, nodemask))
+	if (oom_unkillable_task(task, NULL, oc->nodemask))
 		return OOM_SCAN_CONTINUE;
 
 	/*
@@ -267,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 	 * Don't allow any other task to have access to the reserves.
 	 */
 	if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
-		if (!force_kill)
+		if (!oc->force_kill)
 			return OOM_SCAN_ABORT;
 	}
 	if (!task->mm)
@@ -280,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 	if (oom_task_origin(task))
 		return OOM_SCAN_SELECT;
 
-	if (task_will_free_mem(task) && !force_kill)
+	if (task_will_free_mem(task) && !oc->force_kill)
 		return OOM_SCAN_ABORT;
 
 	return OOM_SCAN_OK;
@@ -289,12 +287,9 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 /*
  * Simple selection loop. We chose the process with the highest
  * number of 'points'.  Returns -1 on scan abort.
- *
- * (not docbooked, we don't want this one cluttering up the manual)
  */
-static struct task_struct *select_bad_process(unsigned int *ppoints,
-		unsigned long totalpages, const nodemask_t *nodemask,
-		bool force_kill)
+static struct task_struct *select_bad_process(struct oom_control *oc,
+		unsigned int *ppoints, unsigned long totalpages)
 {
 	struct task_struct *g, *p;
 	struct task_struct *chosen = NULL;
@@ -304,8 +299,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 	for_each_process_thread(g, p) {
 		unsigned int points;
 
-		switch (oom_scan_process_thread(p, totalpages, nodemask,
-						force_kill)) {
+		switch (oom_scan_process_thread(oc, p, totalpages)) {
 		case OOM_SCAN_SELECT:
 			chosen = p;
 			chosen_points = ULONG_MAX;
@@ -318,7 +312,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 		case OOM_SCAN_OK:
 			break;
 		};
-		points = oom_badness(p, NULL, nodemask, totalpages);
+		points = oom_badness(p, NULL, oc->nodemask, totalpages);
 		if (!points || points < chosen_points)
 			continue;
 		/* Prefer thread group leaders for display purposes */
@@ -380,13 +374,13 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
 	rcu_read_unlock();
 }
 
-static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
-			struct mem_cgroup *memcg, const nodemask_t *nodemask)
+static void dump_header(struct oom_control *oc, struct task_struct *p,
+			struct mem_cgroup *memcg)
 {
 	task_lock(current);
 	pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
 		"oom_score_adj=%hd\n",
-		current->comm, gfp_mask, order,
+		current->comm, oc->gfp_mask, oc->order,
 		current->signal->oom_score_adj);
 	cpuset_print_task_mems_allowed(current);
 	task_unlock(current);
@@ -396,7 +390,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 	else
 		show_mem(SHOW_MEM_FILTER_NODES);
 	if (sysctl_oom_dump_tasks)
-		dump_tasks(memcg, nodemask);
+		dump_tasks(memcg, oc->nodemask);
 }
 
 /*
@@ -487,10 +481,9 @@ void oom_killer_enable(void)
  * Must be called while holding a reference to p, which will be released upon
  * returning.
  */
-void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
+void oom_kill_process(struct oom_control *oc, struct task_struct *p,
 		      unsigned int points, unsigned long totalpages,
-		      struct mem_cgroup *memcg, nodemask_t *nodemask,
-		      const char *message)
+		      struct mem_cgroup *memcg, const char *message)
 {
 	struct task_struct *victim = p;
 	struct task_struct *child;
@@ -514,7 +507,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	task_unlock(p);
 
 	if (__ratelimit(&oom_rs))
-		dump_header(p, gfp_mask, order, memcg, nodemask);
+		dump_header(oc, p, memcg);
 
 	task_lock(p);
 	pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
@@ -537,7 +530,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 			/*
 			 * oom_badness() returns 0 if the thread is unkillable
 			 */
-			child_points = oom_badness(child, memcg, nodemask,
+			child_points = oom_badness(child, memcg, oc->nodemask,
 								totalpages);
 			if (child_points > victim_points) {
 				put_task_struct(victim);
@@ -600,8 +593,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 /*
  * Determines whether the kernel must panic because of the panic_on_oom sysctl.
  */
-void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
-			int order, const nodemask_t *nodemask,
+void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
 			struct mem_cgroup *memcg)
 {
 	if (likely(!sysctl_panic_on_oom))
@@ -615,7 +607,7 @@ void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
 		if (constraint != CONSTRAINT_NONE)
 			return;
 	}
-	dump_header(NULL, gfp_mask, order, memcg, nodemask);
+	dump_header(oc, NULL, memcg);
 	panic("Out of memory: %s panic_on_oom is enabled\n",
 		sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
 }
@@ -635,22 +627,16 @@ int unregister_oom_notifier(struct notifier_block *nb)
 EXPORT_SYMBOL_GPL(unregister_oom_notifier);
 
 /**
- * __out_of_memory - kill the "best" process when we run out of memory
- * @zonelist: zonelist pointer
- * @gfp_mask: memory allocation flags
- * @order: amount of memory being requested as a power of 2
- * @nodemask: nodemask passed to page allocator
- * @force_kill: true if a task must be killed, even if others are exiting
+ * out_of_memory - kill the "best" process when we run out of memory
+ * @oc: pointer to struct oom_control
  *
  * If we run out of memory, we have the choice between either
  * killing a random task (bad), letting the system crash (worse)
  * OR try to be smart about which process to kill. Note that we
  * don't have to be perfect here, we just have to be good.
  */
-bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
-		   int order, nodemask_t *nodemask, bool force_kill)
+bool out_of_memory(struct oom_control *oc)
 {
-	const nodemask_t *mpol_mask;
 	struct task_struct *p;
 	unsigned long totalpages;
 	unsigned long freed = 0;
@@ -684,30 +670,29 @@ bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 	 * Check if there were limitations on the allocation (only relevant for
 	 * NUMA) that may require different handling.
 	 */
-	constraint = constrained_alloc(zonelist, gfp_mask, nodemask,
-						&totalpages);
-	mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL;
-	check_panic_on_oom(constraint, gfp_mask, order, mpol_mask, NULL);
+	constraint = constrained_alloc(oc, &totalpages);
+	if (constraint != CONSTRAINT_MEMORY_POLICY)
+		oc->nodemask = NULL;
+	check_panic_on_oom(oc, constraint, NULL);
 
 	if (sysctl_oom_kill_allocating_task && current->mm &&
-	    !oom_unkillable_task(current, NULL, nodemask) &&
+	    !oom_unkillable_task(current, NULL, oc->nodemask) &&
 	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
 		get_task_struct(current);
-		oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL,
-				 nodemask,
+		oom_kill_process(oc, current, 0, totalpages, NULL,
 				 "Out of memory (oom_kill_allocating_task)");
 		goto out;
 	}
 
-	p = select_bad_process(&points, totalpages, mpol_mask, force_kill);
+	p = select_bad_process(oc, &points, totalpages);
 	/* Found nothing?!?! Either we hang forever, or we panic. */
 	if (!p) {
-		dump_header(NULL, gfp_mask, order, NULL, mpol_mask);
+		dump_header(oc, NULL, NULL);
 		panic("Out of memory and no killable processes...\n");
 	}
 	if (p != (void *)-1UL) {
-		oom_kill_process(p, gfp_mask, order, points, totalpages, NULL,
-				 nodemask, "Out of memory");
+		oom_kill_process(oc, p, points, totalpages, NULL,
+				 "Out of memory");
 		killed = 1;
 	}
 out:
@@ -728,13 +713,21 @@ out:
  */
 void pagefault_out_of_memory(void)
 {
+	struct oom_control oc = {
+		.zonelist = NULL,
+		.nodemask = NULL,
+		.gfp_mask = 0,
+		.order = 0,
+		.force_kill = false,
+	};
+
 	if (mem_cgroup_oom_synchronize(true))
 		return;
 
 	if (!mutex_trylock(&oom_lock))
 		return;
 
-	if (!out_of_memory(NULL, 0, 0, NULL, false)) {
+	if (!out_of_memory(&oc)) {
 		/*
 		 * There shouldn't be any user tasks runnable while the
 		 * OOM killer is disabled, so the current task has to
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2680,6 +2680,13 @@ static inline struct page *
 __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	const struct alloc_context *ac, unsigned long *did_some_progress)
 {
+	struct oom_control oc = {
+		.zonelist = ac->zonelist,
+		.nodemask = ac->nodemask,
+		.gfp_mask = gfp_mask,
+		.order = order,
+		.force_kill = false,
+	};
 	struct page *page;
 
 	*did_some_progress = 0;
@@ -2731,8 +2738,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 			goto out;
 	}
 	/* Exhausted what can be done so it's blamo time */
-	if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false)
-			|| WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
+	if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
 		*did_some_progress = 1;
 out:
 	mutex_unlock(&oom_lock);


* [patch 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq
From: David Rientjes @ 2015-06-18 23:00 UTC
  To: Andrew Morton; +Cc: Michal Hocko, linux-kernel, linux-mm

The force_kill member of struct oom_context isn't needed if an order of
-1 is used instead.

This patch introduces no functional change.
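
A minimal sketch of why the sentinel is safe and how it can stay
readable (standalone C; is_sysrq_oom() is a hypothetical helper, not
part of this patch): a real allocation order is never negative, so -1
is free to carry the "triggered by sysrq" meaning.

#include <assert.h>

struct oom_control {
	int order;	/* allocation order, or -1 for sysrq */
};

/* Hypothetical predicate giving the sentinel a name at call sites. */
static inline int is_sysrq_oom(const struct oom_control *oc)
{
	return oc->order == -1;
}

int main(void)
{
	struct oom_control sysrq_oc = { .order = -1 };
	struct oom_control alloc_oc = { .order = 0 };

	assert(is_sysrq_oom(&sysrq_oc));
	assert(!is_sysrq_oom(&alloc_oc));
	return 0;
}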

Signed-off-by: David Rientjes <rientjes@google.com>
---
 drivers/tty/sysrq.c | 3 +--
 include/linux/oom.h | 1 -
 mm/memcontrol.c     | 1 -
 mm/oom_kill.c       | 5 ++---
 mm/page_alloc.c     | 1 -
 5 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -358,8 +358,7 @@ static void moom_callback(struct work_struct *ignored)
 		.zonelist = node_zonelist(first_memory_node, gfp_mask),
 		.nodemask = NULL,
 		.gfp_mask = gfp_mask,
-		.order = 0,
-		.force_kill = true,
+		.order = -1,
 	};
 
 	mutex_lock(&oom_lock);
diff --git a/include/linux/oom.h b/include/linux/oom.h
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -17,7 +17,6 @@ struct oom_control {
 	nodemask_t	*nodemask;
 	gfp_t		gfp_mask;
 	int		order;
-	bool		force_kill;
 };
 
 /*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1550,7 +1550,6 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		.nodemask = NULL,
 		.gfp_mask = gfp_mask,
 		.order = order,
-		.force_kill = false,
 	};
 	struct mem_cgroup *iter;
 	unsigned long chosen_points = 0;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -265,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
 	 * Don't allow any other task to have access to the reserves.
 	 */
 	if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
-		if (!oc->force_kill)
+		if (oc->order != -1)
 			return OOM_SCAN_ABORT;
 	}
 	if (!task->mm)
@@ -278,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
 	if (oom_task_origin(task))
 		return OOM_SCAN_SELECT;
 
-	if (task_will_free_mem(task) && !oc->force_kill)
+	if (task_will_free_mem(task) && oc->order != -1)
 		return OOM_SCAN_ABORT;
 
 	return OOM_SCAN_OK;
@@ -718,7 +718,6 @@ void pagefault_out_of_memory(void)
 		.nodemask = NULL,
 		.gfp_mask = 0,
 		.order = 0,
-		.force_kill = false,
 	};
 
 	if (mem_cgroup_oom_synchronize(true))
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2685,7 +2685,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 		.nodemask = ac->nodemask,
 		.gfp_mask = gfp_mask,
 		.order = order,
-		.force_kill = false,
 	};
 	struct page *page;
 


* [patch 3/3] mm, oom: do not panic for oom kills triggered from sysrq
From: David Rientjes @ 2015-06-18 23:00 UTC
  To: Andrew Morton; +Cc: Michal Hocko, linux-kernel, linux-mm

Sysrq+f is used to kill a process either for debugging or when the VM
is otherwise unresponsive.

It is not intended to trigger a panic when no process may be killed.

Avoid panicking the system for sysrq+f when no processes are killed.
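
A condensed sketch of the resulting decision flow (simplified
standalone C, not the exact kernel logic):

#include <stdio.h>

struct oom_control {
	int order;	/* -1 when triggered by sysrq */
};

/* After this series: a sysrq-triggered oom never panics, neither
 * via panic_on_oom nor when no killable process is found. */
static void oom_decide(const struct oom_control *oc, int victim_found,
		       int panic_on_oom)
{
	if (panic_on_oom && oc->order != -1) {
		printf("panic: panic_on_oom\n");
		return;
	}
	if (!victim_found) {
		if (oc->order != -1)
			printf("panic: no killable processes\n");
		else
			printf("sysrq oom ignored: nothing killable\n");
		return;
	}
	printf("killing selected victim\n");
}

int main(void)
{
	struct oom_control sysrq_oc = { .order = -1 };

	oom_decide(&sysrq_oc, 0, 1);	/* sysrq: no panic either way */
	return 0;
}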

Suggested-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: David Rientjes <rientjes@google.com>
---
 Documentation/sysrq.txt | 3 ++-
 mm/oom_kill.c           | 7 +++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -75,7 +75,8 @@ On all -  write a character to /proc/sysrq-trigger.  e.g.:
 
 'e'     - Send a SIGTERM to all processes, except for init.
 
-'f'	- Will call oom_kill to kill a memory hog process.
+'f'	- Will call the oom killer to kill a memory hog process, but do not
+	  panic if nothing can be killed.
 
 'g'	- Used by kgdb (kernel debugger)
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -607,6 +607,9 @@ void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
 		if (constraint != CONSTRAINT_NONE)
 			return;
 	}
+	/* Do not panic for oom kills triggered by sysrq */
+	if (oc->order == -1)
+		return;
 	dump_header(oc, NULL, memcg);
 	panic("Out of memory: %s panic_on_oom is enabled\n",
 		sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
@@ -686,11 +689,11 @@ bool out_of_memory(struct oom_control *oc)
 
 	p = select_bad_process(oc, &points, totalpages);
 	/* Found nothing?!?! Either we hang forever, or we panic. */
-	if (!p) {
+	if (!p && oc->order != -1) {
 		dump_header(oc, NULL, NULL);
 		panic("Out of memory and no killable processes...\n");
 	}
-	if (p != (void *)-1UL) {
+	if (p && p != (void *)-1UL) {
 		oom_kill_process(oc, p, points, totalpages, NULL,
 				 "Out of memory");
 		killed = 1;


* Re: [patch 1/3] mm, oom: organize oom context into struct
From: Sergey Senozhatsky @ 2015-06-19  0:14 UTC
  To: David Rientjes; +Cc: Andrew Morton, Michal Hocko, linux-kernel, linux-mm

Hello,

On (06/18/15 16:00), David Rientjes wrote:
> There are essential elements to an oom context that are passed around to
> multiple functions.
> 
> Organize these elements into a new struct, struct oom_context, that
> specifies the context for an oom condition.
> 

s/oom_context/oom_control/ ?

[..]
>  
> +struct oom_control {
> +	struct zonelist *zonelist;
> +	nodemask_t	*nodemask;
> +	gfp_t		gfp_mask;
> +	int		order;
> +	bool		force_kill;
> +};
> +
> -extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
> -			       int order, const nodemask_t *nodemask,
> +extern void check_panic_on_oom(struct oom_control *oc,
> +			       enum oom_constraint constraint,
>  			       struct mem_cgroup *memcg);
>  
> -extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
> -		unsigned long totalpages, const nodemask_t *nodemask,
> -		bool force_kill);
> +extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
> +		struct task_struct *task, unsigned long totalpages);
>  
> -extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
> -		int order, nodemask_t *mask, bool force_kill);
> +extern bool out_of_memory(struct oom_control *oc);
>  
>  extern void exit_oom_victim(void);
>  
[..]

	-ss


* Re: [patch 1/3] mm, oom: organize oom context into struct
From: Michal Hocko @ 2015-06-19  7:30 UTC
  To: David Rientjes; +Cc: Andrew Morton, linux-kernel, linux-mm

On Thu 18-06-15 16:00:02, David Rientjes wrote:
> There are essential elements to an oom context that are passed around to
> multiple functions.
> 
> Organize these elements into a new struct, struct oom_context, that
> specifies the context for an oom condition.

Yes this makes sense to me.
 
> This patch introduces no functional change.
> 
> Signed-off-by: David Rientjes <rientjes@google.com>

Acked-by: Michal Hocko <mhocko@suse.cz>

> ---
>  drivers/tty/sysrq.c |  12 +++++-
>  include/linux/oom.h |  25 +++++++-----
>  mm/memcontrol.c     |  16 +++++---
>  mm/oom_kill.c       | 115 ++++++++++++++++++++++++----------------------------
>  mm/page_alloc.c     |  10 ++++-
>  5 files changed, 98 insertions(+), 80 deletions(-)
> 
> diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
> --- a/drivers/tty/sysrq.c
> +++ b/drivers/tty/sysrq.c
> @@ -353,9 +353,17 @@ static struct sysrq_key_op sysrq_term_op = {
>  
>  static void moom_callback(struct work_struct *ignored)
>  {
> +	const gfp_t gfp_mask = GFP_KERNEL;
> +	struct oom_control oc = {
> +		.zonelist = node_zonelist(first_memory_node, gfp_mask),
> +		.nodemask = NULL,
> +		.gfp_mask = gfp_mask,
> +		.order = 0,
> +		.force_kill = true,
> +	};
> +
>  	mutex_lock(&oom_lock);
> -	if (!out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL),
> -			   GFP_KERNEL, 0, NULL, true))
> +	if (!out_of_memory(&oc))
>  		pr_info("OOM request ignored because killer is disabled\n");
>  	mutex_unlock(&oom_lock);
>  }
> diff --git a/include/linux/oom.h b/include/linux/oom.h
> --- a/include/linux/oom.h
> +++ b/include/linux/oom.h
> @@ -12,6 +12,14 @@ struct notifier_block;
>  struct mem_cgroup;
>  struct task_struct;
>  
> +struct oom_control {
> +	struct zonelist *zonelist;
> +	nodemask_t	*nodemask;
> +	gfp_t		gfp_mask;
> +	int		order;
> +	bool		force_kill;
> +};
> +
>  /*
>   * Types of limitations to the nodes from which allocations may occur
>   */
> @@ -57,21 +65,18 @@ extern unsigned long oom_badness(struct task_struct *p,
>  
>  extern int oom_kills_count(void);
>  extern void note_oom_kill(void);
> -extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
> +extern void oom_kill_process(struct oom_control *oc, struct task_struct *p,
>  			     unsigned int points, unsigned long totalpages,
> -			     struct mem_cgroup *memcg, nodemask_t *nodemask,
> -			     const char *message);
> +			     struct mem_cgroup *memcg, const char *message);
>  
> -extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
> -			       int order, const nodemask_t *nodemask,
> +extern void check_panic_on_oom(struct oom_control *oc,
> +			       enum oom_constraint constraint,
>  			       struct mem_cgroup *memcg);
>  
> -extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
> -		unsigned long totalpages, const nodemask_t *nodemask,
> -		bool force_kill);
> +extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
> +		struct task_struct *task, unsigned long totalpages);
>  
> -extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
> -		int order, nodemask_t *mask, bool force_kill);
> +extern bool out_of_memory(struct oom_control *oc);
>  
>  extern void exit_oom_victim(void);
>  
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1545,6 +1545,13 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
>  static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
>  				     int order)
>  {
> +	struct oom_control oc = {
> +		.zonelist = NULL,
> +		.nodemask = NULL,
> +		.gfp_mask = gfp_mask,
> +		.order = order,
> +		.force_kill = false,
> +	};
>  	struct mem_cgroup *iter;
>  	unsigned long chosen_points = 0;
>  	unsigned long totalpages;
> @@ -1563,7 +1570,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
>  		goto unlock;
>  	}
>  
> -	check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg);
> +	check_panic_on_oom(&oc, CONSTRAINT_MEMCG, memcg);
>  	totalpages = mem_cgroup_get_limit(memcg) ? : 1;
>  	for_each_mem_cgroup_tree(iter, memcg) {
>  		struct css_task_iter it;
> @@ -1571,8 +1578,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
>  
>  		css_task_iter_start(&iter->css, &it);
>  		while ((task = css_task_iter_next(&it))) {
> -			switch (oom_scan_process_thread(task, totalpages, NULL,
> -							false)) {
> +			switch (oom_scan_process_thread(&oc, task, totalpages)) {
>  			case OOM_SCAN_SELECT:
>  				if (chosen)
>  					put_task_struct(chosen);
> @@ -1610,8 +1616,8 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
>  
>  	if (chosen) {
>  		points = chosen_points * 1000 / totalpages;
> -		oom_kill_process(chosen, gfp_mask, order, points, totalpages,
> -				 memcg, NULL, "Memory cgroup out of memory");
> +		oom_kill_process(&oc, chosen, points, totalpages, memcg,
> +				 "Memory cgroup out of memory");
>  	}
>  unlock:
>  	mutex_unlock(&oom_lock);
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -196,27 +196,26 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
>   * Determine the type of allocation constraint.
>   */
>  #ifdef CONFIG_NUMA
> -static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
> -				gfp_t gfp_mask, nodemask_t *nodemask,
> -				unsigned long *totalpages)
> +static enum oom_constraint constrained_alloc(struct oom_control *oc,
> +					     unsigned long *totalpages)
>  {
>  	struct zone *zone;
>  	struct zoneref *z;
> -	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
> +	enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask);
>  	bool cpuset_limited = false;
>  	int nid;
>  
>  	/* Default to all available memory */
>  	*totalpages = totalram_pages + total_swap_pages;
>  
> -	if (!zonelist)
> +	if (!oc->zonelist)
>  		return CONSTRAINT_NONE;
>  	/*
>  	 * Reach here only when __GFP_NOFAIL is used. So, we should avoid
>  	 * to kill current.We have to random task kill in this case.
>  	 * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now.
>  	 */
> -	if (gfp_mask & __GFP_THISNODE)
> +	if (oc->gfp_mask & __GFP_THISNODE)
>  		return CONSTRAINT_NONE;
>  
>  	/*
> @@ -224,17 +223,18 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
>  	 * the page allocator means a mempolicy is in effect.  Cpuset policy
>  	 * is enforced in get_page_from_freelist().
>  	 */
> -	if (nodemask && !nodes_subset(node_states[N_MEMORY], *nodemask)) {
> +	if (oc->nodemask &&
> +	    !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
>  		*totalpages = total_swap_pages;
> -		for_each_node_mask(nid, *nodemask)
> +		for_each_node_mask(nid, *oc->nodemask)
>  			*totalpages += node_spanned_pages(nid);
>  		return CONSTRAINT_MEMORY_POLICY;
>  	}
>  
>  	/* Check this allocation failure is caused by cpuset's wall function */
> -	for_each_zone_zonelist_nodemask(zone, z, zonelist,
> -			high_zoneidx, nodemask)
> -		if (!cpuset_zone_allowed(zone, gfp_mask))
> +	for_each_zone_zonelist_nodemask(zone, z, oc->zonelist,
> +			high_zoneidx, oc->nodemask)
> +		if (!cpuset_zone_allowed(zone, oc->gfp_mask))
>  			cpuset_limited = true;
>  
>  	if (cpuset_limited) {
> @@ -246,20 +246,18 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
>  	return CONSTRAINT_NONE;
>  }
>  #else
> -static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
> -				gfp_t gfp_mask, nodemask_t *nodemask,
> -				unsigned long *totalpages)
> +static enum oom_constraint constrained_alloc(struct oom_control *oc,
> +					     unsigned long *totalpages)
>  {
>  	*totalpages = totalram_pages + total_swap_pages;
>  	return CONSTRAINT_NONE;
>  }
>  #endif
>  
> -enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
> -		unsigned long totalpages, const nodemask_t *nodemask,
> -		bool force_kill)
> +enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
> +			struct task_struct *task, unsigned long totalpages)
>  {
> -	if (oom_unkillable_task(task, NULL, nodemask))
> +	if (oom_unkillable_task(task, NULL, oc->nodemask))
>  		return OOM_SCAN_CONTINUE;
>  
>  	/*
> @@ -267,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
>  	 * Don't allow any other task to have access to the reserves.
>  	 */
>  	if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
> -		if (!force_kill)
> +		if (!oc->force_kill)
>  			return OOM_SCAN_ABORT;
>  	}
>  	if (!task->mm)
> @@ -280,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
>  	if (oom_task_origin(task))
>  		return OOM_SCAN_SELECT;
>  
> -	if (task_will_free_mem(task) && !force_kill)
> +	if (task_will_free_mem(task) && !oc->force_kill)
>  		return OOM_SCAN_ABORT;
>  
>  	return OOM_SCAN_OK;
> @@ -289,12 +287,9 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
>  /*
>   * Simple selection loop. We chose the process with the highest
>   * number of 'points'.  Returns -1 on scan abort.
> - *
> - * (not docbooked, we don't want this one cluttering up the manual)
>   */
> -static struct task_struct *select_bad_process(unsigned int *ppoints,
> -		unsigned long totalpages, const nodemask_t *nodemask,
> -		bool force_kill)
> +static struct task_struct *select_bad_process(struct oom_control *oc,
> +		unsigned int *ppoints, unsigned long totalpages)
>  {
>  	struct task_struct *g, *p;
>  	struct task_struct *chosen = NULL;
> @@ -304,8 +299,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
>  	for_each_process_thread(g, p) {
>  		unsigned int points;
>  
> -		switch (oom_scan_process_thread(p, totalpages, nodemask,
> -						force_kill)) {
> +		switch (oom_scan_process_thread(oc, p, totalpages)) {
>  		case OOM_SCAN_SELECT:
>  			chosen = p;
>  			chosen_points = ULONG_MAX;
> @@ -318,7 +312,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
>  		case OOM_SCAN_OK:
>  			break;
>  		};
> -		points = oom_badness(p, NULL, nodemask, totalpages);
> +		points = oom_badness(p, NULL, oc->nodemask, totalpages);
>  		if (!points || points < chosen_points)
>  			continue;
>  		/* Prefer thread group leaders for display purposes */
> @@ -380,13 +374,13 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
>  	rcu_read_unlock();
>  }
>  
> -static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
> -			struct mem_cgroup *memcg, const nodemask_t *nodemask)
> +static void dump_header(struct oom_control *oc, struct task_struct *p,
> +			struct mem_cgroup *memcg)
>  {
>  	task_lock(current);
>  	pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
>  		"oom_score_adj=%hd\n",
> -		current->comm, gfp_mask, order,
> +		current->comm, oc->gfp_mask, oc->order,
>  		current->signal->oom_score_adj);
>  	cpuset_print_task_mems_allowed(current);
>  	task_unlock(current);
> @@ -396,7 +390,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
>  	else
>  		show_mem(SHOW_MEM_FILTER_NODES);
>  	if (sysctl_oom_dump_tasks)
> -		dump_tasks(memcg, nodemask);
> +		dump_tasks(memcg, oc->nodemask);
>  }
>  
>  /*
> @@ -487,10 +481,9 @@ void oom_killer_enable(void)
>   * Must be called while holding a reference to p, which will be released upon
>   * returning.
>   */
> -void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
> +void oom_kill_process(struct oom_control *oc, struct task_struct *p,
>  		      unsigned int points, unsigned long totalpages,
> -		      struct mem_cgroup *memcg, nodemask_t *nodemask,
> -		      const char *message)
> +		      struct mem_cgroup *memcg, const char *message)
>  {
>  	struct task_struct *victim = p;
>  	struct task_struct *child;
> @@ -514,7 +507,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
>  	task_unlock(p);
>  
>  	if (__ratelimit(&oom_rs))
> -		dump_header(p, gfp_mask, order, memcg, nodemask);
> +		dump_header(oc, p, memcg);
>  
>  	task_lock(p);
>  	pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
> @@ -537,7 +530,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
>  			/*
>  			 * oom_badness() returns 0 if the thread is unkillable
>  			 */
> -			child_points = oom_badness(child, memcg, nodemask,
> +			child_points = oom_badness(child, memcg, oc->nodemask,
>  								totalpages);
>  			if (child_points > victim_points) {
>  				put_task_struct(victim);
> @@ -600,8 +593,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
>  /*
>   * Determines whether the kernel must panic because of the panic_on_oom sysctl.
>   */
> -void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
> -			int order, const nodemask_t *nodemask,
> +void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
>  			struct mem_cgroup *memcg)
>  {
>  	if (likely(!sysctl_panic_on_oom))
> @@ -615,7 +607,7 @@ void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
>  		if (constraint != CONSTRAINT_NONE)
>  			return;
>  	}
> -	dump_header(NULL, gfp_mask, order, memcg, nodemask);
> +	dump_header(oc, NULL, memcg);
>  	panic("Out of memory: %s panic_on_oom is enabled\n",
>  		sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
>  }
> @@ -635,22 +627,16 @@ int unregister_oom_notifier(struct notifier_block *nb)
>  EXPORT_SYMBOL_GPL(unregister_oom_notifier);
>  
>  /**
> - * __out_of_memory - kill the "best" process when we run out of memory
> - * @zonelist: zonelist pointer
> - * @gfp_mask: memory allocation flags
> - * @order: amount of memory being requested as a power of 2
> - * @nodemask: nodemask passed to page allocator
> - * @force_kill: true if a task must be killed, even if others are exiting
> + * out_of_memory - kill the "best" process when we run out of memory
> + * @oc: pointer to struct oom_control
>   *
>   * If we run out of memory, we have the choice between either
>   * killing a random task (bad), letting the system crash (worse)
>   * OR try to be smart about which process to kill. Note that we
>   * don't have to be perfect here, we just have to be good.
>   */
> -bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
> -		   int order, nodemask_t *nodemask, bool force_kill)
> +bool out_of_memory(struct oom_control *oc)
>  {
> -	const nodemask_t *mpol_mask;
>  	struct task_struct *p;
>  	unsigned long totalpages;
>  	unsigned long freed = 0;
> @@ -684,30 +670,29 @@ bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
>  	 * Check if there were limitations on the allocation (only relevant for
>  	 * NUMA) that may require different handling.
>  	 */
> -	constraint = constrained_alloc(zonelist, gfp_mask, nodemask,
> -						&totalpages);
> -	mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL;
> -	check_panic_on_oom(constraint, gfp_mask, order, mpol_mask, NULL);
> +	constraint = constrained_alloc(oc, &totalpages);
> +	if (constraint != CONSTRAINT_MEMORY_POLICY)
> +		oc->nodemask = NULL;
> +	check_panic_on_oom(oc, constraint, NULL);
>  
>  	if (sysctl_oom_kill_allocating_task && current->mm &&
> -	    !oom_unkillable_task(current, NULL, nodemask) &&
> +	    !oom_unkillable_task(current, NULL, oc->nodemask) &&
>  	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
>  		get_task_struct(current);
> -		oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL,
> -				 nodemask,
> +		oom_kill_process(oc, current, 0, totalpages, NULL,
>  				 "Out of memory (oom_kill_allocating_task)");
>  		goto out;
>  	}
>  
> -	p = select_bad_process(&points, totalpages, mpol_mask, force_kill);
> +	p = select_bad_process(oc, &points, totalpages);
>  	/* Found nothing?!?! Either we hang forever, or we panic. */
>  	if (!p) {
> -		dump_header(NULL, gfp_mask, order, NULL, mpol_mask);
> +		dump_header(oc, NULL, NULL);
>  		panic("Out of memory and no killable processes...\n");
>  	}
>  	if (p != (void *)-1UL) {
> -		oom_kill_process(p, gfp_mask, order, points, totalpages, NULL,
> -				 nodemask, "Out of memory");
> +		oom_kill_process(oc, p, points, totalpages, NULL,
> +				 "Out of memory");
>  		killed = 1;
>  	}
>  out:
> @@ -728,13 +713,21 @@ out:
>   */
>  void pagefault_out_of_memory(void)
>  {
> +	struct oom_control oc = {
> +		.zonelist = NULL,
> +		.nodemask = NULL,
> +		.gfp_mask = 0,
> +		.order = 0,
> +		.force_kill = false,
> +	};
> +
>  	if (mem_cgroup_oom_synchronize(true))
>  		return;
>  
>  	if (!mutex_trylock(&oom_lock))
>  		return;
>  
> -	if (!out_of_memory(NULL, 0, 0, NULL, false)) {
> +	if (!out_of_memory(&oc)) {
>  		/*
>  		 * There shouldn't be any user tasks runnable while the
>  		 * OOM killer is disabled, so the current task has to
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2680,6 +2680,13 @@ static inline struct page *
>  __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
>  	const struct alloc_context *ac, unsigned long *did_some_progress)
>  {
> +	struct oom_control oc = {
> +		.zonelist = ac->zonelist,
> +		.nodemask = ac->nodemask,
> +		.gfp_mask = gfp_mask,
> +		.order = order,
> +		.force_kill = false,
> +	};
>  	struct page *page;
>  
>  	*did_some_progress = 0;
> @@ -2731,8 +2738,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
>  			goto out;
>  	}
>  	/* Exhausted what can be done so it's blamo time */
> -	if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false)
> -			|| WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
> +	if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
>  		*did_some_progress = 1;
>  out:
>  	mutex_unlock(&oom_lock);

-- 
Michal Hocko
SUSE Labs


* Re: [patch 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq
From: Michal Hocko @ 2015-06-19  7:32 UTC
  To: David Rientjes; +Cc: Andrew Morton, linux-kernel, linux-mm

On Thu 18-06-15 16:00:07, David Rientjes wrote:
> The force_kill member of struct oom_context isn't needed if an order of
> -1 is used instead.

But this doesn't make much sense to me. It is not like we would _have_
to spare a few bytes here. The meaning of force_kill is clear, while an
order with a weird value is a hack. It is harder to follow without any
good reason.

> This patch introduces no functional change.
> 
> Signed-off-by: David Rientjes <rientjes@google.com>
> ---
>  drivers/tty/sysrq.c | 3 +--
>  include/linux/oom.h | 1 -
>  mm/memcontrol.c     | 1 -
>  mm/oom_kill.c       | 5 ++---
>  mm/page_alloc.c     | 1 -
>  5 files changed, 3 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
> --- a/drivers/tty/sysrq.c
> +++ b/drivers/tty/sysrq.c
> @@ -358,8 +358,7 @@ static void moom_callback(struct work_struct *ignored)
>  		.zonelist = node_zonelist(first_memory_node, gfp_mask),
>  		.nodemask = NULL,
>  		.gfp_mask = gfp_mask,
> -		.order = 0,
> -		.force_kill = true,
> +		.order = -1,
>  	};
>  
>  	mutex_lock(&oom_lock);
> diff --git a/include/linux/oom.h b/include/linux/oom.h
> --- a/include/linux/oom.h
> +++ b/include/linux/oom.h
> @@ -17,7 +17,6 @@ struct oom_control {
>  	nodemask_t	*nodemask;
>  	gfp_t		gfp_mask;
>  	int		order;
> -	bool		force_kill;
>  };
>  
>  /*
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1550,7 +1550,6 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
>  		.nodemask = NULL,
>  		.gfp_mask = gfp_mask,
>  		.order = order,
> -		.force_kill = false,
>  	};
>  	struct mem_cgroup *iter;
>  	unsigned long chosen_points = 0;
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -265,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
>  	 * Don't allow any other task to have access to the reserves.
>  	 */
>  	if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
> -		if (!oc->force_kill)
> +		if (oc->order != -1)
>  			return OOM_SCAN_ABORT;
>  	}
>  	if (!task->mm)
> @@ -278,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
>  	if (oom_task_origin(task))
>  		return OOM_SCAN_SELECT;
>  
> -	if (task_will_free_mem(task) && !oc->force_kill)
> +	if (task_will_free_mem(task) && oc->order != -1)
>  		return OOM_SCAN_ABORT;
>  
>  	return OOM_SCAN_OK;
> @@ -718,7 +718,6 @@ void pagefault_out_of_memory(void)
>  		.nodemask = NULL,
>  		.gfp_mask = 0,
>  		.order = 0,
> -		.force_kill = false,
>  	};
>  
>  	if (mem_cgroup_oom_synchronize(true))
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2685,7 +2685,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
>  		.nodemask = ac->nodemask,
>  		.gfp_mask = gfp_mask,
>  		.order = order,
> -		.force_kill = false,
>  	};
>  	struct page *page;
>  

-- 
Michal Hocko
SUSE Labs


* Re: [patch 1/3] mm, oom: organize oom context into struct
From: David Rientjes @ 2015-06-30 22:46 UTC
  To: Sergey Senozhatsky; +Cc: Andrew Morton, Michal Hocko, linux-kernel, linux-mm

On Fri, 19 Jun 2015, Sergey Senozhatsky wrote:

> > There are essential elements to an oom context that are passed around to
> > multiple functions.
> > 
> > Organize these elements into a new struct, struct oom_context, that
> > specifies the context for an oom condition.
> > 
> 
> s/oom_context/oom_control/ ?
> 

I think it would be confused with the existing memory.oom_control for 
memcg.


* Re: [patch 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq
From: David Rientjes @ 2015-06-30 22:50 UTC
  To: Michal Hocko; +Cc: Andrew Morton, linux-kernel, linux-mm

On Fri, 19 Jun 2015, Michal Hocko wrote:

> > The force_kill member of struct oom_context isn't needed if an order of
> > -1 is used instead.
> 
> But this doesn't make much sense to me. It is not like we would _have_
> to spare a few bytes here. The meaning of force_kill is clear, while an
> order with a weird value is a hack. It is harder to follow without any
> good reason.
> 

To me, this is the same as treating order == -1 as special in 
struct compact_control, meaning that compaction was triggered from the 
command line and we really want to fully compact memory.  It seems to 
have a nice symmetry.
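
For context, the compaction convention being referenced, paraphrased
as a standalone sketch (simplified, not the exact mm/compaction.c
code): order == -1 marks an explicit request, e.g. via
/proc/sys/vm/compact_memory, so early-bail heuristics are skipped.

struct compact_control {
	int order;	/* -1 = explicit request, else failing alloc order */
};

/* Stand-in for the real watermark heuristic. */
static int watermarks_ok(const struct compact_control *cc)
{
	(void)cc;
	return 0;
}

static int compaction_wanted(const struct compact_control *cc)
{
	if (cc->order == -1)	/* explicit full compaction: never bail */
		return 1;
	return watermarks_ok(cc);
}

int main(void)
{
	struct compact_control explicit_cc = { .order = -1 };

	return !compaction_wanted(&explicit_cc);	/* exits 0: proceeds */
}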


* Re: [patch 1/3] mm, oom: organize oom context into struct
From: Sergey Senozhatsky @ 2015-07-01  0:11 UTC
  To: David Rientjes
  Cc: Sergey Senozhatsky, Andrew Morton, Michal Hocko, linux-kernel, linux-mm

On (06/30/15 15:46), David Rientjes wrote:
> > > There are essential elements to an oom context that are passed around to
> > > multiple functions.
> > > 
> > > Organize these elements into a new struct, struct oom_context, that
> > > specifies the context for an oom condition.
> > > 
> > 
> > s/oom_context/oom_control/ ?
> > 
> 
> I think it would be confused with the existing memory.oom_control for 
> memcg.
> 

Hello David,

Sorry, I meant that in the commit message you say

:Organize these elements into a new struct, struct oom_context, that
:specifies the context for an oom condition.

but you define and use `struct oom_control' (not `struct oom_context')

[..]

+       const gfp_t gfp_mask = GFP_KERNEL;
+       struct oom_control oc = {
+               .zonelist = node_zonelist(first_memory_node, gfp_mask),
+               .nodemask = NULL,
+               .gfp_mask = gfp_mask,
+               .order = 0,
+               .force_kill = true,
+       };
+

[..]

+struct oom_control {
+       struct zonelist *zonelist;
+       nodemask_t      *nodemask;
+       gfp_t           gfp_mask;
+       int             order;
+       bool            force_kill;
+};

[..]

etc.

	-ss


* Re: [patch 1/3] mm, oom: organize oom context into struct
From: David Rientjes @ 2015-07-01 21:29 UTC
  To: Sergey Senozhatsky; +Cc: Andrew Morton, Michal Hocko, linux-kernel, linux-mm

On Wed, 1 Jul 2015, Sergey Senozhatsky wrote:

> On (06/30/15 15:46), David Rientjes wrote:
> > > > There are essential elements to an oom context that are passed around to
> > > > multiple functions.
> > > > 
> > > > Organize these elements into a new struct, struct oom_context, that
> > > > specifies the context for an oom condition.
> > > > 
> > > 
> > > s/oom_context/oom_control/ ?
> > > 
> > 
> > I think it would be confused with the existing memory.oom_control for 
> > memcg.
> > 
> 
> Hello David,
> 
> Sorry, I meant that in commit message you say
> 
> :Organize these elements into a new struct, struct oom_context, that
> :specifies the context for an oom condition.
> 
> but define and use `struct oom_control' (not `struct oom_context')
> 

Oh, point very well taken, thank you.


* [patch v2 1/3] mm, oom: organize oom context into struct
From: David Rientjes @ 2015-07-01 21:37 UTC
  To: Andrew Morton; +Cc: Sergey Senozhatsky, Michal Hocko, linux-kernel, linux-mm

There are essential elements to an oom context that are passed around to
multiple functions.

Organize these elements into a new struct, struct oom_control, that
specifies the context for an oom condition.

This patch introduces no functional change.

Acked-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: David Rientjes <rientjes@google.com>
---
 v2: fix changelog typo per Sergey

 drivers/tty/sysrq.c |  12 +++++-
 include/linux/oom.h |  25 +++++++-----
 mm/memcontrol.c     |  16 +++++---
 mm/oom_kill.c       | 115 ++++++++++++++++++++++++----------------------------
 mm/page_alloc.c     |  10 ++++-
 5 files changed, 98 insertions(+), 80 deletions(-)

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -353,9 +353,17 @@ static struct sysrq_key_op sysrq_term_op = {
 
 static void moom_callback(struct work_struct *ignored)
 {
+	const gfp_t gfp_mask = GFP_KERNEL;
+	struct oom_control oc = {
+		.zonelist = node_zonelist(first_memory_node, gfp_mask),
+		.nodemask = NULL,
+		.gfp_mask = gfp_mask,
+		.order = 0,
+		.force_kill = true,
+	};
+
 	mutex_lock(&oom_lock);
-	if (!out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL),
-			   GFP_KERNEL, 0, NULL, true))
+	if (!out_of_memory(&oc))
 		pr_info("OOM request ignored because killer is disabled\n");
 	mutex_unlock(&oom_lock);
 }
diff --git a/include/linux/oom.h b/include/linux/oom.h
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -12,6 +12,14 @@ struct notifier_block;
 struct mem_cgroup;
 struct task_struct;
 
+struct oom_control {
+	struct zonelist *zonelist;
+	nodemask_t	*nodemask;
+	gfp_t		gfp_mask;
+	int		order;
+	bool		force_kill;
+};
+
 /*
  * Types of limitations to the nodes from which allocations may occur
  */
@@ -57,21 +65,18 @@ extern unsigned long oom_badness(struct task_struct *p,
 
 extern int oom_kills_count(void);
 extern void note_oom_kill(void);
-extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
+extern void oom_kill_process(struct oom_control *oc, struct task_struct *p,
 			     unsigned int points, unsigned long totalpages,
-			     struct mem_cgroup *memcg, nodemask_t *nodemask,
-			     const char *message);
+			     struct mem_cgroup *memcg, const char *message);
 
-extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
-			       int order, const nodemask_t *nodemask,
+extern void check_panic_on_oom(struct oom_control *oc,
+			       enum oom_constraint constraint,
 			       struct mem_cgroup *memcg);
 
-extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
-		unsigned long totalpages, const nodemask_t *nodemask,
-		bool force_kill);
+extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
+		struct task_struct *task, unsigned long totalpages);
 
-extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
-		int order, nodemask_t *mask, bool force_kill);
+extern bool out_of_memory(struct oom_control *oc);
 
 extern void exit_oom_victim(void);
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1545,6 +1545,13 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
 static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 				     int order)
 {
+	struct oom_control oc = {
+		.zonelist = NULL,
+		.nodemask = NULL,
+		.gfp_mask = gfp_mask,
+		.order = order,
+		.force_kill = false,
+	};
 	struct mem_cgroup *iter;
 	unsigned long chosen_points = 0;
 	unsigned long totalpages;
@@ -1563,7 +1570,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		goto unlock;
 	}
 
-	check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg);
+	check_panic_on_oom(&oc, CONSTRAINT_MEMCG, memcg);
 	totalpages = mem_cgroup_get_limit(memcg) ? : 1;
 	for_each_mem_cgroup_tree(iter, memcg) {
 		struct css_task_iter it;
@@ -1571,8 +1578,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 
 		css_task_iter_start(&iter->css, &it);
 		while ((task = css_task_iter_next(&it))) {
-			switch (oom_scan_process_thread(task, totalpages, NULL,
-							false)) {
+			switch (oom_scan_process_thread(&oc, task, totalpages)) {
 			case OOM_SCAN_SELECT:
 				if (chosen)
 					put_task_struct(chosen);
@@ -1610,8 +1616,8 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 
 	if (chosen) {
 		points = chosen_points * 1000 / totalpages;
-		oom_kill_process(chosen, gfp_mask, order, points, totalpages,
-				 memcg, NULL, "Memory cgroup out of memory");
+		oom_kill_process(&oc, chosen, points, totalpages, memcg,
+				 "Memory cgroup out of memory");
 	}
 unlock:
 	mutex_unlock(&oom_lock);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -196,27 +196,26 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
  * Determine the type of allocation constraint.
  */
 #ifdef CONFIG_NUMA
-static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
-				gfp_t gfp_mask, nodemask_t *nodemask,
-				unsigned long *totalpages)
+static enum oom_constraint constrained_alloc(struct oom_control *oc,
+					     unsigned long *totalpages)
 {
 	struct zone *zone;
 	struct zoneref *z;
-	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
+	enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask);
 	bool cpuset_limited = false;
 	int nid;
 
 	/* Default to all available memory */
 	*totalpages = totalram_pages + total_swap_pages;
 
-	if (!zonelist)
+	if (!oc->zonelist)
 		return CONSTRAINT_NONE;
 	/*
 	 * Reach here only when __GFP_NOFAIL is used. So, we should avoid
 	 * to kill current.We have to random task kill in this case.
 	 * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now.
 	 */
-	if (gfp_mask & __GFP_THISNODE)
+	if (oc->gfp_mask & __GFP_THISNODE)
 		return CONSTRAINT_NONE;
 
 	/*
@@ -224,17 +223,18 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
 	 * the page allocator means a mempolicy is in effect.  Cpuset policy
 	 * is enforced in get_page_from_freelist().
 	 */
-	if (nodemask && !nodes_subset(node_states[N_MEMORY], *nodemask)) {
+	if (oc->nodemask &&
+	    !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
 		*totalpages = total_swap_pages;
-		for_each_node_mask(nid, *nodemask)
+		for_each_node_mask(nid, *oc->nodemask)
 			*totalpages += node_spanned_pages(nid);
 		return CONSTRAINT_MEMORY_POLICY;
 	}
 
 	/* Check this allocation failure is caused by cpuset's wall function */
-	for_each_zone_zonelist_nodemask(zone, z, zonelist,
-			high_zoneidx, nodemask)
-		if (!cpuset_zone_allowed(zone, gfp_mask))
+	for_each_zone_zonelist_nodemask(zone, z, oc->zonelist,
+			high_zoneidx, oc->nodemask)
+		if (!cpuset_zone_allowed(zone, oc->gfp_mask))
 			cpuset_limited = true;
 
 	if (cpuset_limited) {
@@ -246,20 +246,18 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
 	return CONSTRAINT_NONE;
 }
 #else
-static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
-				gfp_t gfp_mask, nodemask_t *nodemask,
-				unsigned long *totalpages)
+static enum oom_constraint constrained_alloc(struct oom_control *oc,
+					     unsigned long *totalpages)
 {
 	*totalpages = totalram_pages + total_swap_pages;
 	return CONSTRAINT_NONE;
 }
 #endif
 
-enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
-		unsigned long totalpages, const nodemask_t *nodemask,
-		bool force_kill)
+enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
+			struct task_struct *task, unsigned long totalpages)
 {
-	if (oom_unkillable_task(task, NULL, nodemask))
+	if (oom_unkillable_task(task, NULL, oc->nodemask))
 		return OOM_SCAN_CONTINUE;
 
 	/*
@@ -267,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 	 * Don't allow any other task to have access to the reserves.
 	 */
 	if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
-		if (!force_kill)
+		if (!oc->force_kill)
 			return OOM_SCAN_ABORT;
 	}
 	if (!task->mm)
@@ -280,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 	if (oom_task_origin(task))
 		return OOM_SCAN_SELECT;
 
-	if (task_will_free_mem(task) && !force_kill)
+	if (task_will_free_mem(task) && !oc->force_kill)
 		return OOM_SCAN_ABORT;
 
 	return OOM_SCAN_OK;
@@ -289,12 +287,9 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 /*
  * Simple selection loop. We chose the process with the highest
  * number of 'points'.  Returns -1 on scan abort.
- *
- * (not docbooked, we don't want this one cluttering up the manual)
  */
-static struct task_struct *select_bad_process(unsigned int *ppoints,
-		unsigned long totalpages, const nodemask_t *nodemask,
-		bool force_kill)
+static struct task_struct *select_bad_process(struct oom_control *oc,
+		unsigned int *ppoints, unsigned long totalpages)
 {
 	struct task_struct *g, *p;
 	struct task_struct *chosen = NULL;
@@ -304,8 +299,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 	for_each_process_thread(g, p) {
 		unsigned int points;
 
-		switch (oom_scan_process_thread(p, totalpages, nodemask,
-						force_kill)) {
+		switch (oom_scan_process_thread(oc, p, totalpages)) {
 		case OOM_SCAN_SELECT:
 			chosen = p;
 			chosen_points = ULONG_MAX;
@@ -318,7 +312,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 		case OOM_SCAN_OK:
 			break;
 		};
-		points = oom_badness(p, NULL, nodemask, totalpages);
+		points = oom_badness(p, NULL, oc->nodemask, totalpages);
 		if (!points || points < chosen_points)
 			continue;
 		/* Prefer thread group leaders for display purposes */
@@ -380,13 +374,13 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
 	rcu_read_unlock();
 }
 
-static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
-			struct mem_cgroup *memcg, const nodemask_t *nodemask)
+static void dump_header(struct oom_control *oc, struct task_struct *p,
+			struct mem_cgroup *memcg)
 {
 	task_lock(current);
 	pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
 		"oom_score_adj=%hd\n",
-		current->comm, gfp_mask, order,
+		current->comm, oc->gfp_mask, oc->order,
 		current->signal->oom_score_adj);
 	cpuset_print_task_mems_allowed(current);
 	task_unlock(current);
@@ -396,7 +390,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 	else
 		show_mem(SHOW_MEM_FILTER_NODES);
 	if (sysctl_oom_dump_tasks)
-		dump_tasks(memcg, nodemask);
+		dump_tasks(memcg, oc->nodemask);
 }
 
 /*
@@ -487,10 +481,9 @@ void oom_killer_enable(void)
  * Must be called while holding a reference to p, which will be released upon
  * returning.
  */
-void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
+void oom_kill_process(struct oom_control *oc, struct task_struct *p,
 		      unsigned int points, unsigned long totalpages,
-		      struct mem_cgroup *memcg, nodemask_t *nodemask,
-		      const char *message)
+		      struct mem_cgroup *memcg, const char *message)
 {
 	struct task_struct *victim = p;
 	struct task_struct *child;
@@ -514,7 +507,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	task_unlock(p);
 
 	if (__ratelimit(&oom_rs))
-		dump_header(p, gfp_mask, order, memcg, nodemask);
+		dump_header(oc, p, memcg);
 
 	task_lock(p);
 	pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
@@ -537,7 +530,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 			/*
 			 * oom_badness() returns 0 if the thread is unkillable
 			 */
-			child_points = oom_badness(child, memcg, nodemask,
+			child_points = oom_badness(child, memcg, oc->nodemask,
 								totalpages);
 			if (child_points > victim_points) {
 				put_task_struct(victim);
@@ -600,8 +593,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 /*
  * Determines whether the kernel must panic because of the panic_on_oom sysctl.
  */
-void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
-			int order, const nodemask_t *nodemask,
+void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
 			struct mem_cgroup *memcg)
 {
 	if (likely(!sysctl_panic_on_oom))
@@ -615,7 +607,7 @@ void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
 		if (constraint != CONSTRAINT_NONE)
 			return;
 	}
-	dump_header(NULL, gfp_mask, order, memcg, nodemask);
+	dump_header(oc, NULL, memcg);
 	panic("Out of memory: %s panic_on_oom is enabled\n",
 		sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
 }
@@ -635,22 +627,16 @@ int unregister_oom_notifier(struct notifier_block *nb)
 EXPORT_SYMBOL_GPL(unregister_oom_notifier);
 
 /**
- * __out_of_memory - kill the "best" process when we run out of memory
- * @zonelist: zonelist pointer
- * @gfp_mask: memory allocation flags
- * @order: amount of memory being requested as a power of 2
- * @nodemask: nodemask passed to page allocator
- * @force_kill: true if a task must be killed, even if others are exiting
+ * out_of_memory - kill the "best" process when we run out of memory
+ * @oc: pointer to struct oom_control
  *
  * If we run out of memory, we have the choice between either
  * killing a random task (bad), letting the system crash (worse)
  * OR try to be smart about which process to kill. Note that we
  * don't have to be perfect here, we just have to be good.
  */
-bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
-		   int order, nodemask_t *nodemask, bool force_kill)
+bool out_of_memory(struct oom_control *oc)
 {
-	const nodemask_t *mpol_mask;
 	struct task_struct *p;
 	unsigned long totalpages;
 	unsigned long freed = 0;
@@ -684,30 +670,29 @@ bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 	 * Check if there were limitations on the allocation (only relevant for
 	 * NUMA) that may require different handling.
 	 */
-	constraint = constrained_alloc(zonelist, gfp_mask, nodemask,
-						&totalpages);
-	mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL;
-	check_panic_on_oom(constraint, gfp_mask, order, mpol_mask, NULL);
+	constraint = constrained_alloc(oc, &totalpages);
+	if (constraint != CONSTRAINT_MEMORY_POLICY)
+		oc->nodemask = NULL;
+	check_panic_on_oom(oc, constraint, NULL);
 
 	if (sysctl_oom_kill_allocating_task && current->mm &&
-	    !oom_unkillable_task(current, NULL, nodemask) &&
+	    !oom_unkillable_task(current, NULL, oc->nodemask) &&
 	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
 		get_task_struct(current);
-		oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL,
-				 nodemask,
+		oom_kill_process(oc, current, 0, totalpages, NULL,
 				 "Out of memory (oom_kill_allocating_task)");
 		goto out;
 	}
 
-	p = select_bad_process(&points, totalpages, mpol_mask, force_kill);
+	p = select_bad_process(oc, &points, totalpages);
 	/* Found nothing?!?! Either we hang forever, or we panic. */
 	if (!p) {
-		dump_header(NULL, gfp_mask, order, NULL, mpol_mask);
+		dump_header(oc, NULL, NULL);
 		panic("Out of memory and no killable processes...\n");
 	}
 	if (p != (void *)-1UL) {
-		oom_kill_process(p, gfp_mask, order, points, totalpages, NULL,
-				 nodemask, "Out of memory");
+		oom_kill_process(oc, p, points, totalpages, NULL,
+				 "Out of memory");
 		killed = 1;
 	}
 out:
@@ -728,13 +713,21 @@ out:
  */
 void pagefault_out_of_memory(void)
 {
+	struct oom_control oc = {
+		.zonelist = NULL,
+		.nodemask = NULL,
+		.gfp_mask = 0,
+		.order = 0,
+		.force_kill = false,
+	};
+
 	if (mem_cgroup_oom_synchronize(true))
 		return;
 
 	if (!mutex_trylock(&oom_lock))
 		return;
 
-	if (!out_of_memory(NULL, 0, 0, NULL, false)) {
+	if (!out_of_memory(&oc)) {
 		/*
 		 * There shouldn't be any user tasks runnable while the
 		 * OOM killer is disabled, so the current task has to
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2680,6 +2680,13 @@ static inline struct page *
 __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	const struct alloc_context *ac, unsigned long *did_some_progress)
 {
+	struct oom_control oc = {
+		.zonelist = ac->zonelist,
+		.nodemask = ac->nodemask,
+		.gfp_mask = gfp_mask,
+		.order = order,
+		.force_kill = false,
+	};
 	struct page *page;
 
 	*did_some_progress = 0;
@@ -2731,8 +2738,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 			goto out;
 	}
 	/* Exhausted what can be done so it's blamo time */
-	if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false)
-			|| WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
+	if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
 		*did_some_progress = 1;
 out:
 	mutex_unlock(&oom_lock);

* [patch v2 2/3] mm, oom: organize oom context into struct
  2015-07-01 21:37 ` [patch v2 " David Rientjes
@ 2015-07-01 21:37   ` David Rientjes
  2015-07-02  6:01     ` Michal Hocko
  2015-07-01 21:37   ` [patch v2 3/3] " David Rientjes
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 21+ messages in thread
From: David Rientjes @ 2015-07-01 21:37 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Sergey Senozhatsky, Michal Hocko, linux-kernel, linux-mm

The force_kill member of struct oom_control isn't needed if an order of
-1 is used instead.  This is the same as order == -1 in
struct compact_control which requires full memory compaction.

This patch introduces no functional change.
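
As a sketch of the resulting convention (the helper below is hypothetical
and not part of this patch), callers can test the sentinel instead of a
dedicated flag:

	/* Hypothetical sketch: order == -1 marks a forced (sysrq) oom kill */
	static inline bool oom_forced_kill(const struct oom_control *oc)
	{
		return oc->order == -1;
	}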

Signed-off-by: David Rientjes <rientjes@google.com>
---
 v2: fix changelog typo per Sergey

 drivers/tty/sysrq.c | 3 +--
 include/linux/oom.h | 1 -
 mm/memcontrol.c     | 1 -
 mm/oom_kill.c       | 5 ++---
 mm/page_alloc.c     | 1 -
 5 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -358,8 +358,7 @@ static void moom_callback(struct work_struct *ignored)
 		.zonelist = node_zonelist(first_memory_node, gfp_mask),
 		.nodemask = NULL,
 		.gfp_mask = gfp_mask,
-		.order = 0,
-		.force_kill = true,
+		.order = -1,
 	};
 
 	mutex_lock(&oom_lock);
diff --git a/include/linux/oom.h b/include/linux/oom.h
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -17,7 +17,6 @@ struct oom_control {
 	nodemask_t	*nodemask;
 	gfp_t		gfp_mask;
 	int		order;
-	bool		force_kill;
 };
 
 /*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1550,7 +1550,6 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		.nodemask = NULL,
 		.gfp_mask = gfp_mask,
 		.order = order,
-		.force_kill = false,
 	};
 	struct mem_cgroup *iter;
 	unsigned long chosen_points = 0;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -265,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
 	 * Don't allow any other task to have access to the reserves.
 	 */
 	if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
-		if (!oc->force_kill)
+		if (oc->order != -1)
 			return OOM_SCAN_ABORT;
 	}
 	if (!task->mm)
@@ -278,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
 	if (oom_task_origin(task))
 		return OOM_SCAN_SELECT;
 
-	if (task_will_free_mem(task) && !oc->force_kill)
+	if (task_will_free_mem(task) && oc->order != -1)
 		return OOM_SCAN_ABORT;
 
 	return OOM_SCAN_OK;
@@ -718,7 +718,6 @@ void pagefault_out_of_memory(void)
 		.nodemask = NULL,
 		.gfp_mask = 0,
 		.order = 0,
-		.force_kill = false,
 	};
 
 	if (mem_cgroup_oom_synchronize(true))
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2685,7 +2685,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 		.nodemask = ac->nodemask,
 		.gfp_mask = gfp_mask,
 		.order = order,
-		.force_kill = false,
 	};
 	struct page *page;
 

* [patch v2 3/3] mm, oom: organize oom context into struct
  2015-07-01 21:37 ` [patch v2 " David Rientjes
  2015-07-01 21:37   ` [patch v2 2/3] " David Rientjes
@ 2015-07-01 21:37   ` David Rientjes
  2015-07-02  6:00   ` [patch v2 1/3] " Michal Hocko
  2015-07-08 23:42   ` [patch v3 " David Rientjes
  3 siblings, 0 replies; 21+ messages in thread
From: David Rientjes @ 2015-07-01 21:37 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Sergey Senozhatsky, Michal Hocko, linux-kernel, linux-mm

Sysrq+f is used to kill a process either for debugging or when the VM is
otherwise unresponsive.

It is not intended to trigger a panic when no process may be killed.

Avoid panicking the system for sysrq+f when no processes are killed.
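
For reference, this path can be exercised by hand, assuming sysrq is
enabled via /proc/sys/kernel/sysrq:

	echo f > /proc/sysrq-trigger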

Suggested-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: David Rientjes <rientjes@google.com>
---
 v2: no change

 Documentation/sysrq.txt | 3 ++-
 mm/oom_kill.c           | 7 +++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -75,7 +75,8 @@ On all -  write a character to /proc/sysrq-trigger.  e.g.:
 
 'e'     - Send a SIGTERM to all processes, except for init.
 
-'f'	- Will call oom_kill to kill a memory hog process.
+'f'	- Will call the oom killer to kill a memory hog process, but do not
+	  panic if nothing can be killed.
 
 'g'	- Used by kgdb (kernel debugger)
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -607,6 +607,9 @@ void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
 		if (constraint != CONSTRAINT_NONE)
 			return;
 	}
+	/* Do not panic for oom kills triggered by sysrq */
+	if (oc->order == -1)
+		return;
 	dump_header(oc, NULL, memcg);
 	panic("Out of memory: %s panic_on_oom is enabled\n",
 		sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
@@ -686,11 +689,11 @@ bool out_of_memory(struct oom_control *oc)
 
 	p = select_bad_process(oc, &points, totalpages);
 	/* Found nothing?!?! Either we hang forever, or we panic. */
-	if (!p) {
+	if (!p && oc->order != -1) {
 		dump_header(oc, NULL, NULL);
 		panic("Out of memory and no killable processes...\n");
 	}
-	if (p != (void *)-1UL) {
+	if (p && p != (void *)-1UL) {
 		oom_kill_process(oc, p, points, totalpages, NULL,
 				 "Out of memory");
 		killed = 1;

* Re: [patch v2 1/3] mm, oom: organize oom context into struct
  2015-07-01 21:37 ` [patch v2 " David Rientjes
  2015-07-01 21:37   ` [patch v2 2/3] " David Rientjes
  2015-07-01 21:37   ` [patch v2 3/3] " David Rientjes
@ 2015-07-02  6:00   ` Michal Hocko
  2015-07-08 23:42   ` [patch v3 " David Rientjes
  3 siblings, 0 replies; 21+ messages in thread
From: Michal Hocko @ 2015-07-02  6:00 UTC (permalink / raw)
  To: David Rientjes; +Cc: Andrew Morton, Sergey Senozhatsky, linux-kernel, linux-mm

JFYI, I have added this patch of yours to my series, which fixes another
sysrq+f issue and is waiting for the merge window to close.

On Wed 01-07-15 14:37:07, David Rientjes wrote:
> There are essential elements to an oom context that are passed around to
> multiple functions.
> 
> Organize these elements into a new struct, struct oom_control, that
> specifies the context for an oom condition.
> 
> This patch introduces no functional change.
> 
> Acked-by: Michal Hocko <mhocko@suse.cz>
> Signed-off-by: David Rientjes <rientjes@google.com>
> [...]

-- 
Michal Hocko
SUSE Labs

* Re: [patch v2 2/3] mm, oom: organize oom context into struct
  2015-07-01 21:37   ` [patch v2 2/3] " David Rientjes
@ 2015-07-02  6:01     ` Michal Hocko
  2015-07-08 23:28       ` David Rientjes
  0 siblings, 1 reply; 21+ messages in thread
From: Michal Hocko @ 2015-07-02  6:01 UTC (permalink / raw)
  To: David Rientjes; +Cc: Andrew Morton, Sergey Senozhatsky, linux-kernel, linux-mm

On Wed 01-07-15 14:37:14, David Rientjes wrote:
> The force_kill member of struct oom_control isn't needed if an order of
> -1 is used instead.  This is the same as order == -1 in
> struct compact_control which requires full memory compaction.
> 
> This patch introduces no functional change.

But it obscures the code, and I really dislike this change, as pointed
out previously.

> Signed-off-by: David Rientjes <rientjes@google.com>
> [...]

-- 
Michal Hocko
SUSE Labs

* Re: [patch v2 2/3] mm, oom: organize oom context into struct
  2015-07-02  6:01     ` Michal Hocko
@ 2015-07-08 23:28       ` David Rientjes
  0 siblings, 0 replies; 21+ messages in thread
From: David Rientjes @ 2015-07-08 23:28 UTC (permalink / raw)
  To: Michal Hocko; +Cc: Andrew Morton, Sergey Senozhatsky, linux-kernel, linux-mm

On Thu, 2 Jul 2015, Michal Hocko wrote:

> On Wed 01-07-15 14:37:14, David Rientjes wrote:
> > The force_kill member of struct oom_control isn't needed if an order of
> > -1 is used instead.  This is the same as order == -1 in
> > struct compact_control which requires full memory compaction.
> > 
> > This patch introduces no functional change.
> 
> But it obscures the code and I really dislike this change as pointed out
> previously.
> 

The oom killer is often called at the end of a very lengthy call chain,
since memory allocation itself can happen deep in the stack.  Thus,
reducing the amount of stack memory used, even by a small bool, is
helpful.  This is especially true when another such struct, struct
compact_control, does the exact same thing by using order == -1 to mean
explicit compaction.

I'm personally tired of fixing stack overflows, and you're arguing
against an "obscurity" that already occurs in other parts of the mm code.
oc->force_kill has no reason to exist, and thus it's removed in this
patch, for good reason.
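
To put rough numbers on the first point (a sketch only; the exact layout
is arch- and config-dependent, assuming x86_64 with 8-byte pointers):

	struct oom_control {			/* with force_kill  */
		struct zonelist *zonelist;	/*  8 bytes         */
		nodemask_t	*nodemask;	/*  8 bytes         */
		gfp_t		gfp_mask;	/*  4 bytes         */
		int		order;		/*  4 bytes         */
		bool		force_kill;	/*  1 + 7 padding   */
	};					/* sizeof() == 32   */

Without force_kill the struct packs to 24 bytes, one word less on the
stack at every oom call site.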

* [patch v3 1/3] mm, oom: organize oom context into struct
  2015-07-01 21:37 ` [patch v2 " David Rientjes
                     ` (2 preceding siblings ...)
  2015-07-02  6:00   ` [patch v2 1/3] " Michal Hocko
@ 2015-07-08 23:42   ` David Rientjes
  2015-07-08 23:42     ` [patch v3 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq David Rientjes
                       ` (2 more replies)
  3 siblings, 3 replies; 21+ messages in thread
From: David Rientjes @ 2015-07-08 23:42 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Sergey Senozhatsky, Michal Hocko, linux-kernel, linux-mm

There are essential elements to an oom context that are passed around to
multiple functions.

Organize these elements into a new struct, struct oom_control, that
specifies the context for an oom condition.

This patch introduces no functional change.

Acked-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: David Rientjes <rientjes@google.com>
---
 v2: fix changelog typo per Sergey
 v3: no change

 drivers/tty/sysrq.c |  12 +++++-
 include/linux/oom.h |  25 +++++++-----
 mm/memcontrol.c     |  16 +++++---
 mm/oom_kill.c       | 115 ++++++++++++++++++++++++----------------------------
 mm/page_alloc.c     |  10 ++++-
 5 files changed, 98 insertions(+), 80 deletions(-)

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -353,9 +353,17 @@ static struct sysrq_key_op sysrq_term_op = {
 
 static void moom_callback(struct work_struct *ignored)
 {
+	const gfp_t gfp_mask = GFP_KERNEL;
+	struct oom_control oc = {
+		.zonelist = node_zonelist(first_memory_node, gfp_mask),
+		.nodemask = NULL,
+		.gfp_mask = gfp_mask,
+		.order = 0,
+		.force_kill = true,
+	};
+
 	mutex_lock(&oom_lock);
-	if (!out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL),
-			   GFP_KERNEL, 0, NULL, true))
+	if (!out_of_memory(&oc))
 		pr_info("OOM request ignored because killer is disabled\n");
 	mutex_unlock(&oom_lock);
 }
diff --git a/include/linux/oom.h b/include/linux/oom.h
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -12,6 +12,14 @@ struct notifier_block;
 struct mem_cgroup;
 struct task_struct;
 
+struct oom_control {
+	struct zonelist *zonelist;
+	nodemask_t	*nodemask;
+	gfp_t		gfp_mask;
+	int		order;
+	bool		force_kill;
+};
+
 /*
  * Types of limitations to the nodes from which allocations may occur
  */
@@ -57,21 +65,18 @@ extern unsigned long oom_badness(struct task_struct *p,
 
 extern int oom_kills_count(void);
 extern void note_oom_kill(void);
-extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
+extern void oom_kill_process(struct oom_control *oc, struct task_struct *p,
 			     unsigned int points, unsigned long totalpages,
-			     struct mem_cgroup *memcg, nodemask_t *nodemask,
-			     const char *message);
+			     struct mem_cgroup *memcg, const char *message);
 
-extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
-			       int order, const nodemask_t *nodemask,
+extern void check_panic_on_oom(struct oom_control *oc,
+			       enum oom_constraint constraint,
 			       struct mem_cgroup *memcg);
 
-extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
-		unsigned long totalpages, const nodemask_t *nodemask,
-		bool force_kill);
+extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
+		struct task_struct *task, unsigned long totalpages);
 
-extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
-		int order, nodemask_t *mask, bool force_kill);
+extern bool out_of_memory(struct oom_control *oc);
 
 extern void exit_oom_victim(void);
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1545,6 +1545,13 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
 static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 				     int order)
 {
+	struct oom_control oc = {
+		.zonelist = NULL,
+		.nodemask = NULL,
+		.gfp_mask = gfp_mask,
+		.order = order,
+		.force_kill = false,
+	};
 	struct mem_cgroup *iter;
 	unsigned long chosen_points = 0;
 	unsigned long totalpages;
@@ -1563,7 +1570,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		goto unlock;
 	}
 
-	check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg);
+	check_panic_on_oom(&oc, CONSTRAINT_MEMCG, memcg);
 	totalpages = mem_cgroup_get_limit(memcg) ? : 1;
 	for_each_mem_cgroup_tree(iter, memcg) {
 		struct css_task_iter it;
@@ -1571,8 +1578,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 
 		css_task_iter_start(&iter->css, &it);
 		while ((task = css_task_iter_next(&it))) {
-			switch (oom_scan_process_thread(task, totalpages, NULL,
-							false)) {
+			switch (oom_scan_process_thread(&oc, task, totalpages)) {
 			case OOM_SCAN_SELECT:
 				if (chosen)
 					put_task_struct(chosen);
@@ -1610,8 +1616,8 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 
 	if (chosen) {
 		points = chosen_points * 1000 / totalpages;
-		oom_kill_process(chosen, gfp_mask, order, points, totalpages,
-				 memcg, NULL, "Memory cgroup out of memory");
+		oom_kill_process(&oc, chosen, points, totalpages, memcg,
+				 "Memory cgroup out of memory");
 	}
 unlock:
 	mutex_unlock(&oom_lock);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -196,27 +196,26 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
  * Determine the type of allocation constraint.
  */
 #ifdef CONFIG_NUMA
-static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
-				gfp_t gfp_mask, nodemask_t *nodemask,
-				unsigned long *totalpages)
+static enum oom_constraint constrained_alloc(struct oom_control *oc,
+					     unsigned long *totalpages)
 {
 	struct zone *zone;
 	struct zoneref *z;
-	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
+	enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask);
 	bool cpuset_limited = false;
 	int nid;
 
 	/* Default to all available memory */
 	*totalpages = totalram_pages + total_swap_pages;
 
-	if (!zonelist)
+	if (!oc->zonelist)
 		return CONSTRAINT_NONE;
 	/*
 	 * Reach here only when __GFP_NOFAIL is used. So, we should avoid
 	 * to kill current.We have to random task kill in this case.
 	 * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now.
 	 */
-	if (gfp_mask & __GFP_THISNODE)
+	if (oc->gfp_mask & __GFP_THISNODE)
 		return CONSTRAINT_NONE;
 
 	/*
@@ -224,17 +223,18 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
 	 * the page allocator means a mempolicy is in effect.  Cpuset policy
 	 * is enforced in get_page_from_freelist().
 	 */
-	if (nodemask && !nodes_subset(node_states[N_MEMORY], *nodemask)) {
+	if (oc->nodemask &&
+	    !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
 		*totalpages = total_swap_pages;
-		for_each_node_mask(nid, *nodemask)
+		for_each_node_mask(nid, *oc->nodemask)
 			*totalpages += node_spanned_pages(nid);
 		return CONSTRAINT_MEMORY_POLICY;
 	}
 
 	/* Check this allocation failure is caused by cpuset's wall function */
-	for_each_zone_zonelist_nodemask(zone, z, zonelist,
-			high_zoneidx, nodemask)
-		if (!cpuset_zone_allowed(zone, gfp_mask))
+	for_each_zone_zonelist_nodemask(zone, z, oc->zonelist,
+			high_zoneidx, oc->nodemask)
+		if (!cpuset_zone_allowed(zone, oc->gfp_mask))
 			cpuset_limited = true;
 
 	if (cpuset_limited) {
@@ -246,20 +246,18 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
 	return CONSTRAINT_NONE;
 }
 #else
-static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
-				gfp_t gfp_mask, nodemask_t *nodemask,
-				unsigned long *totalpages)
+static enum oom_constraint constrained_alloc(struct oom_control *oc,
+					     unsigned long *totalpages)
 {
 	*totalpages = totalram_pages + total_swap_pages;
 	return CONSTRAINT_NONE;
 }
 #endif
 
-enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
-		unsigned long totalpages, const nodemask_t *nodemask,
-		bool force_kill)
+enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
+			struct task_struct *task, unsigned long totalpages)
 {
-	if (oom_unkillable_task(task, NULL, nodemask))
+	if (oom_unkillable_task(task, NULL, oc->nodemask))
 		return OOM_SCAN_CONTINUE;
 
 	/*
@@ -267,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 	 * Don't allow any other task to have access to the reserves.
 	 */
 	if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
-		if (!force_kill)
+		if (!oc->force_kill)
 			return OOM_SCAN_ABORT;
 	}
 	if (!task->mm)
@@ -280,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 	if (oom_task_origin(task))
 		return OOM_SCAN_SELECT;
 
-	if (task_will_free_mem(task) && !force_kill)
+	if (task_will_free_mem(task) && !oc->force_kill)
 		return OOM_SCAN_ABORT;
 
 	return OOM_SCAN_OK;
@@ -289,12 +287,9 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 /*
  * Simple selection loop. We chose the process with the highest
  * number of 'points'.  Returns -1 on scan abort.
- *
- * (not docbooked, we don't want this one cluttering up the manual)
  */
-static struct task_struct *select_bad_process(unsigned int *ppoints,
-		unsigned long totalpages, const nodemask_t *nodemask,
-		bool force_kill)
+static struct task_struct *select_bad_process(struct oom_control *oc,
+		unsigned int *ppoints, unsigned long totalpages)
 {
 	struct task_struct *g, *p;
 	struct task_struct *chosen = NULL;
@@ -304,8 +299,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 	for_each_process_thread(g, p) {
 		unsigned int points;
 
-		switch (oom_scan_process_thread(p, totalpages, nodemask,
-						force_kill)) {
+		switch (oom_scan_process_thread(oc, p, totalpages)) {
 		case OOM_SCAN_SELECT:
 			chosen = p;
 			chosen_points = ULONG_MAX;
@@ -318,7 +312,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 		case OOM_SCAN_OK:
 			break;
 		};
-		points = oom_badness(p, NULL, nodemask, totalpages);
+		points = oom_badness(p, NULL, oc->nodemask, totalpages);
 		if (!points || points < chosen_points)
 			continue;
 		/* Prefer thread group leaders for display purposes */
@@ -380,13 +374,13 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
 	rcu_read_unlock();
 }
 
-static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
-			struct mem_cgroup *memcg, const nodemask_t *nodemask)
+static void dump_header(struct oom_control *oc, struct task_struct *p,
+			struct mem_cgroup *memcg)
 {
 	task_lock(current);
 	pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
 		"oom_score_adj=%hd\n",
-		current->comm, gfp_mask, order,
+		current->comm, oc->gfp_mask, oc->order,
 		current->signal->oom_score_adj);
 	cpuset_print_task_mems_allowed(current);
 	task_unlock(current);
@@ -396,7 +390,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 	else
 		show_mem(SHOW_MEM_FILTER_NODES);
 	if (sysctl_oom_dump_tasks)
-		dump_tasks(memcg, nodemask);
+		dump_tasks(memcg, oc->nodemask);
 }
 
 /*
@@ -487,10 +481,9 @@ void oom_killer_enable(void)
  * Must be called while holding a reference to p, which will be released upon
  * returning.
  */
-void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
+void oom_kill_process(struct oom_control *oc, struct task_struct *p,
 		      unsigned int points, unsigned long totalpages,
-		      struct mem_cgroup *memcg, nodemask_t *nodemask,
-		      const char *message)
+		      struct mem_cgroup *memcg, const char *message)
 {
 	struct task_struct *victim = p;
 	struct task_struct *child;
@@ -514,7 +507,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	task_unlock(p);
 
 	if (__ratelimit(&oom_rs))
-		dump_header(p, gfp_mask, order, memcg, nodemask);
+		dump_header(oc, p, memcg);
 
 	task_lock(p);
 	pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
@@ -537,7 +530,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 			/*
 			 * oom_badness() returns 0 if the thread is unkillable
 			 */
-			child_points = oom_badness(child, memcg, nodemask,
+			child_points = oom_badness(child, memcg, oc->nodemask,
 								totalpages);
 			if (child_points > victim_points) {
 				put_task_struct(victim);
@@ -600,8 +593,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 /*
  * Determines whether the kernel must panic because of the panic_on_oom sysctl.
  */
-void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
-			int order, const nodemask_t *nodemask,
+void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
 			struct mem_cgroup *memcg)
 {
 	if (likely(!sysctl_panic_on_oom))
@@ -615,7 +607,7 @@ void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
 		if (constraint != CONSTRAINT_NONE)
 			return;
 	}
-	dump_header(NULL, gfp_mask, order, memcg, nodemask);
+	dump_header(oc, NULL, memcg);
 	panic("Out of memory: %s panic_on_oom is enabled\n",
 		sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
 }
@@ -635,22 +627,16 @@ int unregister_oom_notifier(struct notifier_block *nb)
 EXPORT_SYMBOL_GPL(unregister_oom_notifier);
 
 /**
- * __out_of_memory - kill the "best" process when we run out of memory
- * @zonelist: zonelist pointer
- * @gfp_mask: memory allocation flags
- * @order: amount of memory being requested as a power of 2
- * @nodemask: nodemask passed to page allocator
- * @force_kill: true if a task must be killed, even if others are exiting
+ * out_of_memory - kill the "best" process when we run out of memory
+ * @oc: pointer to struct oom_control
  *
  * If we run out of memory, we have the choice between either
  * killing a random task (bad), letting the system crash (worse)
  * OR try to be smart about which process to kill. Note that we
  * don't have to be perfect here, we just have to be good.
  */
-bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
-		   int order, nodemask_t *nodemask, bool force_kill)
+bool out_of_memory(struct oom_control *oc)
 {
-	const nodemask_t *mpol_mask;
 	struct task_struct *p;
 	unsigned long totalpages;
 	unsigned long freed = 0;
@@ -684,30 +670,29 @@ bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 	 * Check if there were limitations on the allocation (only relevant for
 	 * NUMA) that may require different handling.
 	 */
-	constraint = constrained_alloc(zonelist, gfp_mask, nodemask,
-						&totalpages);
-	mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL;
-	check_panic_on_oom(constraint, gfp_mask, order, mpol_mask, NULL);
+	constraint = constrained_alloc(oc, &totalpages);
+	if (constraint != CONSTRAINT_MEMORY_POLICY)
+		oc->nodemask = NULL;
+	check_panic_on_oom(oc, constraint, NULL);
 
 	if (sysctl_oom_kill_allocating_task && current->mm &&
-	    !oom_unkillable_task(current, NULL, nodemask) &&
+	    !oom_unkillable_task(current, NULL, oc->nodemask) &&
 	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
 		get_task_struct(current);
-		oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL,
-				 nodemask,
+		oom_kill_process(oc, current, 0, totalpages, NULL,
 				 "Out of memory (oom_kill_allocating_task)");
 		goto out;
 	}
 
-	p = select_bad_process(&points, totalpages, mpol_mask, force_kill);
+	p = select_bad_process(oc, &points, totalpages);
 	/* Found nothing?!?! Either we hang forever, or we panic. */
 	if (!p) {
-		dump_header(NULL, gfp_mask, order, NULL, mpol_mask);
+		dump_header(oc, NULL, NULL);
 		panic("Out of memory and no killable processes...\n");
 	}
 	if (p != (void *)-1UL) {
-		oom_kill_process(p, gfp_mask, order, points, totalpages, NULL,
-				 nodemask, "Out of memory");
+		oom_kill_process(oc, p, points, totalpages, NULL,
+				 "Out of memory");
 		killed = 1;
 	}
 out:
@@ -728,13 +713,21 @@ out:
  */
 void pagefault_out_of_memory(void)
 {
+	struct oom_control oc = {
+		.zonelist = NULL,
+		.nodemask = NULL,
+		.gfp_mask = 0,
+		.order = 0,
+		.force_kill = false,
+	};
+
 	if (mem_cgroup_oom_synchronize(true))
 		return;
 
 	if (!mutex_trylock(&oom_lock))
 		return;
 
-	if (!out_of_memory(NULL, 0, 0, NULL, false)) {
+	if (!out_of_memory(&oc)) {
 		/*
 		 * There shouldn't be any user tasks runnable while the
 		 * OOM killer is disabled, so the current task has to
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2680,6 +2680,13 @@ static inline struct page *
 __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	const struct alloc_context *ac, unsigned long *did_some_progress)
 {
+	struct oom_control oc = {
+		.zonelist = ac->zonelist,
+		.nodemask = ac->nodemask,
+		.gfp_mask = gfp_mask,
+		.order = order,
+		.force_kill = false,
+	};
 	struct page *page;
 
 	*did_some_progress = 0;
@@ -2731,8 +2738,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 			goto out;
 	}
 	/* Exhausted what can be done so it's blamo time */
-	if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false)
-			|| WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
+	if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
 		*did_some_progress = 1;
 out:
 	mutex_unlock(&oom_lock);

* [patch v3 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq
  2015-07-08 23:42   ` [patch v3 " David Rientjes
@ 2015-07-08 23:42     ` David Rientjes
  2015-07-08 23:42     ` [patch v3 3/3] mm, oom: do not panic for oom kills triggered from sysrq David Rientjes
  2015-07-14 22:52     ` [patch v3 1/3] mm, oom: organize oom context into struct Andrew Morton
  2 siblings, 0 replies; 21+ messages in thread
From: David Rientjes @ 2015-07-08 23:42 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Sergey Senozhatsky, Michal Hocko, linux-kernel, linux-mm

The force_kill member of struct oom_control isn't needed if an order of
-1 is used as a sentinel instead.  This mirrors order == -1 in
struct compact_control, which requests full memory compaction.
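
As a minimal sketch of the convention (illustration only; the
is_sysrq_oom() helper is a hypothetical name, not something this patch
introduces):

	/* order == -1: the kill was explicitly requested via sysrq+f */
	static inline bool is_sysrq_oom(const struct oom_control *oc)
	{
		return oc->order == -1;
	}

	/* e.g. in oom_scan_process_thread(): */
	if (task_will_free_mem(task) && !is_sysrq_oom(oc))
		return OOM_SCAN_ABORT;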

This patch introduces no functional change.

Signed-off-by: David Rientjes <rientjes@google.com>
---
 v2: fix changelog typo per Sergey
 v3: fix title per Hillf

 drivers/tty/sysrq.c | 3 +--
 include/linux/oom.h | 1 -
 mm/memcontrol.c     | 1 -
 mm/oom_kill.c       | 5 ++---
 mm/page_alloc.c     | 1 -
 5 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -358,8 +358,7 @@ static void moom_callback(struct work_struct *ignored)
 		.zonelist = node_zonelist(first_memory_node, gfp_mask),
 		.nodemask = NULL,
 		.gfp_mask = gfp_mask,
-		.order = 0,
-		.force_kill = true,
+		.order = -1,
 	};
 
 	mutex_lock(&oom_lock);
diff --git a/include/linux/oom.h b/include/linux/oom.h
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -17,7 +17,6 @@ struct oom_control {
 	nodemask_t	*nodemask;
 	gfp_t		gfp_mask;
 	int		order;
-	bool		force_kill;
 };
 
 /*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1550,7 +1550,6 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		.nodemask = NULL,
 		.gfp_mask = gfp_mask,
 		.order = order,
-		.force_kill = false,
 	};
 	struct mem_cgroup *iter;
 	unsigned long chosen_points = 0;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -265,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
 	 * Don't allow any other task to have access to the reserves.
 	 */
 	if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
-		if (!oc->force_kill)
+		if (oc->order != -1)
 			return OOM_SCAN_ABORT;
 	}
 	if (!task->mm)
@@ -278,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
 	if (oom_task_origin(task))
 		return OOM_SCAN_SELECT;
 
-	if (task_will_free_mem(task) && !oc->force_kill)
+	if (task_will_free_mem(task) && oc->order != -1)
 		return OOM_SCAN_ABORT;
 
 	return OOM_SCAN_OK;
@@ -718,7 +718,6 @@ void pagefault_out_of_memory(void)
 		.nodemask = NULL,
 		.gfp_mask = 0,
 		.order = 0,
-		.force_kill = false,
 	};
 
 	if (mem_cgroup_oom_synchronize(true))
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2685,7 +2685,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 		.nodemask = ac->nodemask,
 		.gfp_mask = gfp_mask,
 		.order = order,
-		.force_kill = false,
 	};
 	struct page *page;
 

* [patch v3 3/3] mm, oom: do not panic for oom kills triggered from sysrq
  2015-07-08 23:42   ` [patch v3 " David Rientjes
  2015-07-08 23:42     ` [patch v3 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq David Rientjes
@ 2015-07-08 23:42     ` David Rientjes
  2015-07-14 22:52     ` [patch v3 1/3] mm, oom: organize oom context into struct Andrew Morton
  2 siblings, 0 replies; 21+ messages in thread
From: David Rientjes @ 2015-07-08 23:42 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Sergey Senozhatsky, Michal Hocko, linux-kernel, linux-mm

Sysrq+f is used to kill a process either for debug or when the VM is
otherwise unresponsive.

It is not intended to trigger a panic when no process may be killed.

Avoid panicking the system for sysrq+f when no processes are killed.
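
A small userspace harness (a sketch, assuming root and a kernel built
with CONFIG_MAGIC_SYSRQ) that exercises this path by triggering a
manual oom kill:

	#include <stdio.h>

	int main(void)
	{
		/* Writing 'f' invokes the oom killer; with this patch
		 * the kernel no longer panics if nothing is killable. */
		FILE *fp = fopen("/proc/sysrq-trigger", "w");

		if (!fp) {
			perror("/proc/sysrq-trigger");
			return 1;
		}
		fputc('f', fp);
		return fclose(fp) ? 1 : 0;
	}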

Suggested-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: David Rientjes <rientjes@google.com>
---
 v2: no change
 v3: fix title per Hillf

 Documentation/sysrq.txt | 3 ++-
 mm/oom_kill.c           | 7 +++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -75,7 +75,8 @@ On all -  write a character to /proc/sysrq-trigger.  e.g.:
 
 'e'     - Send a SIGTERM to all processes, except for init.
 
-'f'	- Will call oom_kill to kill a memory hog process.
+'f'	- Will call the oom killer to kill a memory hog process, but will
+	  not panic if nothing can be killed.
 
 'g'	- Used by kgdb (kernel debugger)
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -607,6 +607,9 @@ void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
 		if (constraint != CONSTRAINT_NONE)
 			return;
 	}
+	/* Do not panic for oom kills triggered by sysrq */
+	if (oc->order == -1)
+		return;
 	dump_header(oc, NULL, memcg);
 	panic("Out of memory: %s panic_on_oom is enabled\n",
 		sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
@@ -686,11 +689,11 @@ bool out_of_memory(struct oom_control *oc)
 
 	p = select_bad_process(oc, &points, totalpages);
 	/* Found nothing?!?! Either we hang forever, or we panic. */
-	if (!p) {
+	if (!p && oc->order != -1) {
 		dump_header(oc, NULL, NULL);
 		panic("Out of memory and no killable processes...\n");
 	}
-	if (p != (void *)-1UL) {
+	if (p && p != (void *)-1UL) {
 		oom_kill_process(oc, p, points, totalpages, NULL,
 				 "Out of memory");
 		killed = 1;

* Re: [patch v3 1/3] mm, oom: organize oom context into struct
  2015-07-08 23:42   ` [patch v3 " David Rientjes
  2015-07-08 23:42     ` [patch v3 2/3] mm, oom: pass an oom order of -1 when triggered by sysrq David Rientjes
  2015-07-08 23:42     ` [patch v3 3/3] mm, oom: do not panic for oom kills triggered from sysrq David Rientjes
@ 2015-07-14 22:52     ` Andrew Morton
  2015-07-14 23:44       ` David Rientjes
  2 siblings, 1 reply; 21+ messages in thread
From: Andrew Morton @ 2015-07-14 22:52 UTC (permalink / raw)
  To: David Rientjes; +Cc: Sergey Senozhatsky, Michal Hocko, linux-kernel, linux-mm

On Wed, 8 Jul 2015 16:42:42 -0700 (PDT) David Rientjes <rientjes@google.com> wrote:

> There are essential elements to an oom context that are passed around to
> multiple functions.
> 
> Organize these elements into a new struct, struct oom_control, that
> specifies the context for an oom condition.
> 
> This patch introduces no functional change.
> 
> ...
>
> --- a/include/linux/oom.h
> +++ b/include/linux/oom.h
> @@ -12,6 +12,14 @@ struct notifier_block;
>  struct mem_cgroup;
>  struct task_struct;
>  
> +struct oom_control {
> +	struct zonelist *zonelist;
> +	nodemask_t	*nodemask;
> +	gfp_t		gfp_mask;
> +	int		order;
> +	bool		force_kill;
> +};

Some docs would be nice.

gfp_mask and order are what the page-allocating caller originally asked
for, I think?  They haven't been mucked with?

It's somewhat obvious what force_kill does, but why is it provided, why
is it set?  And what does it actually kill?  A process which was
selected based on the other fields...

Also, it's a bit odd that zonelist and nodemask are here.  They're
low-level implementation details whereas the other three fields are
high-level caller control stuff.

Anyway, please have a think about it.  The definition site for a controlling
structure can be a great place to reveal the overall design and operation.

* Re: [patch v3 1/3] mm, oom: organize oom context into struct
  2015-07-14 22:52     ` [patch v3 1/3] mm, oom: organize oom context into struct Andrew Morton
@ 2015-07-14 23:44       ` David Rientjes
  0 siblings, 0 replies; 21+ messages in thread
From: David Rientjes @ 2015-07-14 23:44 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Sergey Senozhatsky, Michal Hocko, linux-kernel, linux-mm

On Tue, 14 Jul 2015, Andrew Morton wrote:

> > --- a/include/linux/oom.h
> > +++ b/include/linux/oom.h
> > @@ -12,6 +12,14 @@ struct notifier_block;
> >  struct mem_cgroup;
> >  struct task_struct;
> >  
> > +struct oom_control {
> > +	struct zonelist *zonelist;
> > +	nodemask_t	*nodemask;
> > +	gfp_t		gfp_mask;
> > +	int		order;
> > +	bool		force_kill;
> > +};
> 
> Some docs would be nice.
> 

Ok!

> gfp_mask and order are what the page-allocating caller originally asked
> for, I think?  They haven't been mucked with?
> 

Yes, it's a good opportunity to make them const.

> It's somewhat obvious what force_kill does, but why is it provided, why
> is it set?  And what does it actually kill?  A process which was
> selected based on the other fields...
> 

It's removed in the next patch since it's unneeded, so I'll define what 
order == -1 means.
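
Something along these lines, perhaps (a sketch only; the comments are
one reading of the intended semantics, written as if force_kill is
already gone per patch 2/3):

	struct oom_control {
		/* Used to determine the cpuset constraint */
		struct zonelist *zonelist;
		/* Used to determine the mempolicy constraint */
		nodemask_t	*nodemask;
		/* The gfp mask of the triggering allocation, unmodified */
		const gfp_t	gfp_mask;
		/*
		 * order == -1 means the oom kill was forced by sysrq;
		 * otherwise, the order of the triggering allocation
		 */
		const int	order;
	};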

> Also, it's a bit odd that zonelist and nodemask are here.  They're
> low-level implementation details whereas the other three fields are
> high-level caller control stuff.
> 

Zonelist and nodemask are indeed pretty weird here.  We use them to
determine whether the oom kill is constrained by a cpuset and/or a
mempolicy, respectively, so that we don't kill things unnecessarily and
leave a cpuset still oom, for example.  We could determine the
constraint before actually calling the oom killer and pass the
enum oom_constraint in, but its only purpose is the oom killer, so it's
just part of that logical unit.
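
Condensed, the way the two fields are consumed looks roughly like this
(a trimmed sketch of constrained_alloc(), with the totalpages
accounting and the !CONFIG_NUMA stub omitted):

	static enum oom_constraint constrained_alloc(struct oom_control *oc)
	{
		struct zone *zone;
		struct zoneref *z;
		enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask);
		bool cpuset_limited = false;

		/* No zonelist: not an allocation context, nothing to constrain */
		if (!oc->zonelist)
			return CONSTRAINT_NONE;

		/* A mempolicy that excludes nodes with memory constrains the kill */
		if (oc->nodemask &&
		    !nodes_subset(node_states[N_MEMORY], *oc->nodemask))
			return CONSTRAINT_MEMORY_POLICY;

		/* Otherwise, see whether the cpuset forbade any of the zones */
		for_each_zone_zonelist_nodemask(zone, z, oc->zonelist,
						high_zoneidx, oc->nodemask)
			if (!cpuset_zone_allowed(zone, oc->gfp_mask))
				cpuset_limited = true;

		return cpuset_limited ? CONSTRAINT_CPUSET : CONSTRAINT_NONE;
	}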

