[PATCH] mm, oom: avoid printk() iteration under RCU

* [PATCH] mm, oom: avoid printk() iteration under RCU
@ 2019-07-17 10:55 Tetsuo Handa
  2019-07-18  0:31 ` Shakeel Butt
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Tetsuo Handa @ 2019-07-17 10:55 UTC (permalink / raw)
  To: linux-mm
  Cc: Andrew Morton, Tetsuo Handa, Michal Hocko, Roman Gushchin, Shakeel Butt

Currently dump_tasks() might call printk() many thousands of times under
RCU, which might take many minutes on slow consoles. Therefore, split
dump_tasks() into three stages: take a snapshot of possible OOM victim
candidates under RCU, dump the snapshot from reschedulable context, and
destroy the snapshot.

In a future patch, the first stage will be moved to select_bad_process()
and the third stage will be moved to after oom_kill_process(), which will
simplify refcount handling.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Roman Gushchin <guro@fb.com>
---
 include/linux/sched.h |  1 +
 mm/oom_kill.c         | 67 +++++++++++++++++++++++++--------------------------
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8dc1811..cb6696b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1246,6 +1246,7 @@ struct task_struct {
 #ifdef CONFIG_MMU
 	struct task_struct		*oom_reaper_list;
 #endif
+	struct list_head		oom_victim_list;
 #ifdef CONFIG_VMAP_STACK
 	struct vm_struct		*stack_vm_area;
 #endif
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index eda2e2a..bd22ca0 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -377,36 +377,13 @@ static void select_bad_process(struct oom_control *oc)
 	}
 }
 
-static int dump_task(struct task_struct *p, void *arg)
-{
-	struct oom_control *oc = arg;
-	struct task_struct *task;
-
-	if (oom_unkillable_task(p))
-		return 0;
-
-	/* p may not have freeable memory in nodemask */
-	if (!is_memcg_oom(oc) && !oom_cpuset_eligible(p, oc))
-		return 0;
 
-	task = find_lock_task_mm(p);
-	if (!task) {
-		/*
-		 * This is a kthread or all of p's threads have already
-		 * detached their mm's.  There's no need to report
-		 * them; they can't be oom killed anyway.
-		 */
-		return 0;
+static int add_candidate_task(struct task_struct *p, void *arg)
+{
+	if (!oom_unkillable_task(p)) {
+		get_task_struct(p);
+		list_add_tail(&p->oom_victim_list, (struct list_head *) arg);
 	}
-
-	pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu         %5hd %s\n",
-		task->pid, from_kuid(&init_user_ns, task_uid(task)),
-		task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
-		mm_pgtables_bytes(task->mm),
-		get_mm_counter(task->mm, MM_SWAPENTS),
-		task->signal->oom_score_adj, task->comm);
-	task_unlock(task);
-
 	return 0;
 }
 
@@ -422,19 +399,41 @@ static int dump_task(struct task_struct *p, void *arg)
  */
 static void dump_tasks(struct oom_control *oc)
 {
-	pr_info("Tasks state (memory values in pages):\n");
-	pr_info("[  pid  ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name\n");
+	static LIST_HEAD(list);
+	struct task_struct *p;
+	struct task_struct *t;
 
 	if (is_memcg_oom(oc))
-		mem_cgroup_scan_tasks(oc->memcg, dump_task, oc);
+		mem_cgroup_scan_tasks(oc->memcg, add_candidate_task, &list);
 	else {
-		struct task_struct *p;
-
 		rcu_read_lock();
 		for_each_process(p)
-			dump_task(p, oc);
+			add_candidate_task(p, &list);
 		rcu_read_unlock();
 	}
+	pr_info("Tasks state (memory values in pages):\n");
+	pr_info("[  pid  ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name\n");
+	list_for_each_entry(p, &list, oom_victim_list) {
+		cond_resched();
+		/* p may not have freeable memory in nodemask */
+		if (!is_memcg_oom(oc) && !oom_cpuset_eligible(p, oc))
+			continue;
+		/* All of p's threads might have already detached their mm's. */
+		t = find_lock_task_mm(p);
+		if (!t)
+			continue;
+		pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu         %5hd %s\n",
+			t->pid, from_kuid(&init_user_ns, task_uid(t)),
+			t->tgid, t->mm->total_vm, get_mm_rss(t->mm),
+			mm_pgtables_bytes(t->mm),
+			get_mm_counter(t->mm, MM_SWAPENTS),
+			t->signal->oom_score_adj, t->comm);
+		task_unlock(t);
+	}
+	list_for_each_entry_safe(p, t, &list, oom_victim_list) {
+		list_del(&p->oom_victim_list);
+		put_task_struct(p);
+	}
 }
 
 static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread