linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Hillf Danton <hdanton@sina.com>
To: Michal Hocko <mhocko@suse.cz>, Johannes Weiner <hannes@cmpxchg.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	Shakeel Butt <shakeelb@google.com>, Roman Gushchin <guro@fb.com>,
	Matthew Wilcox <willy@infradead.org>,
	Hillf Danton <hdanton@sina.com>
Subject: [RFC] mm: memcg: add priority for soft limit reclaiming
Date: Thu, 19 Sep 2019 21:13:32 +0800	[thread overview]
Message-ID: <20190919131332.4180-1-hdanton@sina.com> (raw)


Currently the memory controller is playing an increasingly important role
in how memory is used and how pages are reclaimed on memory pressure.

In daily work a memcg is often created for critical tasks, and their
pre-configured memory usage is supposed to be met even on memory pressure.
Administrators want it to be configurable that the pages consumed by
memcg-B can be reclaimed by page allocations invoked not by memcg-A but
by memcg-C.

That configurability is addressed by adding a priority for soft limit
reclaiming, to make sure that no pages will be reclaimed from a memcg of
higher priority in favor of a memcg of lower priority.

Pages are reclaimed with no priority being taken into account by default
unless user turns it on, and then they are responsible for their smart
activities almost the same way as they play realtime FIFO/RR games.

Priority is available only in the direct reclaiming context in order to
avoid churning in the complex kswapd behavior.

Cc: Shakeel Butt <shakeelb@google.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Hillf Danton <hdanton@sina.com>
---

--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -230,6 +230,21 @@ struct mem_cgroup {
 	int		under_oom;
 
 	int	swappiness;
+	/*
+	 * slrp, soft limit reclaiming priority
+	 *
+	 * 0, by default, no slrp considered on soft reclaiming.
+	 *
+	 * 1-32, user configurable in ascending order,
+	 * 	no page will be reclaimed from memcg of higher slrp in
+	 * 	favor of memcg of lower slrp.
+	 *
+	 * only in direct reclaiming context now.
+	 */
+	int	slrp;
+#define MEMCG_SLRP_MIN 1
+#define MEMCG_SLRP_MAX 32
+
 	/* OOM-Killer disable */
 	int		oom_kill_disable;
 
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -647,7 +647,8 @@ static void mem_cgroup_remove_from_trees
 }
 
 static struct mem_cgroup_per_node *
-__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
+__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz,
+					int slrp)
 {
 	struct mem_cgroup_per_node *mz;
 
@@ -664,7 +665,7 @@ retry:
 	 * position in the tree.
 	 */
 	__mem_cgroup_remove_exceeded(mz, mctz);
-	if (!soft_limit_excess(mz->memcg) ||
+	if (!soft_limit_excess(mz->memcg) || mz->memcg->slrp > slrp ||
 	    !css_tryget_online(&mz->memcg->css))
 		goto retry;
 done:
@@ -672,12 +673,13 @@ done:
 }
 
 static struct mem_cgroup_per_node *
-mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
+mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz,
+					int slrp)
 {
 	struct mem_cgroup_per_node *mz;
 
 	spin_lock_irq(&mctz->lock);
-	mz = __mem_cgroup_largest_soft_limit_node(mctz);
+	mz = __mem_cgroup_largest_soft_limit_node(mctz, slrp);
 	spin_unlock_irq(&mctz->lock);
 	return mz;
 }
@@ -2972,6 +2974,31 @@ static int mem_cgroup_resize_max(struct
 	return ret;
 }
 
+static int mem_cgroup_get_slrp(void)
+{
+	int slrp;
+
+	if (current->flags & PF_KTHREAD) {
+		/*
+		 * now slrp does not churn in background reclaiming to
+		 * make life simple
+		 */
+		slrp = 0;
+	} else {
+		struct mem_cgroup *memcg;
+
+		rcu_read_lock();
+		memcg = mem_cgroup_from_task(current);
+		if (!memcg || memcg == root_mem_cgroup)
+			slrp = 0;
+		else
+			slrp = memcg->slrp;
+		rcu_read_unlock();
+	}
+
+	return slrp;
+}
+
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 					    gfp_t gfp_mask,
 					    unsigned long *total_scanned)
@@ -2980,6 +3007,7 @@ unsigned long mem_cgroup_soft_limit_recl
 	struct mem_cgroup_per_node *mz, *next_mz = NULL;
 	unsigned long reclaimed;
 	int loop = 0;
+	int slrp;
 	struct mem_cgroup_tree_per_node *mctz;
 	unsigned long excess;
 	unsigned long nr_scanned;
@@ -2997,6 +3025,7 @@ unsigned long mem_cgroup_soft_limit_recl
 	if (!mctz || RB_EMPTY_ROOT(&mctz->rb_root))
 		return 0;
 
+	slrp = mem_cgroup_get_slrp();
 	/*
 	 * This loop can run a while, specially if mem_cgroup's continuously
 	 * keep exceeding their soft limit and putting the system under
@@ -3006,7 +3035,7 @@ unsigned long mem_cgroup_soft_limit_recl
 		if (next_mz)
 			mz = next_mz;
 		else
-			mz = mem_cgroup_largest_soft_limit_node(mctz);
+			mz = mem_cgroup_largest_soft_limit_node(mctz, slrp);
 		if (!mz)
 			break;
 
@@ -3024,8 +3053,8 @@ unsigned long mem_cgroup_soft_limit_recl
 		 */
 		next_mz = NULL;
 		if (!reclaimed)
-			next_mz = __mem_cgroup_largest_soft_limit_node(mctz);
-
+			next_mz = __mem_cgroup_largest_soft_limit_node(mctz,
+							slrp);
 		excess = soft_limit_excess(mz->memcg);
 		/*
 		 * One school of thought says that we should not add
@@ -5817,6 +5846,37 @@ static ssize_t memory_oom_group_write(st
 	return nbytes;
 }
 
+static int memory_slrp_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
+
+	seq_printf(m, "%d\n", memcg->slrp);
+
+	return 0;
+}
+
+static ssize_t memory_slrp_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	int ret, slrp;
+
+	buf = strstrip(buf);
+	if (!buf)
+		return -EINVAL;
+
+	ret = kstrtoint(buf, 0, &slrp);
+	if (ret)
+		return ret;
+
+	if (slrp < MEMCG_SLRP_MIN || MEMCG_SLRP_MAX < slrp)
+		return -EINVAL;
+
+	memcg->slrp = slrp;
+
+	return nbytes;
+}
+
 static struct cftype memory_files[] = {
 	{
 		.name = "current",
@@ -5870,6 +5930,12 @@ static struct cftype memory_files[] = {
 		.seq_show = memory_oom_group_show,
 		.write = memory_oom_group_write,
 	},
+	{
+		.name = "slrp",
+		.flags = CFTYPE_NOT_ON_ROOT | CFTYPE_NS_DELEGATABLE,
+		.seq_show = memory_slrp_show,
+		.write = memory_slrp_write,
+	},
 	{ }	/* terminate */
 };
 



             reply	other threads:[~2019-09-19 13:13 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-19 13:13 Hillf Danton [this message]
2019-09-19 13:32 ` [RFC] mm: memcg: add priority for soft limit reclaiming Michal Hocko
2019-09-23 13:04   ` Hillf Danton
2019-09-23 13:28     ` Michal Hocko
2019-09-24  7:36 Hillf Danton
2019-09-24 13:30 ` Michal Hocko
2019-09-24 17:23   ` Roman Gushchin
2019-09-25  2:35   ` Hillf Danton
2019-09-25  6:52     ` Michal Hocko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190919131332.4180-1-hdanton@sina.com \
    --to=hdanton@sina.com \
    --cc=akpm@linux-foundation.org \
    --cc=guro@fb.com \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.cz \
    --cc=shakeelb@google.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).