All of lore.kernel.org
 help / color / mirror / Atom feed
From: Michal Hocko <mhocko@suse.cz>
To: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Ying Han <yinghan@google.com>, Hugh Dickins <hughd@google.com>,
	Glauber Costa <glommer@parallels.com>,
	Michel Lespinasse <walken@google.com>,
	Greg Thelen <gthelen@google.com>, Tejun Heo <tj@kernel.org>,
	Balbir Singh <bsingharora@gmail.com>,
	cgroups@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org
Subject: [PATCH 1/3] memcg: track children in soft limit excess to improve soft limit
Date: Mon, 27 May 2013 19:13:09 +0200	[thread overview]
Message-ID: <1369674791-13861-2-git-send-email-mhocko@suse.cz> (raw)
In-Reply-To: <1369674791-13861-1-git-send-email-mhocko@suse.cz>

Soft limit reclaim has to check the whole reclaim hierarchy while doing
the first pass of the reclaim. This leads to a higher system time which
can be visible especially when there are many groups in the hierarchy.

- TODO put testing results here

This patch adds a per-memcg counter of children in excess. It also
restores MEM_CGROUP_TARGET_SOFTLIMIT into mem_cgroup_event_ratelimit for
a proper batching.
If a group crosses soft limit for the first time it increases parent's
children_in_excess up the hierarchy. The similarly if a group gets below
the limit it will decrease the counter. The transition phase is recorded
in soft_contributed flag.

mem_cgroup_soft_reclaim_eligible then uses this information to better
decide whether to skip the node or the whole subtree. The rule is
simple. Skip the node with a children in excess or skip the whole subtree
otherwise.

Signed-off-by: Michal Hocko <mhocko@suse.cz>
---
 mm/memcontrol.c |   51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 981ee12..60b48bc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -136,6 +136,7 @@ static const char * const mem_cgroup_lru_names[] = {
  */
 enum mem_cgroup_events_target {
 	MEM_CGROUP_TARGET_THRESH,
+	MEM_CGROUP_TARGET_SOFTLIMIT,
 	MEM_CGROUP_TARGET_NUMAINFO,
 	MEM_CGROUP_NTARGETS,
 };
@@ -355,6 +356,10 @@ struct mem_cgroup {
 	atomic_t	numainfo_updating;
 #endif
 
+	spinlock_t soft_lock;
+	bool soft_contributed;
+	atomic_t children_in_excess;
+
 	/*
 	 * Per cgroup active and inactive list, similar to the
 	 * per zone LRU lists.
@@ -890,6 +895,9 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 		case MEM_CGROUP_TARGET_THRESH:
 			next = val + THRESHOLDS_EVENTS_TARGET;
 			break;
+		case MEM_CGROUP_TARGET_SOFTLIMIT:
+			next = val + SOFTLIMIT_EVENTS_TARGET;
+			break;
 		case MEM_CGROUP_TARGET_NUMAINFO:
 			next = val + NUMAINFO_EVENTS_TARGET;
 			break;
@@ -902,6 +910,34 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 	return false;
 }
 
+static void mem_cgroup_update_soft_limit(struct mem_cgroup *memcg)
+{
+	unsigned long long excess = res_counter_soft_limit_excess(&memcg->res);
+	struct mem_cgroup *parent = memcg;
+	int delta = 0;
+
+	spin_lock(&memcg->soft_lock);
+	if (excess) {
+		if (!memcg->soft_contributed) {
+			delta = 1;
+			memcg->soft_contributed = true;
+		}
+	} else {
+		if (memcg->soft_contributed) {
+			delta = -1;
+			memcg->soft_contributed = false;
+		}
+	}
+
+	/*
+	 * Necessary to update all ancestors when hierarchy is used
+	 * because their event counter is not touched.
+	 */
+	while (delta && (parent = parent_mem_cgroup(parent)))
+		atomic_add(delta, &parent->children_in_excess);
+	spin_unlock(&memcg->soft_lock);
+}
+
 /*
  * Check events in order.
  *
@@ -912,8 +948,11 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 	/* threshold event is triggered in finer grain than soft limit */
 	if (unlikely(mem_cgroup_event_ratelimit(memcg,
 						MEM_CGROUP_TARGET_THRESH))) {
+		bool do_softlimit;
 		bool do_numainfo __maybe_unused;
 
+		do_softlimit = mem_cgroup_event_ratelimit(memcg,
+						MEM_CGROUP_TARGET_SOFTLIMIT);
 #if MAX_NUMNODES > 1
 		do_numainfo = mem_cgroup_event_ratelimit(memcg,
 						MEM_CGROUP_TARGET_NUMAINFO);
@@ -921,6 +960,8 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 		preempt_enable();
 
 		mem_cgroup_threshold(memcg);
+		if (unlikely(do_softlimit))
+			mem_cgroup_update_soft_limit(memcg);
 #if MAX_NUMNODES > 1
 		if (unlikely(do_numainfo))
 			atomic_inc(&memcg->numainfo_events);
@@ -1894,6 +1935,9 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
  * hierarchy if
  * 	a) it is over its soft limit
  * 	b) any parent up the hierarchy is over its soft limit
+ *
+ * If the given group doesn't have any children over the limit then it
+ * doesn't make any sense to iterate its subtree.
  */
 enum mem_cgroup_filter_t
 mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
@@ -1915,6 +1959,8 @@ mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
 			break;
 	}
 
+	if (!atomic_read(&memcg->children_in_excess))
+		return SKIP_TREE;
 	return SKIP;
 }
 
@@ -6061,6 +6107,7 @@ mem_cgroup_css_alloc(struct cgroup *cont)
 	mutex_init(&memcg->thresholds_lock);
 	spin_lock_init(&memcg->move_lock);
 	vmpressure_init(&memcg->vmpressure);
+	spin_lock_init(&memcg->soft_lock);
 
 	return &memcg->css;
 
@@ -6150,6 +6197,10 @@ static void mem_cgroup_css_offline(struct cgroup *cont)
 
 	mem_cgroup_invalidate_reclaim_iterators(memcg);
 	mem_cgroup_reparent_charges(memcg);
+	if (memcg->soft_contributed) {
+		while ((memcg = parent_mem_cgroup(memcg)))
+			atomic_dec(&memcg->children_in_excess);
+	}
 	mem_cgroup_destroy_all_caches(memcg);
 }
 
-- 
1.7.10.4


WARNING: multiple messages have this Message-ID (diff)
From: Michal Hocko <mhocko@suse.cz>
To: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Ying Han <yinghan@google.com>, Hugh Dickins <hughd@google.com>,
	Glauber Costa <glommer@parallels.com>,
	Michel Lespinasse <walken@google.com>,
	Greg Thelen <gthelen@google.com>, Tejun Heo <tj@kernel.org>,
	Balbir Singh <bsingharora@gmail.com>,
	cgroups@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org
Subject: [PATCH 1/3] memcg: track children in soft limit excess to improve soft limit
Date: Mon, 27 May 2013 19:13:09 +0200	[thread overview]
Message-ID: <1369674791-13861-2-git-send-email-mhocko@suse.cz> (raw)
In-Reply-To: <1369674791-13861-1-git-send-email-mhocko@suse.cz>

Soft limit reclaim has to check the whole reclaim hierarchy while doing
the first pass of the reclaim. This leads to a higher system time which
can be visible especially when there are many groups in the hierarchy.

- TODO put testing results here

This patch adds a per-memcg counter of children in excess. It also
restores MEM_CGROUP_TARGET_SOFTLIMIT into mem_cgroup_event_ratelimit for
a proper batching.
If a group crosses soft limit for the first time it increases parent's
children_in_excess up the hierarchy. The similarly if a group gets below
the limit it will decrease the counter. The transition phase is recorded
in soft_contributed flag.

mem_cgroup_soft_reclaim_eligible then uses this information to better
decide whether to skip the node or the whole subtree. The rule is
simple. Skip the node with a children in excess or skip the whole subtree
otherwise.

Signed-off-by: Michal Hocko <mhocko@suse.cz>
---
 mm/memcontrol.c |   51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 981ee12..60b48bc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -136,6 +136,7 @@ static const char * const mem_cgroup_lru_names[] = {
  */
 enum mem_cgroup_events_target {
 	MEM_CGROUP_TARGET_THRESH,
+	MEM_CGROUP_TARGET_SOFTLIMIT,
 	MEM_CGROUP_TARGET_NUMAINFO,
 	MEM_CGROUP_NTARGETS,
 };
@@ -355,6 +356,10 @@ struct mem_cgroup {
 	atomic_t	numainfo_updating;
 #endif
 
+	spinlock_t soft_lock;
+	bool soft_contributed;
+	atomic_t children_in_excess;
+
 	/*
 	 * Per cgroup active and inactive list, similar to the
 	 * per zone LRU lists.
@@ -890,6 +895,9 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 		case MEM_CGROUP_TARGET_THRESH:
 			next = val + THRESHOLDS_EVENTS_TARGET;
 			break;
+		case MEM_CGROUP_TARGET_SOFTLIMIT:
+			next = val + SOFTLIMIT_EVENTS_TARGET;
+			break;
 		case MEM_CGROUP_TARGET_NUMAINFO:
 			next = val + NUMAINFO_EVENTS_TARGET;
 			break;
@@ -902,6 +910,34 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 	return false;
 }
 
+static void mem_cgroup_update_soft_limit(struct mem_cgroup *memcg)
+{
+	unsigned long long excess = res_counter_soft_limit_excess(&memcg->res);
+	struct mem_cgroup *parent = memcg;
+	int delta = 0;
+
+	spin_lock(&memcg->soft_lock);
+	if (excess) {
+		if (!memcg->soft_contributed) {
+			delta = 1;
+			memcg->soft_contributed = true;
+		}
+	} else {
+		if (memcg->soft_contributed) {
+			delta = -1;
+			memcg->soft_contributed = false;
+		}
+	}
+
+	/*
+	 * Necessary to update all ancestors when hierarchy is used
+	 * because their event counter is not touched.
+	 */
+	while (delta && (parent = parent_mem_cgroup(parent)))
+		atomic_add(delta, &parent->children_in_excess);
+	spin_unlock(&memcg->soft_lock);
+}
+
 /*
  * Check events in order.
  *
@@ -912,8 +948,11 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 	/* threshold event is triggered in finer grain than soft limit */
 	if (unlikely(mem_cgroup_event_ratelimit(memcg,
 						MEM_CGROUP_TARGET_THRESH))) {
+		bool do_softlimit;
 		bool do_numainfo __maybe_unused;
 
+		do_softlimit = mem_cgroup_event_ratelimit(memcg,
+						MEM_CGROUP_TARGET_SOFTLIMIT);
 #if MAX_NUMNODES > 1
 		do_numainfo = mem_cgroup_event_ratelimit(memcg,
 						MEM_CGROUP_TARGET_NUMAINFO);
@@ -921,6 +960,8 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 		preempt_enable();
 
 		mem_cgroup_threshold(memcg);
+		if (unlikely(do_softlimit))
+			mem_cgroup_update_soft_limit(memcg);
 #if MAX_NUMNODES > 1
 		if (unlikely(do_numainfo))
 			atomic_inc(&memcg->numainfo_events);
@@ -1894,6 +1935,9 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
  * hierarchy if
  * 	a) it is over its soft limit
  * 	b) any parent up the hierarchy is over its soft limit
+ *
+ * If the given group doesn't have any children over the limit then it
+ * doesn't make any sense to iterate its subtree.
  */
 enum mem_cgroup_filter_t
 mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
@@ -1915,6 +1959,8 @@ mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
 			break;
 	}
 
+	if (!atomic_read(&memcg->children_in_excess))
+		return SKIP_TREE;
 	return SKIP;
 }
 
@@ -6061,6 +6107,7 @@ mem_cgroup_css_alloc(struct cgroup *cont)
 	mutex_init(&memcg->thresholds_lock);
 	spin_lock_init(&memcg->move_lock);
 	vmpressure_init(&memcg->vmpressure);
+	spin_lock_init(&memcg->soft_lock);
 
 	return &memcg->css;
 
@@ -6150,6 +6197,10 @@ static void mem_cgroup_css_offline(struct cgroup *cont)
 
 	mem_cgroup_invalidate_reclaim_iterators(memcg);
 	mem_cgroup_reparent_charges(memcg);
+	if (memcg->soft_contributed) {
+		while ((memcg = parent_mem_cgroup(memcg)))
+			atomic_dec(&memcg->children_in_excess);
+	}
 	mem_cgroup_destroy_all_caches(memcg);
 }
 
-- 
1.7.10.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2013-05-27 17:13 UTC|newest]

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-05-13  7:46 [patch v3 0/3 -mm] Soft limit rework Michal Hocko
2013-05-13  7:46 ` Michal Hocko
2013-05-13  7:46 ` Michal Hocko
2013-05-13  7:46 ` [patch v3 -mm 1/3] memcg: integrate soft reclaim tighter with zone shrinking code Michal Hocko
2013-05-13  7:46   ` Michal Hocko
2013-05-15  8:34   ` Glauber Costa
2013-05-15  8:34     ` Glauber Costa
2013-05-16 22:12   ` Tejun Heo
2013-05-16 22:12     ` Tejun Heo
2013-05-16 22:12     ` Tejun Heo
2013-05-16 22:15     ` Tejun Heo
2013-05-16 22:15       ` Tejun Heo
2013-05-17  7:16       ` Michal Hocko
2013-05-17  7:16         ` Michal Hocko
2013-05-17  7:16         ` Michal Hocko
2013-05-17  7:12     ` Michal Hocko
2013-05-17  7:12       ` Michal Hocko
2013-05-17 16:02   ` Johannes Weiner
2013-05-17 16:02     ` Johannes Weiner
2013-05-17 16:57     ` Tejun Heo
2013-05-17 16:57       ` Tejun Heo
2013-05-17 17:27       ` Johannes Weiner
2013-05-17 17:27         ` Johannes Weiner
2013-05-17 17:45         ` Tejun Heo
2013-05-17 17:45           ` Tejun Heo
2013-05-17 17:45           ` Tejun Heo
2013-05-20 14:44     ` Michal Hocko
2013-05-20 14:44       ` Michal Hocko
2013-05-20 14:44       ` Michal Hocko
2013-05-21  6:53       ` Michal Hocko
2013-05-21  6:53         ` Michal Hocko
2013-05-27 17:13     ` Michal Hocko
2013-05-27 17:13       ` Michal Hocko
2013-05-27 17:13       ` Michal Hocko
2013-05-27 17:13       ` Michal Hocko [this message]
2013-05-27 17:13         ` [PATCH 1/3] memcg: track children in soft limit excess to improve soft limit Michal Hocko
2013-05-27 17:13       ` [PATCH 2/3] memcg, vmscan: Do not attempt soft limit reclaim if it would not scan anything Michal Hocko
2013-05-27 17:13         ` Michal Hocko
2013-05-27 17:13       ` [PATCH 3/3] memcg: Track all children over limit in the root Michal Hocko
2013-05-27 17:13         ` Michal Hocko
2013-05-27 17:20       ` [PATCH] memcg: enhance memcg iterator to support predicates Michal Hocko
2013-05-27 17:20         ` Michal Hocko
2013-05-27 17:20         ` Michal Hocko
2013-05-29 13:05       ` [patch v3 -mm 1/3] memcg: integrate soft reclaim tighter with zone shrinking code Michal Hocko
2013-05-29 13:05         ` Michal Hocko
2013-05-29 13:05         ` Michal Hocko
2013-05-29 15:57         ` Michal Hocko
2013-05-29 15:57           ` Michal Hocko
2013-05-29 20:01           ` Johannes Weiner
2013-05-29 20:01             ` Johannes Weiner
2013-05-30  8:45             ` Michal Hocko
2013-05-30  8:45               ` Michal Hocko
2013-05-29 14:54       ` Michal Hocko
2013-05-29 14:54         ` Michal Hocko
2013-05-30  8:36         ` Michal Hocko
2013-05-30  8:36           ` Michal Hocko
2013-05-13  7:46 ` [patch v3 -mm 2/3] memcg: Get rid of soft-limit tree infrastructure Michal Hocko
2013-05-13  7:46   ` Michal Hocko
2013-05-15  8:38   ` Glauber Costa
2013-05-15  8:38     ` Glauber Costa
2013-05-16 22:16   ` Tejun Heo
2013-05-16 22:16     ` Tejun Heo
2013-05-13  7:46 ` [patch v3 -mm 3/3] vmscan, memcg: Do softlimit reclaim also for targeted reclaim Michal Hocko
2013-05-13  7:46   ` Michal Hocko
2013-05-13  7:46   ` Michal Hocko
2013-05-15  8:42   ` Glauber Costa
2013-05-15  8:42     ` Glauber Costa
2013-05-17  7:50     ` Michal Hocko
2013-05-17  7:50       ` Michal Hocko
2013-05-17  7:50       ` Michal Hocko
2013-05-16 23:12   ` Tejun Heo
2013-05-16 23:12     ` Tejun Heo
2013-05-17  7:34     ` Michal Hocko
2013-05-17  7:34       ` Michal Hocko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1369674791-13861-2-git-send-email-mhocko@suse.cz \
    --to=mhocko@suse.cz \
    --cc=akpm@linux-foundation.org \
    --cc=bsingharora@gmail.com \
    --cc=cgroups@vger.kernel.org \
    --cc=glommer@parallels.com \
    --cc=gthelen@google.com \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=tj@kernel.org \
    --cc=walken@google.com \
    --cc=yinghan@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.