From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: 
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1755742Ab3E0RNl (ORCPT );
	Mon, 27 May 2013 13:13:41 -0400
Received: from cantor2.suse.de ([195.135.220.15]:38000 "EHLO mx2.suse.de"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1755619Ab3E0RNh (ORCPT );
	Mon, 27 May 2013 13:13:37 -0400
From: Michal Hocko
To: Johannes Weiner
Cc: Andrew Morton , KAMEZAWA Hiroyuki , Ying Han , Hugh Dickins ,
	Glauber Costa , Michel Lespinasse , Greg Thelen , Tejun Heo ,
	Balbir Singh , cgroups@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org
Subject: [PATCH 1/3] memcg: track children in soft limit excess to improve
	soft limit
Date: Mon, 27 May 2013 19:13:09 +0200
Message-Id: <1369674791-13861-2-git-send-email-mhocko@suse.cz>
X-Mailer: git-send-email 1.7.10.4
In-Reply-To: <1369674791-13861-1-git-send-email-mhocko@suse.cz>
References: <20130517160247.GA10023@cmpxchg.org>
	<1369674791-13861-1-git-send-email-mhocko@suse.cz>
Sender: linux-kernel-owner@vger.kernel.org
List-ID: 
X-Mailing-List: linux-kernel@vger.kernel.org

Soft limit reclaim has to check the whole reclaim hierarchy while doing
the first pass of the reclaim. This leads to higher system time, which
can be visible especially when there are many groups in the hierarchy.

- TODO put testing results here

This patch adds a per-memcg counter of children in excess. It also
restores MEM_CGROUP_TARGET_SOFTLIMIT in mem_cgroup_event_ratelimit for
proper batching. When a group exceeds its soft limit for the first
time, it increments children_in_excess of every ancestor up the
hierarchy; similarly, when it drops back below the limit, it decrements
those counters again. Each transition is recorded in the group's
soft_contributed flag.

mem_cgroup_soft_reclaim_eligible then uses this information to decide
whether to skip only the node or the whole subtree. The rule is simple:
skip just the node if it has children in excess, and skip the whole
subtree otherwise.

Signed-off-by: Michal Hocko
---
 mm/memcontrol.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 981ee12..60b48bc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -136,6 +136,7 @@ static const char * const mem_cgroup_lru_names[] = {
  */
 enum mem_cgroup_events_target {
 	MEM_CGROUP_TARGET_THRESH,
+	MEM_CGROUP_TARGET_SOFTLIMIT,
 	MEM_CGROUP_TARGET_NUMAINFO,
 	MEM_CGROUP_NTARGETS,
 };
@@ -355,6 +356,10 @@ struct mem_cgroup {
 	atomic_t	numainfo_updating;
 #endif
 
+	spinlock_t soft_lock;
+	bool soft_contributed;
+	atomic_t children_in_excess;
+
 	/*
 	 * Per cgroup active and inactive list, similar to the
 	 * per zone LRU lists.
@@ -890,6 +895,9 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 	case MEM_CGROUP_TARGET_THRESH:
 		next = val + THRESHOLDS_EVENTS_TARGET;
 		break;
+	case MEM_CGROUP_TARGET_SOFTLIMIT:
+		next = val + SOFTLIMIT_EVENTS_TARGET;
+		break;
 	case MEM_CGROUP_TARGET_NUMAINFO:
 		next = val + NUMAINFO_EVENTS_TARGET;
 		break;
@@ -902,6 +910,34 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 	return false;
 }
 
+static void mem_cgroup_update_soft_limit(struct mem_cgroup *memcg)
+{
+	unsigned long long excess = res_counter_soft_limit_excess(&memcg->res);
+	struct mem_cgroup *parent = memcg;
+	int delta = 0;
+
+	spin_lock(&memcg->soft_lock);
+	if (excess) {
+		if (!memcg->soft_contributed) {
+			delta = 1;
+			memcg->soft_contributed = true;
+		}
+	} else {
+		if (memcg->soft_contributed) {
+			delta = -1;
+			memcg->soft_contributed = false;
+		}
+	}
+
+	/*
+	 * Necessary to update all ancestors when hierarchy is used
+	 * because their event counter is not touched.
+	 */
+	while (delta && (parent = parent_mem_cgroup(parent)))
+		atomic_add(delta, &parent->children_in_excess);
+	spin_unlock(&memcg->soft_lock);
+}
+
 /*
  * Check events in order.
  *
@@ -912,8 +948,11 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 	/* threshold event is triggered in finer grain than soft limit */
 	if (unlikely(mem_cgroup_event_ratelimit(memcg,
 						MEM_CGROUP_TARGET_THRESH))) {
+		bool do_softlimit;
 		bool do_numainfo __maybe_unused;
 
+		do_softlimit = mem_cgroup_event_ratelimit(memcg,
+						MEM_CGROUP_TARGET_SOFTLIMIT);
 #if MAX_NUMNODES > 1
 		do_numainfo = mem_cgroup_event_ratelimit(memcg,
 						MEM_CGROUP_TARGET_NUMAINFO);
@@ -921,6 +960,8 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 		preempt_enable();
 
 		mem_cgroup_threshold(memcg);
+		if (unlikely(do_softlimit))
+			mem_cgroup_update_soft_limit(memcg);
 #if MAX_NUMNODES > 1
 		if (unlikely(do_numainfo))
 			atomic_inc(&memcg->numainfo_events);
@@ -1894,6 +1935,9 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
  * hierarchy if
  *	a) it is over its soft limit
  *	b) any parent up the hierarchy is over its soft limit
+ *
+ * If the given group doesn't have any children over the limit then it
+ * doesn't make any sense to iterate its subtree.
  */
 enum mem_cgroup_filter_t
 mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
@@ -1915,6 +1959,8 @@ mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
 		break;
 	}
 
+	if (!atomic_read(&memcg->children_in_excess))
+		return SKIP_TREE;
 	return SKIP;
 }
 
@@ -6061,6 +6107,7 @@ mem_cgroup_css_alloc(struct cgroup *cont)
 	mutex_init(&memcg->thresholds_lock);
 	spin_lock_init(&memcg->move_lock);
 	vmpressure_init(&memcg->vmpressure);
+	spin_lock_init(&memcg->soft_lock);
 
 	return &memcg->css;
 
@@ -6150,6 +6197,10 @@ static void mem_cgroup_css_offline(struct cgroup *cont)
 	mem_cgroup_invalidate_reclaim_iterators(memcg);
 	mem_cgroup_reparent_charges(memcg);
+	if (memcg->soft_contributed) {
+		while ((memcg = parent_mem_cgroup(memcg)))
+			atomic_dec(&memcg->children_in_excess);
+	}
 	mem_cgroup_destroy_all_caches(memcg);
 }
 
-- 
1.7.10.4
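
[Editorial note, not part of the patch] To see the bookkeeping in
isolation, here is a minimal, self-contained userspace sketch of the
scheme the patch implements. It is illustrative only: struct group,
update_soft_limit(), and reclaim_decision() are made-up stand-ins for
struct mem_cgroup, mem_cgroup_update_soft_limit(), and
mem_cgroup_soft_reclaim_eligible(); the kernel's soft_lock
serialization and res_counter accounting are omitted (single-threaded
toy), and "VISIT" stands in for the filter value that lets reclaim
descend.

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* Toy model of one memcg in the hierarchy. */
	struct group {
		struct group *parent;
		unsigned long usage;		/* current charge */
		unsigned long soft_limit;	/* soft limit threshold */
		bool soft_contributed;		/* counted in ancestors? */
		atomic_int children_in_excess;	/* descendants over limit */
	};

	/* Stand-in for mem_cgroup_update_soft_limit(): propagate only
	 * limit-crossing transitions up the hierarchy. */
	static void update_soft_limit(struct group *g)
	{
		bool excess = g->usage > g->soft_limit;
		int delta = 0;

		if (excess && !g->soft_contributed) {
			delta = 1;
			g->soft_contributed = true;
		} else if (!excess && g->soft_contributed) {
			delta = -1;
			g->soft_contributed = false;
		}

		/* Only a transition touches the ancestors; the steady
		 * state costs nothing. */
		for (struct group *p = g->parent; delta && p; p = p->parent)
			atomic_fetch_add(&p->children_in_excess, delta);
	}

	/* Stand-in for the SKIP vs. SKIP_TREE decision. */
	static const char *reclaim_decision(struct group *g)
	{
		if (g->usage > g->soft_limit)
			return "VISIT";		/* over its own limit */
		if (atomic_load(&g->children_in_excess))
			return "SKIP";		/* skip node, descend */
		return "SKIP_TREE";		/* prune whole subtree */
	}

	int main(void)
	{
		struct group root = { .soft_limit = 100 };
		struct group a = { .parent = &root, .soft_limit = 50 };
		struct group b = { .parent = &a, .soft_limit = 10 };

		b.usage = 40;		/* b goes over its soft limit */
		update_soft_limit(&b);
		printf("root: %s, a: %s, b: %s\n", reclaim_decision(&root),
		       reclaim_decision(&a), reclaim_decision(&b));
		/* prints: root: SKIP, a: SKIP, b: VISIT */

		b.usage = 5;		/* b drops back below the limit */
		update_soft_limit(&b);
		printf("root: %s, a: %s, b: %s\n", reclaim_decision(&root),
		       reclaim_decision(&a), reclaim_decision(&b));
		/* prints: root: SKIP_TREE, a: SKIP_TREE, b: SKIP_TREE */
		return 0;
	}

The point of the design is visible in the second printf: once nothing
below a group is in excess, the first-pass walk can prune that entire
subtree instead of visiting every node in it.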