From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pb0-f43.google.com (mail-pb0-f43.google.com [209.85.160.43]) by kanga.kvack.org (Postfix) with ESMTP id 4769C6B00E5 for ; Thu, 24 Oct 2013 08:05:36 -0400 (EDT) Received: by mail-pb0-f43.google.com with SMTP id md12so2220981pbc.16 for ; Thu, 24 Oct 2013 05:05:35 -0700 (PDT) Received: from psmtp.com ([74.125.245.123]) by mx.google.com with SMTP id ru9si832697pbc.18.2013.10.24.05.05.34 for ; Thu, 24 Oct 2013 05:05:35 -0700 (PDT) From: Vladimir Davydov Subject: [PATCH v11 07/15] memcg: scan cache objects hierarchically Date: Thu, 24 Oct 2013 16:04:58 +0400 Message-ID: <1906bf1f9832370d5695f84b7d47ed0b5c5f42ac.1382603434.git.vdavydov@parallels.com> In-Reply-To: References: MIME-Version: 1.0 Content-Type: text/plain Sender: owner-linux-mm@kvack.org List-ID: To: akpm@linux-foundation.org Cc: glommer@openvz.org, khorenko@parallels.com, devel@openvz.org, linux-mm@kvack.org, cgroups@vger.kernel.org, linux-kernel@vger.kernel.org, Dave Chinner , Mel Gorman , Rik van Riel , Johannes Weiner , Michal Hocko , Hugh Dickins , Kamezawa Hiroyuki From: Glauber Costa When reaching shrink_slab, we should descent in children memcg searching for objects that could be shrunk. This is true even if the memcg does not have kmem limits on, since the kmem res_counter will also be billed against the user res_counter of the parent. It is possible that we will free objects and not free any pages, that will just harm the child groups without helping the parent group at all. But at this point, we basically are prepared to pay the price. Signed-off-by: Glauber Costa Cc: Dave Chinner Cc: Mel Gorman Cc: Rik van Riel Cc: Johannes Weiner Cc: Michal Hocko Cc: Hugh Dickins Cc: Kamezawa Hiroyuki Cc: Andrew Morton --- include/linux/memcontrol.h | 6 ++++ mm/memcontrol.c | 13 +++++++++ mm/vmscan.c | 65 ++++++++++++++++++++++++++++++++++++-------- 3 files changed, 73 insertions(+), 11 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d16ba51..a513fad 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -488,6 +488,7 @@ static inline bool memcg_kmem_enabled(void) return static_key_false(&memcg_kmem_enabled_key); } +bool memcg_kmem_should_reclaim(struct mem_cgroup *memcg); bool memcg_kmem_is_active(struct mem_cgroup *memcg); /* @@ -624,6 +625,11 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) } #else +static inline bool memcg_kmem_should_reclaim(struct mem_cgroup *memcg) +{ + return false; +} + static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg) { return false; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f5b9921..2f5a777 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2972,6 +2972,19 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, } #ifdef CONFIG_MEMCG_KMEM +bool memcg_kmem_should_reclaim(struct mem_cgroup *memcg) +{ + struct mem_cgroup *iter; + + for_each_mem_cgroup_tree(iter, memcg) { + if (memcg_kmem_is_active(iter)) { + mem_cgroup_iter_break(memcg, iter); + return true; + } + } + return false; +} + static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg) { return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) && diff --git a/mm/vmscan.c b/mm/vmscan.c index cdfc364..36fc133 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -149,7 +149,7 @@ static bool global_reclaim(struct scan_control *sc) static bool has_kmem_reclaim(struct scan_control *sc) { return !sc->target_mem_cgroup || - memcg_kmem_is_active(sc->target_mem_cgroup); + memcg_kmem_should_reclaim(sc->target_mem_cgroup); } static unsigned long @@ -360,12 +360,35 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker, * * Returns the number of slab objects which we shrunk. */ +static unsigned long +shrink_slab_one(struct shrink_control *shrinkctl, struct shrinker *shrinker, + unsigned long nr_pages_scanned, unsigned long lru_pages) +{ + unsigned long freed = 0; + + for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) { + if (!node_online(shrinkctl->nid)) + continue; + + if (!(shrinker->flags & SHRINKER_NUMA_AWARE) && + (shrinkctl->nid != 0)) + break; + + freed += shrink_slab_node(shrinkctl, shrinker, + nr_pages_scanned, lru_pages); + + } + + return freed; +} + unsigned long shrink_slab(struct shrink_control *shrinkctl, unsigned long nr_pages_scanned, unsigned long lru_pages) { struct shrinker *shrinker; unsigned long freed = 0; + struct mem_cgroup *root = shrinkctl->target_mem_cgroup; if (nr_pages_scanned == 0) nr_pages_scanned = SWAP_CLUSTER_MAX; @@ -390,19 +413,39 @@ unsigned long shrink_slab(struct shrink_control *shrinkctl, if (shrinkctl->target_mem_cgroup && !(shrinker->flags & SHRINKER_MEMCG_AWARE)) continue; + /* + * In a hierarchical chain, it might be that not all memcgs are + * kmem active. kmemcg design mandates that when one memcg is + * active, its children will be active as well. But it is + * perfectly possible that its parent is not. + * + * We also need to make sure we scan at least once, for the + * global case. So if we don't have a target memcg (saved in + * root), we proceed normally and expect to break in the next + * round. + */ + do { + struct mem_cgroup *memcg = shrinkctl->target_mem_cgroup; - for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) { - if (!node_online(shrinkctl->nid)) - continue; - - if (!(shrinker->flags & SHRINKER_NUMA_AWARE) && - (shrinkctl->nid != 0)) + if (!memcg || memcg_kmem_is_active(memcg)) + freed += shrink_slab_one(shrinkctl, shrinker, + nr_pages_scanned, lru_pages); + /* + * For non-memcg aware shrinkers, we will arrive here + * at first pass because we need to scan the root + * memcg. We need to bail out, since exactly because + * they are not memcg aware, instead of noticing they + * have nothing to shrink, they will just shrink again, + * and deplete too many objects. + */ + if (!(shrinker->flags & SHRINKER_MEMCG_AWARE)) break; + shrinkctl->target_mem_cgroup = + mem_cgroup_iter(root, memcg, NULL); + } while (shrinkctl->target_mem_cgroup); - freed += shrink_slab_node(shrinkctl, shrinker, - nr_pages_scanned, lru_pages); - - } + /* restore original state */ + shrinkctl->target_mem_cgroup = root; } up_read(&shrinker_rwsem); out: -- 1.7.10.4 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org