linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Roman Gushchin <guro@fb.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: <linux-mm@kvack.org>, <linux-kernel@vger.kernel.org>,
	<kernel-team@fb.com>, Johannes Weiner <hannes@cmpxchg.org>,
	Shakeel Butt <shakeelb@google.com>,
	Waiman Long <longman@redhat.com>, Roman Gushchin <guro@fb.com>
Subject: [PATCH v7 09/10] mm: stop setting page->mem_cgroup pointer for slab pages
Date: Tue, 11 Jun 2019 16:18:12 -0700	[thread overview]
Message-ID: <20190611231813.3148843-10-guro@fb.com> (raw)
In-Reply-To: <20190611231813.3148843-1-guro@fb.com>

Every slab page charged to a non-root memory cgroup has a pointer
to the memory cgroup and holds a reference to it, which protects
a non-empty memory cgroup from being released. At the same time
the page has a pointer to the corresponding kmem_cache, and also
hold a reference to the kmem_cache. And kmem_cache by itself
holds a reference to the cgroup.

So there is clearly some redundancy, which allows to stop setting
the page->mem_cgroup pointer and rely on getting memcg pointer
indirectly via kmem_cache. Further it will allow to change this
pointer easier, without a need to go over all charged pages.

So let's stop setting page->mem_cgroup pointer for slab pages,
and stop using the css refcounter directly for protecting
the memory cgroup from going away. Instead rely on kmem_cache
as an intermediate object.

Make sure that vmstats and shrinker lists are working as previously,
as well as /proc/kpagecgroup interface.

Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
---
 mm/list_lru.c   |  3 +-
 mm/memcontrol.c | 12 ++++----
 mm/slab.h       | 74 ++++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 70 insertions(+), 19 deletions(-)

diff --git a/mm/list_lru.c b/mm/list_lru.c
index 927d85be32f6..0f1f6b06b7f3 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/memcontrol.h>
+#include "slab.h"
 
 #ifdef CONFIG_MEMCG_KMEM
 static LIST_HEAD(list_lrus);
@@ -63,7 +64,7 @@ static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
 	if (!memcg_kmem_enabled())
 		return NULL;
 	page = virt_to_head_page(ptr);
-	return page->mem_cgroup;
+	return memcg_from_slab_page(page);
 }
 
 static inline struct list_lru_one *
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 43a42bc3ed3f..25e72779fd33 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -486,7 +486,10 @@ ino_t page_cgroup_ino(struct page *page)
 	unsigned long ino = 0;
 
 	rcu_read_lock();
-	memcg = READ_ONCE(page->mem_cgroup);
+	if (PageHead(page) && PageSlab(page))
+		memcg = memcg_from_slab_page(page);
+	else
+		memcg = READ_ONCE(page->mem_cgroup);
 	while (memcg && !(memcg->css.flags & CSS_ONLINE))
 		memcg = parent_mem_cgroup(memcg);
 	if (memcg)
@@ -2807,9 +2810,6 @@ int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
 		cancel_charge(memcg, nr_pages);
 		return -ENOMEM;
 	}
-
-	page->mem_cgroup = memcg;
-
 	return 0;
 }
 
@@ -2832,8 +2832,10 @@ int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
 	memcg = get_mem_cgroup_from_current();
 	if (!mem_cgroup_is_root(memcg)) {
 		ret = __memcg_kmem_charge_memcg(page, gfp, order, memcg);
-		if (!ret)
+		if (!ret) {
+			page->mem_cgroup = memcg;
 			__SetPageKmemcg(page);
+		}
 	}
 	css_put(&memcg->css);
 	return ret;
diff --git a/mm/slab.h b/mm/slab.h
index 5d2b8511e6fb..7ead47cb9338 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -255,30 +255,67 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
 	return s->memcg_params.root_cache;
 }
 
+/*
+ * Expects a pointer to a slab page. Please note, that PageSlab() check
+ * isn't sufficient, as it returns true also for tail compound slab pages,
+ * which do not have slab_cache pointer set.
+ * So this function assumes that the page can pass PageHead() and PageSlab()
+ * checks.
+ */
+static inline struct mem_cgroup *memcg_from_slab_page(struct page *page)
+{
+	struct kmem_cache *s;
+
+	s = READ_ONCE(page->slab_cache);
+	if (s && !is_root_cache(s))
+		return s->memcg_params.memcg;
+
+	return NULL;
+}
+
+/*
+ * Charge the slab page belonging to the non-root kmem_cache.
+ * Can be called for non-root kmem_caches only.
+ */
 static __always_inline int memcg_charge_slab(struct page *page,
 					     gfp_t gfp, int order,
 					     struct kmem_cache *s)
 {
+	struct mem_cgroup *memcg;
+	struct lruvec *lruvec;
 	int ret;
 
-	if (is_root_cache(s))
-		return 0;
-
-	ret = memcg_kmem_charge_memcg(page, gfp, order, s->memcg_params.memcg);
+	memcg = s->memcg_params.memcg;
+	ret = memcg_kmem_charge_memcg(page, gfp, order, memcg);
 	if (ret)
 		return ret;
 
+	lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
+	mod_lruvec_state(lruvec, cache_vmstat_idx(s), 1 << order);
+
+	/* transer try_charge() page references to kmem_cache */
 	percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order);
+	css_put_many(&memcg->css, 1 << order);
 
 	return 0;
 }
 
+/*
+ * Uncharge a slab page belonging to a non-root kmem_cache.
+ * Can be called for non-root kmem_caches only.
+ */
 static __always_inline void memcg_uncharge_slab(struct page *page, int order,
 						struct kmem_cache *s)
 {
-	if (!is_root_cache(s))
-		percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order);
-	memcg_kmem_uncharge(page, order);
+	struct mem_cgroup *memcg;
+	struct lruvec *lruvec;
+
+	memcg = s->memcg_params.memcg;
+	lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
+	mod_lruvec_state(lruvec, cache_vmstat_idx(s), -(1 << order));
+	memcg_kmem_uncharge_memcg(page, order, memcg);
+
+	percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order);
 }
 
 extern void slab_init_memcg_params(struct kmem_cache *);
@@ -314,6 +351,11 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
 	return s;
 }
 
+static inline struct mem_cgroup *memcg_from_slab_page(struct page *page)
+{
+	return NULL;
+}
+
 static inline int memcg_charge_slab(struct page *page, gfp_t gfp, int order,
 				    struct kmem_cache *s)
 {
@@ -351,18 +393,24 @@ static __always_inline int charge_slab_page(struct page *page,
 					    gfp_t gfp, int order,
 					    struct kmem_cache *s)
 {
-	int ret = memcg_charge_slab(page, gfp, order, s);
-
-	if (!ret)
-		mod_lruvec_page_state(page, cache_vmstat_idx(s), 1 << order);
+	if (is_root_cache(s)) {
+		mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
+				    1 << order);
+		return 0;
+	}
 
-	return ret;
+	return memcg_charge_slab(page, gfp, order, s);
 }
 
 static __always_inline void uncharge_slab_page(struct page *page, int order,
 					       struct kmem_cache *s)
 {
-	mod_lruvec_page_state(page, cache_vmstat_idx(s), -(1 << order));
+	if (is_root_cache(s)) {
+		mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
+				    -(1 << order));
+		return;
+	}
+
 	memcg_uncharge_slab(page, order, s);
 }
 
-- 
2.21.0


  parent reply	other threads:[~2019-06-11 23:18 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-11 23:18 [PATCH v7 00/10] mm: reparent slab memory on cgroup removal Roman Gushchin
2019-06-11 23:18 ` [PATCH v7 01/10] mm: postpone kmem_cache memcg pointer initialization to memcg_link_cache() Roman Gushchin
2019-06-13  2:04   ` Andrew Morton
2019-06-13 16:25     ` Roman Gushchin
2019-06-11 23:18 ` [PATCH v7 02/10] mm: rename slab delayed deactivation functions and fields Roman Gushchin
2019-06-25 18:17   ` Shakeel Butt
2019-06-11 23:18 ` [PATCH v7 03/10] mm: generalize postponed non-root kmem_cache deactivation Roman Gushchin
2019-06-25 18:17   ` Shakeel Butt
2019-06-11 23:18 ` [PATCH v7 04/10] mm: introduce __memcg_kmem_uncharge_memcg() Roman Gushchin
2019-06-11 23:18 ` [PATCH v7 05/10] mm: unify SLAB and SLUB page accounting Roman Gushchin
2019-06-11 23:18 ` [PATCH v7 06/10] mm: don't check the dying flag on kmem_cache creation Roman Gushchin
2019-06-16 16:26   ` Vladimir Davydov
2019-06-25 18:31   ` Shakeel Butt
2019-06-11 23:18 ` [PATCH v7 07/10] mm: synchronize access to kmem_cache dying flag using a spinlock Roman Gushchin
2019-06-16 16:27   ` Vladimir Davydov
2019-06-25 18:33   ` Shakeel Butt
2019-06-11 23:18 ` [PATCH v7 08/10] mm: rework non-root kmem_cache lifecycle management Roman Gushchin
2019-06-25 23:57   ` Shakeel Butt
2019-11-21 11:17   ` WARNING bisected (was Re: [PATCH v7 08/10] mm: rework non-root kmem_cache lifecycle management) Christian Borntraeger
2019-11-21 13:08     ` Christian Borntraeger
2019-11-21 16:58     ` Roman Gushchin
2019-11-21 16:59       ` Christian Borntraeger
2019-11-21 18:45         ` Roman Gushchin
2019-11-21 20:43           ` Rik van Riel
2019-11-21 20:55             ` Roman Gushchin
2019-11-21 22:09               ` Roman Gushchin
2019-11-22 13:00                 ` Christian Borntraeger
2019-11-22 16:28           ` Christian Borntraeger
2019-11-24  0:39             ` Roman Gushchin
2019-11-25  8:00               ` Christian Borntraeger
2019-11-25 18:07                 ` Roman Gushchin
2019-06-11 23:18 ` Roman Gushchin [this message]
2019-06-26  0:15   ` [PATCH v7 09/10] mm: stop setting page->mem_cgroup pointer for slab pages Shakeel Butt
2019-06-11 23:18 ` [PATCH v7 10/10] mm: reparent memcg kmem_caches on cgroup removal Roman Gushchin
2019-06-16 16:29   ` Vladimir Davydov
2019-06-26  0:15   ` Shakeel Butt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190611231813.3148843-10-guro@fb.com \
    --to=guro@fb.com \
    --cc=akpm@linux-foundation.org \
    --cc=hannes@cmpxchg.org \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=longman@redhat.com \
    --cc=shakeelb@google.com \
    --cc=vdavydov.dev@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).