linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Roman Gushchin <guro@fb.com>
To: <linux-mm@kvack.org>, Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@kernel.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Shakeel Butt <shakeelb@google.com>,
	Vladimir Davydov <vdavydov.dev@gmail.com>,
	<linux-kernel@vger.kernel.org>, <kernel-team@fb.com>,
	Bharata B Rao <bharata@linux.ibm.com>,
	Yafang Shao <laoar.shao@gmail.com>, Roman Gushchin <guro@fb.com>
Subject: [PATCH v2 18/28] mm: memcg/slab: charge individual slab objects instead of pages
Date: Mon, 27 Jan 2020 09:34:43 -0800	[thread overview]
Message-ID: <20200127173453.2089565-19-guro@fb.com> (raw)
In-Reply-To: <20200127173453.2089565-1-guro@fb.com>

Switch to per-object accounting of non-root slab objects.

Charging is performed using obj_cgroup API in the pre_alloc hook.
Obj_cgroup is charged with the size of the object plus the size
of its metadata: for now, that is the size of an obj_cgroup pointer.
If the amount of memory has been charged successfully, the actual
allocation code is executed. Otherwise, -ENOMEM is returned.

In the post_alloc hook, if the actual allocation succeeded, the
corresponding vmstats are bumped and the obj_cgroup pointer is saved.
Otherwise, the charge is canceled.

On the free path, the obj_cgroup pointer is obtained and used to
uncharge the size of the object being released.

Memcg and lruvec counters now represent only the memory used
by active slab objects and do not include the free space. The free
space is shared and doesn't belong to any specific cgroup.

Global per-node slab vmstats are still modified from the
(un)charge_slab_page() functions. The idea is to keep all slab pages
accounted as slab pages at the system level.

Signed-off-by: Roman Gushchin <guro@fb.com>
---
 mm/slab.h | 171 ++++++++++++++++++++++++------------------------------
 1 file changed, 75 insertions(+), 96 deletions(-)

diff --git a/mm/slab.h b/mm/slab.h
index 0fdbeaf4aa8c..6585638e5be0 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -352,72 +352,6 @@ static inline struct mem_cgroup *memcg_from_slab_page(struct page *page)
 	return NULL;
 }
 
-/*
- * Charge the slab page belonging to the non-root kmem_cache.
- * Can be called for non-root kmem_caches only.
- */
-static __always_inline int memcg_charge_slab(struct page *page,
-					     gfp_t gfp, int order,
-					     struct kmem_cache *s)
-{
-	unsigned int nr_pages = 1 << order;
-	struct mem_cgroup *memcg;
-	struct lruvec *lruvec;
-	int ret;
-
-	rcu_read_lock();
-	memcg = READ_ONCE(s->memcg_params.memcg);
-	while (memcg && !css_tryget_online(&memcg->css))
-		memcg = parent_mem_cgroup(memcg);
-	rcu_read_unlock();
-
-	if (unlikely(!memcg || mem_cgroup_is_root(memcg))) {
-		mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
-				    nr_pages << PAGE_SHIFT);
-		percpu_ref_get_many(&s->memcg_params.refcnt, nr_pages);
-		return 0;
-	}
-
-	ret = memcg_kmem_charge(memcg, gfp, nr_pages);
-	if (ret)
-		goto out;
-
-	lruvec = mem_cgroup_lruvec(memcg, page_pgdat(page));
-	mod_lruvec_state(lruvec, cache_vmstat_idx(s), nr_pages << PAGE_SHIFT);
-
-	percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order);
-out:
-	css_put(&memcg->css);
-	return ret;
-}
-
-/*
- * Uncharge a slab page belonging to a non-root kmem_cache.
- * Can be called for non-root kmem_caches only.
- */
-static __always_inline void memcg_uncharge_slab(struct page *page, int order,
-						struct kmem_cache *s)
-{
-	unsigned int nr_pages = 1 << order;
-	struct mem_cgroup *memcg;
-	struct lruvec *lruvec;
-
-	rcu_read_lock();
-	memcg = READ_ONCE(s->memcg_params.memcg);
-	if (likely(!mem_cgroup_is_root(memcg))) {
-		lruvec = mem_cgroup_lruvec(memcg, page_pgdat(page));
-		mod_lruvec_state(lruvec, cache_vmstat_idx(s),
-				 -(nr_pages << PAGE_SHIFT));
-		memcg_kmem_uncharge(memcg, nr_pages);
-	} else {
-		mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
-				    -(nr_pages << PAGE_SHIFT));
-	}
-	rcu_read_unlock();
-
-	percpu_ref_put_many(&s->memcg_params.refcnt, nr_pages);
-}
-
 static inline int memcg_alloc_page_obj_cgroups(struct page *page, gfp_t gfp,
 					     unsigned int objects)
 {
@@ -437,6 +371,45 @@ static inline void memcg_free_page_obj_cgroups(struct page *page)
 	page->obj_cgroups = NULL;
 }
 
+static inline size_t obj_full_size(struct kmem_cache *s)
+{
+	/*
+	 * For each accounted object there is an extra space which is used
+	 * to store obj_cgroup membership. Charge it too.
+	 */
+	return s->size + sizeof(struct obj_cgroup *);
+}
+
+static inline struct kmem_cache *memcg_slab_pre_alloc_hook(struct kmem_cache *s,
+						struct obj_cgroup **objcgp,
+						size_t objects, gfp_t flags)
+{
+	struct kmem_cache *cachep;
+
+	cachep = memcg_kmem_get_cache(s, objcgp);
+	if (is_root_cache(cachep))
+		return s;
+
+	if (obj_cgroup_charge(*objcgp, flags, objects * obj_full_size(s))) {
+		memcg_kmem_put_cache(cachep);
+		cachep = NULL;
+	}
+
+	return cachep;
+}
+
+static inline void mod_objcg_memcg_state(struct obj_cgroup *objcg,
+					 struct pglist_data *pgdat,
+					 int idx, int nr)
+{
+	struct lruvec *lruvec;
+
+	rcu_read_lock();
+	lruvec = mem_cgroup_lruvec(obj_cgroup_memcg(objcg), pgdat);
+	mod_lruvec_memcg_state(lruvec, idx, nr);
+	rcu_read_unlock();
+}
+
 static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
 					      struct obj_cgroup *objcg,
 					      size_t size, void **p)
@@ -451,6 +424,10 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
 			off = obj_to_index(s, page, p[i]);
 			obj_cgroup_get(objcg);
 			page_obj_cgroups(page)[off] = objcg;
+			mod_objcg_memcg_state(objcg, page_pgdat(page),
+					      cache_vmstat_idx(s), s->size);
+		} else {
+			obj_cgroup_uncharge(objcg, obj_full_size(s));
 		}
 	}
 	obj_cgroup_put(objcg);
@@ -469,6 +446,11 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s, struct page *page,
 	off = obj_to_index(s, page, p);
 	objcg = page_obj_cgroups(page)[off];
 	page_obj_cgroups(page)[off] = NULL;
+
+	obj_cgroup_uncharge(objcg, obj_full_size(s));
+	mod_objcg_memcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s),
+			      -s->size);
+
 	obj_cgroup_put(objcg);
 }
 
@@ -510,17 +492,6 @@ static inline struct mem_cgroup *memcg_from_slab_page(struct page *page)
 	return NULL;
 }
 
-static inline int memcg_charge_slab(struct page *page, gfp_t gfp, int order,
-				    struct kmem_cache *s)
-{
-	return 0;
-}
-
-static inline void memcg_uncharge_slab(struct page *page, int order,
-				       struct kmem_cache *s)
-{
-}
-
 static inline int memcg_alloc_page_obj_cgroups(struct page *page, gfp_t gfp,
 					       unsigned int objects)
 {
@@ -531,6 +502,13 @@ static inline void memcg_free_page_obj_cgroups(struct page *page)
 {
 }
 
+static inline struct kmem_cache *memcg_slab_pre_alloc_hook(struct kmem_cache *s,
+						struct obj_cgroup **objcgp,
+						size_t objects, gfp_t flags)
+{
+	return NULL;
+}
+
 static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
 					      struct obj_cgroup *objcg,
 					      size_t size, void **p)
@@ -569,32 +547,33 @@ static __always_inline int charge_slab_page(struct page *page,
 					    struct kmem_cache *s,
 					    unsigned int objects)
 {
-	int ret;
-
-	if (is_root_cache(s)) {
-		mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
-				    PAGE_SIZE << order);
-		return 0;
-	}
+#ifdef CONFIG_MEMCG_KMEM
+	if (!is_root_cache(s)) {
+		int ret;
 
-	ret = memcg_alloc_page_obj_cgroups(page, gfp, objects);
-	if (ret)
-		return ret;
+		ret = memcg_alloc_page_obj_cgroups(page, gfp, objects);
+		if (ret)
+			return ret;
 
-	return memcg_charge_slab(page, gfp, order, s);
+		percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order);
+	}
+#endif
+	mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
+			    PAGE_SIZE << order);
+	return 0;
 }
 
 static __always_inline void uncharge_slab_page(struct page *page, int order,
 					       struct kmem_cache *s)
 {
-	if (is_root_cache(s)) {
-		mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
-				    -(PAGE_SIZE << order));
-		return;
+#ifdef CONFIG_MEMCG_KMEM
+	if (!is_root_cache(s)) {
+		memcg_free_page_obj_cgroups(page);
+		percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order);
 	}
-
-	memcg_free_page_obj_cgroups(page);
-	memcg_uncharge_slab(page, order, s);
+#endif
+	mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
+			    -(PAGE_SIZE << order));
 }
 
 static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
@@ -666,7 +645,7 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
 
 	if (memcg_kmem_enabled() &&
 	    ((flags & __GFP_ACCOUNT) || (s->flags & SLAB_ACCOUNT)))
-		return memcg_kmem_get_cache(s, objcgp);
+		return memcg_slab_pre_alloc_hook(s, objcgp, size, flags);
 
 	return s;
 }
-- 
2.24.1



  parent reply	other threads:[~2020-01-27 17:43 UTC|newest]

Thread overview: 84+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-27 17:34 [PATCH v2 00/28] The new cgroup slab memory controller Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 01/28] mm: kmem: cleanup (__)memcg_kmem_charge_memcg() arguments Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 02/28] mm: kmem: cleanup memcg_kmem_uncharge_memcg() arguments Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 03/28] mm: kmem: rename memcg_kmem_(un)charge() into memcg_kmem_(un)charge_page() Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 04/28] mm: kmem: switch to nr_pages in (__)memcg_kmem_charge_memcg() Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 05/28] mm: memcg/slab: cache page number in memcg_(un)charge_slab() Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 06/28] mm: kmem: rename (__)memcg_kmem_(un)charge_memcg() to __memcg_kmem_(un)charge() Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 07/28] mm: memcg/slab: introduce mem_cgroup_from_obj() Roman Gushchin
2020-02-03 16:05   ` Johannes Weiner
2020-01-27 17:34 ` [PATCH v2 08/28] mm: fork: fix kernel_stack memcg stats for various stack implementations Roman Gushchin
2020-02-03 16:12   ` Johannes Weiner
2020-01-27 17:34 ` [PATCH v2 09/28] mm: memcg/slab: rename __mod_lruvec_slab_state() into __mod_lruvec_obj_state() Roman Gushchin
2020-02-03 16:13   ` Johannes Weiner
2020-01-27 17:34 ` [PATCH v2 10/28] mm: memcg: introduce mod_lruvec_memcg_state() Roman Gushchin
2020-02-03 17:39   ` Johannes Weiner
2020-01-27 17:34 ` [PATCH v2 11/28] mm: slub: implement SLUB version of obj_to_index() Roman Gushchin
2020-02-03 17:44   ` Johannes Weiner
2020-01-27 17:34 ` [PATCH v2 12/28] mm: vmstat: use s32 for vm_node_stat_diff in struct per_cpu_nodestat Roman Gushchin
2020-02-03 17:58   ` Johannes Weiner
2020-02-03 18:25     ` Roman Gushchin
2020-02-03 20:34       ` Johannes Weiner
2020-02-03 22:28         ` Roman Gushchin
2020-02-03 22:39           ` Johannes Weiner
2020-02-04  1:44             ` Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 13/28] mm: vmstat: convert slab vmstat counter to bytes Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 14/28] mm: memcontrol: decouple reference counting from page accounting Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 15/28] mm: memcg/slab: obj_cgroup API Roman Gushchin
2020-02-03 19:31   ` Johannes Weiner
2020-01-27 17:34 ` [PATCH v2 16/28] mm: memcg/slab: allocate obj_cgroups for non-root slab pages Roman Gushchin
2020-02-03 18:27   ` Johannes Weiner
2020-02-03 18:34     ` Roman Gushchin
2020-02-03 20:46       ` Johannes Weiner
2020-02-03 21:19         ` Roman Gushchin
2020-02-03 22:29           ` Johannes Weiner
2020-01-27 17:34 ` [PATCH v2 17/28] mm: memcg/slab: save obj_cgroup for non-root slab objects Roman Gushchin
2020-02-03 19:53   ` Johannes Weiner
2020-01-27 17:34 ` Roman Gushchin [this message]
2020-01-27 17:34 ` [PATCH v2 19/28] mm: memcg/slab: deprecate memory.kmem.slabinfo Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 20/28] mm: memcg/slab: move memcg_kmem_bypass() to memcontrol.h Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 21/28] mm: memcg/slab: use a single set of kmem_caches for all memory cgroups Roman Gushchin
2020-02-03 19:50   ` Johannes Weiner
2020-02-03 20:58     ` Roman Gushchin
2020-02-03 22:17       ` Johannes Weiner
2020-02-03 22:38         ` Roman Gushchin
2020-02-04  1:15         ` Roman Gushchin
2020-02-04  2:47           ` Johannes Weiner
2020-02-04  4:35             ` Roman Gushchin
2020-02-04 18:41               ` Johannes Weiner
2020-02-05 15:58                 ` Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 22/28] mm: memcg/slab: simplify memcg cache creation Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 23/28] mm: memcg/slab: deprecate memcg_kmem_get_cache() Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 24/28] mm: memcg/slab: deprecate slab_root_caches Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 25/28] mm: memcg/slab: remove redundant check in memcg_accumulate_slabinfo() Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 26/28] tools/cgroup: add slabinfo.py tool Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 27/28] tools/cgroup: make slabinfo.py compatible with new slab controller Roman Gushchin
2020-01-30  2:17   ` Bharata B Rao
2020-01-30  2:44     ` Roman Gushchin
2020-01-31 22:24     ` Roman Gushchin
2020-02-12  5:21       ` Bharata B Rao
2020-02-12 20:42         ` Roman Gushchin
2020-01-27 17:34 ` [PATCH v2 28/28] kselftests: cgroup: add kernel memory accounting tests Roman Gushchin
2020-01-30  2:06 ` [PATCH v2 00/28] The new cgroup slab memory controller Bharata B Rao
2020-01-30  2:41   ` Roman Gushchin
2020-08-12 23:16     ` Pavel Tatashin
2020-08-12 23:18       ` Pavel Tatashin
2020-08-13  0:04       ` Roman Gushchin
2020-08-13  0:31         ` Pavel Tatashin
2020-08-28 16:47           ` Pavel Tatashin
2020-09-01  5:28             ` Bharata B Rao
2020-09-01 12:52               ` Pavel Tatashin
2020-09-02  6:23                 ` Bharata B Rao
2020-09-02 12:34                   ` Pavel Tatashin
2020-09-02  9:53             ` Vlastimil Babka
2020-09-02 10:39               ` David Hildenbrand
2020-09-02 12:42                 ` Pavel Tatashin
2020-09-02 13:50                   ` Michal Hocko
2020-09-02 14:20                     ` Pavel Tatashin
2020-09-03 18:09                       ` David Hildenbrand
2020-09-02 11:26               ` Michal Hocko
2020-09-02 12:51                 ` Pavel Tatashin
2020-09-02 13:51                   ` Michal Hocko
2020-09-02 11:32               ` Michal Hocko
2020-09-02 12:53                 ` Pavel Tatashin
2020-09-02 13:52                   ` Michal Hocko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200127173453.2089565-19-guro@fb.com \
    --to=guro@fb.com \
    --cc=akpm@linux-foundation.org \
    --cc=bharata@linux.ibm.com \
    --cc=hannes@cmpxchg.org \
    --cc=kernel-team@fb.com \
    --cc=laoar.shao@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=shakeelb@google.com \
    --cc=vdavydov.dev@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).