linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Roman Gushchin <guro@fb.com>
To: <linux-mm@kvack.org>
Cc: Michal Hocko <mhocko@kernel.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	<linux-kernel@vger.kernel.org>, <kernel-team@fb.com>,
	Shakeel Butt <shakeelb@google.com>,
	Vladimir Davydov <vdavydov.dev@gmail.com>,
	Waiman Long <longman@redhat.com>,
	Christoph Lameter <cl@linux.com>, Roman Gushchin <guro@fb.com>
Subject: [PATCH 09/16] mm: memcg/slab: charge individual slab objects instead of pages
Date: Thu, 17 Oct 2019 17:28:13 -0700	[thread overview]
Message-ID: <20191018002820.307763-10-guro@fb.com> (raw)
In-Reply-To: <20191018002820.307763-1-guro@fb.com>

Switch to per-object accounting of non-root slab objects.

Charging is performed using the subpage charging API in the pre_alloc hook.
If the required amount of memory has been charged successfully, we proceed
with the actual allocation. Otherwise, -ENOMEM is returned.

In the post_alloc hook we check whether the actual allocation succeeded.
If so, the corresponding vmstats are bumped and memcg membership
information is recorded. Otherwise, the charge is canceled.

On the free path we look up the memcg membership information,
decrement the stats and uncharge. No operations are performed
on root kmem_caches.

Global per-node slab-related vmstats NR_SLAB_(UN)RECLAIMABLE_B
are still modified from (un)charge_slab_page() functions. The idea
is to keep all slab pages accounted as slab pages on system level.
Memcg and lruvec counters are now representing only memory used
by actual slab objects and do not include free space. Free space
is shared and doesn't belong to any specific cgroup.

Signed-off-by: Roman Gushchin <guro@fb.com>
---
 mm/slab.h | 152 ++++++++++++++++++++----------------------------------
 1 file changed, 57 insertions(+), 95 deletions(-)

diff --git a/mm/slab.h b/mm/slab.h
index 28feabed1e9a..0f2f712de77a 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -340,72 +340,6 @@ static inline struct mem_cgroup *memcg_from_slab_page(struct page *page)
 	return NULL;
 }
 
-/*
- * Charge the slab page belonging to the non-root kmem_cache.
- * Can be called for non-root kmem_caches only.
- */
-static __always_inline int memcg_charge_slab(struct page *page,
-					     gfp_t gfp, int order,
-					     struct kmem_cache *s)
-{
-	struct mem_cgroup *memcg;
-	struct lruvec *lruvec;
-	int ret;
-
-	rcu_read_lock();
-	memcg = READ_ONCE(s->memcg_params.memcg);
-	while (memcg && !css_tryget_online(&memcg->css))
-		memcg = parent_mem_cgroup(memcg);
-	rcu_read_unlock();
-
-	if (unlikely(!memcg || mem_cgroup_is_root(memcg))) {
-		mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
-				    (PAGE_SIZE << order));
-		percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order);
-		return 0;
-	}
-
-	ret = memcg_kmem_charge_memcg(page, gfp, order, memcg);
-	if (ret)
-		goto out;
-
-	lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
-	mod_lruvec_state(lruvec, cache_vmstat_idx(s), PAGE_SIZE << order);
-
-	/* transer try_charge() page references to kmem_cache */
-	percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order);
-	css_put_many(&memcg->css, 1 << order);
-out:
-	css_put(&memcg->css);
-	return ret;
-}
-
-/*
- * Uncharge a slab page belonging to a non-root kmem_cache.
- * Can be called for non-root kmem_caches only.
- */
-static __always_inline void memcg_uncharge_slab(struct page *page, int order,
-						struct kmem_cache *s)
-{
-	struct mem_cgroup *memcg;
-	struct lruvec *lruvec;
-
-	rcu_read_lock();
-	memcg = READ_ONCE(s->memcg_params.memcg);
-	if (likely(!mem_cgroup_is_root(memcg))) {
-		lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
-		mod_lruvec_state(lruvec, cache_vmstat_idx(s),
-				 -(PAGE_SIZE << order));
-		memcg_kmem_uncharge_memcg(page, order, memcg);
-	} else {
-		mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
-				    -(PAGE_SIZE << order));
-	}
-	rcu_read_unlock();
-
-	percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order);
-}
-
 static inline int memcg_alloc_page_memcg_vec(struct page *page, gfp_t gfp,
 					     unsigned int objects)
 {
@@ -423,11 +357,31 @@ static inline void memcg_free_page_memcg_vec(struct page *page)
 	page->mem_cgroup_vec = NULL;
 }
 
+static inline struct kmem_cache *memcg_slab_pre_alloc_hook(struct kmem_cache *s,
+						struct mem_cgroup **memcgp,
+						size_t size, gfp_t flags)
+{
+	struct kmem_cache *cachep;
+
+	cachep = memcg_kmem_get_cache(s, memcgp);
+	if (is_root_cache(cachep))
+		return s;
+
+	if (__memcg_kmem_charge_subpage(*memcgp, size * s->size, flags)) {
+		mem_cgroup_put(*memcgp);
+		memcg_kmem_put_cache(cachep);
+		cachep = NULL;
+	}
+
+	return cachep;
+}
+
 static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
 					      struct mem_cgroup *memcg,
 					      size_t size, void **p)
 {
 	struct mem_cgroup_ptr *memcg_ptr;
+	struct lruvec *lruvec;
 	struct page *page;
 	unsigned long off;
 	size_t i;
@@ -439,6 +393,11 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
 			off = obj_to_index(s, page, p[i]);
 			mem_cgroup_ptr_get(memcg_ptr);
 			page->mem_cgroup_vec[off] = memcg_ptr;
+			lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
+			mod_lruvec_memcg_state(lruvec, cache_vmstat_idx(s),
+					       s->size);
+		} else {
+			__memcg_kmem_uncharge_subpage(memcg, s->size);
 		}
 	}
 	mem_cgroup_ptr_put(memcg_ptr);
@@ -451,6 +410,8 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s, struct page *page,
 					void *p)
 {
 	struct mem_cgroup_ptr *memcg_ptr;
+	struct mem_cgroup *memcg;
+	struct lruvec *lruvec;
 	unsigned int off;
 
 	if (!memcg_kmem_enabled() || is_root_cache(s))
@@ -459,6 +420,14 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s, struct page *page,
 	off = obj_to_index(s, page, p);
 	memcg_ptr = page->mem_cgroup_vec[off];
 	page->mem_cgroup_vec[off] = NULL;
+	rcu_read_lock();
+	memcg = memcg_ptr->memcg;
+	if (likely(!mem_cgroup_is_root(memcg))) {
+		__memcg_kmem_uncharge_subpage(memcg, s->size);
+		lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
+		mod_lruvec_memcg_state(lruvec, cache_vmstat_idx(s), -s->size);
+	}
+	rcu_read_unlock();
 	mem_cgroup_ptr_put(memcg_ptr);
 }
 
@@ -500,17 +469,6 @@ static inline struct mem_cgroup *memcg_from_slab_page(struct page *page)
 	return NULL;
 }
 
-static inline int memcg_charge_slab(struct page *page, gfp_t gfp, int order,
-				    struct kmem_cache *s)
-{
-	return 0;
-}
-
-static inline void memcg_uncharge_slab(struct page *page, int order,
-				       struct kmem_cache *s)
-{
-}
-
 static inline int memcg_alloc_page_memcg_vec(struct page *page, gfp_t gfp,
 					     unsigned int objects)
 {
@@ -521,6 +479,13 @@ static inline void memcg_free_page_memcg_vec(struct page *page)
 {
 }
 
+static inline struct kmem_cache *memcg_slab_pre_alloc_hook(struct kmem_cache *s,
+						struct mem_cgroup **memcgp,
+						size_t size, gfp_t flags)
+{
+	return NULL;
+}
+
 static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
 					      struct mem_cgroup *memcg,
 					      size_t size, void **p)
@@ -561,30 +526,27 @@ static __always_inline int charge_slab_page(struct page *page,
 {
 	int ret;
 
-	if (is_root_cache(s)) {
-		mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
-				    PAGE_SIZE << order);
-		return 0;
-	}
-
-	ret = memcg_alloc_page_memcg_vec(page, gfp, objects);
-	if (ret)
-		return ret;
+	if (!is_root_cache(s)) {
+		ret = memcg_alloc_page_memcg_vec(page, gfp, objects);
+		if (ret)
+			return ret;
 
-	return memcg_charge_slab(page, gfp, order, s);
+		percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order);
+	}
+	mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
+			    PAGE_SIZE << order);
+	return 0;
 }
 
 static __always_inline void uncharge_slab_page(struct page *page, int order,
 					       struct kmem_cache *s)
 {
-	if (is_root_cache(s)) {
-		mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
-				    -(PAGE_SIZE << order));
-		return;
+	if (!is_root_cache(s)) {
+		memcg_free_page_memcg_vec(page);
+		percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order);
 	}
-
-	memcg_free_page_memcg_vec(page);
-	memcg_uncharge_slab(page, order, s);
+	mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
+			    -(PAGE_SIZE << order));
 }
 
 static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
@@ -656,7 +618,7 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
 
 	if (memcg_kmem_enabled() &&
 	    ((flags & __GFP_ACCOUNT) || (s->flags & SLAB_ACCOUNT)))
-		return memcg_kmem_get_cache(s, memcgp);
+		return memcg_slab_pre_alloc_hook(s, memcgp, size, flags);
 
 	return s;
 }
-- 
2.21.0


  parent reply	other threads:[~2019-10-18  0:28 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-18  0:28 [PATCH 00/16] The new slab memory controller Roman Gushchin
2019-10-18  0:28 ` [PATCH 01/16] mm: memcg: introduce mem_cgroup_ptr Roman Gushchin
2019-10-18  0:28 ` [PATCH 02/16] mm: vmstat: use s32 for vm_node_stat_diff in struct per_cpu_nodestat Roman Gushchin
2019-10-20 22:44   ` Christopher Lameter
2019-10-21  1:15     ` Roman Gushchin
2019-10-21 18:09       ` Christopher Lameter
2019-10-20 22:51   ` Christopher Lameter
2019-10-21  1:21     ` Roman Gushchin
2019-10-18  0:28 ` [PATCH 03/16] mm: vmstat: convert slab vmstat counter to bytes Roman Gushchin
2019-10-18  0:28 ` [PATCH 04/16] mm: memcg/slab: allocate space for memcg ownership data for non-root slabs Roman Gushchin
2019-10-18  0:28 ` [PATCH 05/16] mm: slub: implement SLUB version of obj_to_index() Roman Gushchin
2019-10-18  0:28 ` [PATCH 06/16] mm: memcg/slab: save memcg ownership data for non-root slab objects Roman Gushchin
2019-10-18  0:28 ` [PATCH 07/16] mm: memcg: move memcg_kmem_bypass() to memcontrol.h Roman Gushchin
2019-10-18  0:28 ` [PATCH 08/16] mm: memcg: introduce __mod_lruvec_memcg_state() Roman Gushchin
2019-10-18  0:28 ` Roman Gushchin [this message]
2019-10-25 19:41   ` [PATCH 09/16] mm: memcg/slab: charge individual slab objects instead of pages Johannes Weiner
2019-10-25 20:00     ` Roman Gushchin
2019-10-25 20:52       ` Johannes Weiner
2019-10-31  1:52     ` Roman Gushchin
2019-10-31 14:23       ` Johannes Weiner
2019-10-31 14:41       ` Johannes Weiner
2019-10-31 15:07         ` Roman Gushchin
2019-10-31 18:50           ` Johannes Weiner
2019-10-18  0:28 ` [PATCH 10/16] mm: memcg: move get_mem_cgroup_from_current() to memcontrol.h Roman Gushchin
2019-10-18  0:28 ` [PATCH 11/16] mm: memcg/slab: replace memcg_from_slab_page() with memcg_from_slab_obj() Roman Gushchin
2019-10-18  0:28 ` [PATCH 13/16] mm: memcg/slab: deprecate memory.kmem.slabinfo Roman Gushchin
2019-10-18  0:28 ` [PATCH 14/16] mm: memcg/slab: use one set of kmem_caches for all memory cgroups Roman Gushchin
2019-10-18  0:28 ` [PATCH 15/16] tools/cgroup: make slabinfo.py compatible with new slab controller Roman Gushchin
2019-10-18  0:28 ` [PATCH 16/16] mm: slab: remove redundant check in memcg_accumulate_slabinfo() Roman Gushchin
2019-10-18 17:03 ` [PATCH 00/16] The new slab memory controller Waiman Long
2019-10-18 17:12   ` Roman Gushchin
2019-10-22 13:22 ` Michal Hocko
2019-10-22 13:28   ` Michal Hocko
2019-10-22 15:48     ` Roman Gushchin
2019-10-22 13:31 ` Michal Hocko
2019-10-22 15:59   ` Roman Gushchin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191018002820.307763-10-guro@fb.com \
    --to=guro@fb.com \
    --cc=cl@linux.com \
    --cc=hannes@cmpxchg.org \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=longman@redhat.com \
    --cc=mhocko@kernel.org \
    --cc=shakeelb@google.com \
    --cc=vdavydov.dev@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).