LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Roman Gushchin <guro@fb.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	Christoph Lameter <cl@linux.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>,
	Michal Hocko <mhocko@kernel.org>,
	Shakeel Butt <shakeelb@google.com>, <linux-mm@kvack.org>,
	Vlastimil Babka <vbabka@suse.cz>, <kernel-team@fb.com>,
	<linux-kernel@vger.kernel.org>, Roman Gushchin <guro@fb.com>
Subject: [PATCH v6 08/19] mm: memcg/slab: save obj_cgroup for non-root slab objects
Date: Mon, 8 Jun 2020 16:06:43 -0700
Message-ID: <20200608230654.828134-9-guro@fb.com> (raw)
In-Reply-To: <20200608230654.828134-1-guro@fb.com>

Store the obj_cgroup pointer in the corresponding place of
page->obj_cgroups for each allocated non-root slab object.
Make sure that each allocated object holds a reference to obj_cgroup.

The objcg pointer is obtained by dereferencing memcg->objcg
in memcg_kmem_get_cache() and is passed from pre_alloc_hook to
post_alloc_hook. Then, in case of successful allocation(s), it is
stored in the page->obj_cgroups vector.

The part that obtains the objcg looks a bit bulky now, but it will be
simplified by the next commits in the series.

Signed-off-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/memcontrol.h |  3 +-
 mm/memcontrol.c            | 14 +++++++--
 mm/slab.c                  | 18 +++++++-----
 mm/slab.h                  | 60 ++++++++++++++++++++++++++++++++++----
 mm/slub.c                  | 14 +++++----
 5 files changed, 88 insertions(+), 21 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index c69e66fe4f12..c63473fffdda 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1404,7 +1404,8 @@ static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
 }
 #endif
 
-struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
+struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep,
+					struct obj_cgroup **objcgp);
 void memcg_kmem_put_cache(struct kmem_cache *cachep);
 
 #ifdef CONFIG_MEMCG_KMEM
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2020c7542aa1..f0ea0ce6bea5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2971,7 +2971,8 @@ static inline bool memcg_kmem_bypass(void)
  * done with it, memcg_kmem_put_cache() must be called to release the
  * reference.
  */
-struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
+struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep,
+					struct obj_cgroup **objcgp)
 {
 	struct mem_cgroup *memcg;
 	struct kmem_cache *memcg_cachep;
@@ -3027,8 +3028,17 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
 	 */
 	if (unlikely(!memcg_cachep))
 		memcg_schedule_kmem_cache_create(memcg, cachep);
-	else if (percpu_ref_tryget(&memcg_cachep->memcg_params.refcnt))
+	else if (percpu_ref_tryget(&memcg_cachep->memcg_params.refcnt)) {
+		struct obj_cgroup *objcg = rcu_dereference(memcg->objcg);
+
+		if (!objcg || !obj_cgroup_tryget(objcg)) {
+			percpu_ref_put(&memcg_cachep->memcg_params.refcnt);
+			goto out_unlock;
+		}
+
+		*objcgp = objcg;
 		cachep = memcg_cachep;
+	}
 out_unlock:
 	rcu_read_unlock();
 	return cachep;
diff --git a/mm/slab.c b/mm/slab.c
index 9350062ffc1a..02b4363930c1 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3222,9 +3222,10 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	unsigned long save_flags;
 	void *ptr;
 	int slab_node = numa_mem_id();
+	struct obj_cgroup *objcg = NULL;
 
 	flags &= gfp_allowed_mask;
-	cachep = slab_pre_alloc_hook(cachep, flags);
+	cachep = slab_pre_alloc_hook(cachep, &objcg, 1, flags);
 	if (unlikely(!cachep))
 		return NULL;
 
@@ -3260,7 +3261,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	if (unlikely(slab_want_init_on_alloc(flags, cachep)) && ptr)
 		memset(ptr, 0, cachep->object_size);
 
-	slab_post_alloc_hook(cachep, flags, 1, &ptr);
+	slab_post_alloc_hook(cachep, objcg, flags, 1, &ptr);
 	return ptr;
 }
 
@@ -3301,9 +3302,10 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
 {
 	unsigned long save_flags;
 	void *objp;
+	struct obj_cgroup *objcg = NULL;
 
 	flags &= gfp_allowed_mask;
-	cachep = slab_pre_alloc_hook(cachep, flags);
+	cachep = slab_pre_alloc_hook(cachep, &objcg, 1, flags);
 	if (unlikely(!cachep))
 		return NULL;
 
@@ -3317,7 +3319,7 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
 	if (unlikely(slab_want_init_on_alloc(flags, cachep)) && objp)
 		memset(objp, 0, cachep->object_size);
 
-	slab_post_alloc_hook(cachep, flags, 1, &objp);
+	slab_post_alloc_hook(cachep, objcg, flags, 1, &objp);
 	return objp;
 }
 
@@ -3439,6 +3441,7 @@ void ___cache_free(struct kmem_cache *cachep, void *objp,
 		memset(objp, 0, cachep->object_size);
 	kmemleak_free_recursive(objp, cachep->flags);
 	objp = cache_free_debugcheck(cachep, objp, caller);
+	memcg_slab_free_hook(cachep, virt_to_head_page(objp), objp);
 
 	/*
 	 * Skip calling cache_free_alien() when the platform is not numa.
@@ -3504,8 +3507,9 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 			  void **p)
 {
 	size_t i;
+	struct obj_cgroup *objcg = NULL;
 
-	s = slab_pre_alloc_hook(s, flags);
+	s = slab_pre_alloc_hook(s, &objcg, size, flags);
 	if (!s)
 		return 0;
 
@@ -3528,13 +3532,13 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 		for (i = 0; i < size; i++)
 			memset(p[i], 0, s->object_size);
 
-	slab_post_alloc_hook(s, flags, size, p);
+	slab_post_alloc_hook(s, objcg, flags, size, p);
 	/* FIXME: Trace call missing. Christoph would like a bulk variant */
 	return size;
 error:
 	local_irq_enable();
 	cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_);
-	slab_post_alloc_hook(s, flags, i, p);
+	slab_post_alloc_hook(s, objcg, flags, i, p);
 	__kmem_cache_free_bulk(s, i, p);
 	return 0;
 }
diff --git a/mm/slab.h b/mm/slab.h
index a1633ea15fbf..8bca0cb4b928 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -438,6 +438,41 @@ static inline void memcg_free_page_obj_cgroups(struct page *page)
 	page->obj_cgroups = NULL;
 }
 
+static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
+					      struct obj_cgroup *objcg,
+					      size_t size, void **p)
+{
+	struct page *page;
+	unsigned long off;
+	size_t i;
+
+	for (i = 0; i < size; i++) {
+		if (likely(p[i])) {
+			page = virt_to_head_page(p[i]);
+			off = obj_to_index(s, page, p[i]);
+			obj_cgroup_get(objcg);
+			page_obj_cgroups(page)[off] = objcg;
+		}
+	}
+	obj_cgroup_put(objcg);
+	memcg_kmem_put_cache(s);
+}
+
+static inline void memcg_slab_free_hook(struct kmem_cache *s, struct page *page,
+					void *p)
+{
+	struct obj_cgroup *objcg;
+	unsigned int off;
+
+	if (!memcg_kmem_enabled() || is_root_cache(s))
+		return;
+
+	off = obj_to_index(s, page, p);
+	objcg = page_obj_cgroups(page)[off];
+	page_obj_cgroups(page)[off] = NULL;
+	obj_cgroup_put(objcg);
+}
+
 extern void slab_init_memcg_params(struct kmem_cache *);
 extern void memcg_link_cache(struct kmem_cache *s, struct mem_cgroup *memcg);
 
@@ -497,6 +532,17 @@ static inline void memcg_free_page_obj_cgroups(struct page *page)
 {
 }
 
+static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
+					      struct obj_cgroup *objcg,
+					      size_t size, void **p)
+{
+}
+
+static inline void memcg_slab_free_hook(struct kmem_cache *s, struct page *page,
+					void *p)
+{
+}
+
 static inline void slab_init_memcg_params(struct kmem_cache *s)
 {
 }
@@ -605,7 +651,8 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
 }
 
 static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
-						     gfp_t flags)
+						     struct obj_cgroup **objcgp,
+						     size_t size, gfp_t flags)
 {
 	flags &= gfp_allowed_mask;
 
@@ -619,13 +666,14 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
 
 	if (memcg_kmem_enabled() &&
 	    ((flags & __GFP_ACCOUNT) || (s->flags & SLAB_ACCOUNT)))
-		return memcg_kmem_get_cache(s);
+		return memcg_kmem_get_cache(s, objcgp);
 
 	return s;
 }
 
-static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
-					size_t size, void **p)
+static inline void slab_post_alloc_hook(struct kmem_cache *s,
+					struct obj_cgroup *objcg,
+					gfp_t flags, size_t size, void **p)
 {
 	size_t i;
 
@@ -637,8 +685,8 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
 					 s->flags, flags);
 	}
 
-	if (memcg_kmem_enabled())
-		memcg_kmem_put_cache(s);
+	if (memcg_kmem_enabled() && !is_root_cache(s))
+		memcg_slab_post_alloc_hook(s, objcg, size, p);
 }
 
 #ifndef CONFIG_SLOB
diff --git a/mm/slub.c b/mm/slub.c
index 6007c38071f5..7007eceac4c4 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2738,8 +2738,9 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
 	struct kmem_cache_cpu *c;
 	struct page *page;
 	unsigned long tid;
+	struct obj_cgroup *objcg = NULL;
 
-	s = slab_pre_alloc_hook(s, gfpflags);
+	s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
 	if (!s)
 		return NULL;
 redo:
@@ -2815,7 +2816,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
 	if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
 		memset(object, 0, s->object_size);
 
-	slab_post_alloc_hook(s, gfpflags, 1, &object);
+	slab_post_alloc_hook(s, objcg, gfpflags, 1, &object);
 
 	return object;
 }
@@ -3020,6 +3021,8 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
 	void *tail_obj = tail ? : head;
 	struct kmem_cache_cpu *c;
 	unsigned long tid;
+
+	memcg_slab_free_hook(s, page, head);
 redo:
 	/*
 	 * Determine the currently cpus per cpu slab.
@@ -3199,9 +3202,10 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 {
 	struct kmem_cache_cpu *c;
 	int i;
+	struct obj_cgroup *objcg = NULL;
 
 	/* memcg and kmem_cache debug support */
-	s = slab_pre_alloc_hook(s, flags);
+	s = slab_pre_alloc_hook(s, &objcg, size, flags);
 	if (unlikely(!s))
 		return false;
 	/*
@@ -3255,11 +3259,11 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 	}
 
 	/* memcg and kmem_cache debug support */
-	slab_post_alloc_hook(s, flags, size, p);
+	slab_post_alloc_hook(s, objcg, flags, size, p);
 	return i;
 error:
 	local_irq_enable();
-	slab_post_alloc_hook(s, flags, i, p);
+	slab_post_alloc_hook(s, objcg, flags, i, p);
 	__kmem_cache_free_bulk(s, i, p);
 	return 0;
 }
-- 
2.25.4


  parent reply index

Thread overview: 92+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-08 23:06 [PATCH v6 00/19] The new cgroup slab memory controller Roman Gushchin
2020-06-08 23:06 ` [PATCH v6 01/19] mm: memcg: factor out memcg- and lruvec-level changes out of __mod_lruvec_state() Roman Gushchin
2020-06-17  1:52   ` Shakeel Butt
2020-06-17  2:50     ` Roman Gushchin
2020-06-17  2:59       ` Shakeel Butt
2020-06-17  3:19         ` Roman Gushchin
2020-06-08 23:06 ` [PATCH v6 02/19] mm: memcg: prepare for byte-sized vmstat items Roman Gushchin
2020-06-17  2:57   ` Shakeel Butt
2020-06-17  3:19     ` Roman Gushchin
2020-06-17 15:55   ` Shakeel Butt
2020-06-08 23:06 ` [PATCH v6 03/19] mm: memcg: convert vmstat slab counters to bytes Roman Gushchin
2020-06-17  3:03   ` Shakeel Butt
2020-06-08 23:06 ` [PATCH v6 04/19] mm: slub: implement SLUB version of obj_to_index() Roman Gushchin
2020-06-17  3:08   ` Shakeel Butt
2020-06-08 23:06 ` [PATCH v6 05/19] mm: memcontrol: decouple reference counting from page accounting Roman Gushchin
2020-06-18  0:47   ` Shakeel Butt
2020-06-18 14:55   ` Shakeel Butt
2020-06-18 19:51     ` Roman Gushchin
2020-06-19  1:08     ` Roman Gushchin
2020-06-19  1:18       ` Shakeel Butt
2020-06-19  1:31   ` Shakeel Butt
2020-06-08 23:06 ` [PATCH v6 06/19] mm: memcg/slab: obj_cgroup API Roman Gushchin
2020-06-19 15:42   ` Shakeel Butt
2020-06-19 21:38     ` Roman Gushchin
2020-06-19 22:16       ` Shakeel Butt
2020-06-19 22:52         ` Roman Gushchin
2020-06-20 22:50       ` Andrew Morton
2020-06-08 23:06 ` [PATCH v6 07/19] mm: memcg/slab: allocate obj_cgroups for non-root slab pages Roman Gushchin
2020-06-19 16:36   ` Shakeel Butt
2020-06-20  0:25     ` Roman Gushchin
2020-06-20  0:31       ` Shakeel Butt
2020-06-08 23:06 ` Roman Gushchin [this message]
2020-06-20  0:16   ` [PATCH v6 08/19] mm: memcg/slab: save obj_cgroup for non-root slab objects Shakeel Butt
2020-06-20  1:19     ` Roman Gushchin
2020-06-08 23:06 ` [PATCH v6 09/19] mm: memcg/slab: charge individual slab objects instead of pages Roman Gushchin
2020-06-20  0:54   ` Shakeel Butt
2020-06-20  1:29     ` Roman Gushchin
2020-06-08 23:06 ` [PATCH v6 10/19] mm: memcg/slab: deprecate memory.kmem.slabinfo Roman Gushchin
2020-06-22 17:12   ` Shakeel Butt
2020-06-22 18:01     ` Roman Gushchin
2020-06-22 18:09       ` Shakeel Butt
2020-06-22 18:25         ` Roman Gushchin
2020-06-22 18:38           ` Shakeel Butt
2020-06-08 23:06 ` [PATCH v6 11/19] mm: memcg/slab: move memcg_kmem_bypass() to memcontrol.h Roman Gushchin
2020-06-20  1:19   ` Shakeel Butt
2020-06-08 23:06 ` [PATCH v6 12/19] mm: memcg/slab: use a single set of kmem_caches for all accounted allocations Roman Gushchin
2020-06-22 16:56   ` Shakeel Butt
2020-06-08 23:06 ` [PATCH v6 13/19] mm: memcg/slab: simplify memcg cache creation Roman Gushchin
2020-06-22 17:29   ` Shakeel Butt
2020-06-22 17:40     ` Roman Gushchin
2020-06-22 18:03       ` Shakeel Butt
2020-06-08 23:06 ` [PATCH v6 14/19] mm: memcg/slab: remove memcg_kmem_get_cache() Roman Gushchin
2020-06-22 18:42   ` Shakeel Butt
2020-06-08 23:06 ` [PATCH v6 15/19] mm: memcg/slab: deprecate slab_root_caches Roman Gushchin
2020-06-22 17:36   ` Shakeel Butt
2020-06-08 23:06 ` [PATCH v6 16/19] mm: memcg/slab: remove redundant check in memcg_accumulate_slabinfo() Roman Gushchin
2020-06-22 17:32   ` Shakeel Butt
2020-06-08 23:06 ` [PATCH v6 17/19] mm: memcg/slab: use a single set of kmem_caches for all allocations Roman Gushchin
2020-06-17 23:35   ` Andrew Morton
2020-06-18  0:35     ` Roman Gushchin
2020-06-18  7:33       ` Vlastimil Babka
2020-06-18 19:54         ` Roman Gushchin
2020-06-22 19:21   ` Shakeel Butt
2020-06-22 20:37     ` Roman Gushchin
2020-06-22 21:04       ` Shakeel Butt
2020-06-22 21:13         ` Roman Gushchin
2020-06-22 21:28           ` Shakeel Butt
2020-06-22 21:58             ` Roman Gushchin
2020-06-22 22:05               ` Shakeel Butt
2020-06-08 23:06 ` [PATCH v6 18/19] kselftests: cgroup: add kernel memory accounting tests Roman Gushchin
2020-06-17  1:46 ` [PATCH v6 00/19] The new cgroup slab memory controller Shakeel Butt
2020-06-17  2:41   ` Roman Gushchin
2020-06-17  3:05     ` Shakeel Butt
2020-06-17  3:32       ` Roman Gushchin
2020-06-17 11:24         ` Vlastimil Babka
2020-06-17 14:31           ` Mel Gorman
2020-06-20  0:57             ` Roman Gushchin
2020-06-18  1:29           ` Roman Gushchin
2020-06-18  8:43             ` Jesper Dangaard Brouer
2020-06-18  9:31               ` Jesper Dangaard Brouer
2020-06-19  1:30                 ` Roman Gushchin
2020-06-19  8:32                   ` Jesper Dangaard Brouer
2020-06-19  1:27               ` Roman Gushchin
2020-06-19  9:39                 ` Jesper Dangaard Brouer
2020-06-19 18:47                   ` Roman Gushchin
2020-06-18  1:18   ` Roman Gushchin
2020-06-18  9:27 ` Mike Rapoport
2020-06-18 20:43   ` Roman Gushchin
2020-06-21 22:57 ` Qian Cai
2020-06-21 23:34   ` Roman Gushchin
2020-06-21 23:53     ` Qian Cai
2020-06-22  3:07       ` Roman Gushchin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200608230654.828134-9-guro@fb.com \
    --to=guro@fb.com \
    --cc=akpm@linux-foundation.org \
    --cc=cl@linux.com \
    --cc=hannes@cmpxchg.org \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=shakeelb@google.com \
    --cc=vbabka@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git