bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Muchun Song <songmuchun@bytedance.com>
To: viro@zeniv.linux.org.uk, jack@suse.cz, amir73il@gmail.com,
	ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	kafai@fb.com, songliubraving@fb.com, yhs@fb.com,
	john.fastabend@gmail.com, kpsingh@kernel.org, mingo@redhat.com,
	peterz@infradead.org, juri.lelli@redhat.com,
	vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
	rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
	bristot@redhat.com, hannes@cmpxchg.org, mhocko@kernel.org,
	vdavydov.dev@gmail.com, akpm@linux-foundation.org,
	shakeelb@google.com, guro@fb.com, songmuchun@bytedance.com,
	alex.shi@linux.alibaba.com, alexander.h.duyck@linux.intel.com,
	chris@chrisdown.name, richard.weiyang@gmail.com, vbabka@suse.cz,
	mathieu.desnoyers@efficios.com, posk@google.com,
	jannh@google.com, iamjoonsoo.kim@lge.com, daniel.vetter@ffwll.ch,
	longman@redhat.com, walken@google.com,
	christian.brauner@ubuntu.com, ebiederm@xmission.com,
	keescook@chromium.org, krisman@collabora.com, esyr@redhat.com,
	surenb@google.com, elver@google.com
Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, bpf@vger.kernel.org,
	cgroups@vger.kernel.org, linux-mm@kvack.org,
	duanxiongchun@bytedance.com
Subject: [PATCH 3/5] mm: memcontrol: reparent the kmem pages on cgroup removal
Date: Mon,  1 Mar 2021 14:22:25 +0800	[thread overview]
Message-ID: <20210301062227.59292-4-songmuchun@bytedance.com> (raw)
In-Reply-To: <20210301062227.59292-1-songmuchun@bytedance.com>

Currently the slab objects already reparent to it's parent memcg on
cgroup removal. But there are still some corner objects which are
not reparent (e.g. allocations larger than order-1 page on SLUB).
Actually those objects are allocated directly from the buddy allocator.
And they are chared as kmem to memcg via __memcg_kmem_charge_page().
Such objects are not reparent on cgroup removal.

So this patch aims to reparent kmem pages on cgroup removal. Doing
this is simple with help of the infrastructures of obj_cgroup.
Finally, the page->memcg_data points to an object cgroup for the
kmem page.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 include/linux/memcontrol.h |  66 +++++++++++--------
 mm/memcontrol.c            | 155 ++++++++++++++++++++++++---------------------
 2 files changed, 124 insertions(+), 97 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1d2c82464c8c..27043478220f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -370,23 +370,15 @@ static inline bool page_memcg_charged(struct page *page)
 }
 
 /*
- * page_memcg_kmem - get the memory cgroup associated with a kmem page.
- * @page: a pointer to the page struct
+ * After the initialization objcg->memcg is always pointing at
+ * a valid memcg, but can be atomically swapped to the parent memcg.
  *
- * Returns a pointer to the memory cgroup associated with the kmem page,
- * or NULL. This function assumes that the page is known to have a proper
- * memory cgroup pointer. It is only suitable for kmem pages which means
- * PageMemcgKmem() returns true for this page.
+ * The caller must ensure that the returned memcg won't be released:
+ * e.g. acquire the rcu_read_lock or css_set_lock.
  */
-static inline struct mem_cgroup *page_memcg_kmem(struct page *page)
+static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg)
 {
-	unsigned long memcg_data = page->memcg_data;
-
-	VM_BUG_ON_PAGE(PageSlab(page), page);
-	VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page);
-	VM_BUG_ON_PAGE(!(memcg_data & MEMCG_DATA_KMEM), page);
-
-	return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+	return READ_ONCE(objcg->memcg);
 }
 
 /*
@@ -462,6 +454,17 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page)
 	if (memcg_data & MEMCG_DATA_OBJCGS)
 		return NULL;
 
+	if (memcg_data & MEMCG_DATA_KMEM) {
+		struct obj_cgroup *objcg;
+
+		/*
+		 * The caller must ensure that the returned memcg won't be
+		 * released: e.g. acquire the rcu_read_lock or css_set_lock.
+		 */
+		objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+		return obj_cgroup_memcg(objcg);
+	}
+
 	return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
 }
 
@@ -520,6 +523,24 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
 	return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
 }
 
+/*
+ * page_objcg - get the object cgroup associated with a kmem page
+ * @page: a pointer to the page struct
+ *
+ * Returns a pointer to the object cgroup associated with the kmem page,
+ * or NULL. This function assumes that the page is known to have an
+ * associated object cgroup. It's only safe to call this function
+ * against kmem pages (PageMemcgKmem() returns true).
+ */
+static inline struct obj_cgroup *page_objcg(struct page *page)
+{
+	unsigned long memcg_data = page->memcg_data;
+
+	VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page);
+	VM_BUG_ON_PAGE(!(memcg_data & MEMCG_DATA_KMEM), page);
+
+	return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+}
 #else
 static inline struct obj_cgroup **page_objcgs(struct page *page)
 {
@@ -530,6 +551,11 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
 {
 	return NULL;
 }
+
+static inline struct obj_cgroup *page_objcg(struct page *page)
+{
+	return NULL;
+}
 #endif
 
 static __always_inline bool memcg_stat_item_in_bytes(int idx)
@@ -748,18 +774,6 @@ static inline void obj_cgroup_put(struct obj_cgroup *objcg)
 	percpu_ref_put(&objcg->refcnt);
 }
 
-/*
- * After the initialization objcg->memcg is always pointing at
- * a valid memcg, but can be atomically swapped to the parent memcg.
- *
- * The caller must ensure that the returned memcg won't be released:
- * e.g. acquire the rcu_read_lock or css_set_lock.
- */
-static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg)
-{
-	return READ_ONCE(objcg->memcg);
-}
-
 static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
 	if (memcg)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bfd6efe1e196..39cb8c5bf8b2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -856,10 +856,16 @@ void __mod_lruvec_page_state(struct page *page, enum node_stat_item idx,
 {
 	struct page *head = compound_head(page); /* rmap on tail pages */
 	struct mem_cgroup *memcg;
-	pg_data_t *pgdat = page_pgdat(page);
+	pg_data_t *pgdat;
 	struct lruvec *lruvec;
 
-	memcg = PageMemcgKmem(head) ? page_memcg_kmem(head) : page_memcg(head);
+	if (PageMemcgKmem(head)) {
+		__mod_lruvec_kmem_state(page_to_virt(head), idx, val);
+		return;
+	}
+
+	pgdat = page_pgdat(head);
+	memcg = page_memcg(head);
 	/* Untracked pages have no memcg, no lruvec. Update only the node */
 	if (!memcg) {
 		__mod_node_page_state(pgdat, idx, val);
@@ -1056,24 +1062,6 @@ static __always_inline struct mem_cgroup *active_memcg(void)
 		return current->active_memcg;
 }
 
-static __always_inline struct mem_cgroup *get_active_memcg(void)
-{
-	struct mem_cgroup *memcg;
-
-	rcu_read_lock();
-	memcg = active_memcg();
-	if (memcg) {
-		/* current->active_memcg must hold a ref. */
-		if (WARN_ON_ONCE(!css_tryget(&memcg->css)))
-			memcg = root_mem_cgroup;
-		else
-			memcg = current->active_memcg;
-	}
-	rcu_read_unlock();
-
-	return memcg;
-}
-
 static __always_inline bool memcg_kmem_bypass(void)
 {
 	/* Allow remote memcg charging from any context. */
@@ -1088,20 +1076,6 @@ static __always_inline bool memcg_kmem_bypass(void)
 }
 
 /**
- * If active memcg is set, do not fallback to current->mm->memcg.
- */
-static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void)
-{
-	if (memcg_kmem_bypass())
-		return NULL;
-
-	if (unlikely(active_memcg()))
-		return get_active_memcg();
-
-	return get_mem_cgroup_from_mm(current->mm);
-}
-
-/**
  * mem_cgroup_iter - iterate over memory cgroup hierarchy
  * @root: hierarchy root
  * @prev: previously returned memcg, NULL on first invocation
@@ -3148,18 +3122,18 @@ static void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_page
  */
 int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
 {
-	struct mem_cgroup *memcg;
+	struct obj_cgroup *objcg;
 	int ret = 0;
 
-	memcg = get_mem_cgroup_from_current();
-	if (memcg && !mem_cgroup_is_root(memcg)) {
-		ret = __memcg_kmem_charge(memcg, gfp, 1 << order);
+	objcg = get_obj_cgroup_from_current();
+	if (objcg) {
+		ret = obj_cgroup_charge_page(objcg, gfp, 1 << order);
 		if (!ret) {
-			page->memcg_data = (unsigned long)memcg |
+			page->memcg_data = (unsigned long)objcg |
 				MEMCG_DATA_KMEM;
 			return 0;
 		}
-		css_put(&memcg->css);
+		obj_cgroup_put(objcg);
 	}
 	return ret;
 }
@@ -3171,17 +3145,18 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
  */
 void __memcg_kmem_uncharge_page(struct page *page, int order)
 {
-	struct mem_cgroup *memcg;
+	struct obj_cgroup *objcg;
 	unsigned int nr_pages = 1 << order;
 
 	if (!page_memcg_charged(page))
 		return;
 
-	memcg = page_memcg_kmem(page);
-	VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
-	__memcg_kmem_uncharge(memcg, nr_pages);
+	VM_BUG_ON_PAGE(!PageMemcgKmem(page), page);
+
+	objcg = page_objcg(page);
+	obj_cgroup_uncharge_page(objcg, nr_pages);
 	page->memcg_data = 0;
-	css_put(&memcg->css);
+	obj_cgroup_put(objcg);
 }
 
 static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
@@ -6798,8 +6773,12 @@ struct uncharge_gather {
 	struct mem_cgroup *memcg;
 	unsigned long nr_pages;
 	unsigned long pgpgout;
-	unsigned long nr_kmem;
 	struct page *dummy_page;
+
+#ifdef CONFIG_MEMCG_KMEM
+	struct obj_cgroup *objcg;
+	unsigned long nr_kmem;
+#endif
 };
 
 static inline void uncharge_gather_clear(struct uncharge_gather *ug)
@@ -6811,12 +6790,21 @@ static void uncharge_batch(const struct uncharge_gather *ug)
 {
 	unsigned long flags;
 
+#ifdef CONFIG_MEMCG_KMEM
+	if (ug->objcg) {
+		obj_cgroup_uncharge_page(ug->objcg, ug->nr_kmem);
+		/* drop reference from uncharge_kmem_page */
+		obj_cgroup_put(ug->objcg);
+	}
+#endif
+
+	if (!ug->memcg)
+		return;
+
 	if (!mem_cgroup_is_root(ug->memcg)) {
 		page_counter_uncharge(&ug->memcg->memory, ug->nr_pages);
 		if (do_memsw_account())
 			page_counter_uncharge(&ug->memcg->memsw, ug->nr_pages);
-		if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && ug->nr_kmem)
-			page_counter_uncharge(&ug->memcg->kmem, ug->nr_kmem);
 		memcg_oom_recover(ug->memcg);
 	}
 
@@ -6826,26 +6814,40 @@ static void uncharge_batch(const struct uncharge_gather *ug)
 	memcg_check_events(ug->memcg, ug->dummy_page);
 	local_irq_restore(flags);
 
-	/* drop reference from uncharge_page */
+	/* drop reference from uncharge_user_page */
 	css_put(&ug->memcg->css);
 }
 
-static void uncharge_page(struct page *page, struct uncharge_gather *ug)
+#ifdef CONFIG_MEMCG_KMEM
+static void uncharge_kmem_page(struct page *page, struct uncharge_gather *ug)
 {
-	unsigned long nr_pages;
-	struct mem_cgroup *memcg;
+	struct obj_cgroup *objcg = page_objcg(page);
 
-	VM_BUG_ON_PAGE(PageLRU(page), page);
+	if (ug->objcg != objcg) {
+		if (ug->objcg) {
+			uncharge_batch(ug);
+			uncharge_gather_clear(ug);
+		}
+		ug->objcg = objcg;
 
-	if (!page_memcg_charged(page))
-		return;
+		/* pairs with obj_cgroup_put in uncharge_batch */
+		obj_cgroup_get(ug->objcg);
+	}
+
+	ug->nr_kmem += compound_nr(page);
+	page->memcg_data = 0;
+	obj_cgroup_put(ug->objcg);
+}
+#else
+static void uncharge_kmem_page(struct page *page, struct uncharge_gather *ug)
+{
+}
+#endif
+
+static void uncharge_user_page(struct page *page, struct uncharge_gather *ug)
+{
+	struct mem_cgroup *memcg = page_memcg(page);
 
-	/*
-	 * Nobody should be changing or seriously looking at
-	 * page memcg at this point, we have fully exclusive
-	 * access to the page.
-	 */
-	memcg = PageMemcgKmem(page) ? page_memcg_kmem(page) : page_memcg(page);
 	if (ug->memcg != memcg) {
 		if (ug->memcg) {
 			uncharge_batch(ug);
@@ -6856,18 +6858,30 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
 		/* pairs with css_put in uncharge_batch */
 		css_get(&ug->memcg->css);
 	}
+	ug->pgpgout++;
+	ug->dummy_page = page;
+
+	ug->nr_pages += compound_nr(page);
+	page->memcg_data = 0;
+	css_put(&ug->memcg->css);
+}
 
-	nr_pages = compound_nr(page);
-	ug->nr_pages += nr_pages;
+static void uncharge_page(struct page *page, struct uncharge_gather *ug)
+{
+	VM_BUG_ON_PAGE(PageLRU(page), page);
 
+	if (!page_memcg_charged(page))
+		return;
+
+	/*
+	 * Nobody should be changing or seriously looking at
+	 * page memcg at this point, we have fully exclusive
+	 * access to the page.
+	 */
 	if (PageMemcgKmem(page))
-		ug->nr_kmem += nr_pages;
+		uncharge_kmem_page(page, ug);
 	else
-		ug->pgpgout++;
-
-	ug->dummy_page = page;
-	page->memcg_data = 0;
-	css_put(&ug->memcg->css);
+		uncharge_user_page(page, ug);
 }
 
 /**
@@ -6910,8 +6924,7 @@ void mem_cgroup_uncharge_list(struct list_head *page_list)
 	uncharge_gather_clear(&ug);
 	list_for_each_entry(page, page_list, lru)
 		uncharge_page(page, &ug);
-	if (ug.memcg)
-		uncharge_batch(&ug);
+	uncharge_batch(&ug);
 }
 
 /**
-- 
2.11.0


  parent reply	other threads:[~2021-03-01  6:28 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-01  6:22 [PATCH 0/5] Use obj_cgroup APIs to change kmem pages Muchun Song
2021-03-01  6:22 ` [PATCH 1/5] mm: memcontrol: introduce obj_cgroup_{un}charge_page Muchun Song
2021-03-01  6:22 ` [PATCH 2/5] mm: memcontrol: make page_memcg{_rcu} only applicable for non-kmem page Muchun Song
2021-03-01 18:11   ` Shakeel Butt
2021-03-01 19:09     ` Johannes Weiner
2021-03-02  3:49       ` [External] " Muchun Song
2021-03-02  3:03     ` Muchun Song
2021-03-02  3:35       ` Shakeel Butt
2021-03-02  3:51         ` Muchun Song
2021-03-01  6:22 ` Muchun Song [this message]
2021-03-01  6:22 ` [PATCH 4/5] mm: memcontrol: move remote memcg charging APIs to CONFIG_MEMCG_KMEM Muchun Song
2021-03-02  1:15   ` Roman Gushchin
2021-03-02  3:43     ` Shakeel Butt
2021-03-02  3:58       ` Roman Gushchin
2021-03-02  4:12     ` [External] " Muchun Song
2021-03-01  6:22 ` [PATCH 5/5] mm: memcontrol: use object cgroup for remote memory cgroup charging Muchun Song
2021-03-02  1:12 ` [PATCH 0/5] Use obj_cgroup APIs to change kmem pages Roman Gushchin
2021-03-02  2:50   ` [External] " Muchun Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210301062227.59292-4-songmuchun@bytedance.com \
    --to=songmuchun@bytedance.com \
    --cc=akpm@linux-foundation.org \
    --cc=alex.shi@linux.alibaba.com \
    --cc=alexander.h.duyck@linux.intel.com \
    --cc=amir73il@gmail.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=cgroups@vger.kernel.org \
    --cc=chris@chrisdown.name \
    --cc=christian.brauner@ubuntu.com \
    --cc=daniel.vetter@ffwll.ch \
    --cc=daniel@iogearbox.net \
    --cc=dietmar.eggemann@arm.com \
    --cc=duanxiongchun@bytedance.com \
    --cc=ebiederm@xmission.com \
    --cc=elver@google.com \
    --cc=esyr@redhat.com \
    --cc=guro@fb.com \
    --cc=hannes@cmpxchg.org \
    --cc=iamjoonsoo.kim@lge.com \
    --cc=jack@suse.cz \
    --cc=jannh@google.com \
    --cc=john.fastabend@gmail.com \
    --cc=juri.lelli@redhat.com \
    --cc=kafai@fb.com \
    --cc=keescook@chromium.org \
    --cc=kpsingh@kernel.org \
    --cc=krisman@collabora.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=longman@redhat.com \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mgorman@suse.de \
    --cc=mhocko@kernel.org \
    --cc=mingo@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=posk@google.com \
    --cc=richard.weiyang@gmail.com \
    --cc=rostedt@goodmis.org \
    --cc=shakeelb@google.com \
    --cc=songliubraving@fb.com \
    --cc=surenb@google.com \
    --cc=vbabka@suse.cz \
    --cc=vdavydov.dev@gmail.com \
    --cc=vincent.guittot@linaro.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=walken@google.com \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).