From: Muchun Song <songmuchun@bytedance.com>
To: viro@zeniv.linux.org.uk, jack@suse.cz, amir73il@gmail.com,
ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
kafai@fb.com, songliubraving@fb.com, yhs@fb.com,
john.fastabend@gmail.com, kpsingh@kernel.org, mingo@redhat.com,
peterz@infradead.org, juri.lelli@redhat.com,
vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
bristot@redhat.com, hannes@cmpxchg.org, mhocko@kernel.org,
vdavydov.dev@gmail.com, akpm@linux-foundation.org,
shakeelb@google.com, guro@fb.com, songmuchun@bytedance.com,
alex.shi@linux.alibaba.com, alexander.h.duyck@linux.intel.com,
chris@chrisdown.name, richard.weiyang@gmail.com, vbabka@suse.cz,
mathieu.desnoyers@efficios.com, posk@google.com,
jannh@google.com, iamjoonsoo.kim@lge.com, daniel.vetter@ffwll.ch,
longman@redhat.com, walken@google.com,
christian.brauner@ubuntu.com, ebiederm@xmission.com,
keescook@chromium.org, krisman@collabora.com, esyr@redhat.com,
surenb@google.com, elver@google.com
Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
netdev@vger.kernel.org, bpf@vger.kernel.org,
cgroups@vger.kernel.org, linux-mm@kvack.org,
duanxiongchun@bytedance.com
Subject: [PATCH 3/5] mm: memcontrol: reparent the kmem pages on cgroup removal
Date: Mon, 1 Mar 2021 14:22:25 +0800 [thread overview]
Message-ID: <20210301062227.59292-4-songmuchun@bytedance.com> (raw)
In-Reply-To: <20210301062227.59292-1-songmuchun@bytedance.com>
Currently, slab objects are already reparented to their parent memcg on
cgroup removal. But there are still some corner-case objects which are
not reparented (e.g. allocations larger than order-1 pages on SLUB).
Those objects are allocated directly from the buddy allocator and are
charged as kmem to the memcg via __memcg_kmem_charge_page(), so they
are not reparented on cgroup removal.
So this patch aims to reparent kmem pages on cgroup removal. Doing
this is simple with the help of the obj_cgroup infrastructure. After
this change, page->memcg_data of a kmem page points to an object
cgroup instead of a memory cgroup.
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
include/linux/memcontrol.h | 66 +++++++++++--------
mm/memcontrol.c | 155 ++++++++++++++++++++++++---------------------
2 files changed, 124 insertions(+), 97 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1d2c82464c8c..27043478220f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -370,23 +370,15 @@ static inline bool page_memcg_charged(struct page *page)
}
/*
- * page_memcg_kmem - get the memory cgroup associated with a kmem page.
- * @page: a pointer to the page struct
+ * After the initialization objcg->memcg is always pointing at
+ * a valid memcg, but can be atomically swapped to the parent memcg.
*
- * Returns a pointer to the memory cgroup associated with the kmem page,
- * or NULL. This function assumes that the page is known to have a proper
- * memory cgroup pointer. It is only suitable for kmem pages which means
- * PageMemcgKmem() returns true for this page.
+ * The caller must ensure that the returned memcg won't be released:
+ * e.g. acquire the rcu_read_lock or css_set_lock.
*/
-static inline struct mem_cgroup *page_memcg_kmem(struct page *page)
+static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg)
{
- unsigned long memcg_data = page->memcg_data;
-
- VM_BUG_ON_PAGE(PageSlab(page), page);
- VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page);
- VM_BUG_ON_PAGE(!(memcg_data & MEMCG_DATA_KMEM), page);
-
- return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+ return READ_ONCE(objcg->memcg);
}
/*
@@ -462,6 +454,17 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page)
if (memcg_data & MEMCG_DATA_OBJCGS)
return NULL;
+ if (memcg_data & MEMCG_DATA_KMEM) {
+ struct obj_cgroup *objcg;
+
+ /*
+ * The caller must ensure that the returned memcg won't be
+ * released: e.g. acquire the rcu_read_lock or css_set_lock.
+ */
+ objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+ return obj_cgroup_memcg(objcg);
+ }
+
return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
@@ -520,6 +523,24 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
+/*
+ * page_objcg - get the object cgroup associated with a kmem page
+ * @page: a pointer to the page struct
+ *
+ * Returns a pointer to the object cgroup associated with the kmem page,
+ * or NULL. This function assumes that the page is known to have an
+ * associated object cgroup. It's only safe to call this function
+ * against kmem pages (PageMemcgKmem() returns true).
+ */
+static inline struct obj_cgroup *page_objcg(struct page *page)
+{
+ unsigned long memcg_data = page->memcg_data;
+
+ VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page);
+ VM_BUG_ON_PAGE(!(memcg_data & MEMCG_DATA_KMEM), page);
+
+ return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+}
#else
static inline struct obj_cgroup **page_objcgs(struct page *page)
{
@@ -530,6 +551,11 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
{
return NULL;
}
+
+static inline struct obj_cgroup *page_objcg(struct page *page)
+{
+ return NULL;
+}
#endif
static __always_inline bool memcg_stat_item_in_bytes(int idx)
@@ -748,18 +774,6 @@ static inline void obj_cgroup_put(struct obj_cgroup *objcg)
percpu_ref_put(&objcg->refcnt);
}
-/*
- * After the initialization objcg->memcg is always pointing at
- * a valid memcg, but can be atomically swapped to the parent memcg.
- *
- * The caller must ensure that the returned memcg won't be released:
- * e.g. acquire the rcu_read_lock or css_set_lock.
- */
-static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg)
-{
- return READ_ONCE(objcg->memcg);
-}
-
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
if (memcg)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bfd6efe1e196..39cb8c5bf8b2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -856,10 +856,16 @@ void __mod_lruvec_page_state(struct page *page, enum node_stat_item idx,
{
struct page *head = compound_head(page); /* rmap on tail pages */
struct mem_cgroup *memcg;
- pg_data_t *pgdat = page_pgdat(page);
+ pg_data_t *pgdat;
struct lruvec *lruvec;
- memcg = PageMemcgKmem(head) ? page_memcg_kmem(head) : page_memcg(head);
+ if (PageMemcgKmem(head)) {
+ __mod_lruvec_kmem_state(page_to_virt(head), idx, val);
+ return;
+ }
+
+ pgdat = page_pgdat(head);
+ memcg = page_memcg(head);
/* Untracked pages have no memcg, no lruvec. Update only the node */
if (!memcg) {
__mod_node_page_state(pgdat, idx, val);
@@ -1056,24 +1062,6 @@ static __always_inline struct mem_cgroup *active_memcg(void)
return current->active_memcg;
}
-static __always_inline struct mem_cgroup *get_active_memcg(void)
-{
- struct mem_cgroup *memcg;
-
- rcu_read_lock();
- memcg = active_memcg();
- if (memcg) {
- /* current->active_memcg must hold a ref. */
- if (WARN_ON_ONCE(!css_tryget(&memcg->css)))
- memcg = root_mem_cgroup;
- else
- memcg = current->active_memcg;
- }
- rcu_read_unlock();
-
- return memcg;
-}
-
static __always_inline bool memcg_kmem_bypass(void)
{
/* Allow remote memcg charging from any context. */
@@ -1088,20 +1076,6 @@ static __always_inline bool memcg_kmem_bypass(void)
}
/**
- * If active memcg is set, do not fallback to current->mm->memcg.
- */
-static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void)
-{
- if (memcg_kmem_bypass())
- return NULL;
-
- if (unlikely(active_memcg()))
- return get_active_memcg();
-
- return get_mem_cgroup_from_mm(current->mm);
-}
-
-/**
* mem_cgroup_iter - iterate over memory cgroup hierarchy
* @root: hierarchy root
* @prev: previously returned memcg, NULL on first invocation
@@ -3148,18 +3122,18 @@ static void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_page
*/
int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
{
- struct mem_cgroup *memcg;
+ struct obj_cgroup *objcg;
int ret = 0;
- memcg = get_mem_cgroup_from_current();
- if (memcg && !mem_cgroup_is_root(memcg)) {
- ret = __memcg_kmem_charge(memcg, gfp, 1 << order);
+ objcg = get_obj_cgroup_from_current();
+ if (objcg) {
+ ret = obj_cgroup_charge_page(objcg, gfp, 1 << order);
if (!ret) {
- page->memcg_data = (unsigned long)memcg |
+ page->memcg_data = (unsigned long)objcg |
MEMCG_DATA_KMEM;
return 0;
}
- css_put(&memcg->css);
+ obj_cgroup_put(objcg);
}
return ret;
}
@@ -3171,17 +3145,18 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
*/
void __memcg_kmem_uncharge_page(struct page *page, int order)
{
- struct mem_cgroup *memcg;
+ struct obj_cgroup *objcg;
unsigned int nr_pages = 1 << order;
if (!page_memcg_charged(page))
return;
- memcg = page_memcg_kmem(page);
- VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
- __memcg_kmem_uncharge(memcg, nr_pages);
+ VM_BUG_ON_PAGE(!PageMemcgKmem(page), page);
+
+ objcg = page_objcg(page);
+ obj_cgroup_uncharge_page(objcg, nr_pages);
page->memcg_data = 0;
- css_put(&memcg->css);
+ obj_cgroup_put(objcg);
}
static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
@@ -6798,8 +6773,12 @@ struct uncharge_gather {
struct mem_cgroup *memcg;
unsigned long nr_pages;
unsigned long pgpgout;
- unsigned long nr_kmem;
struct page *dummy_page;
+
+#ifdef CONFIG_MEMCG_KMEM
+ struct obj_cgroup *objcg;
+ unsigned long nr_kmem;
+#endif
};
static inline void uncharge_gather_clear(struct uncharge_gather *ug)
@@ -6811,12 +6790,21 @@ static void uncharge_batch(const struct uncharge_gather *ug)
{
unsigned long flags;
+#ifdef CONFIG_MEMCG_KMEM
+ if (ug->objcg) {
+ obj_cgroup_uncharge_page(ug->objcg, ug->nr_kmem);
+ /* drop reference from uncharge_kmem_page */
+ obj_cgroup_put(ug->objcg);
+ }
+#endif
+
+ if (!ug->memcg)
+ return;
+
if (!mem_cgroup_is_root(ug->memcg)) {
page_counter_uncharge(&ug->memcg->memory, ug->nr_pages);
if (do_memsw_account())
page_counter_uncharge(&ug->memcg->memsw, ug->nr_pages);
- if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && ug->nr_kmem)
- page_counter_uncharge(&ug->memcg->kmem, ug->nr_kmem);
memcg_oom_recover(ug->memcg);
}
@@ -6826,26 +6814,40 @@ static void uncharge_batch(const struct uncharge_gather *ug)
memcg_check_events(ug->memcg, ug->dummy_page);
local_irq_restore(flags);
- /* drop reference from uncharge_page */
+ /* drop reference from uncharge_user_page */
css_put(&ug->memcg->css);
}
-static void uncharge_page(struct page *page, struct uncharge_gather *ug)
+#ifdef CONFIG_MEMCG_KMEM
+static void uncharge_kmem_page(struct page *page, struct uncharge_gather *ug)
{
- unsigned long nr_pages;
- struct mem_cgroup *memcg;
+ struct obj_cgroup *objcg = page_objcg(page);
- VM_BUG_ON_PAGE(PageLRU(page), page);
+ if (ug->objcg != objcg) {
+ if (ug->objcg) {
+ uncharge_batch(ug);
+ uncharge_gather_clear(ug);
+ }
+ ug->objcg = objcg;
- if (!page_memcg_charged(page))
- return;
+ /* pairs with obj_cgroup_put in uncharge_batch */
+ obj_cgroup_get(ug->objcg);
+ }
+
+ ug->nr_kmem += compound_nr(page);
+ page->memcg_data = 0;
+ obj_cgroup_put(ug->objcg);
+}
+#else
+static void uncharge_kmem_page(struct page *page, struct uncharge_gather *ug)
+{
+}
+#endif
+
+static void uncharge_user_page(struct page *page, struct uncharge_gather *ug)
+{
+ struct mem_cgroup *memcg = page_memcg(page);
- /*
- * Nobody should be changing or seriously looking at
- * page memcg at this point, we have fully exclusive
- * access to the page.
- */
- memcg = PageMemcgKmem(page) ? page_memcg_kmem(page) : page_memcg(page);
if (ug->memcg != memcg) {
if (ug->memcg) {
uncharge_batch(ug);
@@ -6856,18 +6858,30 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
/* pairs with css_put in uncharge_batch */
css_get(&ug->memcg->css);
}
+ ug->pgpgout++;
+ ug->dummy_page = page;
+
+ ug->nr_pages += compound_nr(page);
+ page->memcg_data = 0;
+ css_put(&ug->memcg->css);
+}
- nr_pages = compound_nr(page);
- ug->nr_pages += nr_pages;
+static void uncharge_page(struct page *page, struct uncharge_gather *ug)
+{
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+ if (!page_memcg_charged(page))
+ return;
+
+ /*
+ * Nobody should be changing or seriously looking at
+ * page memcg at this point, we have fully exclusive
+ * access to the page.
+ */
if (PageMemcgKmem(page))
- ug->nr_kmem += nr_pages;
+ uncharge_kmem_page(page, ug);
else
- ug->pgpgout++;
-
- ug->dummy_page = page;
- page->memcg_data = 0;
- css_put(&ug->memcg->css);
+ uncharge_user_page(page, ug);
}
/**
@@ -6910,8 +6924,7 @@ void mem_cgroup_uncharge_list(struct list_head *page_list)
uncharge_gather_clear(&ug);
list_for_each_entry(page, page_list, lru)
uncharge_page(page, &ug);
- if (ug.memcg)
- uncharge_batch(&ug);
+ uncharge_batch(&ug);
}
/**
--
2.11.0
next prev parent reply other threads:[~2021-03-01 6:28 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-03-01 6:22 [PATCH 0/5] Use obj_cgroup APIs to change kmem pages Muchun Song
2021-03-01 6:22 ` [PATCH 1/5] mm: memcontrol: introduce obj_cgroup_{un}charge_page Muchun Song
2021-03-01 6:22 ` [PATCH 2/5] mm: memcontrol: make page_memcg{_rcu} only applicable for non-kmem page Muchun Song
2021-03-01 18:11 ` Shakeel Butt
2021-03-01 19:09 ` Johannes Weiner
2021-03-02 3:49 ` [External] " Muchun Song
2021-03-02 3:03 ` Muchun Song
2021-03-02 3:35 ` Shakeel Butt
2021-03-02 3:51 ` Muchun Song
2021-03-01 6:22 ` Muchun Song [this message]
2021-03-01 6:22 ` [PATCH 4/5] mm: memcontrol: move remote memcg charging APIs to CONFIG_MEMCG_KMEM Muchun Song
2021-03-02 1:15 ` Roman Gushchin
2021-03-02 3:43 ` Shakeel Butt
2021-03-02 3:58 ` Roman Gushchin
2021-03-02 4:12 ` [External] " Muchun Song
2021-03-01 6:22 ` [PATCH 5/5] mm: memcontrol: use object cgroup for remote memory cgroup charging Muchun Song
2021-03-02 1:12 ` [PATCH 0/5] Use obj_cgroup APIs to change kmem pages Roman Gushchin
2021-03-02 2:50 ` [External] " Muchun Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210301062227.59292-4-songmuchun@bytedance.com \
--to=songmuchun@bytedance.com \
--cc=akpm@linux-foundation.org \
--cc=alex.shi@linux.alibaba.com \
--cc=alexander.h.duyck@linux.intel.com \
--cc=amir73il@gmail.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=bristot@redhat.com \
--cc=bsegall@google.com \
--cc=cgroups@vger.kernel.org \
--cc=chris@chrisdown.name \
--cc=christian.brauner@ubuntu.com \
--cc=daniel.vetter@ffwll.ch \
--cc=daniel@iogearbox.net \
--cc=dietmar.eggemann@arm.com \
--cc=duanxiongchun@bytedance.com \
--cc=ebiederm@xmission.com \
--cc=elver@google.com \
--cc=esyr@redhat.com \
--cc=guro@fb.com \
--cc=hannes@cmpxchg.org \
--cc=iamjoonsoo.kim@lge.com \
--cc=jack@suse.cz \
--cc=jannh@google.com \
--cc=john.fastabend@gmail.com \
--cc=juri.lelli@redhat.com \
--cc=kafai@fb.com \
--cc=keescook@chromium.org \
--cc=kpsingh@kernel.org \
--cc=krisman@collabora.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=longman@redhat.com \
--cc=mathieu.desnoyers@efficios.com \
--cc=mgorman@suse.de \
--cc=mhocko@kernel.org \
--cc=mingo@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=peterz@infradead.org \
--cc=posk@google.com \
--cc=richard.weiyang@gmail.com \
--cc=rostedt@goodmis.org \
--cc=shakeelb@google.com \
--cc=songliubraving@fb.com \
--cc=surenb@google.com \
--cc=vbabka@suse.cz \
--cc=vdavydov.dev@gmail.com \
--cc=vincent.guittot@linaro.org \
--cc=viro@zeniv.linux.org.uk \
--cc=walken@google.com \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).