All of lore.kernel.org
 help / color / mirror / Atom feed
From: Roman Gushchin <guro@fb.com>
To: <bpf@vger.kernel.org>
Cc: <ast@kernel.org>, <daniel@iogearbox.net>,
	<netdev@vger.kernel.org>, <andrii@kernel.org>,
	<akpm@linux-foundation.org>, <linux-mm@kvack.org>,
	<linux-kernel@vger.kernel.org>, <kernel-team@fb.com>
Subject: [PATCH bpf-next v7 04/34] mm: convert page kmemcg type to a page memcg flag
Date: Thu, 19 Nov 2020 09:37:24 -0800	[thread overview]
Message-ID: <20201119173754.4125257-5-guro@fb.com> (raw)
In-Reply-To: <20201119173754.4125257-1-guro@fb.com>

PageKmemcg flag is currently defined as a page type (like buddy, offline,
table and guard).  Semantically it means that the page was accounted as a
kernel memory by the page allocator and has to be uncharged on the
release.

As a side effect of defining the flag as a page type, the accounted page
can't be mapped to userspace (look at page_has_type() and comments above).
In particular, this blocks the accounting of vmalloc-backed memory used
by some bpf maps, because these maps do map the memory to userspace.

One option is to fix it by complicating the access to page->mapcount,
which provides some free bits for page->page_type.

But it's way better to move this flag into page->memcg_data flags.
Indeed, the flag makes no sense without enabled memory cgroups and memory
cgroup pointer set in particular.

This commit replaces PageKmemcg() and __SetPageKmemcg() with
PageMemcgKmem() and an open-coded OR operation setting the memcg pointer
with the MEMCG_DATA_KMEM bit.  __ClearPageKmemcg() can be simple deleted,
as the whole memcg_data is zeroed at once.

As a bonus, on !CONFIG_MEMCG build the PageMemcgKmem() check will be
compiled out.

Link: https://lkml.kernel.org/r/20201027001657.3398190-5-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 37 +++++++++++++++++++++++++++++++++----
 include/linux/page-flags.h | 11 ++---------
 mm/memcontrol.c            | 16 +++++-----------
 mm/page_alloc.c            |  4 ++--
 4 files changed, 42 insertions(+), 26 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 99a4841d658b..7c9d43476166 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -346,8 +346,10 @@ extern struct mem_cgroup *root_mem_cgroup;
 enum page_memcg_data_flags {
 	/* page->memcg_data is a pointer to an objcgs vector */
 	MEMCG_DATA_OBJCGS = (1UL << 0),
+	/* page has been accounted as a non-slab kernel page */
+	MEMCG_DATA_KMEM = (1UL << 1),
 	/* the next bit after the last actual flag */
-	__NR_MEMCG_DATA_FLAGS  = (1UL << 1),
+	__NR_MEMCG_DATA_FLAGS  = (1UL << 2),
 };
 
 #define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1)
@@ -369,8 +371,12 @@ enum page_memcg_data_flags {
  */
 static inline struct mem_cgroup *page_memcg(struct page *page)
 {
+	unsigned long memcg_data = page->memcg_data;
+
 	VM_BUG_ON_PAGE(PageSlab(page), page);
-	return (struct mem_cgroup *)page->memcg_data;
+	VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page);
+
+	return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
 }
 
 /*
@@ -387,7 +393,8 @@ static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
 	VM_BUG_ON_PAGE(PageSlab(page), page);
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
-	return (struct mem_cgroup *)READ_ONCE(page->memcg_data);
+	return (struct mem_cgroup *)(READ_ONCE(page->memcg_data) &
+				     ~MEMCG_DATA_FLAGS_MASK);
 }
 
 /*
@@ -416,7 +423,21 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page)
 	if (memcg_data & MEMCG_DATA_OBJCGS)
 		return NULL;
 
-	return (struct mem_cgroup *)memcg_data;
+	return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+}
+
+/*
+ * PageMemcgKmem - check if the page has MemcgKmem flag set
+ * @page: a pointer to the page struct
+ *
+ * Checks if the page has MemcgKmem flag set. The caller must ensure that
+ * the page has an associated memory cgroup. It's not safe to call this function
+ * against some types of pages, e.g. slab pages.
+ */
+static inline bool PageMemcgKmem(struct page *page)
+{
+	VM_BUG_ON_PAGE(page->memcg_data & MEMCG_DATA_OBJCGS, page);
+	return page->memcg_data & MEMCG_DATA_KMEM;
 }
 
 #ifdef CONFIG_MEMCG_KMEM
@@ -435,6 +456,7 @@ static inline struct obj_cgroup **page_objcgs(struct page *page)
 	unsigned long memcg_data = READ_ONCE(page->memcg_data);
 
 	VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page);
+	VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
 
 	return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
 }
@@ -454,6 +476,8 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
 	if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS))
 		return NULL;
 
+	VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
+
 	return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
 }
 
@@ -1109,6 +1133,11 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page)
 	return NULL;
 }
 
+static inline bool PageMemcgKmem(struct page *page)
+{
+	return false;
+}
+
 static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 {
 	return true;
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 4f6ba9379112..fc0e1bd48e73 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -715,9 +715,8 @@ PAGEFLAG_FALSE(DoubleMap)
 #define PAGE_MAPCOUNT_RESERVE	-128
 #define PG_buddy	0x00000080
 #define PG_offline	0x00000100
-#define PG_kmemcg	0x00000200
-#define PG_table	0x00000400
-#define PG_guard	0x00000800
+#define PG_table	0x00000200
+#define PG_guard	0x00000400
 
 #define PageType(page, flag)						\
 	((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
@@ -768,12 +767,6 @@ PAGE_TYPE_OPS(Buddy, buddy)
  */
 PAGE_TYPE_OPS(Offline, offline)
 
-/*
- * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on
- * pages allocated with __GFP_ACCOUNT. It gets cleared on page free.
- */
-PAGE_TYPE_OPS(Kmemcg, kmemcg)
-
 /*
  * Marks pages in use as page tables.
  */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0054b4846770..e0366e306221 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3090,8 +3090,8 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
 	if (memcg && !mem_cgroup_is_root(memcg)) {
 		ret = __memcg_kmem_charge(memcg, gfp, 1 << order);
 		if (!ret) {
-			page->memcg_data = (unsigned long)memcg;
-			__SetPageKmemcg(page);
+			page->memcg_data = (unsigned long)memcg |
+				MEMCG_DATA_KMEM;
 			return 0;
 		}
 		css_put(&memcg->css);
@@ -3116,10 +3116,6 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
 	__memcg_kmem_uncharge(memcg, nr_pages);
 	page->memcg_data = 0;
 	css_put(&memcg->css);
-
-	/* slab pages do not have PageKmemcg flag set */
-	if (PageKmemcg(page))
-		__ClearPageKmemcg(page);
 }
 
 static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
@@ -6877,12 +6873,10 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
 	nr_pages = compound_nr(page);
 	ug->nr_pages += nr_pages;
 
-	if (!PageKmemcg(page)) {
-		ug->pgpgout++;
-	} else {
+	if (PageMemcgKmem(page))
 		ug->nr_kmem += nr_pages;
-		__ClearPageKmemcg(page);
-	}
+	else
+		ug->pgpgout++;
 
 	ug->dummy_page = page;
 	page->memcg_data = 0;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 271133b8243b..3c53018c9c61 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1214,7 +1214,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
 		 * Do not let hwpoison pages hit pcplists/buddy
 		 * Untie memcg state and reset page's owner
 		 */
-		if (memcg_kmem_enabled() && PageKmemcg(page))
+		if (memcg_kmem_enabled() && PageMemcgKmem(page))
 			__memcg_kmem_uncharge_page(page, order);
 		reset_page_owner(page, order);
 		return false;
@@ -1244,7 +1244,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
 	}
 	if (PageMappingFlags(page))
 		page->mapping = NULL;
-	if (memcg_kmem_enabled() && PageKmemcg(page))
+	if (memcg_kmem_enabled() && PageMemcgKmem(page))
 		__memcg_kmem_uncharge_page(page, order);
 	if (check_free)
 		bad += check_free_page(page);
-- 
2.26.2


  parent reply	other threads:[~2020-11-19 17:38 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-19 17:37 [PATCH bpf-next v7 00/34] bpf: switch to memcg-based memory accounting Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 01/34] mm: memcontrol: use helpers to read page's memcg data Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 02/34] mm: memcontrol/slab: use helpers to access slab page's memcg_data Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 03/34] mm: introduce page memcg flags Roman Gushchin
2020-11-19 17:37 ` Roman Gushchin [this message]
2020-11-19 17:37 ` [PATCH bpf-next v7 05/34] bpf: memcg-based memory accounting for bpf progs Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 06/34] bpf: prepare for memcg-based memory accounting for bpf maps Roman Gushchin
2020-11-20  1:05   ` Song Liu
2020-11-19 17:37 ` [PATCH bpf-next v7 07/34] bpf: " Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 08/34] bpf: refine memcg-based memory accounting for arraymap maps Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 09/34] bpf: refine memcg-based memory accounting for cpumap maps Roman Gushchin
2020-11-20  1:33   ` Song Liu
2020-11-19 17:37 ` [PATCH bpf-next v7 10/34] bpf: memcg-based memory accounting for cgroup storage maps Roman Gushchin
2020-11-20  1:37   ` Song Liu
2020-11-19 17:37 ` [PATCH bpf-next v7 11/34] bpf: refine memcg-based memory accounting for devmap maps Roman Gushchin
2020-11-20  1:38   ` Song Liu
2020-11-19 17:37 ` [PATCH bpf-next v7 12/34] bpf: refine memcg-based memory accounting for hashtab maps Roman Gushchin
2020-11-20  1:39   ` Song Liu
2020-11-19 17:37 ` [PATCH bpf-next v7 13/34] bpf: memcg-based memory accounting for lpm_trie maps Roman Gushchin
2020-11-20  1:39   ` Song Liu
2020-11-19 17:37 ` [PATCH bpf-next v7 14/34] bpf: memcg-based memory accounting for bpf ringbuffer Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 15/34] bpf: memcg-based memory accounting for bpf local storage maps Roman Gushchin
2020-11-20  1:44   ` Song Liu
2020-11-19 17:37 ` [PATCH bpf-next v7 16/34] bpf: refine memcg-based memory accounting for sockmap and sockhash maps Roman Gushchin
2020-11-20  1:45   ` Song Liu
2020-11-19 17:37 ` [PATCH bpf-next v7 17/34] bpf: refine memcg-based memory accounting for xskmap maps Roman Gushchin
2020-11-20  1:45   ` Song Liu
2020-11-19 17:37 ` [PATCH bpf-next v7 18/34] bpf: eliminate rlimit-based memory accounting for arraymap maps Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 19/34] bpf: eliminate rlimit-based memory accounting for bpf_struct_ops maps Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 20/34] bpf: eliminate rlimit-based memory accounting for cpumap maps Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 21/34] bpf: eliminate rlimit-based memory accounting for cgroup storage maps Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 22/34] bpf: eliminate rlimit-based memory accounting for devmap maps Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 23/34] bpf: eliminate rlimit-based memory accounting for hashtab maps Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 24/34] bpf: eliminate rlimit-based memory accounting for lpm_trie maps Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 25/34] bpf: eliminate rlimit-based memory accounting for queue_stack_maps maps Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 26/34] bpf: eliminate rlimit-based memory accounting for reuseport_array maps Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 27/34] bpf: eliminate rlimit-based memory accounting for bpf ringbuffer Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 28/34] bpf: eliminate rlimit-based memory accounting for sockmap and sockhash maps Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 29/34] bpf: eliminate rlimit-based memory accounting for stackmap maps Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 30/34] bpf: eliminate rlimit-based memory accounting for xskmap maps Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 31/34] bpf: eliminate rlimit-based memory accounting for bpf local storage maps Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 32/34] bpf: eliminate rlimit-based memory accounting infra for bpf maps Roman Gushchin
2020-11-21  2:52   ` Alexei Starovoitov
2020-11-21  2:59     ` Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 33/34] bpf: eliminate rlimit-based memory accounting for bpf progs Roman Gushchin
2020-11-19 17:37 ` [PATCH bpf-next v7 34/34] bpf: samples: do not touch RLIMIT_MEMLOCK Roman Gushchin
2020-11-23 13:30 ` [PATCH bpf-next v7 00/34] bpf: switch to memcg-based memory accounting Daniel Borkmann
2020-11-24  0:05   ` Roman Gushchin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201119173754.4125257-5-guro@fb.com \
    --to=guro@fb.com \
    --cc=akpm@linux-foundation.org \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.