bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Roman Gushchin <guro@fb.com>
To: <bpf@vger.kernel.org>
Cc: <netdev@vger.kernel.org>, Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>, <kernel-team@fb.com>,
	<linux-kernel@vger.kernel.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Shakeel Butt <shakeelb@google.com>, <linux-mm@kvack.org>,
	Roman Gushchin <guro@fb.com>
Subject: [PATCH bpf-next v4 03/30] bpf: memcg-based memory accounting for bpf maps
Date: Fri, 21 Aug 2020 08:01:07 -0700	[thread overview]
Message-ID: <20200821150134.2581465-4-guro@fb.com> (raw)
In-Reply-To: <20200821150134.2581465-1-guro@fb.com>

This patch enables memcg-based memory accounting for memory allocated
by __bpf_map_area_alloc(), which is used by most map types for
large allocations.

If a map is updated from an interrupt context, and the update
results in a memory allocation, the memory cgroup can't be determined
from the context of the current process. To address this case, the
bpf map preserves a pointer to the memory cgroup of the process
that created the map. This memory cgroup is charged for allocations
made from interrupt context.

The following patches in the series will refine the accounting for
some map types.

Signed-off-by: Roman Gushchin <guro@fb.com>
---
 include/linux/bpf.h  |  4 ++++
 kernel/bpf/helpers.c | 37 ++++++++++++++++++++++++++++++++++++-
 kernel/bpf/syscall.c | 27 ++++++++++++++++++++++++++-
 3 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a9b7185a6b37..b5f178afde94 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -34,6 +34,7 @@ struct btf_type;
 struct exception_table_entry;
 struct seq_operations;
 struct bpf_iter_aux_info;
+struct mem_cgroup;
 
 extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
@@ -138,6 +139,9 @@ struct bpf_map {
 	u32 btf_value_type_id;
 	struct btf *btf;
 	struct bpf_map_memory memory;
+#ifdef CONFIG_MEMCG_KMEM
+	struct mem_cgroup *memcg;
+#endif
 	char name[BPF_OBJ_NAME_LEN];
 	u32 btf_vmlinux_value_type_id;
 	bool bypass_spec_v1;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index be43ab3e619f..f8ce7bc7003f 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -14,6 +14,7 @@
 #include <linux/jiffies.h>
 #include <linux/pid_namespace.h>
 #include <linux/proc_ns.h>
+#include <linux/sched/mm.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -41,11 +42,45 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto = {
 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
 };
 
+#ifdef CONFIG_MEMCG_KMEM
+static __always_inline int __bpf_map_update_elem(struct bpf_map *map, void *key,
+						 void *value, u64 flags)
+{
+	struct mem_cgroup *old_memcg;
+	bool in_interrupt;
+	int ret;
+
+	/*
+	 * If update from an interrupt context results in a memory allocation,
+	 * the memory cgroup to charge can't be determined from the context
+	 * of the current task. Instead, we charge the memory cgroup that
+	 * contained the process which created the map.
+	 */
+	in_interrupt = in_interrupt();
+	if (in_interrupt)
+		old_memcg = memalloc_use_memcg(map->memcg);
+
+	ret = map->ops->map_update_elem(map, key, value, flags);
+
+	if (in_interrupt)
+		memalloc_use_memcg(old_memcg);
+
+	return ret;
+}
+#else
+static __always_inline int __bpf_map_update_elem(struct bpf_map *map, void *key,
+						 void *value, u64 flags)
+{
+	return map->ops->map_update_elem(map, key, value, flags);
+}
+#endif
+
 BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
 	   void *, value, u64, flags)
 {
 	WARN_ON_ONCE(!rcu_read_lock_held());
-	return map->ops->map_update_elem(map, key, value, flags);
+
+	return __bpf_map_update_elem(map, key, value, flags);
 }
 
 const struct bpf_func_proto bpf_map_update_elem_proto = {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 689d736b6904..683614c17a95 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -29,6 +29,7 @@
 #include <linux/bpf_lsm.h>
 #include <linux/poll.h>
 #include <linux/bpf-netns.h>
+#include <linux/memcontrol.h>
 
 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
 			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -275,7 +276,7 @@ static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
 	 * __GFP_RETRY_MAYFAIL to avoid such situations.
 	 */
 
-	const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO;
+	const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_ACCOUNT;
 	unsigned int flags = 0;
 	unsigned long align = 1;
 	void *area;
@@ -452,6 +453,27 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
 		__release(&map_idr_lock);
 }
 
+#ifdef CONFIG_MEMCG_KMEM
+static void bpf_map_save_memcg(struct bpf_map *map)
+{
+	map->memcg = get_mem_cgroup_from_mm(current->mm);
+}
+
+static void bpf_map_release_memcg(struct bpf_map *map)
+{
+	mem_cgroup_put(map->memcg);
+}
+
+#else
+static void bpf_map_save_memcg(struct bpf_map *map)
+{
+}
+
+static void bpf_map_release_memcg(struct bpf_map *map)
+{
+}
+#endif
+
 /* called from workqueue */
 static void bpf_map_free_deferred(struct work_struct *work)
 {
@@ -463,6 +485,7 @@ static void bpf_map_free_deferred(struct work_struct *work)
 	/* implementation dependent freeing */
 	map->ops->map_free(map);
 	bpf_map_charge_finish(&mem);
+	bpf_map_release_memcg(map);
 }
 
 static void bpf_map_put_uref(struct bpf_map *map)
@@ -869,6 +892,8 @@ static int map_create(union bpf_attr *attr)
 	if (err)
 		goto free_map_sec;
 
+	bpf_map_save_memcg(map);
+
 	err = bpf_map_new_fd(map, f_flags);
 	if (err < 0) {
 		/* failed to allocate fd.
-- 
2.26.2


  parent reply	other threads:[~2020-08-21 15:02 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-21 15:01 [PATCH bpf-next v4 00/30] bpf: switch to memcg-based memory accounting Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 01/30] mm: support nesting memalloc_use_memcg() Roman Gushchin
2020-08-21 16:29   ` Shakeel Butt
2020-08-21 15:01 ` [PATCH bpf-next v4 02/30] bpf: memcg-based memory accounting for bpf progs Roman Gushchin
2020-08-25 19:00   ` Shakeel Butt
2020-08-25 22:26     ` Roman Gushchin
2020-08-21 15:01 ` Roman Gushchin [this message]
2020-08-25 23:27   ` [PATCH bpf-next v4 03/30] bpf: memcg-based memory accounting for bpf maps Shakeel Butt
2020-08-26  2:38     ` Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 04/30] bpf: refine memcg-based memory accounting for arraymap maps Roman Gushchin
2020-08-27  1:19   ` Shakeel Butt
2020-08-21 15:01 ` [PATCH bpf-next v4 05/30] bpf: refine memcg-based memory accounting for cpumap maps Roman Gushchin
2020-08-27  1:24   ` Shakeel Butt
2020-08-21 15:01 ` [PATCH bpf-next v4 06/30] bpf: memcg-based memory accounting for cgroup storage maps Roman Gushchin
2020-08-27  1:25   ` Shakeel Butt
2020-08-21 15:01 ` [PATCH bpf-next v4 07/30] bpf: refine memcg-based memory accounting for devmap maps Roman Gushchin
2020-08-27  1:38   ` Shakeel Butt
2020-08-21 15:01 ` [PATCH bpf-next v4 08/30] bpf: refine memcg-based memory accounting for hashtab maps Roman Gushchin
2020-08-28 16:44   ` Shakeel Butt
2020-08-21 15:01 ` [PATCH bpf-next v4 09/30] bpf: memcg-based memory accounting for lpm_trie maps Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 10/30] bpf: memcg-based memory accounting for bpf ringbuffer Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 11/30] bpf: memcg-based memory accounting for socket storage maps Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 12/30] bpf: refine memcg-based memory accounting for sockmap and sockhash maps Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 13/30] bpf: refine memcg-based memory accounting for xskmap maps Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 14/30] bpf: eliminate rlimit-based memory accounting for arraymap maps Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 15/30] bpf: eliminate rlimit-based memory accounting for bpf_struct_ops maps Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 16/30] bpf: eliminate rlimit-based memory accounting for cpumap maps Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 17/30] bpf: eliminate rlimit-based memory accounting for cgroup storage maps Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 18/30] bpf: eliminate rlimit-based memory accounting for devmap maps Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 19/30] bpf: eliminate rlimit-based memory accounting for hashtab maps Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 20/30] bpf: eliminate rlimit-based memory accounting for lpm_trie maps Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 21/30] bpf: eliminate rlimit-based memory accounting for queue_stack_maps maps Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 22/30] bpf: eliminate rlimit-based memory accounting for reuseport_array maps Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 23/30] bpf: eliminate rlimit-based memory accounting for bpf ringbuffer Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 24/30] bpf: eliminate rlimit-based memory accounting for sockmap and sockhash maps Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 25/30] bpf: eliminate rlimit-based memory accounting for stackmap maps Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 26/30] bpf: eliminate rlimit-based memory accounting for socket storage maps Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 27/30] bpf: eliminate rlimit-based memory accounting for xskmap maps Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 28/30] bpf: eliminate rlimit-based memory accounting infra for bpf maps Roman Gushchin
2020-08-21 18:23   ` Alexei Starovoitov
2020-08-21 23:15     ` Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 29/30] bpf: eliminate rlimit-based memory accounting for bpf progs Roman Gushchin
2020-08-21 15:01 ` [PATCH bpf-next v4 30/30] bpf: samples: do not touch RLIMIT_MEMLOCK Roman Gushchin
2020-08-21 22:20 ` [PATCH bpf-next v4 00/30] bpf: switch to memcg-based memory accounting Roman Gushchin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200821150134.2581465-4-guro@fb.com \
    --to=guro@fb.com \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=hannes@cmpxchg.org \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=netdev@vger.kernel.org \
    --cc=shakeelb@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).