All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yafang Shao <laoar.shao@gmail.com>
To: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	kafai@fb.com, songliubraving@fb.com, yhs@fb.com,
	john.fastabend@gmail.com, kpsingh@kernel.org, sdf@google.com,
	haoluo@google.com, jolsa@kernel.org, hannes@cmpxchg.org,
	mhocko@kernel.org, roman.gushchin@linux.dev, shakeelb@google.com,
	songmuchun@bytedance.com, akpm@linux-foundation.org,
	tj@kernel.org, lizefan.x@bytedance.com
Cc: cgroups@vger.kernel.org, netdev@vger.kernel.org,
	bpf@vger.kernel.org, linux-mm@kvack.org,
	Yafang Shao <laoar.shao@gmail.com>
Subject: [PATCH bpf-next v2 12/12] bpf: Introduce selectable memcg for bpf map
Date: Thu, 18 Aug 2022 14:31:18 +0000	[thread overview]
Message-ID: <20220818143118.17733-13-laoar.shao@gmail.com> (raw)
In-Reply-To: <20220818143118.17733-1-laoar.shao@gmail.com>

A new bpf attr memcg_fd is introduced for map creation. The map creation
path in libbpf is changed consequently to set this attr.

A new member memcg_fd is introduced into bpf attr of BPF_MAP_CREATE
command, which is the fd of an opened cgroup directory. In this cgroup,
the memory subsystem must be enabled. The valid memcg_fd must be a postive
number, that means it can't be zero(a valid return value of open(2)). Once
the kernel get the memory cgroup from this fd, it will set this memcg into
bpf map, then all the subsequent memory allocation of this map will be
charged to the memcg. The map creation paths in libbpf are also changed
consequently.

The usage of this new member as follows,
	struct bpf_map_create_opts map_opts = {
		.sz = sizeof(map_opts),
	};
	int memcg_fd, map_fd, old_fd;
	int key, value;

	memcg_fd = open("/sys/fs/cgroup/memory/bpf", O_DIRECTORY);
	if (memcg_fd < 0) {
		perror("memcg dir open");
		return -1;
	}

	/* 0 is a invalid fd */
	if (memcg_fd == 0) {
		old_fd = memcg_fd;
		memcg_fd = fcntl(memcg_fd, F_DUPFD_CLOEXEC, 3);
		close(old_fd);
		if (memcg_fd < 0) {
			perror("fcntl");
			return -1;
		}
	}

	map_opts.memcg_fd = memcg_fd;
	map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "map_for_memcg",
				sizeof(key), sizeof(value),
				1024, &map_opts);
	if (map_fd <= 0) {
		close(memcg_fd);
		perror("map create");
		return -1;
	}

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/uapi/linux/bpf.h       |  1 +
 kernel/bpf/syscall.c           | 45 ++++++++++++++++++++++++++++++++----------
 tools/include/uapi/linux/bpf.h |  1 +
 tools/lib/bpf/bpf.c            |  3 ++-
 tools/lib/bpf/bpf.h            |  3 ++-
 tools/lib/bpf/gen_loader.c     |  2 +-
 tools/lib/bpf/libbpf.c         |  2 ++
 tools/lib/bpf/skel_internal.h  |  2 +-
 8 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7d1e279..048b692 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1300,6 +1300,7 @@ struct bpf_stack_build_id {
 		 * to using 5 hash functions).
 		 */
 		__u64	map_extra;
+		__u32	memcg_fd;	/* selectable memcg */
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5f5cade4..fd1f67c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -294,14 +294,33 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
 }
 
 #ifdef CONFIG_MEMCG_KMEM
-static void bpf_map_save_memcg(struct bpf_map *map)
+static int bpf_map_save_memcg(struct bpf_map *map, u32 memcg_fd)
 {
-	/* Currently if a map is created by a process belonging to the root
-	 * memory cgroup, get_obj_cgroup_from_current() will return NULL.
-	 * So we have to check map->objcg for being NULL each time it's
-	 * being used.
-	 */
-	map->objcg = get_obj_cgroup_from_current();
+	struct obj_cgroup *objcg;
+	struct cgroup *cgrp;
+
+	if (memcg_fd) {
+		cgrp = cgroup_get_from_fd(memcg_fd);
+		if (IS_ERR(cgrp))
+			return -EINVAL;
+
+		objcg = get_obj_cgroup_from_cgroup(cgrp);
+		if (IS_ERR(objcg)) {
+			cgroup_put(cgrp);
+			return PTR_ERR(objcg);
+		}
+		cgroup_put(cgrp);
+	} else {
+		/* Currently if a map is created by a process belonging to the root
+		 * memory cgroup, get_obj_cgroup_from_current() will return NULL.
+		 * So we have to check map->objcg for being NULL each time it's
+		 * being used.
+		 */
+		objcg = get_obj_cgroup_from_current();
+	}
+
+	map->objcg = objcg;
+	return 0;
 }
 
 static void bpf_map_release_memcg(struct bpf_map *map)
@@ -311,8 +330,9 @@ static void bpf_map_release_memcg(struct bpf_map *map)
 }
 
 #else
-static void bpf_map_save_memcg(struct bpf_map *map)
+static int bpf_map_save_memcg(struct bpf_map *map, u32 memcg_fd)
 {
+	return 0;
 }
 
 static void bpf_map_release_memcg(struct bpf_map *map)
@@ -405,7 +425,12 @@ static u32 bpf_map_flags_retain_permanent(u32 flags)
 
 int bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
 {
-	bpf_map_save_memcg(map);
+	int err;
+
+	err = bpf_map_save_memcg(map, attr->memcg_fd);
+	if (err)
+		return err;
+
 	map->map_type = attr->map_type;
 	map->key_size = attr->key_size;
 	map->value_size = attr->value_size;
@@ -1091,7 +1116,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 	return ret;
 }
 
-#define BPF_MAP_CREATE_LAST_FIELD map_extra
+#define BPF_MAP_CREATE_LAST_FIELD memcg_fd
 /* called via syscall */
 static int map_create(union bpf_attr *attr)
 {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e174ad2..1e9efe7 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1300,6 +1300,7 @@ struct bpf_stack_build_id {
 		 * to using 5 hash functions).
 		 */
 		__u64	map_extra;
+		__u32	memcg_fd;	/* selectable memcg */
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 6a96e66..6517553 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -171,7 +171,7 @@ int bpf_map_create(enum bpf_map_type map_type,
 		   __u32 max_entries,
 		   const struct bpf_map_create_opts *opts)
 {
-	const size_t attr_sz = offsetofend(union bpf_attr, map_extra);
+	const size_t attr_sz = offsetofend(union bpf_attr, memcg_fd);
 	union bpf_attr attr;
 	int fd;
 
@@ -199,6 +199,7 @@ int bpf_map_create(enum bpf_map_type map_type,
 	attr.map_extra = OPTS_GET(opts, map_extra, 0);
 	attr.numa_node = OPTS_GET(opts, numa_node, 0);
 	attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0);
+	attr.memcg_fd = OPTS_GET(opts, memcg_fd, 0);
 
 	fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
 	return libbpf_err_errno(fd);
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 9c50bea..dd0d929 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -51,8 +51,9 @@ struct bpf_map_create_opts {
 
 	__u32 numa_node;
 	__u32 map_ifindex;
+	__u32 memcg_fd;
 };
-#define bpf_map_create_opts__last_field map_ifindex
+#define bpf_map_create_opts__last_field memcg_fd
 
 LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
 			      const char *map_name,
diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
index 23f5c46..f35b014 100644
--- a/tools/lib/bpf/gen_loader.c
+++ b/tools/lib/bpf/gen_loader.c
@@ -451,7 +451,7 @@ void bpf_gen__map_create(struct bpf_gen *gen,
 			 __u32 key_size, __u32 value_size, __u32 max_entries,
 			 struct bpf_map_create_opts *map_attr, int map_idx)
 {
-	int attr_size = offsetofend(union bpf_attr, map_extra);
+	int attr_size = offsetofend(union bpf_attr, memcg_fd);
 	bool close_inner_map_fd = false;
 	int map_create_attr, idx;
 	union bpf_attr attr;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 3f01f5c..ad7aa39 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -511,6 +511,7 @@ struct bpf_map {
 	bool reused;
 	bool autocreate;
 	__u64 map_extra;
+	__u32 memcg_fd;
 };
 
 enum extern_type {
@@ -4936,6 +4937,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
 	create_attr.map_flags = def->map_flags;
 	create_attr.numa_node = map->numa_node;
 	create_attr.map_extra = map->map_extra;
+	create_attr.memcg_fd = map->memcg_fd;
 
 	if (bpf_map__is_struct_ops(map))
 		create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
index bd6f450..83403cc 100644
--- a/tools/lib/bpf/skel_internal.h
+++ b/tools/lib/bpf/skel_internal.h
@@ -222,7 +222,7 @@ static inline int skel_map_create(enum bpf_map_type map_type,
 				  __u32 value_size,
 				  __u32 max_entries)
 {
-	const size_t attr_sz = offsetofend(union bpf_attr, map_extra);
+	const size_t attr_sz = offsetofend(union bpf_attr, memcg_fd);
 	union bpf_attr attr;
 
 	memset(&attr, 0, attr_sz);
-- 
1.8.3.1


  parent reply	other threads:[~2022-08-18 14:32 UTC|newest]

Thread overview: 61+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-18 14:31 [PATCH bpf-next v2 00/12] bpf: Introduce selectable memcg for bpf map Yafang Shao
2022-08-18 14:31 ` [PATCH bpf-next v2 01/12] cgroup: Update the comment on cgroup_get_from_fd Yafang Shao
2022-08-18 14:31   ` Yafang Shao
2022-08-18 19:11   ` Yosry Ahmed
2022-08-18 19:11     ` Yosry Ahmed
2022-08-18 14:31 ` [PATCH bpf-next v2 02/12] bpf: Introduce new helper bpf_map_put_memcg() Yafang Shao
2022-08-18 14:31   ` Yafang Shao
2022-08-18 14:31 ` [PATCH bpf-next v2 03/12] bpf: Define bpf_map_{get,put}_memcg for !CONFIG_MEMCG_KMEM Yafang Shao
2022-08-18 14:31 ` [PATCH bpf-next v2 04/12] bpf: Call bpf_map_init_from_attr() immediately after map creation Yafang Shao
2022-08-18 14:31 ` [PATCH bpf-next v2 05/12] bpf: Save memcg in bpf_map_init_from_attr() Yafang Shao
2022-08-18 14:31   ` Yafang Shao
2022-08-18 14:31 ` [PATCH bpf-next v2 06/12] bpf: Use scoped-based charge in bpf_map_area_alloc Yafang Shao
2022-08-18 14:31   ` Yafang Shao
2022-08-18 14:31 ` [PATCH bpf-next v2 07/12] bpf: Introduce new helpers bpf_ringbuf_pages_{alloc,free} Yafang Shao
2022-08-18 14:31   ` Yafang Shao
2022-08-18 17:30   ` Andrii Nakryiko
2022-08-18 14:31 ` [PATCH bpf-next v2 08/12] bpf: Use bpf_map_kzalloc in arraymap Yafang Shao
2022-08-18 14:31   ` Yafang Shao
2022-08-18 14:31 ` [PATCH bpf-next v2 09/12] bpf: Use bpf_map_kvcalloc in bpf_local_storage Yafang Shao
2022-08-18 14:31   ` Yafang Shao
2022-08-18 14:31 ` [PATCH bpf-next v2 10/12] mm, memcg: Add new helper get_obj_cgroup_from_cgroup Yafang Shao
2022-08-18 20:38   ` Shakeel Butt
2022-08-19  1:21     ` Yafang Shao
2022-08-18 14:31 ` [PATCH bpf-next v2 11/12] bpf: Add return value for bpf_map_init_from_attr Yafang Shao
2022-08-18 14:31 ` Yafang Shao [this message]
2022-08-18 22:20 ` [PATCH bpf-next v2 00/12] bpf: Introduce selectable memcg for bpf map Tejun Heo
2022-08-18 22:20   ` Tejun Heo
2022-08-18 22:33   ` Tejun Heo
2022-08-19  1:09     ` Yafang Shao
2022-08-19  1:09       ` Yafang Shao
2022-08-19 17:06       ` Tejun Heo
2022-08-19 17:06         ` Tejun Heo
2022-08-20  2:25         ` Yafang Shao
2022-08-20  2:25           ` Yafang Shao
2022-08-22 11:29           ` [RFD RESEND] cgroup: Persistent memory usage tracking Tejun Heo
2022-08-22 11:29             ` Tejun Heo
2022-08-22 16:12             ` Shakeel Butt
2022-08-22 16:12               ` Shakeel Butt
2022-08-22 19:02             ` Mina Almasry
2022-08-22 19:02               ` Mina Almasry
2022-08-22 21:19               ` Johannes Weiner
2022-08-22 21:19                 ` Johannes Weiner
2022-08-22 21:52                 ` Mina Almasry
2022-08-22 21:52                   ` Mina Almasry
2022-08-23  3:01                 ` Roman Gushchin
2022-08-23  3:01                   ` Roman Gushchin
2022-08-23  3:14                   ` Tejun Heo
2022-08-23  3:14                     ` Tejun Heo
2022-08-24 19:02                     ` Mina Almasry
2022-08-24 19:02                       ` Mina Almasry
2022-08-25 17:59                       ` Tejun Heo
2022-08-25 17:59                         ` Tejun Heo
2022-08-23 11:08             ` Yafang Shao
2022-08-23 11:08               ` Yafang Shao
2022-08-23 17:12               ` Tejun Heo
2022-08-23 17:12                 ` Tejun Heo
2022-08-24 11:57                 ` Yafang Shao
2022-08-24 11:57                   ` Yafang Shao
2022-08-19  0:59   ` [PATCH bpf-next v2 00/12] bpf: Introduce selectable memcg for bpf map Yafang Shao
2022-08-19 16:45     ` Tejun Heo
2022-08-19 16:45       ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220818143118.17733-13-laoar.shao@gmail.com \
    --to=laoar.shao@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=cgroups@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=hannes@cmpxchg.org \
    --cc=haoluo@google.com \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kafai@fb.com \
    --cc=kpsingh@kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lizefan.x@bytedance.com \
    --cc=mhocko@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=roman.gushchin@linux.dev \
    --cc=sdf@google.com \
    --cc=shakeelb@google.com \
    --cc=songliubraving@fb.com \
    --cc=songmuchun@bytedance.com \
    --cc=tj@kernel.org \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.