All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yafang Shao <laoar.shao@gmail.com>
To: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	kafai@fb.com, songliubraving@fb.com, yhs@fb.com,
	john.fastabend@gmail.com, kpsingh@kernel.org, sdf@google.com,
	haoluo@google.com, jolsa@kernel.org, tj@kernel.org,
	dennis@kernel.org, cl@linux.com, akpm@linux-foundation.org,
	penberg@kernel.org, rientjes@google.com, iamjoonsoo.kim@lge.com,
	roman.gushchin@linux.dev, 42.hyeyoo@gmail.com, vbabka@suse.cz,
	urezki@gmail.com
Cc: linux-mm@kvack.org, bpf@vger.kernel.org,
	Yafang Shao <laoar.shao@gmail.com>
Subject: [PATCH bpf-next 7/7] bpf: hashtab memory usage
Date: Thu,  2 Feb 2023 01:41:58 +0000	[thread overview]
Message-ID: <20230202014158.19616-8-laoar.shao@gmail.com> (raw)
In-Reply-To: <20230202014158.19616-1-laoar.shao@gmail.com>

Get the htab memory usage from the htab pointers we have allocated. Some
minor pointers are ignored, as their sizes are quite small compared with
the total size.

The results are as follows:
- before this change
1: hash  name count_map  flags 0x0  <<<< prealloc
        key 16B  value 24B  max_entries 1048576  memlock 41943040B
2: hash  name count_map  flags 0x1  <<<< non prealloc, fully set
        key 16B  value 24B  max_entries 1048576  memlock 41943040B
3: hash  name count_map  flags 0x1  <<<< non prealloc, non set
        key 16B  value 24B  max_entries 1048576  memlock 41943040B

The memlock is always a fixed number, whether or not the map is
preallocated, and regardless of how many elements are allocated.

- after this change
1: hash  name count_map  flags 0x0  <<<< prealloc
        key 16B  value 24B  max_entries 1048576  memlock 109064464B
2: hash  name count_map  flags 0x1  <<<< non prealloc, fully set
        key 16B  value 24B  max_entries 1048576  memlock 117464320B
3: hash  name count_map  flags 0x1  <<<< non prealloc, non set
        key 16B  value 24B  max_entries 1048576  memlock 16797952B

The memlock now reflects the memory the hashtab has actually allocated.

At worst, the difference can be 10x, for example,
- before this change
4: hash  name count_map  flags 0x0
        key 4B  value 4B  max_entries 1048576  memlock 8388608B

- after this change
4: hash  name count_map  flags 0x0
        key 4B  value 4B  max_entries 1048576  memlock 83898640B

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 kernel/bpf/hashtab.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 79 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 66bded1..cba540b 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -273,6 +273,25 @@ static void htab_free_elems(struct bpf_htab *htab)
 	bpf_map_area_free(htab->elems);
 }
 
+static unsigned long htab_prealloc_elems_size(struct bpf_htab *htab)
+{
+	unsigned long size = 0;
+	int i;
+
+	if (!htab_is_percpu(htab))
+		return kvsize(htab->elems);
+
+	for (i = 0; i < htab->map.max_entries; i++) {
+		void __percpu *pptr;
+
+		pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
+					htab->map.key_size);
+		size += percpu_size(pptr);
+	}
+	size += kvsize(htab->elems);
+	return size;
+}
+
 /* The LRU list has a lock (lru_lock). Each htab bucket has a lock
  * (bucket_lock). If both locks need to be acquired together, the lock
  * order is always lru_lock -> bucket_lock and this only happens in
@@ -864,6 +883,16 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
 	bpf_mem_cache_free(&htab->ma, l);
 }
 
+static unsigned long htab_elem_size(struct bpf_htab *htab, struct htab_elem *l)
+{
+	unsigned long size = 0;
+
+	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
+		size += bpf_mem_cache_elem_size(&htab->pcpu_ma, l->ptr_to_pptr);
+
+	return size + bpf_mem_cache_elem_size(&htab->ma, l);
+}
+
 static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
 {
 	struct bpf_map *map = &htab->map;
@@ -899,7 +928,6 @@ static void dec_elem_count(struct bpf_htab *htab)
 		atomic_dec(&htab->count);
 }
 
-
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
 	htab_put_fd_value(htab, l);
@@ -1457,6 +1485,31 @@ static void delete_all_elements(struct bpf_htab *htab)
 	migrate_enable();
 }
 
+static unsigned long htab_non_prealloc_elems_size(struct bpf_htab *htab)
+{
+	unsigned long size = 0;
+	unsigned long count;
+	int i;
+
+	rcu_read_lock();
+	for (i = 0; i < htab->n_buckets; i++) {
+		struct hlist_nulls_head *head = select_bucket(htab, i);
+		struct hlist_nulls_node *n;
+		struct htab_elem *l;
+
+		hlist_nulls_for_each_entry(l, n, head, hash_node) {
+			size = htab_elem_size(htab, l);
+			goto out;
+		}
+	}
+out:
+	rcu_read_unlock();
+	count = htab->use_percpu_counter ? percpu_counter_sum(&htab->pcount) :
+			atomic_read(&htab->count);
+
+	return size * count;
+}
+
 static void htab_free_malloced_timers(struct bpf_htab *htab)
 {
 	int i;
@@ -1523,6 +1576,26 @@ static void htab_map_free(struct bpf_map *map)
 	bpf_map_area_free(htab);
 }
 
+/* Get the htab memory usage from pointers we have already allocated.
+ * Some minor pointers are ignored as their sizes are quite small compared
+ * with the total size.
+ */
+static unsigned long htab_mem_usage(const struct bpf_map *map)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	unsigned long size = 0;
+
+	if (!htab_is_prealloc(htab))
+		size += htab_non_prealloc_elems_size(htab);
+	else
+		size += htab_prealloc_elems_size(htab);
+	size += percpu_size(htab->extra_elems);
+	size += kvsize(htab->buckets);
+	size += bpf_mem_alloc_size(&htab->pcpu_ma);
+	size += bpf_mem_alloc_size(&htab->ma);
+	return size;
+}
+
 static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
 				   struct seq_file *m)
 {
@@ -2191,6 +2264,7 @@ static int bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_f
 	.map_seq_show_elem = htab_map_seq_show_elem,
 	.map_set_for_each_callback_args = map_set_for_each_callback_args,
 	.map_for_each_callback = bpf_for_each_hash_elem,
+	.map_mem_usage = htab_mem_usage,
 	BATCH_OPS(htab),
 	.map_btf_id = &htab_map_btf_ids[0],
 	.iter_seq_info = &iter_seq_info,
@@ -2212,6 +2286,7 @@ static int bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_f
 	.map_seq_show_elem = htab_map_seq_show_elem,
 	.map_set_for_each_callback_args = map_set_for_each_callback_args,
 	.map_for_each_callback = bpf_for_each_hash_elem,
+	.map_mem_usage = htab_mem_usage,
 	BATCH_OPS(htab_lru),
 	.map_btf_id = &htab_map_btf_ids[0],
 	.iter_seq_info = &iter_seq_info,
@@ -2363,6 +2438,7 @@ static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key,
 	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
 	.map_set_for_each_callback_args = map_set_for_each_callback_args,
 	.map_for_each_callback = bpf_for_each_hash_elem,
+	.map_mem_usage = htab_mem_usage,
 	BATCH_OPS(htab_percpu),
 	.map_btf_id = &htab_map_btf_ids[0],
 	.iter_seq_info = &iter_seq_info,
@@ -2382,6 +2458,7 @@ static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key,
 	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
 	.map_set_for_each_callback_args = map_set_for_each_callback_args,
 	.map_for_each_callback = bpf_for_each_hash_elem,
+	.map_mem_usage = htab_mem_usage,
 	BATCH_OPS(htab_lru_percpu),
 	.map_btf_id = &htab_map_btf_ids[0],
 	.iter_seq_info = &iter_seq_info,
@@ -2519,6 +2596,7 @@ static void htab_of_map_free(struct bpf_map *map)
 	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
 	.map_gen_lookup = htab_of_map_gen_lookup,
 	.map_check_btf = map_check_no_btf,
+	.map_mem_usage = htab_mem_usage,
 	BATCH_OPS(htab),
 	.map_btf_id = &htab_map_btf_ids[0],
 };
-- 
1.8.3.1


  parent reply	other threads:[~2023-02-02  1:42 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-02  1:41 [PATCH bpf-next 0/7] bpf, mm: bpf memory usage Yafang Shao
2023-02-02  1:41 ` [PATCH bpf-next 1/7] mm: percpu: fix incorrect size in pcpu_obj_full_size() Yafang Shao
2023-02-02  1:41 ` [PATCH bpf-next 2/7] mm: percpu: introduce percpu_size() Yafang Shao
2023-02-02 14:32   ` Christoph Lameter
2023-02-02 15:01     ` Yafang Shao
2023-02-02  1:41 ` [PATCH bpf-next 3/7] mm: vmalloc: introduce vsize() Yafang Shao
2023-02-02 10:23   ` Christoph Hellwig
2023-02-02 14:10     ` Yafang Shao
2023-02-02  1:41 ` [PATCH bpf-next 4/7] mm: util: introduce kvsize() Yafang Shao
2023-02-02  1:41 ` [PATCH bpf-next 5/7] bpf: add new map ops ->map_mem_usage Yafang Shao
2023-02-02  1:41 ` [PATCH bpf-next 6/7] bpf: introduce bpf_mem_alloc_size() Yafang Shao
2023-02-02  4:53   ` kernel test robot
2023-02-02 14:11     ` Yafang Shao
2023-02-02  1:41 ` Yafang Shao [this message]
2023-02-04  2:01   ` [PATCH bpf-next 7/7] bpf: hashtab memory usage John Fastabend
2023-02-05  3:55     ` Yafang Shao
2023-02-08  1:56       ` Alexei Starovoitov
2023-02-08  3:33         ` Yafang Shao
2023-02-08  4:29           ` Alexei Starovoitov
2023-02-08 14:22             ` Yafang Shao
2023-02-05 22:14   ` Cong Wang
2023-02-06 11:52     ` Yafang Shao
2023-02-04  2:15 ` [PATCH bpf-next 0/7] bpf, mm: bpf " John Fastabend
2023-02-05  4:03   ` Yafang Shao
2023-02-07  0:48     ` Ho-Ren Chuang
2023-02-07  7:02       ` Yafang Shao
2023-02-07  0:53     ` Ho-Ren Chuang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230202014158.19616-8-laoar.shao@gmail.com \
    --to=laoar.shao@gmail.com \
    --cc=42.hyeyoo@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=cl@linux.com \
    --cc=daniel@iogearbox.net \
    --cc=dennis@kernel.org \
    --cc=haoluo@google.com \
    --cc=iamjoonsoo.kim@lge.com \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kafai@fb.com \
    --cc=kpsingh@kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=penberg@kernel.org \
    --cc=rientjes@google.com \
    --cc=roman.gushchin@linux.dev \
    --cc=sdf@google.com \
    --cc=songliubraving@fb.com \
    --cc=tj@kernel.org \
    --cc=urezki@gmail.com \
    --cc=vbabka@suse.cz \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.