* [PATCH bpf-next] bpf: add bpf_map_lookup_percpu_elem for percpu map
@ 2022-05-07  2:48 Feng zhou
  2022-05-10  0:34 ` Andrii Nakryiko
  0 siblings, 1 reply; 6+ messages in thread
From: Feng zhou @ 2022-05-07  2:48 UTC (permalink / raw)
  To: ast, daniel, andrii, kafai, songliubraving, yhs, john.fastabend,
	kpsingh, rostedt, mingo, jolsa, davemarchevsky, joannekoong,
	geliang.tang
  Cc: netdev, bpf, linux-kernel, duanxiongchun, songmuchun,
	wangdongdong.6, cong.wang, zhouchengming, zhoufeng.zf

From: Feng Zhou <zhoufeng.zf@bytedance.com>

Tracing some functions, such as enqueue_task_fair, requires accessing
per-cpu data of a specific cpu rather than the current one, which
bpf_map_lookup_elem on a percpu map cannot do. So add
bpf_map_lookup_percpu_elem to make this possible for percpu_array_map,
percpu_hash_map and lru_percpu_hash_map.

The implementation is straightforward: follow the map_lookup_elem
implementation of the percpu maps, add a cpu parameter, and fetch the
value for the specified cpu.
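
A usage sketch, for illustration only (the map, program and section
names are hypothetical; it assumes a BTF-enabled kernel built with
CONFIG_SMP, so struct rq has a 'cpu' member, plus the usual vmlinux.h
and libbpf headers):

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  /* one u64 counter per cpu */
  struct {
          __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
          __uint(max_entries, 1);
          __type(key, u32);
          __type(value, u64);
  } counts SEC(".maps");

  SEC("fentry/enqueue_task_fair")
  int BPF_PROG(on_enqueue, struct rq *rq, struct task_struct *p, int flags)
  {
          u32 key = 0;
          u64 *val;

          /* look up the slot of the rq's cpu, not the current cpu */
          val = bpf_map_lookup_percpu_elem(&counts, &key, rq->cpu);
          if (val)
                  __sync_fetch_and_add(val, 1);
          return 0;
  }

  char LICENSE[] SEC("license") = "GPL";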

Signed-off-by: Feng Zhou <zhoufeng.zf@bytedance.com>
---
 include/linux/bpf.h            |  2 ++
 include/uapi/linux/bpf.h       |  9 +++++++++
 kernel/bpf/arraymap.c          | 15 +++++++++++++++
 kernel/bpf/core.c              |  1 +
 kernel/bpf/hashtab.c           | 32 ++++++++++++++++++++++++++++++++
 kernel/bpf/helpers.c           | 18 ++++++++++++++++++
 kernel/bpf/verifier.c          | 17 +++++++++++++++--
 kernel/trace/bpf_trace.c       |  2 ++
 tools/include/uapi/linux/bpf.h |  9 +++++++++
 9 files changed, 103 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index be94833d390a..6b5cf5a90d73 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -89,6 +89,7 @@ struct bpf_map_ops {
 	int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
 	int (*map_pop_elem)(struct bpf_map *map, void *value);
 	int (*map_peek_elem)(struct bpf_map *map, void *value);
+	void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu);
 
 	/* funcs called by prog_array and perf_event_array map */
 	void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -2161,6 +2162,7 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto;
 extern const struct bpf_func_proto bpf_map_push_elem_proto;
 extern const struct bpf_func_proto bpf_map_pop_elem_proto;
 extern const struct bpf_func_proto bpf_map_peek_elem_proto;
+extern const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto;
 
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 444fe6f1cf35..024fb9f319a8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5154,6 +5154,14 @@ union bpf_attr {
  *		if not NULL, is a reference which must be released using its
  *		corresponding release function, or moved into a BPF map before
  *		program exit.
+ *
+ * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu)
+ * 	Description
+ * 		Perform a lookup in *percpu map* for an entry associated to
+ * 		*key* on *cpu*.
+ * 	Return
+ * 		Map value associated to *key* on *cpu*, or **NULL** if no entry
+ * 		was found or *cpu* is invalid.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5351,6 +5359,7 @@ union bpf_attr {
 	FN(skb_set_tstamp),		\
 	FN(ima_file_hash),		\
 	FN(kptr_xchg),			\
+	FN(map_lookup_percpu_elem),     \
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index b3bf31fd9458..71d9db976ab0 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -243,6 +243,20 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
 	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
 }
 
+static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	u32 index = *(u32 *)key;
+
+	if (cpu >= nr_cpu_ids)
+		return NULL;
+
+	if (unlikely(index >= array->map.max_entries))
+		return NULL;
+
+	return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu);
+}
+
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
@@ -725,6 +739,7 @@ const struct bpf_map_ops percpu_array_map_ops = {
 	.map_lookup_elem = percpu_array_map_lookup_elem,
 	.map_update_elem = array_map_update_elem,
 	.map_delete_elem = array_map_delete_elem,
+	.map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
 	.map_seq_show_elem = percpu_array_map_seq_show_elem,
 	.map_check_btf = array_map_check_btf,
 	.map_lookup_batch = generic_map_lookup_batch,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 13e9dbeeedf3..76f68d0a7ae8 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2619,6 +2619,7 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
 const struct bpf_func_proto bpf_map_push_elem_proto __weak;
 const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
 const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
+const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto __weak;
 const struct bpf_func_proto bpf_spin_lock_proto __weak;
 const struct bpf_func_proto bpf_spin_unlock_proto __weak;
 const struct bpf_func_proto bpf_jiffies64_proto __weak;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3e00e62b2218..9c45b07dd5b6 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -2191,6 +2191,20 @@ static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
 		return NULL;
 }
 
+static void *htab_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
+{
+	struct htab_elem *l;
+
+	if (cpu >= nr_cpu_ids)
+		return NULL;
+
+	l = __htab_map_lookup_elem(map, key);
+	if (l)
+		return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu);
+	else
+		return NULL;
+}
+
 static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	struct htab_elem *l = __htab_map_lookup_elem(map, key);
@@ -2203,6 +2217,22 @@ static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
 	return NULL;
 }
 
+static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
+{
+	struct htab_elem *l;
+
+	if (cpu >= nr_cpu_ids)
+		return NULL;
+
+	l = __htab_map_lookup_elem(map, key);
+	if (l) {
+		bpf_lru_node_set_ref(&l->lru_node);
+		return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu);
+	}
+
+	return NULL;
+}
+
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
 {
 	struct htab_elem *l;
@@ -2292,6 +2322,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
 	.map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
 	.map_update_elem = htab_percpu_map_update_elem,
 	.map_delete_elem = htab_map_delete_elem,
+	.map_lookup_percpu_elem = htab_percpu_map_lookup_percpu_elem,
 	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
 	.map_set_for_each_callback_args = map_set_for_each_callback_args,
 	.map_for_each_callback = bpf_for_each_hash_elem,
@@ -2310,6 +2341,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
 	.map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
 	.map_update_elem = htab_lru_percpu_map_update_elem,
 	.map_delete_elem = htab_lru_map_delete_elem,
+	.map_lookup_percpu_elem = htab_lru_percpu_map_lookup_percpu_elem,
 	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
 	.map_set_for_each_callback_args = map_set_for_each_callback_args,
 	.map_for_each_callback = bpf_for_each_hash_elem,
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 3e709fed5306..d5f104a39092 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -119,6 +119,22 @@ const struct bpf_func_proto bpf_map_peek_elem_proto = {
 	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
 };
 
+BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+	return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu);
+}
+
+const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto = {
+	.func		= bpf_map_lookup_percpu_elem,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_MAP_KEY,
+	.arg3_type	= ARG_ANYTHING,
+};
+
 const struct bpf_func_proto bpf_get_prandom_u32_proto = {
 	.func		= bpf_user_rnd_u32,
 	.gpl_only	= false,
@@ -1420,6 +1436,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_map_pop_elem_proto;
 	case BPF_FUNC_map_peek_elem:
 		return &bpf_map_peek_elem_proto;
+	case BPF_FUNC_map_lookup_percpu_elem:
+		return &bpf_map_lookup_percpu_elem_proto;
 	case BPF_FUNC_get_prandom_u32:
 		return &bpf_get_prandom_u32_proto;
 	case BPF_FUNC_get_smp_processor_id:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 813f6ee80419..67ac0b047caf 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6138,6 +6138,12 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
 			goto error;
 		break;
+	case BPF_FUNC_map_lookup_percpu_elem:
+		if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
+		    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
+		    map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
+			goto error;
+		break;
 	case BPF_FUNC_sk_storage_get:
 	case BPF_FUNC_sk_storage_delete:
 		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
@@ -6751,7 +6757,8 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 	    func_id != BPF_FUNC_map_pop_elem &&
 	    func_id != BPF_FUNC_map_peek_elem &&
 	    func_id != BPF_FUNC_for_each_map_elem &&
-	    func_id != BPF_FUNC_redirect_map)
+	    func_id != BPF_FUNC_redirect_map &&
+	    func_id != BPF_FUNC_map_lookup_percpu_elem)
 		return 0;
 
 	if (map == NULL) {
@@ -13811,7 +13818,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 		     insn->imm == BPF_FUNC_map_pop_elem    ||
 		     insn->imm == BPF_FUNC_map_peek_elem   ||
 		     insn->imm == BPF_FUNC_redirect_map    ||
-		     insn->imm == BPF_FUNC_for_each_map_elem)) {
+		     insn->imm == BPF_FUNC_for_each_map_elem ||
+		     insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
 			aux = &env->insn_aux_data[i + delta];
 			if (bpf_map_ptr_poisoned(aux))
 				goto patch_call_imm;
@@ -13860,6 +13868,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 					      bpf_callback_t callback_fn,
 					      void *callback_ctx,
 					      u64 flags))NULL));
+			BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
+				     (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
 
 patch_map_ops_generic:
 			switch (insn->imm) {
@@ -13887,6 +13897,9 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			case BPF_FUNC_for_each_map_elem:
 				insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
 				continue;
+			case BPF_FUNC_map_lookup_percpu_elem:
+				insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
+				continue;
 			}
 
 			goto patch_call_imm;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f15b826f9899..af4125407c20 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1182,6 +1182,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_map_pop_elem_proto;
 	case BPF_FUNC_map_peek_elem:
 		return &bpf_map_peek_elem_proto;
+	case BPF_FUNC_map_lookup_percpu_elem:
+		return &bpf_map_lookup_percpu_elem_proto;
 	case BPF_FUNC_ktime_get_ns:
 		return &bpf_ktime_get_ns_proto;
 	case BPF_FUNC_ktime_get_boot_ns:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 444fe6f1cf35..024fb9f319a8 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5154,6 +5154,14 @@ union bpf_attr {
  *		if not NULL, is a reference which must be released using its
  *		corresponding release function, or moved into a BPF map before
  *		program exit.
+ *
+ * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu)
+ * 	Description
+ * 		Perform a lookup in *percpu map* for an entry associated to
+ * 		*key* on *cpu*.
+ * 	Return
+ * 		Map value associated to *key* on *cpu*, or **NULL** if no entry
+ * 		was found or *cpu* is invalid.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5351,6 +5359,7 @@ union bpf_attr {
 	FN(skb_set_tstamp),		\
 	FN(ima_file_hash),		\
 	FN(kptr_xchg),			\
+	FN(map_lookup_percpu_elem),     \
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.20.1



* Re: [PATCH bpf-next] bpf: add bpf_map_lookup_percpu_elem for percpu map
  2022-05-07  2:48 [PATCH bpf-next] bpf: add bpf_map_lookup_percpu_elem for percpu map Feng zhou
@ 2022-05-10  0:34 ` Andrii Nakryiko
  2022-05-10  1:04   ` Yosry Ahmed
  0 siblings, 1 reply; 6+ messages in thread
From: Andrii Nakryiko @ 2022-05-10  0:34 UTC (permalink / raw)
  To: Feng zhou
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, Martin Lau,
	Song Liu, Yonghong Song, john fastabend, KP Singh,
	Steven Rostedt, Ingo Molnar, Jiri Olsa, Dave Marchevsky,
	Joanne Koong, Geliang Tang, Networking, bpf, open list,
	duanxiongchun, Muchun Song, Dongdong Wang, Cong Wang,
	zhouchengming

On Fri, May 6, 2022 at 7:49 PM Feng zhou <zhoufeng.zf@bytedance.com> wrote:
>
> From: Feng Zhou <zhoufeng.zf@bytedance.com>
>
> Tracing some functions, such as enqueue_task_fair, requires accessing
> per-cpu data of a specific cpu rather than the current one, which
> bpf_map_lookup_elem on a percpu map cannot do. So add
> bpf_map_lookup_percpu_elem to make this possible for percpu_array_map,
> percpu_hash_map and lru_percpu_hash_map.
>
> The implementation is straightforward: follow the map_lookup_elem
> implementation of the percpu maps, add a cpu parameter, and fetch the
> value for the specified cpu.
>

I don't think it's safe in general to access per-cpu data from another
CPU. I'd suggest just having either an ARRAY_OF_MAPS or adding CPU ID
as part of the key, if you need such a custom access pattern.
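
(For illustration only, a rough sketch of the cpu-id-in-the-key
variant with a plain hash map; the struct and map names here are
hypothetical:)

  struct cpu_key {
          u32 cpu;
          u32 idx;
  };

  struct {
          __uint(type, BPF_MAP_TYPE_HASH);
          __uint(max_entries, 4096);
          __type(key, struct cpu_key);
          __type(value, u64);
  } stats SEC(".maps");

  /* one entry per (cpu, idx) pair, readable and writable from any cpu */
  static __always_inline u64 *lookup_for_cpu(u32 cpu, u32 idx)
  {
          struct cpu_key k = { .cpu = cpu, .idx = idx };

          return bpf_map_lookup_elem(&stats, &k);
  }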

> Signed-off-by: Feng Zhou <zhoufeng.zf@bytedance.com>
> ---
>  include/linux/bpf.h            |  2 ++
>  include/uapi/linux/bpf.h       |  9 +++++++++
>  kernel/bpf/arraymap.c          | 15 +++++++++++++++
>  kernel/bpf/core.c              |  1 +
>  kernel/bpf/hashtab.c           | 32 ++++++++++++++++++++++++++++++++
>  kernel/bpf/helpers.c           | 18 ++++++++++++++++++
>  kernel/bpf/verifier.c          | 17 +++++++++++++++--
>  kernel/trace/bpf_trace.c       |  2 ++
>  tools/include/uapi/linux/bpf.h |  9 +++++++++
>  9 files changed, 103 insertions(+), 2 deletions(-)
>

[...]


* Re: [PATCH bpf-next] bpf: add bpf_map_lookup_percpu_elem for percpu map
  2022-05-10  0:34 ` Andrii Nakryiko
@ 2022-05-10  1:04   ` Yosry Ahmed
  2022-05-10  2:41     ` [External] " Feng Zhou
  0 siblings, 1 reply; 6+ messages in thread
From: Yosry Ahmed @ 2022-05-10  1:04 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Feng zhou, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Martin Lau, Song Liu, Yonghong Song, john fastabend, KP Singh,
	Steven Rostedt, Ingo Molnar, Jiri Olsa, Dave Marchevsky,
	Joanne Koong, Geliang Tang, Networking, bpf, open list,
	duanxiongchun, Muchun Song, Dongdong Wang, Cong Wang,
	zhouchengming

On Mon, May 9, 2022 at 5:34 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Fri, May 6, 2022 at 7:49 PM Feng zhou <zhoufeng.zf@bytedance.com> wrote:
> >
> > From: Feng Zhou <zhoufeng.zf@bytedance.com>
> >
> > Tracing some functions, such as enqueue_task_fair, requires accessing
> > per-cpu data of a specific cpu rather than the current one, which
> > bpf_map_lookup_elem on a percpu map cannot do. So add
> > bpf_map_lookup_percpu_elem to make this possible for percpu_array_map,
> > percpu_hash_map and lru_percpu_hash_map.
> >
> > The implementation is straightforward: follow the map_lookup_elem
> > implementation of the percpu maps, add a cpu parameter, and fetch the
> > value for the specified cpu.
> >
>
> I don't think it's safe in general to access per-cpu data from another
> CPU. I'd suggest just having either an ARRAY_OF_MAPS or adding CPU ID
> as part of the key, if you need such a custom access pattern.

I actually just sent an RFC patch series containing a similar patch
for the exact same purpose. There are instances in the kernel where
per-cpu data is accessed from other cpus (e.g.
mem_cgroup_css_rstat_flush()). I believe, like any other variable,
percpu data can be safe or not safe to access, based on the access
pattern. It is up to the user to coordinate accesses to the variable.

For example, in my use case, one of the accessors only reads percpu
values of different cpus, so it should be safe. If a user accesses
percpu data of another cpu without guaranteeing safety, they corrupt
their own data. I understand that the main purpose of percpu data is
lockless (and therefore fast) access, but in some use cases the user
may be able to safely (and locklessly) access the data concurrently.
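
(As a sketch of such a read-only accessor, assuming the helper from
this patch; MAX_CPUS is an illustrative loop bound, not a real kernel
constant:)

  #define MAX_CPUS 128

  /* sum a percpu counter across cpus; this path only reads, so the
   * per-cpu writers are never disturbed */
  static __always_inline u64 sum_counter(void *map, u32 key)
  {
          u64 sum = 0, *val;
          u32 cpu;

          for (cpu = 0; cpu < MAX_CPUS; cpu++) {
                  /* returns NULL once cpu >= nr_cpu_ids */
                  val = bpf_map_lookup_percpu_elem(map, &key, cpu);
                  if (!val)
                          break;
                  sum += *val;
          }
          return sum;
  }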

>
> > Signed-off-by: Feng Zhou <zhoufeng.zf@bytedance.com>
> > ---
> >  include/linux/bpf.h            |  2 ++
> >  include/uapi/linux/bpf.h       |  9 +++++++++
> >  kernel/bpf/arraymap.c          | 15 +++++++++++++++
> >  kernel/bpf/core.c              |  1 +
> >  kernel/bpf/hashtab.c           | 32 ++++++++++++++++++++++++++++++++
> >  kernel/bpf/helpers.c           | 18 ++++++++++++++++++
> >  kernel/bpf/verifier.c          | 17 +++++++++++++++--
> >  kernel/trace/bpf_trace.c       |  2 ++
> >  tools/include/uapi/linux/bpf.h |  9 +++++++++
> >  9 files changed, 103 insertions(+), 2 deletions(-)
> >
>
> [...]


* Re: [External] Re: [PATCH bpf-next] bpf: add bpf_map_lookup_percpu_elem for percpu map
  2022-05-10  1:04   ` Yosry Ahmed
@ 2022-05-10  2:41     ` Feng Zhou
  2022-05-10  3:15       ` Alexei Starovoitov
  0 siblings, 1 reply; 6+ messages in thread
From: Feng Zhou @ 2022-05-10  2:41 UTC (permalink / raw)
  To: Yosry Ahmed, Andrii Nakryiko
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, Martin Lau,
	Song Liu, Yonghong Song, john fastabend, KP Singh,
	Steven Rostedt, Ingo Molnar, Jiri Olsa, Dave Marchevsky,
	Joanne Koong, Geliang Tang, Networking, bpf, open list,
	duanxiongchun, Muchun Song, Dongdong Wang, Cong Wang,
	zhouchengming

On 2022/5/10 9:04 AM, Yosry Ahmed wrote:
> On Mon, May 9, 2022 at 5:34 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
>> On Fri, May 6, 2022 at 7:49 PM Feng zhou <zhoufeng.zf@bytedance.com> wrote:
>>> From: Feng Zhou <zhoufeng.zf@bytedance.com>
>>>
>>> Tracing some functions, such as enqueue_task_fair, requires accessing
>>> per-cpu data of a specific cpu rather than the current one, which
>>> bpf_map_lookup_elem on a percpu map cannot do. So add
>>> bpf_map_lookup_percpu_elem to make this possible for percpu_array_map,
>>> percpu_hash_map and lru_percpu_hash_map.
>>>
>>> The implementation is straightforward: follow the map_lookup_elem
>>> implementation of the percpu maps, add a cpu parameter, and fetch the
>>> value for the specified cpu.
>>>
>> I don't think it's safe in general to access per-cpu data from another
>> CPU. I'd suggest just having either an ARRAY_OF_MAPS or adding CPU ID
>> as part of the key, if you need such a custom access pattern.
> I actually just sent an RFC patch series containing a similar patch
> for the exact same purpose. There are instances in the kernel where
> per-cpu data is accessed from other cpus (e.g.
> mem_cgroup_css_rstat_flush()). I believe, like any other variable,
> percpu data can be safe or not safe to access, based on the access
> pattern. It is up to the user to coordinate accesses to the variable.
>
> For example, in my use case, one of the accessors only reads percpu
> values of different cpus, so it should be safe. If a user accesses
> percpu data of another cpu without guaranteeing safety, they corrupt
> their own data. I understand that the main purpose of percpu data is
> lockless (and therefore fast) access, but in some use cases the user
> may be able to safely (and locklessly) access the data concurrently.
>

Regarding data safety, I think users need to consider it before using
the helper. For example, when hooking enqueue_task_fair, the function
itself holds the rq lock of the corresponding cpu, so there is no
problem. The kernel only provides the mechanism, as with
bpf_per_cpu_ptr and bpf_this_cpu_ptr; data safety has to be guaranteed
by users in their particular scenarios, for example by using
bpf_spin_lock.
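
(A minimal sketch of the bpf_spin_lock pattern mentioned above, with
hypothetical names; note that bpf_spin_lock must live in the value of
a regular, non-percpu map, and is only usable in program types where
the verifier allows it:)

  struct locked_val {
          struct bpf_spin_lock lock;
          u64 data;
  };

  struct {
          __uint(type, BPF_MAP_TYPE_HASH);
          __uint(max_entries, 128);
          __type(key, u32);
          __type(value, struct locked_val);
  } guarded SEC(".maps");

  /* serialize concurrent updates from any cpu */
  static __always_inline void add_guarded(u32 key, u64 delta)
  {
          struct locked_val *v = bpf_map_lookup_elem(&guarded, &key);

          if (!v)
                  return;
          bpf_spin_lock(&v->lock);
          v->data += delta;
          bpf_spin_unlock(&v->lock);
  }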


>>> Signed-off-by: Feng Zhou <zhoufeng.zf@bytedance.com>
>>> ---
>>>   include/linux/bpf.h            |  2 ++
>>>   include/uapi/linux/bpf.h       |  9 +++++++++
>>>   kernel/bpf/arraymap.c          | 15 +++++++++++++++
>>>   kernel/bpf/core.c              |  1 +
>>>   kernel/bpf/hashtab.c           | 32 ++++++++++++++++++++++++++++++++
>>>   kernel/bpf/helpers.c           | 18 ++++++++++++++++++
>>>   kernel/bpf/verifier.c          | 17 +++++++++++++++--
>>>   kernel/trace/bpf_trace.c       |  2 ++
>>>   tools/include/uapi/linux/bpf.h |  9 +++++++++
>>>   9 files changed, 103 insertions(+), 2 deletions(-)
>>>
>> [...]




* Re: [External] Re: [PATCH bpf-next] bpf: add bpf_map_lookup_percpu_elem for percpu map
  2022-05-10  2:41     ` [External] " Feng Zhou
@ 2022-05-10  3:15       ` Alexei Starovoitov
  2022-05-10  5:52         ` Feng Zhou
  0 siblings, 1 reply; 6+ messages in thread
From: Alexei Starovoitov @ 2022-05-10  3:15 UTC (permalink / raw)
  To: Feng Zhou
  Cc: Yosry Ahmed, Andrii Nakryiko, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Martin Lau, Song Liu,
	Yonghong Song, john fastabend, KP Singh, Steven Rostedt,
	Ingo Molnar, Jiri Olsa, Dave Marchevsky, Joanne Koong,
	Geliang Tang, Networking, bpf, open list, Xiongchun Duan,
	Muchun Song, Dongdong Wang, Cong Wang, Chengming Zhou

On Mon, May 9, 2022 at 7:41 PM Feng Zhou <zhoufeng.zf@bytedance.com> wrote:
>
> On 2022/5/10 9:04 AM, Yosry Ahmed wrote:
> > On Mon, May 9, 2022 at 5:34 PM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> >> On Fri, May 6, 2022 at 7:49 PM Feng zhou <zhoufeng.zf@bytedance.com> wrote:
> >>> From: Feng Zhou <zhoufeng.zf@bytedance.com>
> >>>
> >>> Tracing some functions, such as enqueue_task_fair, requires accessing
> >>> per-cpu data of a specific cpu rather than the current one, which
> >>> bpf_map_lookup_elem on a percpu map cannot do. So add
> >>> bpf_map_lookup_percpu_elem to make this possible for percpu_array_map,
> >>> percpu_hash_map and lru_percpu_hash_map.
> >>>
> >>> The implementation is straightforward: follow the map_lookup_elem
> >>> implementation of the percpu maps, add a cpu parameter, and fetch the
> >>> value for the specified cpu.
> >>>
> >> I don't think it's safe in general to access per-cpu data from another
> >> CPU. I'd suggest just having either an ARRAY_OF_MAPS or adding CPU ID
> >> as part of the key, if you need such a custom access pattern.
> > I actually just sent an RFC patch series containing a similar patch
> > for the exact same purpose. There are instances in the kernel where
> > per-cpu data is accessed from other cpus (e.g.
> > mem_cgroup_css_rstat_flush()). I believe, like any other variable,
> > percpu data can be safe or not safe to access, based on the access
> > pattern. It is up to the user to coordinate accesses to the variable.
> >
> > For example, in my use case, one of the accessors only reads percpu
> > values of different cpus, so it should be safe. If a user accesses
> > percpu data of another cpu without guaranteeing safety, they corrupt
> > their own data. I understand that the main purpose of percpu data is
> > lockless (and therefore fast) access, but in some use cases the user
> > may be able to safely (and locklessly) access the data concurrently.
> >
>
> Regarding data safety, I think users need to consider it before using
> the helper. For example, when hooking enqueue_task_fair, the function
> itself holds the rq lock of the corresponding cpu, so there is no
> problem. The kernel only provides the mechanism, as with
> bpf_per_cpu_ptr and bpf_this_cpu_ptr; data safety has to be guaranteed
> by users in their particular scenarios, for example by using
> bpf_spin_lock.

Right. The new helper looks useful and is safe.
Please add a selftest and respin.


* Re: [External] Re: [PATCH bpf-next] bpf: add bpf_map_lookup_percpu_elem for percpu map
  2022-05-10  3:15       ` Alexei Starovoitov
@ 2022-05-10  5:52         ` Feng Zhou
  0 siblings, 0 replies; 6+ messages in thread
From: Feng Zhou @ 2022-05-10  5:52 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Yosry Ahmed, Andrii Nakryiko, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Martin Lau, Song Liu,
	Yonghong Song, john fastabend, KP Singh, Steven Rostedt,
	Ingo Molnar, Jiri Olsa, Dave Marchevsky, Joanne Koong,
	Geliang Tang, Networking, bpf, open list, Xiongchun Duan,
	Muchun Song, Dongdong Wang, Cong Wang, Chengming Zhou

On 2022/5/10 11:15 AM, Alexei Starovoitov wrote:
> On Mon, May 9, 2022 at 7:41 PM Feng Zhou <zhoufeng.zf@bytedance.com> wrote:
>> On 2022/5/10 9:04 AM, Yosry Ahmed wrote:
>>> On Mon, May 9, 2022 at 5:34 PM Andrii Nakryiko
>>> <andrii.nakryiko@gmail.com> wrote:
>>>> On Fri, May 6, 2022 at 7:49 PM Feng zhou <zhoufeng.zf@bytedance.com> wrote:
>>>>> From: Feng Zhou <zhoufeng.zf@bytedance.com>
>>>>>
>>>>> Tracing some functions, such as enqueue_task_fair, requires accessing
>>>>> per-cpu data of a specific cpu rather than the current one, which
>>>>> bpf_map_lookup_elem on a percpu map cannot do. So add
>>>>> bpf_map_lookup_percpu_elem to make this possible for percpu_array_map,
>>>>> percpu_hash_map and lru_percpu_hash_map.
>>>>>
>>>>> The implementation is straightforward: follow the map_lookup_elem
>>>>> implementation of the percpu maps, add a cpu parameter, and fetch the
>>>>> value for the specified cpu.
>>>>>
>>>> I don't think it's safe in general to access per-cpu data from another
>>>> CPU. I'd suggest just having either an ARRAY_OF_MAPS or adding CPU ID
>>>> as part of the key, if you need such a custom access pattern.
>>> I actually just sent an RFC patch series containing a similar patch
>>> for the exact same purpose. There are instances in the kernel where
>>> per-cpu data is accessed from other cpus (e.g.
>>> mem_cgroup_css_rstat_flush()). I believe, like any other variable,
>>> percpu data can be safe or not safe to access, based on the access
>>> pattern. It is up to the user to coordinate accesses to the variable.
>>>
>>> For example, in my use case, one of the accessors only reads percpu
>>> values of different cpus, so it should be safe. If a user accesses
>>> percpu data of another cpu without guaranteeing safety, they corrupt
>>> their own data. I understand that the main purpose of percpu data is
>>> lockless (and therefore fast) access, but in some use cases the user
>>> may be able to safely (and locklessly) access the data concurrently.
>>>
>> Regarding data safety, I think users need to consider it before using
>> the helper. For example, when hooking enqueue_task_fair, the function
>> itself holds the rq lock of the corresponding cpu, so there is no
>> problem. The kernel only provides the mechanism, as with
>> bpf_per_cpu_ptr and bpf_this_cpu_ptr; data safety has to be guaranteed
>> by users in their particular scenarios, for example by using
>> bpf_spin_lock.
> Right. The new helper looks useful and is safe.
> Please add a selftest and respin.


Ok, will do. Thanks.



