From: Hao Luo <haoluo@google.com>
To: netdev@vger.kernel.org, bpf@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org
Cc: Shuah Khan <shuah@kernel.org>,
Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andriin@fb.com>,
Daniel Borkmann <daniel@iogearbox.net>,
Martin KaFai Lau <kafai@fb.com>, Song Liu <songliubraving@fb.com>,
Yonghong Song <yhs@fb.com>,
John Fastabend <john.fastabend@gmail.com>,
KP Singh <kpsingh@chromium.org>,
Quentin Monnet <quentin@isovalent.com>,
Hao Luo <haoluo@google.com>, Steven Rostedt <rostedt@goodmis.org>,
Ingo Molnar <mingo@redhat.com>, Andrey Ignatov <rdna@fb.com>,
Jakub Sitnicki <jakub@cloudflare.com>
Subject: [PATCH bpf-next v1 6/8] bpf: Introduce bpf_per_cpu_ptr()
Date: Wed, 19 Aug 2020 15:40:28 -0700 [thread overview]
Message-ID: <20200819224030.1615203-7-haoluo@google.com> (raw)
In-Reply-To: <20200819224030.1615203-1-haoluo@google.com>
Add bpf_per_cpu_ptr() to help bpf programs access percpu vars.
bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the kernel
except that it may return NULL. This happens when the cpu parameter is
out of range. So the caller must check the returned value.
Signed-off-by: Hao Luo <haoluo@google.com>
---
include/linux/bpf.h | 3 ++
include/linux/btf.h | 11 +++++++
include/uapi/linux/bpf.h | 14 +++++++++
kernel/bpf/btf.c | 10 -------
kernel/bpf/verifier.c | 64 ++++++++++++++++++++++++++++++++++++++--
kernel/trace/bpf_trace.c | 18 +++++++++++
6 files changed, 107 insertions(+), 13 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 55f694b63164..613404beab33 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -268,6 +268,7 @@ enum bpf_arg_type {
ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */
ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
+ ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
};
/* type of values returned from helper functions */
@@ -281,6 +282,7 @@ enum bpf_return_type {
RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */
RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */
RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */
+ RET_PTR_TO_MEM_OR_BTF_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
};
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -360,6 +362,7 @@ enum bpf_reg_type {
PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */
PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
+ PTR_TO_PERCPU_BTF_ID, /* reg points to percpu kernel type */
};
/* The information passed from prog-specific *_is_valid_access
diff --git a/include/linux/btf.h b/include/linux/btf.h
index cee4089e83c0..dc3509246913 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -72,6 +72,11 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type,
i < btf_type_vlen(struct_type); \
i++, member++)
+#define for_each_vsi(i, struct_type, member) \
+ for (i = 0, member = btf_type_var_secinfo(struct_type); \
+ i < btf_type_vlen(struct_type); \
+ i++, member++)
+
static inline bool btf_type_is_ptr(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_PTR;
@@ -156,6 +161,12 @@ static inline const struct btf_member *btf_type_member(const struct btf_type *t)
return (const struct btf_member *)(t + 1);
}
+static inline const struct btf_var_secinfo *btf_type_var_secinfo(
+ const struct btf_type *t)
+{
+ return (const struct btf_var_secinfo *)(t + 1);
+}
+
#ifdef CONFIG_BPF_SYSCALL
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 468376f2910b..c7e49a102ed2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3415,6 +3415,19 @@ union bpf_attr {
* A non-negative value equal to or less than *size* on success,
* or a negative error in case of failure.
*
+ * void *bpf_per_cpu_ptr(const void *ptr, u32 cpu)
+ * Description
+ * Take the address of a percpu ksym and return a pointer pointing
+ * to the variable on *cpu*. A ksym is an extern variable decorated
+ * with '__ksym'. A ksym is percpu if there is a global percpu var
+ * (either static or global) defined of the same name in the kernel.
+ *
+ * bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the
+ * kernel, except that bpf_per_cpu_ptr() may return NULL. This
+ * happens if *cpu* is larger than nr_cpu_ids. The caller of
+ * bpf_per_cpu_ptr() must check the returned value.
+ * Return
+ * A generic pointer pointing to the variable on *cpu*.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3559,6 +3572,7 @@ union bpf_attr {
FN(skc_to_tcp_request_sock), \
FN(skc_to_udp6_sock), \
FN(get_task_stack), \
+ FN(bpf_per_cpu_ptr), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index b6d8f653afe2..e735804f5f34 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -186,11 +186,6 @@
i < btf_type_vlen(struct_type); \
i++, member++)
-#define for_each_vsi(i, struct_type, member) \
- for (i = 0, member = btf_type_var_secinfo(struct_type); \
- i < btf_type_vlen(struct_type); \
- i++, member++)
-
#define for_each_vsi_from(i, from, struct_type, member) \
for (i = from, member = btf_type_var_secinfo(struct_type) + from; \
i < btf_type_vlen(struct_type); \
@@ -511,11 +506,6 @@ static const struct btf_var *btf_type_var(const struct btf_type *t)
return (const struct btf_var *)(t + 1);
}
-static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t)
-{
- return (const struct btf_var_secinfo *)(t + 1);
-}
-
static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
{
return kind_ops[BTF_INFO_KIND(t->info)];
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 47badde71f83..c2db6308d6fa 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -238,6 +238,7 @@ struct bpf_call_arg_meta {
int ref_obj_id;
int func_id;
u32 btf_id;
+ u32 ret_btf_id;
};
struct btf *btf_vmlinux;
@@ -503,6 +504,7 @@ static const char * const reg_type_str[] = {
[PTR_TO_XDP_SOCK] = "xdp_sock",
[PTR_TO_BTF_ID] = "ptr_",
[PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_",
+ [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_",
[PTR_TO_MEM] = "mem",
[PTR_TO_MEM_OR_NULL] = "mem_or_null",
[PTR_TO_RDONLY_BUF] = "rdonly_buf",
@@ -569,7 +571,9 @@ static void print_verifier_state(struct bpf_verifier_env *env,
/* reg->off should be 0 for SCALAR_VALUE */
verbose(env, "%lld", reg->var_off.value + reg->off);
} else {
- if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL)
+ if (t == PTR_TO_BTF_ID ||
+ t == PTR_TO_BTF_ID_OR_NULL ||
+ t == PTR_TO_PERCPU_BTF_ID)
verbose(env, "%s", kernel_type_name(reg->btf_id));
verbose(env, "(id=%d", reg->id);
if (reg_type_may_be_refcounted_or_null(t))
@@ -2183,6 +2187,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_RDONLY_BUF_OR_NULL:
case PTR_TO_RDWR_BUF:
case PTR_TO_RDWR_BUF_OR_NULL:
+ case PTR_TO_PERCPU_BTF_ID:
return true;
default:
return false;
@@ -3959,6 +3964,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
if (type != expected_type)
goto err_type;
}
+ } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
+ expected_type = PTR_TO_PERCPU_BTF_ID;
+ if (type != expected_type)
+ goto err_type;
+ if (!reg->btf_id) {
+ verbose(env, "Helper has zero btf_id in R%d\n", regno);
+ return -EACCES;
+ }
+ meta->ret_btf_id = reg->btf_id;
} else if (arg_type == ARG_PTR_TO_BTF_ID) {
expected_type = PTR_TO_BTF_ID;
if (type != expected_type)
@@ -4904,6 +4918,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
regs[BPF_REG_0].id = ++env->id_gen;
regs[BPF_REG_0].mem_size = meta.mem_size;
+ } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_OR_NULL) {
+ const struct btf_type *t;
+
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
+ if (!btf_type_is_struct(t)) {
+ u32 tsize;
+ const struct btf_type *ret;
+ const char *tname;
+
+ /* resolve the type size of ksym. */
+ ret = btf_resolve_size(btf_vmlinux, t, &tsize, NULL, NULL);
+ if (IS_ERR(ret)) {
+ tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+ verbose(env, "unable to resolve the size of type '%s': %ld\n",
+ tname, PTR_ERR(ret));
+ return -EINVAL;
+ }
+ regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
+ regs[BPF_REG_0].mem_size = tsize;
+ } else {
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
+ regs[BPF_REG_0].btf_id = meta.ret_btf_id;
+ }
} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
int ret_btf_id;
@@ -7210,10 +7248,15 @@ static inline int check_pseudo_btf_id(struct bpf_verifier_env *env,
struct bpf_insn *insn)
{
struct bpf_reg_state *regs = cur_regs(env);
- u32 type, id = insn->imm;
+ u32 datasec_id, type, id = insn->imm;
u64 addr;
const char *sym_name;
const struct btf_type *t = btf_type_by_id(btf_vmlinux, id);
+ const struct btf_type *datasec;
+ const struct btf_var_secinfo *vsi;
+ int i;
+
+ bool percpu = false;
if (!t) {
verbose(env, "%s: invalid btf_id %d\n", __func__, id);
@@ -7243,9 +7286,24 @@ static inline int check_pseudo_btf_id(struct bpf_verifier_env *env,
insn[1].imm = addr >> 32;
mark_reg_known_zero(env, regs, insn->dst_reg);
+ datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
+ BTF_KIND_DATASEC);
+ if (datasec_id > 0) {
+ datasec = btf_type_by_id(btf_vmlinux, datasec_id);
+ for_each_vsi(i, datasec, vsi) {
+ if (vsi->type == id) {
+ percpu = true;
+ break;
+ }
+ }
+ }
+
type = t->type;
t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
- if (!btf_type_is_struct(t)) {
+ if (percpu) {
+ regs[insn->dst_reg].type = PTR_TO_PERCPU_BTF_ID;
+ regs[insn->dst_reg].btf_id = type;
+ } else if (!btf_type_is_struct(t)) {
u32 tsize;
const struct btf_type *ret;
const char *tname;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a8d4f253ed77..7f0033960d82 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1098,6 +1098,22 @@ static const struct bpf_func_proto bpf_send_signal_thread_proto = {
.arg1_type = ARG_ANYTHING,
};
+BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
+{
+ if (cpu >= nr_cpu_ids)
+ return 0;
+
+ return (u64)per_cpu_ptr(ptr, cpu);
+}
+
+static const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
+ .func = bpf_per_cpu_ptr,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_MEM_OR_BTF_OR_NULL,
+ .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID,
+ .arg2_type = ARG_ANYTHING,
+};
+
const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -1182,6 +1198,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_jiffies64_proto;
case BPF_FUNC_get_task_stack:
return &bpf_get_task_stack_proto;
+ case BPF_FUNC_bpf_per_cpu_ptr:
+ return &bpf_per_cpu_ptr_proto;
default:
return NULL;
}
--
2.28.0.220.ged08abb693-goog
next prev parent reply other threads:[~2020-08-19 22:41 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-08-19 22:40 [PATCH bpf-next v1 0/8] bpf: BTF support for ksyms Hao Luo
2020-08-19 22:40 ` [PATCH bpf-next v1 1/8] bpf: Introduce pseudo_btf_id Hao Luo
2020-08-20 15:22 ` Yonghong Song
2020-08-20 17:04 ` Hao Luo
2020-08-25 0:05 ` Hao Luo
2020-08-25 0:43 ` Yonghong Song
2020-08-20 16:43 ` Yonghong Song
2020-08-20 21:53 ` Alexei Starovoitov
2020-08-21 2:22 ` Hao Luo
2020-08-19 22:40 ` [PATCH bpf-next v1 2/8] bpf: Propagate BPF_PSEUDO_BTF_ID to uapi headers in /tools Hao Luo
2020-08-20 16:44 ` Yonghong Song
2020-08-19 22:40 ` [PATCH bpf-next v1 3/8] bpf: Introduce help function to validate ksym's type Hao Luo
2020-08-20 17:20 ` Yonghong Song
2020-08-21 21:50 ` Andrii Nakryiko
2020-08-22 0:43 ` Hao Luo
2020-08-22 2:43 ` Andrii Nakryiko
2020-08-22 7:04 ` Hao Luo
2020-08-19 22:40 ` [PATCH bpf-next v1 4/8] bpf/libbpf: BTF support for typed ksyms Hao Luo
2020-08-21 22:37 ` Andrii Nakryiko
2020-08-27 22:29 ` Hao Luo
2020-09-01 18:11 ` Andrii Nakryiko
2020-09-01 20:35 ` Hao Luo
2020-09-01 23:54 ` Andrii Nakryiko
2020-09-02 0:46 ` Hao Luo
2020-08-19 22:40 ` [PATCH bpf-next v1 5/8] bpf/selftests: ksyms_btf to test " Hao Luo
2020-08-20 17:28 ` Yonghong Song
2020-08-21 23:03 ` Andrii Nakryiko
2020-08-22 7:26 ` Hao Luo
2020-08-22 7:35 ` Andrii Nakryiko
2020-08-19 22:40 ` Hao Luo [this message]
2020-08-22 3:26 ` [PATCH bpf-next v1 6/8] bpf: Introduce bpf_per_cpu_ptr() Andrii Nakryiko
2020-08-22 3:31 ` Andrii Nakryiko
2020-08-22 7:49 ` Hao Luo
2020-08-22 7:55 ` Andrii Nakryiko
2020-08-25 1:03 ` Hao Luo
2020-08-19 22:40 ` [PATCH bpf-next v1 7/8] bpf: Propagate bpf_per_cpu_ptr() to /tools Hao Luo
2020-08-20 17:30 ` Yonghong Song
2020-08-19 22:40 ` [PATCH bpf-next v1 8/8] bpf/selftests: Test for bpf_per_cpu_ptr() Hao Luo
2020-08-22 3:30 ` Andrii Nakryiko
2020-08-28 3:42 ` Hao Luo
2020-09-01 18:12 ` Andrii Nakryiko
2020-09-01 19:47 ` Hao Luo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200819224030.1615203-7-haoluo@google.com \
--to=haoluo@google.com \
--cc=andriin@fb.com \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=jakub@cloudflare.com \
--cc=john.fastabend@gmail.com \
--cc=kafai@fb.com \
--cc=kpsingh@chromium.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=quentin@isovalent.com \
--cc=rdna@fb.com \
--cc=rostedt@goodmis.org \
--cc=shuah@kernel.org \
--cc=songliubraving@fb.com \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).