From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
To: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: bpf@vger.kernel.org, "Alexei Starovoitov" <ast@kernel.org>,
	"Daniel Borkmann" <daniel@iogearbox.net>,
	"Andrii Nakryiko" <andrii@kernel.org>,
	"Martin KaFai Lau" <kafai@fb.com>,
	"Song Liu" <songliubraving@fb.com>, "Yonghong Song" <yhs@fb.com>,
	"Jesper Dangaard Brouer" <brouer@redhat.com>,
	"Toke Høiland-Jørgensen" <toke@redhat.com>,
	netdev@vger.kernel.org
Subject: Re: [PATCH bpf-next v6 8/9] libbpf: Update gen_loader to emit BTF_KIND_FUNC relocations
Date: Fri, 1 Oct 2021 12:56:46 -0700
Message-ID: <20211001195646.7d5ycynrsivn3zxv@ast-mbp.dhcp.thefacebook.com>
In-Reply-To: <20210930062948.1843919-9-memxor@gmail.com>

On Thu, Sep 30, 2021 at 11:59:47AM +0530, Kumar Kartikeya Dwivedi wrote:
> This change updates the BPF syscall loader to process BTF_KIND_FUNC
> relocations, with support for weak kfunc relocations. The general idea
> is to move map_fds into the loader map, and to also use its data for
> storing kfunc BTF fds. Since both reuse the fd_array parameter, they
> need to be kept together.
> 
> For map_fds, we reserve MAX_USED_MAPS slots in a region, and for
> kfuncs, we reserve MAX_KFUNC_DESCS. This gives insn->off a better
> chance of staying <= INT16_MAX than treating the data map as a sparse
> array and adding fds as needed.
> 
> When the MAX_KFUNC_DESCS limit is reached, we fall back to the sparse
> array model, so that as long as the index remains <= INT16_MAX, we can
> pass it relative to the start of fd_array.
> 
> We store all ksyms in an array, which lets us avoid repeated calls to
> the bpf_btf_find_by_name_kind helper and reuse the BTF fd that was
> already stored. As later tests show, this also speeds up loading
> compared to emitting the helper call in all cases.
> 
> Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
> ---
>  tools/lib/bpf/bpf_gen_internal.h |  16 +-
>  tools/lib/bpf/gen_loader.c       | 323 ++++++++++++++++++++++++++-----
>  tools/lib/bpf/libbpf.c           |   8 +-
>  3 files changed, 292 insertions(+), 55 deletions(-)
> 
> diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h
> index 615400391e57..70eccbffefb1 100644
> --- a/tools/lib/bpf/bpf_gen_internal.h
> +++ b/tools/lib/bpf/bpf_gen_internal.h
> @@ -7,6 +7,15 @@ struct ksym_relo_desc {
>  	const char *name;
>  	int kind;
>  	int insn_idx;
> +	bool is_weak;
> +};
> +
> +struct ksym_desc {
> +	const char *name;
> +	int ref;
> +	int kind;
> +	int off;
> +	int insn;
>  };
>  
>  struct bpf_gen {
> @@ -24,6 +33,10 @@ struct bpf_gen {
>  	int relo_cnt;
>  	char attach_target[128];
>  	int attach_kind;
> +	struct ksym_desc *ksyms;
> +	__u32 nr_ksyms;
> +	int fd_array;
> +	int nr_fd_array;
>  };
>  
>  void bpf_gen__init(struct bpf_gen *gen, int log_level);
> @@ -36,6 +49,7 @@ void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_a
>  void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size);
>  void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx);
>  void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type);
> -void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, int insn_idx);
> +void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, int kind,
> +			    int insn_idx);
>  
>  #endif
> diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
> index 80087b13877f..9902f2f47fb2 100644
> --- a/tools/lib/bpf/gen_loader.c
> +++ b/tools/lib/bpf/gen_loader.c
> @@ -14,8 +14,10 @@
>  #include "bpf_gen_internal.h"
>  #include "skel_internal.h"
>  
> -#define MAX_USED_MAPS 64
> -#define MAX_USED_PROGS 32
> +#define MAX_USED_MAPS	64
> +#define MAX_USED_PROGS	32
> +#define MAX_KFUNC_DESCS 256
> +#define MAX_FD_ARRAY_SZ (MAX_USED_PROGS + MAX_KFUNC_DESCS)
>  
>  /* The following structure describes the stack layout of the loader program.
>   * In addition R6 contains the pointer to context.
> @@ -30,7 +32,6 @@
>   */
>  struct loader_stack {
>  	__u32 btf_fd;
> -	__u32 map_fd[MAX_USED_MAPS];
>  	__u32 prog_fd[MAX_USED_PROGS];
>  	__u32 inner_map_fd;
>  };
> @@ -143,13 +144,58 @@ static int add_data(struct bpf_gen *gen, const void *data, __u32 size)
>  	if (realloc_data_buf(gen, size8))
>  		return 0;
>  	prev = gen->data_cur;
> -	memcpy(gen->data_cur, data, size);
> +	if (data)
> +		memcpy(gen->data_cur, data, size);

The man page says that realloc doesn't initialize the memory.
Let's zero it here instead of leaving it uninitialized.
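i.e. something like this in add_data() (an untested sketch of what I
mean, reusing the variables from the hunk above):

	if (data)
		memcpy(gen->data_cur, data, size);
	else
		memset(gen->data_cur, 0, size);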

>  	gen->data_cur += size;
>  	memcpy(gen->data_cur, &zero, size8 - size);
>  	gen->data_cur += size8 - size;
>  	return prev - gen->data_start;
>  }
>  
> +/* Get index for map_fd/btf_fd slot in reserved fd_array, or in data relative
> + * to start of fd_array. Caller can decide if it is usable or not.
> + */
> +static int fd_array_init(struct bpf_gen *gen)
> +{
> +	if (!gen->fd_array)
> +		gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int));
> +	return gen->fd_array;
> +}
> +
> +static int add_map_fd(struct bpf_gen *gen)
> +{
> +	if (!fd_array_init(gen))
> +		return -1;

Please drop the error check here and in the other places. In an
out-of-mem situation gen->error will be set and all subsequent ops
will be using zero. The final check is in finish().
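
To spell out the pattern (a standalone sketch, not the actual libbpf
code; the *_sketch names are made up for illustration): the first
failure latches the error, later ops degrade to harmless no-ops, and
only the final call reports it.

#include <errno.h>
#include <stdlib.h>
#include <string.h>

struct gen_sketch {
	char *buf;
	size_t len;
	int error;
};

static int add_data_sketch(struct gen_sketch *gen, const void *data, size_t size)
{
	char *p;

	if (gen->error)
		return 0;
	p = realloc(gen->buf, gen->len + size);
	if (!p) {
		gen->error = -ENOMEM;	/* latch the first error */
		return 0;		/* harmless offset; callers don't check */
	}
	gen->buf = p;
	if (data)
		memcpy(gen->buf + gen->len, data, size);
	else
		memset(gen->buf + gen->len, 0, size);
	gen->len += size;
	return (int)(gen->len - size);	/* offset of the new chunk */
}

/* the single error check happens once, at the end */
static int finish_sketch(struct gen_sketch *gen)
{
	return gen->error;
}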

> +	if (gen->nr_maps == MAX_USED_MAPS) {
> +		pr_warn("Total maps exceeds %d\n", MAX_USED_MAPS);
> +		gen->error = -E2BIG;
> +		return -1;
> +	}
> +	return gen->nr_maps++;
> +}
> +
> +static int add_kfunc_btf_fd(struct bpf_gen *gen)
> +{
> +	int cur;
> +
> +	if (!fd_array_init(gen))
> +		return -1;

drop it.

> +	if (gen->nr_fd_array == MAX_KFUNC_DESCS) {
> +		cur = add_data(gen, NULL, sizeof(int));
> +		if (!cur)
> +			return -1;

drop it.

> +		return (cur - gen->fd_array) / sizeof(int);
> +	}
> +	return MAX_USED_MAPS + gen->nr_fd_array++;
> +}
> +
> +static int blob_fd_array_off(struct bpf_gen *gen, int index)
> +{
> +	if (!gen->fd_array)
> +		return 0;

drop it.

> +	return gen->fd_array + (index * sizeof(int));

no need for extra ()

> +}
> +
>  static int insn_bytes_to_bpf_size(__u32 sz)
>  {
>  	switch (sz) {
> @@ -171,14 +217,22 @@ static void emit_rel_store(struct bpf_gen *gen, int off, int data)
>  	emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0));
>  }
>  
> -/* *(u64 *)(blob + off) = (u64)(void *)(%sp + stack_off) */
> -static void emit_rel_store_sp(struct bpf_gen *gen, int off, int stack_off)
> +static void move_blob2blob(struct bpf_gen *gen, int off, int size, int blob_off)
>  {
> -	emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_10));
> -	emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, stack_off));
> +	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_2, BPF_PSEUDO_MAP_IDX_VALUE,
> +					 0, 0, 0, blob_off));
> +	emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_2, 0));
>  	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
>  					 0, 0, 0, off));
> -	emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0));
> +	emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0));
> +}
> +
> +static void move_blob2ctx(struct bpf_gen *gen, int ctx_off, int size, int blob_off)
> +{
> +	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
> +					 0, 0, 0, blob_off));
> +	emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_1, 0));
> +	emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_6, BPF_REG_0, ctx_off));
>  }
>  
>  static void move_ctx2blob(struct bpf_gen *gen, int off, int size, int ctx_off,
> @@ -326,11 +380,11 @@ int bpf_gen__finish(struct bpf_gen *gen)
>  			       offsetof(struct bpf_prog_desc, prog_fd), 4,
>  			       stack_off(prog_fd[i]));
>  	for (i = 0; i < gen->nr_maps; i++)
> -		move_stack2ctx(gen,
> -			       sizeof(struct bpf_loader_ctx) +
> -			       sizeof(struct bpf_map_desc) * i +
> -			       offsetof(struct bpf_map_desc, map_fd), 4,
> -			       stack_off(map_fd[i]));
> +		move_blob2ctx(gen,
> +			      sizeof(struct bpf_loader_ctx) +
> +			      sizeof(struct bpf_map_desc) * i +
> +			      offsetof(struct bpf_map_desc, map_fd), 4,
> +			      blob_fd_array_off(gen, i));
>  	emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0));
>  	emit(gen, BPF_EXIT_INSN());
>  	pr_debug("gen: finish %d\n", gen->error);
> @@ -390,7 +444,7 @@ void bpf_gen__map_create(struct bpf_gen *gen,
>  {
>  	int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id);
>  	bool close_inner_map_fd = false;
> -	int map_create_attr;
> +	int map_create_attr, idx;
>  	union bpf_attr attr;
>  
>  	memset(&attr, 0, attr_size);
> @@ -467,9 +521,13 @@ void bpf_gen__map_create(struct bpf_gen *gen,
>  		gen->error = -EDOM; /* internal bug */
>  		return;
>  	} else {
> -		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
> -				      stack_off(map_fd[map_idx])));
> -		gen->nr_maps++;
> +		/* add_map_fd does gen->nr_maps++ */
> +		idx = add_map_fd(gen);
> +		if (idx < 0)
> +			return;

drop it.

> +		emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
> +						 0, 0, 0, blob_fd_array_off(gen, idx)));
> +		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7, 0));
>  	}
>  	if (close_inner_map_fd)
>  		emit_sys_close_stack(gen, stack_off(inner_map_fd));
> @@ -511,8 +569,8 @@ static void emit_find_attach_target(struct bpf_gen *gen)
>  	 */
>  }
>  
> -void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind,
> -			    int insn_idx)
> +void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,
> +			    int kind, int insn_idx)
>  {
>  	struct ksym_relo_desc *relo;
>  
> @@ -524,38 +582,196 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind,
>  	gen->relos = relo;
>  	relo += gen->relo_cnt;
>  	relo->name = name;
> +	relo->is_weak = is_weak;
>  	relo->kind = kind;
>  	relo->insn_idx = insn_idx;
>  	gen->relo_cnt++;
>  }
>  
> -static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns)
> +/* returns existing ksym_desc with ref incremented, or inserts a new one */
> +static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_desc *relo)
>  {
> -	int name, insn, len = strlen(relo->name) + 1;
> +	struct ksym_desc *kdesc;
>  
> -	pr_debug("gen: emit_relo: %s at %d\n", relo->name, relo->insn_idx);
> -	name = add_data(gen, relo->name, len);
> +	for (int i = 0; i < gen->nr_ksyms; i++) {
> +		if (!strcmp(gen->ksyms[i].name, relo->name)) {
> +			gen->ksyms[i].ref++;
> +			return &gen->ksyms[i];
> +		}
> +	}
> +	kdesc = libbpf_reallocarray(gen->ksyms, gen->nr_ksyms + 1, sizeof(*kdesc));
> +	if (!kdesc) {
> +		gen->error = -ENOMEM;
> +		return NULL;
> +	}
> +	gen->ksyms = kdesc;
> +	kdesc = &gen->ksyms[gen->nr_ksyms++];
> +	kdesc->name = relo->name;
> +	kdesc->kind = relo->kind;
> +	kdesc->ref = 1;
> +	kdesc->off = 0;
> +	kdesc->insn = 0;
> +	return kdesc;
> +}
> +
> +/* Overwrites BPF_REG_{0, 1, 2, 3, 4, 7}
> + * Returns result in BPF_REG_7
> + */
> +static void emit_bpf_find_by_name_kind(struct bpf_gen *gen, struct ksym_relo_desc *relo)
> +{
> +	int name_off, len = strlen(relo->name) + 1;
>  
> +	name_off = add_data(gen, relo->name, len);
> +	if (!name_off)
> +		return;

drop it.

>  	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
> -					 0, 0, 0, name));
> +					 0, 0, 0, name_off));
>  	emit(gen, BPF_MOV64_IMM(BPF_REG_2, len));
>  	emit(gen, BPF_MOV64_IMM(BPF_REG_3, relo->kind));
>  	emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0));
>  	emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind));
>  	emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
>  	debug_ret(gen, "find_by_name_kind(%s,%d)", relo->name, relo->kind);
> -	emit_check_err(gen);
> +}
> +
> +/* Expects:
> + * BPF_REG_8 - pointer to instruction
> + *
> + * We need to reuse BTF fd for same symbol otherwise each relocation takes a new
> + * index, while kernel limits total kfunc BTFs to 256. For duplicate symbols,
> + * this would mean a new BTF fd index for each entry. By pairing symbol name
> + * with index, we get the insn->imm, insn->off pairing that kernel uses for
> + * kfunc_tab, which becomes the effective limit even though all of them may
> + * share same index in fd_array (such that kfunc_btf_tab has 1 element).
> + */
> +static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn)
> +{
> +	struct ksym_desc *kdesc;
> +	int btf_fd_idx;
> +
> +	kdesc = get_ksym_desc(gen, relo);
> +	if (!kdesc)
> +		return;
> +	/* try to copy from existing bpf_insn */
> +	if (kdesc->ref > 1) {
> +		move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4,
> +			       kdesc->insn + offsetof(struct bpf_insn, imm));
> +		move_blob2blob(gen, insn + offsetof(struct bpf_insn, off), 2,
> +			       kdesc->insn + offsetof(struct bpf_insn, off));
> +		goto log;
> +	}
> +	/* remember insn offset, so we can copy BTF ID and FD later */
> +	kdesc->insn = insn;
> +	emit_bpf_find_by_name_kind(gen, relo);
> +	if (!relo->is_weak)
> +		emit_check_err(gen);
> +	/* get index in fd_array to store BTF FD at */
> +	btf_fd_idx = add_kfunc_btf_fd(gen);
> +	if (btf_fd_idx < 0)
> +		return;

drop it.

> +	if (btf_fd_idx > INT16_MAX) {
> +		pr_warn("BTF fd off %d for kfunc %s exceeds INT16_MAX, cannot process relocation\n",
> +			btf_fd_idx, relo->name);
> +		gen->error = -E2BIG;
> +		return;

this one is necessary. keep it.
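
For context: the index lands in insn->off, which is a signed 16-bit
field in struct bpf_insn (quoting include/uapi/linux/bpf.h), so
anything above INT16_MAX simply cannot be encoded:

	struct bpf_insn {
		__u8	code;		/* opcode */
		__u8	dst_reg:4;	/* dest register */
		__u8	src_reg:4;	/* source register */
		__s16	off;		/* signed offset */
		__s32	imm;		/* signed immediate constant */
	};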
