Re: [PATCH bpf-next 2/3] bpf: Add get_func_[arg|ret|arg_cnt] helpers

From: Andrii Nakryiko <andriin@fb.com>
To: Jiri Olsa <jolsa@redhat.com>, Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>
Cc: <netdev@vger.kernel.org>, <bpf@vger.kernel.org>,
	Martin KaFai Lau <kafai@fb.com>, Song Liu <songliubraving@fb.com>,
	Yonghong Song <yhs@fb.com>,
	John Fastabend <john.fastabend@gmail.com>,
	KP Singh <kpsingh@chromium.org>
Subject: Re: [PATCH bpf-next 2/3] bpf: Add get_func_[arg|ret|arg_cnt] helpers
Date: Mon, 6 Dec 2021 13:54:53 -0800	[thread overview]
Message-ID: <f1cf12b9-01f5-f980-a349-1cbcd1124409@fb.com> (raw)
In-Reply-To: <20211204140700.396138-3-jolsa@kernel.org>

On 12/4/21 6:06 AM, Jiri Olsa wrote:
> Adding following helpers for tracing programs:
>
> Get n-th argument of the traced function:
>    long bpf_get_func_arg(void *ctx, u32 n, u64 *value)
>
> Get return value of the traced function:
>    long bpf_get_func_ret(void *ctx, u64 *value)
>
> Get arguments count of the traced function:
>    long bpf_get_func_arg_cnt(void *ctx)
>
> The trampoline now stores number of arguments on ctx-8
> address, so it's easy to verify argument index and find
> return value argument's position.
>
> Moving function ip address on the trampoline stack behind
> the number of function arguments, so it's now stored on
> ctx-16 address if it's needed.
>
> All helpers above are inlined by verifier.
>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---

Please cc me at my andrii@kernel.org address on future emails; it will
save me a lot of trouble when replying to your emails :) Thanks!

>   arch/x86/net/bpf_jit_comp.c    | 15 ++++++-
>   include/uapi/linux/bpf.h       | 28 +++++++++++++
>   kernel/bpf/verifier.c          | 73 +++++++++++++++++++++++++++++++++-
>   kernel/trace/bpf_trace.c       | 58 ++++++++++++++++++++++++++-
>   tools/include/uapi/linux/bpf.h | 28 +++++++++++++
>   5 files changed, 198 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index b106e80e8d9c..142e6b90fa52 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -1941,7 +1941,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
>   				void *orig_call)
>   {
>   	int ret, i, nr_args = m->nr_args;
> -	int regs_off, ip_off, stack_size = nr_args * 8;
> +	int regs_off, ip_off, args_off, stack_size = nr_args * 8;
>   	struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY];
>   	struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT];
>   	struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN];
> @@ -1968,6 +1968,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
>   	 *                 [ ...             ]
>   	 * RBP - regs_off  [ reg_arg1        ]
>   	 *
> +	 * RBP - args_off  [ args count      ]  always
> +	 *
>   	 * RBP - ip_off    [ traced function ]  BPF_TRAMP_F_IP_ARG flag
>   	 */
>   
> @@ -1978,6 +1980,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
>   
>   	regs_off = stack_size;
>   
> +	/* args count  */
> +	stack_size += 8;
> +	args_off = stack_size;
> +
>   	if (flags & BPF_TRAMP_F_IP_ARG)
>   		stack_size += 8; /* room for IP address argument */
>   
> @@ -1996,6 +2002,13 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
>   	EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
>   	EMIT1(0x53);		 /* push rbx */
>   
> +	/* Store number of arguments of the traced function:
> +	 *   mov rax, nr_args
> +	 *   mov QWORD PTR [rbp - args_off], rax
> +	 */
> +	emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_args);
> +	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -args_off);
> +
>   	if (flags & BPF_TRAMP_F_IP_ARG) {
>   		/* Store IP address of the traced function:
>   		 * mov rax, QWORD PTR [rbp + 8]
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index c26871263f1f..d5a3791071d6 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -4983,6 +4983,31 @@ union bpf_attr {
>    *	Return
>    *		The number of loops performed, **-EINVAL** for invalid **flags**,
>    *		**-E2BIG** if **nr_loops** exceeds the maximum number of loops.
> + *
> + * long bpf_get_func_arg(void *ctx, u32 n, u64 *value)
> + *	Description
> + *		Get **n**-th argument (zero based) of the traced function (for tracing programs)
> + *		returned in **value**.
> + *
> + *	Return
> + *		0 on success.
> + *		**-EINVAL** if n >= arguments count of traced function.
> + *
> + * long bpf_get_func_ret(void *ctx, u64 *value)
> + *	Description
> + *		Get return value of the traced function (for tracing programs)
> + *		in **value**.
> + *
> + *	Return
> + *		0 on success.
> + *		**-EINVAL** for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN.

-EOPNOTSUPP maybe?

> + *
> + * long bpf_get_func_arg_cnt(void *ctx)
> + *	Description
> + *		Get number of arguments of the traced function (for tracing programs).
> + *
> + *	Return
> + *		The number of arguments of the traced function.
>    */
>   #define __BPF_FUNC_MAPPER(FN)		\
>   	FN(unspec),			\
> @@ -5167,6 +5192,9 @@ union bpf_attr {
>   	FN(kallsyms_lookup_name),	\
>   	FN(find_vma),			\
>   	FN(loop),			\
> +	FN(get_func_arg),		\
> +	FN(get_func_ret),		\
> +	FN(get_func_arg_cnt),		\
>   	/* */
>   
>   /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 6522ffdea487..cf6853d3a8e9 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -12974,6 +12974,7 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env,
>   static int do_misc_fixups(struct bpf_verifier_env *env)
>   {
>   	struct bpf_prog *prog = env->prog;
> +	enum bpf_attach_type eatype = prog->expected_attach_type;
>   	bool expect_blinding = bpf_jit_blinding_enabled(prog);
>   	enum bpf_prog_type prog_type = resolve_prog_type(prog);
>   	struct bpf_insn *insn = prog->insnsi;
> @@ -13344,11 +13345,79 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
>   			continue;
>   		}
>   
> +		/* Implement bpf_get_func_arg inline. */
> +		if (prog_type == BPF_PROG_TYPE_TRACING &&
> +		    insn->imm == BPF_FUNC_get_func_arg) {
> +			/* Load nr_args from ctx - 8 */
> +			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
> +			insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
> +			insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
> +			insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
> +			insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
> +			insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
> +			insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
> +			insn_buf[7] = BPF_JMP_A(1);
> +			insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
> +			cnt = 9;
> +
> +			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
> +			if (!new_prog)
> +				return -ENOMEM;
> +
> +			delta    += cnt - 1;
> +			env->prog = prog = new_prog;
> +			insn      = new_prog->insnsi + i + delta;
> +			continue;
> +		}
> +
> +		/* Implement bpf_get_func_ret inline. */
> +		if (prog_type == BPF_PROG_TYPE_TRACING &&
> +		    insn->imm == BPF_FUNC_get_func_ret) {
> +			if (eatype == BPF_TRACE_FEXIT ||
> +			    eatype == BPF_MODIFY_RETURN) {
> +				/* Load nr_args from ctx - 8 */
> +				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
> +				insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
> +				insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
> +				insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
> +				insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
> +				insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
> +				cnt = 6;
> +			} else {
> +				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
> +				cnt = 1;
> +			}
> +
> +			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
> +			if (!new_prog)
> +				return -ENOMEM;
> +
> +			delta    += cnt - 1;
> +			env->prog = prog = new_prog;
> +			insn      = new_prog->insnsi + i + delta;
> +			continue;
> +		}
> +
> +		/* Implement get_func_arg_cnt inline. */
> +		if (prog_type == BPF_PROG_TYPE_TRACING &&
> +		    insn->imm == BPF_FUNC_get_func_arg_cnt) {
> +			/* Load nr_args from ctx - 8 */
> +			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
> +
> +			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
> +			if (!new_prog)
> +				return -ENOMEM;
> +
> +			env->prog = prog = new_prog;
> +			insn      = new_prog->insnsi + i + delta;
> +			continue;
> +		}

To be entirely honest, I'm not even sure we need to inline them. In 
programs that care about performance they will be called at most once. 
In others it doesn't matter. But even if they weren't, is the function 
call really such a big overhead for tracing cases? I don't mind the
inlining either way, I just find the inlined code hard to follow.

> +
>   		/* Implement bpf_get_func_ip inline. */
>   		if (prog_type == BPF_PROG_TYPE_TRACING &&
>   		    insn->imm == BPF_FUNC_get_func_ip) {
> -			/* Load IP address from ctx - 8 */
> -			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
> +			/* Load IP address from ctx - 16 */
> +			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
>   
>   			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
>   			if (!new_prog)

[...]