All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexei Starovoitov <ast@fb.com>
To: Yonghong Song <yhs@fb.com>, <daniel@iogearbox.net>,
	<netdev@vger.kernel.org>
Cc: <kernel-team@fb.com>
Subject: Re: [RFC PATCH bpf-next 2/6] bpf: add bpf_get_stack helper
Date: Sun, 8 Apr 2018 20:34:07 -0700	[thread overview]
Message-ID: <e532530c-d5ea-a889-6467-1d8dd2b4f098@fb.com> (raw)
In-Reply-To: <20180406214846.916265-3-yhs@fb.com>

On 4/6/18 2:48 PM, Yonghong Song wrote:
> Currently, stackmap and bpf_get_stackid helper are provided
> for bpf program to get the stack trace. This approach has
> a limitation though. If two stack traces have the same hash,
> only one will get stored in the stackmap table,
> so some stack traces are missing from user perspective.
>
> This patch implements a new helper, bpf_get_stack, will
> send stack traces directly to bpf program. The bpf program
> is able to see all stack traces, and then can do in-kernel
> processing or send stack traces to user space through
> shared map or bpf_perf_event_output.
>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  include/linux/bpf.h      |  1 +
>  include/linux/filter.h   |  3 ++-
>  include/uapi/linux/bpf.h | 17 +++++++++++++--
>  kernel/bpf/stackmap.c    | 56 ++++++++++++++++++++++++++++++++++++++++++++++++
>  kernel/bpf/syscall.c     | 12 ++++++++++-
>  kernel/bpf/verifier.c    |  3 +++
>  kernel/trace/bpf_trace.c | 50 +++++++++++++++++++++++++++++++++++++++++-
>  7 files changed, 137 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 95a7abd..72ccb9a 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -676,6 +676,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto;
>  extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
>  extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
>  extern const struct bpf_func_proto bpf_get_stackid_proto;
> +extern const struct bpf_func_proto bpf_get_stack_proto;
>  extern const struct bpf_func_proto bpf_sock_map_update_proto;
>
>  /* Shared helpers among cBPF and eBPF. */
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index fc4e8f9..9b64f63 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -467,7 +467,8 @@ struct bpf_prog {
>  				dst_needed:1,	/* Do we need dst entry? */
>  				blinded:1,	/* Was blinded */
>  				is_func:1,	/* program is a bpf function */
> -				kprobe_override:1; /* Do we override a kprobe? */
> +				kprobe_override:1, /* Do we override a kprobe? */
> +				need_callchain_buf:1; /* Needs callchain buffer? */
>  	enum bpf_prog_type	type;		/* Type of BPF program */
>  	enum bpf_attach_type	expected_attach_type; /* For some prog types */
>  	u32			len;		/* Number of filter blocks */
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index c5ec897..a4ff5b7 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -517,6 +517,17 @@ union bpf_attr {
>   *             other bits - reserved
>   *     Return: >= 0 stackid on success or negative error
>   *
> + * int bpf_get_stack(ctx, buf, size, flags)
> + *     walk user or kernel stack and store the ips in buf
> + *     @ctx: struct pt_regs*
> + *     @buf: user buffer to fill stack
> + *     @size: the buf size
> + *     @flags: bits 0-7 - numer of stack frames to skip
> + *             bit 8 - collect user stack instead of kernel
> + *             bit 11 - get build-id as well if user stack
> + *             other bits - reserved
> + *     Return: >= 0 size copied on success or negative error
> + *
>   * s64 bpf_csum_diff(from, from_size, to, to_size, seed)
>   *     calculate csum diff
>   *     @from: raw from buffer
> @@ -821,7 +832,8 @@ union bpf_attr {
>  	FN(msg_apply_bytes),		\
>  	FN(msg_cork_bytes),		\
>  	FN(msg_pull_data),		\
> -	FN(bind),
> +	FN(bind),			\
> +	FN(get_stack),
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> @@ -855,11 +867,12 @@ enum bpf_func_id {
>  /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
>  #define BPF_F_TUNINFO_IPV6		(1ULL << 0)
>
> -/* BPF_FUNC_get_stackid flags. */
> +/* BPF_FUNC_get_stackid and BPF_FUNC_get_stack flags. */
>  #define BPF_F_SKIP_FIELD_MASK		0xffULL
>  #define BPF_F_USER_STACK		(1ULL << 8)
>  #define BPF_F_FAST_STACK_CMP		(1ULL << 9)
>  #define BPF_F_REUSE_STACKID		(1ULL << 10)
> +#define BPF_F_USER_BUILD_ID		(1ULL << 11)

the comment above is not quite correct.
This new flag is only available for new helper.

>
>  /* BPF_FUNC_skb_set_tunnel_key flags. */
>  #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> index 04f6ec1..371c72e 100644
> --- a/kernel/bpf/stackmap.c
> +++ b/kernel/bpf/stackmap.c
> @@ -402,6 +402,62 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
>  	.arg3_type	= ARG_ANYTHING,
>  };
>
> +BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
> +	   u64, flags)
> +{
> +	u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
> +	bool user_build_id = flags & BPF_F_USER_BUILD_ID;
> +	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
> +	bool user = flags & BPF_F_USER_STACK;
> +	struct perf_callchain_entry *trace;
> +	bool kernel = !user;
> +	u64 *ips;
> +
> +	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
> +			       BPF_F_USER_BUILD_ID)))
> +		return -EINVAL;
> +
> +	elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id)
> +					    : sizeof(u64);
> +	if (unlikely(size % elem_size))
> +		return -EINVAL;
> +
> +	num_elem = size / elem_size;
> +	if (sysctl_perf_event_max_stack < num_elem)
> +		init_nr = 0;

prog's buffer should be zero padded in this case since it
points to uninit_mem.

> +	else
> +		init_nr = sysctl_perf_event_max_stack - num_elem;
> +	trace = get_perf_callchain(regs, init_nr, kernel, user,
> +				   sysctl_perf_event_max_stack, false, false);
> +	if (unlikely(!trace))
> +		return -EFAULT;
> +
> +	trace_nr = trace->nr - init_nr;
> +	if (trace_nr <= skip)
> +		return -EFAULT;
> +
> +	trace_nr -= skip;
> +	trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
> +	copy_len = trace_nr * elem_size;
> +	ips = trace->ip + skip + init_nr;
> +	if (user && user_build_id)

the combination of kern + user_build_id should probably be rejected
earlier with einval.

> +		stack_map_get_build_id_offset(buf, ips, trace_nr, user);
> +	else
> +		memcpy(buf, ips, copy_len);
> +
> +	return copy_len;
> +}
> +
> +const struct bpf_func_proto bpf_get_stack_proto = {
> +	.func		= bpf_get_stack,
> +	.gpl_only	= true,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_PTR_TO_CTX,
> +	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
> +	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,

why allow zero size?
I'm not sure the helper will work correctly when size=0

> +	.arg4_type	= ARG_ANYTHING,
> +};
> +
>  /* Called from eBPF program */
>  static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
>  {
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 0244973..2aa3a65 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -984,10 +984,13 @@ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
>  static void __bpf_prog_put_rcu(struct rcu_head *rcu)
>  {
>  	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
> +	bool need_callchain_buf = aux->prog->need_callchain_buf;
>
>  	free_used_maps(aux);
>  	bpf_prog_uncharge_memlock(aux->prog);
>  	security_bpf_prog_free(aux);
> +	if (need_callchain_buf)
> +		put_callchain_buffers();
>  	bpf_prog_free(aux->prog);
>  }
>
> @@ -1004,7 +1007,8 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
>  			bpf_prog_kallsyms_del(prog->aux->func[i]);
>  		bpf_prog_kallsyms_del(prog);
>
> -		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
> +		synchronize_rcu();
> +		__bpf_prog_put_rcu(&prog->aux->rcu);

there should have been lockdep splat.
We cannot call synchronize_rcu here, since we cannot sleep
in some cases.

  reply	other threads:[~2018-04-09  3:34 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-06 21:48 [RFC PATCH bpf-next 0/6] bpf: add bpf_get_stack_helper Yonghong Song
2018-04-06 21:48 ` [RFC PATCH bpf-next 1/6] bpf: change prototype for stack_map_get_build_id_offset Yonghong Song
2018-04-06 21:48 ` [RFC PATCH bpf-next 2/6] bpf: add bpf_get_stack helper Yonghong Song
2018-04-09  3:34   ` Alexei Starovoitov [this message]
2018-04-09  4:53     ` Yonghong Song
2018-04-09  5:02       ` Alexei Starovoitov
2018-04-09 10:01         ` Daniel Borkmann
2018-04-09 16:52           ` Yonghong Song
2018-04-06 21:48 ` [RFC PATCH bpf-next 3/6] tools/bpf: add bpf_get_stack helper to tools headers Yonghong Song
2018-04-06 21:48 ` [RFC PATCH bpf-next 4/6] samples/bpf: move common-purpose perf_event functions to bpf_load.c Yonghong Song
2018-04-06 21:48 ` [RFC PATCH bpf-next 5/6] samples/bpf: add a test for bpf_get_stack helper Yonghong Song
2018-04-06 21:48 ` [RFC PATCH bpf-next 6/6] tools/bpf: add a test case " Yonghong Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e532530c-d5ea-a889-6467-1d8dd2b4f098@fb.com \
    --to=ast@fb.com \
    --cc=daniel@iogearbox.net \
    --cc=kernel-team@fb.com \
    --cc=netdev@vger.kernel.org \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.