All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrii Nakryiko <andrii.nakryiko@gmail.com>
To: Shmulik Ladkani <shmulik@metanetworks.com>
Cc: bpf@vger.kernel.org, Daniel Borkmann <daniel@iogearbox.net>,
	John Fastabend <john.fastabend@gmail.com>,
	Joanne Koong <joannelkoong@gmail.com>,
	Alexei Starovoitov <ast@kernel.org>,
	Paul Chaignon <paul@isovalent.com>,
	Shmulik Ladkani <shmulik.ladkani@gmail.com>
Subject: Re: [PATCH v7 bpf-next 2/4] bpf: Support setting variable-length tunnel options
Date: Wed, 21 Sep 2022 17:20:21 -0700	[thread overview]
Message-ID: <CAEf4BzZE+JBDBaMzWa86zuFp67KUa41gGGcJVGW26qguFjmO4w@mail.gmail.com> (raw)
In-Reply-To: <20220911122328.306188-3-shmulik.ladkani@gmail.com>

On Sun, Sep 11, 2022 at 5:23 AM Shmulik Ladkani
<shmulik@metanetworks.com> wrote:
>
> Existing 'bpf_skb_set_tunnel_opt' allows setting tunnel options given
> an option buffer (ARG_PTR_TO_MEM) and the compile-time fixed buffer
> size (ARG_CONST_SIZE).
>
> However, in certain cases we wish to set tunnel options of dynamic
> length.
>
> For example, we have an ebpf program that gets geneve options on
> incoming packets, stores them into a map (using a key representing
> the incoming flow), and later needs to assign *same* options to
> reply packets (belonging to same flow).
>
> This is currently imposssible without knowing sender's exact geneve
> options length, which unfortunately is dymamic.
>
> Introduce 'bpf_skb_set_tunnel_opt_dynptr'.
>
> This is a variant of 'bpf_skb_set_tunnel_opt' which gets a bpf dynamic
> pointer (ARG_PTR_TO_DYNPTR) parameter whose data points to the options
> buffer to set.
>
> Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
>
> ---
> v3: Avoid 'inline' for the __bpf_skb_set_tunopt helper function
> v4: change API to be based on bpf_dynptr, suggested by John Fastabend <john.fastabend@gmail.com>
> v6: Remove superfluous 'len' from bpf_skb_set_tunnel_opt_dynptr API
>     (rely on dynptr's internal size), suggested by Andrii Nakryiko <andrii.nakryiko@gmail.com>
> ---
>  include/uapi/linux/bpf.h       | 11 +++++++++++
>  net/core/filter.c              | 31 +++++++++++++++++++++++++++++--
>  tools/include/uapi/linux/bpf.h | 11 +++++++++++
>  3 files changed, 51 insertions(+), 2 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 3df78c56c1bf..ba12f7e1ccb6 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5387,6 +5387,16 @@ union bpf_attr {
>   *     Return
>   *             Current *ktime*.
>   *
> + * long bpf_skb_set_tunnel_opt_dynptr(struct sk_buff *skb, struct bpf_dynptr *opt)
> + *     Description
> + *             Set tunnel options metadata for the packet associated to *skb*
> + *             to the option data pointed to by the *opt* dynptr.
> + *
> + *             See also the description of the **bpf_skb_get_tunnel_opt**\ ()
> + *             helper for additional information.
> + *     Return
> + *             0 on success, or a negative error in case of failure.
> + *
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5598,6 +5608,7 @@ union bpf_attr {
>         FN(tcp_raw_check_syncookie_ipv4),       \
>         FN(tcp_raw_check_syncookie_ipv6),       \
>         FN(ktime_get_tai_ns),           \
> +       FN(skb_set_tunnel_opt_dynptr),  \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/net/core/filter.c b/net/core/filter.c
> index e872f45399b0..1c652936ef86 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -4674,8 +4674,7 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
>         .arg4_type      = ARG_ANYTHING,
>  };
>
> -BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
> -          const u8 *, from, u32, size)
> +static u64 __bpf_skb_set_tunopt(struct sk_buff *skb, const u8 *from, u32 size)
>  {
>         struct ip_tunnel_info *info = skb_tunnel_info(skb);
>         const struct metadata_dst *md = this_cpu_ptr(md_dst);
> @@ -4690,6 +4689,22 @@ BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
>         return 0;
>  }
>
> +BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
> +          const u8 *, from, u32, size)
> +{
> +       return __bpf_skb_set_tunopt(skb, from, size);
> +}
> +
> +BPF_CALL_2(bpf_skb_set_tunnel_opt_dynptr, struct sk_buff *, skb,
> +          struct bpf_dynptr_kern *, ptr)
> +{
> +       const u8 *from = bpf_dynptr_get_data(ptr);
> +
> +       if (unlikely(!from))
> +               return -EFAULT;
> +       return __bpf_skb_set_tunopt(skb, from, bpf_dynptr_get_size(ptr));
> +}
> +
>  static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
>         .func           = bpf_skb_set_tunnel_opt,
>         .gpl_only       = false,
> @@ -4699,6 +4714,14 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
>         .arg3_type      = ARG_CONST_SIZE,
>  };
>
> +static const struct bpf_func_proto bpf_skb_set_tunnel_opt_dynptr_proto = {
> +       .func           = bpf_skb_set_tunnel_opt_dynptr,
> +       .gpl_only       = false,
> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_CTX,
> +       .arg2_type      = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL,

being able to accept only DYNPTR_TYPE_LOCAL is quite limiting. We have
RINGBUF type, as well as soon we'll have MALLOC type. Even with SKB
and XDP types there is a linear area that kernel accept directly.

So if feels better to not specify exact type, accept any type, and
then have internal kernel helpers that will return you direct memory
pointer, if it is readable (i.e., for skb/xdp that would mean data
points to linear part).

So basically exactly the same behavior as bpf_dynptr_data() BPF helper.

Also note that dynptr is now CAP_BPF-only, so you might want to make
sure that your new helper is also CAP_BPF-guarded?

> +};
> +
>  static const struct bpf_func_proto *
>  bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
>  {
> @@ -4719,6 +4742,8 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
>                 return &bpf_skb_set_tunnel_key_proto;
>         case BPF_FUNC_skb_set_tunnel_opt:
>                 return &bpf_skb_set_tunnel_opt_proto;
> +       case BPF_FUNC_skb_set_tunnel_opt_dynptr:
> +               return &bpf_skb_set_tunnel_opt_dynptr_proto;
>         default:
>                 return NULL;
>         }
> @@ -7798,6 +7823,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>         case BPF_FUNC_skb_get_tunnel_opt:
>                 return &bpf_skb_get_tunnel_opt_proto;
>         case BPF_FUNC_skb_set_tunnel_opt:
> +       case BPF_FUNC_skb_set_tunnel_opt_dynptr:
>                 return bpf_get_skb_set_tunnel_proto(func_id);
>         case BPF_FUNC_redirect:
>                 return &bpf_redirect_proto;
> @@ -8145,6 +8171,7 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>         case BPF_FUNC_skb_get_tunnel_opt:
>                 return &bpf_skb_get_tunnel_opt_proto;
>         case BPF_FUNC_skb_set_tunnel_opt:
> +       case BPF_FUNC_skb_set_tunnel_opt_dynptr:
>                 return bpf_get_skb_set_tunnel_proto(func_id);
>         case BPF_FUNC_redirect:
>                 return &bpf_redirect_proto;
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 3df78c56c1bf..ba12f7e1ccb6 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -5387,6 +5387,16 @@ union bpf_attr {
>   *     Return
>   *             Current *ktime*.
>   *
> + * long bpf_skb_set_tunnel_opt_dynptr(struct sk_buff *skb, struct bpf_dynptr *opt)
> + *     Description
> + *             Set tunnel options metadata for the packet associated to *skb*
> + *             to the option data pointed to by the *opt* dynptr.
> + *
> + *             See also the description of the **bpf_skb_get_tunnel_opt**\ ()
> + *             helper for additional information.
> + *     Return
> + *             0 on success, or a negative error in case of failure.
> + *
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5598,6 +5608,7 @@ union bpf_attr {
>         FN(tcp_raw_check_syncookie_ipv4),       \
>         FN(tcp_raw_check_syncookie_ipv6),       \
>         FN(ktime_get_tai_ns),           \
> +       FN(skb_set_tunnel_opt_dynptr),  \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> --
> 2.37.3
>

  reply	other threads:[~2022-09-22  0:20 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-11 12:23 [PATCH v7 bpf-next 0/4] bpf: Support setting variable-length tunnel options Shmulik Ladkani
2022-09-11 12:23 ` [PATCH v7 bpf-next 1/4] bpf: Export 'bpf_dynptr_get_data, bpf_dynptr_get_size' helpers Shmulik Ladkani
2022-09-20  2:32   ` Yonghong Song
2022-09-21  8:38   ` Hou Tao
2022-09-21 21:27     ` Kumar Kartikeya Dwivedi
2022-09-11 12:23 ` [PATCH v7 bpf-next 2/4] bpf: Support setting variable-length tunnel options Shmulik Ladkani
2022-09-22  0:20   ` Andrii Nakryiko [this message]
2022-09-11 12:23 ` [PATCH v7 bpf-next 3/4] selftests/bpf: Simplify test_tunnel setup for allowing non-local tunnel traffic Shmulik Ladkani
2022-09-11 12:23 ` [PATCH v7 bpf-next 4/4] selftests/bpf: Add geneve with bpf_skb_set_tunnel_opt_dynptr test-case to test_progs Shmulik Ladkani
2022-09-20  2:58   ` Yonghong Song
2022-09-20  5:22     ` Shmulik Ladkani
2022-09-21  6:11       ` Yonghong Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAEf4BzZE+JBDBaMzWa86zuFp67KUa41gGGcJVGW26qguFjmO4w@mail.gmail.com \
    --to=andrii.nakryiko@gmail.com \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=joannelkoong@gmail.com \
    --cc=john.fastabend@gmail.com \
    --cc=paul@isovalent.com \
    --cc=shmulik.ladkani@gmail.com \
    --cc=shmulik@metanetworks.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.