From: Daniel Borkmann <daniel@iogearbox.net>
To: "Toke Høiland-Jørgensen" <toke@redhat.com>
Cc: David Ahern <dsahern@kernel.org>,
netdev@vger.kernel.org, bpf@vger.kernel.org
Subject: Re: [PATCH RFC bpf-next 1/2] bpf_redirect_neigh: Support supplying the nexthop as a helper parameter
Date: Mon, 19 Oct 2020 17:01:12 +0200 [thread overview]
Message-ID: <013e2c8b-13b5-661c-89c5-508b91cd3f4c@iogearbox.net> (raw)
In-Reply-To: <160277680864.157904.8719768977907736015.stgit@toke.dk>
On 10/15/20 5:46 PM, Toke Høiland-Jørgensen wrote:
[...]
> +struct bpf_redir_neigh {
> + /* network family for lookup (AF_INET, AF_INET6)
> + */
> + __u8 nh_family;
> + /* network address of nexthop; skips fib lookup to find gateway */
> + union {
> + __be32 ipv4_nh;
> + __u32 ipv6_nh[4]; /* in6_addr; network order */
> + };
> +};
> +
> enum bpf_task_fd_type {
> BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
> BPF_FD_TYPE_TRACEPOINT, /* tp name */
> diff --git a/net/core/filter.c b/net/core/filter.c
> index c5e2a1c5fd8d..d073031a3a61 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2165,12 +2165,11 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
> }
>
> #if IS_ENABLED(CONFIG_IPV6)
> -static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
> +static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
> + struct net_device *dev, const struct in6_addr *nexthop)
> {
> - struct dst_entry *dst = skb_dst(skb);
> - struct net_device *dev = dst->dev;
> u32 hh_len = LL_RESERVED_SPACE(dev);
> - const struct in6_addr *nexthop;
> + struct dst_entry *dst = NULL;
> struct neighbour *neigh;
>
> if (dev_xmit_recursion()) {
> @@ -2196,8 +2195,11 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
> }
>
> rcu_read_lock_bh();
> - nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
> - &ipv6_hdr(skb)->daddr);
> + if (!nexthop) {
> + dst = skb_dst(skb);
> + nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
> + &ipv6_hdr(skb)->daddr);
> + }
> neigh = ip_neigh_gw6(dev, nexthop);
> if (likely(!IS_ERR(neigh))) {
> int ret;
> @@ -2210,36 +2212,46 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
> return ret;
> }
> rcu_read_unlock_bh();
> - IP6_INC_STATS(dev_net(dst->dev),
> - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
> + if (dst)
> + IP6_INC_STATS(dev_net(dst->dev),
> + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
> out_drop:
> kfree_skb(skb);
> return -ENETDOWN;
> }
>
> -static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
> +static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
> + struct bpf_nh_params *nh)
> {
> const struct ipv6hdr *ip6h = ipv6_hdr(skb);
> + struct in6_addr *nexthop = NULL;
> struct net *net = dev_net(dev);
> int err, ret = NET_XMIT_DROP;
> - struct dst_entry *dst;
> - struct flowi6 fl6 = {
> - .flowi6_flags = FLOWI_FLAG_ANYSRC,
> - .flowi6_mark = skb->mark,
> - .flowlabel = ip6_flowinfo(ip6h),
> - .flowi6_oif = dev->ifindex,
> - .flowi6_proto = ip6h->nexthdr,
> - .daddr = ip6h->daddr,
> - .saddr = ip6h->saddr,
> - };
>
> - dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
> - if (IS_ERR(dst))
> - goto out_drop;
> + if (!nh->nh_family) {
> + struct dst_entry *dst;
> + struct flowi6 fl6 = {
> + .flowi6_flags = FLOWI_FLAG_ANYSRC,
> + .flowi6_mark = skb->mark,
> + .flowlabel = ip6_flowinfo(ip6h),
> + .flowi6_oif = dev->ifindex,
> + .flowi6_proto = ip6h->nexthdr,
> + .daddr = ip6h->daddr,
> + .saddr = ip6h->saddr,
nit: Would be good for readability to keep the previous whitespace alignment intact.
> + };
> +
> + dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
> + if (IS_ERR(dst))
> + goto out_drop;
>
> - skb_dst_set(skb, dst);
> + skb_dst_set(skb, dst);
> + } else if (nh->nh_family == AF_INET6) {
> + nexthop = &nh->ipv6_nh;
> + } else {
> + goto out_drop;
> + }
>
> - err = bpf_out_neigh_v6(net, skb);
> + err = bpf_out_neigh_v6(net, skb, dev, nexthop);
I'd probably model bpf_out_neigh_v{4,6}() as closely on each other as possible in terms of the
args we pass etc. In the v6 case you pass the nexthop in6_addr directly, whereas v4 passes
bpf_nh_params; I'd stick to the latter for v6 as well, to keep the two symmetric.
> if (unlikely(net_xmit_eval(err)))
> dev->stats.tx_errors++;
> else
> @@ -2260,11 +2272,9 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
> #endif /* CONFIG_IPV6 */
>
> #if IS_ENABLED(CONFIG_INET)
> -static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
> +static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
> + struct net_device *dev, struct bpf_nh_params *nh)
> {
> - struct dst_entry *dst = skb_dst(skb);
> - struct rtable *rt = container_of(dst, struct rtable, dst);
> - struct net_device *dev = dst->dev;
> u32 hh_len = LL_RESERVED_SPACE(dev);
> struct neighbour *neigh;
> bool is_v6gw = false;
> @@ -2292,7 +2302,20 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
> }
>
> rcu_read_lock_bh();
> - neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
> + if (!nh) {
> + struct dst_entry *dst = skb_dst(skb);
> + struct rtable *rt = container_of(dst, struct rtable, dst);
> +
> + neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
> + } else if (nh->nh_family == AF_INET6) {
> + neigh = ip_neigh_gw6(dev, &nh->ipv6_nh);
> + is_v6gw = true;
> + } else if (nh->nh_family == AF_INET) {
> + neigh = ip_neigh_gw4(dev, nh->ipv4_nh);
> + } else {
> + goto out_drop;
> + }
> +
> if (likely(!IS_ERR(neigh))) {
> int ret;
>
next prev parent reply other threads:[~2020-10-19 15:01 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-10-15 15:46 [PATCH RFC bpf-next 0/2] bpf: Rework bpf_redirect_neigh() to allow supplying nexthop from caller Toke Høiland-Jørgensen
2020-10-15 15:46 ` [PATCH RFC bpf-next 1/2] bpf_redirect_neigh: Support supplying the nexthop as a helper parameter Toke Høiland-Jørgensen
2020-10-15 16:27 ` David Ahern
2020-10-15 19:34 ` Toke Høiland-Jørgensen
2020-10-19 13:09 ` Daniel Borkmann
2020-10-19 13:28 ` Toke Høiland-Jørgensen
2020-10-19 14:48 ` Daniel Borkmann
2020-10-19 14:56 ` Toke Høiland-Jørgensen
2020-10-19 15:01 ` Daniel Borkmann [this message]
2020-10-15 15:46 ` [PATCH RFC bpf-next 2/2] selftests: Update test_tc_neigh to use the modified bpf_redirect_neigh() Toke Høiland-Jørgensen
2020-10-19 14:40 ` Daniel Borkmann
2020-10-19 14:48 ` Toke Høiland-Jørgensen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=013e2c8b-13b5-661c-89c5-508b91cd3f4c@iogearbox.net \
--to=daniel@iogearbox.net \
--cc=bpf@vger.kernel.org \
--cc=dsahern@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=toke@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).