BPF Archive on lore.kernel.org
 help / color / Atom feed
From: David Ahern <dsahern@gmail.com>
To: "Toke Høiland-Jørgensen" <toke@redhat.com>,
	"Daniel Borkmann" <daniel@iogearbox.net>
Cc: David Ahern <dsahern@kernel.org>,
	netdev@vger.kernel.org, bpf@vger.kernel.org
Subject: Re: [PATCH RFC bpf-next 1/2] bpf_redirect_neigh: Support supplying the nexthop as a helper parameter
Date: Thu, 15 Oct 2020 10:27:12 -0600
Message-ID: <d5c14618-089d-5f29-7f10-11d11b0d59ab@gmail.com> (raw)
In-Reply-To: <160277680864.157904.8719768977907736015.stgit@toke.dk>

On 10/15/20 9:46 AM, Toke Høiland-Jørgensen wrote:
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index bf5a99d803e4..980cc1363be8 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3677,15 +3677,19 @@ union bpf_attr {
>   * 	Return
>   * 		The id is returned or 0 in case the id could not be retrieved.
>   *
> - * long bpf_redirect_neigh(u32 ifindex, u64 flags)
> + * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags)

why not fold ifindex into params? with params and plen this should be
extensible later if needed.

A couple of nits below that caught me eye.


>   * 	Description
>   * 		Redirect the packet to another net device of index *ifindex*
>   * 		and fill in L2 addresses from neighboring subsystem. This helper
>   * 		is somewhat similar to **bpf_redirect**\ (), except that it
>   * 		populates L2 addresses as well, meaning, internally, the helper
> - * 		performs a FIB lookup based on the skb's networking header to
> - * 		get the address of the next hop and then relies on the neighbor
> - * 		lookup for the L2 address of the nexthop.
> + * 		relies on the neighbor lookup for the L2 address of the nexthop.
> + *
> + * 		The helper will perform a FIB lookup based on the skb's
> + * 		networking header to get the address of the next hop, unless
> + * 		this is supplied by the caller in the *params* argument. The
> + * 		*plen* argument indicates the len of *params* and should be set
> + * 		to 0 if *params* is NULL.
>   *
>   * 		The *flags* argument is reserved and must be 0. The helper is
>   * 		currently only supported for tc BPF program types, and enabled
> @@ -4906,6 +4910,17 @@ struct bpf_fib_lookup {
>  	__u8	dmac[6];     /* ETH_ALEN */
>  };
>  
> +struct bpf_redir_neigh {
> +	/* network family for lookup (AF_INET, AF_INET6)
> +	 */

second line for the comment is not needed.

> +	__u8	nh_family;
> +	/* network address of nexthop; skips fib lookup to find gateway */
> +	union {
> +		__be32		ipv4_nh;
> +		__u32		ipv6_nh[4];  /* in6_addr; network order */
> +	};
> +};
> +
>  enum bpf_task_fd_type {
>  	BPF_FD_TYPE_RAW_TRACEPOINT,	/* tp name */
>  	BPF_FD_TYPE_TRACEPOINT,		/* tp name */
> diff --git a/net/core/filter.c b/net/core/filter.c
> index c5e2a1c5fd8d..d073031a3a61 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2165,12 +2165,11 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
>  }
>  
>  #if IS_ENABLED(CONFIG_IPV6)
> -static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
> +static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
> +			    struct net_device *dev, const struct in6_addr *nexthop)
>  {
> -	struct dst_entry *dst = skb_dst(skb);
> -	struct net_device *dev = dst->dev;
>  	u32 hh_len = LL_RESERVED_SPACE(dev);
> -	const struct in6_addr *nexthop;
> +	struct dst_entry *dst = NULL;
>  	struct neighbour *neigh;
>  
>  	if (dev_xmit_recursion()) {
> @@ -2196,8 +2195,11 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
>  	}
>  
>  	rcu_read_lock_bh();
> -	nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
> -			      &ipv6_hdr(skb)->daddr);
> +	if (!nexthop) {
> +		dst = skb_dst(skb);
> +		nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
> +				      &ipv6_hdr(skb)->daddr);
> +	}
>  	neigh = ip_neigh_gw6(dev, nexthop);
>  	if (likely(!IS_ERR(neigh))) {
>  		int ret;
> @@ -2210,36 +2212,46 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
>  		return ret;
>  	}
>  	rcu_read_unlock_bh();
> -	IP6_INC_STATS(dev_net(dst->dev),
> -		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
> +	if (dst)
> +		IP6_INC_STATS(dev_net(dst->dev),
> +			      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
>  out_drop:
>  	kfree_skb(skb);
>  	return -ENETDOWN;
>  }
>  
> -static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
> +static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
> +				   struct bpf_nh_params *nh)
>  {
>  	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
> +	struct in6_addr *nexthop = NULL;
>  	struct net *net = dev_net(dev);
>  	int err, ret = NET_XMIT_DROP;
> -	struct dst_entry *dst;
> -	struct flowi6 fl6 = {
> -		.flowi6_flags	= FLOWI_FLAG_ANYSRC,
> -		.flowi6_mark	= skb->mark,
> -		.flowlabel	= ip6_flowinfo(ip6h),
> -		.flowi6_oif	= dev->ifindex,
> -		.flowi6_proto	= ip6h->nexthdr,
> -		.daddr		= ip6h->daddr,
> -		.saddr		= ip6h->saddr,
> -	};
>  
> -	dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
> -	if (IS_ERR(dst))
> -		goto out_drop;
> +	if (!nh->nh_family) {
> +		struct dst_entry *dst;

reverse xmas tree ordering

> +		struct flowi6 fl6 = {
> +			.flowi6_flags = FLOWI_FLAG_ANYSRC,
> +			.flowi6_mark = skb->mark,
> +			.flowlabel = ip6_flowinfo(ip6h),
> +			.flowi6_oif = dev->ifindex,
> +			.flowi6_proto = ip6h->nexthdr,
> +			.daddr = ip6h->daddr,
> +			.saddr = ip6h->saddr,
> +		};
> +
> +		dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
> +		if (IS_ERR(dst))
> +			goto out_drop;
>  
> -	skb_dst_set(skb, dst);
> +		skb_dst_set(skb, dst);
> +	} else if (nh->nh_family == AF_INET6) {
> +		nexthop = &nh->ipv6_nh;
> +	} else {
> +		goto out_drop;
> +	}
>  
> -	err = bpf_out_neigh_v6(net, skb);
> +	err = bpf_out_neigh_v6(net, skb, dev, nexthop);
>  	if (unlikely(net_xmit_eval(err)))
>  		dev->stats.tx_errors++;
>  	else



  reply index

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-15 15:46 [PATCH RFC bpf-next 0/2] bpf: Rework bpf_redirect_neigh() to allow supplying nexthop from caller Toke Høiland-Jørgensen
2020-10-15 15:46 ` [PATCH RFC bpf-next 1/2] bpf_redirect_neigh: Support supplying the nexthop as a helper parameter Toke Høiland-Jørgensen
2020-10-15 16:27   ` David Ahern [this message]
2020-10-15 19:34     ` Toke Høiland-Jørgensen
2020-10-19 13:09       ` Daniel Borkmann
2020-10-19 13:28         ` Toke Høiland-Jørgensen
2020-10-19 14:48   ` Daniel Borkmann
2020-10-19 14:56     ` Toke Høiland-Jørgensen
2020-10-19 15:01   ` Daniel Borkmann
2020-10-15 15:46 ` [PATCH RFC bpf-next 2/2] selftests: Update test_tc_neigh to use the modified bpf_redirect_neigh() Toke Høiland-Jørgensen
2020-10-19 14:40   ` Daniel Borkmann
2020-10-19 14:48     ` Toke Høiland-Jørgensen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d5c14618-089d-5f29-7f10-11d11b0d59ab@gmail.com \
    --to=dsahern@gmail.com \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=dsahern@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=toke@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

BPF Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/bpf/0 bpf/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 bpf bpf/ https://lore.kernel.org/bpf \
		bpf@vger.kernel.org
	public-inbox-index bpf

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.bpf


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git