[PATCH bpf-next 0/2] bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH bpf-next 0/2] bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()
@ 2023-01-06  3:55 Ziyang Xuan
  2023-01-06  3:55 ` [PATCH bpf-next 1/2] " Ziyang Xuan
  2023-01-06  3:55 ` [PATCH bpf-next 2/2] selftests/bpf: add ipip6 and ip6ip decap to test_tc_tunnel Ziyang Xuan
  0 siblings, 2 replies; 10+ messages in thread
From: Ziyang Xuan @ 2023-01-06  3:55 UTC (permalink / raw)
  To: ast, daniel, andrii, davem, edumazet, kuba, pabeni, bpf, netdev,
	martin.lau, song, yhs, john.fastabend, kpsingh, sdf, haoluo,
	jolsa

Add ipip6 and ip6ip decap support for bpf_skb_adjust_room().
Main use case is for using cls_bpf on ingress hook to decapsulate
IPv4 over IPv6 and IPv6 over IPv4 tunnel packets.

Also add ipip6 and ip6ip decap testcases to verify that
bpf_skb_adjust_room() correctly decapsulates ipip6 and ip6ip
tunnel packets.

Ziyang Xuan (2):
  bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()
  selftests/bpf: add ipip6 and ip6ip decap to test_tc_tunnel

 net/core/filter.c                             | 34 +++++++++-
 .../selftests/bpf/progs/test_tc_tunnel.c      | 64 +++++++++++++++++++
 tools/testing/selftests/bpf/test_tc_tunnel.sh | 15 +++--
 3 files changed, 105 insertions(+), 8 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH bpf-next 1/2] bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()
  2023-01-06  3:55 [PATCH bpf-next 0/2] bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room() Ziyang Xuan
@ 2023-01-06  3:55 ` Ziyang Xuan
  2023-01-06 19:55   ` sdf
  2023-01-06  3:55 ` [PATCH bpf-next 2/2] selftests/bpf: add ipip6 and ip6ip decap to test_tc_tunnel Ziyang Xuan
  1 sibling, 1 reply; 10+ messages in thread
From: Ziyang Xuan @ 2023-01-06  3:55 UTC (permalink / raw)
  To: ast, daniel, andrii, davem, edumazet, kuba, pabeni, bpf, netdev,
	martin.lau, song, yhs, john.fastabend, kpsingh, sdf, haoluo,
	jolsa

Add ipip6 and ip6ip decap support for bpf_skb_adjust_room().
Main use case is for using cls_bpf on ingress hook to decapsulate
IPv4 over IPv6 and IPv6 over IPv4 tunnel packets.

Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
---
 net/core/filter.c | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 929358677183..73982fb4fe2e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3495,6 +3495,12 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
 static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
 			      u64 flags)
 {
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
+	__be16 proto;
 	int ret;
 
 	if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
@@ -3512,10 +3518,19 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
 	if (unlikely(ret < 0))
 		return ret;
 
+	ip.hdr = skb_inner_network_header(skb);
+	if (ip.v4->version == 4)
+		proto = htons(ETH_P_IP);
+	else
+		proto = htons(ETH_P_IPV6);
+
 	ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
 	if (unlikely(ret < 0))
 		return ret;
 
+	/* Match skb->protocol to new outer l3 protocol */
+	skb->protocol = proto;
+
 	if (skb_is_gso(skb)) {
 		struct skb_shared_info *shinfo = skb_shinfo(skb);
 
@@ -3578,10 +3593,14 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 	   u32, mode, u64, flags)
 {
 	u32 len_cur, len_diff_abs = abs(len_diff);
-	u32 len_min = bpf_skb_net_base_len(skb);
-	u32 len_max = BPF_SKB_MAX_LEN;
+	u32 len_min, len_max = BPF_SKB_MAX_LEN;
 	__be16 proto = skb->protocol;
 	bool shrink = len_diff < 0;
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
 	u32 off;
 	int ret;
 
@@ -3594,6 +3613,9 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 		     proto != htons(ETH_P_IPV6)))
 		return -ENOTSUPP;
 
+	if (unlikely(shrink && !skb->encapsulation))
+		return -ENOTSUPP;
+
 	off = skb_mac_header_len(skb);
 	switch (mode) {
 	case BPF_ADJ_ROOM_NET:
@@ -3605,6 +3627,14 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 		return -ENOTSUPP;
 	}
 
+	if (shrink) {
+		ip.hdr = skb_inner_network_header(skb);
+		if (ip.v4->version == 4)
+			len_min = sizeof(struct iphdr);
+		else
+			len_min = sizeof(struct ipv6hdr);
+	}
+
 	len_cur = skb->len - skb_network_offset(skb);
 	if ((shrink && (len_diff_abs >= len_cur ||
 			len_cur - len_diff_abs < len_min)) ||
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH bpf-next 2/2] selftests/bpf: add ipip6 and ip6ip decap to test_tc_tunnel
  2023-01-06  3:55 [PATCH bpf-next 0/2] bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room() Ziyang Xuan
  2023-01-06  3:55 ` [PATCH bpf-next 1/2] " Ziyang Xuan
@ 2023-01-06  3:55 ` Ziyang Xuan
  1 sibling, 0 replies; 10+ messages in thread
From: Ziyang Xuan @ 2023-01-06  3:55 UTC (permalink / raw)
  To: ast, daniel, andrii, davem, edumazet, kuba, pabeni, bpf, netdev,
	martin.lau, song, yhs, john.fastabend, kpsingh, sdf, haoluo,
	jolsa

Add ipip6 and ip6ip decap testcases. Verify that bpf_skb_adjust_room()
correctly decapsulates ipip6 and ip6ip tunnel packets.

Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
---
 .../selftests/bpf/progs/test_tc_tunnel.c      | 64 +++++++++++++++++++
 tools/testing/selftests/bpf/test_tc_tunnel.sh | 15 +++--
 2 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index a0e7762b1e5a..1d24f1bee186 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -363,6 +363,61 @@ static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
 	return TC_ACT_OK;
 }
 
+static int encap_ipv6_ipip6(struct __sk_buff *skb)
+{
+	struct iphdr iph_inner;
+	struct v6hdr h_outer;
+	struct tcphdr tcph;
+	struct ethhdr eth;
+	__u64 flags;
+	int olen;
+
+	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
+			       sizeof(iph_inner)) < 0)
+		return TC_ACT_OK;
+
+	/* filter only packets we want */
+	if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2),
+			       &tcph, sizeof(tcph)) < 0)
+		return TC_ACT_OK;
+
+	if (tcph.dest != __bpf_constant_htons(cfg_port))
+		return TC_ACT_OK;
+
+	olen = sizeof(h_outer.ip);
+
+	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
+
+	/* add room between mac and network header */
+	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
+		return TC_ACT_SHOT;
+
+	/* prepare new outer network header */
+	memset(&h_outer.ip, 0, sizeof(h_outer.ip));
+	h_outer.ip.version = 6;
+	h_outer.ip.hop_limit = iph_inner.ttl;
+	h_outer.ip.saddr.s6_addr[1] = 0xfd;
+	h_outer.ip.saddr.s6_addr[15] = 1;
+	h_outer.ip.daddr.s6_addr[1] = 0xfd;
+	h_outer.ip.daddr.s6_addr[15] = 2;
+	h_outer.ip.payload_len = iph_inner.tot_len;
+	h_outer.ip.nexthdr = IPPROTO_IPIP;
+
+	/* store new outer network header */
+	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
+				BPF_F_INVALIDATE_HASH) < 0)
+		return TC_ACT_SHOT;
+
+	/* update eth->h_proto */
+	if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
+		return TC_ACT_SHOT;
+	eth.h_proto = bpf_htons(ETH_P_IPV6);
+	if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
+		return TC_ACT_SHOT;
+
+	return TC_ACT_OK;
+}
+
 static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
 				      __u16 l2_proto)
 {
@@ -461,6 +516,15 @@ int __encap_ip6tnl_none(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
+SEC("encap_ipip6_none")
+int __encap_ipip6_none(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+		return encap_ipv6_ipip6(skb);
+	else
+		return TC_ACT_OK;
+}
+
 SEC("encap_ip6gre_none")
 int __encap_ip6gre_none(struct __sk_buff *skb)
 {
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index 334bdfeab940..910044f08908 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -100,6 +100,9 @@ if [[ "$#" -eq "0" ]]; then
 	echo "ipip"
 	$0 ipv4 ipip none 100
 
+	echo "ipip6"
+	$0 ipv4 ipip6 none 100
+
 	echo "ip6ip6"
 	$0 ipv6 ip6tnl none 100
 
@@ -224,6 +227,9 @@ elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
 elif [[ "$tuntype" =~ "vxlan" && "$mac" == "eth" ]]; then
 	ttype="vxlan"
 	targs="id 1 dstport 8472 udp6zerocsumrx"
+elif [[ "$tuntype" == "ipip6" ]]; then
+	ttype="ip6tnl"
+	targs=""
 else
 	ttype=$tuntype
 	targs=""
@@ -233,6 +239,9 @@ fi
 if [[ "${tuntype}" == "sit" ]]; then
 	link_addr1="${ns1_v4}"
 	link_addr2="${ns2_v4}"
+elif [[ "${tuntype}" == "ipip6" ]]; then
+	link_addr1="${ns1_v6}"
+	link_addr2="${ns2_v6}"
 else
 	link_addr1="${addr1}"
 	link_addr2="${addr2}"
@@ -287,12 +296,6 @@ else
 	server_listen
 fi
 
-# bpf_skb_net_shrink does not take tunnel flags yet, cannot update L3.
-if [[ "${tuntype}" == "sit" ]]; then
-	echo OK
-	exit 0
-fi
-
 # serverside, use BPF for decap
 ip netns exec "${ns2}" ip link del dev testtun0
 ip netns exec "${ns2}" tc qdisc add dev veth2 clsact
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()
  2023-01-06  3:55 ` [PATCH bpf-next 1/2] " Ziyang Xuan
@ 2023-01-06 19:55   ` sdf
  2023-01-08 19:16     ` Willem de Bruijn
  0 siblings, 1 reply; 10+ messages in thread
From: sdf @ 2023-01-06 19:55 UTC (permalink / raw)
  To: Ziyang Xuan
  Cc: ast, daniel, andrii, davem, edumazet, kuba, pabeni, bpf, netdev,
	martin.lau, song, yhs, john.fastabend, kpsingh, haoluo, jolsa,
	willemb

On 01/06, Ziyang Xuan wrote:
> Add ipip6 and ip6ip decap support for bpf_skb_adjust_room().
> Main use case is for using cls_bpf on ingress hook to decapsulate
> IPv4 over IPv6 and IPv6 over IPv4 tunnel packets.

CC'd Willem since he has done bpf_skb_adjust_room changes in the past.
There might be a lot of GRO/GSO context I'm missing.

> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
> ---
>   net/core/filter.c | 34 ++++++++++++++++++++++++++++++++--
>   1 file changed, 32 insertions(+), 2 deletions(-)

> diff --git a/net/core/filter.c b/net/core/filter.c
> index 929358677183..73982fb4fe2e 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3495,6 +3495,12 @@ static int bpf_skb_net_grow(struct sk_buff *skb,  
> u32 off, u32 len_diff,
>   static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
>   			      u64 flags)
>   {
> +	union {
> +		struct iphdr *v4;
> +		struct ipv6hdr *v6;
> +		unsigned char *hdr;
> +	} ip;
> +	__be16 proto;
>   	int ret;

>   	if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
> @@ -3512,10 +3518,19 @@ static int bpf_skb_net_shrink(struct sk_buff  
> *skb, u32 off, u32 len_diff,
>   	if (unlikely(ret < 0))
>   		return ret;

> +	ip.hdr = skb_inner_network_header(skb);
> +	if (ip.v4->version == 4)
> +		proto = htons(ETH_P_IP);
> +	else
> +		proto = htons(ETH_P_IPV6);
> +
>   	ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
>   	if (unlikely(ret < 0))
>   		return ret;

> +	/* Match skb->protocol to new outer l3 protocol */
> +	skb->protocol = proto;
> +
>   	if (skb_is_gso(skb)) {
>   		struct skb_shared_info *shinfo = skb_shinfo(skb);

> @@ -3578,10 +3593,14 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,  
> skb, s32, len_diff,
>   	   u32, mode, u64, flags)
>   {
>   	u32 len_cur, len_diff_abs = abs(len_diff);
> -	u32 len_min = bpf_skb_net_base_len(skb);
> -	u32 len_max = BPF_SKB_MAX_LEN;
> +	u32 len_min, len_max = BPF_SKB_MAX_LEN;
>   	__be16 proto = skb->protocol;
>   	bool shrink = len_diff < 0;
> +	union {
> +		struct iphdr *v4;
> +		struct ipv6hdr *v6;
> +		unsigned char *hdr;
> +	} ip;
>   	u32 off;
>   	int ret;

> @@ -3594,6 +3613,9 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,  
> skb, s32, len_diff,
>   		     proto != htons(ETH_P_IPV6)))
>   		return -ENOTSUPP;

> +	if (unlikely(shrink && !skb->encapsulation))
> +		return -ENOTSUPP;
> +
>   	off = skb_mac_header_len(skb);
>   	switch (mode) {
>   	case BPF_ADJ_ROOM_NET:
> @@ -3605,6 +3627,14 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,  
> skb, s32, len_diff,
>   		return -ENOTSUPP;
>   	}

> +	if (shrink) {
> +		ip.hdr = skb_inner_network_header(skb);
> +		if (ip.v4->version == 4)
> +			len_min = sizeof(struct iphdr);
> +		else
> +			len_min = sizeof(struct ipv6hdr);
> +	}
> +
>   	len_cur = skb->len - skb_network_offset(skb);
>   	if ((shrink && (len_diff_abs >= len_cur ||
>   			len_cur - len_diff_abs < len_min)) ||
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()
  2023-01-06 19:55   ` sdf
@ 2023-01-08 19:16     ` Willem de Bruijn
  2023-01-09  9:19       ` Ziyang Xuan (William)
  0 siblings, 1 reply; 10+ messages in thread
From: Willem de Bruijn @ 2023-01-08 19:16 UTC (permalink / raw)
  To: Ziyang Xuan
  Cc: ast, daniel, andrii, davem, edumazet, kuba, pabeni, bpf, netdev,
	martin.lau, song, yhs, john.fastabend, kpsingh, haoluo, jolsa,
	sdf

On Fri, Jan 6, 2023 at 2:55 PM <sdf@google.com> wrote:
>
> On 01/06, Ziyang Xuan wrote:
> > Add ipip6 and ip6ip decap support for bpf_skb_adjust_room().
> > Main use case is for using cls_bpf on ingress hook to decapsulate
> > IPv4 over IPv6 and IPv6 over IPv4 tunnel packets.
>
> CC'd Willem since he has done bpf_skb_adjust_room changes in the past.
> There might be a lot of GRO/GSO context I'm missing.
>
> > Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
> > ---
> >   net/core/filter.c | 34 ++++++++++++++++++++++++++++++++--
> >   1 file changed, 32 insertions(+), 2 deletions(-)
>
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index 929358677183..73982fb4fe2e 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -3495,6 +3495,12 @@ static int bpf_skb_net_grow(struct sk_buff *skb,
> > u32 off, u32 len_diff,
> >   static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
> >                             u64 flags)
> >   {
> > +     union {
> > +             struct iphdr *v4;
> > +             struct ipv6hdr *v6;
> > +             unsigned char *hdr;
> > +     } ip;
> > +     __be16 proto;
> >       int ret;
>
> >       if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
> > @@ -3512,10 +3518,19 @@ static int bpf_skb_net_shrink(struct sk_buff
> > *skb, u32 off, u32 len_diff,
> >       if (unlikely(ret < 0))
> >               return ret;
>
> > +     ip.hdr = skb_inner_network_header(skb);
> > +     if (ip.v4->version == 4)
> > +             proto = htons(ETH_P_IP);
> > +     else
> > +             proto = htons(ETH_P_IPV6);
> > +
> >       ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
> >       if (unlikely(ret < 0))
> >               return ret;
>
> > +     /* Match skb->protocol to new outer l3 protocol */
> > +     skb->protocol = proto;
> > +
> >       if (skb_is_gso(skb)) {
> >               struct skb_shared_info *shinfo = skb_shinfo(skb);
>
> > @@ -3578,10 +3593,14 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,
> > skb, s32, len_diff,
> >          u32, mode, u64, flags)
> >   {
> >       u32 len_cur, len_diff_abs = abs(len_diff);
> > -     u32 len_min = bpf_skb_net_base_len(skb);
> > -     u32 len_max = BPF_SKB_MAX_LEN;
> > +     u32 len_min, len_max = BPF_SKB_MAX_LEN;
> >       __be16 proto = skb->protocol;
> >       bool shrink = len_diff < 0;
> > +     union {
> > +             struct iphdr *v4;
> > +             struct ipv6hdr *v6;
> > +             unsigned char *hdr;
> > +     } ip;
> >       u32 off;
> >       int ret;
>
> > @@ -3594,6 +3613,9 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,
> > skb, s32, len_diff,
> >                    proto != htons(ETH_P_IPV6)))
> >               return -ENOTSUPP;
>
> > +     if (unlikely(shrink && !skb->encapsulation))
> > +             return -ENOTSUPP;
> > +

This new restriction might break existing users.

There is no pre-existing requirement that shrink is used solely with
packets encapsulated by the protocol stack.

Indeed, skb->encapsulation is likely not set on packets arriving from
the wire, even if encapsulated. This matters given your comment: "Main use
case is for using cls_bpf on ingress hook to decapsulate".

Can a combination of the existing bpf_skb_adjust_room and
bpf_skb_change_proto address your problem?

> >       off = skb_mac_header_len(skb);
> >       switch (mode) {
> >       case BPF_ADJ_ROOM_NET:
> > @@ -3605,6 +3627,14 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,
> > skb, s32, len_diff,
> >               return -ENOTSUPP;
> >       }
>
> > +     if (shrink) {
> > +             ip.hdr = skb_inner_network_header(skb);
> > +             if (ip.v4->version == 4)
> > +                     len_min = sizeof(struct iphdr);
> > +             else
> > +                     len_min = sizeof(struct ipv6hdr);
> > +     }
> > +
> >       len_cur = skb->len - skb_network_offset(skb);
> >       if ((shrink && (len_diff_abs >= len_cur ||
> >                       len_cur - len_diff_abs < len_min)) ||
> > --
> > 2.25.1
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()
  2023-01-08 19:16     ` Willem de Bruijn
@ 2023-01-09  9:19       ` Ziyang Xuan (William)
  2023-01-09 13:32         ` Willem de Bruijn
  0 siblings, 1 reply; 10+ messages in thread
From: Ziyang Xuan (William) @ 2023-01-09  9:19 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: ast, daniel, andrii, davem, edumazet, kuba, pabeni, bpf, netdev,
	martin.lau, song, yhs, john.fastabend, kpsingh, haoluo, jolsa,
	sdf

> On Fri, Jan 6, 2023 at 2:55 PM <sdf@google.com> wrote:
>>
>> On 01/06, Ziyang Xuan wrote:
>>> Add ipip6 and ip6ip decap support for bpf_skb_adjust_room().
>>> Main use case is for using cls_bpf on ingress hook to decapsulate
>>> IPv4 over IPv6 and IPv6 over IPv4 tunnel packets.
>>
>> CC'd Willem since he has done bpf_skb_adjust_room changes in the past.
>> There might be a lot of GRO/GSO context I'm missing.
>>
>>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
>>> ---
>>>   net/core/filter.c | 34 ++++++++++++++++++++++++++++++++--
>>>   1 file changed, 32 insertions(+), 2 deletions(-)
>>
>>> diff --git a/net/core/filter.c b/net/core/filter.c
>>> index 929358677183..73982fb4fe2e 100644
>>> --- a/net/core/filter.c
>>> +++ b/net/core/filter.c
>>> @@ -3495,6 +3495,12 @@ static int bpf_skb_net_grow(struct sk_buff *skb,
>>> u32 off, u32 len_diff,
>>>   static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
>>>                             u64 flags)
>>>   {
>>> +     union {
>>> +             struct iphdr *v4;
>>> +             struct ipv6hdr *v6;
>>> +             unsigned char *hdr;
>>> +     } ip;
>>> +     __be16 proto;
>>>       int ret;
>>
>>>       if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
>>> @@ -3512,10 +3518,19 @@ static int bpf_skb_net_shrink(struct sk_buff
>>> *skb, u32 off, u32 len_diff,
>>>       if (unlikely(ret < 0))
>>>               return ret;
>>
>>> +     ip.hdr = skb_inner_network_header(skb);
>>> +     if (ip.v4->version == 4)
>>> +             proto = htons(ETH_P_IP);
>>> +     else
>>> +             proto = htons(ETH_P_IPV6);
>>> +
>>>       ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
>>>       if (unlikely(ret < 0))
>>>               return ret;
>>
>>> +     /* Match skb->protocol to new outer l3 protocol */
>>> +     skb->protocol = proto;
>>> +
>>>       if (skb_is_gso(skb)) {
>>>               struct skb_shared_info *shinfo = skb_shinfo(skb);
>>
>>> @@ -3578,10 +3593,14 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,
>>> skb, s32, len_diff,
>>>          u32, mode, u64, flags)
>>>   {
>>>       u32 len_cur, len_diff_abs = abs(len_diff);
>>> -     u32 len_min = bpf_skb_net_base_len(skb);
>>> -     u32 len_max = BPF_SKB_MAX_LEN;
>>> +     u32 len_min, len_max = BPF_SKB_MAX_LEN;
>>>       __be16 proto = skb->protocol;
>>>       bool shrink = len_diff < 0;
>>> +     union {
>>> +             struct iphdr *v4;
>>> +             struct ipv6hdr *v6;
>>> +             unsigned char *hdr;
>>> +     } ip;
>>>       u32 off;
>>>       int ret;
>>
>>> @@ -3594,6 +3613,9 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,
>>> skb, s32, len_diff,
>>>                    proto != htons(ETH_P_IPV6)))
>>>               return -ENOTSUPP;
>>
>>> +     if (unlikely(shrink && !skb->encapsulation))
>>> +             return -ENOTSUPP;
>>> +
> 
> This new restriction might break existing users.
> 
> There is no pre-existing requirement that shrink is used solely with
> packets encapsulated by the protocol stack.
> 
> Indeed, skb->encapsulation is likely not set on packets arriving from
> the wire, even if encapsulated. Referring to your comment "Main use
> case is for using cls_bpf on ingress hook to decapsulate"
> 
> Can a combination of the existing bpf_skb_adjust_room and
> bpf_skb_change_proto address your problem?

Hello Willem,

I think a combination of bpf_skb_adjust_room and bpf_skb_change_proto cannot
address my problem.

Currently, bpf_skb_adjust_room() fails the "len_cur - len_diff_abs < len_min"
check when decapsulating an ipip6 packet, because "len_min" should be
sizeof(struct iphdr) rather than sizeof(struct ipv6hdr).

We can remove the skb->encapsulation restriction and parse the outer and inner
IP headers to identify ipip6 and ip6ip packets, as follows:

--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3498,6 +3498,12 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
 static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
                              u64 flags)
 {
+       union {
+               struct iphdr *v4;
+               struct ipv6hdr *v6;
+               unsigned char *hdr;
+       } ip;
+       __be16 proto;
        int ret;

        if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
@@ -3515,10 +3521,23 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
        if (unlikely(ret < 0))
                return ret;

+       ip.hdr = skb_network_header(skb);
+       if (ip.v4->version == 4) {
+               if (ip.v4->protocol == IPPROTO_IPV6)
+                       proto = htons(ETH_P_IPV6);
+       } else {
+               struct ipv6_opt_hdr *opt_hdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + sizeof(struct ipv6hdr));
+               if (ip.v6->nexthdr == NEXTHDR_DEST && opt_hdr->nexthdr == NEXTHDR_IPV4)
+                       proto = htons(ETH_P_IP);
+       }
+
        ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
        if (unlikely(ret < 0))
                return ret;

+       /* Match skb->protocol to new outer l3 protocol */
+       skb->protocol = proto;
+
        if (skb_is_gso(skb)) {
                struct skb_shared_info *shinfo = skb_shinfo(skb);

@@ -3585,6 +3604,11 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
        u32 len_max = BPF_SKB_MAX_LEN;
        __be16 proto = skb->protocol;
        bool shrink = len_diff < 0;
+       union {
+               struct iphdr *v4;
+               struct ipv6hdr *v6;
+               unsigned char *hdr;
+       } ip;
        u32 off;
        int ret;

@@ -3608,6 +3632,19 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
                return -ENOTSUPP;
        }

+       if (shrink) {
+               ip.hdr = skb_network_header(skb);
+               if (ip.v4->version == 4) {
+                       if (ip.v4->protocol == IPPROTO_IPV6)
+                               len_min = sizeof(struct ipv6hdr);
+               } else {
+                       struct ipv6_opt_hdr *opt_hdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + sizeof(struct ipv6hdr));
+                       if (ip.v6->nexthdr == NEXTHDR_DEST && opt_hdr->nexthdr == NEXTHDR_IPV4) {
+                               len_min = sizeof(struct iphdr);
+                       }
+               }
+       }
+
        len_cur = skb->len - skb_network_offset(skb);


Look forward to your comments and suggestions.

Thank you!

> 
>>>       off = skb_mac_header_len(skb);
>>>       switch (mode) {
>>>       case BPF_ADJ_ROOM_NET:
>>> @@ -3605,6 +3627,14 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,
>>> skb, s32, len_diff,
>>>               return -ENOTSUPP;
>>>       }
>>
>>> +     if (shrink) {
>>> +             ip.hdr = skb_inner_network_header(skb);
>>> +             if (ip.v4->version == 4)
>>> +                     len_min = sizeof(struct iphdr);
>>> +             else
>>> +                     len_min = sizeof(struct ipv6hdr);
>>> +     }
>>> +
>>>       len_cur = skb->len - skb_network_offset(skb);
>>>       if ((shrink && (len_diff_abs >= len_cur ||
>>>                       len_cur - len_diff_abs < len_min)) ||
>>> --
>>> 2.25.1
>>
> .
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()
  2023-01-09  9:19       ` Ziyang Xuan (William)
@ 2023-01-09 13:32         ` Willem de Bruijn
  2023-01-10 13:07           ` Ziyang Xuan (William)
  0 siblings, 1 reply; 10+ messages in thread
From: Willem de Bruijn @ 2023-01-09 13:32 UTC (permalink / raw)
  To: Ziyang Xuan (William)
  Cc: ast, daniel, andrii, davem, edumazet, kuba, pabeni, bpf, netdev,
	martin.lau, song, yhs, john.fastabend, kpsingh, haoluo, jolsa,
	sdf

On Mon, Jan 9, 2023 at 4:20 AM Ziyang Xuan (William)
<william.xuanziyang@huawei.com> wrote:
>
> > On Fri, Jan 6, 2023 at 2:55 PM <sdf@google.com> wrote:
> >>
> >> On 01/06, Ziyang Xuan wrote:
> >>> Add ipip6 and ip6ip decap support for bpf_skb_adjust_room().
> >>> Main use case is for using cls_bpf on ingress hook to decapsulate
> >>> IPv4 over IPv6 and IPv6 over IPv4 tunnel packets.
> >>
> >> CC'd Willem since he has done bpf_skb_adjust_room changes in the past.
> >> There might be a lot of GRO/GSO context I'm missing.
> >>
> >>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
> >>> ---
> >>>   net/core/filter.c | 34 ++++++++++++++++++++++++++++++++--
> >>>   1 file changed, 32 insertions(+), 2 deletions(-)
> >>
> >>> diff --git a/net/core/filter.c b/net/core/filter.c
> >>> index 929358677183..73982fb4fe2e 100644
> >>> --- a/net/core/filter.c
> >>> +++ b/net/core/filter.c
> >>> @@ -3495,6 +3495,12 @@ static int bpf_skb_net_grow(struct sk_buff *skb,
> >>> u32 off, u32 len_diff,
> >>>   static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
> >>>                             u64 flags)
> >>>   {
> >>> +     union {
> >>> +             struct iphdr *v4;
> >>> +             struct ipv6hdr *v6;
> >>> +             unsigned char *hdr;
> >>> +     } ip;
> >>> +     __be16 proto;
> >>>       int ret;
> >>
> >>>       if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
> >>> @@ -3512,10 +3518,19 @@ static int bpf_skb_net_shrink(struct sk_buff
> >>> *skb, u32 off, u32 len_diff,
> >>>       if (unlikely(ret < 0))
> >>>               return ret;
> >>
> >>> +     ip.hdr = skb_inner_network_header(skb);
> >>> +     if (ip.v4->version == 4)
> >>> +             proto = htons(ETH_P_IP);
> >>> +     else
> >>> +             proto = htons(ETH_P_IPV6);
> >>> +
> >>>       ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
> >>>       if (unlikely(ret < 0))
> >>>               return ret;
> >>
> >>> +     /* Match skb->protocol to new outer l3 protocol */
> >>> +     skb->protocol = proto;
> >>> +
> >>>       if (skb_is_gso(skb)) {
> >>>               struct skb_shared_info *shinfo = skb_shinfo(skb);
> >>
> >>> @@ -3578,10 +3593,14 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,
> >>> skb, s32, len_diff,
> >>>          u32, mode, u64, flags)
> >>>   {
> >>>       u32 len_cur, len_diff_abs = abs(len_diff);
> >>> -     u32 len_min = bpf_skb_net_base_len(skb);
> >>> -     u32 len_max = BPF_SKB_MAX_LEN;
> >>> +     u32 len_min, len_max = BPF_SKB_MAX_LEN;
> >>>       __be16 proto = skb->protocol;
> >>>       bool shrink = len_diff < 0;
> >>> +     union {
> >>> +             struct iphdr *v4;
> >>> +             struct ipv6hdr *v6;
> >>> +             unsigned char *hdr;
> >>> +     } ip;
> >>>       u32 off;
> >>>       int ret;
> >>
> >>> @@ -3594,6 +3613,9 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,
> >>> skb, s32, len_diff,
> >>>                    proto != htons(ETH_P_IPV6)))
> >>>               return -ENOTSUPP;
> >>
> >>> +     if (unlikely(shrink && !skb->encapsulation))
> >>> +             return -ENOTSUPP;
> >>> +
> >
> > This new restriction might break existing users.
> >
> > There is no pre-existing requirement that shrink is used solely with
> > packets encapsulated by the protocol stack.
> >
> > Indeed, skb->encapsulation is likely not set on packets arriving from
> > the wire, even if encapsulated. Referring to your comment "Main use
> > case is for using cls_bpf on ingress hook to decapsulate"
> >
> > Can a combination of the existing bpf_skb_adjust_room and
> > bpf_skb_change_proto address your problem?
>
> Hello Willem,
>
> I think combination bpf_skb_adjust_room and bpf_skb_change_proto can not
> address my problem.
>
> Now, bpf_skb_adjust_room() would fail for "len_cur - len_diff_abs < len_min"
> when decap ipip6 packet, because "len_min" should be sizeof(struct iphdr)
> but not sizeof(struct ipv6hdr).
>
> We can remove skb->encapsulation restriction and parse outer and inner IP
> header to determine ipip6 and ip6ip packets. As following:

Adding logic for network layer protocol conversion like this looks
good to me. bpf_skb_adjust_room already has a few other metadata
quirks.

But like those, let's make this intent explicit: define a new flag
that requests this behavior.

Let's avoid introducing a new union. Just check (ip_hdr(skb)->version == 4).

>
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3498,6 +3498,12 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
>  static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
>                               u64 flags)
>  {
> +       union {
> +               struct iphdr *v4;
> +               struct ipv6hdr *v6;
> +               unsigned char *hdr;
> +       } ip;
> +       __be16 proto;
>         int ret;
>
>         if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
> @@ -3515,10 +3521,23 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
>         if (unlikely(ret < 0))
>                 return ret;
>
> +       ip.hdr = skb_network_header(skb);
> +       if (ip.v4->version == 4) {
> +               if (ip.v4->protocol == IPPROTO_IPV6)
> +                       proto = htons(ETH_P_IPV6);
> +       } else {
> +               struct ipv6_opt_hdr *opt_hdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + sizeof(struct ipv6hdr));
> +               if (ip.v6->nexthdr == NEXTHDR_DEST && opt_hdr->nexthdr == NEXTHDR_IPV4)
> +                       proto = htons(ETH_P_IP);
> +       }
> +
>         ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
>         if (unlikely(ret < 0))
>                 return ret;
>
> +       /* Match skb->protocol to new outer l3 protocol */
> +       skb->protocol = proto;
> +
>         if (skb_is_gso(skb)) {
>                 struct skb_shared_info *shinfo = skb_shinfo(skb);
>
> @@ -3585,6 +3604,11 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>         u32 len_max = BPF_SKB_MAX_LEN;
>         __be16 proto = skb->protocol;
>         bool shrink = len_diff < 0;
> +       union {
> +               struct iphdr *v4;
> +               struct ipv6hdr *v6;
> +               unsigned char *hdr;
> +       } ip;
>         u32 off;
>         int ret;
>
> @@ -3608,6 +3632,19 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>                 return -ENOTSUPP;
>         }
>
> +       if (shrink) {
> +               ip.hdr = skb_network_header(skb);
> +               if (ip.v4->version == 4) {
> +                       if (ip.v4->protocol == IPPROTO_IPV6)
> +                               len_min = sizeof(struct ipv6hdr);
> +               } else {
> +                       struct ipv6_opt_hdr *opt_hdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + sizeof(struct ipv6hdr));
> +                       if (ip.v6->nexthdr == NEXTHDR_DEST && opt_hdr->nexthdr == NEXTHDR_IPV4) {
> +                               len_min = sizeof(struct iphdr);
> +                       }
> +               }
> +       }
> +
>         len_cur = skb->len - skb_network_offset(skb);
>
>
> Look forward to your comments and suggestions.
>
> Thank you!
>
> >
> >>>       off = skb_mac_header_len(skb);
> >>>       switch (mode) {
> >>>       case BPF_ADJ_ROOM_NET:
> >>> @@ -3605,6 +3627,14 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,
> >>> skb, s32, len_diff,
> >>>               return -ENOTSUPP;
> >>>       }
> >>
> >>> +     if (shrink) {
> >>> +             ip.hdr = skb_inner_network_header(skb);
> >>> +             if (ip.v4->version == 4)
> >>> +                     len_min = sizeof(struct iphdr);
> >>> +             else
> >>> +                     len_min = sizeof(struct ipv6hdr);
> >>> +     }
> >>> +
> >>>       len_cur = skb->len - skb_network_offset(skb);
> >>>       if ((shrink && (len_diff_abs >= len_cur ||
> >>>                       len_cur - len_diff_abs < len_min)) ||
> >>> --
> >>> 2.25.1
> >>
> > .
> >

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()
  2023-01-09 13:32         ` Willem de Bruijn
@ 2023-01-10 13:07           ` Ziyang Xuan (William)
  2023-01-10 14:09             ` Willem de Bruijn
  0 siblings, 1 reply; 10+ messages in thread
From: Ziyang Xuan (William) @ 2023-01-10 13:07 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: ast, daniel, andrii, davem, edumazet, kuba, pabeni, bpf, netdev,
	martin.lau, song, yhs, john.fastabend, kpsingh, haoluo, jolsa,
	sdf

> On Mon, Jan 9, 2023 at 4:20 AM Ziyang Xuan (William)
> <william.xuanziyang@huawei.com> wrote:
>>
>>> On Fri, Jan 6, 2023 at 2:55 PM <sdf@google.com> wrote:
>>>>
>>>> On 01/06, Ziyang Xuan wrote:
>>>>> Add ipip6 and ip6ip decap support for bpf_skb_adjust_room().
>>>>> Main use case is for using cls_bpf on ingress hook to decapsulate
>>>>> IPv4 over IPv6 and IPv6 over IPv4 tunnel packets.
>>>>
>>>> CC'd Willem since he has done bpf_skb_adjust_room changes in the past.
>>>> There might be a lot of GRO/GSO context I'm missing.
>>>>
>>>>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
>>>>> ---
>>>>>   net/core/filter.c | 34 ++++++++++++++++++++++++++++++++--
>>>>>   1 file changed, 32 insertions(+), 2 deletions(-)
>>>>
>>>>> diff --git a/net/core/filter.c b/net/core/filter.c
>>>>> index 929358677183..73982fb4fe2e 100644
>>>>> --- a/net/core/filter.c
>>>>> +++ b/net/core/filter.c
>>>>> @@ -3495,6 +3495,12 @@ static int bpf_skb_net_grow(struct sk_buff *skb,
>>>>> u32 off, u32 len_diff,
>>>>>   static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
>>>>>                             u64 flags)
>>>>>   {
>>>>> +     union {
>>>>> +             struct iphdr *v4;
>>>>> +             struct ipv6hdr *v6;
>>>>> +             unsigned char *hdr;
>>>>> +     } ip;
>>>>> +     __be16 proto;
>>>>>       int ret;
>>>>
>>>>>       if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
>>>>> @@ -3512,10 +3518,19 @@ static int bpf_skb_net_shrink(struct sk_buff
>>>>> *skb, u32 off, u32 len_diff,
>>>>>       if (unlikely(ret < 0))
>>>>>               return ret;
>>>>
>>>>> +     ip.hdr = skb_inner_network_header(skb);
>>>>> +     if (ip.v4->version == 4)
>>>>> +             proto = htons(ETH_P_IP);
>>>>> +     else
>>>>> +             proto = htons(ETH_P_IPV6);
>>>>> +
>>>>>       ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
>>>>>       if (unlikely(ret < 0))
>>>>>               return ret;
>>>>
>>>>> +     /* Match skb->protocol to new outer l3 protocol */
>>>>> +     skb->protocol = proto;
>>>>> +
>>>>>       if (skb_is_gso(skb)) {
>>>>>               struct skb_shared_info *shinfo = skb_shinfo(skb);
>>>>
>>>>> @@ -3578,10 +3593,14 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,
>>>>> skb, s32, len_diff,
>>>>>          u32, mode, u64, flags)
>>>>>   {
>>>>>       u32 len_cur, len_diff_abs = abs(len_diff);
>>>>> -     u32 len_min = bpf_skb_net_base_len(skb);
>>>>> -     u32 len_max = BPF_SKB_MAX_LEN;
>>>>> +     u32 len_min, len_max = BPF_SKB_MAX_LEN;
>>>>>       __be16 proto = skb->protocol;
>>>>>       bool shrink = len_diff < 0;
>>>>> +     union {
>>>>> +             struct iphdr *v4;
>>>>> +             struct ipv6hdr *v6;
>>>>> +             unsigned char *hdr;
>>>>> +     } ip;
>>>>>       u32 off;
>>>>>       int ret;
>>>>
>>>>> @@ -3594,6 +3613,9 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,
>>>>> skb, s32, len_diff,
>>>>>                    proto != htons(ETH_P_IPV6)))
>>>>>               return -ENOTSUPP;
>>>>
>>>>> +     if (unlikely(shrink && !skb->encapsulation))
>>>>> +             return -ENOTSUPP;
>>>>> +
>>>
>>> This new restriction might break existing users.
>>>
>>> There is no pre-existing requirement that shrink is used solely with
>>> packets encapsulated by the protocol stack.
>>>
>>> Indeed, skb->encapsulation is likely not set on packets arriving from
>>> the wire, even if encapsulated. Referring to your comment "Main use
>>> case is for using cls_bpf on ingress hook to decapsulate"
>>>
>>> Can a combination of the existing bpf_skb_adjust_room and
>>> bpf_skb_change_proto address your problem?
>>
>> Hello Willem,
>>
>> I think a combination of bpf_skb_adjust_room and bpf_skb_change_proto cannot
>> address my problem.
>>
>> Currently, bpf_skb_adjust_room() fails the "len_cur - len_diff_abs < len_min"
>> check when decapsulating an ipip6 packet, because "len_min" should be
>> sizeof(struct iphdr) rather than sizeof(struct ipv6hdr).
>>
>> We can remove the skb->encapsulation restriction and parse the outer and
>> inner IP headers to identify ipip6 and ip6ip packets, as follows:
> 
> Adding logic for network layer protocol conversion like this looks
> good to me. bpf_skb_adjust_room already has a few other metadata
> quirks.
> 
> But like those, let's make this intent explicit: define a new flag
> that requests this behavior.

Hello Willem,

I think you would prefer something like this:

--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2644,6 +2644,11 @@ union bpf_attr {
  *               Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
  *               L2 type as Ethernet.
  *
+ *              * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
+ *                **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
+ *                Indicates the new IP header version after decapsulating the
+ *                outer IP header.
+ *
  *             A call to this helper is susceptible to change the underlying
  *             packet buffer. Therefore, at load time, all checks on pointers
  *             previously done by the verifier are invalidated and must be
@@ -5803,6 +5808,8 @@ enum {
        BPF_F_ADJ_ROOM_ENCAP_L4_UDP     = (1ULL << 4),
        BPF_F_ADJ_ROOM_NO_CSUM_RESET    = (1ULL << 5),
        BPF_F_ADJ_ROOM_ENCAP_L2_ETH     = (1ULL << 6),
+       BPF_F_ADJ_ROOM_DECAP_L3_IPV4    = (1ULL << 7),
+       BPF_F_ADJ_ROOM_DECAP_L3_IPV6    = (1ULL << 8),
 };

 enum {
diff --git a/net/core/filter.c b/net/core/filter.c
index 43cc1fe58a2c..0bbe5e67337c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3381,13 +3381,17 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
 #define BPF_F_ADJ_ROOM_ENCAP_L3_MASK   (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
                                         BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)

+#define BPF_F_ADJ_ROOM_DECAP_L3_MASK   (BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | \
+                                        BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
+
 #define BPF_F_ADJ_ROOM_MASK            (BPF_F_ADJ_ROOM_FIXED_GSO | \
                                         BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
                                         BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
                                         BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
                                         BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
                                         BPF_F_ADJ_ROOM_ENCAP_L2( \
-                                         BPF_ADJ_ROOM_ENCAP_L2_MASK))
+                                         BPF_ADJ_ROOM_ENCAP_L2_MASK) | \
+                                        BPF_F_ADJ_ROOM_DECAP_L3_MASK)

 static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
                            u64 flags)
@@ -3501,6 +3505,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
        int ret;

        if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
+                              BPF_F_ADJ_ROOM_DECAP_L3_MASK |
                               BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
                return -EINVAL;

@@ -3519,6 +3524,14 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
        if (unlikely(ret < 0))
                return ret;

+       /* Match skb->protocol to new outer l3 protocol */
+       if (skb->protocol == htons(ETH_P_IP) &&
+           flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
+               skb->protocol = htons(ETH_P_IPV6);
+       else if (skb->protocol == htons(ETH_P_IPV6) &&
+                flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
+               skb->protocol = htons(ETH_P_IP);
+
        if (skb_is_gso(skb)) {
                struct skb_shared_info *shinfo = skb_shinfo(skb);

@@ -3597,6 +3610,10 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
                     proto != htons(ETH_P_IPV6)))
                return -ENOTSUPP;

+       if (unlikely(shrink && flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4 &&
+                    flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6))
+               return -EINVAL;
+
        off = skb_mac_header_len(skb);
        switch (mode) {
        case BPF_ADJ_ROOM_NET:
@@ -3608,6 +3625,16 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
                return -ENOTSUPP;
        }

+       if (shrink) {
+               if (proto == htons(ETH_P_IP) &&
+                   flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) {
+                       len_min = sizeof(struct ipv6hdr);
+               } else if (proto == htons(ETH_P_IPV6) &&
+                          flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) {
+                       len_min = sizeof(struct iphdr);
+               }
+       }
+
        len_cur = skb->len - skb_network_offset(skb);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 464ca3f01fe7..041361bc6ccf 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2644,6 +2644,11 @@ union bpf_attr {
  *               Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
  *               L2 type as Ethernet.
  *
+ *              * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
+ *                **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
+ *                Indicates the new IP header version after decapsulating the
+ *                outer IP header.
+ *
  *             A call to this helper is susceptible to change the underlying
  *             packet buffer. Therefore, at load time, all checks on pointers
  *             previously done by the verifier are invalidated and must be
@@ -5803,6 +5808,8 @@ enum {
        BPF_F_ADJ_ROOM_ENCAP_L4_UDP     = (1ULL << 4),
        BPF_F_ADJ_ROOM_NO_CSUM_RESET    = (1ULL << 5),
        BPF_F_ADJ_ROOM_ENCAP_L2_ETH     = (1ULL << 6),
+       BPF_F_ADJ_ROOM_DECAP_L3_IPV4    = (1ULL << 7),
+       BPF_F_ADJ_ROOM_DECAP_L3_IPV6    = (1ULL << 8),
 };

 enum {

Thank you!

> 
> Let's avoid introducing a new union. Just use check (ip_hdr(skb)->version == 4).
> 
>>
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -3498,6 +3498,12 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
>>  static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
>>                               u64 flags)
>>  {
>> +       union {
>> +               struct iphdr *v4;
>> +               struct ipv6hdr *v6;
>> +               unsigned char *hdr;
>> +       } ip;
>> +       __be16 proto;
>>         int ret;
>>
>>         if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
>> @@ -3515,10 +3521,23 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
>>         if (unlikely(ret < 0))
>>                 return ret;
>>
>> +       ip.hdr = skb_network_header(skb);
>> +       if (ip.v4->version == 4) {
>> +               if (ip.v4->protocol == IPPROTO_IPV6)
>> +                       proto = htons(ETH_P_IPV6);
>> +       } else {
>> +               struct ipv6_opt_hdr *opt_hdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + sizeof(struct ipv6hdr));
>> +               if (ip.v6->nexthdr == NEXTHDR_DEST && opt_hdr->nexthdr == NEXTHDR_IPV4)
>> +                       proto = htons(ETH_P_IP);
>> +       }
>> +
>>         ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
>>         if (unlikely(ret < 0))
>>                 return ret;
>>
>> +       /* Match skb->protocol to new outer l3 protocol */
>> +       skb->protocol = proto;
>> +
>>         if (skb_is_gso(skb)) {
>>                 struct skb_shared_info *shinfo = skb_shinfo(skb);
>>
>> @@ -3585,6 +3604,11 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>>         u32 len_max = BPF_SKB_MAX_LEN;
>>         __be16 proto = skb->protocol;
>>         bool shrink = len_diff < 0;
>> +       union {
>> +               struct iphdr *v4;
>> +               struct ipv6hdr *v6;
>> +               unsigned char *hdr;
>> +       } ip;
>>         u32 off;
>>         int ret;
>>
>> @@ -3608,6 +3632,19 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>>                 return -ENOTSUPP;
>>         }
>>
>> +       if (shrink) {
>> +               ip.hdr = skb_network_header(skb);
>> +               if (ip.v4->version == 4) {
>> +                       if (ip.v4->protocol == IPPROTO_IPV6)
>> +                               len_min = sizeof(struct ipv6hdr);
>> +               } else {
>> +                       struct ipv6_opt_hdr *opt_hdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + sizeof(struct ipv6hdr));
>> +                       if (ip.v6->nexthdr == NEXTHDR_DEST && opt_hdr->nexthdr == NEXTHDR_IPV4) {
>> +                               len_min = sizeof(struct iphdr);
>> +                       }
>> +               }
>> +       }
>> +
>>         len_cur = skb->len - skb_network_offset(skb);
>>
>>
>> Look forward to your comments and suggestions.
>>
>> Thank you!
>>
>>>
>>>>>       off = skb_mac_header_len(skb);
>>>>>       switch (mode) {
>>>>>       case BPF_ADJ_ROOM_NET:
>>>>> @@ -3605,6 +3627,14 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,
>>>>> skb, s32, len_diff,
>>>>>               return -ENOTSUPP;
>>>>>       }
>>>>
>>>>> +     if (shrink) {
>>>>> +             ip.hdr = skb_inner_network_header(skb);
>>>>> +             if (ip.v4->version == 4)
>>>>> +                     len_min = sizeof(struct iphdr);
>>>>> +             else
>>>>> +                     len_min = sizeof(struct ipv6hdr);
>>>>> +     }
>>>>> +
>>>>>       len_cur = skb->len - skb_network_offset(skb);
>>>>>       if ((shrink && (len_diff_abs >= len_cur ||
>>>>>                       len_cur - len_diff_abs < len_min)) ||
>>>>> --
>>>>> 2.25.1
>>>>
>>> .
>>>
> .
> 

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()
  2023-01-10 13:07           ` Ziyang Xuan (William)
@ 2023-01-10 14:09             ` Willem de Bruijn
  2023-01-11  2:37               ` Ziyang Xuan (William)
  0 siblings, 1 reply; 10+ messages in thread
From: Willem de Bruijn @ 2023-01-10 14:09 UTC (permalink / raw)
  To: Ziyang Xuan (William)
  Cc: ast, daniel, andrii, davem, edumazet, kuba, pabeni, bpf, netdev,
	martin.lau, song, yhs, john.fastabend, kpsingh, haoluo, jolsa,
	sdf

> I think you prefer like this:

Yes, this looks good to me. A few comments inline.

> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2644,6 +2644,11 @@ union bpf_attr {
>   *               Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
>   *               L2 type as Ethernet.
>   *
> + *              * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
> + *                **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
> + *                Indicates the new IP header version after decapsulate the
> + *                outer IP header.
> + *
>   *             A call to this helper is susceptible to change the underlying
>   *             packet buffer. Therefore, at load time, all checks on pointers
>   *             previously done by the verifier are invalidated and must be
> @@ -5803,6 +5808,8 @@ enum {
>         BPF_F_ADJ_ROOM_ENCAP_L4_UDP     = (1ULL << 4),
>         BPF_F_ADJ_ROOM_NO_CSUM_RESET    = (1ULL << 5),
>         BPF_F_ADJ_ROOM_ENCAP_L2_ETH     = (1ULL << 6),
> +       BPF_F_ADJ_ROOM_DECAP_L3_IPV4    = (1ULL << 7),
> +       BPF_F_ADJ_ROOM_DECAP_L3_IPV6    = (1ULL << 8),
>  };
>
>  enum {
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 43cc1fe58a2c..0bbe5e67337c 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3381,13 +3381,17 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
>  #define BPF_F_ADJ_ROOM_ENCAP_L3_MASK   (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
>                                          BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
>
> +#define BPF_F_ADJ_ROOM_DECAP_L3_MASK   (BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | \
> +                                        BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
> +
>  #define BPF_F_ADJ_ROOM_MASK            (BPF_F_ADJ_ROOM_FIXED_GSO | \
>                                          BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
>                                          BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
>                                          BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
>                                          BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
>                                          BPF_F_ADJ_ROOM_ENCAP_L2( \
> -                                         BPF_ADJ_ROOM_ENCAP_L2_MASK))
> +                                         BPF_ADJ_ROOM_ENCAP_L2_MASK) | \
> +                                        BPF_F_ADJ_ROOM_DECAP_L3_MASK)
>
>  static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
>                             u64 flags)
> @@ -3501,6 +3505,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
>         int ret;
>
>         if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
> +                              BPF_F_ADJ_ROOM_DECAP_L3_MASK |
>                                BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
>                 return -EINVAL;
>
> @@ -3519,6 +3524,14 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
>         if (unlikely(ret < 0))
>                 return ret;
>
> +       /* Match skb->protocol to new outer l3 protocol */
> +       if (skb->protocol == htons(ETH_P_IP) &&
> +           flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
> +               skb->protocol = htons(ETH_P_IPV6);
> +       else if (skb->protocol == htons(ETH_P_IPV6) &&
> +                flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
> +               skb->protocol = htons(ETH_P_IP);
> +
>         if (skb_is_gso(skb)) {
>                 struct skb_shared_info *shinfo = skb_shinfo(skb);
>
> @@ -3597,6 +3610,10 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>                      proto != htons(ETH_P_IPV6)))
>                 return -ENOTSUPP;
>
> +       if (unlikely(shrink && flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4 &&
> +                    flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6))
> +               return -EINVAL;
> +

parentheses and can use mask:

  if (shrink && (flags & .._MASK) == .._MASK)

also should fail if the flags are passed but shrink is false.

>         off = skb_mac_header_len(skb);
>         switch (mode) {
>         case BPF_ADJ_ROOM_NET:
> @@ -3608,6 +3625,16 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>                 return -ENOTSUPP;
>         }
>
> +       if (shrink) {
> +               if (proto == htons(ETH_P_IP) &&
> +                   flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) {
> +                       len_min = sizeof(struct ipv6hdr);
> +               } else if (proto == htons(ETH_P_IPV6) &&
> +                          flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) {
> +                       len_min = sizeof(struct iphdr);
> +               }
> +       }
> +

No need to test proto first?

>         len_cur = skb->len - skb_network_offset(skb);
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 464ca3f01fe7..041361bc6ccf 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -2644,6 +2644,11 @@ union bpf_attr {
>   *               Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
>   *               L2 type as Ethernet.
>   *
> + *              * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
> + *                **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
> + *                Indicates the new IP header version after decapsulate the
> + *                outer IP header.
> + *
>   *             A call to this helper is susceptible to change the underlying
>   *             packet buffer. Therefore, at load time, all checks on pointers
>   *             previously done by the verifier are invalidated and must be
> @@ -5803,6 +5808,8 @@ enum {
>         BPF_F_ADJ_ROOM_ENCAP_L4_UDP     = (1ULL << 4),
>         BPF_F_ADJ_ROOM_NO_CSUM_RESET    = (1ULL << 5),
>         BPF_F_ADJ_ROOM_ENCAP_L2_ETH     = (1ULL << 6),
> +       BPF_F_ADJ_ROOM_DECAP_L3_IPV4    = (1ULL << 7),
> +       BPF_F_ADJ_ROOM_DECAP_L3_IPV6    = (1ULL << 8),
>  };
>
>  enum {
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()
  2023-01-10 14:09             ` Willem de Bruijn
@ 2023-01-11  2:37               ` Ziyang Xuan (William)
  0 siblings, 0 replies; 10+ messages in thread
From: Ziyang Xuan (William) @ 2023-01-11  2:37 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: ast, daniel, andrii, davem, edumazet, kuba, pabeni, bpf, netdev,
	martin.lau, song, yhs, john.fastabend, kpsingh, haoluo, jolsa,
	sdf

>> I think you prefer like this:
> 
> Yes, this looks good to me. A few comments inline.
> 
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -2644,6 +2644,11 @@ union bpf_attr {
>>   *               Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
>>   *               L2 type as Ethernet.
>>   *
>> + *              * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
>> + *                **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
>> + *                Indicates the new IP header version after decapsulate the
>> + *                outer IP header.
>> + *
>>   *             A call to this helper is susceptible to change the underlying
>>   *             packet buffer. Therefore, at load time, all checks on pointers
>>   *             previously done by the verifier are invalidated and must be
>> @@ -5803,6 +5808,8 @@ enum {
>>         BPF_F_ADJ_ROOM_ENCAP_L4_UDP     = (1ULL << 4),
>>         BPF_F_ADJ_ROOM_NO_CSUM_RESET    = (1ULL << 5),
>>         BPF_F_ADJ_ROOM_ENCAP_L2_ETH     = (1ULL << 6),
>> +       BPF_F_ADJ_ROOM_DECAP_L3_IPV4    = (1ULL << 7),
>> +       BPF_F_ADJ_ROOM_DECAP_L3_IPV6    = (1ULL << 8),
>>  };
>>
>>  enum {
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 43cc1fe58a2c..0bbe5e67337c 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -3381,13 +3381,17 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
>>  #define BPF_F_ADJ_ROOM_ENCAP_L3_MASK   (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
>>                                          BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
>>
>> +#define BPF_F_ADJ_ROOM_DECAP_L3_MASK   (BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | \
>> +                                        BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
>> +
>>  #define BPF_F_ADJ_ROOM_MASK            (BPF_F_ADJ_ROOM_FIXED_GSO | \
>>                                          BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
>>                                          BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
>>                                          BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
>>                                          BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
>>                                          BPF_F_ADJ_ROOM_ENCAP_L2( \
>> -                                         BPF_ADJ_ROOM_ENCAP_L2_MASK))
>> +                                         BPF_ADJ_ROOM_ENCAP_L2_MASK) | \
>> +                                        BPF_F_ADJ_ROOM_DECAP_L3_MASK)
>>
>>  static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
>>                             u64 flags)
>> @@ -3501,6 +3505,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
>>         int ret;
>>
>>         if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
>> +                              BPF_F_ADJ_ROOM_DECAP_L3_MASK |
>>                                BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
>>                 return -EINVAL;
>>
>> @@ -3519,6 +3524,14 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
>>         if (unlikely(ret < 0))
>>                 return ret;
>>
>> +       /* Match skb->protocol to new outer l3 protocol */
>> +       if (skb->protocol == htons(ETH_P_IP) &&
>> +           flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
>> +               skb->protocol = htons(ETH_P_IPV6);
>> +       else if (skb->protocol == htons(ETH_P_IPV6) &&
>> +                flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
>> +               skb->protocol = htons(ETH_P_IP);
>> +
>>         if (skb_is_gso(skb)) {
>>                 struct skb_shared_info *shinfo = skb_shinfo(skb);
>>
>> @@ -3597,6 +3610,10 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>>                      proto != htons(ETH_P_IPV6)))
>>                 return -ENOTSUPP;
>>
>> +       if (unlikely(shrink && flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4 &&
>> +                    flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6))
>> +               return -EINVAL;
>> +
> 
> parentheses and can use mask:
> 
>   if (shrink && (flags & .._MASK) == .._MASK)
> 
> also should fail if the flags are passed but shrink is false.

Thank you for your valuable comments!

> 
>>         off = skb_mac_header_len(skb);
>>         switch (mode) {
>>         case BPF_ADJ_ROOM_NET:
>> @@ -3608,6 +3625,16 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>>                 return -ENOTSUPP;
>>         }
>>
>> +       if (shrink) {
>> +               if (proto == htons(ETH_P_IP) &&
>> +                   flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) {
>> +                       len_min = sizeof(struct ipv6hdr);
>> +               } else if (proto == htons(ETH_P_IPV6) &&
>> +                          flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) {
>> +                       len_min = sizeof(struct iphdr);
>> +               }
>> +       }
>> +
> 
> No need to test proto first?

Indeed, I will remove them in patch v2.

> 
>>         len_cur = skb->len - skb_network_offset(skb);
>> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
>> index 464ca3f01fe7..041361bc6ccf 100644
>> --- a/tools/include/uapi/linux/bpf.h
>> +++ b/tools/include/uapi/linux/bpf.h
>> @@ -2644,6 +2644,11 @@ union bpf_attr {
>>   *               Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
>>   *               L2 type as Ethernet.
>>   *
>> + *              * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
>> + *                **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
>> + *                Indicates the new IP header version after decapsulate the
>> + *                outer IP header.
>> + *
>>   *             A call to this helper is susceptible to change the underlying
>>   *             packet buffer. Therefore, at load time, all checks on pointers
>>   *             previously done by the verifier are invalidated and must be
>> @@ -5803,6 +5808,8 @@ enum {
>>         BPF_F_ADJ_ROOM_ENCAP_L4_UDP     = (1ULL << 4),
>>         BPF_F_ADJ_ROOM_NO_CSUM_RESET    = (1ULL << 5),
>>         BPF_F_ADJ_ROOM_ENCAP_L2_ETH     = (1ULL << 6),
>> +       BPF_F_ADJ_ROOM_DECAP_L3_IPV4    = (1ULL << 7),
>> +       BPF_F_ADJ_ROOM_DECAP_L3_IPV6    = (1ULL << 8),
>>  };
>>
>>  enum {
>>
> .
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2023-01-11  2:37 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-06  3:55 [PATCH bpf-next 0/2] bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room() Ziyang Xuan
2023-01-06  3:55 ` [PATCH bpf-next 1/2] " Ziyang Xuan
2023-01-06 19:55   ` sdf
2023-01-08 19:16     ` Willem de Bruijn
2023-01-09  9:19       ` Ziyang Xuan (William)
2023-01-09 13:32         ` Willem de Bruijn
2023-01-10 13:07           ` Ziyang Xuan (William)
2023-01-10 14:09             ` Willem de Bruijn
2023-01-11  2:37               ` Ziyang Xuan (William)
2023-01-06  3:55 ` [PATCH bpf-next 2/2] selftests/bpf: add ipip6 and ip6ip decap to test_tc_tunnel Ziyang Xuan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).