All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf-next] bpf: fix memory leak in bpf_lwt_xmit_reroute
@ 2019-02-14  6:09 Peter Oskolkov
  2019-02-14 18:11 ` David Ahern
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Oskolkov @ 2019-02-14  6:09 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, netdev
  Cc: Peter Oskolkov, David Ahern, Willem de Bruijn, Peter Oskolkov

On error the skb should be freed. Tested with diff/steps
provided by David Ahern.

Reported-by: David Ahern <dsahern@gmail.com>
Fixes: 3bd0b15281af ("bpf: add handling of BPF_LWT_REROUTE to lwt_bpf.c")
Signed-off-by: Peter Oskolkov <posk@google.com>
---
 net/core/lwt_bpf.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 32251f3fcda0..f3273cbb6b22 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -179,18 +179,19 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
 	struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
 	int oif = l3mdev ? l3mdev->ifindex : 0;
 	struct dst_entry *dst = NULL;
+	int err = -EAFNOSUPPORT;
 	struct sock *sk;
 	struct net *net;
 	bool ipv4;
-	int err;
 
 	if (skb->protocol == htons(ETH_P_IP))
 		ipv4 = true;
 	else if (skb->protocol == htons(ETH_P_IPV6))
 		ipv4 = false;
 	else
-		return -EAFNOSUPPORT;
+		goto err;
 
+	err = -EINVAL;
 	sk = sk_to_full_sk(skb->sk);
 	if (sk) {
 		if (sk->sk_bound_dev_if)
@@ -216,7 +217,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
 
 		rt = ip_route_output_key(net, &fl4);
 		if (IS_ERR(rt))
-			return -EINVAL;
+			goto err;
 		dst = &rt->dst;
 	} else {
 		struct ipv6hdr *iph6 = ipv6_hdr(skb);
@@ -231,12 +232,15 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
 		fl6.saddr = iph6->saddr;
 
 		err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
-		if (err || IS_ERR(dst))
-			return -EINVAL;
+		if (err || IS_ERR(dst)) {
+			err = -EINVAL;
+			goto err;
+		}
 	}
 	if (unlikely(dst->error)) {
 		dst_release(dst);
-		return -EINVAL;
+		err = -EINVAL;
+		goto err;
 	}
 
 	/* Although skb header was reserved in bpf_lwt_push_ip_encap(), it
@@ -246,17 +250,21 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
 	 */
 	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
 	if (unlikely(err))
-		return err;
+		goto err;
 
 	skb_dst_drop(skb);
 	skb_dst_set(skb, dst);
 
 	err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
 	if (unlikely(err))
-		return err;
+		goto err;
 
 	/* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
 	return LWTUNNEL_XMIT_DONE;
+
+err:
+	kfree_skb(skb);
+	return err;
 }
 
 static int bpf_xmit(struct sk_buff *skb)
-- 
2.21.0.rc0.258.g878e2cd30e-goog


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH bpf-next] bpf: fix memory leak in bpf_lwt_xmit_reroute
  2019-02-14  6:09 [PATCH bpf-next] bpf: fix memory leak in bpf_lwt_xmit_reroute Peter Oskolkov
@ 2019-02-14 18:11 ` David Ahern
  2019-02-14 18:42   ` Peter Oskolkov
  0 siblings, 1 reply; 12+ messages in thread
From: David Ahern @ 2019-02-14 18:11 UTC (permalink / raw)
  To: Peter Oskolkov, Alexei Starovoitov, Daniel Borkmann, netdev
  Cc: Peter Oskolkov, Willem de Bruijn

On 2/13/19 11:09 PM, Peter Oskolkov wrote:
> On error the skb should be freed. Tested with diff/steps
> provided by David Ahern.
> 
> Reported-by: David Ahern <dsahern@gmail.com>
> Fixes: 3bd0b15281af ("bpf: add handling of BPF_LWT_REROUTE to lwt_bpf.c")
> Signed-off-by: Peter Oskolkov <posk@google.com>
> ---
>  net/core/lwt_bpf.c | 24 ++++++++++++++++--------
>  1 file changed, 16 insertions(+), 8 deletions(-)
> 
> diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
> index 32251f3fcda0..f3273cbb6b22 100644
> --- a/net/core/lwt_bpf.c
> +++ b/net/core/lwt_bpf.c
> @@ -179,18 +179,19 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>  	struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
>  	int oif = l3mdev ? l3mdev->ifindex : 0;
>  	struct dst_entry *dst = NULL;
> +	int err = -EAFNOSUPPORT;
>  	struct sock *sk;
>  	struct net *net;
>  	bool ipv4;
> -	int err;
>  
>  	if (skb->protocol == htons(ETH_P_IP))
>  		ipv4 = true;
>  	else if (skb->protocol == htons(ETH_P_IPV6))
>  		ipv4 = false;
>  	else
> -		return -EAFNOSUPPORT;
> +		goto err;
>  
> +	err = -EINVAL;
>  	sk = sk_to_full_sk(skb->sk);
>  	if (sk) {
>  		if (sk->sk_bound_dev_if)
> @@ -216,7 +217,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>  
>  		rt = ip_route_output_key(net, &fl4);
>  		if (IS_ERR(rt))
> -			return -EINVAL;
> +			goto err;
>  		dst = &rt->dst;
>  	} else {
>  		struct ipv6hdr *iph6 = ipv6_hdr(skb);
> @@ -231,12 +232,15 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>  		fl6.saddr = iph6->saddr;
>  
>  		err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
> -		if (err || IS_ERR(dst))
> -			return -EINVAL;
> +		if (err || IS_ERR(dst)) {
> +			err = -EINVAL;
> +			goto err;
> +		}
>  	}
>  	if (unlikely(dst->error)) {
>  		dst_release(dst);
> -		return -EINVAL;
> +		err = -EINVAL;
> +		goto err;
>  	}
>  
>  	/* Although skb header was reserved in bpf_lwt_push_ip_encap(), it

EINVAL is a confusing return code; it is not an EINVAL problem, it is a
routing problem:

...
starting egress IPv4 encap test
ping: sendmsg: Invalid argument
FAIL: test_ping: 1


Versus returning the error from the lookup:
...
starting egress IPv4 encap test
ping: sendmsg: No route to host
FAIL: test_ping: 1


diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index f3273cbb6b22..a1901ba319fc 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -191,7 +191,6 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
        else
                goto err;

-       err = -EINVAL;
        sk = sk_to_full_sk(skb->sk);
        if (sk) {
                if (sk->sk_bound_dev_if)
@@ -216,8 +215,10 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
                fl4.saddr = iph->saddr;

                rt = ip_route_output_key(net, &fl4);
-               if (IS_ERR(rt))
+               if (IS_ERR(rt)) {
+                       err = PTR_ERR(rt);
                        goto err;
+               }
                dst = &rt->dst;
        } else {
                struct ipv6hdr *iph6 = ipv6_hdr(skb);
@@ -232,14 +233,12 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
                fl6.saddr = iph6->saddr;

                err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
-               if (err || IS_ERR(dst)) {
-                       err = -EINVAL;
+               if (err || IS_ERR(dst))
                        goto err;
-               }
        }
        if (unlikely(dst->error)) {
                dst_release(dst);
-               err = -EINVAL;
+               err = dst->error;
                goto err;
        }




> @@ -246,17 +250,21 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>  	 */
>  	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
>  	if (unlikely(err))
> -		return err;
> +		goto err;
>  
>  	skb_dst_drop(skb);
>  	skb_dst_set(skb, dst);
>  
>  	err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
>  	if (unlikely(err))
> -		return err;
> +		goto err;
>  
>  	/* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
>  	return LWTUNNEL_XMIT_DONE;
> +
> +err:
> +	kfree_skb(skb);
> +	return err;
>  }
>  
>  static int bpf_xmit(struct sk_buff *skb)
> 

I figured it was a leaked skb.

Also, the test script needs to be updated as well with the negative
tests -- ie., toggle the route from a dev/gateway to a reject
(e.g.,unreachable) and back.

Also, don't exit on the first failure - run all of them.

Having the result line up is more user friendly. e.g.,

# ./fib_tests.sh

Single path route test
    Start point
    TEST: IPv4 fibmatch                                     [ OK ]
    TEST: IPv6 fibmatch                                     [ OK ]
    Nexthop device deleted
    TEST: IPv4 fibmatch - no route                          [ OK ]
    TEST: IPv6 fibmatch - no route                          [ OK ]
...

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH bpf-next] bpf: fix memory leak in bpf_lwt_xmit_reroute
  2019-02-14 18:11 ` David Ahern
@ 2019-02-14 18:42   ` Peter Oskolkov
  2019-02-14 19:10     ` David Ahern
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Oskolkov @ 2019-02-14 18:42 UTC (permalink / raw)
  To: David Ahern
  Cc: Peter Oskolkov, Alexei Starovoitov, Daniel Borkmann, netdev,
	Willem de Bruijn

On Thu, Feb 14, 2019 at 10:11 AM David Ahern <dsahern@gmail.com> wrote:
>
> On 2/13/19 11:09 PM, Peter Oskolkov wrote:
> > On error the skb should be freed. Tested with diff/steps
> > provided by David Ahern.
> >
> > Reported-by: David Ahern <dsahern@gmail.com>
> > Fixes: 3bd0b15281af ("bpf: add handling of BPF_LWT_REROUTE to lwt_bpf.c")
> > Signed-off-by: Peter Oskolkov <posk@google.com>
> > ---
> >  net/core/lwt_bpf.c | 24 ++++++++++++++++--------
> >  1 file changed, 16 insertions(+), 8 deletions(-)
> >
> > diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
> > index 32251f3fcda0..f3273cbb6b22 100644
> > --- a/net/core/lwt_bpf.c
> > +++ b/net/core/lwt_bpf.c
> > @@ -179,18 +179,19 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
> >       struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
> >       int oif = l3mdev ? l3mdev->ifindex : 0;
> >       struct dst_entry *dst = NULL;
> > +     int err = -EAFNOSUPPORT;
> >       struct sock *sk;
> >       struct net *net;
> >       bool ipv4;
> > -     int err;
> >
> >       if (skb->protocol == htons(ETH_P_IP))
> >               ipv4 = true;
> >       else if (skb->protocol == htons(ETH_P_IPV6))
> >               ipv4 = false;
> >       else
> > -             return -EAFNOSUPPORT;
> > +             goto err;
> >
> > +     err = -EINVAL;
> >       sk = sk_to_full_sk(skb->sk);
> >       if (sk) {
> >               if (sk->sk_bound_dev_if)
> > @@ -216,7 +217,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
> >
> >               rt = ip_route_output_key(net, &fl4);
> >               if (IS_ERR(rt))
> > -                     return -EINVAL;
> > +                     goto err;
> >               dst = &rt->dst;
> >       } else {
> >               struct ipv6hdr *iph6 = ipv6_hdr(skb);
> > @@ -231,12 +232,15 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
> >               fl6.saddr = iph6->saddr;
> >
> >               err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
> > -             if (err || IS_ERR(dst))
> > -                     return -EINVAL;
> > +             if (err || IS_ERR(dst)) {
> > +                     err = -EINVAL;
> > +                     goto err;
> > +             }
> >       }
> >       if (unlikely(dst->error)) {
> >               dst_release(dst);
> > -             return -EINVAL;
> > +             err = -EINVAL;
> > +             goto err;
> >       }
> >
> >       /* Although skb header was reserved in bpf_lwt_push_ip_encap(), it
>
> EINVAL is a confusing return code; it is not an EINVAL problem, it is a
> routing problem:

Thanks, David! Sent a v2 of the patch.

>
> ...
> starting egress IPv4 encap test
> ping: sendmsg: Invalid argument
> FAIL: test_ping: 1
>
>
> Versus returning the error from the lookup:
> ...
> starting egress IPv4 encap test
> ping: sendmsg: No route to host
> FAIL: test_ping: 1
>
>
> diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
> index f3273cbb6b22..a1901ba319fc 100644
> --- a/net/core/lwt_bpf.c
> +++ b/net/core/lwt_bpf.c
> @@ -191,7 +191,6 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>         else
>                 goto err;
>
> -       err = -EINVAL;
>         sk = sk_to_full_sk(skb->sk);
>         if (sk) {
>                 if (sk->sk_bound_dev_if)
> @@ -216,8 +215,10 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>                 fl4.saddr = iph->saddr;
>
>                 rt = ip_route_output_key(net, &fl4);
> -               if (IS_ERR(rt))
> +               if (IS_ERR(rt)) {
> +                       err = PTR_ERR(rt);
>                         goto err;
> +               }
>                 dst = &rt->dst;
>         } else {
>                 struct ipv6hdr *iph6 = ipv6_hdr(skb);
> @@ -232,14 +233,12 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
>                 fl6.saddr = iph6->saddr;
>
>                 err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
> -               if (err || IS_ERR(dst)) {
> -                       err = -EINVAL;
> +               if (err || IS_ERR(dst))
>                         goto err;
> -               }
>         }
>         if (unlikely(dst->error)) {
>                 dst_release(dst);
> -               err = -EINVAL;
> +               err = dst->error;
>                 goto err;
>         }
>
>
>
>
> > @@ -246,17 +250,21 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
> >        */
> >       err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
> >       if (unlikely(err))
> > -             return err;
> > +             goto err;
> >
> >       skb_dst_drop(skb);
> >       skb_dst_set(skb, dst);
> >
> >       err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
> >       if (unlikely(err))
> > -             return err;
> > +             goto err;
> >
> >       /* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
> >       return LWTUNNEL_XMIT_DONE;
> > +
> > +err:
> > +     kfree_skb(skb);
> > +     return err;
> >  }
> >
> >  static int bpf_xmit(struct sk_buff *skb)
> >
>
> I figured it was a leaked skb.
>
> Also, the test script needs to be updated as well with the negative
> tests -- ie., toggle the route from a dev/gateway to a reject
> (e.g.,unreachable) and back.
>
> Also, don't exit on the first failure - run all of them.

I'll refactor the test as you suggest here
when I add VRF and GRO tests in a couple of weeks, if this is OK.

>
> Having the result line up is more user friendly. e.g.,
>
> # ./fib_tests.sh
>
> Single path route test
>     Start point
>     TEST: IPv4 fibmatch                                     [ OK ]
>     TEST: IPv6 fibmatch                                     [ OK ]
>     Nexthop device deleted
>     TEST: IPv4 fibmatch - no route                          [ OK ]
>     TEST: IPv6 fibmatch - no route                          [ OK ]
> ...

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH bpf-next] bpf: fix memory leak in bpf_lwt_xmit_reroute
  2019-02-14 18:42   ` Peter Oskolkov
@ 2019-02-14 19:10     ` David Ahern
  2019-02-14 19:28       ` Peter Oskolkov
       [not found]       ` <CAPNVh5f_+BdAvoYH6jY7eQ4c4C6qF4-=o0fZ8-nKvvxROuN17Q@mail.gmail.com>
  0 siblings, 2 replies; 12+ messages in thread
From: David Ahern @ 2019-02-14 19:10 UTC (permalink / raw)
  To: Peter Oskolkov
  Cc: Peter Oskolkov, Alexei Starovoitov, Daniel Borkmann, netdev,
	Willem de Bruijn

On 2/14/19 11:42 AM, Peter Oskolkov wrote:
> I'll refactor the test as you suggest here
> when I add VRF and GRO tests in a couple of weeks, if this is OK.

IMO, the tests should go in with the feature, not a release later. If we
are at -rc6 this week then you might get next week as well.

The unreachable toggle is a fairly quick integration. GRO really should
also get in the same cycle as the feature. Preferably VRF tests as well
since you have the commands.

The pretty printing cleanup can be done later.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH bpf-next] bpf: fix memory leak in bpf_lwt_xmit_reroute
  2019-02-14 19:10     ` David Ahern
@ 2019-02-14 19:28       ` Peter Oskolkov
       [not found]       ` <CAPNVh5f_+BdAvoYH6jY7eQ4c4C6qF4-=o0fZ8-nKvvxROuN17Q@mail.gmail.com>
  1 sibling, 0 replies; 12+ messages in thread
From: Peter Oskolkov @ 2019-02-14 19:28 UTC (permalink / raw)
  To: David Ahern
  Cc: Peter Oskolkov, Alexei Starovoitov, Daniel Borkmann, netdev,
	Willem de Bruijn

On Thu, Feb 14, 2019 at 11:10 AM David Ahern <dsahern@gmail.com> wrote:
>
> On 2/14/19 11:42 AM, Peter Oskolkov wrote:
> > I'll refactor the test as you suggest here
> > when I add VRF and GRO tests in a couple of weeks, if this is OK.
>
> IMO, the tests should go in with the feature, not a release later. If we
> are at -rc6 this week then you might get next week as well.
>
> The unreachable toggle is a fairly quick integration. GRO really should
> also get in the same cycle as the feature. Preferably VRF tests as well
> since you have the commands.

OK, I'll work on the negative tests and GRO first.

>
> The pretty printing cleanup can be done later.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH bpf-next] bpf: fix memory leak in bpf_lwt_xmit_reroute
       [not found]       ` <CAPNVh5f_+BdAvoYH6jY7eQ4c4C6qF4-=o0fZ8-nKvvxROuN17Q@mail.gmail.com>
@ 2019-03-02  2:27         ` David Ahern
  2019-03-04  2:54           ` Willem de Bruijn
  0 siblings, 1 reply; 12+ messages in thread
From: David Ahern @ 2019-03-02  2:27 UTC (permalink / raw)
  To: Peter Oskolkov
  Cc: Peter Oskolkov, Alexei Starovoitov, Daniel Borkmann, netdev,
	Willem de Bruijn

On 2/28/19 10:57 AM, Peter Oskolkov wrote:
> David: I'm not sure how to test GSO (I assume we are talking about GSO
> here) in
> the selftest: the encapping code sets SKB_GSO_DODGY flag, and veth does
> not support
> dodginess: "tx-gso-robust: off [fixed]".
> 
> If the "dodgy" flag is not set, then gso validation in dev.c passes, and
> large GSO packets
> happily go through; if the "dodgy" flag is set, "dodgy" GSO packets are
> rejected, TCP does
> segmentation, and non-GSO packets happily go through (with an mtu tweak
> to the LWT tunnel).
> 
> So I see three options:
> - add a sysctl to _not_ set SKB_GSO_DODGY flag in lwt_bpf.c =>
> handle_gso_type();
> - change veth to accept "dodgy" GSO packets
> - test the code "as is", meaning that GSO will be tried and disabled by
> TCP stack
> 
> Which approach would you prefer?
> 

definitely not a sysctl.

After that, I don't have a suggestion for GSO at the moment.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH bpf-next] bpf: fix memory leak in bpf_lwt_xmit_reroute
  2019-03-02  2:27         ` David Ahern
@ 2019-03-04  2:54           ` Willem de Bruijn
  2019-03-04  4:05             ` Willem de Bruijn
  2019-03-04 20:39             ` Peter Oskolkov
  0 siblings, 2 replies; 12+ messages in thread
From: Willem de Bruijn @ 2019-03-04  2:54 UTC (permalink / raw)
  To: David Ahern
  Cc: Peter Oskolkov, Peter Oskolkov, Alexei Starovoitov,
	Daniel Borkmann, netdev

On Fri, Mar 1, 2019 at 9:27 PM David Ahern <dsahern@gmail.com> wrote:
>
> On 2/28/19 10:57 AM, Peter Oskolkov wrote:
> > David: I'm not sure how to test GSO (I assume we are talking about GSO
> > here) in
> > the selftest: the encapping code sets SKB_GSO_DODGY flag, and veth does
> > not support
> > dodginess: "tx-gso-robust: off [fixed]".
> >
> > If the "dodgy" flag is not set, then gso validation in dev.c passes, and
> > large GSO packets
> > happily go through; if the "dodgy" flag is set, "dodgy" GSO packets are
> > rejected, TCP does
> > segmentation, and non-GSO packets happily go through (with an mtu tweak
> > to the LWT tunnel).

Very few devices unconditionally accept dodgy packets (only veth?).

A device that lacks the robust gso feature will cause a gso packet
with dodgy flag to enter software gso instead of passing to device
segmentation offload.

That should be perfect for checking that the packets can be segmented
correctly with the new header.

If the gso layer drops the packets, that is not due to dropping all
dodgy sources. It will be dropped somewhere else inside gso,
indication that something is not as expected with the packet.

> > So I see three options:
> > - add a sysctl to _not_ set SKB_GSO_DODGY flag in lwt_bpf.c =>
> > handle_gso_type();
> > - change veth to accept "dodgy" GSO packets

Neither, as these would bypass segmentation offload and pass the large
packet to the receive path. It is more interesting to validate the
packet in gso.

> > - test the code "as is", meaning that GSO will be tried and disabled by
> > TCP stack
> >
> > Which approach would you prefer?
> >
>
> definitely not a sysctl.
>
> After that, I don't have a suggestion for GSO at the moment.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH bpf-next] bpf: fix memory leak in bpf_lwt_xmit_reroute
  2019-03-04  2:54           ` Willem de Bruijn
@ 2019-03-04  4:05             ` Willem de Bruijn
  2019-03-04 20:39             ` Peter Oskolkov
  1 sibling, 0 replies; 12+ messages in thread
From: Willem de Bruijn @ 2019-03-04  4:05 UTC (permalink / raw)
  To: David Ahern
  Cc: Peter Oskolkov, Peter Oskolkov, Alexei Starovoitov,
	Daniel Borkmann, netdev

On Sun, Mar 3, 2019 at 9:55 PM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> On Fri, Mar 1, 2019 at 9:27 PM David Ahern <dsahern@gmail.com> wrote:
> >
> > On 2/28/19 10:57 AM, Peter Oskolkov wrote:
> > > David: I'm not sure how to test GSO (I assume we are talking about GSO
> > > here) in
> > > the selftest: the encapping code sets SKB_GSO_DODGY flag, and veth does
> > > not support
> > > dodginess: "tx-gso-robust: off [fixed]".
> > >
> > > If the "dodgy" flag is not set, then gso validation in dev.c passes, and
> > > large GSO packets
> > > happily go through; if the "dodgy" flag is set, "dodgy" GSO packets are
> > > rejected, TCP does
> > > segmentation, and non-GSO packets happily go through (with an mtu tweak
> > > to the LWT tunnel).
>
> Very few devices unconditionally accept dodgy packets (only veth?).

virtio-net, I meant. But there are a few other virtual devices, like
macvlan and xen-netfront

> A device that lacks the robust gso feature will cause a gso packet
> with dodgy flag to enter software gso instead of passing to device
> segmentation offload.
>
> That should be perfect for checking that the packets can be segmented
> correctly with the new header.
>
> If the gso layer drops the packets, that is not due to dropping all
> dodgy sources. It will be dropped somewhere else inside gso,
> indication that something is not as expected with the packet.
>
> > > So I see three options:
> > > - add a sysctl to _not_ set SKB_GSO_DODGY flag in lwt_bpf.c =>
> > > handle_gso_type();
> > > - change veth to accept "dodgy" GSO packets
>
> Neither, as these would bypass segmentation offload and pass the large
> packet to the receive path. It is more interesting to validate the
> packet in gso.

That said, one solution would be to make veth gso_robust configurable
through ethtool, by advertising it in hw_features.



> > > - test the code "as is", meaning that GSO will be tried and disabled by
> > > TCP stack
> > >
> > > Which approach would you prefer?
> > >
> >
> > definitely not a sysctl.
> >
> > After that, I don't have a suggestion for GSO at the moment.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH bpf-next] bpf: fix memory leak in bpf_lwt_xmit_reroute
  2019-03-04  2:54           ` Willem de Bruijn
  2019-03-04  4:05             ` Willem de Bruijn
@ 2019-03-04 20:39             ` Peter Oskolkov
  2019-03-04 21:03               ` David Ahern
  1 sibling, 1 reply; 12+ messages in thread
From: Peter Oskolkov @ 2019-03-04 20:39 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: David Ahern, Peter Oskolkov, Alexei Starovoitov, Daniel Borkmann, netdev

On Sun, Mar 3, 2019 at 6:55 PM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> On Fri, Mar 1, 2019 at 9:27 PM David Ahern <dsahern@gmail.com> wrote:
> >
> > On 2/28/19 10:57 AM, Peter Oskolkov wrote:
> > > David: I'm not sure how to test GSO (I assume we are talking about GSO
> > > here) in
> > > the selftest: the encapping code sets SKB_GSO_DODGY flag, and veth does
> > > not support
> > > dodginess: "tx-gso-robust: off [fixed]".
> > >
> > > If the "dodgy" flag is not set, then gso validation in dev.c passes, and
> > > large GSO packets
> > > happily go through; if the "dodgy" flag is set, "dodgy" GSO packets are
> > > rejected, TCP does
> > > segmentation, and non-GSO packets happily go through (with an mtu tweak
> > > to the LWT tunnel).
>
> Very few devices unconditionally accept dodgy packets (only veth?).
>
> A device that lacks the robust gso feature will cause a gso packet
> with dodgy flag to enter software gso instead of passing to device
> segmentation offload.
>
> That should be perfect for checking that the packets can be segmented
> correctly with the new header.
>
> If the gso layer drops the packets, that is not due to dropping all
> dodgy sources. It will be dropped somewhere else inside gso,
> indication that something is not as expected with the packet.
>
> > > So I see three options:
> > > - add a sysctl to _not_ set SKB_GSO_DODGY flag in lwt_bpf.c =>
> > > handle_gso_type();
> > > - change veth to accept "dodgy" GSO packets
>
> Neither, as these would bypass segmentation offload and pass the large
> packet to the receive path. It is more interesting to validate the
> packet in gso.

I found the problem: skb->inner_protocol was not set, so software GSO
fallback failed.  I have a patch that fixes the issue: IPIP+GRE+TCP
gso works! net-next is closed though... Will have to wait for net-next
to reopen.


>
> > > - test the code "as is", meaning that GSO will be tried and disabled by
> > > TCP stack
> > >
> > > Which approach would you prefer?
> > >
> >
> > definitely not a sysctl.
> >
> > After that, I don't have a suggestion for GSO at the moment.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH bpf-next] bpf: fix memory leak in bpf_lwt_xmit_reroute
  2019-03-04 20:39             ` Peter Oskolkov
@ 2019-03-04 21:03               ` David Ahern
  2019-03-04 22:37                 ` Peter Oskolkov
  0 siblings, 1 reply; 12+ messages in thread
From: David Ahern @ 2019-03-04 21:03 UTC (permalink / raw)
  To: Peter Oskolkov, Willem de Bruijn
  Cc: Peter Oskolkov, Alexei Starovoitov, Daniel Borkmann, netdev

On 3/4/19 1:39 PM, Peter Oskolkov wrote:
> I found the problem: skb->inner_protocol was not set, so software GSO
> fallback failed.  I have a patch that fixes the issue: IPIP+GRE+TCP
> gso works! net-next is closed though... Will have to wait for net-next
> to reopen.

That's a bug fix. I suggest sending now.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH bpf-next] bpf: fix memory leak in bpf_lwt_xmit_reroute
  2019-03-04 21:03               ` David Ahern
@ 2019-03-04 22:37                 ` Peter Oskolkov
  2019-03-04 23:28                   ` Eric Dumazet
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Oskolkov @ 2019-03-04 22:37 UTC (permalink / raw)
  To: David Ahern
  Cc: Willem de Bruijn, Peter Oskolkov, Alexei Starovoitov,
	Daniel Borkmann, netdev

On Mon, Mar 4, 2019 at 1:03 PM David Ahern <dsahern@gmail.com> wrote:
>
> On 3/4/19 1:39 PM, Peter Oskolkov wrote:
> > I found the problem: skb->inner_protocol was not set, so software GSO
> > fallback failed.  I have a patch that fixes the issue: IPIP+GRE+TCP
> > gso works! net-next is closed though... Will have to wait for net-next
> > to reopen.
>
> That's a bug fix. I suggest sending now.

I see the encap patches neither in net nor in bpf trees, only in
net-next and bpf-next. And *-next trees are closed, so there is
nowhere to send the fix. Am I missing something?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH bpf-next] bpf: fix memory leak in bpf_lwt_xmit_reroute
  2019-03-04 22:37                 ` Peter Oskolkov
@ 2019-03-04 23:28                   ` Eric Dumazet
  0 siblings, 0 replies; 12+ messages in thread
From: Eric Dumazet @ 2019-03-04 23:28 UTC (permalink / raw)
  To: Peter Oskolkov, David Ahern
  Cc: Willem de Bruijn, Peter Oskolkov, Alexei Starovoitov,
	Daniel Borkmann, netdev



On 03/04/2019 02:37 PM, Peter Oskolkov wrote:
> On Mon, Mar 4, 2019 at 1:03 PM David Ahern <dsahern@gmail.com> wrote:
>>
>> On 3/4/19 1:39 PM, Peter Oskolkov wrote:
>>> I found the problem: skb->inner_protocol was not set, so software GSO
>>> fallback failed.  I have a patch that fixes the issue: IPIP+GRE+TCP
>>> gso works! net-next is closed though... Will have to wait for net-next
>>> to reopen.
>>
>> That's a bug fix. I suggest sending now.
> 
> I see the encap patches neither in net nor in bpf trees, only in
> net-next and bpf-next. And *-next trees are closed, so there is
> nowhere to send the fix. Am I missing something?
> 

David has not yet sent his pull request to Linus.

This means your fix needs to target net-next

net-next is closed for patches that are meant for linux-5.2,
but 'open' for fixes (targeting 5.1)


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2019-03-04 23:28 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-14  6:09 [PATCH bpf-next] bpf: fix memory leak in bpf_lwt_xmit_reroute Peter Oskolkov
2019-02-14 18:11 ` David Ahern
2019-02-14 18:42   ` Peter Oskolkov
2019-02-14 19:10     ` David Ahern
2019-02-14 19:28       ` Peter Oskolkov
     [not found]       ` <CAPNVh5f_+BdAvoYH6jY7eQ4c4C6qF4-=o0fZ8-nKvvxROuN17Q@mail.gmail.com>
2019-03-02  2:27         ` David Ahern
2019-03-04  2:54           ` Willem de Bruijn
2019-03-04  4:05             ` Willem de Bruijn
2019-03-04 20:39             ` Peter Oskolkov
2019-03-04 21:03               ` David Ahern
2019-03-04 22:37                 ` Peter Oskolkov
2019-03-04 23:28                   ` Eric Dumazet

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.