netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH v4 net] net: udp: fix Fast/frag0 UDP GRO
@ 2020-11-10  0:28 Alexander Lobakin
  2020-11-10 18:49 ` Willem de Bruijn
  0 siblings, 1 reply; 8+ messages in thread
From: Alexander Lobakin @ 2020-11-10  0:28 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski
  Cc: Alexander Lobakin, Alexey Kuznetsov, Hideaki YOSHIFUJI,
	Paolo Abeni, Willem de Bruijn, Steffen Klassert, Eric Dumazet,
	netdev, linux-kernel

From: Alexander Lobakin <alobakin@pm.me>
Date: Tue, 10 Nov 2020 00:17:18 +0000

> While testing UDP GSO fraglists forwarding through driver that uses
> Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order
> iperf packets:
>
> [ ID] Interval           Transfer     Bitrate         Jitter
> [SUM]  0.0-40.0 sec  12106 datagrams received out-of-order
>
> Simple switch to napi_gro_receive() or any other method without frag0
> shortcut completely resolved them.
>
> I've found that UDP GRO uses udp_hdr(skb) in its .gro_receive()
> callback. While it's probably OK for non-frag0 paths (when all
> headers or even the entire frame are already in skb->data), this
> inline points to junk when using Fast GRO (napi_gro_frags() or
> napi_gro_receive() with only Ethernet header in skb->data and all
> the rest in shinfo->frags) and breaks GRO packet compilation and
> the packet flow itself.
> To support both modes, skb_gro_header_fast() + skb_gro_header_slow()
> are typically used. UDP even has an inline helper that makes use of
> them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr()
> to get rid of the out-of-order delivers.
>
> Present since the introduction of plain UDP GRO in 5.0-rc1.
>
> Since v3 [1]:
>  - restore the original {,__}udp{4,6}_lib_lookup_skb() and use
>    private versions of them inside GRO code (Willem).

Note: this doesn't cover a support for nested tunnels as it's out of
the subject and requires more invasive changes. It will be handled
separately in net-next series.

> Since v2 [2]:
>  - dropped redundant check introduced in v2 as it's performed right
>    before (thanks to Eric);
>  - udp_hdr() switched to data + off for skbs from list (also Eric);
>  - fixed possible malfunction of {,__}udp{4,6}_lib_lookup_skb() with
>    Fast/frag0 due to ip{,v6}_hdr() usage (Willem).
>
> Since v1 [3]:
>  - added a NULL pointer check for "uh" as suggested by Willem.
>
> [1] https://lore.kernel.org/netdev/MgZce9htmEtCtHg7pmWxXXfdhmQ6AHrnltXC41zOoo@cp7-web-042.plabs.ch
> [2] https://lore.kernel.org/netdev/0eaG8xtbtKY1dEKCTKUBubGiC9QawGgB3tVZtNqVdY@cp4-web-030.plabs.ch
> [3] https://lore.kernel.org/netdev/YazU6GEzBdpyZMDMwJirxDX7B4sualpDG68ADZYvJI@cp4-web-034.plabs.ch
>
> Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
> Cc: Eric Dumazet <edumazet@google.com>
> Cc: Willem de Bruijn <willemb@google.com>
> Signed-off-by: Alexander Lobakin <alobakin@pm.me>
> ---
>  net/ipv4/udp_offload.c | 23 +++++++++++++++++++----
>  net/ipv6/udp_offload.c | 14 +++++++++++++-
>  2 files changed, 32 insertions(+), 5 deletions(-)
>
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index e67a66fbf27b..6064efe17cdb 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -366,11 +366,11 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
>  static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>  					       struct sk_buff *skb)
>  {
> -	struct udphdr *uh = udp_hdr(skb);
> +	struct udphdr *uh = udp_gro_udphdr(skb);
>  	struct sk_buff *pp = NULL;
>  	struct udphdr *uh2;
>  	struct sk_buff *p;
> -	unsigned int ulen;
> +	u32 ulen, off;
>  	int ret = 0;
>
>  	/* requires non zero csum, for symmetry with GSO */
> @@ -385,6 +385,9 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>  		NAPI_GRO_CB(skb)->flush = 1;
>  		return NULL;
>  	}
> +
> +	off = skb_gro_offset(skb);
> +
>  	/* pull encapsulating udp header */
>  	skb_gro_pull(skb, sizeof(struct udphdr));
>
> @@ -392,7 +395,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>  		if (!NAPI_GRO_CB(p)->same_flow)
>  			continue;
>
> -		uh2 = udp_hdr(p);
> +		uh2 = (void *)p->data + off;
>
>  		/* Match ports only, as csum is always non zero */
>  		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
> @@ -500,6 +503,16 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
>  }
>  EXPORT_SYMBOL(udp_gro_receive);
>
> +static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
> +					__be16 dport)
> +{
> +	const struct iphdr *iph = skb_gro_network_header(skb);
> +
> +	return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
> +				 iph->daddr, dport, inet_iif(skb),
> +				 inet_sdif(skb), &udp_table, NULL);
> +}
> +
>  INDIRECT_CALLABLE_SCOPE
>  struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
>  {
> @@ -523,7 +536,9 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
>  skip:
>  	NAPI_GRO_CB(skb)->is_ipv6 = 0;
>  	rcu_read_lock();
> -	sk = static_branch_unlikely(&udp_encap_needed_key) ? udp4_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
> +	sk = static_branch_unlikely(&udp_encap_needed_key) ?
> +	     udp4_gro_lookup_skb(skb, uh->source, uh->dest) :
> +	     NULL;
>  	pp = udp_gro_receive(head, skb, uh, sk);
>  	rcu_read_unlock();
>  	return pp;
> diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
> index 584157a07759..b126ab2120d9 100644
> --- a/net/ipv6/udp_offload.c
> +++ b/net/ipv6/udp_offload.c
> @@ -111,6 +111,16 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
>  	return segs;
>  }
>
> +static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
> +					__be16 dport)
> +{
> +	const struct ipv6hdr *iph = skb_gro_network_header(skb);
> +
> +	return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
> +				 &iph->daddr, dport, inet6_iif(skb),
> +				 inet6_sdif(skb), &udp_table, NULL);
> +}
> +
>  INDIRECT_CALLABLE_SCOPE
>  struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
>  {
> @@ -135,7 +145,9 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
>  skip:
>  	NAPI_GRO_CB(skb)->is_ipv6 = 1;
>  	rcu_read_lock();
> -	sk = static_branch_unlikely(&udpv6_encap_needed_key) ? udp6_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
> +	sk = static_branch_unlikely(&udpv6_encap_needed_key) ?
> +	     udp6_gro_lookup_skb(skb, uh->source, uh->dest) :
> +	     NULL;
>  	pp = udp_gro_receive(head, skb, uh, sk);
>  	rcu_read_unlock();
>  	return pp;
> --
> 2.29.2

Al


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 net] net: udp: fix Fast/frag0 UDP GRO
  2020-11-10  0:28 [PATCH v4 net] net: udp: fix Fast/frag0 UDP GRO Alexander Lobakin
@ 2020-11-10 18:49 ` Willem de Bruijn
  2020-11-11 11:29   ` Alexander Lobakin
  0 siblings, 1 reply; 8+ messages in thread
From: Willem de Bruijn @ 2020-11-10 18:49 UTC (permalink / raw)
  To: Alexander Lobakin
  Cc: David S. Miller, Jakub Kicinski, Alexey Kuznetsov,
	Hideaki YOSHIFUJI, Paolo Abeni, Steffen Klassert, Eric Dumazet,
	Network Development, linux-kernel

On Mon, Nov 9, 2020 at 7:29 PM Alexander Lobakin <alobakin@pm.me> wrote:
>
> From: Alexander Lobakin <alobakin@pm.me>
> Date: Tue, 10 Nov 2020 00:17:18 +0000
>
> > While testing UDP GSO fraglists forwarding through driver that uses
> > Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order
> > iperf packets:
> >
> > [ ID] Interval           Transfer     Bitrate         Jitter
> > [SUM]  0.0-40.0 sec  12106 datagrams received out-of-order
> >
> > Simple switch to napi_gro_receive() or any other method without frag0
> > shortcut completely resolved them.
> >
> > I've found that UDP GRO uses udp_hdr(skb) in its .gro_receive()
> > callback. While it's probably OK for non-frag0 paths (when all
> > headers or even the entire frame are already in skb->data), this
> > inline points to junk when using Fast GRO (napi_gro_frags() or
> > napi_gro_receive() with only Ethernet header in skb->data and all
> > the rest in shinfo->frags) and breaks GRO packet compilation and
> > the packet flow itself.
> > To support both modes, skb_gro_header_fast() + skb_gro_header_slow()
> > are typically used. UDP even has an inline helper that makes use of
> > them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr()
> > to get rid of the out-of-order delivers.
> >
> > Present since the introduction of plain UDP GRO in 5.0-rc1.
> >
> > Since v3 [1]:
> >  - restore the original {,__}udp{4,6}_lib_lookup_skb() and use
> >    private versions of them inside GRO code (Willem).
>
> Note: this doesn't cover a support for nested tunnels as it's out of
> the subject and requires more invasive changes. It will be handled
> separately in net-next series.

Thanks for looking into that.

In that case, should the p->data + off change be deferred to that,
too? It adds some risk unrelated to the bug fix.

> > Since v2 [2]:
> >  - dropped redundant check introduced in v2 as it's performed right
> >    before (thanks to Eric);
> >  - udp_hdr() switched to data + off for skbs from list (also Eric);
> >  - fixed possible malfunction of {,__}udp{4,6}_lib_lookup_skb() with
> >    Fast/frag0 due to ip{,v6}_hdr() usage (Willem).
> >
> > Since v1 [3]:
> >  - added a NULL pointer check for "uh" as suggested by Willem.
> >
> > [1] https://lore.kernel.org/netdev/MgZce9htmEtCtHg7pmWxXXfdhmQ6AHrnltXC41zOoo@cp7-web-042.plabs.ch
> > [2] https://lore.kernel.org/netdev/0eaG8xtbtKY1dEKCTKUBubGiC9QawGgB3tVZtNqVdY@cp4-web-030.plabs.ch
> > [3] https://lore.kernel.org/netdev/YazU6GEzBdpyZMDMwJirxDX7B4sualpDG68ADZYvJI@cp4-web-034.plabs.ch
> >
> > Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
> > Cc: Eric Dumazet <edumazet@google.com>
> > Cc: Willem de Bruijn <willemb@google.com>
> > Signed-off-by: Alexander Lobakin <alobakin@pm.me>
> > ---
> >  net/ipv4/udp_offload.c | 23 +++++++++++++++++++----
> >  net/ipv6/udp_offload.c | 14 +++++++++++++-
> >  2 files changed, 32 insertions(+), 5 deletions(-)
> >
> > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > index e67a66fbf27b..6064efe17cdb 100644
> > --- a/net/ipv4/udp_offload.c
> > +++ b/net/ipv4/udp_offload.c
> > @@ -366,11 +366,11 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
> >  static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
> >                                              struct sk_buff *skb)
> >  {
> > -     struct udphdr *uh = udp_hdr(skb);
> > +     struct udphdr *uh = udp_gro_udphdr(skb);
> >       struct sk_buff *pp = NULL;
> >       struct udphdr *uh2;
> >       struct sk_buff *p;
> > -     unsigned int ulen;
> > +     u32 ulen, off;

a specific reason for changing type here?

> >       int ret = 0;
> >
> >       /* requires non zero csum, for symmetry with GSO */
> > @@ -385,6 +385,9 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
> >               NAPI_GRO_CB(skb)->flush = 1;
> >               return NULL;
> >       }
> > +
> > +     off = skb_gro_offset(skb);
> > +
> >       /* pull encapsulating udp header */
> >       skb_gro_pull(skb, sizeof(struct udphdr));
> >
> > @@ -392,7 +395,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
> >               if (!NAPI_GRO_CB(p)->same_flow)
> >                       continue;
> >
> > -             uh2 = udp_hdr(p);
> > +             uh2 = (void *)p->data + off;
> >
> >               /* Match ports only, as csum is always non zero */
> >               if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
> > @@ -500,6 +503,16 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
> >  }
> >  EXPORT_SYMBOL(udp_gro_receive);
> >
> > +static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
> > +                                     __be16 dport)
> > +{
> > +     const struct iphdr *iph = skb_gro_network_header(skb);
> > +
> > +     return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
> > +                              iph->daddr, dport, inet_iif(skb),
> > +                              inet_sdif(skb), &udp_table, NULL);
> > +}
> > +
> >  INDIRECT_CALLABLE_SCOPE
> >  struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
> >  {
> > @@ -523,7 +536,9 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
> >  skip:
> >       NAPI_GRO_CB(skb)->is_ipv6 = 0;
> >       rcu_read_lock();
> > -     sk = static_branch_unlikely(&udp_encap_needed_key) ? udp4_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
> > +     sk = static_branch_unlikely(&udp_encap_needed_key) ?
> > +          udp4_gro_lookup_skb(skb, uh->source, uh->dest) :
> > +          NULL;

Does this indentation pass checkpatch?

Else, the line limit is no longer strict,a and this only shortens the
line, so a single line is fine.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 net] net: udp: fix Fast/frag0 UDP GRO
  2020-11-10 18:49 ` Willem de Bruijn
@ 2020-11-11 11:29   ` Alexander Lobakin
  2020-11-11 14:58     ` Willem de Bruijn
  2020-11-11 16:26     ` Jakub Kicinski
  0 siblings, 2 replies; 8+ messages in thread
From: Alexander Lobakin @ 2020-11-11 11:29 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Alexander Lobakin, David S. Miller, Jakub Kicinski,
	Alexey Kuznetsov, Hideaki YOSHIFUJI, Paolo Abeni,
	Steffen Klassert, Eric Dumazet, netdev, linux-kernel

From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Date: Tue, 10 Nov 2020 13:49:56 -0500

> On Mon, Nov 9, 2020 at 7:29 PM Alexander Lobakin <alobakin@pm.me> wrote:
>>
>> From: Alexander Lobakin <alobakin@pm.me>
>> Date: Tue, 10 Nov 2020 00:17:18 +0000
>>
>>> While testing UDP GSO fraglists forwarding through driver that uses
>>> Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order
>>> iperf packets:
>>>
>>> [ ID] Interval           Transfer     Bitrate         Jitter
>>> [SUM]  0.0-40.0 sec  12106 datagrams received out-of-order
>>>
>>> Simple switch to napi_gro_receive() or any other method without frag0
>>> shortcut completely resolved them.
>>>
>>> I've found that UDP GRO uses udp_hdr(skb) in its .gro_receive()
>>> callback. While it's probably OK for non-frag0 paths (when all
>>> headers or even the entire frame are already in skb->data), this
>>> inline points to junk when using Fast GRO (napi_gro_frags() or
>>> napi_gro_receive() with only Ethernet header in skb->data and all
>>> the rest in shinfo->frags) and breaks GRO packet compilation and
>>> the packet flow itself.
>>> To support both modes, skb_gro_header_fast() + skb_gro_header_slow()
>>> are typically used. UDP even has an inline helper that makes use of
>>> them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr()
>>> to get rid of the out-of-order delivers.
>>>
>>> Present since the introduction of plain UDP GRO in 5.0-rc1.
>>>
>>> Since v3 [1]:
>>>  - restore the original {,__}udp{4,6}_lib_lookup_skb() and use
>>>    private versions of them inside GRO code (Willem).
>>
>> Note: this doesn't cover a support for nested tunnels as it's out of
>> the subject and requires more invasive changes. It will be handled
>> separately in net-next series.
>
> Thanks for looking into that.

Thank you (and Eric) for all your comments and reviews :)

> In that case, should the p->data + off change be deferred to that,
> too? It adds some risk unrelated to the bug fix.

Change to p->data + off is absolutely safe and even can prevent from
any other potentional problems with Fast/frag0 GRO of UDP fraglists.
I find them pretty fragile currently.

>>> Since v2 [2]:
>>>  - dropped redundant check introduced in v2 as it's performed right
>>>    before (thanks to Eric);
>>>  - udp_hdr() switched to data + off for skbs from list (also Eric);
>>>  - fixed possible malfunction of {,__}udp{4,6}_lib_lookup_skb() with
>>>    Fast/frag0 due to ip{,v6}_hdr() usage (Willem).
>>>
>>> Since v1 [3]:
>>>  - added a NULL pointer check for "uh" as suggested by Willem.
>>>
>>> [1] https://lore.kernel.org/netdev/MgZce9htmEtCtHg7pmWxXXfdhmQ6AHrnltXC41zOoo@cp7-web-042.plabs.ch
>>> [2] https://lore.kernel.org/netdev/0eaG8xtbtKY1dEKCTKUBubGiC9QawGgB3tVZtNqVdY@cp4-web-030.plabs.ch
>>> [3] https://lore.kernel.org/netdev/YazU6GEzBdpyZMDMwJirxDX7B4sualpDG68ADZYvJI@cp4-web-034.plabs.ch
>>>
>>> Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
>>> Cc: Eric Dumazet <edumazet@google.com>
>>> Cc: Willem de Bruijn <willemb@google.com>
>>> Signed-off-by: Alexander Lobakin <alobakin@pm.me>
>>> ---
>>>  net/ipv4/udp_offload.c | 23 +++++++++++++++++++----
>>>  net/ipv6/udp_offload.c | 14 +++++++++++++-
>>>  2 files changed, 32 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
>>> index e67a66fbf27b..6064efe17cdb 100644
>>> --- a/net/ipv4/udp_offload.c
>>> +++ b/net/ipv4/udp_offload.c
>>> @@ -366,11 +366,11 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
>>>  static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>>>                                              struct sk_buff *skb)
>>>  {
>>> -     struct udphdr *uh = udp_hdr(skb);
>>> +     struct udphdr *uh = udp_gro_udphdr(skb);
>>>       struct sk_buff *pp = NULL;
>>>       struct udphdr *uh2;
>>>       struct sk_buff *p;
>>> -     unsigned int ulen;
>>> +     u32 ulen, off;
>
> a specific reason for changing type here?

Yep. unsigned int == u32, I had to add another variable, and the
easiest way to do this without breaking reverse christmas tree or
adding new lines was this.
Pure cosmetics, I can change this if somebody doesn't like that one.

>>>       int ret = 0;
>>>
>>>       /* requires non zero csum, for symmetry with GSO */
>>> @@ -385,6 +385,9 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>>>               NAPI_GRO_CB(skb)->flush = 1;
>>>               return NULL;
>>>       }
>>> +
>>> +     off = skb_gro_offset(skb);
>>> +
>>>       /* pull encapsulating udp header */
>>>       skb_gro_pull(skb, sizeof(struct udphdr));
>>>
>>> @@ -392,7 +395,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>>>               if (!NAPI_GRO_CB(p)->same_flow)
>>>                       continue;
>>>
>>> -             uh2 = udp_hdr(p);
>>> +             uh2 = (void *)p->data + off;
>>>
>>>               /* Match ports only, as csum is always non zero */
>>>               if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
>>> @@ -500,6 +503,16 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
>>>  }
>>>  EXPORT_SYMBOL(udp_gro_receive);
>>>
>>> +static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
>>> +                                     __be16 dport)
>>> +{
>>> +     const struct iphdr *iph = skb_gro_network_header(skb);
>>> +
>>> +     return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
>>> +                              iph->daddr, dport, inet_iif(skb),
>>> +                              inet_sdif(skb), &udp_table, NULL);
>>> +}
>>> +
>>>  INDIRECT_CALLABLE_SCOPE
>>>  struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
>>>  {
>>> @@ -523,7 +536,9 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
>>>  skip:
>>>       NAPI_GRO_CB(skb)->is_ipv6 = 0;
>>>       rcu_read_lock();
>>> -     sk = static_branch_unlikely(&udp_encap_needed_key) ? udp4_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
>>> +     sk = static_branch_unlikely(&udp_encap_needed_key) ?
>>> +          udp4_gro_lookup_skb(skb, uh->source, uh->dest) :
>>> +          NULL;
>
> Does this indentation pass checkpatch?

Sure, I always check my changes with checkpatch --strict.

> Else, the line limit is no longer strict,a and this only shortens the
> line, so a single line is fine.

These single lines is about 120 chars, don't find them eye-pleasant.
But, as with "u32" above, they're pure cosmetics and can be changed.

Thanks,
Al


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 net] net: udp: fix Fast/frag0 UDP GRO
  2020-11-11 11:29   ` Alexander Lobakin
@ 2020-11-11 14:58     ` Willem de Bruijn
  2020-11-11 16:26     ` Jakub Kicinski
  1 sibling, 0 replies; 8+ messages in thread
From: Willem de Bruijn @ 2020-11-11 14:58 UTC (permalink / raw)
  To: Alexander Lobakin
  Cc: David S. Miller, Jakub Kicinski, Alexey Kuznetsov,
	Hideaki YOSHIFUJI, Paolo Abeni, Steffen Klassert, Eric Dumazet,
	Network Development, linux-kernel

On Wed, Nov 11, 2020 at 6:29 AM Alexander Lobakin <alobakin@pm.me> wrote:
>
> From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
> Date: Tue, 10 Nov 2020 13:49:56 -0500
>
> > On Mon, Nov 9, 2020 at 7:29 PM Alexander Lobakin <alobakin@pm.me> wrote:
> >>
> >> From: Alexander Lobakin <alobakin@pm.me>
> >> Date: Tue, 10 Nov 2020 00:17:18 +0000
> >>
> >>> While testing UDP GSO fraglists forwarding through driver that uses
> >>> Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order
> >>> iperf packets:
> >>>
> >>> [ ID] Interval           Transfer     Bitrate         Jitter
> >>> [SUM]  0.0-40.0 sec  12106 datagrams received out-of-order
> >>>
> >>> Simple switch to napi_gro_receive() or any other method without frag0
> >>> shortcut completely resolved them.
> >>>
> >>> I've found that UDP GRO uses udp_hdr(skb) in its .gro_receive()
> >>> callback. While it's probably OK for non-frag0 paths (when all
> >>> headers or even the entire frame are already in skb->data), this
> >>> inline points to junk when using Fast GRO (napi_gro_frags() or
> >>> napi_gro_receive() with only Ethernet header in skb->data and all
> >>> the rest in shinfo->frags) and breaks GRO packet compilation and
> >>> the packet flow itself.
> >>> To support both modes, skb_gro_header_fast() + skb_gro_header_slow()
> >>> are typically used. UDP even has an inline helper that makes use of
> >>> them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr()
> >>> to get rid of the out-of-order delivers.
> >>>
> >>> Present since the introduction of plain UDP GRO in 5.0-rc1.
> >>>
> >>> Since v3 [1]:
> >>>  - restore the original {,__}udp{4,6}_lib_lookup_skb() and use
> >>>    private versions of them inside GRO code (Willem).
> >>
> >> Note: this doesn't cover a support for nested tunnels as it's out of
> >> the subject and requires more invasive changes. It will be handled
> >> separately in net-next series.
> >
> > Thanks for looking into that.
>
> Thank you (and Eric) for all your comments and reviews :)
>
> > In that case, should the p->data + off change be deferred to that,
> > too? It adds some risk unrelated to the bug fix.
>
> Change to p->data + off is absolutely safe and even can prevent from
> any other potentional problems with Fast/frag0 GRO of UDP fraglists.
> I find them pretty fragile currently.

Especially for fixes that go to net and eventually stable branches,
I'm in favor of the smallest possible change, minimizing odds of
unintended side effects.

Skipping this would also avoid the int to u32 change.

But admittedly at some point it is a matter of preference. Overall
makes sense to me. Thanks for the fix!

Acked-by: Willem de Bruijn <willemb@google.com>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 net] net: udp: fix Fast/frag0 UDP GRO
  2020-11-11 11:29   ` Alexander Lobakin
  2020-11-11 14:58     ` Willem de Bruijn
@ 2020-11-11 16:26     ` Jakub Kicinski
  2020-11-11 17:28       ` Willem de Bruijn
  1 sibling, 1 reply; 8+ messages in thread
From: Jakub Kicinski @ 2020-11-11 16:26 UTC (permalink / raw)
  To: Alexander Lobakin
  Cc: Willem de Bruijn, David S. Miller, Alexey Kuznetsov,
	Hideaki YOSHIFUJI, Paolo Abeni, Steffen Klassert, Eric Dumazet,
	netdev, linux-kernel

On Wed, 11 Nov 2020 11:29:06 +0000 Alexander Lobakin wrote:
> >>> +     sk = static_branch_unlikely(&udp_encap_needed_key) ?
> >>> +          udp4_gro_lookup_skb(skb, uh->source, uh->dest) :
> >>> +          NULL;  
> >
> > Does this indentation pass checkpatch?  
> 
> Sure, I always check my changes with checkpatch --strict.
> 
> > Else, the line limit is no longer strict,a and this only shortens the
> > line, so a single line is fine.  
> 
> These single lines is about 120 chars, don't find them eye-pleasant.
> But, as with "u32" above, they're pure cosmetics and can be changed.

let me chime in on the perhaps least important aspect of the patch :)

Is there are reason to use a ternary operator here at all?
Isn't this cleaner when written with an if statement?

	sk = NULL;
	if (static_branch_unlikely(&udp_encap_needed_key))
		sk = udp4_gro_lookup_skb(skb, uh->source, uh->dest);

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 net] net: udp: fix Fast/frag0 UDP GRO
  2020-11-11 16:26     ` Jakub Kicinski
@ 2020-11-11 17:28       ` Willem de Bruijn
  2020-11-11 19:26         ` Alexander Lobakin
  0 siblings, 1 reply; 8+ messages in thread
From: Willem de Bruijn @ 2020-11-11 17:28 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Alexander Lobakin, David S. Miller, Alexey Kuznetsov,
	Hideaki YOSHIFUJI, Paolo Abeni, Steffen Klassert, Eric Dumazet,
	Network Development, linux-kernel

On Wed, Nov 11, 2020 at 11:27 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Wed, 11 Nov 2020 11:29:06 +0000 Alexander Lobakin wrote:
> > >>> +     sk = static_branch_unlikely(&udp_encap_needed_key) ?
> > >>> +          udp4_gro_lookup_skb(skb, uh->source, uh->dest) :
> > >>> +          NULL;
> > >
> > > Does this indentation pass checkpatch?
> >
> > Sure, I always check my changes with checkpatch --strict.
> >
> > > Else, the line limit is no longer strict,a and this only shortens the
> > > line, so a single line is fine.
> >
> > These single lines is about 120 chars, don't find them eye-pleasant.
> > But, as with "u32" above, they're pure cosmetics and can be changed.
>
> let me chime in on the perhaps least important aspect of the patch :)
>
> Is there are reason to use a ternary operator here at all?
> Isn't this cleaner when written with an if statement?
>
>         sk = NULL;
>         if (static_branch_unlikely(&udp_encap_needed_key))
>                 sk = udp4_gro_lookup_skb(skb, uh->source, uh->dest);

Ah indeed :)

One other thing I missed before. The socket lookup is actually an
independent issue, introduced on commit a6024562ffd7 ("udp: Add GRO
functions to UDP socket"). Should be a separate Fixes tag, perhaps
even a separate patch.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 net] net: udp: fix Fast/frag0 UDP GRO
  2020-11-11 17:28       ` Willem de Bruijn
@ 2020-11-11 19:26         ` Alexander Lobakin
  0 siblings, 0 replies; 8+ messages in thread
From: Alexander Lobakin @ 2020-11-11 19:26 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Jakub Kicinski, Alexander Lobakin, David S. Miller,
	Alexey Kuznetsov, Hideaki YOSHIFUJI, Paolo Abeni,
	Steffen Klassert, Eric Dumazet, netdev, linux-kernel

From: Jakub Kicinski <kuba@kernel.org>

From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Date: Wed, 11 Nov 2020 12:28:21 -0500

> On Wed, Nov 11, 2020 at 11:27 AM Jakub Kicinski <kuba@kernel.org> wrote:
>>
>> On Wed, 11 Nov 2020 11:29:06 +0000 Alexander Lobakin wrote:
>>>>>> +     sk = static_branch_unlikely(&udp_encap_needed_key) ?
>>>>>> +          udp4_gro_lookup_skb(skb, uh->source, uh->dest) :
>>>>>> +          NULL;
>>>>
>>>> Does this indentation pass checkpatch?
>>>
>>> Sure, I always check my changes with checkpatch --strict.
>>>
>>>> Else, the line limit is no longer strict,a and this only shortens the
>>>> line, so a single line is fine.
>>>
>>> These single lines is about 120 chars, don't find them eye-pleasant.
>>> But, as with "u32" above, they're pure cosmetics and can be changed.
>>
>> let me chime in on the perhaps least important aspect of the patch :)
>>
>> Is there are reason to use a ternary operator here at all?
>> Isn't this cleaner when written with an if statement?
>>
>>         sk = NULL;
>>         if (static_branch_unlikely(&udp_encap_needed_key))
>>                 sk = udp4_gro_lookup_skb(skb, uh->source, uh->dest);

This idea came to me right after I submitted the last version
actually. Sure, there's absolutely no need to use a split ternary.

> Ah indeed :)
>
> One other thing I missed before. The socket lookup is actually an
> independent issue, introduced on commit a6024562ffd7 ("udp: Add GRO
> functions to UDP socket"). Should be a separate Fixes tag, perhaps
> even a separate patch.

Seems reasonable. I'll convert v5 to a pair.

Thanks,
Al


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v4 net] net: udp: fix Fast/frag0 UDP GRO
@ 2020-11-10  0:17 Alexander Lobakin
  0 siblings, 0 replies; 8+ messages in thread
From: Alexander Lobakin @ 2020-11-10  0:17 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski
  Cc: Alexey Kuznetsov, Hideaki YOSHIFUJI, Paolo Abeni,
	Willem de Bruijn, Steffen Klassert, Alexander Lobakin, netdev,
	linux-kernel, Eric Dumazet

While testing UDP GSO fraglists forwarding through driver that uses
Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order
iperf packets:

[ ID] Interval           Transfer     Bitrate         Jitter
[SUM]  0.0-40.0 sec  12106 datagrams received out-of-order

Simple switch to napi_gro_receive() or any other method without frag0
shortcut completely resolved them.

I've found that UDP GRO uses udp_hdr(skb) in its .gro_receive()
callback. While it's probably OK for non-frag0 paths (when all
headers or even the entire frame are already in skb->data), this
inline points to junk when using Fast GRO (napi_gro_frags() or
napi_gro_receive() with only Ethernet header in skb->data and all
the rest in shinfo->frags) and breaks GRO packet compilation and
the packet flow itself.
To support both modes, skb_gro_header_fast() + skb_gro_header_slow()
are typically used. UDP even has an inline helper that makes use of
them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr()
to get rid of the out-of-order delivers.

Present since the introduction of plain UDP GRO in 5.0-rc1.

Since v3 [1]:
 - restore the original {,__}udp{4,6}_lib_lookup_skb() and use
   private versions of them inside GRO code (Willem).

Since v2 [2]:
 - dropped redundant check introduced in v2 as it's performed right
   before (thanks to Eric);
 - udp_hdr() switched to data + off for skbs from list (also Eric);
 - fixed possible malfunction of {,__}udp{4,6}_lib_lookup_skb() with
   Fast/frag0 due to ip{,v6}_hdr() usage (Willem).

Since v1 [3]:
 - added a NULL pointer check for "uh" as suggested by Willem.

[1] https://lore.kernel.org/netdev/MgZce9htmEtCtHg7pmWxXXfdhmQ6AHrnltXC41zOoo@cp7-web-042.plabs.ch
[2] https://lore.kernel.org/netdev/0eaG8xtbtKY1dEKCTKUBubGiC9QawGgB3tVZtNqVdY@cp4-web-030.plabs.ch
[3] https://lore.kernel.org/netdev/YazU6GEzBdpyZMDMwJirxDX7B4sualpDG68ADZYvJI@cp4-web-034.plabs.ch

Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
Cc: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexander Lobakin <alobakin@pm.me>
---
 net/ipv4/udp_offload.c | 23 +++++++++++++++++++----
 net/ipv6/udp_offload.c | 14 +++++++++++++-
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index e67a66fbf27b..6064efe17cdb 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -366,11 +366,11 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 					       struct sk_buff *skb)
 {
-	struct udphdr *uh = udp_hdr(skb);
+	struct udphdr *uh = udp_gro_udphdr(skb);
 	struct sk_buff *pp = NULL;
 	struct udphdr *uh2;
 	struct sk_buff *p;
-	unsigned int ulen;
+	u32 ulen, off;
 	int ret = 0;
 
 	/* requires non zero csum, for symmetry with GSO */
@@ -385,6 +385,9 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 		NAPI_GRO_CB(skb)->flush = 1;
 		return NULL;
 	}
+
+	off = skb_gro_offset(skb);
+
 	/* pull encapsulating udp header */
 	skb_gro_pull(skb, sizeof(struct udphdr));
 
@@ -392,7 +395,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
-		uh2 = udp_hdr(p);
+		uh2 = (void *)p->data + off;
 
 		/* Match ports only, as csum is always non zero */
 		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
@@ -500,6 +503,16 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(udp_gro_receive);
 
+static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
+					__be16 dport)
+{
+	const struct iphdr *iph = skb_gro_network_header(skb);
+
+	return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
+				 iph->daddr, dport, inet_iif(skb),
+				 inet_sdif(skb), &udp_table, NULL);
+}
+
 INDIRECT_CALLABLE_SCOPE
 struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
@@ -523,7 +536,9 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 skip:
 	NAPI_GRO_CB(skb)->is_ipv6 = 0;
 	rcu_read_lock();
-	sk = static_branch_unlikely(&udp_encap_needed_key) ? udp4_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
+	sk = static_branch_unlikely(&udp_encap_needed_key) ?
+	     udp4_gro_lookup_skb(skb, uh->source, uh->dest) :
+	     NULL;
 	pp = udp_gro_receive(head, skb, uh, sk);
 	rcu_read_unlock();
 	return pp;
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 584157a07759..b126ab2120d9 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -111,6 +111,16 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 	return segs;
 }
 
+static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
+					__be16 dport)
+{
+	const struct ipv6hdr *iph = skb_gro_network_header(skb);
+
+	return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
+				 &iph->daddr, dport, inet6_iif(skb),
+				 inet6_sdif(skb), &udp_table, NULL);
+}
+
 INDIRECT_CALLABLE_SCOPE
 struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
@@ -135,7 +145,9 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
 skip:
 	NAPI_GRO_CB(skb)->is_ipv6 = 1;
 	rcu_read_lock();
-	sk = static_branch_unlikely(&udpv6_encap_needed_key) ? udp6_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
+	sk = static_branch_unlikely(&udpv6_encap_needed_key) ?
+	     udp6_gro_lookup_skb(skb, uh->source, uh->dest) :
+	     NULL;
 	pp = udp_gro_receive(head, skb, uh, sk);
 	rcu_read_unlock();
 	return pp;
-- 
2.29.2



^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-11-11 19:26 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-10  0:28 [PATCH v4 net] net: udp: fix Fast/frag0 UDP GRO Alexander Lobakin
2020-11-10 18:49 ` Willem de Bruijn
2020-11-11 11:29   ` Alexander Lobakin
2020-11-11 14:58     ` Willem de Bruijn
2020-11-11 16:26     ` Jakub Kicinski
2020-11-11 17:28       ` Willem de Bruijn
2020-11-11 19:26         ` Alexander Lobakin
  -- strict thread matches above, loose matches on Subject: below --
2020-11-10  0:17 Alexander Lobakin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).