* [PATCH RFC v2 1/3] UDP: enable GRO by default.
2019-01-28 8:50 [PATCH RFC v2 0/3] Support fraglist GRO/GSO Steffen Klassert
@ 2019-01-28 8:50 ` Steffen Klassert
2019-01-28 15:30 ` Paolo Abeni
[not found] ` <9615a29c028dea4f95efa7d1921f67b1a8c1f6e2.camel@redhat.com>
2019-01-28 8:50 ` [PATCH RFC v2 2/3] net: Support GRO/GSO fraglist chaining Steffen Klassert
2019-01-28 8:50 ` [PATCH RFC v2 3/3] udp: Support UDP fraglist GRO/GSO Steffen Klassert
2 siblings, 2 replies; 11+ messages in thread
From: Steffen Klassert @ 2019-01-28 8:50 UTC (permalink / raw)
To: netdev
Cc: Steffen Klassert, Willem de Bruijn, Paolo Abeni, Jason A. Donenfeld
This patch enables UDP GRO regardless of whether a GRO capable
socket is present. With this, GRO is done by default on both the
local input and forwarding paths.
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
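Note: a socket is "GRO capable" in the above sense when it has set the
UDP_GRO socket option, i.e. when udp_sk(sk)->gro_enabled is set. For
reference, a minimal (untested) userspace sketch of such a socket; the
UDP_GRO value is copied from include/uapi/linux/udp.h in case the libc
headers do not carry it yet:

	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <unistd.h>

	#ifndef UDP_GRO
	#define UDP_GRO 104
	#endif

	/* Open a UDP socket and mark it GRO capable, so that
	 * udp_gro_receive() aggregates segments for it.
	 */
	static int udp_gro_socket(void)
	{
		int one = 1;
		int fd = socket(AF_INET, SOCK_DGRAM, 0);

		if (fd < 0)
			return -1;

		if (setsockopt(fd, IPPROTO_UDP, UDP_GRO, &one,
			       sizeof(one)) < 0) {
			close(fd);
			return -1;
		}

		return fd;
	}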
include/net/udp.h | 2 +-
net/ipv4/udp_offload.c | 33 ++++++++++++++-------------------
net/ipv6/udp_offload.c | 8 +++++++-
3 files changed, 22 insertions(+), 21 deletions(-)
diff --git a/include/net/udp.h b/include/net/udp.h
index fd6d948755c8..2b8a0119264d 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -171,7 +171,7 @@ typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport,
__be16 dport);
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
- struct udphdr *uh, udp_lookup_t lookup);
+ struct udphdr *uh, struct sock *sk);
int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 64f9715173ac..584635db9231 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -392,35 +392,24 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
return NULL;
}
-INDIRECT_CALLABLE_DECLARE(struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
- __be16 sport, __be16 dport));
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
- struct udphdr *uh, udp_lookup_t lookup)
+ struct udphdr *uh, struct sock *sk)
{
struct sk_buff *pp = NULL;
struct sk_buff *p;
struct udphdr *uh2;
unsigned int off = skb_gro_offset(skb);
int flush = 1;
- struct sock *sk;
-
- rcu_read_lock();
- sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
- udp4_lib_lookup_skb, skb, uh->source, uh->dest);
- if (!sk)
- goto out_unlock;
- if (udp_sk(sk)->gro_enabled) {
+ if (!sk || !udp_sk(sk)->gro_receive) {
pp = call_gro_receive(udp_gro_receive_segment, head, skb);
- rcu_read_unlock();
return pp;
}
if (NAPI_GRO_CB(skb)->encap_mark ||
(skb->ip_summed != CHECKSUM_PARTIAL &&
NAPI_GRO_CB(skb)->csum_cnt == 0 &&
- !NAPI_GRO_CB(skb)->csum_valid) ||
- !udp_sk(sk)->gro_receive)
+ !NAPI_GRO_CB(skb)->csum_valid))
goto out_unlock;
/* mark that this skb passed once through the tunnel gro layer */
@@ -459,8 +448,10 @@ INDIRECT_CALLABLE_SCOPE
struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
struct udphdr *uh = udp_gro_udphdr(skb);
+ struct sk_buff *pp;
+ struct sock *sk;
- if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
+ if (unlikely(!uh))
goto flush;
/* Don't bother verifying checksum if we're going to flush anyway. */
@@ -475,7 +466,11 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
inet_gro_compute_pseudo);
skip:
NAPI_GRO_CB(skb)->is_ipv6 = 0;
- return udp_gro_receive(head, skb, uh, udp4_lib_lookup_skb);
+ rcu_read_lock();
+ sk = static_branch_unlikely(&udp_encap_needed_key) ? udp4_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
+ pp = udp_gro_receive(head, skb, uh, sk);
+ rcu_read_unlock();
+ return pp;
flush:
NAPI_GRO_CB(skb)->flush = 1;
@@ -508,9 +503,7 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
rcu_read_lock();
sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
udp4_lib_lookup_skb, skb, uh->source, uh->dest);
- if (sk && udp_sk(sk)->gro_enabled) {
- err = udp_gro_complete_segment(skb);
- } else if (sk && udp_sk(sk)->gro_complete) {
+ if (sk && udp_sk(sk)->gro_complete) {
skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
: SKB_GSO_UDP_TUNNEL;
@@ -520,6 +513,8 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
skb->encapsulation = 1;
err = udp_sk(sk)->gro_complete(sk, skb,
nhoff + sizeof(struct udphdr));
+ } else {
+ err = udp_gro_complete_segment(skb);
}
rcu_read_unlock();
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 83b11d0ac091..5f7937a4f71a 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -119,6 +119,8 @@ INDIRECT_CALLABLE_SCOPE
struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
{
struct udphdr *uh = udp_gro_udphdr(skb);
+ struct sk_buff *pp;
+ struct sock *sk;
if (unlikely(!uh) || !static_branch_unlikely(&udpv6_encap_needed_key))
goto flush;
@@ -136,7 +138,11 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
skip:
NAPI_GRO_CB(skb)->is_ipv6 = 1;
- return udp_gro_receive(head, skb, uh, udp6_lib_lookup_skb);
+ rcu_read_lock();
+ sk = static_branch_unlikely(&udp_encap_needed_key) ? udp6_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
+ pp = udp_gro_receive(head, skb, uh, sk);
+ rcu_read_unlock();
+ return pp;
flush:
NAPI_GRO_CB(skb)->flush = 1;
--
2.17.1
* Re: [PATCH RFC v2 1/3] UDP: enable GRO by default.
2019-01-28 8:50 ` [PATCH RFC v2 1/3] UDP: enable GRO by default Steffen Klassert
@ 2019-01-28 15:30 ` Paolo Abeni
[not found] ` <9615a29c028dea4f95efa7d1921f67b1a8c1f6e2.camel@redhat.com>
1 sibling, 0 replies; 11+ messages in thread
From: Paolo Abeni @ 2019-01-28 15:30 UTC (permalink / raw)
To: Steffen Klassert, netdev; +Cc: Willem de Bruijn, Jason A. Donenfeld
Hi,
On Mon, 2019-01-28 at 09:50 +0100, Steffen Klassert wrote:
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index 64f9715173ac..584635db9231 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -392,35 +392,24 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
> return NULL;
> }
>
> -INDIRECT_CALLABLE_DECLARE(struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
> - __be16 sport, __be16 dport));
> struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
> - struct udphdr *uh, udp_lookup_t lookup)
> + struct udphdr *uh, struct sock *sk)
> {
> struct sk_buff *pp = NULL;
> struct sk_buff *p;
> struct udphdr *uh2;
> unsigned int off = skb_gro_offset(skb);
> int flush = 1;
> - struct sock *sk;
> -
> - rcu_read_lock();
> - sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
> - udp4_lib_lookup_skb, skb, uh->source, uh->dest);
> - if (!sk)
> - goto out_unlock;
>
> - if (udp_sk(sk)->gro_enabled) {
> + if (!sk || !udp_sk(sk)->gro_receive) {
> pp = call_gro_receive(udp_gro_receive_segment, head, skb);
> - rcu_read_unlock();
> return pp;
> }
>
> if (NAPI_GRO_CB(skb)->encap_mark ||
> (skb->ip_summed != CHECKSUM_PARTIAL &&
> NAPI_GRO_CB(skb)->csum_cnt == 0 &&
> - !NAPI_GRO_CB(skb)->csum_valid) ||
> - !udp_sk(sk)->gro_receive)
> + !NAPI_GRO_CB(skb)->csum_valid))
> goto out_unlock;
Here I think an additional chunk is missing: the caller now holds the
RCU lock, so we should drop the rcu_read_unlock() from this function
(and likely rename the associated label).
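Something like this (completely untested, and assuming the tail of
udp_gro_receive() still ends in the usual out_unlock/out label pair),
i.e. rename out_unlock to out and leave RCU to the caller:

	if (NAPI_GRO_CB(skb)->encap_mark ||
	    (skb->ip_summed != CHECKSUM_PARTIAL &&
	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
	     !NAPI_GRO_CB(skb)->csum_valid))
		goto out;

	[...]

out:
	skb_gro_flush_final(skb, pp, flush);
	return pp;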
> /* mark that this skb passed once through the tunnel gro layer */
> @@ -459,8 +448,10 @@ INDIRECT_CALLABLE_SCOPE
> struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
> {
> struct udphdr *uh = udp_gro_udphdr(skb);
> + struct sk_buff *pp;
> + struct sock *sk;
>
> - if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
> + if (unlikely(!uh))
> goto flush;
>
> /* Don't bother verifying checksum if we're going to flush anyway. */
> @@ -475,7 +466,11 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
> inet_gro_compute_pseudo);
> skip:
> NAPI_GRO_CB(skb)->is_ipv6 = 0;
> - return udp_gro_receive(head, skb, uh, udp4_lib_lookup_skb);
> + rcu_read_lock();
> + sk = static_branch_unlikely(&udp_encap_needed_key) ? udp4_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
> + pp = udp_gro_receive(head, skb, uh, sk);
> + rcu_read_unlock();
> + return pp;
>
> flush:
> NAPI_GRO_CB(skb)->flush = 1;
_Unrelated_ to this patch, but IIRC the RCU lock is already held by
dev_gro_receive(), so IMHO a follow-up patch could possibly remove the
lock here and make the code smaller.
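E.g. (again completely untested) udp4_gro_receive() could then simply
do:

skip:
	NAPI_GRO_CB(skb)->is_ipv6 = 0;
	sk = static_branch_unlikely(&udp_encap_needed_key) ?
	     udp4_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
	return udp_gro_receive(head, skb, uh, sk);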
Apart from the first point above, I like this patch a lot!
Paolo
* [PATCH RFC v2 2/3] net: Support GRO/GSO fraglist chaining.
2019-01-28 8:50 [PATCH RFC v2 0/3] Support fraglist GRO/GSO Steffen Klassert
2019-01-28 8:50 ` [PATCH RFC v2 1/3] UDP: enable GRO by default Steffen Klassert
@ 2019-01-28 8:50 ` Steffen Klassert
2019-01-28 20:50 ` Willem de Bruijn
2019-01-28 8:50 ` [PATCH RFC v2 3/3] udp: Support UDP fraglist GRO/GSO Steffen Klassert
2 siblings, 1 reply; 11+ messages in thread
From: Steffen Klassert @ 2019-01-28 8:50 UTC (permalink / raw)
To: netdev
Cc: Steffen Klassert, Willem de Bruijn, Paolo Abeni, Jason A. Donenfeld
This patch adds the core functions to chain/unchain
GSO skbs at the frag_list pointer. It also adds
a new GSO type, SKB_GSO_FRAGLIST, and an is_flist
flag to napi_gro_cb which indicates that this
flow will be GROed by fraglist chaining.
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
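A rough picture of what this builds (illustrative only): after three
more segments have been merged by skb_gro_receive_list(), the
aggregate looks like

	p (GRO head: headers plus the first segment's payload)
	  skb_shinfo(p)->frag_list -> skb2 -> skb3 -> skb4
	  NAPI_GRO_CB(p)->last ------------------------^
	  p->len, p->data_len and p->truesize account for the chained
	  skbs, NAPI_GRO_CB(p)->count is 4

skb_segment_list() undoes this on segmentation: it moves the
frag_list chain to skb->next, restores the headers of each chained
skb from the head skb and hands back the segment list.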
include/linux/netdevice.h | 4 +-
include/linux/skbuff.h | 4 ++
net/core/dev.c | 2 +-
net/core/skbuff.c | 106 ++++++++++++++++++++++++++++++++++++++
4 files changed, 114 insertions(+), 2 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1377d085ef99..050cff782fbc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2300,7 +2300,8 @@ struct napi_gro_cb {
/* Number of gro_receive callbacks this packet already went through */
u8 recursion_counter:4;
- /* 1 bit hole */
+ /* GRO is done by frag_list pointer chaining. */
+ u8 is_flist:1;
/* used to support CHECKSUM_COMPLETE for tunneling protocols */
__wsum csum;
@@ -2660,6 +2661,7 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id);
int netdev_get_name(struct net *net, char *name, int ifindex);
int dev_restart(struct net_device *dev);
int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb);
static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
{
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2a57a365c711..b35a209c9c55 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -578,6 +578,8 @@ enum {
SKB_GSO_UDP = 1 << 16,
SKB_GSO_UDP_L4 = 1 << 17,
+
+ SKB_GSO_FRAGLIST = 1 << 18,
};
#if BITS_PER_LONG > 32
@@ -3369,6 +3371,8 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet);
bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu);
bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len);
struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
+struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features,
+ unsigned int offset);
struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
int skb_ensure_writable(struct sk_buff *skb, int write_len);
int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci);
diff --git a/net/core/dev.c b/net/core/dev.c
index 1b5a4410be0e..90b480b5bdf6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3101,7 +3101,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
segs = skb_mac_gso_segment(skb, features);
- if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
+ if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
skb_warn_bad_offload(skb);
return segs;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 37317ffec146..7cd5e9da21bd 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3462,6 +3462,112 @@ static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
return head_frag;
}
+struct sk_buff *skb_segment_list(struct sk_buff *skb,
+ netdev_features_t features,
+ unsigned int offset)
+{
+ struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
+ unsigned int tnl_hlen = skb_tnl_header_len(skb);
+ unsigned int delta_truesize = 0;
+ unsigned int delta_len = 0;
+ struct sk_buff *tail = NULL;
+ struct sk_buff *nskb;
+
+ skb_push(skb, -skb_network_offset(skb) + offset);
+
+ skb_shinfo(skb)->frag_list = NULL;
+
+ do {
+ nskb = list_skb;
+ list_skb = list_skb->next;
+
+ if (!tail)
+ skb->next = nskb;
+ else
+ tail->next = nskb;
+
+ tail = nskb;
+
+ delta_len += nskb->len;
+ delta_truesize += nskb->truesize;
+
+ skb_push(nskb, -skb_network_offset(nskb) + offset);
+
+ if (!secpath_exists(nskb))
+ __skb_ext_copy(nskb, skb);
+
+ memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
+
+ nskb->ip_summed = CHECKSUM_NONE;
+ nskb->csum_valid = 1;
+ nskb->tstamp = skb->tstamp;
+ nskb->dev = skb->dev;
+ nskb->queue_mapping = skb->queue_mapping;
+
+ nskb->mac_len = skb->mac_len;
+ nskb->mac_header = skb->mac_header;
+ nskb->transport_header = skb->transport_header;
+ nskb->network_header = skb->network_header;
+ skb_dst_copy(nskb, skb);
+
+ skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
+ skb_copy_from_linear_data_offset(skb, -tnl_hlen,
+ nskb->data - tnl_hlen,
+ offset + tnl_hlen);
+
+ if (skb_needs_linearize(nskb, features) &&
+ __skb_linearize(nskb)) {
+ kfree_skb_list(skb->next);
+ skb->next = NULL;
+ return ERR_PTR(-ENOMEM);
+ }
+ } while (list_skb);
+
+ skb->truesize = skb->truesize - delta_truesize;
+ skb->data_len = skb->data_len - delta_len;
+ skb->len = skb->len - delta_len;
+
+ skb_gso_reset(skb);
+
+ skb->prev = tail;
+
+ if (skb_needs_linearize(skb, features) &&
+ __skb_linearize(skb)) {
+ skb->next = NULL;
+ kfree_skb_list(skb->next);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ skb_get(skb);
+
+ return skb;
+}
+EXPORT_SYMBOL_GPL(skb_segment_list);
+
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
+{
+ if (unlikely(p->len + skb->len >= 65536))
+ return -E2BIG;
+
+ if (NAPI_GRO_CB(p)->last == p)
+ skb_shinfo(p)->frag_list = skb;
+ else
+ NAPI_GRO_CB(p)->last->next = skb;
+
+ skb_pull(skb, skb_gro_offset(skb));
+
+ NAPI_GRO_CB(p)->last = skb;
+ NAPI_GRO_CB(p)->count++;
+ p->data_len += skb->len;
+ p->truesize += skb->truesize;
+ p->len += skb->len;
+
+ NAPI_GRO_CB(skb)->same_flow = 1;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(skb_gro_receive_list);
+
/**
* skb_segment - Perform protocol segmentation on skb.
* @head_skb: buffer to segment
--
2.17.1
* Re: [PATCH RFC v2 2/3] net: Support GRO/GSO fraglist chaining.
2019-01-28 8:50 ` [PATCH RFC v2 2/3] net: Support GRO/GSO fraglist chaining Steffen Klassert
@ 2019-01-28 20:50 ` Willem de Bruijn
2019-02-13 11:49 ` Steffen Klassert
0 siblings, 1 reply; 11+ messages in thread
From: Willem de Bruijn @ 2019-01-28 20:50 UTC (permalink / raw)
To: Steffen Klassert
Cc: Network Development, Willem de Bruijn, Paolo Abeni, Jason A. Donenfeld
On Mon, Jan 28, 2019 at 2:53 AM Steffen Klassert
<steffen.klassert@secunet.com> wrote:
>
> This patch adds the core functions to chain/unchain
> GSO skbs at the frag_list pointer. This also adds
> a new GSO type SKB_GSO_FRAGLIST and a is_flist
> flag to napi_gro_cb which indicates that this
> flow will be GROed by fraglist chaining.
>
> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
> +struct sk_buff *skb_segment_list(struct sk_buff *skb,
> + netdev_features_t features,
> + unsigned int offset)
> +{
> + struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
> + unsigned int tnl_hlen = skb_tnl_header_len(skb);
> + unsigned int delta_truesize = 0;
> + unsigned int delta_len = 0;
> + struct sk_buff *tail = NULL;
> + struct sk_buff *nskb;
> +
> + skb_push(skb, -skb_network_offset(skb) + offset);
> +
> + skb_shinfo(skb)->frag_list = NULL;
> +
> + do {
> + nskb = list_skb;
> + list_skb = list_skb->next;
> +
> + if (!tail)
> + skb->next = nskb;
> + else
> + tail->next = nskb;
> +
> + tail = nskb;
> +
> + delta_len += nskb->len;
> + delta_truesize += nskb->truesize;
> +
> + skb_push(nskb, -skb_network_offset(nskb) + offset);
> +
> + if (!secpath_exists(nskb))
> + __skb_ext_copy(nskb, skb);
> +
> + memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
> +
> + nskb->ip_summed = CHECKSUM_NONE;
> + nskb->csum_valid = 1;
> + nskb->tstamp = skb->tstamp;
> + nskb->dev = skb->dev;
> + nskb->queue_mapping = skb->queue_mapping;
> +
> + nskb->mac_len = skb->mac_len;
> + nskb->mac_header = skb->mac_header;
> + nskb->transport_header = skb->transport_header;
> + nskb->network_header = skb->network_header;
> + skb_dst_copy(nskb, skb);
> +
> + skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
> + skb_copy_from_linear_data_offset(skb, -tnl_hlen,
> + nskb->data - tnl_hlen,
> + offset + tnl_hlen);
> +
> + if (skb_needs_linearize(nskb, features) &&
> + __skb_linearize(nskb)) {
> + kfree_skb_list(skb->next);
> + skb->next = NULL;
> + return ERR_PTR(-ENOMEM);
> + }
> + } while (list_skb);
> +
> + skb->truesize = skb->truesize - delta_truesize;
> + skb->data_len = skb->data_len - delta_len;
> + skb->len = skb->len - delta_len;
> +
> + skb_gso_reset(skb);
> +
> + skb->prev = tail;
> +
> + if (skb_needs_linearize(skb, features) &&
> + __skb_linearize(skb)) {
> + skb->next = NULL;
> + kfree_skb_list(skb->next);
inverse order: skb->next is cleared before kfree_skb_list() reads it,
so nothing is actually freed here
also, I would probably deduplicate with the same branch above in a new
err_linearize: block
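i.e. something like (untested):

err_linearize:
	kfree_skb_list(skb->next);
	skb->next = NULL;
	return ERR_PTR(-ENOMEM);

with both linearize failure branches doing a goto err_linearize.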
* Re: [PATCH RFC v2 2/3] net: Support GRO/GSO fraglist chaining.
2019-01-28 20:50 ` Willem de Bruijn
@ 2019-02-13 11:49 ` Steffen Klassert
0 siblings, 0 replies; 11+ messages in thread
From: Steffen Klassert @ 2019-02-13 11:49 UTC (permalink / raw)
To: Willem de Bruijn
Cc: Network Development, Willem de Bruijn, Paolo Abeni, Jason A. Donenfeld
On Mon, Jan 28, 2019 at 02:50:34PM -0600, Willem de Bruijn wrote:
> On Mon, Jan 28, 2019 at 2:53 AM Steffen Klassert
> <steffen.klassert@secunet.com> wrote:
> > +
> > + if (skb_needs_linearize(skb, features) &&
> > + __skb_linearize(skb)) {
> > + skb->next = NULL;
> > + kfree_skb_list(skb->next);
>
> inverse order
Oh yes, apparently.
>
> also, I would probably deduplicate with the same branch above in a new
> err_linearize: block
Will do that.
Thanks for the review!
* [PATCH RFC v2 3/3] udp: Support UDP fraglist GRO/GSO.
2019-01-28 8:50 [PATCH RFC v2 0/3] Support fraglist GRO/GSO Steffen Klassert
2019-01-28 8:50 ` [PATCH RFC v2 1/3] UDP: enable GRO by default Steffen Klassert
2019-01-28 8:50 ` [PATCH RFC v2 2/3] net: Support GRO/GSO fraglist chaining Steffen Klassert
@ 2019-01-28 8:50 ` Steffen Klassert
2019-01-28 16:37 ` Paolo Abeni
2019-01-28 20:49 ` Willem de Bruijn
2 siblings, 2 replies; 11+ messages in thread
From: Steffen Klassert @ 2019-01-28 8:50 UTC (permalink / raw)
To: netdev
Cc: Steffen Klassert, Willem de Bruijn, Paolo Abeni, Jason A. Donenfeld
This patch extends UDP GRO to support fraglist GRO/GSO
by using the previously introduced infrastructure.
All UDP packets that are not targeted at a GRO capable
UDP socket go through fraglist GRO now (local input
and forwarding).
---
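For a flow without a GRO capable socket, the resulting path is roughly
(illustrative only):

	wire: seg1 seg2 seg3 seg4
	-> udp_gro_receive_segment() / skb_gro_receive_list():
	   one skb, seg2..seg4 chained at frag_list, is_flist set
	-> udp{4,6}_gro_complete():
	   gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_UDP_L4
	-> on output, __udp_gso_segment() sees SKB_GSO_FRAGLIST and
	   calls __udp_gso_segment_list(), which restores the original
	   segments unchanged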
net/ipv4/udp_offload.c | 45 ++++++++++++++++++++++++++++++++++++++----
net/ipv6/udp_offload.c | 9 +++++++++
2 files changed, 50 insertions(+), 4 deletions(-)
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 584635db9231..c0be33216750 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -188,6 +188,22 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
}
EXPORT_SYMBOL(skb_udp_tunnel_segment);
+static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ unsigned int mss = skb_shinfo(skb)->gso_size;
+
+ skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+ if (IS_ERR(skb))
+ return skb;
+
+ udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->csum_valid = 1;
+
+ return skb;
+}
+
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
netdev_features_t features)
{
@@ -200,6 +216,9 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
__sum16 check;
__be16 newlen;
+ if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
+ return __udp_gso_segment_list(gso_skb, features);
+
mss = skb_shinfo(gso_skb)->gso_size;
if (gso_skb->len <= sizeof(*uh) + mss)
return ERR_PTR(-EINVAL);
@@ -352,16 +371,15 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
struct sk_buff *pp = NULL;
struct udphdr *uh2;
struct sk_buff *p;
+ int ret;
/* requires non zero csum, for symmetry with GSO */
if (!uh->check) {
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
}
-
/* pull encapsulating udp header */
skb_gro_pull(skb, sizeof(struct udphdr));
- skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
list_for_each_entry(p, head, list) {
if (!NAPI_GRO_CB(p)->same_flow)
@@ -379,8 +397,17 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
* Under small packet flood GRO count could elsewhere grow a lot
* leading to excessive truesize values
*/
- if (!skb_gro_receive(p, skb) &&
- NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
+ if (NAPI_GRO_CB(skb)->is_flist) {
+ if (!pskb_may_pull(skb, skb_gro_offset(skb)))
+ return NULL;
+ ret = skb_gro_receive_list(p, skb);
+ } else {
+ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
+
+ ret = skb_gro_receive(p, skb);
+ }
+
+ if (!ret && NAPI_GRO_CB(p)->count > UDP_GRO_CNT_MAX)
pp = p;
else if (uh->len != uh2->len)
pp = p;
@@ -402,6 +429,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
int flush = 1;
if (!sk || !udp_sk(sk)->gro_receive) {
+ NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;
pp = call_gro_receive(udp_gro_receive_segment, head, skb);
return pp;
}
@@ -530,6 +558,15 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
const struct iphdr *iph = ip_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
+ if (NAPI_GRO_CB(skb)->is_flist) {
+ uh->len = htons(skb->len - nhoff);
+
+ skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ return 0;
+ }
+
if (uh->check)
uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
iph->daddr, 0);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 5f7937a4f71a..7c3f28310baa 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -154,6 +154,15 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
+ if (NAPI_GRO_CB(skb)->is_flist) {
+ uh->len = htons(skb->len - nhoff);
+
+ skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ return 0;
+ }
+
if (uh->check)
uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
&ipv6h->daddr, 0);
--
2.17.1
* Re: [PATCH RFC v2 3/3] udp: Support UDP fraglist GRO/GSO.
2019-01-28 8:50 ` [PATCH RFC v2 3/3] udp: Support UDP fraglist GRO/GSO Steffen Klassert
@ 2019-01-28 16:37 ` Paolo Abeni
2019-01-28 20:49 ` Willem de Bruijn
1 sibling, 0 replies; 11+ messages in thread
From: Paolo Abeni @ 2019-01-28 16:37 UTC (permalink / raw)
To: Steffen Klassert, netdev; +Cc: Willem de Bruijn, Jason A. Donenfeld
Hi,
On Mon, 2019-01-28 at 09:50 +0100, Steffen Klassert wrote:
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index 584635db9231..c0be33216750 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
[...]
> @@ -379,8 +397,17 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
> * Under small packet flood GRO count could elsewhere grow a lot
> * leading to excessive truesize values
> */
> - if (!skb_gro_receive(p, skb) &&
> - NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
> + if (NAPI_GRO_CB(skb)->is_flist) {
> + if (!pskb_may_pull(skb, skb_gro_offset(skb)))
> + return NULL;
> + ret = skb_gro_receive_list(p, skb);
> + } else {
> + skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
> +
> + ret = skb_gro_receive(p, skb);
> + }
> +
> + if (!ret && NAPI_GRO_CB(p)->count > UDP_GRO_CNT_MAX)
^^
Minor nitpick: here we may want to preserve the '>=' operator.
Note: I've not finished looking at the patches yet; I'll try to provide
some benchmarking, which will take some time.
Cheers,
Paolo
* Re: [PATCH RFC v2 3/3] udp: Support UDP fraglist GRO/GSO.
2019-01-28 8:50 ` [PATCH RFC v2 3/3] udp: Support UDP fraglist GRO/GSO Steffen Klassert
2019-01-28 16:37 ` Paolo Abeni
@ 2019-01-28 20:49 ` Willem de Bruijn
2019-02-13 11:48 ` Steffen Klassert
1 sibling, 1 reply; 11+ messages in thread
From: Willem de Bruijn @ 2019-01-28 20:49 UTC (permalink / raw)
To: Steffen Klassert
Cc: Network Development, Willem de Bruijn, Paolo Abeni, Jason A. Donenfeld
On Mon, Jan 28, 2019 at 2:51 AM Steffen Klassert
<steffen.klassert@secunet.com> wrote:
>
> This patch extends UDP GRO to support fraglist GRO/GSO
> by using the previously introduced infrastructure.
> All UDP packets that are not targeted at a GRO capable
> UDP socket go through fraglist GRO now (local input
> and forwarding).
> ---
> net/ipv4/udp_offload.c | 45 ++++++++++++++++++++++++++++++++++++++----
> net/ipv6/udp_offload.c | 9 +++++++++
> 2 files changed, 50 insertions(+), 4 deletions(-)
>
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index 584635db9231..c0be33216750 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -188,6 +188,22 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
> }
> EXPORT_SYMBOL(skb_udp_tunnel_segment);
>
> +static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
> + netdev_features_t features)
> +{
> + unsigned int mss = skb_shinfo(skb)->gso_size;
> +
> + skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
> + if (IS_ERR(skb))
> + return skb;
> +
> + udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
> + skb->ip_summed = CHECKSUM_NONE;
> + skb->csum_valid = 1;
csum_valid is only used on ingress.
Hardcoding CHECKSUM_NONE is probably fine as long as this function is
only used for forwarding, assuming we don't care about verifying
checksums in the forwarding case.
But this is fragile if we ever add local list segment output. Should
we convert the checksum field in skb_forward_csum(), instead of at the
GSO layer, just as for forwarding of non-list skbs? Though that would
require traversing the list yet another time. The other option is to
do this conversion already when building the list in GRO.
The comment also applies to the same logic in skb_segment_list. As a
matter of fact, even if this belongs in GSO instead of core forwarding
or GRO, then probably both the list head and the frag_list skbs should
be set in the same function, i.e. in skb_segment_list.
> +
> + return skb;
> +}
> +
> struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
> netdev_features_t features)
> {
> @@ -200,6 +216,9 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
> __sum16 check;
> __be16 newlen;
>
> + if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
> + return __udp_gso_segment_list(gso_skb, features);
> +
> mss = skb_shinfo(gso_skb)->gso_size;
> if (gso_skb->len <= sizeof(*uh) + mss)
> return ERR_PTR(-EINVAL);
> @@ -352,16 +371,15 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
> struct sk_buff *pp = NULL;
> struct udphdr *uh2;
> struct sk_buff *p;
> + int ret;
>
> /* requires non zero csum, for symmetry with GSO */
> if (!uh->check) {
> NAPI_GRO_CB(skb)->flush = 1;
> return NULL;
> }
> -
Accidental whitespace removal?
> /* pull encapsulating udp header */
> skb_gro_pull(skb, sizeof(struct udphdr));
> - skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
>
> list_for_each_entry(p, head, list) {
> if (!NAPI_GRO_CB(p)->same_flow)
> @@ -379,8 +397,17 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
> * Under small packet flood GRO count could elsewhere grow a lot
> > * leading to excessive truesize values
> */
> - if (!skb_gro_receive(p, skb) &&
> - NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
> + if (NAPI_GRO_CB(skb)->is_flist) {
> + if (!pskb_may_pull(skb, skb_gro_offset(skb)))
> + return NULL;
> + ret = skb_gro_receive_list(p, skb);
> + } else {
> + skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
> +
> + ret = skb_gro_receive(p, skb);
> + }
> +
> + if (!ret && NAPI_GRO_CB(p)->count > UDP_GRO_CNT_MAX)
> pp = p;
> else if (uh->len != uh2->len)
> pp = p;
> @@ -402,6 +429,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
> int flush = 1;
>
> if (!sk || !udp_sk(sk)->gro_receive) {
> + NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;
This updates the choice of whether to use a list on each received skb,
which is problematic as a socket can call the setsockopt in between
packets.
Actually, there is no longer a need for a socket lookup on each skb at
all. We always apply GRO, which was the previous reason for the
lookup. And if a matching flow is found in the GRO table, the choice
to use a list is already stored.
> pp = call_gro_receive(udp_gro_receive_segment, head, skb);
> return pp;
> }
> @@ -530,6 +558,15 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
> const struct iphdr *iph = ip_hdr(skb);
> struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
>
> + if (NAPI_GRO_CB(skb)->is_flist) {
> + uh->len = htons(skb->len - nhoff);
> +
> + skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
> + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
> +
> + return 0;
> + }
> +
> if (uh->check)
> uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
> iph->daddr, 0);
> diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
> index 5f7937a4f71a..7c3f28310baa 100644
> --- a/net/ipv6/udp_offload.c
> +++ b/net/ipv6/udp_offload.c
> @@ -154,6 +154,15 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
> const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
> struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
>
> + if (NAPI_GRO_CB(skb)->is_flist) {
> + uh->len = htons(skb->len - nhoff);
> +
> + skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
> + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
> +
> + return 0;
> + }
> +
> if (uh->check)
> uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
> &ipv6h->daddr, 0);
> --
> 2.17.1
>
* Re: [PATCH RFC v2 3/3] udp: Support UDP fraglist GRO/GSO.
2019-01-28 20:49 ` Willem de Bruijn
@ 2019-02-13 11:48 ` Steffen Klassert
0 siblings, 0 replies; 11+ messages in thread
From: Steffen Klassert @ 2019-02-13 11:48 UTC (permalink / raw)
To: Willem de Bruijn
Cc: Network Development, Willem de Bruijn, Paolo Abeni, Jason A. Donenfeld
On Mon, Jan 28, 2019 at 02:49:32PM -0600, Willem de Bruijn wrote:
> On Mon, Jan 28, 2019 at 2:51 AM Steffen Klassert
> <steffen.klassert@secunet.com> wrote:
> >
> > This patch extends UDP GRO to support fraglist GRO/GSO
> > by using the previously introduced infrastructure.
> > All UDP packets that are not targeted at a GRO capable
> > UDP socket go through fraglist GRO now (local input
> > and forwarding).
> > ---
> > net/ipv4/udp_offload.c | 45 ++++++++++++++++++++++++++++++++++++++----
> > net/ipv6/udp_offload.c | 9 +++++++++
> > 2 files changed, 50 insertions(+), 4 deletions(-)
> >
> > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > index 584635db9231..c0be33216750 100644
> > --- a/net/ipv4/udp_offload.c
> > +++ b/net/ipv4/udp_offload.c
> > @@ -188,6 +188,22 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
> > }
> > EXPORT_SYMBOL(skb_udp_tunnel_segment);
> >
> > +static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
> > + netdev_features_t features)
> > +{
> > + unsigned int mss = skb_shinfo(skb)->gso_size;
> > +
> > + skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
> > + if (IS_ERR(skb))
> > + return skb;
> > +
> > + udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
> > + skb->ip_summed = CHECKSUM_NONE;
> > + skb->csum_valid = 1;
>
> csum_valid is only used on ingress.
>
> Hardcoding CHECKSUM_NONE is probably fine as long as this function is
> only used for forwarding, assuming we don't care about verifiying
> checksums in the forwarding case.
>
> But this is fragile if we ever add local list segment output. Should
> convert the checksum field in skb_forward_csum, instead of at the GSO
> layer, just as for forwarding of non list skbs? Though that would
> require traversing the list yet another time. Other option is to
> already do this conversion when building the list in GRO.
>
> The comment also applies to the same logic in skb_segment_list. As a
> matter or fact, even if this belongs in GSO instead of core forwarding
> or GRO, then probably both the list head and frag_list skbs should be
> set in the same function, so skb_segment_list.
I had a deeper look into this now; it seems that the head skb already
has the correct conversion (either for local input or forwarding).
So we probably just need to adjust the frag_list skbs to match the
head skb's checksum conversion.
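Something like this in skb_segment_list(), instead of hardcoding
CHECKSUM_NONE (untested):

	/* Inherit whatever checksum conversion the head skb already
	 * went through (local input or forwarding).
	 */
	nskb->ip_summed = skb->ip_summed;
	nskb->csum_valid = skb->csum_valid;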
> > /* pull encapsulating udp header */
> > skb_gro_pull(skb, sizeof(struct udphdr));
> > - skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
> >
> > list_for_each_entry(p, head, list) {
> > if (!NAPI_GRO_CB(p)->same_flow)
> > @@ -379,8 +397,17 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
> > * Under small packet flood GRO count could elsewhere grow a lot
> > * leading to excessive truesize values
> > */
> > - if (!skb_gro_receive(p, skb) &&
> > - NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
> > + if (NAPI_GRO_CB(skb)->is_flist) {
> > + if (!pskb_may_pull(skb, skb_gro_offset(skb)))
> > + return NULL;
> > + ret = skb_gro_receive_list(p, skb);
> > + } else {
> > + skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
> > +
> > + ret = skb_gro_receive(p, skb);
> > + }
> > +
> > + if (!ret && NAPI_GRO_CB(p)->count > UDP_GRO_CNT_MAX)
> > pp = p;
> > else if (uh->len != uh2->len)
> > pp = p;
> > @@ -402,6 +429,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
> > int flush = 1;
> >
> > if (!sk || !udp_sk(sk)->gro_receive) {
> > + NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;
>
> This updates the choice of whether to use a list on each received skb.
> Which is problematic as a socket can call the setsockopt in between
> packets.
>
> Actually, there no longer is a need for a route lookup for each skb at
> all. We always apply GRO, which was the previous reason for the
> lookup. And if a matching flow is found in the GRO table, we already
> the choice to use a list is already stored.
Yes, that's true. I'll change that.