All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 0/4] UDP: implement GRO support for UDP_SEGMENT socket
@ 2018-09-14 15:43 Paolo Abeni
  2018-09-14 15:43 ` [RFC PATCH 1/4] net: add new helper to update an already registered offload Paolo Abeni
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Paolo Abeni @ 2018-09-14 15:43 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, Willem de Bruijn, Steffen Klassert

This series implements GRO support for UDP sockets, as the RX counterpart
of commit bec1f6f69736 ("udp: generate gso with UDP_SEGMENT").
The first two patches allow UDP GRO registration on demand, avoiding additional
overhead when no UDP_SEGMENT sockets are created, actually decreasing the GRO
engine costs for the default configuration for UDP packets. They could possibly
live on their own.
The third patch contains the actual UDP GRO implementation, while the 4th patch
allows using the udpgso_bench_rx program under selftest to trigger UDP GRO. A
full self-test is not there yet.

Paolo Abeni (4):
  net: add new helper to update an already registered offload
  net: enable UDP gro on demand.
  udp: implement GRO plain UDP sockets.
  selftests: add GRO support, fix port option processing

 include/linux/udp.h                           |  18 +-
 include/net/addrconf.h                        |   1 +
 include/net/protocol.h                        |   4 +
 include/net/udp.h                             |  12 ++
 net/ipv4/protocol.c                           |  13 +-
 net/ipv4/udp.c                                |   3 +
 net/ipv4/udp_offload.c                        | 170 +++++++++++++++---
 net/ipv4/udp_tunnel.c                         |   1 +
 net/ipv6/af_inet6.c                           |   1 +
 net/ipv6/protocol.c                           |  13 +-
 net/ipv6/udp_offload.c                        |  31 +++-
 tools/testing/selftests/net/udpgso_bench_rx.c |  18 +-
 12 files changed, 244 insertions(+), 41 deletions(-)

-- 
2.17.1

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [RFC PATCH 1/4] net: add new helper to update an already registered offload
  2018-09-14 15:43 [RFC PATCH 0/4] UDP: implement GRO support for UDP_SEGMENT socket Paolo Abeni
@ 2018-09-14 15:43 ` Paolo Abeni
  2018-09-14 15:43 ` [RFC PATCH 2/4] net: enable UDP gro on demand Paolo Abeni
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 11+ messages in thread
From: Paolo Abeni @ 2018-09-14 15:43 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, Willem de Bruijn, Steffen Klassert

This will allow us to enable/disable UDP GRO at runtime in
a later patch.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/protocol.h |  4 ++++
 net/ipv4/protocol.c    | 13 +++++++++----
 net/ipv6/protocol.c    | 13 +++++++++----
 3 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/include/net/protocol.h b/include/net/protocol.h
index 4fc75f7ae23b..aa77e7feffab 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -104,6 +104,8 @@ extern struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS];
 int inet_add_protocol(const struct net_protocol *prot, unsigned char num);
 int inet_del_protocol(const struct net_protocol *prot, unsigned char num);
 int inet_add_offload(const struct net_offload *prot, unsigned char num);
+int inet_update_offload(const struct net_offload *old_prot,
+			const struct net_offload *new_prot, unsigned char num);
 int inet_del_offload(const struct net_offload *prot, unsigned char num);
 void inet_register_protosw(struct inet_protosw *p);
 void inet_unregister_protosw(struct inet_protosw *p);
@@ -115,6 +117,8 @@ int inet6_register_protosw(struct inet_protosw *p);
 void inet6_unregister_protosw(struct inet_protosw *p);
 #endif
 int inet6_add_offload(const struct net_offload *prot, unsigned char num);
+int inet6_update_offload(const struct net_offload *old_prot,
+			 const struct net_offload *new_prot, unsigned char num);
 int inet6_del_offload(const struct net_offload *prot, unsigned char num);
 
 #endif	/* _PROTOCOL_H */
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 32a691b7ce2c..b60f1686b918 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -65,12 +65,17 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
 }
 EXPORT_SYMBOL(inet_del_protocol);
 
-int inet_del_offload(const struct net_offload *prot, unsigned char protocol)
+int inet_update_offload(const struct net_offload *old_prot,
+			const struct net_offload *new_prot,
+			unsigned char protocol)
 {
-	int ret;
+	return (cmpxchg((const struct net_offload **)&inet_offloads[protocol],
+		        old_prot, new_prot) == old_prot) ? 0 : -1;
+}
 
-	ret = (cmpxchg((const struct net_offload **)&inet_offloads[protocol],
-		       prot, NULL) == prot) ? 0 : -1;
+int inet_del_offload(const struct net_offload *prot, unsigned char protocol)
+{
+	int ret = inet_update_offload(prot, NULL, protocol);
 
 	synchronize_net();
 
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index b5d54d4f995c..9ee6aff1f3fa 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -60,12 +60,17 @@ int inet6_add_offload(const struct net_offload *prot, unsigned char protocol)
 }
 EXPORT_SYMBOL(inet6_add_offload);
 
-int inet6_del_offload(const struct net_offload *prot, unsigned char protocol)
+int inet6_update_offload(const struct net_offload *old_prot,
+			 const struct net_offload *new_prot,
+			 unsigned char protocol)
 {
-	int ret;
+	return (cmpxchg((const struct net_offload **)&inet6_offloads[protocol],
+		        old_prot, new_prot) == old_prot) ? 0 : -1;
+}
 
-	ret = (cmpxchg((const struct net_offload **)&inet6_offloads[protocol],
-		       prot, NULL) == prot) ? 0 : -1;
+int inet6_del_offload(const struct net_offload *prot, unsigned char protocol)
+{
+	int ret = inet6_update_offload(prot, NULL, protocol);
 
 	synchronize_net();
 
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [RFC PATCH 2/4] net: enable UDP gro on demand.
  2018-09-14 15:43 [RFC PATCH 0/4] UDP: implement GRO support for UDP_SEGMENT socket Paolo Abeni
  2018-09-14 15:43 ` [RFC PATCH 1/4] net: add new helper to update an already registered offload Paolo Abeni
@ 2018-09-14 15:43 ` Paolo Abeni
  2018-09-14 17:16   ` Willem de Bruijn
  2018-09-14 15:43 ` [RFC PATCH 3/4] udp: implement GRO plain UDP sockets Paolo Abeni
  2018-09-14 15:43 ` [RFC PATCH 4/4] selftests: add GRO support, fix port option processing Paolo Abeni
  3 siblings, 1 reply; 11+ messages in thread
From: Paolo Abeni @ 2018-09-14 15:43 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, Willem de Bruijn, Steffen Klassert

Currently, the UDP GRO callback is always invoked, regardless of
the existence of any actual user (e.g. a UDP tunnel). With retpoline
enabled, this causes measurable overhead.

This changeset introduces explicit accounting of the sockets requiring
UDP GRO and updates the UDP offloads at runtime accordingly, so that
the GRO callback is present (and invoked) only when there is at least
one socket requiring it.

Tested with pktgen vs udpgso_bench_rx
Before:
udp rx:     27 MB/s  1613271 calls/s

After:
udp rx:     30 MB/s  1771537 calls/s

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/udp.h    | 18 +++++++++++-
 include/net/addrconf.h |  1 +
 include/net/udp.h      | 12 ++++++++
 net/ipv4/udp.c         |  2 ++
 net/ipv4/udp_offload.c | 63 ++++++++++++++++++++++++++++++++++++++++--
 net/ipv4/udp_tunnel.c  |  1 +
 net/ipv6/af_inet6.c    |  1 +
 net/ipv6/udp_offload.c | 25 +++++++++++++++--
 8 files changed, 117 insertions(+), 6 deletions(-)

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 320d49d85484..56a321a55ba1 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -49,7 +49,8 @@ struct udp_sock {
 	unsigned int	 corkflag;	/* Cork is required */
 	__u8		 encap_type;	/* Is this an Encapsulation socket? */
 	unsigned char	 no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
-			 no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */
+			 no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
+			 gro_in_use:1;	/* UDP GRO is requested */
 	/*
 	 * Following member retains the information to create a UDP header
 	 * when the socket is uncorked.
@@ -105,6 +106,11 @@ static inline void udp_set_no_check6_rx(struct sock *sk, bool val)
 	udp_sk(sk)->no_check6_rx = val;
 }
 
+static inline void udp_set_gro_in_use(struct sock *sk, bool val)
+{
+	udp_sk(sk)->gro_in_use = val;
+}
+
 static inline bool udp_get_no_check6_tx(struct sock *sk)
 {
 	return udp_sk(sk)->no_check6_tx;
@@ -115,6 +121,16 @@ static inline bool udp_get_no_check6_rx(struct sock *sk)
 	return udp_sk(sk)->no_check6_rx;
 }
 
+static inline bool udp_get_gro_in_use(struct sock *sk)
+{
+	return udp_sk(sk)->gro_in_use;
+}
+
+static inline bool udp_want_gro(struct sock *sk)
+{
+	return udp_sk(sk)->gro_receive;
+}
+
 #define udp_portaddr_for_each_entry(__sk, list) \
 	hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node)
 
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 6def0351bcc3..fb2ac3ca3417 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -254,6 +254,7 @@ struct ipv6_stub {
 				 struct in6_addr *saddr);
 
 	void (*udpv6_encap_enable)(void);
+	void (*udpv6_update_offload)(bool enable_gro);
 	void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
 			      const struct in6_addr *solicited_addr,
 			      bool router, bool solicited, bool override, bool inc_opt);
diff --git a/include/net/udp.h b/include/net/udp.h
index 8482a990b0bb..eff2dfa0571b 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -444,8 +444,20 @@ int udpv4_offload_init(void);
 void udp_init(void);
 
 void udp_encap_enable(void);
+void udp_gro_in_use_changed(struct sock *sk);
+
 #if IS_ENABLED(CONFIG_IPV6)
 void udpv6_encap_enable(void);
+void udpv6_update_offload(bool);
 #endif
 
+static inline void udp_update_gro_in_use(struct sock *sk, bool want_gro)
+{
+	if (want_gro == udp_get_gro_in_use(sk))
+		return;
+
+	udp_set_gro_in_use(sk, want_gro);
+	udp_gro_in_use_changed(sk);
+}
+
 #endif	/* _UDP_H */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f4e35b2ff8b8..5ac794230013 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1438,6 +1438,8 @@ void udp_destruct_sock(struct sock *sk)
 	}
 	udp_rmem_release(sk, total, 0, true);
 
+	udp_update_gro_in_use(sk, 0);
+
 	inet_sock_destruct(sk);
 }
 EXPORT_SYMBOL_GPL(udp_destruct_sock);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 0c0522b79b43..08b225adf763 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -14,6 +14,10 @@
 #include <net/udp.h>
 #include <net/protocol.h>
 
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/addrconf.h>
+#endif
+
 static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 	netdev_features_t features,
 	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
@@ -472,7 +476,13 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
 	return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
 }
 
-static const struct net_offload udpv4_offload = {
+static const struct net_offload udpv4_no_gro_offload = {
+	.callbacks = {
+		.gso_segment = udp4_ufo_fragment,
+	},
+};
+
+static const struct net_offload udpv4_gro_offload = {
 	.callbacks = {
 		.gso_segment = udp4_ufo_fragment,
 		.gro_receive  =	udp4_gro_receive,
@@ -480,7 +490,56 @@ static const struct net_offload udpv4_offload = {
 	},
 };
 
+static atomic_t gro_needed;
+
+static void udp_update_offload(struct sock *sk, bool enable_gro)
+{
+	int ret;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	/* to keep the code simple ignore IPV6_V6ONLY */
+	if (sk->sk_family == PF_INET6)
+		ipv6_stub->udpv6_update_offload(enable_gro);
+#endif
+
+	if (enable_gro)
+		ret = inet_update_offload(&udpv4_no_gro_offload,
+					  &udpv4_gro_offload, IPPROTO_UDP);
+	else
+		ret = inet_update_offload(&udpv4_gro_offload,
+					  &udpv4_no_gro_offload, IPPROTO_UDP);
+	WARN_ON(ret);
+}
+
+static void udp_gro_enable(struct sock *sk)
+{
+	if (atomic_inc_return(&gro_needed) > 1)
+		return;
+
+	udp_update_offload(sk, true);
+}
+
+static void udp_gro_disable(struct sock *sk)
+{
+	int new_gro_needed = atomic_dec_return(&gro_needed);
+
+	WARN_ON(new_gro_needed < 0);
+	if (new_gro_needed != 0)
+		return;
+
+	udp_update_offload(sk, false);
+}
+
+void udp_gro_in_use_changed(struct sock *sk)
+{
+	if (udp_get_gro_in_use(sk))
+		udp_gro_enable(sk);
+	else
+		udp_gro_disable(sk);
+}
+EXPORT_SYMBOL_GPL(udp_gro_in_use_changed);
+
 int __init udpv4_offload_init(void)
 {
-	return inet_add_offload(&udpv4_offload, IPPROTO_UDP);
+	return inet_add_offload(&udpv4_no_gro_offload, IPPROTO_UDP);
 }
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 6539ff15e9a3..16457d41ff7f 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -73,6 +73,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
 	udp_sk(sk)->gro_complete = cfg->gro_complete;
 
 	udp_tunnel_encap_enable(sock);
+	udp_update_gro_in_use(sk, udp_want_gro(sk));
 }
 EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock);
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 77ef8478234f..ecb8aba7b535 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -897,6 +897,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
 	.fib6_multipath_select = fib6_multipath_select,
 	.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
 	.udpv6_encap_enable = udpv6_encap_enable,
+	.udpv6_update_offload = udpv6_update_offload,
 	.ndisc_send_na = ndisc_send_na,
 	.nd_tbl	= &nd_tbl,
 };
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 95dee9ca8d22..1df968a3e788 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -158,7 +158,13 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
 	return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
 }
 
-static const struct net_offload udpv6_offload = {
+static const struct net_offload udpv6_no_gro_offload = {
+	.callbacks = {
+		.gso_segment	=	udp6_ufo_fragment,
+	},
+};
+
+static const struct net_offload udpv6_gro_offload = {
 	.callbacks = {
 		.gso_segment	=	udp6_ufo_fragment,
 		.gro_receive	=	udp6_gro_receive,
@@ -166,12 +172,25 @@ static const struct net_offload udpv6_offload = {
 	},
 };
 
+void udpv6_update_offload(bool enable_gro)
+{
+	int ret;
+
+	if (enable_gro)
+		ret = inet6_update_offload(&udpv6_no_gro_offload,
+					   &udpv6_gro_offload, IPPROTO_UDP);
+	else
+		ret = inet6_update_offload(&udpv6_gro_offload,
+					   &udpv6_no_gro_offload, IPPROTO_UDP);
+	WARN_ON(ret);
+}
+
 int udpv6_offload_init(void)
 {
-	return inet6_add_offload(&udpv6_offload, IPPROTO_UDP);
+	return inet6_add_offload(&udpv6_no_gro_offload, IPPROTO_UDP);
 }
 
 int udpv6_offload_exit(void)
 {
-	return inet6_del_offload(&udpv6_offload, IPPROTO_UDP);
+	return inet6_del_offload(&udpv6_no_gro_offload, IPPROTO_UDP);
 }
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [RFC PATCH 3/4] udp: implement GRO plain UDP sockets.
  2018-09-14 15:43 [RFC PATCH 0/4] UDP: implement GRO support for UDP_SEGMENT socket Paolo Abeni
  2018-09-14 15:43 ` [RFC PATCH 1/4] net: add new helper to update an already registered offload Paolo Abeni
  2018-09-14 15:43 ` [RFC PATCH 2/4] net: enable UDP gro on demand Paolo Abeni
@ 2018-09-14 15:43 ` Paolo Abeni
  2018-09-14 16:48   ` Eric Dumazet
  2018-09-14 15:43 ` [RFC PATCH 4/4] selftests: add GRO support, fix port option processing Paolo Abeni
  3 siblings, 1 reply; 11+ messages in thread
From: Paolo Abeni @ 2018-09-14 15:43 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, Willem de Bruijn, Steffen Klassert

This is the RX counter part of commit bec1f6f69736 ("udp: generate gso
with UDP_SEGMENT"). When UDP_SEGMENT is enabled, such socket is also
eligible for GRO in the rx path: UDP segments directed to such socket
are assembled into a larger GSO_UDP_L4 packet.

The core UDP GRO support is enabled/updated on setsockopt(UDP_SEGMENT) and
disabled, if needed, at socket destruction time.

Initial benchmark numbers:

Before:
udp rx:   1079 MB/s   769065 calls/s

After:
udp rx:   1466 MB/s    24877 calls/s

This change introduces a side effect with respect to UDP tunnels:
after a UDP tunnel is created, the kernel now performs a lookup per ingress UDP
packet; before, such a lookup happened only if the ingress packet carried a valid
internal header csum.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/udp.h    |   2 +-
 net/ipv4/udp.c         |   1 +
 net/ipv4/udp_offload.c | 107 +++++++++++++++++++++++++++++++++--------
 net/ipv6/udp_offload.c |   6 +--
 4 files changed, 90 insertions(+), 26 deletions(-)

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 56a321a55ba1..27dea956ef6e 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -128,7 +128,7 @@ static inline bool udp_get_gro_in_use(struct sock *sk)
 
 static inline bool udp_want_gro(struct sock *sk)
 {
-	return udp_sk(sk)->gro_receive;
+	return udp_sk(sk)->gro_receive || udp_sk(sk)->gso_size;
 }
 
 #define udp_portaddr_for_each_entry(__sk, list) \
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5ac794230013..871ee55afd96 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2450,6 +2450,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		if (val < 0 || val > USHRT_MAX)
 			return -EINVAL;
 		up->gso_size = val;
+		udp_update_gro_in_use(sk, udp_want_gro(sk));
 		break;
 
 	/*
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 08b225adf763..4ff150bb84de 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -347,6 +347,54 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 	return segs;
 }
 
+#define UDO_GRO_CNT_MAX 64
+static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
+					       struct sk_buff *skb)
+{
+	struct udphdr *uh = udp_hdr(skb);
+	struct sk_buff *pp = NULL;
+	struct udphdr *uh2;
+	struct sk_buff *p;
+
+	/* requires non zero csum, for symmetry with GSO */
+	if (!uh->check) {
+		NAPI_GRO_CB(skb)->flush = 1;
+		return NULL;
+	}
+
+	/* pull encapsulating udp header */
+	skb_gro_pull(skb, sizeof(struct udphdr));
+	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
+
+	list_for_each_entry(p, head, list) {
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		uh2 = udp_hdr(p);
+
+		/* Match ports only, as csum is always non zero */
+		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+
+	/* Terminate the flow on len mismatch or if it grows "too much".
+	 * Under small packet flood GRO count could otherwise grow a lot
+	 * leading to excessive truesize values
+	 */
+		if (!skb_gro_receive(p, skb) &&
+		    NAPI_GRO_CB(p)->count > UDO_GRO_CNT_MAX)
+			pp = p;
+		else if (uh->len != uh2->len)
+			pp = p;
+
+		return pp;
+	}
+
+	/* mismatch, but we never need to flush */
+	return NULL;
+}
+
 struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 				struct udphdr *uh, udp_lookup_t lookup)
 {
@@ -357,23 +405,29 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 	int flush = 1;
 	struct sock *sk;
 
+	rcu_read_lock();
+	sk = (*lookup)(skb, uh->source, uh->dest);
+	if (!sk)
+		goto out_unlock;
+
+	if (udp_sk(sk)->gso_size) {
+		pp = call_gro_receive(udp_gro_receive_segment, head, skb);
+		rcu_read_unlock();
+		return pp;
+	}
+
 	if (NAPI_GRO_CB(skb)->encap_mark ||
 	    (skb->ip_summed != CHECKSUM_PARTIAL &&
 	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
 	     !NAPI_GRO_CB(skb)->csum_valid))
-		goto out;
+		goto out_unlock;
 
 	/* mark that this skb passed once through the tunnel gro layer */
 	NAPI_GRO_CB(skb)->encap_mark = 1;
 
-	rcu_read_lock();
-	sk = (*lookup)(skb, uh->source, uh->dest);
-
-	if (sk && udp_sk(sk)->gro_receive)
-		goto unflush;
-	goto out_unlock;
+	if (!udp_sk(sk)->gro_receive)
+		goto out_unlock;
 
-unflush:
 	flush = 0;
 
 	list_for_each_entry(p, head, list) {
@@ -398,7 +452,6 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 
 out_unlock:
 	rcu_read_unlock();
-out:
 	skb_gro_flush_final(skb, pp, flush);
 	return pp;
 }
@@ -431,6 +484,19 @@ static struct sk_buff *udp4_gro_receive(struct list_head *head,
 	return NULL;
 }
 
+static int udp_gro_complete_segment(struct sk_buff *skb)
+{
+	struct udphdr *uh = udp_hdr(skb);
+
+	skb->csum_start = (unsigned char *)uh - skb->head;
+	skb->csum_offset = offsetof(struct udphdr, check);
+	skb->ip_summed = CHECKSUM_PARTIAL;
+
+	skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+	skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+	return 0;
+}
+
 int udp_gro_complete(struct sk_buff *skb, int nhoff,
 		     udp_lookup_t lookup)
 {
@@ -441,16 +507,21 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
 
 	uh->len = newlen;
 
-	/* Set encapsulation before calling into inner gro_complete() functions
-	 * to make them set up the inner offsets.
-	 */
-	skb->encapsulation = 1;
-
 	rcu_read_lock();
 	sk = (*lookup)(skb, uh->source, uh->dest);
-	if (sk && udp_sk(sk)->gro_complete)
+	if (sk && udp_sk(sk)->gso_size) {
+		err = udp_gro_complete_segment(skb);
+	} else if (sk && udp_sk(sk)->gro_complete) {
+		skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
+					: SKB_GSO_UDP_TUNNEL;
+
+		/* Set encapsulation before calling into inner gro_complete()
+		 * functions to make them set up the inner offsets.
+		 */
+		skb->encapsulation = 1;
 		err = udp_sk(sk)->gro_complete(sk, skb,
 				nhoff + sizeof(struct udphdr));
+	}
 	rcu_read_unlock();
 
 	if (skb->remcsum_offload)
@@ -465,13 +536,9 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
 	const struct iphdr *iph = ip_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-	if (uh->check) {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+	if (uh->check)
 		uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
 					  iph->daddr, 0);
-	} else {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-	}
 
 	return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
 }
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 1df968a3e788..f2731b7b4c75 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -147,13 +147,9 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
 	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-	if (uh->check) {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+	if (uh->check)
 		uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
 					  &ipv6h->daddr, 0);
-	} else {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-	}
 
 	return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
 }
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [RFC PATCH 4/4] selftests: add GRO support, fix port option processing
  2018-09-14 15:43 [RFC PATCH 0/4] UDP: implement GRO support for UDP_SEGMENT socket Paolo Abeni
                   ` (2 preceding siblings ...)
  2018-09-14 15:43 ` [RFC PATCH 3/4] udp: implement GRO plain UDP sockets Paolo Abeni
@ 2018-09-14 15:43 ` Paolo Abeni
  3 siblings, 0 replies; 11+ messages in thread
From: Paolo Abeni @ 2018-09-14 15:43 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, Willem de Bruijn, Steffen Klassert

Not a full test-case yet, but allows triggering the UDP GSO code
path.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/udpgso_bench_rx.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
index 727cf67a3f75..f8bb7ea6bd25 100644
--- a/tools/testing/selftests/net/udpgso_bench_rx.c
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -31,9 +31,14 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT		103
+#endif
+
 static int  cfg_port		= 8000;
 static bool cfg_tcp;
 static bool cfg_verify;
+static bool cfg_gro_segment;
 
 static bool interrupted;
 static unsigned long packets, bytes;
@@ -199,10 +204,13 @@ static void parse_opts(int argc, char **argv)
 {
 	int c;
 
-	while ((c = getopt(argc, argv, "ptv")) != -1) {
+	while ((c = getopt(argc, argv, "p:Stv")) != -1) {
 		switch (c) {
 		case 'p':
-			cfg_port = htons(strtoul(optarg, NULL, 0));
+			cfg_port = strtoul(optarg, NULL, 0);
+			break;
+		case 'S':
+			cfg_gro_segment = true;
 			break;
 		case 't':
 			cfg_tcp = true;
@@ -227,6 +235,12 @@ static void do_recv(void)
 
 	fd = do_socket(cfg_tcp);
 
+	if (cfg_gro_segment) {
+		int val = 1;
+		if (setsockopt(fd, IPPROTO_UDP, UDP_SEGMENT, &val, sizeof(val)))
+			error(1, errno, "setsockopt UDP_SEGMENT");
+	}
+
 	treport = gettimeofday_ms() + 1000;
 	do {
 		do_poll(fd);
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH 3/4] udp: implement GRO plain UDP sockets.
  2018-09-14 15:43 ` [RFC PATCH 3/4] udp: implement GRO plain UDP sockets Paolo Abeni
@ 2018-09-14 16:48   ` Eric Dumazet
  2018-09-17 10:06     ` Paolo Abeni
  0 siblings, 1 reply; 11+ messages in thread
From: Eric Dumazet @ 2018-09-14 16:48 UTC (permalink / raw)
  To: Paolo Abeni, netdev; +Cc: David S. Miller, Willem de Bruijn, Steffen Klassert



On 09/14/2018 08:43 AM, Paolo Abeni wrote:
> This is the RX counter part of commit bec1f6f69736 ("udp: generate gso
> with UDP_SEGMENT"). When UDP_SEGMENT is enabled, such socket is also
> eligible for GRO in the rx path: UDP segments directed to such socket
> are assembled into a larger GSO_UDP_L4 packet.
> 
> The core UDP GRO support is enabled/updated on setsockopt(UDP_SEGMENT) and
> disabled, if needed at socket destruction time.
> 
> Initial benchmark numbers:
> 
> Before:
> udp rx:   1079 MB/s   769065 calls/s
> 
> After:
> udp rx:   1466 MB/s    24877 calls/s


Are you sure the data is actually fully copied to user space ?

tools/testing/selftests/net/udpgso_bench_rx.c

uses :

static char rbuf[ETH_DATA_LEN];
   /* MSG_TRUNC will make return value full datagram length */
   ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);

So you need to change this program.

Also, GRO reception would mean that userspace can retrieve,
not only full bytes of X datagrams, but also the gso_size (or length of individual datagrams)

You can not know the size of the packets in advance, the sender will decide.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH 2/4] net: enable UDP gro on demand.
  2018-09-14 15:43 ` [RFC PATCH 2/4] net: enable UDP gro on demand Paolo Abeni
@ 2018-09-14 17:16   ` Willem de Bruijn
  2018-09-16 18:23     ` Willem de Bruijn
  0 siblings, 1 reply; 11+ messages in thread
From: Willem de Bruijn @ 2018-09-14 17:16 UTC (permalink / raw)
  To: Paolo Abeni
  Cc: Network Development, David Miller, Willem de Bruijn, steffen.klassert

On Fri, Sep 14, 2018 at 11:47 AM Paolo Abeni <pabeni@redhat.com> wrote:
>
> Currently, the UDP GRO callback is always invoked, regardless of
> the existence of any actual user (e.g. a UDP tunnel). With retpoline
> enabled, this causes measurable overhead.
>
> This changeset introduces explicit accounting of the sockets requiring
> UDP GRO and updates the UDP offloads at runtime accordingly, so that
> the GRO callback is present (and invoked) only when there is at least
> one socket requiring it.

I have a different solution both to the UDP socket lookup avoidance
and to configurable GRO in general.

The first can be achieved by exporting the udp_encap_needed_key static key:

"
diff --git a/include/net/udp.h b/include/net/udp.h
index 8482a990b0bb..9e82cb391dea 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -443,8 +443,10 @@ int udpv4_offload_init(void);

 void udp_init(void);

+DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key);
 void udp_encap_enable(void);

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f4e35b2ff8b8..bd873a5b8a86 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1889,7 +1889,7 @@ static int __udp_queue_rcv_skb(struct sock *sk,
struct sk_buff *skb)
        return 0;
 }

-static DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
+DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
 void udp_encap_enable(void)
 {
        static_branch_enable(&udp_encap_needed_key);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 4f6aa95a9b12..f44fe328aa0f 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -405,7 +405,7 @@ static struct sk_buff *udp4_gro_receive(struct
list_head *head,
 {
        struct udphdr *uh = udp_gro_udphdr(skb);

-       if (unlikely(!uh))
+       if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
                goto flush;
 "

.. and same for ipv6.

The second is a larger patchset that converts dev_offload to
net_offload, so that all offloads share the same infrastructure, and a
sysctl interface to be able to disable all gro_receive types, not just
udp.

I've been sitting on it for too long. Let me slightly clean it up and
send it out for discussion sake..

>
> Tested with pktgen vs udpgso_bench_rx
> Before:
> udp rx:     27 MB/s  1613271 calls/s
>
> After:
> udp rx:     30 MB/s  1771537 calls/s
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH 2/4] net: enable UDP gro on demand.
  2018-09-14 17:16   ` Willem de Bruijn
@ 2018-09-16 18:23     ` Willem de Bruijn
  2018-09-17 10:18       ` Paolo Abeni
  0 siblings, 1 reply; 11+ messages in thread
From: Willem de Bruijn @ 2018-09-16 18:23 UTC (permalink / raw)
  To: Paolo Abeni
  Cc: Network Development, David Miller, Willem de Bruijn, steffen.klassert

On Fri, Sep 14, 2018 at 1:16 PM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> On Fri, Sep 14, 2018 at 11:47 AM Paolo Abeni <pabeni@redhat.com> wrote:
> >
> > Currently, the UDP GRO callback is always invoked, regardless of
> > the existence of any actual user (e.g. a UDP tunnel). With retpoline
> > enabled, this causes measurable overhead.
> >
> > This changeset introduces explicit accounting of the sockets requiring
> > UDP GRO and updates the UDP offloads at runtime accordingly, so that
> > the GRO callback is present (and invoked) only when there is at least
> > one socket requiring it.
>
> I have a different solution both to the UDP socket lookup avoidance
> and configurable GRO in general.
>
> I've been sitting on it for too long. Let me slightly clean it up and
> send it out for discussion sake..

http://patchwork.ozlabs.org/project/netdev/list/?series=65763

That udp gro implementation is clearly less complete than yours in
this patchset. The point I wanted to bring up for discussion is not the
protocol implementation, but the infrastructure for enabling it
conditionally.

Assuming cycle cost is comparable, what do you think of  using the
existing sk offload callbacks to enable this on a per-socket basis?

As for the protocol-wide knob, I do strongly prefer something that can
work for all protocols, not just UDP. I also implemented a version that
atomically swaps the struct ptr instead of the flag based approach I sent
for review. I'm fairly agnostic about that point. One subtle issue is that I
believe we need to keep the gro_complete callbacks enabled, as gro
packets may be queued for completion when gro_receive gets disabled.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH 3/4] udp: implement GRO plain UDP sockets.
  2018-09-14 16:48   ` Eric Dumazet
@ 2018-09-17 10:06     ` Paolo Abeni
  0 siblings, 0 replies; 11+ messages in thread
From: Paolo Abeni @ 2018-09-17 10:06 UTC (permalink / raw)
  To: Eric Dumazet, netdev; +Cc: David S. Miller, Willem de Bruijn, Steffen Klassert

Hi,

On Fri, 2018-09-14 at 09:48 -0700, Eric Dumazet wrote:
> Are you sure the data is actually fully copied to user space ?
> 
> tools/testing/selftests/net/udpgso_bench_rx.c
> 
> uses :
> 
> static char rbuf[ETH_DATA_LEN];
>    /* MSG_TRUNC will make return value full datagram length */
>    ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);
> 
> So you need to change this program.

Thanks for the feedback.

You are right, I need to update udpgso_bench_rx. Making it
unconditionally read up to 64K bytes, I measure:

Before:
udp rx:    962 MB/s   685339 calls/s

After:
udp rx:   1344 MB/s    22812 calls/s

Top perf offenders for udpgso_bench_rx:
  31.83%  [kernel]         [k] copy_user_enhanced_fast_string
   8.90%  [kernel]         [k] skb_release_data
   7.97%  [kernel]         [k] free_pcppages_bulk
   6.82%  [kernel]         [k] copy_page_to_iter
   3.41%  [kernel]         [k] skb_copy_datagram_iter
   2.01%  [kernel]         [k] free_unref_page
   1.92%  [kernel]         [k] __entry_SYSCALL_64_trampoline

Trivial note: with this even UDP sockets would benefit from remote skb
freeing, as the cost of skb_release_data is relevant for the GSO
packets.

> Also, GRO reception would mean that userspace can retrieve,
> not only full bytes of X datagrams, but also the gso_size (or length of individual datagrams)
> 
> You can not know the size of the packets in advance, the sender will decide.

Thanks for pointing that out. I guess that implementing something like
cmsg(UDP_SEGMENT) as Willem suggests in the 8/8 patch would do, right?

I can have a look at that _if_ there is interest in this approach.

Cheers,

Paolo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH 2/4] net: enable UDP gro on demand.
  2018-09-16 18:23     ` Willem de Bruijn
@ 2018-09-17 10:18       ` Paolo Abeni
  2018-09-17 14:07         ` Willem de Bruijn
  0 siblings, 1 reply; 11+ messages in thread
From: Paolo Abeni @ 2018-09-17 10:18 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Network Development, David Miller, Willem de Bruijn, steffen.klassert

On Sun, 2018-09-16 at 14:23 -0400, Willem de Bruijn wrote:
> That udp gro implementation is clearly less complete than yours in
> this patchset. The point I wanted to bring up for discussion is not the
> protocol implementation, but the infrastructure for enabling it
> conditionally.

I'm still [trying to] process your patchset ;) So please pardon me
for any obvious interpretation mistakes...

> Assuming cycle cost is comparable, what do you think of  using the
> existing sk offload callbacks to enable this on a per-socket basis?

I have no objection about that, if there are no performance drawbacks.
In my measures retpoline costs is relevant for every indirect call
added. Using the existing sk offload approach will require an
additional indirect call per packet compared to the implementation
here.

> As for the protocol-wide knob, I do strongly prefer something that can
> work for all protocols, not just UDP. 

I like the general infrastructure idea. I think there is some agreement
in avoiding the addition of more user-controllable knobs, as we already
have a lot of them. If I read your patch correctly, user-space needs to
enable/disable the UDP GRO explicitly via procfs, right?

I tried to look for something that does not require user action.

> I also implemented a version that
> atomically swaps the struct ptr instead of the flag based approach I sent
> for review. I'm fairly agnostic about that point. 

I think/fear security oriented guys may scream for the somewhat large
deconstification ?!?

> One subtle issue is that I
> believe we need to keep the gro_complete callbacks enabled, as gro
> packets may be queued for completion when gro_receive gets disabled.

Good point, thanks! I missed that.

Cheers,

Paolo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH 2/4] net: enable UDP gro on demand.
  2018-09-17 10:18       ` Paolo Abeni
@ 2018-09-17 14:07         ` Willem de Bruijn
  0 siblings, 0 replies; 11+ messages in thread
From: Willem de Bruijn @ 2018-09-17 14:07 UTC (permalink / raw)
  To: Paolo Abeni
  Cc: Network Development, David Miller, Willem de Bruijn, steffen.klassert

On Mon, Sep 17, 2018 at 6:18 AM Paolo Abeni <pabeni@redhat.com> wrote:
>
> On Sun, 2018-09-16 at 14:23 -0400, Willem de Bruijn wrote:
> > That udp gro implementation is clearly less complete than yours in
> > this patchset. The point I wanted to bring up for discussion is not the
> > protocol implementation, but the infrastructure for enabling it
> > conditionally.
>
> I'm still [trying to] process your patchset ;) So please pardon me
> for any obvious interpretation mistakes...
>
> > Assuming cycle cost is comparable, what do you think of  using the
> > existing sk offload callbacks to enable this on a per-socket basis?
>
> I have no objection about that, if there are no performance drawbacks.
> In my measures retpoline costs is relevant for every indirect call
> added. Using the existing sk offload approach will require an
> additional indirect call per packet compared to the implementation
> here.

Fair enough. The question is whether it is significant to the real workload.
This is also an issue with GRO processing in general, all those callbacks
as well as the two cacheline lookups to get to each callback.

> > As for the protocol-wide knob, I do strongly prefer something that can
> > work for all protocols, not just UDP.
>
> I like the general infrastructure idea. I think there is some agreement
> in avoiding the addition of more user-controllable knobs, as we already
> have a lot of them. If I read your patch correctly, user-space needs to
> enable/disable the UDP GRO explicitly via procfs, right?

No, like other GRO callbacks, the feature is enabled by default.

Patch 7/8 disables the most expensive part behind a static key
until a socket actually registers a GRO callback, whether tunnel
or the new application GRO.

Patch 6/9 makes it possible to disable any protocol completely,
indeed through a sysctl.

> I tried to look for something that does not require user action.
>
> > I also implemented a version that
> > atomically swaps the struct ptr instead of the flag based approach I sent
> > for review. I'm fairly agnostic about that point.
>
> I think/fear security oriented guys may scream for the somewhat large
> deconstification ?!?

Hmm.. yes, interesting point. Since const pointers are a compile time
feature, in practice I don't think that they buy any protection against
callback pointer rewriting. Let me think about that some more.

>
> > One subtle issue is that I
> > believe we need to keep the gro_complete callbacks enabled, as gro
> > packets may be queued for completion when gro_receive gets disabled.
>
> Good point, thanks! I missed that.
>
> Cheers,
>
> Paolo
>
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2018-09-17 19:35 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-14 15:43 [RFC PATCH 0/4] UDP: implement GRO support for UDP_SEGMENT socket Paolo Abeni
2018-09-14 15:43 ` [RFC PATCH 1/4] net: add new helper to update an already registered offload Paolo Abeni
2018-09-14 15:43 ` [RFC PATCH 2/4] net: enable UDP gro on demand Paolo Abeni
2018-09-14 17:16   ` Willem de Bruijn
2018-09-16 18:23     ` Willem de Bruijn
2018-09-17 10:18       ` Paolo Abeni
2018-09-17 14:07         ` Willem de Bruijn
2018-09-14 15:43 ` [RFC PATCH 3/4] udp: implement GRO plain UDP sockets Paolo Abeni
2018-09-14 16:48   ` Eric Dumazet
2018-09-17 10:06     ` Paolo Abeni
2018-09-14 15:43 ` [RFC PATCH 4/4] selftests: add GRO support, fix port option processing Paolo Abeni

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.