netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next 0/6] ipv6: Add lockless UDP send path
@ 2015-01-31 15:40 Vladislav Yasevich
  2015-01-31 15:40 ` [PATCH net-next 1/6] ipv6: pull cork initialization into its own function Vladislav Yasevich
                   ` (6 more replies)
  0 siblings, 7 replies; 14+ messages in thread
From: Vladislav Yasevich @ 2015-01-31 15:40 UTC (permalink / raw)
  To: netdev; +Cc: herbert, hannes, Vladislav Yasevich

This series introduces a lockless UDPv6 send path similar to
what Herbert Xu did for IPv4 a while ago.

There are some differences from IPv4.  IPv6 caching for flow
label is a bit different, and it also requires another cork
structure that holds the IPv6 ancillary data.

Please take a look.

Thanks
-vlad

Vladislav Yasevich (5):
  ipv6: pull cork initialization into its own function.
  ipv6: Append sending data to arbitrary queue
  ipv6: introduce ipv6_make_skb
  ipv6: Introduce udpv6_send_skb()
  udpv6: Add lockless sendmsg() support

 include/linux/ipv6.h  |  12 +-
 include/net/ipv6.h    |  19 +++
 net/ipv6/ip6_output.c | 349 +++++++++++++++++++++++++++++++++-----------------
 net/ipv6/udp.c        |  91 ++++++++-----
 4 files changed, 317 insertions(+), 154 deletions(-)

-- 
1.9.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH net-next 1/6] ipv6: pull cork initialization into its own function.
  2015-01-31 15:40 [PATCH net-next 0/6] ipv6: Add lockless UDP send path Vladislav Yasevich
@ 2015-01-31 15:40 ` Vladislav Yasevich
  2015-01-31 15:40 ` [PATCH net-next 2/6] ipv6: Append sending data to arbitrary queue Vladislav Yasevich
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Vladislav Yasevich @ 2015-01-31 15:40 UTC (permalink / raw)
  To: netdev; +Cc: herbert, hannes, Vladislav Yasevich

Pull IPv6 cork initialization into its own function that
can be re-used.  IPv6 specific cork data did not have an
explicit data structure.  This patch creates one so that
just the IPv6 cork data can be passed as arguments.  Also, since
IPv6 tries to save the flow label into the inet_cork_full
structure, pass the full cork.

Adjust ip6_cork_release() to take cork data structures.

Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
---
 include/linux/ipv6.h  |  12 ++--
 net/ipv6/ip6_output.c | 158 ++++++++++++++++++++++++++++----------------------
 2 files changed, 96 insertions(+), 74 deletions(-)

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 2805062..4d5169f 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -125,6 +125,12 @@ struct ipv6_mc_socklist;
 struct ipv6_ac_socklist;
 struct ipv6_fl_socklist;
 
+struct inet6_cork {
+	struct ipv6_txoptions *opt;
+	u8 hop_limit;
+	u8 tclass;
+};
+
 /**
  * struct ipv6_pinfo - ipv6 private area
  *
@@ -217,11 +223,7 @@ struct ipv6_pinfo {
 	struct ipv6_txoptions	*opt;
 	struct sk_buff		*pktoptions;
 	struct sk_buff		*rxpmtu;
-	struct {
-		struct ipv6_txoptions *opt;
-		u8 hop_limit;
-		u8 tclass;
-	} cork;
+	struct inet6_cork	cork;
 };
 
 /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index ce69a12..f9f08c4 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1135,6 +1135,74 @@ static void ip6_append_data_mtu(unsigned int *mtu,
 	}
 }
 
+static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
+			  struct inet6_cork *v6_cork,
+			  int hlimit, int tclass, struct ipv6_txoptions *opt,
+			  struct rt6_info *rt, struct flowi6 *fl6)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	unsigned int mtu;
+
+	/*
+	 * setup for corking
+	 */
+	if (opt) {
+		if (WARN_ON(v6_cork->opt))
+			return -EINVAL;
+
+		v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
+		if (unlikely(v6_cork->opt == NULL))
+			return -ENOBUFS;
+
+		v6_cork->opt->tot_len = opt->tot_len;
+		v6_cork->opt->opt_flen = opt->opt_flen;
+		v6_cork->opt->opt_nflen = opt->opt_nflen;
+
+		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
+						    sk->sk_allocation);
+		if (opt->dst0opt && !v6_cork->opt->dst0opt)
+			return -ENOBUFS;
+
+		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
+						    sk->sk_allocation);
+		if (opt->dst1opt && !v6_cork->opt->dst1opt)
+			return -ENOBUFS;
+
+		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
+						   sk->sk_allocation);
+		if (opt->hopopt && !v6_cork->opt->hopopt)
+			return -ENOBUFS;
+
+		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
+						    sk->sk_allocation);
+		if (opt->srcrt && !v6_cork->opt->srcrt)
+			return -ENOBUFS;
+
+		/* need source address above miyazawa*/
+	}
+	dst_hold(&rt->dst);
+	cork->base.dst = &rt->dst;
+	cork->fl.u.ip6 = *fl6;
+	v6_cork->hop_limit = hlimit;
+	v6_cork->tclass = tclass;
+	if (rt->dst.flags & DST_XFRM_TUNNEL)
+		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
+	else
+		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
+	if (np->frag_size < mtu) {
+		if (np->frag_size)
+			mtu = np->frag_size;
+	}
+	cork->base.fragsize = mtu;
+	if (dst_allfrag(rt->dst.path))
+		cork->base.flags |= IPCORK_ALLFRAG;
+	cork->base.length = 0;
+
+	return 0;
+}
+
 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	int offset, int len, int odd, struct sk_buff *skb),
 	void *from, int length, int transhdrlen,
@@ -1162,59 +1230,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		/*
 		 * setup for corking
 		 */
-		if (opt) {
-			if (WARN_ON(np->cork.opt))
-				return -EINVAL;
-
-			np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
-			if (unlikely(np->cork.opt == NULL))
-				return -ENOBUFS;
-
-			np->cork.opt->tot_len = opt->tot_len;
-			np->cork.opt->opt_flen = opt->opt_flen;
-			np->cork.opt->opt_nflen = opt->opt_nflen;
-
-			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
-							    sk->sk_allocation);
-			if (opt->dst0opt && !np->cork.opt->dst0opt)
-				return -ENOBUFS;
-
-			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
-							    sk->sk_allocation);
-			if (opt->dst1opt && !np->cork.opt->dst1opt)
-				return -ENOBUFS;
-
-			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
-							   sk->sk_allocation);
-			if (opt->hopopt && !np->cork.opt->hopopt)
-				return -ENOBUFS;
-
-			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
-							    sk->sk_allocation);
-			if (opt->srcrt && !np->cork.opt->srcrt)
-				return -ENOBUFS;
-
-			/* need source address above miyazawa*/
-		}
-		dst_hold(&rt->dst);
-		cork->dst = &rt->dst;
-		inet->cork.fl.u.ip6 = *fl6;
-		np->cork.hop_limit = hlimit;
-		np->cork.tclass = tclass;
-		if (rt->dst.flags & DST_XFRM_TUNNEL)
-			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
-			      rt->dst.dev->mtu : dst_mtu(&rt->dst);
-		else
-			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
-			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
-		if (np->frag_size < mtu) {
-			if (np->frag_size)
-				mtu = np->frag_size;
-		}
-		cork->fragsize = mtu;
-		if (dst_allfrag(rt->dst.path))
-			cork->flags |= IPCORK_ALLFRAG;
-		cork->length = 0;
+		err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
+				     tclass, opt, rt, fl6);
+		if (err)
+			return err;
 		exthdrlen = (opt ? opt->opt_flen : 0);
 		length += exthdrlen;
 		transhdrlen += exthdrlen;
@@ -1226,8 +1245,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		transhdrlen = 0;
 		exthdrlen = 0;
 		dst_exthdrlen = 0;
-		mtu = cork->fragsize;
 	}
+	mtu = cork->fragsize;
 	orig_mtu = mtu;
 
 	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -1503,23 +1522,24 @@ error:
 }
 EXPORT_SYMBOL_GPL(ip6_append_data);
 
-static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
+static void ip6_cork_release(struct inet_cork_full *cork,
+			     struct inet6_cork *v6_cork)
 {
-	if (np->cork.opt) {
-		kfree(np->cork.opt->dst0opt);
-		kfree(np->cork.opt->dst1opt);
-		kfree(np->cork.opt->hopopt);
-		kfree(np->cork.opt->srcrt);
-		kfree(np->cork.opt);
-		np->cork.opt = NULL;
+	if (v6_cork->opt) {
+		kfree(v6_cork->opt->dst0opt);
+		kfree(v6_cork->opt->dst1opt);
+		kfree(v6_cork->opt->hopopt);
+		kfree(v6_cork->opt->srcrt);
+		kfree(v6_cork->opt);
+		v6_cork->opt = NULL;
 	}
 
-	if (inet->cork.base.dst) {
-		dst_release(inet->cork.base.dst);
-		inet->cork.base.dst = NULL;
-		inet->cork.base.flags &= ~IPCORK_ALLFRAG;
+	if (cork->base.dst) {
+		dst_release(cork->base.dst);
+		cork->base.dst = NULL;
+		cork->base.flags &= ~IPCORK_ALLFRAG;
 	}
-	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
+	memset(&cork->fl, 0, sizeof(cork->fl));
 }
 
 int ip6_push_pending_frames(struct sock *sk)
@@ -1599,7 +1619,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	}
 
 out:
-	ip6_cork_release(inet, np);
+	ip6_cork_release(&inet->cork, &np->cork);
 	return err;
 error:
 	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
@@ -1618,6 +1638,6 @@ void ip6_flush_pending_frames(struct sock *sk)
 		kfree_skb(skb);
 	}
 
-	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
+	ip6_cork_release(&inet_sk(sk)->cork, &inet6_sk(sk)->cork);
 }
 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH net-next 2/6] ipv6: Append sending data to arbitrary queue
  2015-01-31 15:40 [PATCH net-next 0/6] ipv6: Add lockless UDP send path Vladislav Yasevich
  2015-01-31 15:40 ` [PATCH net-next 1/6] ipv6: pull cork initialization into its own function Vladislav Yasevich
@ 2015-01-31 15:40 ` Vladislav Yasevich
  2015-01-31 15:40 ` [PATCH net-next 3/6] ipv6: introduce ipv6_make_skb Vladislav Yasevich
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Vladislav Yasevich @ 2015-01-31 15:40 UTC (permalink / raw)
  To: netdev; +Cc: herbert, hannes, Vladislav Yasevich

Add the ability to append data to arbitrary queue.  This
will be needed later to implement lockless UDP sends.

Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
---
 net/ipv6/ip6_output.c | 106 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 67 insertions(+), 39 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f9f08c4..1b66453 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1041,6 +1041,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
 
 static inline int ip6_ufo_append_data(struct sock *sk,
+			struct sk_buff_head *queue,
 			int getfrag(void *from, char *to, int offset, int len,
 			int odd, struct sk_buff *skb),
 			void *from, int length, int hh_len, int fragheaderlen,
@@ -1056,7 +1057,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 	 * device, so create one single skb packet containing complete
 	 * udp datagram
 	 */
-	skb = skb_peek_tail(&sk->sk_write_queue);
+	skb = skb_peek_tail(queue);
 	if (skb == NULL) {
 		skb = sock_alloc_send_skb(sk,
 			hh_len + fragheaderlen + transhdrlen + 20,
@@ -1079,7 +1080,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 		skb->protocol = htons(ETH_P_IPV6);
 		skb->csum = 0;
 
-		__skb_queue_tail(&sk->sk_write_queue, skb);
+		__skb_queue_tail(queue, skb);
 	} else if (skb_is_gso(skb)) {
 		goto append;
 	}
@@ -1203,49 +1204,36 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 	return 0;
 }
 
-int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
-	int offset, int len, int odd, struct sk_buff *skb),
-	void *from, int length, int transhdrlen,
-	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
-	struct rt6_info *rt, unsigned int flags, int dontfrag)
+static int __ip6_append_data(struct sock *sk,
+			     struct flowi6 *fl6,
+			     struct sk_buff_head *queue,
+			     struct inet_cork *cork,
+			     struct inet6_cork *v6_cork,
+			     struct page_frag *pfrag,
+			     int getfrag(void *from, char *to, int offset,
+					 int len, int odd, struct sk_buff *skb),
+			     void *from, int length, int transhdrlen,
+			     unsigned int flags, int dontfrag)
 {
-	struct inet_sock *inet = inet_sk(sk);
-	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct inet_cork *cork;
 	struct sk_buff *skb, *skb_prev = NULL;
 	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
-	int exthdrlen;
-	int dst_exthdrlen;
+	int exthdrlen = 0;
+	int dst_exthdrlen = 0;
 	int hh_len;
 	int copy;
 	int err;
 	int offset = 0;
 	__u8 tx_flags = 0;
 	u32 tskey = 0;
+	struct rt6_info *rt = (struct rt6_info *)cork->dst;
+	struct ipv6_txoptions *opt = v6_cork->opt;
 
-	if (flags&MSG_PROBE)
-		return 0;
-	cork = &inet->cork.base;
-	if (skb_queue_empty(&sk->sk_write_queue)) {
-		/*
-		 * setup for corking
-		 */
-		err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
-				     tclass, opt, rt, fl6);
-		if (err)
-			return err;
-		exthdrlen = (opt ? opt->opt_flen : 0);
-		length += exthdrlen;
-		transhdrlen += exthdrlen;
+	skb = skb_peek_tail(queue);
+	if (!skb) {
+		exthdrlen = opt ? opt->opt_flen : 0;
 		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
-	} else {
-		rt = (struct rt6_info *)cork->dst;
-		fl6 = &inet->cork.fl.u.ip6;
-		opt = np->cork.opt;
-		transhdrlen = 0;
-		exthdrlen = 0;
-		dst_exthdrlen = 0;
 	}
+
 	mtu = cork->fragsize;
 	orig_mtu = mtu;
 
@@ -1311,13 +1299,12 @@ emsgsize:
 	 * --yoshfuji
 	 */
 
-	skb = skb_peek_tail(&sk->sk_write_queue);
 	cork->length += length;
 	if (((length > mtu) ||
 	     (skb && skb_is_gso(skb))) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->dst.dev->features & NETIF_F_UFO)) {
-		err = ip6_ufo_append_data(sk, getfrag, from, length,
+		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
 					  hh_len, fragheaderlen,
 					  transhdrlen, mtu, flags, rt);
 		if (err)
@@ -1458,7 +1445,7 @@ alloc_new_skb:
 			/*
 			 * Put the packet on the pending queue
 			 */
-			__skb_queue_tail(&sk->sk_write_queue, skb);
+			__skb_queue_tail(queue, skb);
 			continue;
 		}
 
@@ -1477,7 +1464,6 @@ alloc_new_skb:
 			}
 		} else {
 			int i = skb_shinfo(skb)->nr_frags;
-			struct page_frag *pfrag = sk_page_frag(sk);
 
 			err = -ENOMEM;
 			if (!sk_page_frag_refill(sk, pfrag))
@@ -1520,6 +1506,42 @@ error:
 	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
 	return err;
 }
+
+int ip6_append_data(struct sock *sk,
+		    int getfrag(void *from, char *to, int offset, int len,
+				int odd, struct sk_buff *skb),
+		    void *from, int length, int transhdrlen, int hlimit,
+		    int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
+		    struct rt6_info *rt, unsigned int flags, int dontfrag)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	int exthdrlen;
+	int err;
+
+	if (flags&MSG_PROBE)
+		return 0;
+	if (skb_queue_empty(&sk->sk_write_queue)) {
+		/*
+		 * setup for corking
+		 */
+		err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
+				     tclass, opt, rt, fl6);
+		if (err)
+			return err;
+
+		exthdrlen = (opt ? opt->opt_flen : 0);
+		length += exthdrlen;
+		transhdrlen += exthdrlen;
+	} else {
+		fl6 = &inet->cork.fl.u.ip6;
+		transhdrlen = 0;
+	}
+
+	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
+				 &np->cork, sk_page_frag(sk), getfrag,
+				 from, length, transhdrlen, flags, dontfrag);
+}
 EXPORT_SYMBOL_GPL(ip6_append_data);
 
 static void ip6_cork_release(struct inet_cork_full *cork,
@@ -1627,11 +1649,12 @@ error:
 }
 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
 
-void ip6_flush_pending_frames(struct sock *sk)
+static void __ip6_flush_pending_frames(struct sock *sk,
+				       struct sk_buff_head *queue)
 {
 	struct sk_buff *skb;
 
-	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
+	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
 		if (skb_dst(skb))
 			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
 				      IPSTATS_MIB_OUTDISCARDS);
@@ -1640,4 +1663,9 @@ void ip6_flush_pending_frames(struct sock *sk)
 
 	ip6_cork_release(&inet_sk(sk)->cork, &inet6_sk(sk)->cork);
 }
+
+void ip6_flush_pending_frames(struct sock *sk)
+{
+	__ip6_flush_pending_frames(sk, &sk->sk_write_queue);
+}
 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH net-next 3/6] ipv6: introduce ipv6_make_skb
  2015-01-31 15:40 [PATCH net-next 0/6] ipv6: Add lockless UDP send path Vladislav Yasevich
  2015-01-31 15:40 ` [PATCH net-next 1/6] ipv6: pull cork initialization into its own function Vladislav Yasevich
  2015-01-31 15:40 ` [PATCH net-next 2/6] ipv6: Append sending data to arbitrary queue Vladislav Yasevich
@ 2015-01-31 15:40 ` Vladislav Yasevich
  2015-01-31 15:40 ` [PATCH net-next 4/6] ipv6: Introduce udpv6_send_skb() Vladislav Yasevich
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Vladislav Yasevich @ 2015-01-31 15:40 UTC (permalink / raw)
  To: netdev; +Cc: herbert, hannes, Vladislav Yasevich

This commit is very similar to
commit 1c32c5ad6fac8cee1a77449f5abf211e911ff830
Author: Herbert Xu <herbert@gondor.apana.org.au>
Date:   Tue Mar 1 02:36:47 2011 +0000

    inet: Add ip_make_skb and ip_finish_skb

It adds IPv6 version of the helpers ip6_make_skb and ip6_finish_skb.

The job of ip6_make_skb is to collect messages into an ipv6 packet
and populate the ipv6 header.  The job of ip6_finish_skb is to transmit
the generated skb.  Together they replicate the job of
ip6_push_pending_frames() while also providing the capability to be
called independently.  This will be needed to add lockless UDP sendmsg
support.

Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
---
 include/net/ipv6.h    |  19 ++++++++++
 net/ipv6/ip6_output.c | 103 ++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 103 insertions(+), 19 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 4292929..8027ca5 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -788,6 +788,25 @@ int ip6_push_pending_frames(struct sock *sk);
 
 void ip6_flush_pending_frames(struct sock *sk);
 
+int ip6_send_skb(struct sk_buff *skb);
+
+struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue,
+			       struct inet_cork_full *cork,
+			       struct inet6_cork *v6_cork);
+struct sk_buff *ip6_make_skb(struct sock *sk,
+			     int getfrag(void *from, char *to, int offset,
+					 int len, int odd, struct sk_buff *skb),
+			     void *from, int length, int transhdrlen,
+			     int hlimit, int tclass, struct ipv6_txoptions *opt,
+			     struct flowi6 *fl6, struct rt6_info *rt,
+			     unsigned int flags, int dontfrag);
+
+static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
+{
+	return __ip6_make_skb(sk, &sk->sk_write_queue, &inet_sk(sk)->cork,
+			      &inet6_sk(sk)->cork);
+}
+
 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6);
 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
 				      const struct in6_addr *final_dst);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 1b66453..b89d3c2 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1564,22 +1564,23 @@ static void ip6_cork_release(struct inet_cork_full *cork,
 	memset(&cork->fl, 0, sizeof(cork->fl));
 }
 
-int ip6_push_pending_frames(struct sock *sk)
+struct sk_buff *__ip6_make_skb(struct sock *sk,
+			       struct sk_buff_head *queue,
+			       struct inet_cork_full *cork,
+			       struct inet6_cork *v6_cork)
 {
 	struct sk_buff *skb, *tmp_skb;
 	struct sk_buff **tail_skb;
 	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
-	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct net *net = sock_net(sk);
 	struct ipv6hdr *hdr;
-	struct ipv6_txoptions *opt = np->cork.opt;
-	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
-	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
+	struct ipv6_txoptions *opt = v6_cork->opt;
+	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
+	struct flowi6 *fl6 = &cork->fl.u.ip6;
 	unsigned char proto = fl6->flowi6_proto;
-	int err = 0;
 
-	skb = __skb_dequeue(&sk->sk_write_queue);
+	skb = __skb_dequeue(queue);
 	if (skb == NULL)
 		goto out;
 	tail_skb = &(skb_shinfo(skb)->frag_list);
@@ -1587,7 +1588,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	/* move skb->data to ip header from ext header */
 	if (skb->data < skb_network_header(skb))
 		__skb_pull(skb, skb_network_offset(skb));
-	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
+	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
 		__skb_pull(tmp_skb, skb_network_header_len(skb));
 		*tail_skb = tmp_skb;
 		tail_skb = &(tmp_skb->next);
@@ -1612,10 +1613,10 @@ int ip6_push_pending_frames(struct sock *sk)
 	skb_reset_network_header(skb);
 	hdr = ipv6_hdr(skb);
 
-	ip6_flow_hdr(hdr, np->cork.tclass,
+	ip6_flow_hdr(hdr, v6_cork->tclass,
 		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
 					np->autoflowlabel));
-	hdr->hop_limit = np->cork.hop_limit;
+	hdr->hop_limit = v6_cork->hop_limit;
 	hdr->nexthdr = proto;
 	hdr->saddr = fl6->saddr;
 	hdr->daddr = *final_dst;
@@ -1632,25 +1633,45 @@ int ip6_push_pending_frames(struct sock *sk)
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
 	}
 
+	ip6_cork_release(cork, v6_cork);
+out:
+	return skb;
+}
+
+int ip6_send_skb(struct sk_buff *skb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+	int err;
+
 	err = ip6_local_out(skb);
 	if (err) {
 		if (err > 0)
 			err = net_xmit_errno(err);
 		if (err)
-			goto error;
+			IP6_INC_STATS(net, rt->rt6i_idev,
+				      IPSTATS_MIB_OUTDISCARDS);
 	}
 
-out:
-	ip6_cork_release(&inet->cork, &np->cork);
 	return err;
-error:
-	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
-	goto out;
+}
+
+int ip6_push_pending_frames(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	skb = ip6_finish_skb(sk);
+	if (!skb)
+		return 0;
+
+	return ip6_send_skb(skb);
 }
 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
 
 static void __ip6_flush_pending_frames(struct sock *sk,
-				       struct sk_buff_head *queue)
+				       struct sk_buff_head *queue,
+				       struct inet_cork_full *cork,
+				       struct inet6_cork *v6_cork)
 {
 	struct sk_buff *skb;
 
@@ -1661,11 +1682,55 @@ static void __ip6_flush_pending_frames(struct sock *sk,
 		kfree_skb(skb);
 	}
 
-	ip6_cork_release(&inet_sk(sk)->cork, &inet6_sk(sk)->cork);
+	ip6_cork_release(cork, v6_cork);
 }
 
 void ip6_flush_pending_frames(struct sock *sk)
 {
-	__ip6_flush_pending_frames(sk, &sk->sk_write_queue);
+	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
+				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
 }
 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
+
+struct sk_buff *ip6_make_skb(struct sock *sk,
+			     int getfrag(void *from, char *to, int offset,
+					 int len, int odd, struct sk_buff *skb),
+			     void *from, int length, int transhdrlen,
+			     int hlimit, int tclass,
+			     struct ipv6_txoptions *opt, struct flowi6 *fl6,
+			     struct rt6_info *rt, unsigned int flags,
+			     int dontfrag)
+{
+	struct inet_cork_full cork;
+	struct inet6_cork v6_cork;
+	struct sk_buff_head queue;
+	int exthdrlen = (opt ? opt->opt_flen : 0);
+	int err;
+
+	if (flags & MSG_PROBE)
+		return NULL;
+
+	__skb_queue_head_init(&queue);
+
+	cork.base.flags = 0;
+	cork.base.addr = 0;
+	cork.base.opt = NULL;
+	v6_cork.opt = NULL;
+	err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
+	if (err)
+		return ERR_PTR(err);
+
+	if (dontfrag < 0)
+		dontfrag = inet6_sk(sk)->dontfrag;
+
+	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
+				&current->task_frag, getfrag, from,
+				length + exthdrlen, transhdrlen + exthdrlen,
+				flags, dontfrag);
+	if (err) {
+		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
+		return ERR_PTR(err);
+	}
+
+	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
+}
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH net-next 4/6] ipv6: Introduce udpv6_send_skb()
  2015-01-31 15:40 [PATCH net-next 0/6] ipv6: Add lockless UDP send path Vladislav Yasevich
                   ` (2 preceding siblings ...)
  2015-01-31 15:40 ` [PATCH net-next 3/6] ipv6: introduce ipv6_make_skb Vladislav Yasevich
@ 2015-01-31 15:40 ` Vladislav Yasevich
  2015-01-31 15:40 ` [PATCH net-next 5/6] udpv6: Add lockless sendmsg() support Vladislav Yasevich
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Vladislav Yasevich @ 2015-01-31 15:40 UTC (permalink / raw)
  To: netdev; +Cc: herbert, hannes, Vladislav Yasevich

Now that we can individually construct IPv6 skbs to send, add a
udp_v6_send_skb() function to populate the udp header and send the
skb.  This allows udp_v6_push_pending_frames() to re-use this
function as well as enables us to add lockless sendmsg() support.

Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
---
 net/ipv6/udp.c | 67 +++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 40 insertions(+), 27 deletions(-)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e41f017..67a3d70 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -990,9 +990,10 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
 {
 	unsigned int offset;
 	struct udphdr *uh = udp_hdr(skb);
+	struct sk_buff *frags = skb_shinfo(skb)->frag_list;
 	__wsum csum = 0;
 
-	if (skb_queue_len(&sk->sk_write_queue) == 1) {
+	if (!frags) {
 		/* Only one fragment on the socket.  */
 		skb->csum_start = skb_transport_header(skb) - skb->head;
 		skb->csum_offset = offsetof(struct udphdr, check);
@@ -1008,9 +1009,9 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
 
 		skb->ip_summed = CHECKSUM_NONE;
 
-		skb_queue_walk(&sk->sk_write_queue, skb) {
-			csum = csum_add(csum, skb->csum);
-		}
+		do {
+			csum = csum_add(csum, frags->csum);
+		} while ((frags = frags->next));
 
 		uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP,
 					    csum);
@@ -1023,26 +1024,15 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
  *	Sending
  */
 
-static int udp_v6_push_pending_frames(struct sock *sk)
+static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6)
 {
-	struct sk_buff *skb;
+	struct sock *sk = skb->sk;
 	struct udphdr *uh;
-	struct udp_sock  *up = udp_sk(sk);
-	struct inet_sock *inet = inet_sk(sk);
-	struct flowi6 *fl6;
 	int err = 0;
 	int is_udplite = IS_UDPLITE(sk);
 	__wsum csum = 0;
-
-	if (up->pending == AF_INET)
-		return udp_push_pending_frames(sk);
-
-	fl6 = &inet->cork.fl.u.ip6;
-
-	/* Grab the skbuff where UDP header space exists. */
-	skb = skb_peek(&sk->sk_write_queue);
-	if (skb == NULL)
-		goto out;
+	int offset = skb_transport_offset(skb);
+	int len = skb->len - offset;
 
 	/*
 	 * Create a UDP header
@@ -1050,29 +1040,28 @@ static int udp_v6_push_pending_frames(struct sock *sk)
 	uh = udp_hdr(skb);
 	uh->source = fl6->fl6_sport;
 	uh->dest = fl6->fl6_dport;
-	uh->len = htons(up->len);
+	uh->len = htons(len);
 	uh->check = 0;
 
 	if (is_udplite)
-		csum = udplite_csum_outgoing(sk, skb);
-	else if (up->no_check6_tx) {   /* UDP csum disabled */
+		csum = udplite_csum(skb);
+	else if (udp_sk(sk)->no_check6_tx) {   /* UDP csum disabled */
 		skb->ip_summed = CHECKSUM_NONE;
 		goto send;
 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
-		udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr,
-				     up->len);
+		udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, len);
 		goto send;
 	} else
-		csum = udp_csum_outgoing(sk, skb);
+		csum = udp_csum(skb);
 
 	/* add protocol-dependent pseudo-header */
 	uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
-				    up->len, fl6->flowi6_proto, csum);
+				    len, fl6->flowi6_proto, csum);
 	if (uh->check == 0)
 		uh->check = CSUM_MANGLED_0;
 
 send:
-	err = ip6_push_pending_frames(sk);
+	err = ip6_send_skb(skb);
 	if (err) {
 		if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
 			UDP6_INC_STATS_USER(sock_net(sk),
@@ -1082,6 +1071,30 @@ send:
 	} else
 		UDP6_INC_STATS_USER(sock_net(sk),
 				    UDP_MIB_OUTDATAGRAMS, is_udplite);
+	return err;
+}
+
+static int udp_v6_push_pending_frames(struct sock *sk)
+{
+	struct sk_buff *skb;
+	struct udp_sock  *up = udp_sk(sk);
+	struct flowi6 fl6;
+	int err = 0;
+
+	if (up->pending == AF_INET)
+		return udp_push_pending_frames(sk);
+
+	/* ip6_finish_skb will release the cork, so make a copy of
+	 * fl6 here.
+	 */
+	fl6 = inet_sk(sk)->cork.fl.u.ip6;
+
+	skb = ip6_finish_skb(sk);
+	if (!skb)
+		goto out;
+
+	err = udp_v6_send_skb(skb, &fl6);
+
 out:
 	up->len = 0;
 	up->pending = 0;
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH net-next 5/6] udpv6: Add lockless sendmsg() support
  2015-01-31 15:40 [PATCH net-next 0/6] ipv6: Add lockless UDP send path Vladislav Yasevich
                   ` (3 preceding siblings ...)
  2015-01-31 15:40 ` [PATCH net-next 4/6] ipv6: Introduce udpv6_send_skb() Vladislav Yasevich
@ 2015-01-31 15:40 ` Vladislav Yasevich
  2015-01-31 20:49   ` Sergei Shtylyov
  2015-01-31 15:40 ` [PATCH net-next 6/6] ipv6: Allow for partial checksums on non-ufo packets Vladislav Yasevich
  2015-02-03  3:28 ` [PATCH net-next 0/6] ipv6: Add lockless UDP send path David Miller
  6 siblings, 1 reply; 14+ messages in thread
From: Vladislav Yasevich @ 2015-01-31 15:40 UTC (permalink / raw)
  To: netdev; +Cc: herbert, hannes, Vladislav Yasevich

This commit adds the same functionality to IPv6 that
commit 903ab86d195cca295379699299c5fc10beba31c7
Author: Herbert Xu <herbert@gondor.apana.org.au>
Date:   Tue Mar 1 02:36:48 2011 +0000

    udp: Add lockless transmit path

added to IPv4.

UDP transmit path can now run without a socket lock,
thus allowing multiple threads to send to a single socket
more efficiently.
This is only used when corking/MSG_MORE is not used.

Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
---
 net/ipv6/udp.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 67a3d70..d048d46 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1177,6 +1177,7 @@ do_udp_sendmsg:
 	if (len > INT_MAX - sizeof(struct udphdr))
 		return -EMSGSIZE;
 
+	getfrag  =  is_udplite ?  udplite_getfrag : ip_generic_getfrag;
 	if (up->pending) {
 		/*
 		 * There are pending frames.
@@ -1307,6 +1308,20 @@ do_udp_sendmsg:
 		goto do_confirm;
 back_from_confirm:
 
+	/* Lockless fast path for the non-corking case */
+	if (!corkreq) {
+		struct sk_buff *skb;
+
+		skb = ip6_make_skb(sk, getfrag, msg, ulen,
+				   sizeof(struct udphdr), hlimit, tclass, opt,
+				   &fl6, (struct rt6_info *)dst,
+				   msg->msg_flags, dontfrag);
+		err = PTR_ERR(skb);
+		if (!IS_ERR_OR_NULL(skb))
+			err = udp_v6_send_skb(skb, &fl6);
+		goto release_dst;
+	}
+
 	lock_sock(sk);
 	if (unlikely(up->pending)) {
 		/* The socket is already corked while preparing it. */
@@ -1324,7 +1339,6 @@ do_append_data:
 	if (dontfrag < 0)
 		dontfrag = np->dontfrag;
 	up->len += ulen;
-	getfrag  =  is_udplite ?  udplite_getfrag : ip_generic_getfrag;
 	err = ip6_append_data(sk, getfrag, msg, ulen,
 		sizeof(struct udphdr), hlimit, tclass, opt, &fl6,
 		(struct rt6_info *)dst,
@@ -1336,6 +1350,11 @@ do_append_data:
 	else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
 		up->pending = 0;
 
+	if (err > 0)
+		err = np->recverr ? net_xmit_errno(err) : 0;
+	release_sock(sk);
+
+release_dst:
 	if (dst) {
 		if (connected) {
 			ip6_dst_store(sk, dst,
@@ -1352,9 +1371,6 @@ do_append_data:
 		dst = NULL;
 	}
 
-	if (err > 0)
-		err = np->recverr ? net_xmit_errno(err) : 0;
-	release_sock(sk);
 out:
 	dst_release(dst);
 	fl6_sock_release(flowlabel);
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH net-next 6/6] ipv6: Allow for partial checksums on non-ufo packets
  2015-01-31 15:40 [PATCH net-next 0/6] ipv6: Add lockless UDP send path Vladislav Yasevich
                   ` (4 preceding siblings ...)
  2015-01-31 15:40 ` [PATCH net-next 5/6] udpv6: Add lockless sendmsg() support Vladislav Yasevich
@ 2015-01-31 15:40 ` Vladislav Yasevich
  2015-02-10 14:07   ` Sabrina Dubroca
  2015-02-03  3:28 ` [PATCH net-next 0/6] ipv6: Add lockless UDP send path David Miller
  6 siblings, 1 reply; 14+ messages in thread
From: Vladislav Yasevich @ 2015-01-31 15:40 UTC (permalink / raw)
  To: netdev; +Cc: herbert, hannes, Vladislav Yasevich

Currently, if we are not doing UFO on the packet, all UDP
packets will start with CHECKSUM_NONE and thus perform full
checksum computations in software even if the device supports
IPv6 checksum offloading.

Let's start with CHECKSUM_PARTIAL if the device
supports it and we are sending only a single packet at
or below mtu size.

Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
---
 net/ipv6/ip6_output.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index b89d3c2..1a036f3 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1227,6 +1227,7 @@ static int __ip6_append_data(struct sock *sk,
 	u32 tskey = 0;
 	struct rt6_info *rt = (struct rt6_info *)cork->dst;
 	struct ipv6_txoptions *opt = v6_cork->opt;
+	int csummode = CHECKSUM_NONE;
 
 	skb = skb_peek_tail(queue);
 	if (!skb) {
@@ -1283,6 +1284,14 @@ emsgsize:
 			tskey = sk->sk_tskey++;
 	}
 
+	/* If this is the first and only packet and device
+	 * supports checksum offloading, let's use it.
+	 */
+	if (!skb &&
+	    length + fragheaderlen < mtu &&
+	    rt->dst.dev->features & NETIF_F_V6_CSUM &&
+	    !exthdrlen)
+		csummode = CHECKSUM_PARTIAL;
 	/*
 	 * Let's try using as much space as possible.
 	 * Use MTU if total length of the message fits into the MTU.
@@ -1395,7 +1404,7 @@ alloc_new_skb:
 			 *	Fill in the control structures
 			 */
 			skb->protocol = htons(ETH_P_IPV6);
-			skb->ip_summed = CHECKSUM_NONE;
+			skb->ip_summed = csummode;
 			skb->csum = 0;
 			/* reserve for fragmentation and ipsec header */
 			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 5/6] udpv6: Add lockless sendmsg() support
  2015-01-31 15:40 ` [PATCH net-next 5/6] udpv6: Add lockless sendmsg() support Vladislav Yasevich
@ 2015-01-31 20:49   ` Sergei Shtylyov
  2015-02-02 20:42     ` Vlad Yasevich
  0 siblings, 1 reply; 14+ messages in thread
From: Sergei Shtylyov @ 2015-01-31 20:49 UTC (permalink / raw)
  To: Vladislav Yasevich, netdev; +Cc: herbert, hannes, Vladislav Yasevich

Hello.

On 1/31/2015 6:40 PM, Vladislav Yasevich wrote:

> This commit adds the same functionality to IPv6 that
> commit 903ab86d195cca295379699299c5fc10beba31c7
> Author: Herbert Xu <herbert@gondor.apana.org.au>
> Date:   Tue Mar 1 02:36:48 2011 +0000

>      udp: Add lockless transmit path

> added to IPv4.

> UDP transmit path can now run without a socket lock,
> thus allowing multiple threads to send to a single socket
> more efficiently.
> This is only used when corking/MSG_MORE is not used.

> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
> ---
>   net/ipv6/udp.c | 24 ++++++++++++++++++++----
>   1 file changed, 20 insertions(+), 4 deletions(-)

> diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
> index 67a3d70..d048d46 100644
> --- a/net/ipv6/udp.c
> +++ b/net/ipv6/udp.c
[...]
> @@ -1307,6 +1308,20 @@ do_udp_sendmsg:
>   		goto do_confirm;
>   back_from_confirm:
>
> +	/* Lockless fast path for the non-corking case */
> +	if (!corkreq) {
> +		struct sk_buff *skb;
> +
> +		skb = ip6_make_skb(sk, getfrag, msg, ulen,
> +				   sizeof(struct udphdr), hlimit, tclass, opt,
> +				   &fl6, (struct rt6_info *)dst,
> +				   msg->msg_flags, dontfrag);
> +		err = PTR_ERR(skb);

    You should use PTR_ERR_OR_ZERO() here, I think.

[...]

WBR, Sergei

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 5/6] udpv6: Add lockless sendmsg() support
  2015-01-31 20:49   ` Sergei Shtylyov
@ 2015-02-02 20:42     ` Vlad Yasevich
  0 siblings, 0 replies; 14+ messages in thread
From: Vlad Yasevich @ 2015-02-02 20:42 UTC (permalink / raw)
  To: Sergei Shtylyov, netdev; +Cc: herbert, hannes, Vladislav Yasevich

On 01/31/2015 03:49 PM, Sergei Shtylyov wrote:
> Hello.
> 
> On 1/31/2015 6:40 PM, Vladislav Yasevich wrote:
> 
>> This commit adds the same functionality to IPv6 that
>> commit 903ab86d195cca295379699299c5fc10beba31c7
>> Author: Herbert Xu <herbert@gondor.apana.org.au>
>> Date:   Tue Mar 1 02:36:48 2011 +0000
> 
>>      udp: Add lockless transmit path
> 
>> added to IPv4.
> 
>> UDP transmit path can now run without a socket lock,
>> thus allowing multiple threads to send to a single socket
>> more efficiently.
>> This is only used when corking/MSG_MORE is not used.
> 
>> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
>> ---
>>   net/ipv6/udp.c | 24 ++++++++++++++++++++----
>>   1 file changed, 20 insertions(+), 4 deletions(-)
> 
>> diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
>> index 67a3d70..d048d46 100644
>> --- a/net/ipv6/udp.c
>> +++ b/net/ipv6/udp.c
> [...]
>> @@ -1307,6 +1308,20 @@ do_udp_sendmsg:
>>           goto do_confirm;
>>   back_from_confirm:
>>
>> +    /* Lockless fast path for the non-corking case */
>> +    if (!corkreq) {
>> +        struct sk_buff *skb;
>>
>> +        skb = ip6_make_skb(sk, getfrag, msg, ulen,
>> +                   sizeof(struct udphdr), hlimit, tclass, opt,
>> +                   &fl6, (struct rt6_info *)dst,
>> +                   msg->msg_flags, dontfrag);
>> +        err = PTR_ERR(skb);
> 
>    You should use PTR_ERR_OR_ZERO() here, I think.
> 

That particular code was stolen from ipv4/udp.c.  You are
right, we can use PTR_ERR_OR_ZERO() and simplify the following
check as well.

Will fix.

Thanks
-vlad

> [...]
> 
> WBR, Sergei
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 0/6] ipv6: Add lockless UDP send path
  2015-01-31 15:40 [PATCH net-next 0/6] ipv6: Add lockless UDP send path Vladislav Yasevich
                   ` (5 preceding siblings ...)
  2015-01-31 15:40 ` [PATCH net-next 6/6] ipv6: Allow for partial checksums on non-ufo packets Vladislav Yasevich
@ 2015-02-03  3:28 ` David Miller
  6 siblings, 0 replies; 14+ messages in thread
From: David Miller @ 2015-02-03  3:28 UTC (permalink / raw)
  To: vyasevich; +Cc: netdev, herbert, hannes, vyasevic

From: Vladislav Yasevich <vyasevich@gmail.com>
Date: Sat, 31 Jan 2015 10:40:12 -0500

> This series introduces a lockless UDPv6 send path similar to
> what Herbert Xu did for IPv4 a while ago.
> 
> There are some difference from IPv4.  IPv6 caching for flow
> label is a bit different, as well as it requires another cork
> cork structure that holds the IPv6 ancillary data.
> 
> Please take a look.

This looks good to me, applied, thanks Vlad.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 6/6] ipv6: Allow for partial checksums on non-ufo packets
  2015-01-31 15:40 ` [PATCH net-next 6/6] ipv6: Allow for partial checksums on non-ufo packets Vladislav Yasevich
@ 2015-02-10 14:07   ` Sabrina Dubroca
  2015-02-10 15:34     ` Vlad Yasevich
  2015-02-10 15:55     ` [PATCH] ipv6: Partial checksum only UDP packets Vladislav Yasevich
  0 siblings, 2 replies; 14+ messages in thread
From: Sabrina Dubroca @ 2015-02-10 14:07 UTC (permalink / raw)
  To: Vladislav Yasevich; +Cc: netdev, herbert, hannes, Vladislav Yasevich

2015-01-31, 10:40:18 -0500, Vladislav Yasevich wrote:
> Currently, if we are not doing UFO on the packet, all UDP
> packets will start with CHECKSUM_NONE and thus perform full
> checksum computations in software even if the device supports
> IPv6 checksum offloading.
> 
> Let's start with CHECKSUM_PARTIAL if the device
> supports it and we are sending only a single packet at
> or below mtu size.
> 
> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
> ---

This patch causes ICMPv6 checksumming issues for me.

On my tg3 device and on a qemu VM with e1000 emulation, outgoing pings
have a bad checksum.  Router solicitations also have a bad checksum,
so autoconf fails.  When I revert this patch, or when I disable
tx-checksumming with ethtool, everything looks okay again.

On tg3, replies to ping seem always good.

On e1000, replies to ping work (more or less).  Sometimes the checksum
is bad, sometimes it's good.


% ping6 fec0::3456
PING fec0::3456(fec0::3456) 56 data bytes
64 bytes from fec0::3456: icmp_seq=1 ttl=64 time=0.433 ms
64 bytes from fec0::3456: icmp_seq=2 ttl=64 time=0.457 ms
64 bytes from fec0::3456: icmp_seq=10 ttl=64 time=0.448 ms
64 bytes from fec0::3456: icmp_seq=11 ttl=64 time=0.451 ms
64 bytes from fec0::3456: icmp_seq=18 ttl=64 time=0.485 ms
64 bytes from fec0::3456: icmp_seq=20 ttl=64 time=0.476 ms
64 bytes from fec0::3456: icmp_seq=22 ttl=64 time=0.448 ms
64 bytes from fec0::3456: icmp_seq=26 ttl=64 time=0.438 ms
64 bytes from fec0::3456: icmp_seq=27 ttl=64 time=0.413 ms
64 bytes from fec0::3456: icmp_seq=28 ttl=64 time=0.452 ms
64 bytes from fec0::3456: icmp_seq=29 ttl=64 time=0.440 ms
64 bytes from fec0::3456: icmp_seq=30 ttl=64 time=0.485 ms
64 bytes from fec0::3456: icmp_seq=32 ttl=64 time=0.473 ms
64 bytes from fec0::3456: icmp_seq=33 ttl=64 time=0.472 ms
64 bytes from fec0::3456: icmp_seq=34 ttl=64 time=0.395 ms
64 bytes from fec0::3456: icmp_seq=35 ttl=64 time=0.456 ms
64 bytes from fec0::3456: icmp_seq=36 ttl=64 time=0.409 ms
^C
--- fec0::3456 ping statistics ---
36 packets transmitted, 17 received, 52% packet loss, time 34998ms
rtt min/avg/max/mdev = 0.395/0.448/0.485/0.037 ms


I've seen a few strange source addresses, but I don't know if it's
related.

% ping6 fec0::3456
PING fec0::3456(fec0::3456) 56 data bytes
64 bytes from fec0::ff:ff00:0:3456: icmp_seq=1 ttl=64 time=0.423 ms   <---
64 bytes from fec0::3456: icmp_seq=4 ttl=64 time=0.396 ms
64 bytes from fec0::3456: icmp_seq=5 ttl=64 time=0.400 ms


This could be a driver issue, or just exposing another problem
somewhere else, I don't know.

Any idea?


Thanks

-- 
Sabrina

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next 6/6] ipv6: Allow for partial checksums on non-ufo packets
  2015-02-10 14:07   ` Sabrina Dubroca
@ 2015-02-10 15:34     ` Vlad Yasevich
  2015-02-10 15:55     ` [PATCH] ipv6: Partial checksum only UDP packets Vladislav Yasevich
  1 sibling, 0 replies; 14+ messages in thread
From: Vlad Yasevich @ 2015-02-10 15:34 UTC (permalink / raw)
  To: Sabrina Dubroca, Vladislav Yasevich; +Cc: netdev, herbert, hannes

On 02/10/2015 09:07 AM, Sabrina Dubroca wrote:
> 2015-01-31, 10:40:18 -0500, Vladislav Yasevich wrote:
>> Currently, if we are not doing UFO on the packet, all UDP
>> packets will start with CHECKSUM_NONE and thus perform full
>> checksum computations in software even if the device supports
>> IPv6 checksum offloading.
>>
>> Let's start with CHECKSUM_PARTIAL if the device
>> supports it and we are sending only a single packet at
>> or below mtu size.
>>
>> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
>> ---
> 
> This patch causes ICMPv6 checksumming issues for me.
> 
> On my tg3 device and on a qemu VM with e1000 emulation, outgoing pings
> have a bad checksum.  Router solicitations also have a bad checksum,
> so autoconf fails.  When I revert this patch, or when I disable
> tx-checksumming with ethtool, everything looks okay again.
> 
> On tg3, replies to ping seem always good.
> 
> On e1000, replies to ping work (more or less).  Sometimes the checksum
> is bad, sometimes it's good.
> 
> 
> % ping6 fec0::3456
> PING fec0::3456(fec0::3456) 56 data bytes
> 64 bytes from fec0::3456: icmp_seq=1 ttl=64 time=0.433 ms
> 64 bytes from fec0::3456: icmp_seq=2 ttl=64 time=0.457 ms
> 64 bytes from fec0::3456: icmp_seq=10 ttl=64 time=0.448 ms
> 64 bytes from fec0::3456: icmp_seq=11 ttl=64 time=0.451 ms
> 64 bytes from fec0::3456: icmp_seq=18 ttl=64 time=0.485 ms
> 64 bytes from fec0::3456: icmp_seq=20 ttl=64 time=0.476 ms
> 64 bytes from fec0::3456: icmp_seq=22 ttl=64 time=0.448 ms
> 64 bytes from fec0::3456: icmp_seq=26 ttl=64 time=0.438 ms
> 64 bytes from fec0::3456: icmp_seq=27 ttl=64 time=0.413 ms
> 64 bytes from fec0::3456: icmp_seq=28 ttl=64 time=0.452 ms
> 64 bytes from fec0::3456: icmp_seq=29 ttl=64 time=0.440 ms
> 64 bytes from fec0::3456: icmp_seq=30 ttl=64 time=0.485 ms
> 64 bytes from fec0::3456: icmp_seq=32 ttl=64 time=0.473 ms
> 64 bytes from fec0::3456: icmp_seq=33 ttl=64 time=0.472 ms
> 64 bytes from fec0::3456: icmp_seq=34 ttl=64 time=0.395 ms
> 64 bytes from fec0::3456: icmp_seq=35 ttl=64 time=0.456 ms
> 64 bytes from fec0::3456: icmp_seq=36 ttl=64 time=0.409 ms
> ^C
> --- fec0::3456 ping statistics ---
> 36 packets transmitted, 17 received, 52% packet loss, time 34998ms
> rtt min/avg/max/mdev = 0.395/0.448/0.485/0.037 ms
> 
> 
> I've seen a few strange source addresses, but I don't know if it's
> related.
> 
> % ping6 fec0::3456
> PING fec0::3456(fec0::3456) 56 data bytes
> 64 bytes from fec0::ff:ff00:0:3456: icmp_seq=1 ttl=64 time=0.423 ms   <---
> 64 bytes from fec0::3456: icmp_seq=4 ttl=64 time=0.396 ms
> 64 bytes from fec0::3456: icmp_seq=5 ttl=64 time=0.400 ms
> 
> 
> This could be a driver issue, or just exposing another problem
> somewhere else, I don't know.
> 
> Any idea?

Hi Sabrina

Thanks for reporting this.  I'll take a look.

-vlad

> 
> 
> Thanks
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH] ipv6: Partial checksum only UDP packets
  2015-02-10 14:07   ` Sabrina Dubroca
  2015-02-10 15:34     ` Vlad Yasevich
@ 2015-02-10 15:55     ` Vladislav Yasevich
  2015-02-10 16:23       ` Sabrina Dubroca
  1 sibling, 1 reply; 14+ messages in thread
From: Vladislav Yasevich @ 2015-02-10 15:55 UTC (permalink / raw)
  To: sd; +Cc: netdev, Vladislav Yasevich

ip6_append_data is used by other protocols and some of them can't
be partially checksummed.  Only partially checksum UDP protocol.

Fixes: 32dce968dd987a (ipv6: Allow for partial checksums on non-ufo packets)
Reported-by: Sabrian Dubroca <sd@queasysnail.net>
Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
---
Hi Sabrina

Can you try this patch.  Thanks.

 net/ipv6/ip6_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index d33df4c..8f2d558 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1273,7 +1273,7 @@ emsgsize:
 	/* If this is the first and only packet and device
 	 * supports checksum offloading, let's use it.
 	 */
-	if (!skb &&
+	if (!skb && sk->protocol == IPPROTO_UDP &&
 	    length + fragheaderlen < mtu &&
 	    rt->dst.dev->features & NETIF_F_V6_CSUM &&
 	    !exthdrlen)
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] ipv6: Partial checksum only UDP packets
  2015-02-10 15:55     ` [PATCH] ipv6: Partial checksum only UDP packets Vladislav Yasevich
@ 2015-02-10 16:23       ` Sabrina Dubroca
  0 siblings, 0 replies; 14+ messages in thread
From: Sabrina Dubroca @ 2015-02-10 16:23 UTC (permalink / raw)
  To: Vladislav Yasevich; +Cc: netdev, Vladislav Yasevich

2015-02-10, 10:55:52 -0500, Vladislav Yasevich wrote:
> ip6_append_data is used by other protocols and some of them can't
> be partially checksummed.  Only partially checksum UDP protocol.
> 
> Fixes: 32dce968dd987a (ipv6: Allow for partial checksums on non-ufo packets)
> Reported-by: Sabrian Dubroca <sd@queasysnail.net>

not a big deal, but since there's a small problem with this version
(see below):

s/Sabrian/Sabrina/

> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
> ---
> Hi Sabrina
> 
> Can you try this patch.  Thanks.
> 
>  net/ipv6/ip6_output.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
> index d33df4c..8f2d558 100644
> --- a/net/ipv6/ip6_output.c
> +++ b/net/ipv6/ip6_output.c
> @@ -1273,7 +1273,7 @@ emsgsize:
>  	/* If this is the first and only packet and device
>  	 * supports checksum offloading, let's use it.
>  	 */
> -	if (!skb &&
> +	if (!skb && sk->protocol == IPPROTO_UDP &&
                        ^^^

should be  sk->sk_protocol


And you can add:

Tested-by: Sabrina Dubroca <sd@queasysnail.net>


Thanks Vlad!

>  	    length + fragheaderlen < mtu &&
>  	    rt->dst.dev->features & NETIF_F_V6_CSUM &&
>  	    !exthdrlen)
> -- 
> 1.9.3


-- 
Sabrina

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2015-02-10 16:23 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-31 15:40 [PATCH net-next 0/6] ipv6: Add lockless UDP send path Vladislav Yasevich
2015-01-31 15:40 ` [PATCH net-next 1/6] ipv6: pull cork initialization into its own function Vladislav Yasevich
2015-01-31 15:40 ` [PATCH net-next 2/6] ipv6: Append sending data to arbitrary queue Vladislav Yasevich
2015-01-31 15:40 ` [PATCH net-next 3/6] ipv6: introduce ipv6_make_skb Vladislav Yasevich
2015-01-31 15:40 ` [PATCH net-next 4/6] ipv6: Introduce udpv6_send_skb() Vladislav Yasevich
2015-01-31 15:40 ` [PATCH net-next 5/6] udpv6: Add lockless sendmsg() support Vladislav Yasevich
2015-01-31 20:49   ` Sergei Shtylyov
2015-02-02 20:42     ` Vlad Yasevich
2015-01-31 15:40 ` [PATCH net-next 6/6] ipv6: Allow for partial checksums on non-ufo packets Vladislav Yasevich
2015-02-10 14:07   ` Sabrina Dubroca
2015-02-10 15:34     ` Vlad Yasevich
2015-02-10 15:55     ` [PATCH] ipv6: Partial checksum only UDP packets Vladislav Yasevich
2015-02-10 16:23       ` Sabrina Dubroca
2015-02-03  3:28 ` [PATCH net-next 0/6] ipv6: Add lockless UDP send path David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).