All of lore.kernel.org
 help / color / mirror / Atom feed
From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
To: netdev@vger.kernel.org
Cc: davem@davemloft.net, linux-api@vger.kernel.org,
	Willem de Bruijn <willemb@google.com>
Subject: [PATCH net-next v3 11/13] raw: enable MSG_ZEROCOPY with IP_HDRINCL
Date: Wed, 21 Jun 2017 17:18:14 -0400	[thread overview]
Message-ID: <20170621211816.53837-12-willemdebruijn.kernel@gmail.com> (raw)
In-Reply-To: <20170621211816.53837-1-willemdebruijn.kernel@gmail.com>

From: Willem de Bruijn <willemb@google.com>

Zerocopy support for udp also enables it for some raw sockets. Only
raw sockets that have hdrinc set take a different path. Add zerocopy
support for this variant.

Tested:
  msg_zerocopy.sh 4 raw_hdrincl:

  without zerocopy
    tx=150438 (9390 MB) txc=0 zc=n
    rx=150438 (9387 MB)

  with zerocopy
    tx=292454 (18255 MB) txc=292454 zc=y
    rx=292454 (18250 MB)

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 net/ipv4/raw.c | 23 +++++++++++++++++++----
 net/ipv6/raw.c | 20 +++++++++++++++++---
 2 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bdffad875691..0a5a3f2ce81b 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -351,7 +351,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 	unsigned int iphlen;
 	int err;
 	struct rtable *rt = *rtp;
-	int hlen, tlen;
+	int hlen, tlen, linear;
 
 	if (length > rt->dst.dev->mtu) {
 		ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -366,8 +366,14 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 
 	hlen = LL_RESERVED_SPACE(rt->dst.dev);
 	tlen = rt->dst.dev->needed_tailroom;
+	linear = length;
+
+	if (flags & MSG_ZEROCOPY &&
+	    rt->dst.dev->features & NETIF_F_SG)
+		linear = min_t(int, linear, MAX_HEADER);
+
 	skb = sock_alloc_send_skb(sk,
-				  length + hlen + tlen + 15,
+				  linear + hlen + tlen + 15,
 				  flags & MSG_DONTWAIT, &err);
 	if (!skb)
 		goto error;
@@ -380,7 +386,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 
 	skb_reset_network_header(skb);
 	iph = ip_hdr(skb);
-	skb_put(skb, length);
+	skb_put(skb, linear);
 
 	skb->ip_summed = CHECKSUM_NONE;
 
@@ -391,7 +397,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 
 	skb->transport_header = skb->network_header;
 	err = -EFAULT;
-	if (memcpy_from_msg(iph, msg, length))
+	if (memcpy_from_msg(iph, msg, linear))
 		goto error_free;
 
 	iphlen = iph->ihl * 4;
@@ -423,6 +429,13 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 				skb_transport_header(skb))->type);
 	}
 
+	if (flags & MSG_ZEROCOPY) {
+		err = skb_zerocopy_iter_alloc(skb, (void *)&msg,
+					      length - linear);
+		if (err)
+			goto error_zcopy;
+	}
+
 	err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
 		      net, sk, skb, NULL, rt->dst.dev,
 		      dst_output);
@@ -433,6 +446,8 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 out:
 	return 0;
 
+error_zcopy:
+	skb_zcopy_abort(skb);
 error_free:
 	kfree_skb(skb);
 error:
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 60be012fe708..206cca2d9b29 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -627,6 +627,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
 	struct rt6_info *rt = (struct rt6_info *)*dstp;
 	int hlen = LL_RESERVED_SPACE(rt->dst.dev);
 	int tlen = rt->dst.dev->needed_tailroom;
+	int linear = length;
 
 	if (length > rt->dst.dev->mtu) {
 		ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu);
@@ -637,8 +638,12 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
 	if (flags&MSG_PROBE)
 		goto out;
 
+	if (flags & MSG_ZEROCOPY &&
+	    rt->dst.dev->features & NETIF_F_SG)
+		linear = min_t(int, length, MAX_HEADER);
+
 	skb = sock_alloc_send_skb(sk,
-				  length + hlen + tlen + 15,
+				  linear + hlen + tlen + 15,
 				  flags & MSG_DONTWAIT, &err);
 	if (!skb)
 		goto error;
@@ -650,7 +655,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
 	skb_dst_set(skb, &rt->dst);
 	*dstp = NULL;
 
-	skb_put(skb, length);
+	skb_put(skb, linear);
 	skb_reset_network_header(skb);
 	iph = ipv6_hdr(skb);
 
@@ -660,10 +665,17 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
 		skb_set_dst_pending_confirm(skb, 1);
 
 	skb->transport_header = skb->network_header;
-	err = memcpy_from_msg(iph, msg, length);
+	err = memcpy_from_msg(iph, msg, linear);
 	if (err)
 		goto error_fault;
 
+	if (flags & MSG_ZEROCOPY) {
+		err = skb_zerocopy_iter_alloc(skb, (void *)&msg,
+					      length - linear);
+		if (err)
+			goto error_zcopy;
+	}
+
 	/* if egress device is enslaved to an L3 master device pass the
 	 * skb to its handler for processing
 	 */
@@ -681,6 +693,8 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
 out:
 	return 0;
 
+error_zcopy:
+	skb_zcopy_abort(skb);
 error_fault:
 	err = -EFAULT;
 	kfree_skb(skb);
-- 
2.13.1.611.g7e3b11ae1-goog

  parent reply	other threads:[~2017-06-21 21:18 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-06-21 21:18 [PATCH net-next v3 00/13] socket sendmsg MSG_ZEROCOPY Willem de Bruijn
2017-06-21 21:18 ` [PATCH net-next v3 01/13] sock: allocate skbs from optmem Willem de Bruijn
     [not found] ` <20170621211816.53837-1-willemdebruijn.kernel-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-06-21 21:18   ` [PATCH net-next v3 02/13] sock: skb_copy_ubufs support for compound pages Willem de Bruijn
     [not found]     ` <20170621211816.53837-3-willemdebruijn.kernel-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-06-22 17:05       ` David Miller
     [not found]         ` <20170622.130528.1762873686654379973.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
2017-06-22 20:57           ` Willem de Bruijn
     [not found]             ` <CAF=yD-+WQ1b9BytG4JuhgHCR8gAKvUVJPUfi4t-u_DqAFnVrkA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-06-23  1:36               ` David Miller
2017-06-23  3:59                 ` Willem de Bruijn
     [not found]                   ` <CAF=yD-JGekEkBsFfgZa+-TN3-QBPy4sfK0QLPx83Uh3DAoodvg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-06-27 15:53                     ` Willem de Bruijn
     [not found]                       ` <CAF=yD-LJ-EvWbKtxqrcK1D=nRSzWzpPcFVOX1poZ3+P=aa0QfA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-06-29 15:54                         ` Willem de Bruijn
2017-06-21 21:18   ` [PATCH net-next v3 13/13] test: add msg_zerocopy test Willem de Bruijn
2017-06-21 21:18 ` [PATCH net-next v3 03/13] sock: add MSG_ZEROCOPY Willem de Bruijn
     [not found]   ` <20170621211816.53837-4-willemdebruijn.kernel-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-06-22 17:06     ` David Miller
2017-06-21 21:18 ` [PATCH net-next v3 04/13] sock: add SOCK_ZEROCOPY sockopt Willem de Bruijn
2017-06-21 21:18 ` [PATCH net-next v3 05/13] sock: enable MSG_ZEROCOPY Willem de Bruijn
2017-06-21 21:18 ` [PATCH net-next v3 06/13] sock: MSG_ZEROCOPY notification coalescing Willem de Bruijn
     [not found]   ` <20170621211816.53837-7-willemdebruijn.kernel-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-06-22 17:07     ` David Miller
2017-06-21 21:18 ` [PATCH net-next v3 07/13] sock: add ee_code SO_EE_CODE_ZEROCOPY_COPIED Willem de Bruijn
2017-06-21 21:18 ` [PATCH net-next v3 08/13] sock: ulimit on MSG_ZEROCOPY pages Willem de Bruijn
2017-06-21 21:18 ` [PATCH net-next v3 09/13] tcp: enable MSG_ZEROCOPY Willem de Bruijn
2017-06-21 21:18 ` [PATCH net-next v3 10/13] udp: " Willem de Bruijn
2017-06-21 21:18 ` Willem de Bruijn [this message]
2017-06-21 21:18 ` [PATCH net-next v3 12/13] packet: " Willem de Bruijn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170621211816.53837-12-willemdebruijn.kernel@gmail.com \
    --to=willemdebruijn.kernel@gmail.com \
    --cc=davem@davemloft.net \
    --cc=linux-api@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=willemb@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.