All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 0/4] packet: tpacket gso and csum offload
@ 2016-02-02 15:56 Willem de Bruijn
  2016-02-02 15:56 ` [PATCH net-next 1/4] packet: move vnet_hdr code to helper functions Willem de Bruijn
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Willem de Bruijn @ 2016-02-02 15:56 UTC (permalink / raw)
  To: netdev; +Cc: davem, daniel, mst, sri, Willem de Bruijn

From: Willem de Bruijn <willemb@google.com>

Extend PACKET_VNET_HDR socket option support to packet sockets with
memory mapped rings (PACKET_RX_RING, PACKET_TX_RING).

Patches 2 and 4 add support to tpacket_rcv and tpacket_snd.

Patch 1 prepares for this by moving the relevant virtio_net_hdr
logic out of packet_snd and packet_rcv into helper functions.

GSO transmission requires all headers in the skb linear section.
Patch 3 moves parsing of tx_ring slot headers before skb allocation
to enable allocation with sufficient linear size.

Willem de Bruijn (4):
  packet: move vnet_hdr code to helper functions
  packet: vnet_hdr support for tpacket_rcv
  packet: parse tpacket header before skb alloc
  packet: tpacket_snd gso and checksum offload

 net/packet/af_packet.c | 438 +++++++++++++++++++++++++++++--------------------
 1 file changed, 262 insertions(+), 176 deletions(-)

-- 
2.7.0.rc3.207.g0ac5344

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH net-next 1/4] packet: move vnet_hdr code to helper functions
  2016-02-02 15:56 [PATCH net-next 0/4] packet: tpacket gso and csum offload Willem de Bruijn
@ 2016-02-02 15:56 ` Willem de Bruijn
  2016-02-02 15:56 ` [PATCH net-next 2/4] packet: vnet_hdr support for tpacket_rcv Willem de Bruijn
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Willem de Bruijn @ 2016-02-02 15:56 UTC (permalink / raw)
  To: netdev; +Cc: davem, daniel, mst, sri, Willem de Bruijn

From: Willem de Bruijn <willemb@google.com>

packet_snd and packet_rcv support virtio net headers for GSO.
Move this logic into helper functions to be able to reuse it in
tpacket_snd and tpacket_rcv.

This is a straightforward code move with one exception. Instead of
creating and passing a separate gso_type variable, reuse
vnet_hdr.gso_type after conversion from virtio to kernel gso type.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 net/packet/af_packet.c | 261 ++++++++++++++++++++++++++++---------------------
 1 file changed, 148 insertions(+), 113 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 992396a..bd3de7b 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1960,6 +1960,64 @@ static unsigned int run_filter(struct sk_buff *skb,
 	return res;
 }
 
+static int __packet_rcv_vnet(const struct sk_buff *skb,
+			     struct virtio_net_hdr *vnet_hdr)
+{
+	*vnet_hdr = (const struct virtio_net_hdr) { 0 };
+
+	if (skb_is_gso(skb)) {
+		struct skb_shared_info *sinfo = skb_shinfo(skb);
+
+		/* This is a hint as to how much should be linear. */
+		vnet_hdr->hdr_len =
+			__cpu_to_virtio16(vio_le(), skb_headlen(skb));
+		vnet_hdr->gso_size =
+			__cpu_to_virtio16(vio_le(), sinfo->gso_size);
+
+		if (sinfo->gso_type & SKB_GSO_TCPV4)
+			vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+		else if (sinfo->gso_type & SKB_GSO_TCPV6)
+			vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+		else if (sinfo->gso_type & SKB_GSO_UDP)
+			vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
+		else if (sinfo->gso_type & SKB_GSO_FCOE)
+			return -EINVAL;
+		else
+			BUG();
+
+		if (sinfo->gso_type & SKB_GSO_TCP_ECN)
+			vnet_hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
+	} else
+		vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+		vnet_hdr->csum_start = __cpu_to_virtio16(vio_le(),
+				  skb_checksum_start_offset(skb));
+		vnet_hdr->csum_offset = __cpu_to_virtio16(vio_le(),
+						 skb->csum_offset);
+	} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+		vnet_hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
+	} /* else everything is zero */
+
+	return 0;
+}
+
+static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
+			   size_t *len)
+{
+	struct virtio_net_hdr vnet_hdr;
+
+	if (*len < sizeof(vnet_hdr))
+		return -EINVAL;
+	*len -= sizeof(vnet_hdr);
+
+	if (__packet_rcv_vnet(skb, &vnet_hdr))
+		return -EINVAL;
+
+	return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
+}
+
 /*
  * This function makes lazy skb cloning in hope that most of packets
  * are discarded by BPF.
@@ -2347,6 +2405,84 @@ static void tpacket_set_protocol(const struct net_device *dev,
 	}
 }
 
+static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
+{
+	unsigned short gso_type = 0;
+
+	if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
+	    (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
+	     __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 >
+	      __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len)))
+		vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(),
+			 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
+			__virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2);
+
+	if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len)
+		return -EINVAL;
+
+	if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+		switch (vnet_hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+		case VIRTIO_NET_HDR_GSO_TCPV4:
+			gso_type = SKB_GSO_TCPV4;
+			break;
+		case VIRTIO_NET_HDR_GSO_TCPV6:
+			gso_type = SKB_GSO_TCPV6;
+			break;
+		case VIRTIO_NET_HDR_GSO_UDP:
+			gso_type = SKB_GSO_UDP;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		if (vnet_hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
+			gso_type |= SKB_GSO_TCP_ECN;
+
+		if (vnet_hdr->gso_size == 0)
+			return -EINVAL;
+	}
+
+	vnet_hdr->gso_type = gso_type;	/* changes type, temporary storage */
+	return 0;
+}
+
+static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
+				 struct virtio_net_hdr *vnet_hdr)
+{
+	int n;
+
+	if (*len < sizeof(*vnet_hdr))
+		return -EINVAL;
+	*len -= sizeof(*vnet_hdr);
+
+	n = copy_from_iter(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter);
+	if (n != sizeof(*vnet_hdr))
+		return -EFAULT;
+
+	return __packet_snd_vnet_parse(vnet_hdr, *len);
+}
+
+static int packet_snd_vnet_gso(struct sk_buff *skb,
+			       struct virtio_net_hdr *vnet_hdr)
+{
+	if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+		u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start);
+		u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset);
+
+		if (!skb_partial_csum_set(skb, s, o))
+			return -EINVAL;
+	}
+
+	skb_shinfo(skb)->gso_size =
+		__virtio16_to_cpu(vio_le(), vnet_hdr->gso_size);
+	skb_shinfo(skb)->gso_type = vnet_hdr->gso_type;
+
+	/* Header must be checked, and gso_segs computed. */
+	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+	skb_shinfo(skb)->gso_segs = 0;
+	return 0;
+}
+
 static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		void *frame, struct net_device *dev, int size_max,
 		__be16 proto, unsigned char *addr, int hlen)
@@ -2643,12 +2779,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	struct sockcm_cookie sockc;
 	struct virtio_net_hdr vnet_hdr = { 0 };
 	int offset = 0;
-	int vnet_hdr_len;
 	struct packet_sock *po = pkt_sk(sk);
-	unsigned short gso_type = 0;
 	int hlen, tlen;
 	int extra_len = 0;
-	ssize_t n;
 
 	/*
 	 *	Get and verify the address.
@@ -2686,53 +2819,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	if (sock->type == SOCK_RAW)
 		reserve = dev->hard_header_len;
 	if (po->has_vnet_hdr) {
-		vnet_hdr_len = sizeof(vnet_hdr);
-
-		err = -EINVAL;
-		if (len < vnet_hdr_len)
-			goto out_unlock;
-
-		len -= vnet_hdr_len;
-
-		err = -EFAULT;
-		n = copy_from_iter(&vnet_hdr, vnet_hdr_len, &msg->msg_iter);
-		if (n != vnet_hdr_len)
-			goto out_unlock;
-
-		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
-		    (__virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) +
-		     __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2 >
-		      __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len)))
-			vnet_hdr.hdr_len = __cpu_to_virtio16(vio_le(),
-				 __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) +
-				__virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2);
-
-		err = -EINVAL;
-		if (__virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len) > len)
+		err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
+		if (err)
 			goto out_unlock;
-
-		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
-			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
-			case VIRTIO_NET_HDR_GSO_TCPV4:
-				gso_type = SKB_GSO_TCPV4;
-				break;
-			case VIRTIO_NET_HDR_GSO_TCPV6:
-				gso_type = SKB_GSO_TCPV6;
-				break;
-			case VIRTIO_NET_HDR_GSO_UDP:
-				gso_type = SKB_GSO_UDP;
-				break;
-			default:
-				goto out_unlock;
-			}
-
-			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
-				gso_type |= SKB_GSO_TCP_ECN;
-
-			if (vnet_hdr.gso_size == 0)
-				goto out_unlock;
-
-		}
 	}
 
 	if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
@@ -2744,7 +2833,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	}
 
 	err = -EMSGSIZE;
-	if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
+	if (!vnet_hdr.gso_type &&
+	    (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
 		goto out_unlock;
 
 	err = -ENOBUFS;
@@ -2775,7 +2865,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 
 	sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
 
-	if (!gso_type && (len > dev->mtu + reserve + extra_len) &&
+	if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
 	    !packet_extra_vlan_len_allowed(dev, skb)) {
 		err = -EMSGSIZE;
 		goto out_free;
@@ -2789,24 +2879,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	packet_pick_tx_queue(dev, skb);
 
 	if (po->has_vnet_hdr) {
-		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
-			u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start);
-			u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset);
-			if (!skb_partial_csum_set(skb, s, o)) {
-				err = -EINVAL;
-				goto out_free;
-			}
-		}
-
-		skb_shinfo(skb)->gso_size =
-			__virtio16_to_cpu(vio_le(), vnet_hdr.gso_size);
-		skb_shinfo(skb)->gso_type = gso_type;
-
-		/* Header must be checked, and gso_segs computed. */
-		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
-		skb_shinfo(skb)->gso_segs = 0;
-
-		len += vnet_hdr_len;
+		err = packet_snd_vnet_gso(skb, &vnet_hdr);
+		if (err)
+			goto out_free;
+		len += sizeof(vnet_hdr);
 	}
 
 	skb_probe_transport_header(skb, reserve);
@@ -3177,51 +3253,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 		packet_rcv_has_room(pkt_sk(sk), NULL);
 
 	if (pkt_sk(sk)->has_vnet_hdr) {
-		struct virtio_net_hdr vnet_hdr = { 0 };
-
-		err = -EINVAL;
-		vnet_hdr_len = sizeof(vnet_hdr);
-		if (len < vnet_hdr_len)
-			goto out_free;
-
-		len -= vnet_hdr_len;
-
-		if (skb_is_gso(skb)) {
-			struct skb_shared_info *sinfo = skb_shinfo(skb);
-
-			/* This is a hint as to how much should be linear. */
-			vnet_hdr.hdr_len =
-				__cpu_to_virtio16(vio_le(), skb_headlen(skb));
-			vnet_hdr.gso_size =
-				__cpu_to_virtio16(vio_le(), sinfo->gso_size);
-			if (sinfo->gso_type & SKB_GSO_TCPV4)
-				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
-			else if (sinfo->gso_type & SKB_GSO_TCPV6)
-				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
-			else if (sinfo->gso_type & SKB_GSO_UDP)
-				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
-			else if (sinfo->gso_type & SKB_GSO_FCOE)
-				goto out_free;
-			else
-				BUG();
-			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
-				vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
-		} else
-			vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
-
-		if (skb->ip_summed == CHECKSUM_PARTIAL) {
-			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
-			vnet_hdr.csum_start = __cpu_to_virtio16(vio_le(),
-					  skb_checksum_start_offset(skb));
-			vnet_hdr.csum_offset = __cpu_to_virtio16(vio_le(),
-							 skb->csum_offset);
-		} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
-			vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;
-		} /* else everything is zero */
-
-		err = memcpy_to_msg(msg, (void *)&vnet_hdr, vnet_hdr_len);
-		if (err < 0)
+		err = packet_rcv_vnet(msg, skb, &len);
+		if (err)
 			goto out_free;
+		vnet_hdr_len = sizeof(struct virtio_net_hdr);
 	}
 
 	/* You lose any data beyond the buffer you gave. If it worries
-- 
2.7.0.rc3.207.g0ac5344

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH net-next 2/4] packet: vnet_hdr support for tpacket_rcv
  2016-02-02 15:56 [PATCH net-next 0/4] packet: tpacket gso and csum offload Willem de Bruijn
  2016-02-02 15:56 ` [PATCH net-next 1/4] packet: move vnet_hdr code to helper functions Willem de Bruijn
@ 2016-02-02 15:56 ` Willem de Bruijn
  2016-02-02 15:56 ` [PATCH net-next 3/4] packet: parse tpacket header before skb alloc Willem de Bruijn
  2016-02-02 15:56 ` [PATCH net-next 4/4] packet: tpacket_snd gso and checksum offload Willem de Bruijn
  3 siblings, 0 replies; 6+ messages in thread
From: Willem de Bruijn @ 2016-02-02 15:56 UTC (permalink / raw)
  To: netdev; +Cc: davem, daniel, mst, sri, Willem de Bruijn

From: Willem de Bruijn <willemb@google.com>

Support socket option PACKET_VNET_HDR together with PACKET_RX_RING.
When enabled, a struct virtio_net_hdr will precede the data in the
packet ring slots.

Verified with test program at
github.com/wdebruij/kerneltools/blob/master/tests/psock_rxring_vnet.c

  pkt: 1454269209.798420 len=5066
  vnet: gso_type=tcpv4 gso_size=1448 hlen=66 ecn=off
  csum: start=34 off=16
  eth: proto=0x800
  ip: src=<masked> dst=<masked> proto=6 len=5052

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 net/packet/af_packet.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index bd3de7b..b26df32 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2206,7 +2206,9 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		unsigned int maclen = skb_network_offset(skb);
 		netoff = TPACKET_ALIGN(po->tp_hdrlen +
 				       (maclen < 16 ? 16 : maclen)) +
-			po->tp_reserve;
+				       po->tp_reserve;
+		if (po->has_vnet_hdr)
+			netoff += sizeof(struct virtio_net_hdr);
 		macoff = netoff - maclen;
 	}
 	if (po->tp_version <= TPACKET_V2) {
@@ -2243,7 +2245,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	h.raw = packet_current_rx_frame(po, skb,
 					TP_STATUS_KERNEL, (macoff+snaplen));
 	if (!h.raw)
-		goto ring_is_full;
+		goto drop_n_account;
 	if (po->tp_version <= TPACKET_V2) {
 		packet_increment_rx_head(po, &po->rx_ring);
 	/*
@@ -2262,6 +2264,14 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	}
 	spin_unlock(&sk->sk_receive_queue.lock);
 
+	if (po->has_vnet_hdr) {
+		if (__packet_rcv_vnet(skb, h.raw + macoff -
+					   sizeof(struct virtio_net_hdr))) {
+			spin_lock(&sk->sk_receive_queue.lock);
+			goto drop_n_account;
+		}
+	}
+
 	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
 
 	if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
@@ -2357,7 +2367,7 @@ drop:
 	kfree_skb(skb);
 	return 0;
 
-ring_is_full:
+drop_n_account:
 	po->stats.stats1.tp_drops++;
 	spin_unlock(&sk->sk_receive_queue.lock);
 
@@ -3587,7 +3597,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 		}
 		if (optlen < len)
 			return -EINVAL;
-		if (pkt_sk(sk)->has_vnet_hdr)
+		if (pkt_sk(sk)->has_vnet_hdr &&
+		    optname == PACKET_TX_RING)
 			return -EINVAL;
 		if (copy_from_user(&req_u.req, optval, len))
 			return -EFAULT;
-- 
2.7.0.rc3.207.g0ac5344

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH net-next 3/4] packet: parse tpacket header before skb alloc
  2016-02-02 15:56 [PATCH net-next 0/4] packet: tpacket gso and csum offload Willem de Bruijn
  2016-02-02 15:56 ` [PATCH net-next 1/4] packet: move vnet_hdr code to helper functions Willem de Bruijn
  2016-02-02 15:56 ` [PATCH net-next 2/4] packet: vnet_hdr support for tpacket_rcv Willem de Bruijn
@ 2016-02-02 15:56 ` Willem de Bruijn
  2016-02-02 15:56 ` [PATCH net-next 4/4] packet: tpacket_snd gso and checksum offload Willem de Bruijn
  3 siblings, 0 replies; 6+ messages in thread
From: Willem de Bruijn @ 2016-02-02 15:56 UTC (permalink / raw)
  To: netdev; +Cc: davem, daniel, mst, sri, Willem de Bruijn

From: Willem de Bruijn <willemb@google.com>

GSO packet headers must be stored in the linear skb segment.
Move tpacket header parsing before sock_alloc_send_skb. The GSO
follow-on patch will later increase the skb linear argument to
sock_alloc_send_skb if needed for large packets.

The header parsing code does not require an allocated skb, so it is
safe to move. After allocation, pass the computed data start and
length to tpacket_fill_skb.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 net/packet/af_packet.c | 111 +++++++++++++++++++++++++++++--------------------
 1 file changed, 65 insertions(+), 46 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b26df32..89377bf 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2494,14 +2494,13 @@ static int packet_snd_vnet_gso(struct sk_buff *skb,
 }
 
 static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
-		void *frame, struct net_device *dev, int size_max,
+		void *frame, struct net_device *dev, void *data, int tp_len,
 		__be16 proto, unsigned char *addr, int hlen)
 {
 	union tpacket_uhdr ph;
-	int to_write, offset, len, tp_len, nr_frags, len_max;
+	int to_write, offset, len, nr_frags, len_max;
 	struct socket *sock = po->sk.sk_socket;
 	struct page *page;
-	void *data;
 	int err;
 
 	ph.raw = frame;
@@ -2513,51 +2512,9 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags);
 	skb_shinfo(skb)->destructor_arg = ph.raw;
 
-	switch (po->tp_version) {
-	case TPACKET_V2:
-		tp_len = ph.h2->tp_len;
-		break;
-	default:
-		tp_len = ph.h1->tp_len;
-		break;
-	}
-	if (unlikely(tp_len > size_max)) {
-		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
-		return -EMSGSIZE;
-	}
-
 	skb_reserve(skb, hlen);
 	skb_reset_network_header(skb);
 
-	if (unlikely(po->tp_tx_has_off)) {
-		int off_min, off_max, off;
-		off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
-		off_max = po->tx_ring.frame_size - tp_len;
-		if (sock->type == SOCK_DGRAM) {
-			switch (po->tp_version) {
-			case TPACKET_V2:
-				off = ph.h2->tp_net;
-				break;
-			default:
-				off = ph.h1->tp_net;
-				break;
-			}
-		} else {
-			switch (po->tp_version) {
-			case TPACKET_V2:
-				off = ph.h2->tp_mac;
-				break;
-			default:
-				off = ph.h1->tp_mac;
-				break;
-			}
-		}
-		if (unlikely((off < off_min) || (off_max < off)))
-			return -EINVAL;
-		data = ph.raw + off;
-	} else {
-		data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
-	}
 	to_write = tp_len;
 
 	if (sock->type == SOCK_DGRAM) {
@@ -2615,6 +2572,61 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	return tp_len;
 }
 
+static int tpacket_parse_header(struct packet_sock *po, void *frame,
+				int size_max, void **data)
+{
+	union tpacket_uhdr ph;
+	int tp_len, off;
+
+	ph.raw = frame;
+
+	switch (po->tp_version) {
+	case TPACKET_V2:
+		tp_len = ph.h2->tp_len;
+		break;
+	default:
+		tp_len = ph.h1->tp_len;
+		break;
+	}
+	if (unlikely(tp_len > size_max)) {
+		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
+		return -EMSGSIZE;
+	}
+
+	if (unlikely(po->tp_tx_has_off)) {
+		int off_min, off_max;
+
+		off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
+		off_max = po->tx_ring.frame_size - tp_len;
+		if (po->sk.sk_type == SOCK_DGRAM) {
+			switch (po->tp_version) {
+			case TPACKET_V2:
+				off = ph.h2->tp_net;
+				break;
+			default:
+				off = ph.h1->tp_net;
+				break;
+			}
+		} else {
+			switch (po->tp_version) {
+			case TPACKET_V2:
+				off = ph.h2->tp_mac;
+				break;
+			default:
+				off = ph.h1->tp_mac;
+				break;
+			}
+		}
+		if (unlikely((off < off_min) || (off_max < off)))
+			return -EINVAL;
+	} else {
+		off = po->tp_hdrlen - sizeof(struct sockaddr_ll);
+	}
+
+	*data = frame + off;
+	return tp_len;
+}
+
 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 {
 	struct sk_buff *skb;
@@ -2626,6 +2638,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
 	int tp_len, size_max;
 	unsigned char *addr;
+	void *data;
 	int len_sum = 0;
 	int status = TP_STATUS_AVAILABLE;
 	int hlen, tlen;
@@ -2673,6 +2686,11 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 			continue;
 		}
 
+		skb = NULL;
+		tp_len = tpacket_parse_header(po, ph, size_max, &data);
+		if (tp_len < 0)
+			goto tpacket_error;
+
 		status = TP_STATUS_SEND_REQUEST;
 		hlen = LL_RESERVED_SPACE(dev);
 		tlen = dev->needed_tailroom;
@@ -2686,7 +2704,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 				err = len_sum;
 			goto out_status;
 		}
-		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
+		tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
 					  addr, hlen);
 		if (likely(tp_len >= 0) &&
 		    tp_len > dev->mtu + reserve &&
@@ -2694,6 +2712,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 			tp_len = -EMSGSIZE;
 
 		if (unlikely(tp_len < 0)) {
+tpacket_error:
 			if (po->tp_loss) {
 				__packet_set_status(po, ph,
 						TP_STATUS_AVAILABLE);
-- 
2.7.0.rc3.207.g0ac5344

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH net-next 4/4] packet: tpacket_snd gso and checksum offload
  2016-02-02 15:56 [PATCH net-next 0/4] packet: tpacket gso and csum offload Willem de Bruijn
                   ` (2 preceding siblings ...)
  2016-02-02 15:56 ` [PATCH net-next 3/4] packet: parse tpacket header before skb alloc Willem de Bruijn
@ 2016-02-02 15:56 ` Willem de Bruijn
  2016-02-03 16:38   ` Willem de Bruijn
  3 siblings, 1 reply; 6+ messages in thread
From: Willem de Bruijn @ 2016-02-02 15:56 UTC (permalink / raw)
  To: netdev; +Cc: davem, daniel, mst, sri, Willem de Bruijn

From: Willem de Bruijn <willemb@google.com>

Support socket option PACKET_VNET_HDR together with PACKET_TX_RING.

When enabled, a struct virtio_net_hdr is expected to precede the data
in the ring. The vnet option must be set before the ring is created.

The implementation reuses the existing skb_copy_bits code that is used
when dev->hard_header_len is non-zero. Move this ll_header check to
before the skb alloc and combine it with a test for vnet_hdr->hdr_len.
Allocate and copy the max of the two.

Verified with test program at
github.com/wdebruij/kerneltools/blob/master/tests/psock_txring_vnet.c

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 net/packet/af_packet.c | 53 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 89377bf..41e25b6 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2495,7 +2495,7 @@ static int packet_snd_vnet_gso(struct sk_buff *skb,
 
 static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		void *frame, struct net_device *dev, void *data, int tp_len,
-		__be16 proto, unsigned char *addr, int hlen)
+		__be16 proto, unsigned char *addr, int hlen, int copylen)
 {
 	union tpacket_uhdr ph;
 	int to_write, offset, len, nr_frags, len_max;
@@ -2522,20 +2522,17 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 				NULL, tp_len);
 		if (unlikely(err < 0))
 			return -EINVAL;
-	} else if (dev->hard_header_len) {
-		if (ll_header_truncated(dev, tp_len))
-			return -EINVAL;
-
+	} else if (copylen) {
 		skb_push(skb, dev->hard_header_len);
-		err = skb_store_bits(skb, 0, data,
-				dev->hard_header_len);
+		skb_put(skb, copylen - dev->hard_header_len);
+		err = skb_store_bits(skb, 0, data, copylen);
 		if (unlikely(err))
 			return err;
 		if (!skb->protocol)
 			tpacket_set_protocol(dev, skb);
 
-		data += dev->hard_header_len;
-		to_write -= dev->hard_header_len;
+		data += copylen;
+		to_write -= copylen;
 	}
 
 	offset = offset_in_page(data);
@@ -2588,7 +2585,7 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
 		tp_len = ph.h1->tp_len;
 		break;
 	}
-	if (unlikely(tp_len > size_max)) {
+	if (unlikely(tp_len > size_max) && !po->has_vnet_hdr) {
 		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
 		return -EMSGSIZE;
 	}
@@ -2631,6 +2628,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 {
 	struct sk_buff *skb;
 	struct net_device *dev;
+	struct virtio_net_hdr *vnet_hdr = NULL;
 	__be16 proto;
 	int err, reserve = 0;
 	void *ph;
@@ -2641,7 +2639,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	void *data;
 	int len_sum = 0;
 	int status = TP_STATUS_AVAILABLE;
-	int hlen, tlen;
+	int hlen, tlen, copylen = 0;
 
 	mutex_lock(&po->pg_vec_lock);
 
@@ -2694,8 +2692,28 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 		status = TP_STATUS_SEND_REQUEST;
 		hlen = LL_RESERVED_SPACE(dev);
 		tlen = dev->needed_tailroom;
+		if (po->has_vnet_hdr) {
+			vnet_hdr = data;
+			if (tp_len < sizeof(*vnet_hdr) ||
+			    __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
+				tp_len = -EINVAL;
+				goto tpacket_error;
+			}
+			data += sizeof(*vnet_hdr);
+			tp_len -= sizeof(*vnet_hdr);
+			copylen = __virtio16_to_cpu(vio_le(),
+						    vnet_hdr->hdr_len);
+		}
+		if (dev->hard_header_len) {
+			if (ll_header_truncated(dev, tp_len)) {
+				tp_len = -EINVAL;
+				goto tpacket_error;
+			}
+			copylen = max_t(int, copylen, dev->hard_header_len);
+		}
 		skb = sock_alloc_send_skb(&po->sk,
-				hlen + tlen + sizeof(struct sockaddr_ll),
+				hlen + tlen + sizeof(struct sockaddr_ll) +
+				(copylen - dev->hard_header_len),
 				!need_wait, &err);
 
 		if (unlikely(skb == NULL)) {
@@ -2705,9 +2723,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 			goto out_status;
 		}
 		tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
-					  addr, hlen);
+					  addr, hlen, copylen);
 		if (likely(tp_len >= 0) &&
 		    tp_len > dev->mtu + reserve &&
+		    !po->has_vnet_hdr &&
 		    !packet_extra_vlan_len_allowed(dev, skb))
 			tp_len = -EMSGSIZE;
 
@@ -2726,6 +2745,11 @@ tpacket_error:
 			}
 		}
 
+		if (po->has_vnet_hdr && packet_snd_vnet_gso(skb, vnet_hdr)) {
+			tp_len = -EINVAL;
+			goto tpacket_error;
+		}
+
 		packet_pick_tx_queue(dev, skb);
 
 		skb->destructor = tpacket_destruct_skb;
@@ -3616,9 +3640,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 		}
 		if (optlen < len)
 			return -EINVAL;
-		if (pkt_sk(sk)->has_vnet_hdr &&
-		    optname == PACKET_TX_RING)
-			return -EINVAL;
 		if (copy_from_user(&req_u.req, optval, len))
 			return -EFAULT;
 		return packet_set_ring(sk, &req_u, 0,
-- 
2.7.0.rc3.207.g0ac5344

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next 4/4] packet: tpacket_snd gso and checksum offload
  2016-02-02 15:56 ` [PATCH net-next 4/4] packet: tpacket_snd gso and checksum offload Willem de Bruijn
@ 2016-02-03 16:38   ` Willem de Bruijn
  0 siblings, 0 replies; 6+ messages in thread
From: Willem de Bruijn @ 2016-02-03 16:38 UTC (permalink / raw)
  To: Network Development
  Cc: David Miller, Daniel Borkmann, mst, sri, Willem de Bruijn

>         union tpacket_uhdr ph;
On Tue, Feb 2, 2016 at 10:56 AM, Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
> From: Willem de Bruijn <willemb@google.com>
>
> Support socket option PACKET_VNET_HDR together with PACKET_TX_RING.
>>
> Signed-off-by: Willem de Bruijn <willemb@google.com>
> ---
>  net/packet/af_packet.c | 53 +++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 37 insertions(+), 16 deletions(-)
>
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index 89377bf..41e25b6 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -2495,7 +2495,7 @@ static int packet_snd_vnet_gso(struct sk_buff *skb,
>

> +               if (po->has_vnet_hdr) {
> +                       vnet_hdr = data;
> +                       if (tp_len < sizeof(*vnet_hdr) ||
> +                           __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
> +                               tp_len = -EINVAL;
> +                               goto tpacket_error;
> +                       }
> +                       data += sizeof(*vnet_hdr);
> +                       tp_len -= sizeof(*vnet_hdr);

I left a bug here. tp_len must have the virtio_net_hdr subtracted before the
bounds check vnet_hdr->hdr_len <= tp_len in __packet_snd_vnet_parse.

Will send a v2.

> +                       copylen = __virtio16_to_cpu(vio_le(),
> +                                                   vnet_hdr->hdr_len);
> +               }
> +               if (dev->hard_header_len) {
> +                       if (ll_header_truncated(dev, tp_len)) {
> +                               tp_len = -EINVAL;
> +                               goto tpacket_error;
> +                       }
> +                       copylen = max_t(int, copylen, dev->hard_header_len);
> +               }
>                 skb = sock_alloc_send_skb(&po->sk,
> -                               hlen + tlen + sizeof(struct sockaddr_ll),
> +                               hlen + tlen + sizeof(struct sockaddr_ll) +
> +                               (copylen - dev->hard_header_len),
>                                 !need_wait, &err);
>
>                 if (unlikely(skb == NULL)) {
> @@ -2705,9 +2723,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
>                         goto out_status;
>                 }
>                 tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
> -                                         addr, hlen);
> +                                         addr, hlen, copylen);
>                 if (likely(tp_len >= 0) &&
>                     tp_len > dev->mtu + reserve &&
> +                   !po->has_vnet_hdr &&
>                     !packet_extra_vlan_len_allowed(dev, skb))
>                         tp_len = -EMSGSIZE;
>
> @@ -2726,6 +2745,11 @@ tpacket_error:
>                         }
>                 }
>
> +               if (po->has_vnet_hdr && packet_snd_vnet_gso(skb, vnet_hdr)) {
> +                       tp_len = -EINVAL;
> +                       goto tpacket_error;
> +               }
> +
>                 packet_pick_tx_queue(dev, skb);
>
>                 skb->destructor = tpacket_destruct_skb;
> @@ -3616,9 +3640,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
>                 }
>                 if (optlen < len)
>                         return -EINVAL;
> -               if (pkt_sk(sk)->has_vnet_hdr &&
> -                   optname == PACKET_TX_RING)
> -                       return -EINVAL;
>                 if (copy_from_user(&req_u.req, optval, len))
>                         return -EFAULT;
>                 return packet_set_ring(sk, &req_u, 0,
> --
> 2.7.0.rc3.207.g0ac5344
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2016-02-03 16:38 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-02 15:56 [PATCH net-next 0/4] packet: tpacket gso and csum offload Willem de Bruijn
2016-02-02 15:56 ` [PATCH net-next 1/4] packet: move vnet_hdr code to helper functions Willem de Bruijn
2016-02-02 15:56 ` [PATCH net-next 2/4] packet: vnet_hdr support for tpacket_rcv Willem de Bruijn
2016-02-02 15:56 ` [PATCH net-next 3/4] packet: parse tpacket header before skb alloc Willem de Bruijn
2016-02-02 15:56 ` [PATCH net-next 4/4] packet: tpacket_snd gso and checksum offload Willem de Bruijn
2016-02-03 16:38   ` Willem de Bruijn

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.