* [PATCH net-next 0/4] packet: tpacket gso and csum offload
@ 2016-02-02 15:56 Willem de Bruijn
2016-02-02 15:56 ` [PATCH net-next 1/4] packet: move vnet_hdr code to helper functions Willem de Bruijn
` (3 more replies)
0 siblings, 4 replies; 6+ messages in thread
From: Willem de Bruijn @ 2016-02-02 15:56 UTC (permalink / raw)
To: netdev; +Cc: davem, daniel, mst, sri, Willem de Bruijn
From: Willem de Bruijn <willemb@google.com>
Extend PACKET_VNET_HDR socket option support to packet sockets with
memory mapped rings (PACKET_RX_RING, PACKET_TX_RING).
Patches 2 and 4 add support to tpacket_rcv and tpacket_snd.
Patch 1 prepares for this by moving the relevant virtio_net_hdr
logic out of packet_snd and packet_rcv into helper functions.
GSO transmission requires all headers in the skb linear section.
Patch 3 moves parsing of tx_ring slot headers before skb allocation
to enable allocation with sufficient linear size.
Willem de Bruijn (4):
packet: move vnet_hdr code to helper functions
packet: vnet_hdr support for tpacket_rcv
packet: parse tpacket header before skb alloc
packet: tpacket_snd gso and checksum offload
net/packet/af_packet.c | 438 +++++++++++++++++++++++++++++--------------------
1 file changed, 262 insertions(+), 176 deletions(-)
--
2.7.0.rc3.207.g0ac5344
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH net-next 1/4] packet: move vnet_hdr code to helper functions
2016-02-02 15:56 [PATCH net-next 0/4] packet: tpacket gso and csum offload Willem de Bruijn
@ 2016-02-02 15:56 ` Willem de Bruijn
2016-02-02 15:56 ` [PATCH net-next 2/4] packet: vnet_hdr support for tpacket_rcv Willem de Bruijn
` (2 subsequent siblings)
3 siblings, 0 replies; 6+ messages in thread
From: Willem de Bruijn @ 2016-02-02 15:56 UTC (permalink / raw)
To: netdev; +Cc: davem, daniel, mst, sri, Willem de Bruijn
From: Willem de Bruijn <willemb@google.com>
packet_snd and packet_rcv support virtio net headers for GSO.
Move this logic into helper functions to be able to reuse it in
tpacket_snd and tpacket_rcv.
This is a straightforward code move with one exception. Instead of
creating and passing a separate gso_type variable, reuse
vnet_hdr.gso_type after conversion from virtio to kernel gso type.
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
net/packet/af_packet.c | 261 ++++++++++++++++++++++++++++---------------------
1 file changed, 148 insertions(+), 113 deletions(-)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 992396a..bd3de7b 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1960,6 +1960,64 @@ static unsigned int run_filter(struct sk_buff *skb,
return res;
}
+static int __packet_rcv_vnet(const struct sk_buff *skb,
+ struct virtio_net_hdr *vnet_hdr)
+{
+ *vnet_hdr = (const struct virtio_net_hdr) { 0 };
+
+ if (skb_is_gso(skb)) {
+ struct skb_shared_info *sinfo = skb_shinfo(skb);
+
+ /* This is a hint as to how much should be linear. */
+ vnet_hdr->hdr_len =
+ __cpu_to_virtio16(vio_le(), skb_headlen(skb));
+ vnet_hdr->gso_size =
+ __cpu_to_virtio16(vio_le(), sinfo->gso_size);
+
+ if (sinfo->gso_type & SKB_GSO_TCPV4)
+ vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+ else if (sinfo->gso_type & SKB_GSO_TCPV6)
+ vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+ else if (sinfo->gso_type & SKB_GSO_UDP)
+ vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
+ else if (sinfo->gso_type & SKB_GSO_FCOE)
+ return -EINVAL;
+ else
+ BUG();
+
+ if (sinfo->gso_type & SKB_GSO_TCP_ECN)
+ vnet_hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
+ } else
+ vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vnet_hdr->csum_start = __cpu_to_virtio16(vio_le(),
+ skb_checksum_start_offset(skb));
+ vnet_hdr->csum_offset = __cpu_to_virtio16(vio_le(),
+ skb->csum_offset);
+ } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+ vnet_hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
+ } /* else everything is zero */
+
+ return 0;
+}
+
+static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
+ size_t *len)
+{
+ struct virtio_net_hdr vnet_hdr;
+
+ if (*len < sizeof(vnet_hdr))
+ return -EINVAL;
+ *len -= sizeof(vnet_hdr);
+
+ if (__packet_rcv_vnet(skb, &vnet_hdr))
+ return -EINVAL;
+
+ return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
+}
+
/*
* This function makes lazy skb cloning in hope that most of packets
* are discarded by BPF.
@@ -2347,6 +2405,84 @@ static void tpacket_set_protocol(const struct net_device *dev,
}
}
+static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
+{
+ unsigned short gso_type = 0;
+
+ if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
+ (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
+ __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 >
+ __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len)))
+ vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(),
+ __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
+ __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2);
+
+ if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len)
+ return -EINVAL;
+
+ if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+ switch (vnet_hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+ case VIRTIO_NET_HDR_GSO_TCPV4:
+ gso_type = SKB_GSO_TCPV4;
+ break;
+ case VIRTIO_NET_HDR_GSO_TCPV6:
+ gso_type = SKB_GSO_TCPV6;
+ break;
+ case VIRTIO_NET_HDR_GSO_UDP:
+ gso_type = SKB_GSO_UDP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (vnet_hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
+ gso_type |= SKB_GSO_TCP_ECN;
+
+ if (vnet_hdr->gso_size == 0)
+ return -EINVAL;
+ }
+
+ vnet_hdr->gso_type = gso_type; /* changes type, temporary storage */
+ return 0;
+}
+
+static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
+ struct virtio_net_hdr *vnet_hdr)
+{
+ int n;
+
+ if (*len < sizeof(*vnet_hdr))
+ return -EINVAL;
+ *len -= sizeof(*vnet_hdr);
+
+ n = copy_from_iter(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter);
+ if (n != sizeof(*vnet_hdr))
+ return -EFAULT;
+
+ return __packet_snd_vnet_parse(vnet_hdr, *len);
+}
+
+static int packet_snd_vnet_gso(struct sk_buff *skb,
+ struct virtio_net_hdr *vnet_hdr)
+{
+ if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+ u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start);
+ u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset);
+
+ if (!skb_partial_csum_set(skb, s, o))
+ return -EINVAL;
+ }
+
+ skb_shinfo(skb)->gso_size =
+ __virtio16_to_cpu(vio_le(), vnet_hdr->gso_size);
+ skb_shinfo(skb)->gso_type = vnet_hdr->gso_type;
+
+ /* Header must be checked, and gso_segs computed. */
+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+ skb_shinfo(skb)->gso_segs = 0;
+ return 0;
+}
+
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
void *frame, struct net_device *dev, int size_max,
__be16 proto, unsigned char *addr, int hlen)
@@ -2643,12 +2779,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
struct sockcm_cookie sockc;
struct virtio_net_hdr vnet_hdr = { 0 };
int offset = 0;
- int vnet_hdr_len;
struct packet_sock *po = pkt_sk(sk);
- unsigned short gso_type = 0;
int hlen, tlen;
int extra_len = 0;
- ssize_t n;
/*
* Get and verify the address.
@@ -2686,53 +2819,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (sock->type == SOCK_RAW)
reserve = dev->hard_header_len;
if (po->has_vnet_hdr) {
- vnet_hdr_len = sizeof(vnet_hdr);
-
- err = -EINVAL;
- if (len < vnet_hdr_len)
- goto out_unlock;
-
- len -= vnet_hdr_len;
-
- err = -EFAULT;
- n = copy_from_iter(&vnet_hdr, vnet_hdr_len, &msg->msg_iter);
- if (n != vnet_hdr_len)
- goto out_unlock;
-
- if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
- (__virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) +
- __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2 >
- __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len)))
- vnet_hdr.hdr_len = __cpu_to_virtio16(vio_le(),
- __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) +
- __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2);
-
- err = -EINVAL;
- if (__virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len) > len)
+ err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
+ if (err)
goto out_unlock;
-
- if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
- switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
- case VIRTIO_NET_HDR_GSO_TCPV4:
- gso_type = SKB_GSO_TCPV4;
- break;
- case VIRTIO_NET_HDR_GSO_TCPV6:
- gso_type = SKB_GSO_TCPV6;
- break;
- case VIRTIO_NET_HDR_GSO_UDP:
- gso_type = SKB_GSO_UDP;
- break;
- default:
- goto out_unlock;
- }
-
- if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
- gso_type |= SKB_GSO_TCP_ECN;
-
- if (vnet_hdr.gso_size == 0)
- goto out_unlock;
-
- }
}
if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
@@ -2744,7 +2833,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
}
err = -EMSGSIZE;
- if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
+ if (!vnet_hdr.gso_type &&
+ (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
goto out_unlock;
err = -ENOBUFS;
@@ -2775,7 +2865,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
- if (!gso_type && (len > dev->mtu + reserve + extra_len) &&
+ if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
!packet_extra_vlan_len_allowed(dev, skb)) {
err = -EMSGSIZE;
goto out_free;
@@ -2789,24 +2879,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
packet_pick_tx_queue(dev, skb);
if (po->has_vnet_hdr) {
- if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
- u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start);
- u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset);
- if (!skb_partial_csum_set(skb, s, o)) {
- err = -EINVAL;
- goto out_free;
- }
- }
-
- skb_shinfo(skb)->gso_size =
- __virtio16_to_cpu(vio_le(), vnet_hdr.gso_size);
- skb_shinfo(skb)->gso_type = gso_type;
-
- /* Header must be checked, and gso_segs computed. */
- skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
- skb_shinfo(skb)->gso_segs = 0;
-
- len += vnet_hdr_len;
+ err = packet_snd_vnet_gso(skb, &vnet_hdr);
+ if (err)
+ goto out_free;
+ len += sizeof(vnet_hdr);
}
skb_probe_transport_header(skb, reserve);
@@ -3177,51 +3253,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
packet_rcv_has_room(pkt_sk(sk), NULL);
if (pkt_sk(sk)->has_vnet_hdr) {
- struct virtio_net_hdr vnet_hdr = { 0 };
-
- err = -EINVAL;
- vnet_hdr_len = sizeof(vnet_hdr);
- if (len < vnet_hdr_len)
- goto out_free;
-
- len -= vnet_hdr_len;
-
- if (skb_is_gso(skb)) {
- struct skb_shared_info *sinfo = skb_shinfo(skb);
-
- /* This is a hint as to how much should be linear. */
- vnet_hdr.hdr_len =
- __cpu_to_virtio16(vio_le(), skb_headlen(skb));
- vnet_hdr.gso_size =
- __cpu_to_virtio16(vio_le(), sinfo->gso_size);
- if (sinfo->gso_type & SKB_GSO_TCPV4)
- vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
- else if (sinfo->gso_type & SKB_GSO_TCPV6)
- vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
- else if (sinfo->gso_type & SKB_GSO_UDP)
- vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
- else if (sinfo->gso_type & SKB_GSO_FCOE)
- goto out_free;
- else
- BUG();
- if (sinfo->gso_type & SKB_GSO_TCP_ECN)
- vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
- } else
- vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
-
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- vnet_hdr.csum_start = __cpu_to_virtio16(vio_le(),
- skb_checksum_start_offset(skb));
- vnet_hdr.csum_offset = __cpu_to_virtio16(vio_le(),
- skb->csum_offset);
- } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
- vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;
- } /* else everything is zero */
-
- err = memcpy_to_msg(msg, (void *)&vnet_hdr, vnet_hdr_len);
- if (err < 0)
+ err = packet_rcv_vnet(msg, skb, &len);
+ if (err)
goto out_free;
+ vnet_hdr_len = sizeof(struct virtio_net_hdr);
}
/* You lose any data beyond the buffer you gave. If it worries
--
2.7.0.rc3.207.g0ac5344
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH net-next 2/4] packet: vnet_hdr support for tpacket_rcv
2016-02-02 15:56 [PATCH net-next 0/4] packet: tpacket gso and csum offload Willem de Bruijn
2016-02-02 15:56 ` [PATCH net-next 1/4] packet: move vnet_hdr code to helper functions Willem de Bruijn
@ 2016-02-02 15:56 ` Willem de Bruijn
2016-02-02 15:56 ` [PATCH net-next 3/4] packet: parse tpacket header before skb alloc Willem de Bruijn
2016-02-02 15:56 ` [PATCH net-next 4/4] packet: tpacket_snd gso and checksum offload Willem de Bruijn
3 siblings, 0 replies; 6+ messages in thread
From: Willem de Bruijn @ 2016-02-02 15:56 UTC (permalink / raw)
To: netdev; +Cc: davem, daniel, mst, sri, Willem de Bruijn
From: Willem de Bruijn <willemb@google.com>
Support socket option PACKET_VNET_HDR together with PACKET_RX_RING.
When enabled, a struct virtio_net_hdr will precede the data in the
packet ring slots.
Verified with test program at
github.com/wdebruij/kerneltools/blob/master/tests/psock_rxring_vnet.c
pkt: 1454269209.798420 len=5066
vnet: gso_type=tcpv4 gso_size=1448 hlen=66 ecn=off
csum: start=34 off=16
eth: proto=0x800
ip: src=<masked> dst=<masked> proto=6 len=5052
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
net/packet/af_packet.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index bd3de7b..b26df32 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2206,7 +2206,9 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
unsigned int maclen = skb_network_offset(skb);
netoff = TPACKET_ALIGN(po->tp_hdrlen +
(maclen < 16 ? 16 : maclen)) +
- po->tp_reserve;
+ po->tp_reserve;
+ if (po->has_vnet_hdr)
+ netoff += sizeof(struct virtio_net_hdr);
macoff = netoff - maclen;
}
if (po->tp_version <= TPACKET_V2) {
@@ -2243,7 +2245,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
h.raw = packet_current_rx_frame(po, skb,
TP_STATUS_KERNEL, (macoff+snaplen));
if (!h.raw)
- goto ring_is_full;
+ goto drop_n_account;
if (po->tp_version <= TPACKET_V2) {
packet_increment_rx_head(po, &po->rx_ring);
/*
@@ -2262,6 +2264,14 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
}
spin_unlock(&sk->sk_receive_queue.lock);
+ if (po->has_vnet_hdr) {
+ if (__packet_rcv_vnet(skb, h.raw + macoff -
+ sizeof(struct virtio_net_hdr))) {
+ spin_lock(&sk->sk_receive_queue.lock);
+ goto drop_n_account;
+ }
+ }
+
skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
@@ -2357,7 +2367,7 @@ drop:
kfree_skb(skb);
return 0;
-ring_is_full:
+drop_n_account:
po->stats.stats1.tp_drops++;
spin_unlock(&sk->sk_receive_queue.lock);
@@ -3587,7 +3597,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
}
if (optlen < len)
return -EINVAL;
- if (pkt_sk(sk)->has_vnet_hdr)
+ if (pkt_sk(sk)->has_vnet_hdr &&
+ optname == PACKET_TX_RING)
return -EINVAL;
if (copy_from_user(&req_u.req, optval, len))
return -EFAULT;
--
2.7.0.rc3.207.g0ac5344
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH net-next 3/4] packet: parse tpacket header before skb alloc
2016-02-02 15:56 [PATCH net-next 0/4] packet: tpacket gso and csum offload Willem de Bruijn
2016-02-02 15:56 ` [PATCH net-next 1/4] packet: move vnet_hdr code to helper functions Willem de Bruijn
2016-02-02 15:56 ` [PATCH net-next 2/4] packet: vnet_hdr support for tpacket_rcv Willem de Bruijn
@ 2016-02-02 15:56 ` Willem de Bruijn
2016-02-02 15:56 ` [PATCH net-next 4/4] packet: tpacket_snd gso and checksum offload Willem de Bruijn
3 siblings, 0 replies; 6+ messages in thread
From: Willem de Bruijn @ 2016-02-02 15:56 UTC (permalink / raw)
To: netdev; +Cc: davem, daniel, mst, sri, Willem de Bruijn
From: Willem de Bruijn <willemb@google.com>
GSO packet headers must be stored in the linear skb segment.
Move tpacket header parsing before sock_alloc_send_skb. The GSO
follow-on patch will later increase the skb linear argument to
sock_alloc_send_skb if needed for large packets.
The header parsing code does not require an allocated skb, so is
safe to move. After allocation, pass to tpacket_fill_skb the computed
data start and length.
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
net/packet/af_packet.c | 111 +++++++++++++++++++++++++++++--------------------
1 file changed, 65 insertions(+), 46 deletions(-)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b26df32..89377bf 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2494,14 +2494,13 @@ static int packet_snd_vnet_gso(struct sk_buff *skb,
}
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
- void *frame, struct net_device *dev, int size_max,
+ void *frame, struct net_device *dev, void *data, int tp_len,
__be16 proto, unsigned char *addr, int hlen)
{
union tpacket_uhdr ph;
- int to_write, offset, len, tp_len, nr_frags, len_max;
+ int to_write, offset, len, nr_frags, len_max;
struct socket *sock = po->sk.sk_socket;
struct page *page;
- void *data;
int err;
ph.raw = frame;
@@ -2513,51 +2512,9 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags);
skb_shinfo(skb)->destructor_arg = ph.raw;
- switch (po->tp_version) {
- case TPACKET_V2:
- tp_len = ph.h2->tp_len;
- break;
- default:
- tp_len = ph.h1->tp_len;
- break;
- }
- if (unlikely(tp_len > size_max)) {
- pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
- return -EMSGSIZE;
- }
-
skb_reserve(skb, hlen);
skb_reset_network_header(skb);
- if (unlikely(po->tp_tx_has_off)) {
- int off_min, off_max, off;
- off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
- off_max = po->tx_ring.frame_size - tp_len;
- if (sock->type == SOCK_DGRAM) {
- switch (po->tp_version) {
- case TPACKET_V2:
- off = ph.h2->tp_net;
- break;
- default:
- off = ph.h1->tp_net;
- break;
- }
- } else {
- switch (po->tp_version) {
- case TPACKET_V2:
- off = ph.h2->tp_mac;
- break;
- default:
- off = ph.h1->tp_mac;
- break;
- }
- }
- if (unlikely((off < off_min) || (off_max < off)))
- return -EINVAL;
- data = ph.raw + off;
- } else {
- data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
- }
to_write = tp_len;
if (sock->type == SOCK_DGRAM) {
@@ -2615,6 +2572,61 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
return tp_len;
}
+static int tpacket_parse_header(struct packet_sock *po, void *frame,
+ int size_max, void **data)
+{
+ union tpacket_uhdr ph;
+ int tp_len, off;
+
+ ph.raw = frame;
+
+ switch (po->tp_version) {
+ case TPACKET_V2:
+ tp_len = ph.h2->tp_len;
+ break;
+ default:
+ tp_len = ph.h1->tp_len;
+ break;
+ }
+ if (unlikely(tp_len > size_max)) {
+ pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
+ return -EMSGSIZE;
+ }
+
+ if (unlikely(po->tp_tx_has_off)) {
+ int off_min, off_max;
+
+ off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
+ off_max = po->tx_ring.frame_size - tp_len;
+ if (po->sk.sk_type == SOCK_DGRAM) {
+ switch (po->tp_version) {
+ case TPACKET_V2:
+ off = ph.h2->tp_net;
+ break;
+ default:
+ off = ph.h1->tp_net;
+ break;
+ }
+ } else {
+ switch (po->tp_version) {
+ case TPACKET_V2:
+ off = ph.h2->tp_mac;
+ break;
+ default:
+ off = ph.h1->tp_mac;
+ break;
+ }
+ }
+ if (unlikely((off < off_min) || (off_max < off)))
+ return -EINVAL;
+ } else {
+ off = po->tp_hdrlen - sizeof(struct sockaddr_ll);
+ }
+
+ *data = frame + off;
+ return tp_len;
+}
+
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
struct sk_buff *skb;
@@ -2626,6 +2638,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
int tp_len, size_max;
unsigned char *addr;
+ void *data;
int len_sum = 0;
int status = TP_STATUS_AVAILABLE;
int hlen, tlen;
@@ -2673,6 +2686,11 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
continue;
}
+ skb = NULL;
+ tp_len = tpacket_parse_header(po, ph, size_max, &data);
+ if (tp_len < 0)
+ goto tpacket_error;
+
status = TP_STATUS_SEND_REQUEST;
hlen = LL_RESERVED_SPACE(dev);
tlen = dev->needed_tailroom;
@@ -2686,7 +2704,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
err = len_sum;
goto out_status;
}
- tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
+ tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
addr, hlen);
if (likely(tp_len >= 0) &&
tp_len > dev->mtu + reserve &&
@@ -2694,6 +2712,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
tp_len = -EMSGSIZE;
if (unlikely(tp_len < 0)) {
+tpacket_error:
if (po->tp_loss) {
__packet_set_status(po, ph,
TP_STATUS_AVAILABLE);
--
2.7.0.rc3.207.g0ac5344
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH net-next 4/4] packet: tpacket_snd gso and checksum offload
2016-02-02 15:56 [PATCH net-next 0/4] packet: tpacket gso and csum offload Willem de Bruijn
` (2 preceding siblings ...)
2016-02-02 15:56 ` [PATCH net-next 3/4] packet: parse tpacket header before skb alloc Willem de Bruijn
@ 2016-02-02 15:56 ` Willem de Bruijn
2016-02-03 16:38 ` Willem de Bruijn
3 siblings, 1 reply; 6+ messages in thread
From: Willem de Bruijn @ 2016-02-02 15:56 UTC (permalink / raw)
To: netdev; +Cc: davem, daniel, mst, sri, Willem de Bruijn
From: Willem de Bruijn <willemb@google.com>
Support socket option PACKET_VNET_HDR together with PACKET_TX_RING.
When enabled, a struct virtio_net_hdr is expected to precede the data
in the ring. The vnet option must be set before the ring is created.
The implementation reuses the existing skb_store_bits code that is used
when dev->hard_header_len is non-zero. Move this ll_header check to
before the skb alloc and combine it with a test for vnet_hdr->hdr_len.
Allocate and copy the max of the two.
Verified with test program at
github.com/wdebruij/kerneltools/blob/master/tests/psock_txring_vnet.c
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
net/packet/af_packet.c | 53 +++++++++++++++++++++++++++++++++++---------------
1 file changed, 37 insertions(+), 16 deletions(-)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 89377bf..41e25b6 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2495,7 +2495,7 @@ static int packet_snd_vnet_gso(struct sk_buff *skb,
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
void *frame, struct net_device *dev, void *data, int tp_len,
- __be16 proto, unsigned char *addr, int hlen)
+ __be16 proto, unsigned char *addr, int hlen, int copylen)
{
union tpacket_uhdr ph;
int to_write, offset, len, nr_frags, len_max;
@@ -2522,20 +2522,17 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
NULL, tp_len);
if (unlikely(err < 0))
return -EINVAL;
- } else if (dev->hard_header_len) {
- if (ll_header_truncated(dev, tp_len))
- return -EINVAL;
-
+ } else if (copylen) {
skb_push(skb, dev->hard_header_len);
- err = skb_store_bits(skb, 0, data,
- dev->hard_header_len);
+ skb_put(skb, copylen - dev->hard_header_len);
+ err = skb_store_bits(skb, 0, data, copylen);
if (unlikely(err))
return err;
if (!skb->protocol)
tpacket_set_protocol(dev, skb);
- data += dev->hard_header_len;
- to_write -= dev->hard_header_len;
+ data += copylen;
+ to_write -= copylen;
}
offset = offset_in_page(data);
@@ -2588,7 +2585,7 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
tp_len = ph.h1->tp_len;
break;
}
- if (unlikely(tp_len > size_max)) {
+ if (unlikely(tp_len > size_max) && !po->has_vnet_hdr) {
pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
return -EMSGSIZE;
}
@@ -2631,6 +2628,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
struct sk_buff *skb;
struct net_device *dev;
+ struct virtio_net_hdr *vnet_hdr = NULL;
__be16 proto;
int err, reserve = 0;
void *ph;
@@ -2641,7 +2639,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
void *data;
int len_sum = 0;
int status = TP_STATUS_AVAILABLE;
- int hlen, tlen;
+ int hlen, tlen, copylen = 0;
mutex_lock(&po->pg_vec_lock);
@@ -2694,8 +2692,28 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
status = TP_STATUS_SEND_REQUEST;
hlen = LL_RESERVED_SPACE(dev);
tlen = dev->needed_tailroom;
+ if (po->has_vnet_hdr) {
+ vnet_hdr = data;
+ if (tp_len < sizeof(*vnet_hdr) ||
+ __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
+ tp_len = -EINVAL;
+ goto tpacket_error;
+ }
+ data += sizeof(*vnet_hdr);
+ tp_len -= sizeof(*vnet_hdr);
+ copylen = __virtio16_to_cpu(vio_le(),
+ vnet_hdr->hdr_len);
+ }
+ if (dev->hard_header_len) {
+ if (ll_header_truncated(dev, tp_len)) {
+ tp_len = -EINVAL;
+ goto tpacket_error;
+ }
+ copylen = max_t(int, copylen, dev->hard_header_len);
+ }
skb = sock_alloc_send_skb(&po->sk,
- hlen + tlen + sizeof(struct sockaddr_ll),
+ hlen + tlen + sizeof(struct sockaddr_ll) +
+ (copylen - dev->hard_header_len),
!need_wait, &err);
if (unlikely(skb == NULL)) {
@@ -2705,9 +2723,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
goto out_status;
}
tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
- addr, hlen);
+ addr, hlen, copylen);
if (likely(tp_len >= 0) &&
tp_len > dev->mtu + reserve &&
+ !po->has_vnet_hdr &&
!packet_extra_vlan_len_allowed(dev, skb))
tp_len = -EMSGSIZE;
@@ -2726,6 +2745,11 @@ tpacket_error:
}
}
+ if (po->has_vnet_hdr && packet_snd_vnet_gso(skb, vnet_hdr)) {
+ tp_len = -EINVAL;
+ goto tpacket_error;
+ }
+
packet_pick_tx_queue(dev, skb);
skb->destructor = tpacket_destruct_skb;
@@ -3616,9 +3640,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
}
if (optlen < len)
return -EINVAL;
- if (pkt_sk(sk)->has_vnet_hdr &&
- optname == PACKET_TX_RING)
- return -EINVAL;
if (copy_from_user(&req_u.req, optval, len))
return -EFAULT;
return packet_set_ring(sk, &req_u, 0,
--
2.7.0.rc3.207.g0ac5344
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH net-next 4/4] packet: tpacket_snd gso and checksum offload
2016-02-02 15:56 ` [PATCH net-next 4/4] packet: tpacket_snd gso and checksum offload Willem de Bruijn
@ 2016-02-03 16:38 ` Willem de Bruijn
0 siblings, 0 replies; 6+ messages in thread
From: Willem de Bruijn @ 2016-02-03 16:38 UTC (permalink / raw)
To: Network Development
Cc: David Miller, Daniel Borkmann, mst, sri, Willem de Bruijn
> union tpacket_uhdr ph;
On Tue, Feb 2, 2016 at 10:56 AM, Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
> From: Willem de Bruijn <willemb@google.com>
>
> Support socket option PACKET_VNET_HDR together with PACKET_TX_RING.
>>
> Signed-off-by: Willem de Bruijn <willemb@google.com>
> ---
> net/packet/af_packet.c | 53 +++++++++++++++++++++++++++++++++++---------------
> 1 file changed, 37 insertions(+), 16 deletions(-)
>
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index 89377bf..41e25b6 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -2495,7 +2495,7 @@ static int packet_snd_vnet_gso(struct sk_buff *skb,
>
> + if (po->has_vnet_hdr) {
> + vnet_hdr = data;
> + if (tp_len < sizeof(*vnet_hdr) ||
> + __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
> + tp_len = -EINVAL;
> + goto tpacket_error;
> + }
> + data += sizeof(*vnet_hdr);
> + tp_len -= sizeof(*vnet_hdr);
I left a bug here. tp_len must have the virtio_net_hdr subtracted before the
bounds check vnet_hdr->hdr_len <= tp_len in __packet_snd_vnet_parse.
Will send a v2.
> + copylen = __virtio16_to_cpu(vio_le(),
> + vnet_hdr->hdr_len);
> + }
> + if (dev->hard_header_len) {
> + if (ll_header_truncated(dev, tp_len)) {
> + tp_len = -EINVAL;
> + goto tpacket_error;
> + }
> + copylen = max_t(int, copylen, dev->hard_header_len);
> + }
> skb = sock_alloc_send_skb(&po->sk,
> - hlen + tlen + sizeof(struct sockaddr_ll),
> + hlen + tlen + sizeof(struct sockaddr_ll) +
> + (copylen - dev->hard_header_len),
> !need_wait, &err);
>
> if (unlikely(skb == NULL)) {
> @@ -2705,9 +2723,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
> goto out_status;
> }
> tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
> - addr, hlen);
> + addr, hlen, copylen);
> if (likely(tp_len >= 0) &&
> tp_len > dev->mtu + reserve &&
> + !po->has_vnet_hdr &&
> !packet_extra_vlan_len_allowed(dev, skb))
> tp_len = -EMSGSIZE;
>
> @@ -2726,6 +2745,11 @@ tpacket_error:
> }
> }
>
> + if (po->has_vnet_hdr && packet_snd_vnet_gso(skb, vnet_hdr)) {
> + tp_len = -EINVAL;
> + goto tpacket_error;
> + }
> +
> packet_pick_tx_queue(dev, skb);
>
> skb->destructor = tpacket_destruct_skb;
> @@ -3616,9 +3640,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
> }
> if (optlen < len)
> return -EINVAL;
> - if (pkt_sk(sk)->has_vnet_hdr &&
> - optname == PACKET_TX_RING)
> - return -EINVAL;
> if (copy_from_user(&req_u.req, optval, len))
> return -EFAULT;
> return packet_set_ring(sk, &req_u, 0,
> --
> 2.7.0.rc3.207.g0ac5344
>
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2016-02-03 16:38 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-02 15:56 [PATCH net-next 0/4] packet: tpacket gso and csum offload Willem de Bruijn
2016-02-02 15:56 ` [PATCH net-next 1/4] packet: move vnet_hdr code to helper functions Willem de Bruijn
2016-02-02 15:56 ` [PATCH net-next 2/4] packet: vnet_hdr support for tpacket_rcv Willem de Bruijn
2016-02-02 15:56 ` [PATCH net-next 3/4] packet: parse tpacket header before skb alloc Willem de Bruijn
2016-02-02 15:56 ` [PATCH net-next 4/4] packet: tpacket_snd gso and checksum offload Willem de Bruijn
2016-02-03 16:38 ` Willem de Bruijn
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.