From mboxrd@z Thu Jan 1 00:00:00 1970
From: Ophir Munk
Subject: [PATCH v4 1/2] net/tap: calculate checksums of multi segs packets
Date: Tue, 12 Jun 2018 16:31:47 +0000
Message-ID: <1528821108-12405-2-git-send-email-ophirmu@mellanox.com>
References: <1520629826-23055-2-git-send-email-ophirmu@mellanox.com>
 <1528821108-12405-1-git-send-email-ophirmu@mellanox.com>
In-Reply-To: <1528821108-12405-1-git-send-email-ophirmu@mellanox.com>
Mime-Version: 1.0
Content-Type: text/plain
To: dev@dpdk.org, Pascal Mazon, Keith Wiles
Cc: Thomas Monjalon, Olga Shern, Ophir Munk
List-Id: DPDK patches and discussions

Prior to this commit, IP/UDP/TCP checksum offload calculations were
skipped for multi-segment packets. This commit enables TAP checksum
calculations for multi-segment packets. The only restriction is that
the first segment must contain the headers of layers 3 (IP) and 4
(UDP or TCP).

Reviewed-by: Raslan Darawsheh
Signed-off-by: Ophir Munk
---
 drivers/net/tap/rte_eth_tap.c | 158 +++++++++++++++++++++++++++++-------------
 1 file changed, 110 insertions(+), 48 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index df396bf..c19f053 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -415,12 +415,43 @@ tap_tx_offload_get_queue_capa(void)
 	       DEV_TX_OFFLOAD_TCP_CKSUM;
 }
 
+/* Finalize l4 checksum calculation */
 static void
-tap_tx_offload(char *packet, uint64_t ol_flags, unsigned int l2_len,
-	       unsigned int l3_len)
+tap_tx_l4_cksum(uint16_t *l4_cksum, uint16_t l4_phdr_cksum,
+		uint32_t l4_raw_cksum)
 {
-	void *l3_hdr = packet + l2_len;
+	if (l4_cksum) {
+		uint32_t cksum;
+
+		cksum = __rte_raw_cksum_reduce(l4_raw_cksum);
+		cksum += l4_phdr_cksum;
+
+		cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
+		cksum = (~cksum) & 0xffff;
+		if (cksum == 0)
+			cksum = 0xffff;
+		*l4_cksum = cksum;
+	}
+}
+/* Accumulate L4 raw checksums */
+static void
+tap_tx_l4_add_rcksum(char *l4_data, unsigned int l4_len, uint16_t *l4_cksum,
+		     uint32_t *l4_raw_cksum)
+{
+	if (l4_cksum == NULL)
+		return;
+
+	*l4_raw_cksum = __rte_raw_cksum(l4_data, l4_len, *l4_raw_cksum);
+}
+
+/* L3 and L4 pseudo headers checksum offloads */
+static void
+tap_tx_l3_cksum(char *packet, uint64_t ol_flags, unsigned int l2_len,
+		unsigned int l3_len, unsigned int l4_len, uint16_t **l4_cksum,
+		uint16_t *l4_phdr_cksum, uint32_t *l4_raw_cksum)
+{
+	void *l3_hdr = packet + l2_len;
 
 	if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_IPV4)) {
 		struct ipv4_hdr *iph = l3_hdr;
 		uint16_t cksum;
@@ -430,38 +461,21 @@ tap_tx_offload(char *packet, uint64_t ol_flags, unsigned int l2_len,
 		iph->hdr_checksum = (cksum == 0xffff) ?
 			cksum : ~cksum;
 	}
 	if (ol_flags & PKT_TX_L4_MASK) {
-		uint16_t l4_len;
-		uint32_t cksum;
-		uint16_t *l4_cksum;
 		void *l4_hdr;
 
 		l4_hdr = packet + l2_len + l3_len;
 		if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM)
-			l4_cksum = &((struct udp_hdr *)l4_hdr)->dgram_cksum;
+			*l4_cksum = &((struct udp_hdr *)l4_hdr)->dgram_cksum;
 		else if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM)
-			l4_cksum = &((struct tcp_hdr *)l4_hdr)->cksum;
+			*l4_cksum = &((struct tcp_hdr *)l4_hdr)->cksum;
 		else
 			return;
-		*l4_cksum = 0;
-		if (ol_flags & PKT_TX_IPV4) {
-			struct ipv4_hdr *iph = l3_hdr;
-
-			l4_len = rte_be_to_cpu_16(iph->total_length) - l3_len;
-			cksum = rte_ipv4_phdr_cksum(l3_hdr, 0);
-		} else {
-			struct ipv6_hdr *ip6h = l3_hdr;
-
-			/* payload_len does not include ext headers */
-			l4_len = rte_be_to_cpu_16(ip6h->payload_len) -
-				l3_len + sizeof(struct ipv6_hdr);
-			cksum = rte_ipv6_phdr_cksum(l3_hdr, 0);
-		}
-		cksum += rte_raw_cksum(l4_hdr, l4_len);
-		cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
-		cksum = (~cksum) & 0xffff;
-		if (cksum == 0)
-			cksum = 0xffff;
-		*l4_cksum = cksum;
+		**l4_cksum = 0;
+		if (ol_flags & PKT_TX_IPV4)
+			*l4_phdr_cksum = rte_ipv4_phdr_cksum(l3_hdr, 0);
+		else
+			*l4_phdr_cksum = rte_ipv6_phdr_cksum(l3_hdr, 0);
+		*l4_raw_cksum = __rte_raw_cksum(l4_hdr, l4_len, 0);
 	}
 }
@@ -482,17 +496,27 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	max_size = *txq->mtu + (ETHER_HDR_LEN + ETHER_CRC_LEN + 4);
 	for (i = 0; i < nb_pkts; i++) {
 		struct rte_mbuf *mbuf = bufs[num_tx];
-		struct iovec iovecs[mbuf->nb_segs + 1];
+		struct iovec iovecs[mbuf->nb_segs + 2];
 		struct tun_pi pi = { .flags = 0, .proto = 0x00 };
 		struct rte_mbuf *seg = mbuf;
 		char m_copy[mbuf->data_len];
+		int proto;
 		int n;
 		int j;
+		int k; /* first index in iovecs for copying segments */
+		uint16_t l234_hlen; /* length of layers 2,3,4 headers */
+		uint16_t seg_len; /* length of first segment */
+		uint16_t nb_segs;
+		uint16_t *l4_cksum; /* l4 checksum (pseudo header + payload) */
+		uint32_t l4_raw_cksum = 0; /* TCP/UDP payload raw checksum */
+		uint16_t l4_phdr_cksum = 0; /* TCP/UDP pseudo header checksum */
+		uint16_t is_cksum = 0; /* in case cksum should be offloaded */
 
 		/* stats.errs will be incremented */
 		if (rte_pktmbuf_pkt_len(mbuf) > max_size)
 			break;
 
+		l4_cksum = NULL;
 		if (txq->type == ETH_TUNTAP_TYPE_TUN) {
 			/*
 			 * TUN and TAP are created with IFF_NO_PI disabled.
@@ -505,35 +529,73 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			 * is 4 or 6, then protocol field is updated.
 			 */
 			char *buff_data = rte_pktmbuf_mtod(seg, void *);
-			j = (*buff_data & 0xf0);
-			pi.proto = (j == 0x40) ? rte_cpu_to_be_16(ETHER_TYPE_IPv4) :
-				(j == 0x60) ? rte_cpu_to_be_16(ETHER_TYPE_IPv6) : 0x00;
+			proto = (*buff_data & 0xf0);
+			pi.proto = (proto == 0x40) ?
+				rte_cpu_to_be_16(ETHER_TYPE_IPv4) :
+				((proto == 0x60) ?
+					rte_cpu_to_be_16(ETHER_TYPE_IPv6) :
+					0x00);
 		}
 
-		iovecs[0].iov_base = &pi;
-		iovecs[0].iov_len = sizeof(pi);
-		for (j = 1; j <= mbuf->nb_segs; j++) {
-			iovecs[j].iov_len = rte_pktmbuf_data_len(seg);
-			iovecs[j].iov_base =
-				rte_pktmbuf_mtod(seg, void *);
-			seg = seg->next;
-		}
+		k = 0;
+		iovecs[k].iov_base = &pi;
+		iovecs[k].iov_len = sizeof(pi);
+		k++;
+		nb_segs = mbuf->nb_segs;
 		if (txq->csum &&
 		    ((mbuf->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_IPV4) ||
 		     (mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM ||
 		     (mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM))) {
-			/* Support only packets with all data in the same seg */
-			if (mbuf->nb_segs > 1)
+			is_cksum = 1;
+			/* Support only packets with at least layer 4
+			 * header included in the first segment
+			 */
+			seg_len = rte_pktmbuf_data_len(mbuf);
+			l234_hlen = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
+			if (seg_len < l234_hlen)
 				break;
-			/* To change checksums, work on a copy of data. */
+
+			/* To change checksums, work on a
+			 * copy of l2, l3, l4 headers.
+			 */
 			rte_memcpy(m_copy, rte_pktmbuf_mtod(mbuf, void *),
-				   rte_pktmbuf_data_len(mbuf));
-			tap_tx_offload(m_copy, mbuf->ol_flags,
-				       mbuf->l2_len, mbuf->l3_len);
-			iovecs[1].iov_base = m_copy;
+					l234_hlen);
+			tap_tx_l3_cksum(m_copy, mbuf->ol_flags,
+					mbuf->l2_len, mbuf->l3_len, mbuf->l4_len,
+					&l4_cksum, &l4_phdr_cksum,
+					&l4_raw_cksum);
+			iovecs[k].iov_base = m_copy;
+			iovecs[k].iov_len = l234_hlen;
+			k++;
+			/* Update next iovecs[] beyond l2, l3, l4 headers */
+			if (seg_len > l234_hlen) {
+				iovecs[k].iov_len = seg_len - l234_hlen;
+				iovecs[k].iov_base =
+					rte_pktmbuf_mtod(seg, char *) +
+					l234_hlen;
+				tap_tx_l4_add_rcksum(iovecs[k].iov_base,
+					iovecs[k].iov_len, l4_cksum,
+					&l4_raw_cksum);
+				k++;
+				nb_segs++;
+			}
+			seg = seg->next;
 		}
 
+		for (j = k; j <= nb_segs; j++) {
+			iovecs[j].iov_len = rte_pktmbuf_data_len(seg);
+			iovecs[j].iov_base = rte_pktmbuf_mtod(seg, void *);
+			if (is_cksum)
+				tap_tx_l4_add_rcksum(iovecs[j].iov_base,
+					iovecs[j].iov_len, l4_cksum,
+					&l4_raw_cksum);
+			seg = seg->next;
+		}
+
+		if (is_cksum)
+			tap_tx_l4_cksum(l4_cksum, l4_phdr_cksum, l4_raw_cksum);
+
 		/* copy the tx frame data */
-		n = writev(txq->fd, iovecs, mbuf->nb_segs + 1);
+		n = writev(txq->fd, iovecs, j);
 		if (n <= 0)
 			break;
-- 
2.7.4
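
The arithmetic behind tap_tx_l4_add_rcksum() and tap_tx_l4_cksum() above
is the standard incremental Internet (ones' complement) checksum: each
segment contributes to a running raw sum, and the final value is folded
together with the pseudo header checksum from rte_ipv4_phdr_cksum() or
rte_ipv6_phdr_cksum() and inverted. Below is a minimal standalone C
sketch of that scheme for reference only; it deliberately avoids DPDK
headers, the helper names are invented for illustration, and (as with
chained __rte_raw_cksum() calls) an odd-length buffer is only summed
correctly when it is the last segment.

#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <stdio.h>

/* Accumulate the raw ones' complement sum of one buffer into "sum" */
static uint32_t
raw_cksum_add(const void *buf, size_t len, uint32_t sum)
{
	const uint8_t *p = buf;
	uint16_t w;

	while (len >= 2) {
		memcpy(&w, p, 2); /* avoid unaligned uint16_t loads */
		sum += w;
		p += 2;
		len -= 2;
	}
	if (len == 1) /* odd trailing byte, treated as zero-padded */
		sum += *p;
	return sum;
}

/* Fold the accumulated sum with the pseudo header checksum, then invert */
static uint16_t
l4_cksum_finalize(uint32_t sum, uint16_t phdr_cksum)
{
	sum += phdr_cksum;
	while (sum >> 16) /* fold carries back into the low 16 bits */
		sum = (sum & 0xffff) + (sum >> 16);
	sum = ~sum & 0xffff;
	return sum == 0 ? 0xffff : (uint16_t)sum; /* 0 means "no cksum" in UDP */
}

int
main(void)
{
	/* two payload buffers standing in for two mbuf segments */
	const uint8_t seg1[4] = { 0x11, 0x22, 0x33, 0x44 };
	const uint8_t seg2[3] = { 0x55, 0x66, 0x77 };
	uint32_t sum = 0;

	sum = raw_cksum_add(seg1, sizeof(seg1), sum);
	sum = raw_cksum_add(seg2, sizeof(seg2), sum);
	/* a real caller would pass the IPv4/IPv6 pseudo header checksum */
	printf("l4 cksum: 0x%04x\n", l4_cksum_finalize(sum, 0));
	return 0;
}

Deferring the fold and inversion to one finalization step is what allows
the raw sum to be accumulated over any number of segments without
touching the packet data twice.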
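Likewise, the transmit path relies on scatter-gather I/O: pmd_tx_burst()
fills one iovec per data chunk (the tun_pi header, the copied l2/l3/l4
headers, then the remaining segment data) and hands them to the kernel
in a single writev() call. A minimal sketch of that pattern follows,
with stand-in buffers and stdout in place of the TAP file descriptor.

#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

int
main(void)
{
	/* stand-ins for the copied headers and the remaining mbuf segments */
	char hdr[]  = "hdr";
	char seg1[] = "-seg1";
	char seg2[] = "-seg2\n";
	struct iovec iov[] = {
		{ .iov_base = hdr,  .iov_len = strlen(hdr)  },
		{ .iov_base = seg1, .iov_len = strlen(seg1) },
		{ .iov_base = seg2, .iov_len = strlen(seg2) },
	};

	/* one syscall emits all chunks back to back, as one frame */
	ssize_t n = writev(STDOUT_FILENO, iov, 3);

	return n < 0 ? 1 : 0;
}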