From mboxrd@z Thu Jan 1 00:00:00 1970 From: Tomasz Kulasek Subject: [PATCH v12 6/6] testpmd: use Tx preparation in csum engine Date: Wed, 23 Nov 2016 18:36:25 +0100 Message-ID: <1479922585-8640-7-git-send-email-tomaszx.kulasek@intel.com> References: <1477486575-25148-1-git-send-email-tomaszx.kulasek@intel.com> <1479922585-8640-1-git-send-email-tomaszx.kulasek@intel.com> Cc: konstantin.ananyev@intel.com, olivier.matz@6wind.com To: dev@dpdk.org Return-path: Received: from mga03.intel.com (mga03.intel.com [134.134.136.65]) by dpdk.org (Postfix) with ESMTP id B083498 for ; Wed, 23 Nov 2016 18:41:50 +0100 (CET) In-Reply-To: <1479922585-8640-1-git-send-email-tomaszx.kulasek@intel.com> List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Add a "csum txprep (on|off)" command that switches the csum forward engine to a Tx path using the Tx preparation API. By default the existing implementation is used unchanged. When the Tx preparation path is enabled, the application skips the pseudo-header checksum calculation for UDP/TCP/TSO packets and instead calls the Tx preparation API to prepare and verify the packets. This additional step in the csum engine costs about a 3-4% performance drop on my setup with the ixgbe driver, mostly because the packet data has to be accessed and modified again. 
Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev --- app/test-pmd/cmdline.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++ app/test-pmd/csumonly.c | 33 ++++++++++++++++++++++++------- app/test-pmd/testpmd.c | 5 +++++ app/test-pmd/testpmd.h | 2 ++ 4 files changed, 82 insertions(+), 7 deletions(-) diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index 63b55dc..373fc59 100644 --- a/app/test-pmd/cmdline.c +++ b/app/test-pmd/cmdline.c @@ -366,6 +366,10 @@ static void cmd_help_long_parsed(void *parsed_result, "csum show (port_id)\n" " Display tx checksum offload configuration\n\n" + "csum txprep (on|off)" + " Enable tx preparation path in csum forward engine" + "\n\n" + "tso set (segsize) (portid)\n" " Enable TCP Segmentation Offload in csum forward" " engine.\n" @@ -3523,6 +3527,50 @@ struct cmd_csum_tunnel_result { }, }; +/* Enable/disable tx preparation path */ +struct cmd_csum_txprep_result { + cmdline_fixed_string_t csum; + cmdline_fixed_string_t parse; + cmdline_fixed_string_t onoff; +}; + +static void +cmd_csum_txprep_parsed(void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_csum_txprep_result *res = parsed_result; + + if (!strcmp(res->onoff, "on")) + tx_prepare = 1; + else + tx_prepare = 0; + +} + +cmdline_parse_token_string_t cmd_csum_txprep_csum = + TOKEN_STRING_INITIALIZER(struct cmd_csum_txprep_result, + csum, "csum"); +cmdline_parse_token_string_t cmd_csum_txprep_parse = + TOKEN_STRING_INITIALIZER(struct cmd_csum_txprep_result, + parse, "txprep"); +cmdline_parse_token_string_t cmd_csum_txprep_onoff = + TOKEN_STRING_INITIALIZER(struct cmd_csum_txprep_result, + onoff, "on#off"); + +cmdline_parse_inst_t cmd_csum_txprep = { + .f = cmd_csum_txprep_parsed, + .data = NULL, + .help_str = "enable/disable tx preparation path for csum engine: " + "csum txprep on|off", + .tokens = { + (void *)&cmd_csum_txprep_csum, + (void *)&cmd_csum_txprep_parse, + (void 
*)&cmd_csum_txprep_onoff, + NULL, + }, +}; + /* *** ENABLE HARDWARE SEGMENTATION IN TX NON-TUNNELED PACKETS *** */ struct cmd_tso_set_result { cmdline_fixed_string_t tso; @@ -11470,6 +11518,7 @@ struct cmd_set_vf_mac_addr_result { (cmdline_parse_inst_t *)&cmd_csum_set, (cmdline_parse_inst_t *)&cmd_csum_show, (cmdline_parse_inst_t *)&cmd_csum_tunnel, + (cmdline_parse_inst_t *)&cmd_csum_txprep, (cmdline_parse_inst_t *)&cmd_tso_set, (cmdline_parse_inst_t *)&cmd_tso_show, (cmdline_parse_inst_t *)&cmd_tunnel_tso_set, diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index 57e6ae2..3afa9ab 100644 --- a/app/test-pmd/csumonly.c +++ b/app/test-pmd/csumonly.c @@ -372,8 +372,10 @@ struct simple_gre_hdr { udp_hdr->dgram_cksum = 0; if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) { ol_flags |= PKT_TX_UDP_CKSUM; - udp_hdr->dgram_cksum = get_psd_sum(l3_hdr, - info->ethertype, ol_flags); + if (!tx_prepare) + udp_hdr->dgram_cksum = get_psd_sum( + l3_hdr, info->ethertype, + ol_flags); } else { udp_hdr->dgram_cksum = get_udptcp_checksum(l3_hdr, udp_hdr, @@ -385,12 +387,15 @@ struct simple_gre_hdr { tcp_hdr->cksum = 0; if (tso_segsz) { ol_flags |= PKT_TX_TCP_SEG; - tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype, - ol_flags); + if (!tx_prepare) + tcp_hdr->cksum = get_psd_sum(l3_hdr, + info->ethertype, ol_flags); + } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) { ol_flags |= PKT_TX_TCP_CKSUM; - tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype, - ol_flags); + if (!tx_prepare) + tcp_hdr->cksum = get_psd_sum(l3_hdr, + info->ethertype, ol_flags); } else { tcp_hdr->cksum = get_udptcp_checksum(l3_hdr, tcp_hdr, @@ -648,6 +653,7 @@ struct simple_gre_hdr { void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */ uint16_t nb_rx; uint16_t nb_tx; + uint16_t nb_prep; uint16_t i; uint64_t rx_ol_flags, tx_ol_flags; uint16_t testpmd_ol_flags; @@ -857,7 +863,20 @@ struct simple_gre_hdr { printf("\n"); } } - nb_tx = rte_eth_tx_burst(fs->tx_port, 
fs->tx_queue, pkts_burst, nb_rx); + + if (tx_prepare) { + nb_prep = rte_eth_tx_prepare(fs->tx_port, fs->tx_queue, + pkts_burst, nb_rx); + if (nb_prep != nb_rx) + printf("Preparing packet burst to transmit failed: %s\n", + rte_strerror(rte_errno)); + + nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, + nb_prep); + } else + nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, + nb_rx); + /* * Retry if necessary */ diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index a0332c2..c18bc28 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -180,6 +180,11 @@ struct fwd_engine * fwd_engines[] = { enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF; /**< Split policy for packets to TX. */ +/* + * Enable Tx preparation path in the "csum" engine. + */ +uint8_t tx_prepare = 0; + uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */ uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */ diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index 9c1e703..488a6e1 100644 --- a/app/test-pmd/testpmd.h +++ b/app/test-pmd/testpmd.h @@ -383,6 +383,8 @@ enum tx_pkt_split { extern enum tx_pkt_split tx_pkt_split; +extern uint8_t tx_prepare; + extern uint16_t nb_pkt_per_burst; extern uint16_t mb_mempool_cache; extern int8_t rx_pthresh; -- 1.7.9.5