All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tomasz Kulasek <tomaszx.kulasek@intel.com>
To: dev@dpdk.org
Subject: [PATCH v4] examples/l3fwd: em path performance fix
Date: Fri, 18 Mar 2016 10:36:47 +0100	[thread overview]
Message-ID: <1458293807-2604-1-git-send-email-tomaszx.kulasek@intel.com> (raw)
In-Reply-To: <1457698245-6756-1-git-send-email-tomaszx.kulasek@intel.com>

It seems that for most use cases the previous hash_multi_lookup method
provides better performance; moreover, sequential lookup can cause a
significant performance drop.

This patch makes the previously optional hash_multi_lookup method the default.
It also provides some minor optimizations, such as draining queues only on
the tx ports that are in use.


This patch should be applied after Maciej Czekaj's patch "l3fwd: Fix
compilation with HASH_MULTI_LOOKUP"


v4 changes:
 - rebased to be applicable after patch "l3fwd: Fix compilation with
   HASH_MULTI_LOOKUP" of Maciej Czekaj

v3 changes:
 - "lpm: extend IPv4 next hop field" patch extends dst_port table from
   uint16_t to uint32_t, omitting the previously disabled l3fwd_em_hlm_sse.h,
   which causes an incompatible pointer type error after turning on this header

v2 changes:
 - fixed a copy-paste error that caused some packets to be classified
   incorrectly in the hash_multi_lookup implementation when the burst size
   is not divisible by 8

Fixes: 94c54b4158d5 ("examples/l3fwd: rework exact-match")

Reported-by: Qian Xu <qian.q.xu@intel.com>
Signed-off-by: Tomasz Kulasek <tomaszx.kulasek@intel.com>
---
 examples/l3fwd/l3fwd.h            |    8 ++++++++
 examples/l3fwd/l3fwd_em.c         |    6 +++---
 examples/l3fwd/l3fwd_em_hlm_sse.h |   28 ++++++++++------------------
 examples/l3fwd/l3fwd_em_sse.h     |    9 +++++++++
 examples/l3fwd/l3fwd_lpm.c        |    4 ++--
 examples/l3fwd/main.c             |    7 +++++++
 6 files changed, 39 insertions(+), 23 deletions(-)

diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
index 7dcc7e5..69dcc17 100644
--- a/examples/l3fwd/l3fwd.h
+++ b/examples/l3fwd/l3fwd.h
@@ -40,6 +40,12 @@
 
 #define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1
 
+#define __ARM_NEON 1
+
+#if !defined(NO_HASH_MULTI_LOOKUP) && defined(__ARM_NEON)
+#define NO_HASH_MULTI_LOOKUP 1
+#endif
+
 #define MAX_PKT_BURST     32
 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
 
@@ -86,6 +92,8 @@ struct lcore_rx_queue {
 struct lcore_conf {
 	uint16_t n_rx_queue;
 	struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
+	uint16_t n_tx_port;
+	uint16_t tx_port_id[RTE_MAX_ETHPORTS];
 	uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
 	struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
 	void *ipv4_lookup_struct;
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index 0adf8f4..5a2e7ff 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -320,7 +320,7 @@ em_get_ipv6_dst_port(void *ipv6_hdr,  uint8_t portid, void *lookup_struct)
  * buffer optimization i.e. ENABLE_MULTI_BUFFER_OPTIMIZE=1.
  */
 #if defined(__SSE4_1__)
-#ifndef HASH_MULTI_LOOKUP
+#if defined(NO_HASH_MULTI_LOOKUP)
 #include "l3fwd_em_sse.h"
 #else
 #include "l3fwd_em_hlm_sse.h"
@@ -568,8 +568,8 @@ em_main_loop(__attribute__((unused)) void *dummy)
 		diff_tsc = cur_tsc - prev_tsc;
 		if (unlikely(diff_tsc > drain_tsc)) {
 
-			for (i = 0; i < qconf->n_rx_queue; i++) {
-				portid = qconf->rx_queue_list[i].port_id;
+			for (i = 0; i < qconf->n_tx_port; ++i) {
+				portid = qconf->tx_port_id[i];
 				if (qconf->tx_mbufs[portid].len == 0)
 					continue;
 				send_burst(qconf,
diff --git a/examples/l3fwd/l3fwd_em_hlm_sse.h b/examples/l3fwd/l3fwd_em_hlm_sse.h
index 891ae2e..7faf04a 100644
--- a/examples/l3fwd/l3fwd_em_hlm_sse.h
+++ b/examples/l3fwd/l3fwd_em_hlm_sse.h
@@ -34,17 +34,9 @@
 #ifndef __L3FWD_EM_HLM_SSE_H__
 #define __L3FWD_EM_HLM_SSE_H__
 
-/**
- * @file
- * This is an optional implementation of packet classification in Exact-Match
- * path using rte_hash_lookup_multi method from previous implementation.
- * While sequential classification seems to be faster, it's disabled by default
- * and can be enabled with HASH_LOOKUP_MULTI global define in compilation time.
- */
-
 #include "l3fwd_sse.h"
 
-static inline void
+static inline __attribute__((always_inline)) void
 em_get_dst_port_ipv4x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
 		uint8_t portid, uint32_t dst_port[8])
 {
@@ -168,7 +160,7 @@ get_ipv6_5tuple(struct rte_mbuf *m0, __m128i mask0,
 	key->xmm[2] = _mm_and_si128(tmpdata2, mask1);
 }
 
-static inline void
+static inline __attribute__((always_inline)) void
 em_get_dst_port_ipv6x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
 		uint8_t portid, uint32_t dst_port[8])
 {
@@ -322,17 +314,17 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 
 		} else {
 			dst_port[j]   = em_get_dst_port(qconf, pkts_burst[j], portid);
-			dst_port[j+1] = em_get_dst_port(qconf, pkts_burst[j], portid);
-			dst_port[j+2] = em_get_dst_port(qconf, pkts_burst[j], portid);
-			dst_port[j+3] = em_get_dst_port(qconf, pkts_burst[j], portid);
-			dst_port[j+4] = em_get_dst_port(qconf, pkts_burst[j], portid);
-			dst_port[j+5] = em_get_dst_port(qconf, pkts_burst[j], portid);
-			dst_port[j+6] = em_get_dst_port(qconf, pkts_burst[j], portid);
-			dst_port[j+7] = em_get_dst_port(qconf, pkts_burst[j], portid);
+			dst_port[j+1] = em_get_dst_port(qconf, pkts_burst[j+1], portid);
+			dst_port[j+2] = em_get_dst_port(qconf, pkts_burst[j+2], portid);
+			dst_port[j+3] = em_get_dst_port(qconf, pkts_burst[j+3], portid);
+			dst_port[j+4] = em_get_dst_port(qconf, pkts_burst[j+4], portid);
+			dst_port[j+5] = em_get_dst_port(qconf, pkts_burst[j+5], portid);
+			dst_port[j+6] = em_get_dst_port(qconf, pkts_burst[j+6], portid);
+			dst_port[j+7] = em_get_dst_port(qconf, pkts_burst[j+7], portid);
 		}
 	}
 
-	for (; j < n; j++)
+	for (; j < nb_rx; j++)
 		dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
 
 	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
diff --git a/examples/l3fwd/l3fwd_em_sse.h b/examples/l3fwd/l3fwd_em_sse.h
index d4a2a2d..8bd150a 100644
--- a/examples/l3fwd/l3fwd_em_sse.h
+++ b/examples/l3fwd/l3fwd_em_sse.h
@@ -34,6 +34,15 @@
 #ifndef __L3FWD_EM_SSE_H__
 #define __L3FWD_EM_SSE_H__
 
+/**
+ * @file
+ * This is an optional implementation of packet classification in Exact-Match
+ * path using sequential packet classification method.
+ * While hash lookup multi seems to provide better performance, this
+ * sequential method is disabled by default and can be enabled with the
+ * NO_HASH_MULTI_LOOKUP global define at compilation time.
+ */
+
 #include "l3fwd_sse.h"
 
 static inline __attribute__((always_inline)) uint16_t
diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
index a354797..990a7f1 100644
--- a/examples/l3fwd/l3fwd_lpm.c
+++ b/examples/l3fwd/l3fwd_lpm.c
@@ -159,8 +159,8 @@ lpm_main_loop(__attribute__((unused)) void *dummy)
 		diff_tsc = cur_tsc - prev_tsc;
 		if (unlikely(diff_tsc > drain_tsc)) {
 
-			for (i = 0; i < qconf->n_rx_queue; i++) {
-				portid = qconf->rx_queue_list[i].port_id;
+			for (i = 0; i < qconf->n_tx_port; ++i) {
+				portid = qconf->tx_port_id[i];
 				if (qconf->tx_mbufs[portid].len == 0)
 					continue;
 				send_burst(qconf,
diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
index 8520f71..792894f 100644
--- a/examples/l3fwd/main.c
+++ b/examples/l3fwd/main.c
@@ -791,6 +791,7 @@ main(int argc, char **argv)
 	unsigned lcore_id;
 	uint32_t n_tx_queue, nb_lcores;
 	uint8_t portid, nb_rx_queue, queue, socketid;
+	uint8_t nb_tx_port;
 
 	/* init EAL */
 	ret = rte_eal_init(argc, argv);
@@ -830,6 +831,7 @@ main(int argc, char **argv)
 		rte_exit(EXIT_FAILURE, "check_port_config failed\n");
 
 	nb_lcores = rte_lcore_count();
+	nb_tx_port = 0;
 
 	/* Setup function pointers for lookup method. */
 	setup_l3fwd_lookup_tables();
@@ -906,8 +908,13 @@ main(int argc, char **argv)
 			qconf = &lcore_conf[lcore_id];
 			qconf->tx_queue_id[portid] = queueid;
 			queueid++;
+
+			qconf->n_tx_port = nb_tx_port;
+			qconf->tx_port_id[qconf->n_tx_port] = portid;
 		}
 		printf("\n");
+
+		nb_tx_port++;
 	}
 
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
-- 
1.7.9.5

  parent reply	other threads:[~2016-03-18  9:37 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-03 17:23 [PATCH] examples/l3fwd: em path performance fix Tomasz Kulasek
2016-03-07  6:19 ` Xu, Qian Q
2016-03-08 12:58 ` [PATCH v2] " Tomasz Kulasek
2016-03-11 11:14   ` Thomas Monjalon
2016-03-11 12:28     ` Kulasek, TomaszX
2016-03-11 12:10   ` [PATCH v3] " Tomasz Kulasek
2016-03-11 16:23     ` Thomas Monjalon
2016-03-11 17:48       ` Kulasek, TomaszX
2016-03-15 14:31         ` Kulasek, TomaszX
2016-03-15 14:49           ` Thomas Monjalon
2016-03-15 16:06             ` Kulasek, TomaszX
2016-03-15 19:42               ` Odp.: " Czekaj, Maciej
2016-03-18  9:36     ` Tomasz Kulasek [this message]
2016-03-18  9:43       ` [PATCH v4] " Kulasek, TomaszX
2016-03-18  9:52       ` [PATCH v5] " Tomasz Kulasek
2016-03-18 10:04         ` Thomas Monjalon
2016-03-18 10:52           ` Jerin Jacob
2016-03-18 11:00             ` Thomas Monjalon
2016-03-18 11:16               ` [PATCH] examples/l3fwd: prefer probed NEON flag to ARM gcc flag Thomas Monjalon
2016-03-18 11:20                 ` [PATCH v2] " Thomas Monjalon
2016-03-18 11:56               ` [PATCH v5] examples/l3fwd: em path performance fix Jan Viktorin
2016-03-18 12:45                 ` Kulasek, TomaszX
2016-03-18 12:50                   ` Jan Viktorin
2016-03-18 13:31         ` [PATCH v6] " Tomasz Kulasek
2016-03-21 11:57           ` Thomas Monjalon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1458293807-2604-1-git-send-email-tomaszx.kulasek@intel.com \
    --to=tomaszx.kulasek@intel.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.