From mboxrd@z Thu Jan 1 00:00:00 1970 From: Pablo de Lara Subject: [PATCH 2/3] hash: add vectorized comparison Date: Fri, 26 Aug 2016 22:34:46 +0100 Message-ID: <1472247287-167011-3-git-send-email-pablo.de.lara.guarch@intel.com> References: <1472247287-167011-1-git-send-email-pablo.de.lara.guarch@intel.com> Cc: bruce.richardson@intel.com, Byron Marohn , Saikrishna Edupuganti , Pablo de Lara To: dev@dpdk.org Return-path: Received: from mga14.intel.com (mga14.intel.com [192.55.52.115]) by dpdk.org (Postfix) with ESMTP id AF63558DD for ; Fri, 26 Aug 2016 23:33:59 +0200 (CEST) In-Reply-To: <1472247287-167011-1-git-send-email-pablo.de.lara.guarch@intel.com> List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Byron Marohn In lookup bulk function, the signatures of all entries are compared against the signature of the key that is being looked up. Now that all the signatures are together, they can be compared with vector instructions (SSE, AVX2), achieving higher lookup performance. Also, entries per bucket are increased to 8 when using processors with AVX2, as 256 bits can be compared at once, which is the size of 8x32-bit signatures. 
Signed-off-by: Byron Marohn
Signed-off-by: Saikrishna Edupuganti
Signed-off-by: Pablo de Lara
---
 lib/librte_hash/rte_cuckoo_hash.c | 57 +++++++++++++++++++++++++++++++++++----
 lib/librte_hash/rte_cuckoo_hash.h |  4 +++
 2 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index 9d507b6..98713d3 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -939,6 +939,54 @@ lookup_stage1(unsigned idx, hash_sig_t *prim_hash, hash_sig_t *sec_hash,
 	rte_prefetch0(*secondary_bkt);
 }
 
+/*
+ * Compare the looked-up signatures against every signature stored in the
+ * primary and secondary buckets.
+ *
+ * Bit i of *prim_hash_matches is set when prim_bkt->sig_current[i] equals
+ * prim_hash; likewise for *sec_hash_matches with sec_bkt and sec_hash.
+ * Results are ORed into the masks, so the caller must initialise them.
+ *
+ * NOTE(review): the vector paths use aligned loads, which assumes that
+ * sig_current is 32-byte (AVX2) or 16-byte (SSE2) aligned - confirm against
+ * the definition of struct rte_hash_bucket.
+ */
+static inline void
+compare_signatures(unsigned *prim_hash_matches, unsigned *sec_hash_matches,
+				const struct rte_hash_bucket *prim_bkt,
+				const struct rte_hash_bucket *sec_bkt,
+				hash_sig_t prim_hash, hash_sig_t sec_hash)
+{
+/* 8 entries per bucket */
+#if defined(__AVX2__)
+	*prim_hash_matches |= _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
+			_mm256_load_si256((__m256i const *)prim_bkt->sig_current),
+			_mm256_set1_epi32(prim_hash)));
+	*sec_hash_matches |= _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
+			_mm256_load_si256((__m256i const *)sec_bkt->sig_current),
+			_mm256_set1_epi32(sec_hash)));
+/* 4 entries per bucket */
+#elif defined(__SSE2__)
+	/*
+	 * Signatures are 32 bits wide: compare whole 32-bit lanes so that the
+	 * sign bit sampled by movemask_ps reflects a full signature match.
+	 */
+	*prim_hash_matches |= _mm_movemask_ps((__m128)_mm_cmpeq_epi32(
+			_mm_load_si128((__m128i const *)prim_bkt->sig_current),
+			_mm_set1_epi32(prim_hash)));
+	*sec_hash_matches |= _mm_movemask_ps((__m128)_mm_cmpeq_epi32(
+			_mm_load_si128((__m128i const *)sec_bkt->sig_current),
+			_mm_set1_epi32(sec_hash)));
+#else
+	unsigned i;
+
+	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+		*prim_hash_matches |= ((prim_hash == prim_bkt->sig_current[i]) << i);
+		*sec_hash_matches |= ((sec_hash == sec_bkt->sig_current[i]) << i);
+	}
+#endif
+}
+
 /*
  * Lookup bulk stage 2: Search for match hashes in primary/secondary locations
  * and prefetch first key slot
@@ -951,15 +999,14 @@ lookup_stage2(unsigned idx, hash_sig_t prim_hash, hash_sig_t sec_hash,
 		uint64_t *extra_hits_mask, const void *keys,
 		const struct rte_hash *h)
 {
-	unsigned prim_hash_matches, sec_hash_matches, key_idx, i;
+	unsigned prim_hash_matches, sec_hash_matches, key_idx;
 	unsigned total_hash_matches;
 
 	prim_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES;
 	sec_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES;
-	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-		prim_hash_matches |= ((prim_hash == prim_bkt->sig_current[i]) << i);
-		sec_hash_matches |= ((sec_hash == sec_bkt->sig_current[i]) << i);
-	}
+
+	compare_signatures(&prim_hash_matches, &sec_hash_matches, prim_bkt,
+				sec_bkt, prim_hash, sec_hash);
 
 	key_idx = prim_bkt->key_idx[__builtin_ctzl(prim_hash_matches)];
 	if (key_idx == 0)
diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h
index fe0654f..eb57d7e 100644
--- a/lib/librte_hash/rte_cuckoo_hash.h
+++ b/lib/librte_hash/rte_cuckoo_hash.h
@@ -130,7 +130,11 @@ enum add_key_case {
 };
 
 /** Number of items per bucket. */
+#if defined(__AVX2__)
+#define RTE_HASH_BUCKET_ENTRIES		8
+#else
 #define RTE_HASH_BUCKET_ENTRIES		4
+#endif
 
 #define NULL_SIGNATURE			0
 
-- 
2.7.4