All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/3] eal: introduce integer divide through reciprocal
@ 2017-09-03 12:36 Pavan Nikhilesh
  2017-09-03 12:36 ` [PATCH v3 2/3] eal: add u64 bit variant for reciprocal Pavan Nikhilesh
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Pavan Nikhilesh @ 2017-09-03 12:36 UTC (permalink / raw)
  To: dev; +Cc: cristian.dumitrescu, stephen, Pavan Nikhilesh

In some use cases of integer division, denominator remains constant and
numerator varies. It is possible to optimize division for such specific
scenarios.

The librte_sched uses rte_reciprocal to optimize division so, moving it to
eal/common would allow other libraries and applications to use it.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com>
---

v3 changes:
 - fix x86_32 compilation issue
 - fix improper licence in test

v2 changes:
 - fix compilation issues with .map files
 - add test cases for correctness and performance
 - remove extra licence inclusion
 - fix coding style issues

 lib/librte_eal/bsdapp/eal/Makefile                               | 1 +
 lib/librte_eal/bsdapp/eal/rte_eal_version.map                    | 7 +++++++
 lib/librte_eal/common/Makefile                                   | 1 +
 lib/{librte_sched => librte_eal/common/include}/rte_reciprocal.h | 6 ++++--
 lib/{librte_sched => librte_eal/common}/rte_reciprocal.c         | 6 ++++--
 lib/librte_eal/linuxapp/eal/Makefile                             | 1 +
 lib/librte_eal/linuxapp/eal/rte_eal_version.map                  | 7 +++++++
 lib/librte_sched/Makefile                                        | 2 --
 lib/librte_sched/rte_sched.c                                     | 2 +-
 9 files changed, 26 insertions(+), 7 deletions(-)
 rename lib/{librte_sched => librte_eal/common/include}/rte_reciprocal.h (87%)
 rename lib/{librte_sched => librte_eal/common}/rte_reciprocal.c (96%)

diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index 005019e..56f9804 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -88,6 +88,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_reciprocal.c

 # from arch dir
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_cpuflags.c
diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index 79e7d31..d0bda66 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -238,3 +238,10 @@ EXPERIMENTAL {
 	rte_service_unregister;

 } DPDK_17.08;
+
+DPDK_17.11 {
+	global:
+
+	rte_reciprocal_value;
+
+} DPDK_17.08;
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index e8fd67a..a680b2d 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -42,6 +42,7 @@ INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_vdev.h
 INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
 INC += rte_malloc.h rte_keepalive.h rte_time.h
 INC += rte_service.h rte_service_component.h
+INC += rte_reciprocal.h

 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
 GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h
diff --git a/lib/librte_sched/rte_reciprocal.h b/lib/librte_eal/common/include/rte_reciprocal.h
similarity index 87%
rename from lib/librte_sched/rte_reciprocal.h
rename to lib/librte_eal/common/include/rte_reciprocal.h
index 5e21f09..b6d752f 100644
--- a/lib/librte_sched/rte_reciprocal.h
+++ b/lib/librte_eal/common/include/rte_reciprocal.h
@@ -29,13 +29,15 @@ struct rte_reciprocal {
 	uint8_t sh1, sh2;
 };

-static inline uint32_t rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
+static inline uint32_t
+rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
 {
 	uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);

 	return (t + ((a - t) >> R.sh1)) >> R.sh2;
 }

-struct rte_reciprocal rte_reciprocal_value(uint32_t d);
+struct rte_reciprocal
+rte_reciprocal_value(uint32_t d);

 #endif /* _RTE_RECIPROCAL_H_ */
diff --git a/lib/librte_sched/rte_reciprocal.c b/lib/librte_eal/common/rte_reciprocal.c
similarity index 96%
rename from lib/librte_sched/rte_reciprocal.c
rename to lib/librte_eal/common/rte_reciprocal.c
index 652f023..7ab99b4 100644
--- a/lib/librte_sched/rte_reciprocal.c
+++ b/lib/librte_eal/common/rte_reciprocal.c
@@ -41,7 +41,8 @@
 /* find largest set bit.
  * portable and slow but does not matter for this usage.
  */
-static inline int fls(uint32_t x)
+static inline int
+fls(uint32_t x)
 {
 	int b;

@@ -53,7 +54,8 @@ static inline int fls(uint32_t x)
 	return 0;
 }

-struct rte_reciprocal rte_reciprocal_value(uint32_t d)
+struct rte_reciprocal
+rte_reciprocal_value(uint32_t d)
 {
 	struct rte_reciprocal R;
 	uint64_t m;
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 90bca4d..98f3b8e 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -100,6 +100,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_keepalive.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_service.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_reciprocal.c

 # from arch dir
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_cpuflags.c
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 468c706..65117cb 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -243,3 +243,10 @@ EXPERIMENTAL {
 	rte_service_unregister;

 } DPDK_17.08;
+
+DPDK_17.11 {
+	global:
+
+	rte_reciprocal_value;
+
+} DPDK_17.08;
diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile
index 18274e7..569656b 100644
--- a/lib/librte_sched/Makefile
+++ b/lib/librte_sched/Makefile
@@ -52,10 +52,8 @@ LIBABIVER := 1
 # all source are stored in SRCS-y
 #
 SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
-SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_reciprocal.c

 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h
-SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_reciprocal.h

 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
index b7cba11..3b8ccaa 100644
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
@@ -42,12 +42,12 @@
 #include <rte_prefetch.h>
 #include <rte_branch_prediction.h>
 #include <rte_mbuf.h>
+#include <rte_reciprocal.h>

 #include "rte_sched.h"
 #include "rte_bitmap.h"
 #include "rte_sched_common.h"
 #include "rte_approx.h"
-#include "rte_reciprocal.h"

 #ifdef __INTEL_COMPILER
 #pragma warning(disable:2259) /* conversion may lose significant bits */
--
2.7.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v3 2/3] eal: add u64 bit variant for reciprocal
  2017-09-03 12:36 [PATCH v3 1/3] eal: introduce integer divide through reciprocal Pavan Nikhilesh
@ 2017-09-03 12:36 ` Pavan Nikhilesh
  2017-09-04 14:34   ` Burakov, Anatoly
  2017-09-03 12:36 ` [PATCH v3 3/3] test: add tests for reciprocal based division Pavan Nikhilesh
  2017-09-04 14:01 ` [PATCH v3 1/3] eal: introduce integer divide through reciprocal Burakov, Anatoly
  2 siblings, 1 reply; 8+ messages in thread
From: Pavan Nikhilesh @ 2017-09-03 12:36 UTC (permalink / raw)
  To: dev; +Cc: cristian.dumitrescu, stephen, Pavan Nikhilesh

Currently, rte_reciprocal only supports unsigned 32bit divisors. This
commit adds support for unsigned 64bit divisors.

Rename unsigned 32bit specific functions appropriately and update
librte_sched accordingly.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com>
---
 lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   3 +-
 lib/librte_eal/common/include/rte_reciprocal.h  | 111 ++++++++++++++++++++--
 lib/librte_eal/common/rte_reciprocal.c          | 120 +++++++++++++++++++++---
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |   3 +-
 lib/librte_sched/Makefile                       |   4 +-
 lib/librte_sched/rte_sched.c                    |   9 +-
 6 files changed, 222 insertions(+), 28 deletions(-)

diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index d0bda66..5fd6101 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -242,6 +242,7 @@ EXPERIMENTAL {
 DPDK_17.11 {
 	global:
 
-	rte_reciprocal_value;
+	rte_reciprocal_value_u32;
+	rte_reciprocal_value_u64;
 
 } DPDK_17.08;
diff --git a/lib/librte_eal/common/include/rte_reciprocal.h b/lib/librte_eal/common/include/rte_reciprocal.h
index b6d752f..801d1c8 100644
--- a/lib/librte_eal/common/include/rte_reciprocal.h
+++ b/lib/librte_eal/common/include/rte_reciprocal.h
@@ -22,22 +22,117 @@
 #ifndef _RTE_RECIPROCAL_H_
 #define _RTE_RECIPROCAL_H_
 
-#include <stdint.h>
+#include <rte_memory.h>
 
-struct rte_reciprocal {
+/**
+ * Unsigned 32-bit divisor structure.
+ */
+struct rte_reciprocal_u32 {
 	uint32_t m;
 	uint8_t sh1, sh2;
-};
+} __rte_cache_aligned;
+
+/**
+ * Unsigned 64-bit divisor structure.
+ */
+struct rte_reciprocal_u64 {
+	uint64_t m;
+	uint8_t sh1;
+} __rte_cache_aligned;
 
+/**
+ * Divide given unsigned 32-bit integer with pre calculated divisor.
+ *
+ * @param a
+ *   The 32-bit dividend.
+ * @param R
+ *   The pointer to pre calculated divisor reciprocal structure.
+ *
+ * @return
+ *   The result of the division
+ */
 static inline uint32_t
-rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
+rte_reciprocal_divide_u32(uint32_t a, struct rte_reciprocal_u32 *R)
+{
+	uint32_t t = (((uint64_t)a * R->m) >> 32);
+
+	return (t + ((a - t) >> R->sh1)) >> R->sh2;
+}
+
+static inline uint64_t
+mullhi_u64(uint64_t x, uint64_t y)
+{
+#ifdef __SIZEOF_INT128__
+	__uint128_t xl = x;
+	__uint128_t rl = xl * y;
+
+	return (rl >> 64);
+#else
+	uint64_t u0, u1, v0, v1, k, t;
+	uint64_t w1, w2;
+	uint64_t whi;
+
+	u1 = x >> 32; u0 = x & 0xFFFFFFFF;
+	v1 = y >> 32; v0 = y & 0xFFFFFFFF;
+
+	t = u0*v0;
+	k = t >> 32;
+
+	t = u1*v0 + k;
+	w1 = t & 0xFFFFFFFF;
+	w2 = t >> 32;
+
+	t = u0*v1 + w1;
+	k = t >> 32;
+
+	whi = u1*v1 + w2 + k;
+
+	return whi;
+#endif
+}
+
+/**
+ * Divide given unsigned 64-bit integer with pre calculated divisor.
+ *
+ * @param a
+ *   The 64-bit dividend.
+ * @param R
+ *   The pointer to pre calculated divisor reciprocal structure.
+ *
+ * @return
+ *   The result of the division
+ */
+static inline uint64_t
+rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R)
 {
-	uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);
+	uint64_t q = mullhi_u64(R->m, a);
+	uint64_t t = ((a - q) >> 1) + q;
 
-	return (t + ((a - t) >> R.sh1)) >> R.sh2;
+	return t >> R->sh1;
 }
 
-struct rte_reciprocal
-rte_reciprocal_value(uint32_t d);
+/**
+ * Generate pre calculated divisor structure.
+ *
+ * @param d
+ *   The unsigned 32-bit divisor.
+ *
+ * @return
+ *   Divisor structure.
+ */
+struct rte_reciprocal_u32
+rte_reciprocal_value_u32(uint32_t d);
+
+/**
+ * Generate pre calculated divisor structure.
+ *
+ * @param d
+ *   The unsigned 64-bit divisor.
+ *
+ * @return
+ *   Divisor structure.
+ */
+struct rte_reciprocal_u64
+rte_reciprocal_value_u64(uint64_t d);
 
 #endif /* _RTE_RECIPROCAL_H_ */
diff --git a/lib/librte_eal/common/rte_reciprocal.c b/lib/librte_eal/common/rte_reciprocal.c
index 7ab99b4..5d7e367 100644
--- a/lib/librte_eal/common/rte_reciprocal.c
+++ b/lib/librte_eal/common/rte_reciprocal.c
@@ -31,18 +31,13 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <stdio.h>
-#include <stdint.h>
-
-#include <rte_common.h>
-
-#include "rte_reciprocal.h"
+#include <rte_reciprocal.h>
 
 /* find largest set bit.
  * portable and slow but does not matter for this usage.
  */
 static inline int
-fls(uint32_t x)
+fls_u32(uint32_t x)
 {
 	int b;
 
@@ -54,21 +49,120 @@ fls(uint32_t x)
 	return 0;
 }
 
-struct rte_reciprocal
-rte_reciprocal_value(uint32_t d)
+struct rte_reciprocal_u32
+rte_reciprocal_value_u32(uint32_t d)
 {
-	struct rte_reciprocal R;
+	struct rte_reciprocal_u32 R;
 	uint64_t m;
 	int l;
 
-	l = fls(d - 1);
+	l = fls_u32(d - 1);
 	m = ((1ULL << 32) * ((1ULL << l) - d));
 	m /= d;
 
 	++m;
 	R.m = m;
-	R.sh1 = RTE_MIN(l, 1);
-	R.sh2 = RTE_MAX(l - 1, 0);
+	R.sh1 = l > 1 ? 1 : l;
+	R.sh2 = (l - 1 > 0) ? l - 1 : 0;
+
+	return R;
+}
+
+/* Code taken from Hacker's Delight:
+ * http://www.hackersdelight.org/HDcode/divlu.c.
+ * License permits inclusion here per:
+ * http://www.hackersdelight.org/permissions.htm
+ */
+static inline uint64_t
+divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r)
+{
+	const uint64_t b = (1ULL << 32); /* Number base (16 bits). */
+	uint64_t un1, un0,           /* Norm. dividend LSD's. */
+			 vn1, vn0,           /* Norm. divisor digits. */
+			 q1, q0,             /* Quotient digits. */
+			 un64, un21, un10,   /* Dividend digit pairs. */
+			 rhat;               /* A remainder. */
+	int s;                       /* Shift amount for norm. */
+
+    /* If overflow, set rem. to an impossible value. */
+	if (u1 >= v) {
+		if (r != NULL)
+			*r = (uint64_t) -1;
+		return (uint64_t) -1;
+	}
+
+	/* Count leading zeros. */
+	s = __builtin_clzll(v);
+	if (s > 0) {
+		v = v << s;
+		un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
+		un10 = u0 << s;
+	} else {
+
+		un64 = u1 | u0;
+		un10 = u0;
+	}
+
+	vn1 = v >> 32;
+	vn0 = v & 0xFFFFFFFF;
+
+	un1 = un10 >> 32;
+	un0 = un10 & 0xFFFFFFFF;
+
+	q1 = un64/vn1;
+	rhat = un64 - q1*vn1;
+again1:
+	if (q1 >= b || q1*vn0 > b*rhat + un1) {
+		q1 = q1 - 1;
+		rhat = rhat + vn1;
+		if (rhat < b)
+			goto again1;
+	}
+
+	un21 = un64*b + un1 - q1*v;
+
+	q0 = un21/vn1;
+	rhat = un21 - q0*vn1;
+again2:
+	if (q0 >= b || q0*vn0 > b*rhat + un0) {
+		q0 = q0 - 1;
+		rhat = rhat + vn1;
+		if (rhat < b)
+			goto again2;
+	}
+
+	if (r != NULL)
+		*r = (un21*b + un0 - q0*v) >> s;
+	return q1*b + q0;
+}
+
+struct rte_reciprocal_u64
+rte_reciprocal_value_u64(uint64_t d)
+{
+	struct rte_reciprocal_u64 R;
+
+	const uint32_t fld = 63 - __builtin_clzll(d);
+
+	if ((d & (d - 1)) == 0) {
+		R.m = 0;
+		R.sh1 = (fld - 1) | 0x40;
+	} else {
+		uint64_t rem;
+		uint64_t multiplier;
+		uint8_t more;
+
+		multiplier = divide_128_div_64_to_64(1ULL << fld, 0, d, &rem);
+		multiplier += multiplier;
+
+		const uint64_t twice_rem = rem + rem;
+		if (twice_rem >= d || twice_rem < rem)
+			multiplier += 1;
+		more = fld;
+		R.m = 1 + multiplier;
+		R.sh1 = more | 0x40;
+	}
+
+	R.sh1 &= 0x3F;
 
 	return R;
 }
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 65117cb..63ff2b8 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -247,6 +247,7 @@ EXPERIMENTAL {
 DPDK_17.11 {
 	global:
 
-	rte_reciprocal_value;
+	rte_reciprocal_value_u32;
+	rte_reciprocal_value_u64;
 
 } DPDK_17.08;
diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile
index 569656b..a2fd6f3 100644
--- a/lib/librte_sched/Makefile
+++ b/lib/librte_sched/Makefile
@@ -54,6 +54,8 @@ LIBABIVER := 1
 SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
 
 # install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_sched_common.h rte_red.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_approx.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
index 3b8ccaa..7bb6d51 100644
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
@@ -228,7 +228,7 @@ struct rte_sched_port {
 	uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU cyles */
 	uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes */
 	uint64_t time;                /* Current NIC TX time measured in bytes */
-	struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */
+	struct rte_reciprocal_u32 inv_cycles_per_byte; /* CPU cycles per byte */
 
 	/* Scheduling loop detection */
 	uint32_t pipe_loop;
@@ -677,7 +677,7 @@ rte_sched_port_config(struct rte_sched_port_params *params)
 
 	cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
 		/ params->rate;
-	port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
+	port->inv_cycles_per_byte = rte_reciprocal_value_u32(cycles_per_byte);
 
 	/* Scheduling loop detection */
 	port->pipe_loop = RTE_SCHED_PIPE_INVALID;
@@ -2147,8 +2147,9 @@ rte_sched_port_time_resync(struct rte_sched_port *port)
 	uint64_t bytes_diff;
 
 	/* Compute elapsed time in bytes */
-	bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT,
-					   port->inv_cycles_per_byte);
+	bytes_diff = rte_reciprocal_divide_u32(
+			cycles_diff << RTE_SCHED_TIME_SHIFT,
+			&port->inv_cycles_per_byte);
 
 	/* Advance port time */
 	port->time_cpu_cycles = cycles;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v3 3/3] test: add tests for reciprocal based division
  2017-09-03 12:36 [PATCH v3 1/3] eal: introduce integer divide through reciprocal Pavan Nikhilesh
  2017-09-03 12:36 ` [PATCH v3 2/3] eal: add u64 bit variant for reciprocal Pavan Nikhilesh
@ 2017-09-03 12:36 ` Pavan Nikhilesh
  2017-09-04  9:18   ` Burakov, Anatoly
  2017-09-04 14:01 ` [PATCH v3 1/3] eal: introduce integer divide through reciprocal Burakov, Anatoly
  2 siblings, 1 reply; 8+ messages in thread
From: Pavan Nikhilesh @ 2017-09-03 12:36 UTC (permalink / raw)
  To: dev; +Cc: cristian.dumitrescu, stephen, Pavan Nikhilesh

This commit provides a set of tests for verifying the correctness and
performance of both unsigned 32 and 64bit reciprocal based division.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com>
---
 test/test/Makefile                        |   2 +
 test/test/test_reciprocal_division.c      | 109 ++++++++++++++++++
 test/test/test_reciprocal_division_perf.c | 181 ++++++++++++++++++++++++++++++
 3 files changed, 292 insertions(+)
 create mode 100644 test/test/test_reciprocal_division.c
 create mode 100644 test/test/test_reciprocal_division_perf.c

diff --git a/test/test/Makefile b/test/test/Makefile
index 42d9a49..6017862 100644
--- a/test/test/Makefile
+++ b/test/test/Makefile
@@ -94,6 +94,8 @@ SRCS-y += test_cycles.c
 SRCS-y += test_spinlock.c
 SRCS-y += test_memory.c
 SRCS-y += test_memzone.c
+SRCS-y += test_reciprocal_division.c
+SRCS-y += test_reciprocal_division_perf.c
 
 SRCS-y += test_ring.c
 SRCS-y += test_ring_perf.c
diff --git a/test/test/test_reciprocal_division.c b/test/test/test_reciprocal_division.c
new file mode 100644
index 0000000..771ea64
--- /dev/null
+++ b/test/test/test_reciprocal_division.c
@@ -0,0 +1,109 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) Cavium, Inc. 2017.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium, Inc nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "test.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_random.h>
+#include <rte_reciprocal.h>
+
+#define MAX_ITERATIONS	1000000
+#define DIVIDE_ITER		100
+
+static int
+test_reciprocal_division(void)
+{
+	int i;
+	int result = 0;
+	uint32_t divisor_u32 = 0;
+	uint32_t dividend_u32;
+	uint32_t nresult_u32;
+	uint32_t rresult_u32;
+	uint64_t divisor_u64 = 0;
+	uint64_t dividend_u64;
+	uint64_t nresult_u64;
+	uint64_t rresult_u64;
+	struct rte_reciprocal_u32 reci_u32;
+	struct rte_reciprocal_u64 reci_u64;
+
+	rte_srand(rte_rdtsc());
+	printf("Validating unsigned 32bit division.\n");
+	for (i = 0; i < MAX_ITERATIONS; i++) {
+		/* Change divisor every DIVIDE_ITER iterations. */
+		if (i % DIVIDE_ITER == 0) {
+			divisor_u32 = rte_rand();
+			reci_u32 = rte_reciprocal_value_u32(divisor_u32);
+		}
+
+		dividend_u32 = rte_rand();
+		nresult_u32 = dividend_u32 / divisor_u32;
+		rresult_u32 = rte_reciprocal_divide_u32(dividend_u32,
+				&reci_u32);
+		if (nresult_u32 != rresult_u32) {
+			printf("Division failed, expected %"PRIu32" "
+				   "result %"PRIu32"",
+					nresult_u32, rresult_u32);
+			result = 1;
+			break;
+		}
+	}
+
+	printf("Validating unsigned 64bit division.\n");
+	for (i = 0; i < MAX_ITERATIONS; i++) {
+		/* Change divisor every DIVIDE_ITER iterations. */
+		if (i % DIVIDE_ITER == 0) {
+			divisor_u64 = rte_rand();
+			reci_u64 = rte_reciprocal_value_u64(divisor_u64);
+		}
+
+		dividend_u64 = rte_rand();
+		nresult_u64 = dividend_u64 / divisor_u64;
+		rresult_u64 = rte_reciprocal_divide_u64(dividend_u64,
+				&reci_u64);
+		if (nresult_u64 != rresult_u64) {
+			printf("Division failed, expected %"PRIu64" "
+				   "result %"PRIu64"",
+					nresult_u64, rresult_u64);
+			result = 1;
+			break;
+		}
+	}
+
+	return result;
+}
+
+REGISTER_TEST_COMMAND(reciprocal_division, test_reciprocal_division);
diff --git a/test/test/test_reciprocal_division_perf.c b/test/test/test_reciprocal_division_perf.c
new file mode 100644
index 0000000..31f069d
--- /dev/null
+++ b/test/test/test_reciprocal_division_perf.c
@@ -0,0 +1,181 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) Cavium, Inc. 2017.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium, Inc nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "test.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_random.h>
+#include <rte_reciprocal.h>
+
+#define MAX_ITERATIONS	1000000
+#define DIVIDE_ITER		100
+
+static int
+test_reciprocal_division_perf(void)
+{
+	int result = 0;
+	uint32_t divisor_u32 = 0;
+	uint32_t dividend_u32;
+	uint32_t nresult_u32;
+	uint32_t rresult_u32;
+	uint64_t divisor_u64 = 0;
+	uint64_t dividend_u64;
+	uint64_t nresult_u64;
+	uint64_t rresult_u64;
+	uint64_t cur_cyc;
+	uint64_t tot_cyc_n = 0;
+	uint64_t tot_cyc_r = 0;
+	uint64_t i;
+	struct rte_reciprocal_u32 reci_u32 = {0};
+	struct rte_reciprocal_u64 reci_u64 = {0};
+
+	rte_srand(rte_rdtsc());
+	printf("Validating unsigned 32bit division.\n");
+	for (i = 0; i < MAX_ITERATIONS; i++) {
+		/* Change divisor every DIVIDE_ITER iterations. */
+		if (i % DIVIDE_ITER == 0) {
+			divisor_u32 = rte_rand();
+			reci_u32 = rte_reciprocal_value_u32(divisor_u32);
+		}
+
+		dividend_u32 = rte_rand();
+		cur_cyc = rte_rdtsc();
+		nresult_u32 = dividend_u32 / divisor_u32;
+		tot_cyc_n += rte_rdtsc() - cur_cyc;
+
+		cur_cyc = rte_rdtsc();
+		rresult_u32 = rte_reciprocal_divide_u32(dividend_u32,
+				&reci_u32);
+		tot_cyc_r += rte_rdtsc() - cur_cyc;
+		if (nresult_u32 != rresult_u32) {
+			printf("Division failed, expected %"PRIu32" "
+					"result %"PRIu32"",
+					nresult_u32, rresult_u32);
+			result = 1;
+			break;
+		}
+	}
+	printf("32bit Division results:\n");
+	printf("Total number of cycles normal division     : %"PRIu64"\n",
+			tot_cyc_n);
+	printf("Total number of cycles reciprocal division : %"PRIu64"\n",
+			tot_cyc_r);
+	printf("Cycles per division(normal) : %3.2f\n",
+			((double)tot_cyc_n)/i);
+	printf("Cycles per division(normal) : %3.2f\n\n",
+			((double)tot_cyc_r)/i);
+
+	tot_cyc_n = 0;
+	tot_cyc_r = 0;
+	printf("Validating unsigned 64bit division.\n");
+
+	for (i = 0; i < MAX_ITERATIONS; i++) {
+		/* Change divisor every DIVIDE_ITER iterations. */
+		if (i % DIVIDE_ITER == 0) {
+			divisor_u64 = rte_rand();
+			reci_u64 = rte_reciprocal_value_u64(divisor_u64);
+		}
+
+		dividend_u64 = rte_rand();
+		cur_cyc = rte_rdtsc();
+		nresult_u64 = dividend_u64 / divisor_u64;
+		tot_cyc_n += rte_rdtsc() - cur_cyc;
+
+		cur_cyc = rte_rdtsc();
+		rresult_u64 = rte_reciprocal_divide_u64(dividend_u64,
+				&reci_u64);
+		tot_cyc_r += rte_rdtsc() - cur_cyc;
+		if (nresult_u64 != rresult_u64) {
+			printf("Division failed, expected %"PRIu64" "
+					"result %"PRIu64"",
+					nresult_u64, rresult_u64);
+			result = 1;
+			break;
+		}
+	}
+	printf("64bit Division results:\n");
+	printf("Total number of cycles normal division     : %"PRIu64"\n",
+			tot_cyc_n);
+	printf("Total number of cycles reciprocal division : %"PRIu64"\n",
+			tot_cyc_r);
+	printf("Cycles per division(normal) : %3.2f\n",
+			((double)tot_cyc_n)/i);
+	printf("Cycles per division(normal) : %3.2f\n\n",
+			((double)tot_cyc_r)/i);
+
+	printf("Validating unsigned 64bit division with 32bit divisor.\n");
+	for (i = 0; i < MAX_ITERATIONS; i++) {
+		/* Change divisor every DIVIDE_ITER iterations. */
+		if (i % DIVIDE_ITER == 0) {
+			divisor_u64 = rte_rand() >> 32;
+			reci_u64 = rte_reciprocal_value_u64(divisor_u64);
+		}
+
+		dividend_u64 = rte_rand();
+		cur_cyc = rte_rdtsc();
+		nresult_u64 = dividend_u64 / divisor_u64;
+		tot_cyc_n += rte_rdtsc() - cur_cyc;
+
+		cur_cyc = rte_rdtsc();
+		rresult_u64 = rte_reciprocal_divide_u64(dividend_u64,
+				&reci_u64);
+		tot_cyc_r += rte_rdtsc() - cur_cyc;
+		if (nresult_u64 != rresult_u64) {
+			printf("Division failed, expected %"PRIu64" "
+					"result %"PRIu64"",
+					nresult_u64, rresult_u64);
+			result = 1;
+			break;
+		}
+	}
+	printf("64bit Division results:\n");
+	printf("Total number of cycles normal division     : %"PRIu64"\n",
+			tot_cyc_n);
+	printf("Total number of cycles reciprocal division : %"PRIu64"\n",
+			tot_cyc_r);
+	printf("Cycles per division(normal) : %3.2f\n",
+			((double)tot_cyc_n)/i);
+	printf("Cycles per division(normal) : %3.2f\n",
+			((double)tot_cyc_r)/i);
+
+	tot_cyc_n = 0;
+	tot_cyc_r = 0;
+
+	return result;
+}
+
+REGISTER_TEST_COMMAND(reciprocal_division_perf, test_reciprocal_division_perf);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 3/3] test: add tests for reciprocal based division
  2017-09-03 12:36 ` [PATCH v3 3/3] test: add tests for reciprocal based division Pavan Nikhilesh
@ 2017-09-04  9:18   ` Burakov, Anatoly
  2017-09-04  9:49     ` Pavan Nikhilesh Bhagavatula
  0 siblings, 1 reply; 8+ messages in thread
From: Burakov, Anatoly @ 2017-09-04  9:18 UTC (permalink / raw)
  To: Pavan Nikhilesh, dev; +Cc: Dumitrescu, Cristian, stephen

Hi Pavan,

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pavan Nikhilesh
> Sent: Sunday, September 3, 2017 1:36 PM
> To: dev@dpdk.org
> Cc: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>;
> stephen@networkplumber.org; Pavan Nikhilesh
> <pbhagavatula@caviumnetworks.com>
> Subject: [dpdk-dev] [PATCH v3 3/3] test: add tests for reciprocal based
> division
> 
> This commit provides a set of tests for verifying the correctness and
> performance of both unsigned 32 and 64bit reciprocal based division.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com>
> ---
>  test/test/Makefile                        |   2 +
>  test/test/test_reciprocal_division.c      | 109 ++++++++++++++++++
>  test/test/test_reciprocal_division_perf.c | 181
> ++++++++++++++++++++++++++++++
>  3 files changed, 292 insertions(+)
>  create mode 100644 test/test/test_reciprocal_division.c
>  create mode 100644 test/test/test_reciprocal_division_perf.c
> 
> diff --git a/test/test/Makefile b/test/test/Makefile index 42d9a49..6017862
> 100644
> --- a/test/test/Makefile
> +++ b/test/test/Makefile
> @@ -94,6 +94,8 @@ SRCS-y += test_cycles.c  SRCS-y += test_spinlock.c
> SRCS-y += test_memory.c  SRCS-y += test_memzone.c
> +SRCS-y += test_reciprocal_division.c
> +SRCS-y += test_reciprocal_division_perf.c
> 
>  SRCS-y += test_ring.c
>  SRCS-y += test_ring_perf.c
> diff --git a/test/test/test_reciprocal_division.c
> b/test/test/test_reciprocal_division.c
> new file mode 100644
> index 0000000..771ea64
> --- /dev/null
> +++ b/test/test/test_reciprocal_division.c
> @@ -0,0 +1,109 @@
> +/*
> + *   BSD LICENSE
> + *
> + *   Copyright (C) Cavium, Inc. 2017.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Cavium, Inc nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> + */
> +
> +#include "test.h"
> +
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <inttypes.h>
> +
> +#include <rte_common.h>
> +#include <rte_cycles.h>
> +#include <rte_random.h>
> +#include <rte_reciprocal.h>
> +
> +#define MAX_ITERATIONS	1000000
> +#define DIVIDE_ITER		100
> +
> +static int
> +test_reciprocal_division(void)
> +{
> +	int i;
> +	int result = 0;
> +	uint32_t divisor_u32 = 0;
> +	uint32_t dividend_u32;
> +	uint32_t nresult_u32;
> +	uint32_t rresult_u32;
> +	uint64_t divisor_u64 = 0;
> +	uint64_t dividend_u64;
> +	uint64_t nresult_u64;
> +	uint64_t rresult_u64;
> +	struct rte_reciprocal_u32 reci_u32;
> +	struct rte_reciprocal_u64 reci_u64;
> +
> +	rte_srand(rte_rdtsc());
> +	printf("Validating unsigned 32bit division.\n");
> +	for (i = 0; i < MAX_ITERATIONS; i++) {
> +		/* Change divisor every DIVIDE_ITER iterations. */
> +		if (i % DIVIDE_ITER == 0) {
> +			divisor_u32 = rte_rand();
> +			reci_u32 = rte_reciprocal_value_u32(divisor_u32);
> +		}
> +
> +		dividend_u32 = rte_rand();
> +		nresult_u32 = dividend_u32 / divisor_u32;
> +		rresult_u32 = rte_reciprocal_divide_u32(dividend_u32,
> +				&reci_u32);
> +		if (nresult_u32 != rresult_u32) {
> +			printf("Division failed, expected %"PRIu32" "
> +				   "result %"PRIu32"",
> +					nresult_u32, rresult_u32);
> +			result = 1;
> +			break;
> +		}
> +	}
> +
> +	printf("Validating unsigned 64bit division.\n");
> +	for (i = 0; i < MAX_ITERATIONS; i++) {
> +		/* Change divisor every DIVIDE_ITER iterations. */
> +		if (i % DIVIDE_ITER == 0) {
> +			divisor_u64 = rte_rand();
> +			reci_u64 = rte_reciprocal_value_u64(divisor_u64);
> +		}
> +
> +		dividend_u64 = rte_rand();
> +		nresult_u64 = dividend_u64 / divisor_u64;
> +		rresult_u64 = rte_reciprocal_divide_u64(dividend_u64,
> +				&reci_u64);
> +		if (nresult_u64 != rresult_u64) {
> +			printf("Division failed, expected %"PRIu64" "
> +				   "result %"PRIu64"",
> +					nresult_u64, rresult_u64);
> +			result = 1;
> +			break;
> +		}
> +	}
> +
> +	return result;
> +}
> +
> +REGISTER_TEST_COMMAND(reciprocal_division, test_reciprocal_division);
> diff --git a/test/test/test_reciprocal_division_perf.c
> b/test/test/test_reciprocal_division_perf.c
> new file mode 100644
> index 0000000..31f069d
> --- /dev/null
> +++ b/test/test/test_reciprocal_division_perf.c
> @@ -0,0 +1,181 @@
> +/*
> + *   BSD LICENSE
> + *
> + *   Copyright (C) Cavium, Inc. 2017.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Cavium, Inc nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> + */
> +
> +#include "test.h"
> +
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <inttypes.h>
> +
> +#include <rte_common.h>
> +#include <rte_cycles.h>
> +#include <rte_random.h>
> +#include <rte_reciprocal.h>
> +
> +#define MAX_ITERATIONS	1000000
> +#define DIVIDE_ITER		100
> +
> +static int
> +test_reciprocal_division_perf(void)
> +{
> +	int result = 0;
> +	uint32_t divisor_u32 = 0;
> +	uint32_t dividend_u32;
> +	uint32_t nresult_u32;
> +	uint32_t rresult_u32;
> +	uint64_t divisor_u64 = 0;
> +	uint64_t dividend_u64;
> +	uint64_t nresult_u64;
> +	uint64_t rresult_u64;
> +	uint64_t cur_cyc;
> +	uint64_t tot_cyc_n = 0;
> +	uint64_t tot_cyc_r = 0;
> +	uint64_t i;
> +	struct rte_reciprocal_u32 reci_u32 = {0};
> +	struct rte_reciprocal_u64 reci_u64 = {0};
> +
> +	rte_srand(rte_rdtsc());
> +	printf("Validating unsigned 32bit division.\n");
> +	for (i = 0; i < MAX_ITERATIONS; i++) {
> +		/* Change divisor every DIVIDE_ITER iterations. */
> +		if (i % DIVIDE_ITER == 0) {
> +			divisor_u32 = rte_rand();
> +			reci_u32 = rte_reciprocal_value_u32(divisor_u32);
> +		}
> +
> +		dividend_u32 = rte_rand();
> +		cur_cyc = rte_rdtsc();
> +		nresult_u32 = dividend_u32 / divisor_u32;
> +		tot_cyc_n += rte_rdtsc() - cur_cyc;
> +
> +		cur_cyc = rte_rdtsc();

Is there any particular reason for calling rte_rdtsc() twice in a row? Instead of, say, doing something like this:

start = rdtsc();
// do something
split = rdtsc();
// do something else
end = rdtsc();
// figure out which cycle value goes where

> +		rresult_u32 = rte_reciprocal_divide_u32(dividend_u32,
> +				&reci_u32);
> +		tot_cyc_r += rte_rdtsc() - cur_cyc;
> +		if (nresult_u32 != rresult_u32) {
> +			printf("Division failed, expected %"PRIu32" "
> +					"result %"PRIu32"",
> +					nresult_u32, rresult_u32);
> +			result = 1;
> +			break;
> +		}
> +	}
> +	printf("32bit Division results:\n");
> +	printf("Total number of cycles normal division     : %"PRIu64"\n",
> +			tot_cyc_n);
> +	printf("Total number of cycles reciprocal division : %"PRIu64"\n",
> +			tot_cyc_r);
> +	printf("Cycles per division(normal) : %3.2f\n",
> +			((double)tot_cyc_n)/i);
> +	printf("Cycles per division(normal) : %3.2f\n\n",
> +			((double)tot_cyc_r)/i);
> +
> +	tot_cyc_n = 0;
> +	tot_cyc_r = 0;
> +	printf("Validating unsigned 64bit division.\n");
> +
> +	for (i = 0; i < MAX_ITERATIONS; i++) {
> +		/* Change divisor every DIVIDE_ITER iterations. */
> +		if (i % DIVIDE_ITER == 0) {
> +			divisor_u64 = rte_rand();
> +			reci_u64 = rte_reciprocal_value_u64(divisor_u64);
> +		}
> +
> +		dividend_u64 = rte_rand();
> +		cur_cyc = rte_rdtsc();
> +		nresult_u64 = dividend_u64 / divisor_u64;
> +		tot_cyc_n += rte_rdtsc() - cur_cyc;
> +
> +		cur_cyc = rte_rdtsc();

Same as above

> +		rresult_u64 = rte_reciprocal_divide_u64(dividend_u64,
> +				&reci_u64);
> +		tot_cyc_r += rte_rdtsc() - cur_cyc;
> +		if (nresult_u64 != rresult_u64) {
> +			printf("Division failed, expected %"PRIu64" "
> +					"result %"PRIu64"",
> +					nresult_u64, rresult_u64);
> +			result = 1;
> +			break;
> +		}
> +	}
> +	printf("64bit Division results:\n");
> +	printf("Total number of cycles normal division     : %"PRIu64"\n",
> +			tot_cyc_n);
> +	printf("Total number of cycles reciprocal division : %"PRIu64"\n",
> +			tot_cyc_r);
> +	printf("Cycles per division(normal) : %3.2f\n",
> +			((double)tot_cyc_n)/i);
> +	printf("Cycles per division(normal) : %3.2f\n\n",
> +			((double)tot_cyc_r)/i);
> +
> +	printf("Validating unsigned 64bit division with 32bit divisor.\n");
> +	for (i = 0; i < MAX_ITERATIONS; i++) {
> +		/* Change divisor every DIVIDE_ITER iterations. */
> +		if (i % DIVIDE_ITER == 0) {
> +			divisor_u64 = rte_rand() >> 32;
> +			reci_u64 = rte_reciprocal_value_u64(divisor_u64);
> +		}
> +
> +		dividend_u64 = rte_rand();
> +		cur_cyc = rte_rdtsc();
> +		nresult_u64 = dividend_u64 / divisor_u64;
> +		tot_cyc_n += rte_rdtsc() - cur_cyc;
> +
> +		cur_cyc = rte_rdtsc();
> +		rresult_u64 = rte_reciprocal_divide_u64(dividend_u64,
> +				&reci_u64);
> +		tot_cyc_r += rte_rdtsc() - cur_cyc;
> +		if (nresult_u64 != rresult_u64) {
> +			printf("Division failed, expected %"PRIu64" "
> +					"result %"PRIu64"",
> +					nresult_u64, rresult_u64);
> +			result = 1;
> +			break;
> +		}
> +	}
> +	printf("64bit Division results:\n");
> +	printf("Total number of cycles normal division     : %"PRIu64"\n",
> +			tot_cyc_n);
> +	printf("Total number of cycles reciprocal division : %"PRIu64"\n",
> +			tot_cyc_r);
> +	printf("Cycles per division(normal) : %3.2f\n",
> +			((double)tot_cyc_n)/i);
> +	printf("Cycles per division(normal) : %3.2f\n",
> +			((double)tot_cyc_r)/i);
> +
> +	tot_cyc_n = 0;
> +	tot_cyc_r = 0;
> +
> +	return result;
> +}
> +
> +REGISTER_TEST_COMMAND(reciprocal_division_perf,
> +test_reciprocal_division_perf);
> --
> 2.7.4

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 3/3] test: add tests for reciprocal based division
  2017-09-04  9:18   ` Burakov, Anatoly
@ 2017-09-04  9:49     ` Pavan Nikhilesh Bhagavatula
  0 siblings, 0 replies; 8+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2017-09-04  9:49 UTC (permalink / raw)
  To: Burakov, Anatoly; +Cc: dev

On Mon, Sep 04, 2017 at 09:18:37AM +0000, Burakov, Anatoly wrote:

Hi Burakov,

> Hi Pavan,
>
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pavan Nikhilesh
> > Sent: Sunday, September 3, 2017 1:36 PM
> > To: dev@dpdk.org
> > Cc: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>;
> > stephen@networkplumber.org; Pavan Nikhilesh
> > <pbhagavatula@caviumnetworks.com>
> > Subject: [dpdk-dev] [PATCH v3 3/3] test: add tests for reciprocal based
> > division
> >
> > This commit provides a set of tests for verifying the correctness and
> > performance of both unsigned 32 and 64bit reciprocal based division.
> >
> > Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com>
> > ---
> >  test/test/Makefile                        |   2 +
> >  test/test/test_reciprocal_division.c      | 109 ++++++++++++++++++
> >  test/test/test_reciprocal_division_perf.c | 181
> > ++++++++++++++++++++++++++++++
> >  3 files changed, 292 insertions(+)
> >  create mode 100644 test/test/test_reciprocal_division.c
> >  create mode 100644 test/test/test_reciprocal_division_perf.c
> >
> > diff --git a/test/test/Makefile b/test/test/Makefile index 42d9a49..6017862
> > 100644
> > --- a/test/test/Makefile
> > +++ b/test/test/Makefile
> > @@ -94,6 +94,8 @@ SRCS-y += test_cycles.c  SRCS-y += test_spinlock.c
> > SRCS-y += test_memory.c  SRCS-y += test_memzone.c
> > +SRCS-y += test_reciprocal_division.c
> > +SRCS-y += test_reciprocal_division_perf.c
> >
> >  SRCS-y += test_ring.c
> >  SRCS-y += test_ring_perf.c
> > diff --git a/test/test/test_reciprocal_division.c
> > b/test/test/test_reciprocal_division.c
> > new file mode 100644
> > index 0000000..771ea64
> > --- /dev/null
> > +++ b/test/test/test_reciprocal_division.c
> > @@ -0,0 +1,109 @@
> > +/*
> > + *   BSD LICENSE
> > + *
> > + *   Copyright (C) Cavium, Inc. 2017.
> > + *
> > + *   Redistribution and use in source and binary forms, with or without
> > + *   modification, are permitted provided that the following conditions
> > + *   are met:
> > + *
> > + *     * Redistributions of source code must retain the above copyright
> > + *       notice, this list of conditions and the following disclaimer.
> > + *     * Redistributions in binary form must reproduce the above copyright
> > + *       notice, this list of conditions and the following disclaimer in
> > + *       the documentation and/or other materials provided with the
> > + *       distribution.
> > + *     * Neither the name of Cavium, Inc nor the names of its
> > + *       contributors may be used to endorse or promote products derived
> > + *       from this software without specific prior written permission.
> > + *
> > + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> > CONTRIBUTORS
> > + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> > NOT
> > + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> > FITNESS FOR
> > + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> > COPYRIGHT
> > + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> > INCIDENTAL,
> > + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> > NOT
> > + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> > OF USE,
> > + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> > AND ON ANY
> > + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> > TORT
> > + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> > THE USE
> > + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> > DAMAGE.
> > + */
> > +
> > +#include "test.h"
> > +
> > +#include <stdio.h>
> > +#include <unistd.h>
> > +#include <inttypes.h>
> > +
> > +#include <rte_common.h>
> > +#include <rte_cycles.h>
> > +#include <rte_random.h>
> > +#include <rte_reciprocal.h>
> > +
> > +#define MAX_ITERATIONS	1000000
> > +#define DIVIDE_ITER		100
> > +
> > +static int
> > +test_reciprocal_division(void)
> > +{
> > +	int i;
> > +	int result = 0;
> > +	uint32_t divisor_u32 = 0;
> > +	uint32_t dividend_u32;
> > +	uint32_t nresult_u32;
> > +	uint32_t rresult_u32;
> > +	uint64_t divisor_u64 = 0;
> > +	uint64_t dividend_u64;
> > +	uint64_t nresult_u64;
> > +	uint64_t rresult_u64;
> > +	struct rte_reciprocal_u32 reci_u32;
> > +	struct rte_reciprocal_u64 reci_u64;
> > +
> > +	rte_srand(rte_rdtsc());
> > +	printf("Validating unsigned 32bit division.\n");
> > +	for (i = 0; i < MAX_ITERATIONS; i++) {
> > +		/* Change divisor every DIVIDE_ITER iterations. */
> > +		if (i % DIVIDE_ITER == 0) {
> > +			divisor_u32 = rte_rand();
> > +			reci_u32 = rte_reciprocal_value_u32(divisor_u32);
> > +		}
> > +
> > +		dividend_u32 = rte_rand();
> > +		nresult_u32 = dividend_u32 / divisor_u32;
> > +		rresult_u32 = rte_reciprocal_divide_u32(dividend_u32,
> > +				&reci_u32);
> > +		if (nresult_u32 != rresult_u32) {
> > +			printf("Division failed, expected %"PRIu32" "
> > +				   "result %"PRIu32"",
> > +					nresult_u32, rresult_u32);
> > +			result = 1;
> > +			break;
> > +		}
> > +	}
> > +
> > +	printf("Validating unsigned 64bit division.\n");
> > +	for (i = 0; i < MAX_ITERATIONS; i++) {
> > +		/* Change divisor every DIVIDE_ITER iterations. */
> > +		if (i % DIVIDE_ITER == 0) {
> > +			divisor_u64 = rte_rand();
> > +			reci_u64 = rte_reciprocal_value_u64(divisor_u64);
> > +		}
> > +
> > +		dividend_u64 = rte_rand();
> > +		nresult_u64 = dividend_u64 / divisor_u64;
> > +		rresult_u64 = rte_reciprocal_divide_u64(dividend_u64,
> > +				&reci_u64);
> > +		if (nresult_u64 != rresult_u64) {
> > +			printf("Division failed, expected %"PRIu64" "
> > +				   "result %"PRIu64"",
> > +					nresult_u64, rresult_u64);
> > +			result = 1;
> > +			break;
> > +		}
> > +	}
> > +
> > +	return result;
> > +}
> > +
> > +REGISTER_TEST_COMMAND(reciprocal_division, test_reciprocal_division);
> > diff --git a/test/test/test_reciprocal_division_perf.c
> > b/test/test/test_reciprocal_division_perf.c
> > new file mode 100644
> > index 0000000..31f069d
> > --- /dev/null
> > +++ b/test/test/test_reciprocal_division_perf.c
> > @@ -0,0 +1,181 @@
> > +/*
> > + *   BSD LICENSE
> > + *
> > + *   Copyright (C) Cavium, Inc. 2017.
> > + *
> > + *   Redistribution and use in source and binary forms, with or without
> > + *   modification, are permitted provided that the following conditions
> > + *   are met:
> > + *
> > + *     * Redistributions of source code must retain the above copyright
> > + *       notice, this list of conditions and the following disclaimer.
> > + *     * Redistributions in binary form must reproduce the above copyright
> > + *       notice, this list of conditions and the following disclaimer in
> > + *       the documentation and/or other materials provided with the
> > + *       distribution.
> > + *     * Neither the name of Cavium, Inc nor the names of its
> > + *       contributors may be used to endorse or promote products derived
> > + *       from this software without specific prior written permission.
> > + *
> > + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> > CONTRIBUTORS
> > + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> > NOT
> > + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> > FITNESS FOR
> > + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> > COPYRIGHT
> > + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> > INCIDENTAL,
> > + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> > NOT
> > + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> > OF USE,
> > + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> > AND ON ANY
> > + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> > TORT
> > + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> > THE USE
> > + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> > DAMAGE.
> > + */
> > +
> > +#include "test.h"
> > +
> > +#include <stdio.h>
> > +#include <unistd.h>
> > +#include <inttypes.h>
> > +
> > +#include <rte_common.h>
> > +#include <rte_cycles.h>
> > +#include <rte_random.h>
> > +#include <rte_reciprocal.h>
> > +
> > +#define MAX_ITERATIONS	1000000
> > +#define DIVIDE_ITER		100
> > +
> > +static int
> > +test_reciprocal_division_perf(void)
> > +{
> > +	int result = 0;
> > +	uint32_t divisor_u32 = 0;
> > +	uint32_t dividend_u32;
> > +	uint32_t nresult_u32;
> > +	uint32_t rresult_u32;
> > +	uint64_t divisor_u64 = 0;
> > +	uint64_t dividend_u64;
> > +	uint64_t nresult_u64;
> > +	uint64_t rresult_u64;
> > +	uint64_t cur_cyc;
> > +	uint64_t tot_cyc_n = 0;
> > +	uint64_t tot_cyc_r = 0;
> > +	uint64_t i;
> > +	struct rte_reciprocal_u32 reci_u32 = {0};
> > +	struct rte_reciprocal_u64 reci_u64 = {0};
> > +
> > +	rte_srand(rte_rdtsc());
> > +	printf("Validating unsigned 32bit division.\n");
> > +	for (i = 0; i < MAX_ITERATIONS; i++) {
> > +		/* Change divisor every DIVIDE_ITER iterations. */
> > +		if (i % DIVIDE_ITER == 0) {
> > +			divisor_u32 = rte_rand();
> > +			reci_u32 = rte_reciprocal_value_u32(divisor_u32);
> > +		}
> > +
> > +		dividend_u32 = rte_rand();
> > +		cur_cyc = rte_rdtsc();
> > +		nresult_u32 = dividend_u32 / divisor_u32;
> > +		tot_cyc_n += rte_rdtsc() - cur_cyc;
> > +
> > +		cur_cyc = rte_rdtsc();
>
> Is there any particular reason for calling rte_rdtsc() twice in a row? Instead of, say, doing something like this:
>
> start = rdtsc();
> // do something
> split = rdtsc();
> // do something else
> end = rdtsc();
> // figure out which cycle value goes where
>

No particular reason for doing so. I will do the changes in v4.

tot_cyc_n += split - start;
tot_cyc_r += end - split;

Thanks for the inputs.
-Pavan.

> > +		rresult_u32 = rte_reciprocal_divide_u32(dividend_u32,
> > +				&reci_u32);
> > +		tot_cyc_r += rte_rdtsc() - cur_cyc;
> > +		if (nresult_u32 != rresult_u32) {
> > +			printf("Division failed, expected %"PRIu32" "
> > +					"result %"PRIu32"",
> > +					nresult_u32, rresult_u32);
> > +			result = 1;
> > +			break;
> > +		}
> > +	}
> > +	printf("32bit Division results:\n");
> > +	printf("Total number of cycles normal division     : %"PRIu64"\n",
> > +			tot_cyc_n);
> > +	printf("Total number of cycles reciprocal division : %"PRIu64"\n",
> > +			tot_cyc_r);
> > +	printf("Cycles per division(normal) : %3.2f\n",
> > +			((double)tot_cyc_n)/i);
> > +	printf("Cycles per division(normal) : %3.2f\n\n",
> > +			((double)tot_cyc_r)/i);
> > +
> > +	tot_cyc_n = 0;
> > +	tot_cyc_r = 0;
> > +	printf("Validating unsigned 64bit division.\n");
> > +
> > +	for (i = 0; i < MAX_ITERATIONS; i++) {
> > +		/* Change divisor every DIVIDE_ITER iterations. */
> > +		if (i % DIVIDE_ITER == 0) {
> > +			divisor_u64 = rte_rand();
> > +			reci_u64 = rte_reciprocal_value_u64(divisor_u64);
> > +		}
> > +
> > +		dividend_u64 = rte_rand();
> > +		cur_cyc = rte_rdtsc();
> > +		nresult_u64 = dividend_u64 / divisor_u64;
> > +		tot_cyc_n += rte_rdtsc() - cur_cyc;
> > +
> > +		cur_cyc = rte_rdtsc();
>
> Same as above
>
> > +		rresult_u64 = rte_reciprocal_divide_u64(dividend_u64,
> > +				&reci_u64);
> > +		tot_cyc_r += rte_rdtsc() - cur_cyc;
> > +		if (nresult_u64 != rresult_u64) {
> > +			printf("Division failed, expected %"PRIu64" "
> > +					"result %"PRIu64"",
> > +					nresult_u64, rresult_u64);
> > +			result = 1;
> > +			break;
> > +		}
> > +	}
> > +	printf("64bit Division results:\n");
> > +	printf("Total number of cycles normal division     : %"PRIu64"\n",
> > +			tot_cyc_n);
> > +	printf("Total number of cycles reciprocal division : %"PRIu64"\n",
> > +			tot_cyc_r);
> > +	printf("Cycles per division(normal) : %3.2f\n",
> > +			((double)tot_cyc_n)/i);
> > +	printf("Cycles per division(normal) : %3.2f\n\n",
> > +			((double)tot_cyc_r)/i);
> > +
> > +	printf("Validating unsigned 64bit division with 32bit divisor.\n");
> > +	for (i = 0; i < MAX_ITERATIONS; i++) {
> > +		/* Change divisor every DIVIDE_ITER iterations. */
> > +		if (i % DIVIDE_ITER == 0) {
> > +			divisor_u64 = rte_rand() >> 32;
> > +			reci_u64 = rte_reciprocal_value_u64(divisor_u64);
> > +		}
> > +
> > +		dividend_u64 = rte_rand();
> > +		cur_cyc = rte_rdtsc();
> > +		nresult_u64 = dividend_u64 / divisor_u64;
> > +		tot_cyc_n += rte_rdtsc() - cur_cyc;
> > +
> > +		cur_cyc = rte_rdtsc();
> > +		rresult_u64 = rte_reciprocal_divide_u64(dividend_u64,
> > +				&reci_u64);
> > +		tot_cyc_r += rte_rdtsc() - cur_cyc;
> > +		if (nresult_u64 != rresult_u64) {
> > +			printf("Division failed, expected %"PRIu64" "
> > +					"result %"PRIu64"",
> > +					nresult_u64, rresult_u64);
> > +			result = 1;
> > +			break;
> > +		}
> > +	}
> > +	printf("64bit Division results:\n");
> > +	printf("Total number of cycles normal division     : %"PRIu64"\n",
> > +			tot_cyc_n);
> > +	printf("Total number of cycles reciprocal division : %"PRIu64"\n",
> > +			tot_cyc_r);
> > +	printf("Cycles per division(normal) : %3.2f\n",
> > +			((double)tot_cyc_n)/i);
> > +	printf("Cycles per division(normal) : %3.2f\n",
> > +			((double)tot_cyc_r)/i);
> > +
> > +	tot_cyc_n = 0;
> > +	tot_cyc_r = 0;
> > +
> > +	return result;
> > +}
> > +
> > +REGISTER_TEST_COMMAND(reciprocal_division_perf,
> > +test_reciprocal_division_perf);
> > --
> > 2.7.4
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 1/3] eal: introduce integer divide through reciprocal
  2017-09-03 12:36 [PATCH v3 1/3] eal: introduce integer divide through reciprocal Pavan Nikhilesh
  2017-09-03 12:36 ` [PATCH v3 2/3] eal: add u64 bit variant for reciprocal Pavan Nikhilesh
  2017-09-03 12:36 ` [PATCH v3 3/3] test: add tests for reciprocal based division Pavan Nikhilesh
@ 2017-09-04 14:01 ` Burakov, Anatoly
  2 siblings, 0 replies; 8+ messages in thread
From: Burakov, Anatoly @ 2017-09-04 14:01 UTC (permalink / raw)
  To: Pavan Nikhilesh, dev; +Cc: Dumitrescu, Cristian, stephen

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pavan Nikhilesh
> Sent: Sunday, September 3, 2017 1:36 PM
> To: dev@dpdk.org
> Cc: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>;
> stephen@networkplumber.org; Pavan Nikhilesh
> <pbhagavatula@caviumnetworks.com>
> Subject: [dpdk-dev] [PATCH v3 1/3] eal: introduce integer divide through
> reciprocal
> 
> In some use cases of integer division, denominator remains constant and
> numerator varies. It is possible to optimize division for such specific
> scenarios.
> 
> The librte_sched uses rte_reciprocal to optimize division so, moving it to
> eal/common would allow other libraries and applications to use it.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com>
> ---

Reviewed-by: Anatoly Burakov <anatoly.burakov@intel.com>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 2/3] eal: add u64 bit variant for reciprocal
  2017-09-03 12:36 ` [PATCH v3 2/3] eal: add u64 bit variant for reciprocal Pavan Nikhilesh
@ 2017-09-04 14:34   ` Burakov, Anatoly
  2017-09-04 14:55     ` Pavan Nikhilesh Bhagavatula
  0 siblings, 1 reply; 8+ messages in thread
From: Burakov, Anatoly @ 2017-09-04 14:34 UTC (permalink / raw)
  To: Pavan Nikhilesh, dev; +Cc: Dumitrescu, Cristian, stephen

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pavan Nikhilesh
> Sent: Sunday, September 3, 2017 1:36 PM
> To: dev@dpdk.org
> Cc: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>;
> stephen@networkplumber.org; Pavan Nikhilesh
> <pbhagavatula@caviumnetworks.com>
> Subject: [dpdk-dev] [PATCH v3 2/3] eal: add u64 bit variant for reciprocal
> 
> Currently, rte_reciprocal only supports unsigned 32bit divisors. This commit
> adds support for unsigned 64bit divisors.
> 
> Rename unsigned 32bit specific functions appropriately and update
> librte_sched accordingly.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com>
> ---
>  lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   3 +-
>  lib/librte_eal/common/include/rte_reciprocal.h  | 111
> ++++++++++++++++++++--
>  lib/librte_eal/common/rte_reciprocal.c          | 120
> +++++++++++++++++++++---
>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   3 +-
>  lib/librte_sched/Makefile                       |   4 +-
>  lib/librte_sched/rte_sched.c                    |   9 +-
>  6 files changed, 222 insertions(+), 28 deletions(-)
> 
> diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> index d0bda66..5fd6101 100644
> --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> @@ -242,6 +242,7 @@ EXPERIMENTAL {
>  DPDK_17.11 {
>  	global:
> 
> -	rte_reciprocal_value;
> +	rte_reciprocal_value_u32;
> +	rte_reciprocal_value_u64;
> 
>  } DPDK_17.08;
> diff --git a/lib/librte_eal/common/include/rte_reciprocal.h
> b/lib/librte_eal/common/include/rte_reciprocal.h
> index b6d752f..801d1c8 100644
> --- a/lib/librte_eal/common/include/rte_reciprocal.h
> +++ b/lib/librte_eal/common/include/rte_reciprocal.h
> @@ -22,22 +22,117 @@
>  #ifndef _RTE_RECIPROCAL_H_
>  #define _RTE_RECIPROCAL_H_
> 
> -#include <stdint.h>
> +#include <rte_memory.h>
> 
> -struct rte_reciprocal {
> +/**
> + * Unsigned 32-bit divisor structure.
> + */
> +struct rte_reciprocal_u32 {
>  	uint32_t m;
>  	uint8_t sh1, sh2;
> -};
> +} __rte_cache_aligned;
> +
> +/**
> + * Unsigned 64-bit divisor structure.
> + */
> +struct rte_reciprocal_u64 {
> +	uint64_t m;
> +	uint8_t sh1;
> +} __rte_cache_aligned;
> 
> +/**
> + * Divide given unsigned 32-bit integer with pre calculated divisor.
> + *
> + * @param a
> + *   The 32-bit dividend.
> + * @param R
> + *   The pointer to pre calculated divisor reciprocal structure.
> + *
> + * @return
> + *   The result of the division
> + */
>  static inline uint32_t
> -rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
> +rte_reciprocal_divide_u32(uint32_t a, struct rte_reciprocal_u32 *R) {
> +	uint32_t t = (((uint64_t)a * R->m) >> 32);
> +
> +	return (t + ((a - t) >> R->sh1)) >> R->sh2; }
> +
> +static inline uint64_t
> +mullhi_u64(uint64_t x, uint64_t y)
> +{
> +#ifdef __SIZEOF_INT128__
> +	__uint128_t xl = x;
> +	__uint128_t rl = xl * y;
> +
> +	return (rl >> 64);
> +#else
> +	uint64_t u0, u1, v0, v1, k, t;
> +	uint64_t w1, w2;
> +	uint64_t whi;
> +
> +	u1 = x >> 32; u0 = x & 0xFFFFFFFF;
> +	v1 = y >> 32; v0 = y & 0xFFFFFFFF;
> +
> +	t = u0*v0;
> +	k = t >> 32;
> +
> +	t = u1*v0 + k;
> +	w1 = t & 0xFFFFFFFF;
> +	w2 = t >> 32;
> +
> +	t = u0*v1 + w1;
> +	k = t >> 32;
> +
> +	whi = u1*v1 + w2 + k;
> +
> +	return whi;
> +#endif
> +}
> +
> +/**
> + * Divide given unsigned 64-bit integer with pre calculated divisor.
> + *
> + * @param a
> + *   The 64-bit dividend.
> + * @param R
> + *   The pointer to pre calculated divisor reciprocal structure.
> + *
> + * @return
> + *   The result of the division
> + */
> +static inline uint64_t
> +rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R)
>  {
> -	uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);
> +	uint64_t q = mullhi_u64(R->m, a);
> +	uint64_t t = ((a - q) >> 1) + q;
> 
> -	return (t + ((a - t) >> R.sh1)) >> R.sh2;
> +	return t >> R->sh1;
>  }
> 
> -struct rte_reciprocal
> -rte_reciprocal_value(uint32_t d);
> +/**
> + * Generate pre calculated divisor structure.
> + *
> + * @param d
> + *   The unsigned 32-bit divisor.
> + *
> + * @return
> + *   Divisor structure.
> + */
> +struct rte_reciprocal_u32
> +rte_reciprocal_value_u32(uint32_t d);
> +
> +/**
> + * Generate pre calculated divisor structure.
> + *
> + * @param d
> + *   The unsigned 64-bit divisor.
> + *
> + * @return
> + *   Divisor structure.
> + */
> +struct rte_reciprocal_u64
> +rte_reciprocal_value_u64(uint64_t d);
> 
>  #endif /* _RTE_RECIPROCAL_H_ */
> diff --git a/lib/librte_eal/common/rte_reciprocal.c
> b/lib/librte_eal/common/rte_reciprocal.c
> index 7ab99b4..5d7e367 100644
> --- a/lib/librte_eal/common/rte_reciprocal.c
> +++ b/lib/librte_eal/common/rte_reciprocal.c
> @@ -31,18 +31,13 @@
>   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
>   */
> 
> -#include <stdio.h>
> -#include <stdint.h>
> -
> -#include <rte_common.h>
> -
> -#include "rte_reciprocal.h"
> +#include <rte_reciprocal.h>
> 
>  /* find largest set bit.
>   * portable and slow but does not matter for this usage.
>   */
>  static inline int
> -fls(uint32_t x)
> +fls_u32(uint32_t x)
>  {
>  	int b;
> 
> @@ -54,21 +49,120 @@ fls(uint32_t x)
>  	return 0;
>  }
> 
> -struct rte_reciprocal
> -rte_reciprocal_value(uint32_t d)
> +struct rte_reciprocal_u32
> +rte_reciprocal_value_u32(uint32_t d)
>  {
> -	struct rte_reciprocal R;
> +	struct rte_reciprocal_u32 R;
>  	uint64_t m;
>  	int l;
> 
> -	l = fls(d - 1);
> +	l = fls_u32(d - 1);
>  	m = ((1ULL << 32) * ((1ULL << l) - d));
>  	m /= d;
> 
>  	++m;
>  	R.m = m;
> -	R.sh1 = RTE_MIN(l, 1);
> -	R.sh2 = RTE_MAX(l - 1, 0);
> +	R.sh1 = l > 1 ? 1 : l;
> +	R.sh2 = (l - 1 > 0) ? l - 1 : 0;

Is there a specific reason for changing from RTE_MIN to what you have? R.sh2 seems identical to the previous version (so reason for change is unclear), and R.sh1 changes behavior (R.sh1 can now potentially be zero, even if in practice it won't) without any explanation given.

Thanks,
Anatoly

> +
> +	return R;
> +}
> +
> +/* Code taken from Hacker's Delight:
> + * http://www.hackersdelight.org/HDcode/divlu.c.
> + * License permits inclusion here per:
> + * http://www.hackersdelight.org/permissions.htm
> + */
> +static inline uint64_t
> +divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t
> +*r) {
> +	const uint64_t b = (1ULL << 32); /* Number base (16 bits). */
> +	uint64_t un1, un0,           /* Norm. dividend LSD's. */
> +			 vn1, vn0,           /* Norm. divisor digits. */
> +			 q1, q0,             /* Quotient digits. */
> +			 un64, un21, un10,   /* Dividend digit pairs. */
> +			 rhat;               /* A remainder. */
> +	int s;                       /* Shift amount for norm. */
> +
> +    /* If overflow, set rem. to an impossible value. */
> +	if (u1 >= v) {
> +		if (r != NULL)
> +			*r = (uint64_t) -1;
> +		return (uint64_t) -1;
> +	}
> +
> +	/* Count leading zeros. */
> +	s = __builtin_clzll(v);
> +	if (s > 0) {
> +		v = v << s;
> +		un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
> +		un10 = u0 << s;
> +	} else {
> +
> +		un64 = u1 | u0;
> +		un10 = u0;
> +	}
> +
> +	vn1 = v >> 32;
> +	vn0 = v & 0xFFFFFFFF;
> +
> +	un1 = un10 >> 32;
> +	un0 = un10 & 0xFFFFFFFF;
> +
> +	q1 = un64/vn1;
> +	rhat = un64 - q1*vn1;
> +again1:
> +	if (q1 >= b || q1*vn0 > b*rhat + un1) {
> +		q1 = q1 - 1;
> +		rhat = rhat + vn1;
> +		if (rhat < b)
> +			goto again1;
> +	}
> +
> +	un21 = un64*b + un1 - q1*v;
> +
> +	q0 = un21/vn1;
> +	rhat = un21 - q0*vn1;
> +again2:
> +	if (q0 >= b || q0*vn0 > b*rhat + un0) {
> +		q0 = q0 - 1;
> +		rhat = rhat + vn1;
> +		if (rhat < b)
> +			goto again2;
> +	}
> +
> +	if (r != NULL)
> +		*r = (un21*b + un0 - q0*v) >> s;
> +	return q1*b + q0;
> +}
> +
> +struct rte_reciprocal_u64
> +rte_reciprocal_value_u64(uint64_t d)
> +{
> +	struct rte_reciprocal_u64 R;
> +
> +	const uint32_t fld = 63 - __builtin_clzll(d);
> +
> +	if ((d & (d - 1)) == 0) {
> +		R.m = 0;
> +		R.sh1 = (fld - 1) | 0x40;
> +	} else {
> +		uint64_t rem;
> +		uint64_t multiplier;
> +		uint8_t more;
> +
> +		multiplier = divide_128_div_64_to_64(1ULL << fld, 0, d,
> &rem);
> +		multiplier += multiplier;
> +
> +		const uint64_t twice_rem = rem + rem;
> +		if (twice_rem >= d || twice_rem < rem)
> +			multiplier += 1;
> +		more = fld;
> +		R.m = 1 + multiplier;
> +		R.sh1 = more | 0x40;
> +	}
> +
> +	R.sh1 &= 0x3F;
> 
>  	return R;
>  }
> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> index 65117cb..63ff2b8 100644
> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> @@ -247,6 +247,7 @@ EXPERIMENTAL {
>  DPDK_17.11 {
>  	global:
> 
> -	rte_reciprocal_value;
> +	rte_reciprocal_value_u32;
> +	rte_reciprocal_value_u64;
> 
>  } DPDK_17.08;
> diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile index
> 569656b..a2fd6f3 100644
> --- a/lib/librte_sched/Makefile
> +++ b/lib/librte_sched/Makefile
> @@ -54,6 +54,8 @@ LIBABIVER := 1
>  SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
> 
>  # install includes
> -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h
> rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h
> rte_bitmap.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_sched_common.h
> +rte_red.h SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include +=
> rte_approx.h
> 
>  include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index
> 3b8ccaa..7bb6d51 100644
> --- a/lib/librte_sched/rte_sched.c
> +++ b/lib/librte_sched/rte_sched.c
> @@ -228,7 +228,7 @@ struct rte_sched_port {
>  	uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU
> cyles */
>  	uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes
> */
>  	uint64_t time;                /* Current NIC TX time measured in bytes */
> -	struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */
> +	struct rte_reciprocal_u32 inv_cycles_per_byte; /* CPU cycles per
> byte
> +*/
> 
>  	/* Scheduling loop detection */
>  	uint32_t pipe_loop;
> @@ -677,7 +677,7 @@ rte_sched_port_config(struct
> rte_sched_port_params *params)
> 
>  	cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
>  		/ params->rate;
> -	port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
> +	port->inv_cycles_per_byte =
> rte_reciprocal_value_u32(cycles_per_byte);
> 
>  	/* Scheduling loop detection */
>  	port->pipe_loop = RTE_SCHED_PIPE_INVALID; @@ -2147,8 +2147,9
> @@ rte_sched_port_time_resync(struct rte_sched_port *port)
>  	uint64_t bytes_diff;
> 
>  	/* Compute elapsed time in bytes */
> -	bytes_diff = rte_reciprocal_divide(cycles_diff <<
> RTE_SCHED_TIME_SHIFT,
> -					   port->inv_cycles_per_byte);
> +	bytes_diff = rte_reciprocal_divide_u32(
> +			cycles_diff << RTE_SCHED_TIME_SHIFT,
> +			&port->inv_cycles_per_byte);
> 
>  	/* Advance port time */
>  	port->time_cpu_cycles = cycles;
> --
> 2.7.4

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 2/3] eal: add u64 bit variant for reciprocal
  2017-09-04 14:34   ` Burakov, Anatoly
@ 2017-09-04 14:55     ` Pavan Nikhilesh Bhagavatula
  0 siblings, 0 replies; 8+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2017-09-04 14:55 UTC (permalink / raw)
  To: Burakov, Anatoly; +Cc: dev

On Mon, Sep 04, 2017 at 02:34:49PM +0000, Burakov, Anatoly wrote:
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pavan Nikhilesh
> > Sent: Sunday, September 3, 2017 1:36 PM
> > To: dev@dpdk.org
> > Cc: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>;
> > stephen@networkplumber.org; Pavan Nikhilesh
> > <pbhagavatula@caviumnetworks.com>
> > Subject: [dpdk-dev] [PATCH v3 2/3] eal: add u64 bit variant for reciprocal
> >
> > Currently, rte_reciprocal only supports unsigned 32bit divisors. This commit
> > adds support for unsigned 64bit divisors.
> >
> > Rename unsigned 32bit specific functions appropriately and update
> > librte_sched accordingly.
> >
> > Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com>
> > ---
> >  lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   3 +-
> >  lib/librte_eal/common/include/rte_reciprocal.h  | 111
> > ++++++++++++++++++++--
> >  lib/librte_eal/common/rte_reciprocal.c          | 120
> > +++++++++++++++++++++---
> >  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   3 +-
> >  lib/librte_sched/Makefile                       |   4 +-
> >  lib/librte_sched/rte_sched.c                    |   9 +-
> >  6 files changed, 222 insertions(+), 28 deletions(-)
> >
> > diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > index d0bda66..5fd6101 100644
> > --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > @@ -242,6 +242,7 @@ EXPERIMENTAL {
> >  DPDK_17.11 {
> >  	global:
> >
> > -	rte_reciprocal_value;
> > +	rte_reciprocal_value_u32;
> > +	rte_reciprocal_value_u64;
> >
> >  } DPDK_17.08;
> > diff --git a/lib/librte_eal/common/include/rte_reciprocal.h
> > b/lib/librte_eal/common/include/rte_reciprocal.h
> > index b6d752f..801d1c8 100644
> > --- a/lib/librte_eal/common/include/rte_reciprocal.h
> > +++ b/lib/librte_eal/common/include/rte_reciprocal.h
> > @@ -22,22 +22,117 @@
> >  #ifndef _RTE_RECIPROCAL_H_
> >  #define _RTE_RECIPROCAL_H_
> >
> > -#include <stdint.h>
> > +#include <rte_memory.h>
> >
> > -struct rte_reciprocal {
> > +/**
> > + * Unsigned 32-bit divisor structure.
> > + */
> > +struct rte_reciprocal_u32 {
> >  	uint32_t m;
> >  	uint8_t sh1, sh2;
> > -};
> > +} __rte_cache_aligned;
> > +
> > +/**
> > + * Unsigned 64-bit divisor structure.
> > + */
> > +struct rte_reciprocal_u64 {
> > +	uint64_t m;
> > +	uint8_t sh1;
> > +} __rte_cache_aligned;
> >
> > +/**
> > + * Divide given unsigned 32-bit integer with pre calculated divisor.
> > + *
> > + * @param a
> > + *   The 32-bit dividend.
> > + * @param R
> > + *   The pointer to pre calculated divisor reciprocal structure.
> > + *
> > + * @return
> > + *   The result of the division
> > + */
> >  static inline uint32_t
> > -rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
> > +rte_reciprocal_divide_u32(uint32_t a, struct rte_reciprocal_u32 *R) {
> > +	uint32_t t = (((uint64_t)a * R->m) >> 32);
> > +
> > +	return (t + ((a - t) >> R->sh1)) >> R->sh2; }
> > +
> > +static inline uint64_t
> > +mullhi_u64(uint64_t x, uint64_t y)
> > +{
> > +#ifdef __SIZEOF_INT128__
> > +	__uint128_t xl = x;
> > +	__uint128_t rl = xl * y;
> > +
> > +	return (rl >> 64);
> > +#else
> > +	uint64_t u0, u1, v0, v1, k, t;
> > +	uint64_t w1, w2;
> > +	uint64_t whi;
> > +
> > +	u1 = x >> 32; u0 = x & 0xFFFFFFFF;
> > +	v1 = y >> 32; v0 = y & 0xFFFFFFFF;
> > +
> > +	t = u0*v0;
> > +	k = t >> 32;
> > +
> > +	t = u1*v0 + k;
> > +	w1 = t & 0xFFFFFFFF;
> > +	w2 = t >> 32;
> > +
> > +	t = u0*v1 + w1;
> > +	k = t >> 32;
> > +
> > +	whi = u1*v1 + w2 + k;
> > +
> > +	return whi;
> > +#endif
> > +}
> > +
> > +/**
> > + * Divide given unsigned 64-bit integer with pre calculated divisor.
> > + *
> > + * @param a
> > + *   The 64-bit dividend.
> > + * @param R
> > + *   The pointer to pre calculated divisor reciprocal structure.
> > + *
> > + * @return
> > + *   The result of the division
> > + */
> > +static inline uint64_t
> > +rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R)
> >  {
> > -	uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);
> > +	uint64_t q = mullhi_u64(R->m, a);
> > +	uint64_t t = ((a - q) >> 1) + q;
> >
> > -	return (t + ((a - t) >> R.sh1)) >> R.sh2;
> > +	return t >> R->sh1;
> >  }
> >
> > -struct rte_reciprocal
> > -rte_reciprocal_value(uint32_t d);
> > +/**
> > + * Generate pre calculated divisor structure.
> > + *
> > + * @param d
> > + *   The unsigned 32-bit divisor.
> > + *
> > + * @return
> > + *   Divisor structure.
> > + */
> > +struct rte_reciprocal_u32
> > +rte_reciprocal_value_u32(uint32_t d);
> > +
> > +/**
> > + * Generate pre calculated divisor structure.
> > + *
> > + * @param d
> > + *   The unsigned 64-bit divisor.
> > + *
> > + * @return
> > + *   Divisor structure.
> > + */
> > +struct rte_reciprocal_u64
> > +rte_reciprocal_value_u64(uint64_t d);
> >
> >  #endif /* _RTE_RECIPROCAL_H_ */
> > diff --git a/lib/librte_eal/common/rte_reciprocal.c
> > b/lib/librte_eal/common/rte_reciprocal.c
> > index 7ab99b4..5d7e367 100644
> > --- a/lib/librte_eal/common/rte_reciprocal.c
> > +++ b/lib/librte_eal/common/rte_reciprocal.c
> > @@ -31,18 +31,13 @@
> >   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> > DAMAGE.
> >   */
> >
> > -#include <stdio.h>
> > -#include <stdint.h>
> > -
> > -#include <rte_common.h>
> > -
> > -#include "rte_reciprocal.h"
> > +#include <rte_reciprocal.h>
> >
> >  /* find largest set bit.
> >   * portable and slow but does not matter for this usage.
> >   */
> >  static inline int
> > -fls(uint32_t x)
> > +fls_u32(uint32_t x)
> >  {
> >  	int b;
> >
> > @@ -54,21 +49,120 @@ fls(uint32_t x)
> >  	return 0;
> >  }
> >
> > -struct rte_reciprocal
> > -rte_reciprocal_value(uint32_t d)
> > +struct rte_reciprocal_u32
> > +rte_reciprocal_value_u32(uint32_t d)
> >  {
> > -	struct rte_reciprocal R;
> > +	struct rte_reciprocal_u32 R;
> >  	uint64_t m;
> >  	int l;
> >
> > -	l = fls(d - 1);
> > +	l = fls_u32(d - 1);
> >  	m = ((1ULL << 32) * ((1ULL << l) - d));
> >  	m /= d;
> >
> >  	++m;
> >  	R.m = m;
> > -	R.sh1 = RTE_MIN(l, 1);
> > -	R.sh2 = RTE_MAX(l - 1, 0);
> > +	R.sh1 = l > 1 ? 1 : l;
> > +	R.sh2 = (l - 1 > 0) ? l - 1 : 0;
>
> Is there a specific reason for changing from RTE_MIN to what you have? R.sh2 seems identical to the previous version (so reason for change is unclear), and R.sh1 changes behavior (R.sh1 can now potentially be zero, even if in practice it won't) without any explanation given.
>

This was a oversight from my end (Testing with standalone app). Thanks for the
catch, will undo in v4.

-Pavan

> Thanks,
> Anatoly
>
> > +
> > +	return R;
> > +}
> > +
> > +/* Code taken from Hacker's Delight:
> > + * http://www.hackersdelight.org/HDcode/divlu.c.
> > + * License permits inclusion here per:
> > + * http://www.hackersdelight.org/permissions.htm
> > + */
> > +static inline uint64_t
> > +divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t
> > +*r) {
> > +	const uint64_t b = (1ULL << 32); /* Number base (16 bits). */
> > +	uint64_t un1, un0,           /* Norm. dividend LSD's. */
> > +			 vn1, vn0,           /* Norm. divisor digits. */
> > +			 q1, q0,             /* Quotient digits. */
> > +			 un64, un21, un10,   /* Dividend digit pairs. */
> > +			 rhat;               /* A remainder. */
> > +	int s;                       /* Shift amount for norm. */
> > +
> > +    /* If overflow, set rem. to an impossible value. */
> > +	if (u1 >= v) {
> > +		if (r != NULL)
> > +			*r = (uint64_t) -1;
> > +		return (uint64_t) -1;
> > +	}
> > +
> > +	/* Count leading zeros. */
> > +	s = __builtin_clzll(v);
> > +	if (s > 0) {
> > +		v = v << s;
> > +		un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
> > +		un10 = u0 << s;
> > +	} else {
> > +
> > +		un64 = u1 | u0;
> > +		un10 = u0;
> > +	}
> > +
> > +	vn1 = v >> 32;
> > +	vn0 = v & 0xFFFFFFFF;
> > +
> > +	un1 = un10 >> 32;
> > +	un0 = un10 & 0xFFFFFFFF;
> > +
> > +	q1 = un64/vn1;
> > +	rhat = un64 - q1*vn1;
> > +again1:
> > +	if (q1 >= b || q1*vn0 > b*rhat + un1) {
> > +		q1 = q1 - 1;
> > +		rhat = rhat + vn1;
> > +		if (rhat < b)
> > +			goto again1;
> > +	}
> > +
> > +	un21 = un64*b + un1 - q1*v;
> > +
> > +	q0 = un21/vn1;
> > +	rhat = un21 - q0*vn1;
> > +again2:
> > +	if (q0 >= b || q0*vn0 > b*rhat + un0) {
> > +		q0 = q0 - 1;
> > +		rhat = rhat + vn1;
> > +		if (rhat < b)
> > +			goto again2;
> > +	}
> > +
> > +	if (r != NULL)
> > +		*r = (un21*b + un0 - q0*v) >> s;
> > +	return q1*b + q0;
> > +}
> > +
> > +struct rte_reciprocal_u64
> > +rte_reciprocal_value_u64(uint64_t d)
> > +{
> > +	struct rte_reciprocal_u64 R;
> > +
> > +	const uint32_t fld = 63 - __builtin_clzll(d);
> > +
> > +	if ((d & (d - 1)) == 0) {
> > +		R.m = 0;
> > +		R.sh1 = (fld - 1) | 0x40;
> > +	} else {
> > +		uint64_t rem;
> > +		uint64_t multiplier;
> > +		uint8_t more;
> > +
> > +		multiplier = divide_128_div_64_to_64(1ULL << fld, 0, d,
> > &rem);
> > +		multiplier += multiplier;
> > +
> > +		const uint64_t twice_rem = rem + rem;
> > +		if (twice_rem >= d || twice_rem < rem)
> > +			multiplier += 1;
> > +		more = fld;
> > +		R.m = 1 + multiplier;
> > +		R.sh1 = more | 0x40;
> > +	}
> > +
> > +	R.sh1 &= 0x3F;
> >
> >  	return R;
> >  }
> > diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > index 65117cb..63ff2b8 100644
> > --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > @@ -247,6 +247,7 @@ EXPERIMENTAL {
> >  DPDK_17.11 {
> >  	global:
> >
> > -	rte_reciprocal_value;
> > +	rte_reciprocal_value_u32;
> > +	rte_reciprocal_value_u64;
> >
> >  } DPDK_17.08;
> > diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile index
> > 569656b..a2fd6f3 100644
> > --- a/lib/librte_sched/Makefile
> > +++ b/lib/librte_sched/Makefile
> > @@ -54,6 +54,8 @@ LIBABIVER := 1
> >  SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
> >
> >  # install includes
> > -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h
> > rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h
> > +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h
> > rte_bitmap.h
> > +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_sched_common.h
> > +rte_red.h SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include +=
> > rte_approx.h
> >
> >  include $(RTE_SDK)/mk/rte.lib.mk
> > diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index
> > 3b8ccaa..7bb6d51 100644
> > --- a/lib/librte_sched/rte_sched.c
> > +++ b/lib/librte_sched/rte_sched.c
> > @@ -228,7 +228,7 @@ struct rte_sched_port {
> >  	uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU
> > cyles */
> >  	uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes
> > */
> >  	uint64_t time;                /* Current NIC TX time measured in bytes */
> > -	struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */
> > +	struct rte_reciprocal_u32 inv_cycles_per_byte; /* CPU cycles per
> > byte
> > +*/
> >
> >  	/* Scheduling loop detection */
> >  	uint32_t pipe_loop;
> > @@ -677,7 +677,7 @@ rte_sched_port_config(struct
> > rte_sched_port_params *params)
> >
> >  	cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
> >  		/ params->rate;
> > -	port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
> > +	port->inv_cycles_per_byte =
> > rte_reciprocal_value_u32(cycles_per_byte);
> >
> >  	/* Scheduling loop detection */
> >  	port->pipe_loop = RTE_SCHED_PIPE_INVALID; @@ -2147,8 +2147,9
> > @@ rte_sched_port_time_resync(struct rte_sched_port *port)
> >  	uint64_t bytes_diff;
> >
> >  	/* Compute elapsed time in bytes */
> > -	bytes_diff = rte_reciprocal_divide(cycles_diff <<
> > RTE_SCHED_TIME_SHIFT,
> > -					   port->inv_cycles_per_byte);
> > +	bytes_diff = rte_reciprocal_divide_u32(
> > +			cycles_diff << RTE_SCHED_TIME_SHIFT,
> > +			&port->inv_cycles_per_byte);
> >
> >  	/* Advance port time */
> >  	port->time_cpu_cycles = cycles;
> > --
> > 2.7.4
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2017-09-04 14:55 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-03 12:36 [PATCH v3 1/3] eal: introduce integer divide through reciprocal Pavan Nikhilesh
2017-09-03 12:36 ` [PATCH v3 2/3] eal: add u64 bit variant for reciprocal Pavan Nikhilesh
2017-09-04 14:34   ` Burakov, Anatoly
2017-09-04 14:55     ` Pavan Nikhilesh Bhagavatula
2017-09-03 12:36 ` [PATCH v3 3/3] test: add tests for reciprocal based division Pavan Nikhilesh
2017-09-04  9:18   ` Burakov, Anatoly
2017-09-04  9:49     ` Pavan Nikhilesh Bhagavatula
2017-09-04 14:01 ` [PATCH v3 1/3] eal: introduce integer divide through reciprocal Burakov, Anatoly

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.