All of lore.kernel.org
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
@ 2020-10-29 15:36 Dharmik Thakkar
  2020-10-29 15:36 ` [dpdk-dev] [PATCH 2/4] test/lpm: return error on failure " Dharmik Thakkar
                   ` (5 more replies)
  0 siblings, 6 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-10-29 15:36 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Honnappa Nagarahalli,
	Ruifeng Wang, Gavin Hu
  Cc: dev, nd, Dharmik Thakkar, stable

Fix incorrect calculations for LPM adds, LPM deletes,
and average cycles in RCU QSBR perf tests

Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 43 ++++++++++++++--------------------------
 1 file changed, 15 insertions(+), 28 deletions(-)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index c5a238b9d1e8..0a2d76a983c3 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv;
 static volatile uint8_t writer_done;
 static volatile uint32_t thr_id;
 static uint64_t gwrite_cycles;
-static uint64_t gwrites;
 /* LPM APIs are not thread safe, use mutex to provide thread safety */
 static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries;
 #define NUM_ROUTE_ENTRIES num_route_entries
 #define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries
 
+#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES)
+
 enum {
 	IP_CLASS_A,
 	IP_CLASS_B,
@@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	uint8_t core_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
 
-	RTE_SET_USED(arg);
 	/* 2 writer threads are used */
 	if (core_id % 2 == 0) {
 		si = 0;
@@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	total_cycles = rte_rdtsc_precise() - begin;
 
 	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
-	__atomic_fetch_add(&gwrites,
-			2 * NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS,
-			__ATOMIC_RELAXED);
 
 	return 0;
 }
@@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
@@ -548,13 +544,10 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES
 		);
 
 	/* Wait and check return value from reader threads */
@@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
 	/* Launch reader threads */
@@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
-		);
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+		/ TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -711,11 +700,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -771,11 +759,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH 2/4] test/lpm: return error on failure in rcu qsbr perf
  2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
@ 2020-10-29 15:36 ` Dharmik Thakkar
  2020-10-29 15:36 ` [dpdk-dev] [PATCH 3/4] test/lpm: remove error checking " Dharmik Thakkar
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-10-29 15:36 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Gavin Hu, Ruifeng Wang,
	Honnappa Nagarahalli
  Cc: dev, nd, Dharmik Thakkar, stable

Return error if Add/Delete fail in multiwriter perf test

Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 0a2d76a983c3..251ea12345ae 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -453,6 +453,8 @@ test_lpm_rcu_qsbr_writer(void *arg)
 					next_hop_add) != 0) {
 				printf("Failed to add iteration %d, route# %d\n",
 					i, j);
+				pthread_mutex_unlock(&lpm_mutex);
+				return -1;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
@@ -464,6 +466,8 @@ test_lpm_rcu_qsbr_writer(void *arg)
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
+				pthread_mutex_unlock(&lpm_mutex);
+				return -1;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH 3/4] test/lpm: remove error checking in rcu qsbr perf
  2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
  2020-10-29 15:36 ` [dpdk-dev] [PATCH 2/4] test/lpm: return error on failure " Dharmik Thakkar
@ 2020-10-29 15:36 ` Dharmik Thakkar
  2020-10-29 15:36 ` [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-10-29 15:36 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Gavin Hu, Ruifeng Wang,
	Honnappa Nagarahalli
  Cc: dev, nd, Dharmik Thakkar, stable

Remove redundant error checking for reader threads
since they never return error.

Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 251ea12345ae..4f15db4f85ee 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -554,11 +554,10 @@ test_lpm_rcu_perf_multi_writer(void)
 		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES
 		);
 
-	/* Wait and check return value from reader threads */
 	writer_done = 1;
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -603,10 +602,9 @@ test_lpm_rcu_perf_multi_writer(void)
 		/ TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
@@ -710,10 +708,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		if (rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -769,11 +766,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			printf("Warning: lcore %u not finished.\n",
-				enabled_core_ids[i]);
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
  2020-10-29 15:36 ` [dpdk-dev] [PATCH 2/4] test/lpm: return error on failure " Dharmik Thakkar
  2020-10-29 15:36 ` [dpdk-dev] [PATCH 3/4] test/lpm: remove error checking " Dharmik Thakkar
@ 2020-10-29 15:36 ` Dharmik Thakkar
  2020-11-02 17:17   ` Medvedkin, Vladimir
  2020-11-02 10:08 ` [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation " David Marchand
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-10-29 15:36 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin; +Cc: dev, nd, Dharmik Thakkar

Avoid code duplication by combining single and multi threaded tests

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 338 +++++++++------------------------------
 1 file changed, 73 insertions(+), 265 deletions(-)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 4f15db4f85ee..08312023b661 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -430,11 +430,16 @@ test_lpm_rcu_qsbr_writer(void *arg)
 {
 	unsigned int i, j, si, ei;
 	uint64_t begin, total_cycles;
-	uint8_t core_id = (uint8_t)((uintptr_t)arg);
+	uint8_t writer_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
 
-	/* 2 writer threads are used */
-	if (core_id % 2 == 0) {
+	/* Single writer (writer_id = 1) */
+	if (writer_id == 1) {
+		si = 0;
+		ei = NUM_LDEPTH_ROUTE_ENTRIES;
+	}
+	/* 2 Writers (writer_id = 2/3)*/
+	else if (writer_id == 2) {
 		si = 0;
 		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
 	} else {
@@ -482,16 +487,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
 
 /*
  * Functional test:
- * 2 writers, rest are readers
+ * 1/2 writers, rest are readers
  */
 static int
-test_lpm_rcu_perf_multi_writer(void)
+test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
 {
 	struct rte_lpm_config config;
 	size_t sz;
-	unsigned int i;
+	unsigned int i, j;
 	uint16_t core_id;
 	struct rte_lpm_rcu_config rcu_cfg = {0};
+	int (*reader_f)(void *arg) = NULL;
 
 	if (rte_lcore_count() < 3) {
 		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
@@ -504,273 +510,76 @@ test_lpm_rcu_perf_multi_writer(void)
 		num_cores++;
 	}
 
-	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES
-		);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
-
-	/* Test without RCU integration */
-	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-
-	return 0;
-
-error:
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	rte_eal_mp_wait_lcore();
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-
-	return -1;
-}
-
-/*
- * Functional test:
- * Single writer, rest are readers
- */
-static int
-test_lpm_rcu_perf(void)
-{
-	struct rte_lpm_config config;
-	uint64_t begin, total_cycles;
-	size_t sz;
-	unsigned int i, j;
-	uint16_t core_id;
-	uint32_t next_hop_add = 0xAA;
-	struct rte_lpm_rcu_config rcu_cfg = {0};
-
-	if (rte_lcore_count() < 2) {
-		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
-		return TEST_SKIPPED;
-	}
-
-	num_cores = 0;
-	RTE_LCORE_FOREACH_WORKER(core_id) {
-		enabled_core_ids[num_cores] = core_id;
-		num_cores++;
-	}
-
-	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+	for (j = 1; j < 3; j++) {
+		if (use_rcu)
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration enabled\n", j, num_cores - j);
+		else
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration disabled\n", j, num_cores - j);
+
+		/* Create LPM table */
+		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.flags = 0;
+		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
+		TEST_LPM_ASSERT(lpm != NULL);
+
+		/* Init RCU variable */
+		if (use_rcu) {
+			sz = rte_rcu_qsbr_get_memsize(num_cores);
+			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
+							RTE_CACHE_LINE_SIZE);
+			rte_rcu_qsbr_init(rv, num_cores);
+
+			rcu_cfg.v = rv;
+			/* Assign the RCU variable to LPM */
+			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
+				printf("RCU variable assignment failed\n");
 				goto error;
 			}
 
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
-	}
-	total_cycles = rte_rdtsc_precise() - begin;
+			reader_f = test_lpm_rcu_qsbr_reader;
+		} else
+			reader_f = test_lpm_reader;
 
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
+		writer_done = 0;
+		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
 
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]);
+		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
+		/* Launch reader threads */
+		for (i = j; i < num_cores; i++)
+			rte_eal_remote_launch(reader_f, NULL,
+						enabled_core_ids[i]);
 
-	/* Test without RCU integration */
-	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
-		num_cores);
+		/* Launch writer threads */
+		for (i = 0; i < j; i++)
+			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
+						(void *)(uintptr_t)(i + j),
+						enabled_core_ids[i]);
 
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+		/* Wait for writer threads */
+		for (i = 0; i < j; i++)
+			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 				goto error;
-			}
 
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
+		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
+		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
+			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+			/ TOTAL_WRITES);
+
+		writer_done = 1;
+		/* Wait until all readers have exited */
+		for (i = j; i < num_cores; i++)
+			rte_eal_wait_lcore(enabled_core_ids[i]);
+
+		rte_lpm_free(lpm);
+		rte_free(rv);
+		lpm = NULL;
+		rv = NULL;
 	}
-	total_cycles = rte_rdtsc_precise() - begin;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
 
 	return 0;
 
@@ -946,9 +755,8 @@ test_lpm_perf(void)
 	rte_lpm_delete_all(lpm);
 	rte_lpm_free(lpm);
 
-	test_lpm_rcu_perf();
-
-	test_lpm_rcu_perf_multi_writer();
+	test_lpm_rcu_perf_multi_writer(0);
+	test_lpm_rcu_perf_multi_writer(1);
 
 	return 0;
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
                   ` (2 preceding siblings ...)
  2020-10-29 15:36 ` [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
@ 2020-11-02 10:08 ` David Marchand
  2020-11-02 15:11 ` Bruce Richardson
  2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  5 siblings, 0 replies; 52+ messages in thread
From: David Marchand @ 2020-11-02 10:08 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin
  Cc: Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu, dev, nd,
	Dharmik Thakkar, dpdk stable

On Thu, Oct 29, 2020 at 4:37 PM Dharmik Thakkar <dharmik.thakkar@arm.com> wrote:
>
> Fix incorrect calculations for LPM adds, LPM deletes,
> and average cycles in RCU QSBR perf tests
>
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
>
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>

Bruce, Vladimir, reviews for this series please?
Thanks.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
                   ` (3 preceding siblings ...)
  2020-11-02 10:08 ` [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation " David Marchand
@ 2020-11-02 15:11 ` Bruce Richardson
  2020-11-02 16:58   ` Dharmik Thakkar
  2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  5 siblings, 1 reply; 52+ messages in thread
From: Bruce Richardson @ 2020-11-02 15:11 UTC (permalink / raw)
  To: Dharmik Thakkar
  Cc: Vladimir Medvedkin, Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu,
	dev, nd, stable

On Thu, Oct 29, 2020 at 10:36:31AM -0500, Dharmik Thakkar wrote:
> Fix incorrect calculations for LPM adds, LPM deletes,
> and average cycles in RCU QSBR perf tests
> 

To help review this patch, could you provide some more details in the
commit log as to what exactly was wrong with the calculation and how this
patch fixes things?

> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-11-02 15:11 ` Bruce Richardson
@ 2020-11-02 16:58   ` Dharmik Thakkar
  2020-11-02 17:21     ` Medvedkin, Vladimir
  2020-11-02 17:33     ` Bruce Richardson
  0 siblings, 2 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-02 16:58 UTC (permalink / raw)
  To: Bruce Richardson
  Cc: Vladimir Medvedkin, Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu,
	dev, nd, dpdk stable


> On Nov 2, 2020, at 9:11 AM, Bruce Richardson <bruce.richardson@intel.com> wrote:
> 
> On Thu, Oct 29, 2020 at 10:36:31AM -0500, Dharmik Thakkar wrote:
>> Fix incorrect calculations for LPM adds, LPM deletes,
>> and average cycles in RCU QSBR perf tests
>> 
> 
> To help review this patch, could you provide some more details in the
> commit log as to what exactly was wrong with the calculation and how this
> patch fixes things?
> 

I will update the commit message in the next version. Adding it here as well:

Since the RCU QSBR tests run for ‘RCU_ITERATIONS’ and not ‘ITERATIONS’,
replace ‘ITERATIONS’ with ‘RCU_ITERATIONS’ for calculating adds, deletes, and cycles.

Also, for multi-writer perf test, each writer only writes half of NUM_LDEPTH_ROUTE_ENTRIES.
For 2 writers, total adds (or deletes) should be (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of
(2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES).

Since, for both the single-writer and multi-writer tests, total adds/deletes is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES),
this has been replaced with a macro ‘TOTAL_WRITES’. Furthermore, ‘gwrites’ has been removed since it is always a fixed value
equal to TOTAL_WRITES.

>> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
>> Cc: honnappa.nagarahalli@arm.com
>> Cc: stable@dpdk.org
>> 
>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-10-29 15:36 ` [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
@ 2020-11-02 17:17   ` Medvedkin, Vladimir
  2020-11-02 22:11     ` Dharmik Thakkar
  0 siblings, 1 reply; 52+ messages in thread
From: Medvedkin, Vladimir @ 2020-11-02 17:17 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson; +Cc: dev, nd

Hi Dharmik,

Thanks for the patches, see comments inlined


On 29/10/2020 15:36, Dharmik Thakkar wrote:
> Avoid code duplication by combining single and multi threaded tests
> 
> Signed-off-by: Dharmik Thakkar<dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>   app/test/test_lpm_perf.c | 338 +++++++++------------------------------
>   1 file changed, 73 insertions(+), 265 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> index 4f15db4f85ee..08312023b661 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -430,11 +430,16 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   {
>   	unsigned int i, j, si, ei;
>   	uint64_t begin, total_cycles;
> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
> +	uint8_t writer_id = (uint8_t)((uintptr_t)arg);
>   	uint32_t next_hop_add = 0xAA;
>   
> -	/* 2 writer threads are used */
> -	if (core_id % 2 == 0) {
> +	/* Single writer (writer_id = 1) */
> +	if (writer_id == 1) {

Probably it would be better to use enum here instead of 1/2/3?

> +		si = 0;
> +		ei = NUM_LDEPTH_ROUTE_ENTRIES;
> +	}
> +	/* 2 Writers (writer_id = 2/3)*/
> +	else if (writer_id == 2) {
>   		si = 0;
>   		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>   	} else {
> @@ -482,16 +487,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   
>   /*
>    * Functional test:
> - * 2 writers, rest are readers
> + * 1/2 writers, rest are readers
>    */
>   static int
> -test_lpm_rcu_perf_multi_writer(void)
> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>   {
>   	struct rte_lpm_config config;
>   	size_t sz;
> -	unsigned int i;
> +	unsigned int i, j;
>   	uint16_t core_id;
>   	struct rte_lpm_rcu_config rcu_cfg = {0};
> +	int (*reader_f)(void *arg) = NULL;
>   
>   	if (rte_lcore_count() < 3) {
>   		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
> @@ -504,273 +510,76 @@ test_lpm_rcu_perf_multi_writer(void)
>   		num_cores++;
>   	}
>   
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES
> -		);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> -
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -
> -	return 0;
> -
> -error:
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	rte_eal_mp_wait_lcore();
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -
> -	return -1;
> -}
> -
> -/*
> - * Functional test:
> - * Single writer, rest are readers
> - */
> -static int
> -test_lpm_rcu_perf(void)
> -{
> -	struct rte_lpm_config config;
> -	uint64_t begin, total_cycles;
> -	size_t sz;
> -	unsigned int i, j;
> -	uint16_t core_id;
> -	uint32_t next_hop_add = 0xAA;
> -	struct rte_lpm_rcu_config rcu_cfg = {0};
> -
> -	if (rte_lcore_count() < 2) {
> -		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
> -		return TEST_SKIPPED;
> -	}
> -
> -	num_cores = 0;
> -	RTE_LCORE_FOREACH_WORKER(core_id) {
> -		enabled_core_ids[num_cores] = core_id;
> -		num_cores++;
> -	}
> -
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route# %d\n",
> -					i, j);
> +	for (j = 1; j < 3; j++) {
> +		if (use_rcu)
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration enabled\n", j, num_cores - j);
> +		else
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration disabled\n", j, num_cores - j);
> +
> +		/* Create LPM table */
> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.flags = 0;
> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> +		TEST_LPM_ASSERT(lpm != NULL);
> +
> +		/* Init RCU variable */
> +		if (use_rcu) {
> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> +							RTE_CACHE_LINE_SIZE);
> +			rte_rcu_qsbr_init(rv, num_cores);
> +
> +			rcu_cfg.v = rv;
> +			/* Assign the RCU variable to LPM */
> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> +				printf("RCU variable assignment failed\n");
>   				goto error;
>   			}
>   
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route# %d\n",
> -					i, j);
> -				goto error;
> -			}
> -	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> +			reader_f = test_lpm_rcu_qsbr_reader;
> +		} else
> +			reader_f = test_lpm_reader;
>   
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> +		writer_done = 0;
> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>   
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]);
> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>   
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> +		/* Launch reader threads */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_remote_launch(reader_f, NULL,
> +						enabled_core_ids[i]);
>   
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
> -		num_cores);
> +		/* Launch writer threads */
> +		for (i = 0; i < j; i++)
> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,

So now even a single writer will acquire a lock for every _add/_delete
operation. I don't think it is necessary.

> +						(void *)(uintptr_t)(i + j),
> +						enabled_core_ids[i]);
>   
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route# %d\n",
> -					i, j);
> +		/* Wait for writer threads */
> +		for (i = 0; i < j; i++)
> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>   				goto error;
> -			}
>   
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route# %d\n",
> -					i, j);
> -				goto error;
> -			}
> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> +			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> +			/ TOTAL_WRITES);
> +
> +		writer_done = 1;
> +		/* Wait until all readers have exited */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_wait_lcore(enabled_core_ids[i]);
> +
> +		rte_lpm_free(lpm);
> +		rte_free(rv);
> +		lpm = NULL;
> +		rv = NULL;
>   	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
>   
>   	return 0;
>   
> @@ -946,9 +755,8 @@ test_lpm_perf(void)
>   	rte_lpm_delete_all(lpm);
>   	rte_lpm_free(lpm);
>   
> -	test_lpm_rcu_perf();
> -
> -	test_lpm_rcu_perf_multi_writer();
> +	test_lpm_rcu_perf_multi_writer(0);
> +	test_lpm_rcu_perf_multi_writer(1);
>   
>   	return 0;
>   }
> 

-- 
Regards,
Vladimir

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-11-02 16:58   ` Dharmik Thakkar
@ 2020-11-02 17:21     ` Medvedkin, Vladimir
  2020-11-02 17:33     ` Bruce Richardson
  1 sibling, 0 replies; 52+ messages in thread
From: Medvedkin, Vladimir @ 2020-11-02 17:21 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson
  Cc: Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu, dev, nd, dpdk stable

Hi Dharmik,

On 02/11/2020 16:58, Dharmik Thakkar wrote:
> 
>> On Nov 2, 2020, at 9:11 AM, Bruce Richardson <bruce.richardson@intel.com> wrote:
>>
>> On Thu, Oct 29, 2020 at 10:36:31AM -0500, Dharmik Thakkar wrote:
>>> Fix incorrect calculations for LPM adds, LPM deletes,
>>> and average cycles in RCU QSBR perf tests
>>>
>>
>> To help review this patch, could you provide some more details in the
>> commit log as to what exactly was wrong with the calculation and how this
>> patch fixes things?
>>
> 
> I will update the commit message in the next version. Adding it here as well:
> 
> Since, rcu qsbr tests run for ‘RCU_ITERATIONS’ and not ‘ITERATIONS’,
> replace ‘ITERATIONS’ with ‘RCU_ITERATIONS’ for calculating adds, deletes, and cycles.
> 
> Also, for multi-writer perf test, each writer only writes half of NUM_LDEPTH_ROUTE_ENTRIES.
> For 2 writers, total adds (or deletes) should be (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of
> (2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES).
> 
> Since, for both the single and multi writer tests, total adds/deletes is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES),
> this has been replaced with a macro ’TOTAL_WRITES’ and furthermore, ‘g_writes’ has been removed since it is always a fixed value
> equal to TOTAL_WRITES.
> 

Thanks for the clarification. I left a few comments regarding the 4th
patch. The first 3 patches LGTM; just put more details in the commit message.

>>> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
>>> Cc: honnappa.nagarahalli@arm.com
>>> Cc: stable@dpdk.org
>>>
>>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> 

-- 
Regards,
Vladimir

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-11-02 16:58   ` Dharmik Thakkar
  2020-11-02 17:21     ` Medvedkin, Vladimir
@ 2020-11-02 17:33     ` Bruce Richardson
  1 sibling, 0 replies; 52+ messages in thread
From: Bruce Richardson @ 2020-11-02 17:33 UTC (permalink / raw)
  To: Dharmik Thakkar
  Cc: Vladimir Medvedkin, Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu,
	dev, nd, dpdk stable

On Mon, Nov 02, 2020 at 04:58:43PM +0000, Dharmik Thakkar wrote:
> 
> > On Nov 2, 2020, at 9:11 AM, Bruce Richardson <bruce.richardson@intel.com> wrote:
> > 
> > On Thu, Oct 29, 2020 at 10:36:31AM -0500, Dharmik Thakkar wrote:
> >> Fix incorrect calculations for LPM adds, LPM deletes,
> >> and average cycles in RCU QSBR perf tests
> >> 
> > 
> > To help review this patch, could you provide some more details in the
> > commit log as to what exactly was wrong with the calculation and how this
> > patch fixes things?
> > 
> 
> I will update the commit message in the next version. Adding it here as well:
> 
> Since, rcu qsbr tests run for ‘RCU_ITERATIONS’ and not ‘ITERATIONS’,
> replace ‘ITERATIONS’ with ‘RCU_ITERATIONS’ for calculating adds, deletes, and cycles.
> 
> Also, for multi-writer perf test, each writer only writes half of NUM_LDEPTH_ROUTE_ENTRIES.
> For 2 writers, total adds (or deletes) should be (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of
> (2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES).
> 
> Since, for both the single and multi writer tests, total adds/deletes is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES),
> this has been replaced with a macro ’TOTAL_WRITES’ and furthermore, ‘g_writes’ has been removed since it is always a fixed value 
> equal to TOTAL_WRITES.
> 
Thanks for the clear explanation.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-02 17:17   ` Medvedkin, Vladimir
@ 2020-11-02 22:11     ` Dharmik Thakkar
  0 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-02 22:11 UTC (permalink / raw)
  To: Medvedkin, Vladimir; +Cc: Bruce Richardson, dev, nd


> On Nov 2, 2020, at 11:17 AM, Medvedkin, Vladimir <vladimir.medvedkin@intel.com> wrote:
> 
> Hi Dharmik,
> 
> Thanks for the patches, see comments inlined
> 
> 
> On 29/10/2020 15:36, Dharmik Thakkar wrote:
>> Avoid code duplication by combining single and multi threaded tests
>> Signed-off-by: Dharmik Thakkar<dharmik.thakkar@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>> ---
>>  app/test/test_lpm_perf.c | 338 +++++++++------------------------------
>>  1 file changed, 73 insertions(+), 265 deletions(-)
>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
>> index 4f15db4f85ee..08312023b661 100644
>> --- a/app/test/test_lpm_perf.c
>> +++ b/app/test/test_lpm_perf.c
>> @@ -430,11 +430,16 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>  {
>>  	unsigned int i, j, si, ei;
>>  	uint64_t begin, total_cycles;
>> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>> +	uint8_t writer_id = (uint8_t)((uintptr_t)arg);
>>  	uint32_t next_hop_add = 0xAA;
>>  -	/* 2 writer threads are used */
>> -	if (core_id % 2 == 0) {
>> +	/* Single writer (writer_id = 1) */
>> +	if (writer_id == 1) {
> 
> Probably it would be better to use enum here instead of 1/2/3?
> 

Yes, I will update the patch.

>> +		si = 0;
>> +		ei = NUM_LDEPTH_ROUTE_ENTRIES;
>> +	}
>> +	/* 2 Writers (writer_id = 2/3)*/
>> +	else if (writer_id == 2) {
>>  		si = 0;
>>  		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>>  	} else {
>> @@ -482,16 +487,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>    /*
>>   * Functional test:
>> - * 2 writers, rest are readers
>> + * 1/2 writers, rest are readers
>>   */
>>  static int
>> -test_lpm_rcu_perf_multi_writer(void)
>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>>  {
>>  	struct rte_lpm_config config;
>>  	size_t sz;
>> -	unsigned int i;
>> +	unsigned int i, j;
>>  	uint16_t core_id;
>>  	struct rte_lpm_rcu_config rcu_cfg = {0};
>> +	int (*reader_f)(void *arg) = NULL;
>>    	if (rte_lcore_count() < 3) {
>>  		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
>> @@ -504,273 +510,76 @@ test_lpm_rcu_perf_multi_writer(void)
>>  		num_cores++;
>>  	}
>>  -	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
>> -		num_cores - 2);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	/* Init RCU variable */
>> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
>> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> -						RTE_CACHE_LINE_SIZE);
>> -	rte_rcu_qsbr_init(rv, num_cores);
>> -
>> -	rcu_cfg.v = rv;
>> -	/* Assign the RCU variable to LPM */
>> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> -		printf("RCU variable assignment failed\n");
>> -		goto error;
>> -	}
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> -
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Launch writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> -					(void *)(uintptr_t)i,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Wait for writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES
>> -		);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -	lpm = NULL;
>> -	rv = NULL;
>> -
>> -	/* Test without RCU integration */
>> -	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
>> -		num_cores - 2);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Launch writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> -					(void *)(uintptr_t)i,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Wait for writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> -		/ TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -
>> -	return 0;
>> -
>> -error:
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	rte_eal_mp_wait_lcore();
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -
>> -	return -1;
>> -}
>> -
>> -/*
>> - * Functional test:
>> - * Single writer, rest are readers
>> - */
>> -static int
>> -test_lpm_rcu_perf(void)
>> -{
>> -	struct rte_lpm_config config;
>> -	uint64_t begin, total_cycles;
>> -	size_t sz;
>> -	unsigned int i, j;
>> -	uint16_t core_id;
>> -	uint32_t next_hop_add = 0xAA;
>> -	struct rte_lpm_rcu_config rcu_cfg = {0};
>> -
>> -	if (rte_lcore_count() < 2) {
>> -		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
>> -		return TEST_SKIPPED;
>> -	}
>> -
>> -	num_cores = 0;
>> -	RTE_LCORE_FOREACH_WORKER(core_id) {
>> -		enabled_core_ids[num_cores] = core_id;
>> -		num_cores++;
>> -	}
>> -
>> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
>> -		num_cores);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	/* Init RCU variable */
>> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
>> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> -						RTE_CACHE_LINE_SIZE);
>> -	rte_rcu_qsbr_init(rv, num_cores);
>> -
>> -	rcu_cfg.v = rv;
>> -	/* Assign the RCU variable to LPM */
>> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> -		printf("RCU variable assignment failed\n");
>> -		goto error;
>> -	}
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Measure add/delete. */
>> -	begin = rte_rdtsc_precise();
>> -	for (i = 0; i < RCU_ITERATIONS; i++) {
>> -		/* Add all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> -					large_ldepth_route_table[j].depth,
>> -					next_hop_add) != 0) {
>> -				printf("Failed to add iteration %d, route# %d\n",
>> -					i, j);
>> +	for (j = 1; j < 3; j++) {
>> +		if (use_rcu)
>> +			printf("\nPerf test: %d writer(s), %d reader(s),"
>> +			       " RCU integration enabled\n", j, num_cores - j);
>> +		else
>> +			printf("\nPerf test: %d writer(s), %d reader(s),"
>> +			       " RCU integration disabled\n", j, num_cores - j);
>> +
>> +		/* Create LPM table */
>> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> +		config.flags = 0;
>> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> +		TEST_LPM_ASSERT(lpm != NULL);
>> +
>> +		/* Init RCU variable */
>> +		if (use_rcu) {
>> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
>> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> +							RTE_CACHE_LINE_SIZE);
>> +			rte_rcu_qsbr_init(rv, num_cores);
>> +
>> +			rcu_cfg.v = rv;
>> +			/* Assign the RCU variable to LPM */
>> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> +				printf("RCU variable assignment failed\n");
>>  				goto error;
>>  			}
>>  -		/* Delete all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>> -				large_ldepth_route_table[j].depth) != 0) {
>> -				printf("Failed to delete iteration %d, route# %d\n",
>> -					i, j);
>> -				goto error;
>> -			}
>> -	}
>> -	total_cycles = rte_rdtsc_precise() - begin;
>> +			reader_f = test_lpm_rcu_qsbr_reader;
>> +		} else
>> +			reader_f = test_lpm_reader;
>>  -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %g cycles\n",
>> -		(double)total_cycles / TOTAL_WRITES);
>> +		writer_done = 0;
>> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>  -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 0; i < num_cores; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]);
>> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>  -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -	lpm = NULL;
>> -	rv = NULL;
>> +		/* Launch reader threads */
>> +		for (i = j; i < num_cores; i++)
>> +			rte_eal_remote_launch(reader_f, NULL,
>> +						enabled_core_ids[i]);
>>  -	/* Test without RCU integration */
>> -	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
>> -		num_cores);
>> +		/* Launch writer threads */
>> +		for (i = 0; i < j; i++)
>> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> 
> So now even single writer will acquire a lock for every _add/_delete operation. I don't think it is necessary.

Yes, agreed, it is not necessary. I wanted to avoid an additional if () statement, but I can add it in the new version.

> 
>> +						(void *)(uintptr_t)(i + j),
>> +						enabled_core_ids[i]);
>>  -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Measure add/delete. */
>> -	begin = rte_rdtsc_precise();
>> -	for (i = 0; i < RCU_ITERATIONS; i++) {
>> -		/* Add all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> -					large_ldepth_route_table[j].depth,
>> -					next_hop_add) != 0) {
>> -				printf("Failed to add iteration %d, route# %d\n",
>> -					i, j);
>> +		/* Wait for writer threads */
>> +		for (i = 0; i < j; i++)
>> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>>  				goto error;
>> -			}
>>  -		/* Delete all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>> -				large_ldepth_route_table[j].depth) != 0) {
>> -				printf("Failed to delete iteration %d, route# %d\n",
>> -					i, j);
>> -				goto error;
>> -			}
>> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> +			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> +			/ TOTAL_WRITES);
>> +
>> +		writer_done = 1;
>> +		/* Wait until all readers have exited */
>> +		for (i = j; i < num_cores; i++)
>> +			rte_eal_wait_lcore(enabled_core_ids[i]);
>> +
>> +		rte_lpm_free(lpm);
>> +		rte_free(rv);
>> +		lpm = NULL;
>> +		rv = NULL;
>>  	}
>> -	total_cycles = rte_rdtsc_precise() - begin;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %g cycles\n",
>> -		(double)total_cycles / TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>>    	return 0;
>>  @@ -946,9 +755,8 @@ test_lpm_perf(void)
>>  	rte_lpm_delete_all(lpm);
>>  	rte_lpm_free(lpm);
>>  -	test_lpm_rcu_perf();
>> -
>> -	test_lpm_rcu_perf_multi_writer();
>> +	test_lpm_rcu_perf_multi_writer(0);
>> +	test_lpm_rcu_perf_multi_writer(1);
>>    	return 0;
>>  }
> 
> -- 
> Regards,
> Vladimir


^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test
  2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
                   ` (4 preceding siblings ...)
  2020-11-02 15:11 ` Bruce Richardson
@ 2020-11-02 23:51 ` Dharmik Thakkar
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
                     ` (4 more replies)
  5 siblings, 5 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-02 23:51 UTC (permalink / raw)
  Cc: dev, nd, Dharmik Thakkar

Fix LPM adds, LPM deletes, and cycle calculation.
Return error if LPM add/delete fails in multi-writer test.
Remove redundant error checking for readers.
Combine single and multi threaded test cases to avoid code duplication.
---
v2:
 - Add more details about the fix to the commit message
 - Replace hard coded values with an enum
 - Remove lock acquire/release for single writer

Dharmik Thakkar (4):
  test/lpm: fix cycle calculation in rcu qsbr perf
  test/lpm: return error on failure in rcu qsbr perf
  test/lpm: remove error checking in rcu qsbr perf
  test/lpm: avoid code duplication in rcu qsbr perf

 app/test/test_lpm_perf.c | 384 ++++++++++-----------------------------
 1 file changed, 95 insertions(+), 289 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
@ 2020-11-02 23:52   ` Dharmik Thakkar
  2020-11-03  1:30     ` Honnappa Nagarahalli
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure " Dharmik Thakkar
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-02 23:52 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Honnappa Nagarahalli,
	Ruifeng Wang, Gavin Hu
  Cc: dev, nd, Dharmik Thakkar, stable

Fix incorrect calculations for LPM adds, LPM deletes,
and average cycles in RCU QSBR perf tests

Since the RCU QSBR tests run for 'RCU_ITERATIONS' and not
'ITERATIONS', replace 'ITERATIONS' with 'RCU_ITERATIONS'
when calculating adds, deletes, and cycles.

Also, for multi-writer perf test, each writer only writes
half of NUM_LDEPTH_ROUTE_ENTRIES.
For 2 writers, total adds (or deletes) should be
(RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of
(2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES).

Since the total number of adds/deletes is equal to
(RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) for both the single and
multi writer tests, this expression has been replaced with the macro
'TOTAL_WRITES'. Furthermore, 'gwrites' has been removed since it is
always a fixed value equal to TOTAL_WRITES.

Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 45 ++++++++++++++--------------------------
 1 file changed, 16 insertions(+), 29 deletions(-)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index c5a238b9d1e8..45164b23214b 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv;
 static volatile uint8_t writer_done;
 static volatile uint32_t thr_id;
 static uint64_t gwrite_cycles;
-static uint64_t gwrites;
 /* LPM APIs are not thread safe, use mutex to provide thread safety */
 static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries;
 #define NUM_ROUTE_ENTRIES num_route_entries
 #define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries
 
+#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES)
+
 enum {
 	IP_CLASS_A,
 	IP_CLASS_B,
@@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	uint8_t core_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
 
-	RTE_SET_USED(arg);
 	/* 2 writer threads are used */
 	if (core_id % 2 == 0) {
 		si = 0;
@@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	total_cycles = rte_rdtsc_precise() - begin;
 
 	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
-	__atomic_fetch_add(&gwrites,
-			2 * NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS,
-			__ATOMIC_RELAXED);
 
 	return 0;
 }
@@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
@@ -548,14 +544,11 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
-		);
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+		/ TOTAL_WRITES);
 
 	/* Wait and check return value from reader threads */
 	writer_done = 1;
@@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
 	/* Launch reader threads */
@@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
-		);
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+		/ TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -711,11 +700,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -771,11 +759,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure in rcu qsbr perf
  2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
@ 2020-11-02 23:52   ` Dharmik Thakkar
  2020-11-03  1:28     ` Honnappa Nagarahalli
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking " Dharmik Thakkar
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-02 23:52 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Honnappa Nagarahalli,
	Gavin Hu, Ruifeng Wang
  Cc: dev, nd, Dharmik Thakkar, stable

Return an error if an LPM add/delete fails in the multi-writer perf test.

Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 45164b23214b..55084816ab91 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -453,6 +453,8 @@ test_lpm_rcu_qsbr_writer(void *arg)
 					next_hop_add) != 0) {
 				printf("Failed to add iteration %d, route# %d\n",
 					i, j);
+				pthread_mutex_unlock(&lpm_mutex);
+				return -1;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
@@ -464,6 +466,8 @@ test_lpm_rcu_qsbr_writer(void *arg)
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
+				pthread_mutex_unlock(&lpm_mutex);
+				return -1;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking in rcu qsbr perf
  2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure " Dharmik Thakkar
@ 2020-11-02 23:52   ` Dharmik Thakkar
  2020-11-03  1:21     ` Honnappa Nagarahalli
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
  2020-11-03  5:12   ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-02 23:52 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Ruifeng Wang,
	Honnappa Nagarahalli, Gavin Hu
  Cc: dev, nd, Dharmik Thakkar, stable

Remove redundant error checking for reader threads
since they never return error.

Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 55084816ab91..224c92fa3d65 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -554,11 +554,10 @@ test_lpm_rcu_perf_multi_writer(void)
 		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
 		/ TOTAL_WRITES);
 
-	/* Wait and check return value from reader threads */
 	writer_done = 1;
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -603,10 +602,9 @@ test_lpm_rcu_perf_multi_writer(void)
 		/ TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
@@ -710,10 +708,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		if (rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -769,11 +766,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			printf("Warning: lcore %u not finished.\n",
-				enabled_core_ids[i]);
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
                     ` (2 preceding siblings ...)
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking " Dharmik Thakkar
@ 2020-11-02 23:52   ` Dharmik Thakkar
  2020-11-03  4:21     ` Honnappa Nagarahalli
  2020-11-03  5:12   ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-02 23:52 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin; +Cc: dev, nd, Dharmik Thakkar

Avoid code duplication by combining single and multi threaded tests

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 362 ++++++++++-----------------------------
 1 file changed, 91 insertions(+), 271 deletions(-)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 224c92fa3d65..229c835c23f7 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -67,6 +67,12 @@ enum {
 	IP_CLASS_C
 };
 
+enum {
+	SINGLE_WRITER = 1,
+	MULTI_WRITER_1,
+	MULTI_WRITER_2
+};
+
 /* struct route_rule_count defines the total number of rules in following a/b/c
  * each item in a[]/b[]/c[] is the number of common IP address class A/B/C, not
  * including the ones for private local network.
@@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg)
 {
 	unsigned int i, j, si, ei;
 	uint64_t begin, total_cycles;
-	uint8_t core_id = (uint8_t)((uintptr_t)arg);
+	uint8_t writer_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
 
-	/* 2 writer threads are used */
-	if (core_id % 2 == 0) {
+	/* Single writer (writer_id = 1) */
+	if (writer_id == SINGLE_WRITER) {
+		si = 0;
+		ei = NUM_LDEPTH_ROUTE_ENTRIES;
+	}
+	/* 2 Writers (writer_id = 2/3)*/
+	else if (writer_id == MULTI_WRITER_1) {
 		si = 0;
 		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
 	} else {
@@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	for (i = 0; i < RCU_ITERATIONS; i++) {
 		/* Add all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
 					large_ldepth_route_table[j].depth,
 					next_hop_add) != 0) {
 				printf("Failed to add iteration %d, route# %d\n",
 					i, j);
-				pthread_mutex_unlock(&lpm_mutex);
+				if (writer_id != SINGLE_WRITER)
+					pthread_mutex_unlock(&lpm_mutex);
 				return -1;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 
 		/* Delete all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
-				pthread_mutex_unlock(&lpm_mutex);
+				if (writer_id != SINGLE_WRITER)
+					pthread_mutex_unlock(&lpm_mutex);
 				return -1;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 	}
 
@@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
 
 /*
  * Functional test:
- * 2 writers, rest are readers
+ * 1/2 writers, rest are readers
  */
 static int
-test_lpm_rcu_perf_multi_writer(void)
+test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
 {
 	struct rte_lpm_config config;
 	size_t sz;
-	unsigned int i;
+	unsigned int i, j;
 	uint16_t core_id;
 	struct rte_lpm_rcu_config rcu_cfg = {0};
+	int (*reader_f)(void *arg) = NULL;
 
 	if (rte_lcore_count() < 3) {
 		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
@@ -504,273 +522,76 @@ test_lpm_rcu_perf_multi_writer(void)
 		num_cores++;
 	}
 
-	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
-
-	/* Test without RCU integration */
-	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-
-	return 0;
-
-error:
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	rte_eal_mp_wait_lcore();
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-
-	return -1;
-}
-
-/*
- * Functional test:
- * Single writer, rest are readers
- */
-static int
-test_lpm_rcu_perf(void)
-{
-	struct rte_lpm_config config;
-	uint64_t begin, total_cycles;
-	size_t sz;
-	unsigned int i, j;
-	uint16_t core_id;
-	uint32_t next_hop_add = 0xAA;
-	struct rte_lpm_rcu_config rcu_cfg = {0};
-
-	if (rte_lcore_count() < 2) {
-		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
-		return TEST_SKIPPED;
-	}
-
-	num_cores = 0;
-	RTE_LCORE_FOREACH_WORKER(core_id) {
-		enabled_core_ids[num_cores] = core_id;
-		num_cores++;
-	}
-
-	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+	for (j = 1; j < 3; j++) {
+		if (use_rcu)
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration enabled\n", j, num_cores - j);
+		else
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration disabled\n", j, num_cores - j);
+
+		/* Create LPM table */
+		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.flags = 0;
+		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
+		TEST_LPM_ASSERT(lpm != NULL);
+
+		/* Init RCU variable */
+		if (use_rcu) {
+			sz = rte_rcu_qsbr_get_memsize(num_cores);
+			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
+							RTE_CACHE_LINE_SIZE);
+			rte_rcu_qsbr_init(rv, num_cores);
+
+			rcu_cfg.v = rv;
+			/* Assign the RCU variable to LPM */
+			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
+				printf("RCU variable assignment failed\n");
 				goto error;
 			}
 
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
-	}
-	total_cycles = rte_rdtsc_precise() - begin;
+			reader_f = test_lpm_rcu_qsbr_reader;
+		} else
+			reader_f = test_lpm_reader;
 
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
+		writer_done = 0;
+		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
 
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
-
-	/* Test without RCU integration */
-	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
+		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
+		/* Launch reader threads */
+		for (i = j; i < num_cores; i++)
+			rte_eal_remote_launch(reader_f, NULL,
+						enabled_core_ids[i]);
 
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
+		/* Launch writer threads */
+		for (i = 0; i < j; i++)
+			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
+						(void *)(uintptr_t)(i + j),
+						enabled_core_ids[i]);
 
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+		/* Wait for writer threads */
+		for (i = 0; i < j; i++)
+			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 				goto error;
-			}
 
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
+		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
+		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
+			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+			/ TOTAL_WRITES);
+
+		writer_done = 1;
+		/* Wait until all readers have exited */
+		for (i = j; i < num_cores; i++)
+			rte_eal_wait_lcore(enabled_core_ids[i]);
+
+		rte_lpm_free(lpm);
+		rte_free(rv);
+		lpm = NULL;
+		rv = NULL;
 	}
-	total_cycles = rte_rdtsc_precise() - begin;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
 
 	return 0;
 
@@ -946,9 +767,8 @@ test_lpm_perf(void)
 	rte_lpm_delete_all(lpm);
 	rte_lpm_free(lpm);
 
-	test_lpm_rcu_perf();
-
-	test_lpm_rcu_perf_multi_writer();
+	test_lpm_rcu_perf_multi_writer(0);
+	test_lpm_rcu_perf_multi_writer(1);
 
 	return 0;
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking in rcu qsbr perf
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking " Dharmik Thakkar
@ 2020-11-03  1:21     ` Honnappa Nagarahalli
  2020-11-03  4:56       ` Dharmik Thakkar
  0 siblings, 1 reply; 52+ messages in thread
From: Honnappa Nagarahalli @ 2020-11-03  1:21 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin,
	Ruifeng Wang, Gavin Hu
  Cc: dev, nd, Dharmik Thakkar, stable, Honnappa Nagarahalli, nd

<snip>

> 
> Remove redundant error checking for reader threads since they never return
> error.
> 
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  app/test/test_lpm_perf.c | 21 ++++++++-------------
>  1 file changed, 8 insertions(+), 13 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
> 55084816ab91..224c92fa3d65 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -554,11 +554,10 @@ test_lpm_rcu_perf_multi_writer(void)
>  		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>  		/ TOTAL_WRITES);
> 
> -	/* Wait and check return value from reader threads */
>  	writer_done = 1;
> +	/* Wait until all readers have exited */
>  	for (i = 2; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
> 
>  	rte_lpm_free(lpm);
>  	rte_free(rv);
> @@ -603,10 +602,9 @@ test_lpm_rcu_perf_multi_writer(void)
>  		/ TOTAL_WRITES);
> 
>  	writer_done = 1;
> -	/* Wait and check return value from reader threads */
> +	/* Wait until all readers have exited */
>  	for (i = 2; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
> 
>  	rte_lpm_free(lpm);
> 
> @@ -710,10 +708,9 @@ test_lpm_rcu_perf(void)
>  		(double)total_cycles / TOTAL_WRITES);
> 
>  	writer_done = 1;
> -	/* Wait and check return value from reader threads */
> +	/* Wait until all readers have exited */
>  	for (i = 0; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> +		if (rte_eal_wait_lcore(enabled_core_ids[i]);
                             ^^ Do we need the 'if' statement?
> 
>  	rte_lpm_free(lpm);
>  	rte_free(rv);
> @@ -769,11 +766,9 @@ test_lpm_rcu_perf(void)
>  		(double)total_cycles / TOTAL_WRITES);
> 
>  	writer_done = 1;
> -	/* Wait and check return value from reader threads */
> +	/* Wait until all readers have exited */
>  	for (i = 0; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			printf("Warning: lcore %u not finished.\n",
> -				enabled_core_ids[i]);
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
> 
>  	rte_lpm_free(lpm);
> 
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure in rcu qsbr perf
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure " Dharmik Thakkar
@ 2020-11-03  1:28     ` Honnappa Nagarahalli
  2020-11-03  4:42       ` Dharmik Thakkar
  0 siblings, 1 reply; 52+ messages in thread
From: Honnappa Nagarahalli @ 2020-11-03  1:28 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin, Gavin Hu,
	Ruifeng Wang
  Cc: dev, nd, Dharmik Thakkar, stable, Honnappa Nagarahalli, nd

<snip>

> 
> Return error if Add/Delete fail in multiwriter perf test
> 
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  app/test/test_lpm_perf.c | 4 ++++
>  1 file changed, 4 insertions(+)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
> 45164b23214b..55084816ab91 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -453,6 +453,8 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  					next_hop_add) != 0) {
>  				printf("Failed to add iteration %d, route#
> %d\n",
>  					i, j);
> +				pthread_mutex_unlock(&lpm_mutex);
> +				return -1;
It would be good to use the "goto error" pattern that other functions in this file use.

>  			}
>  			pthread_mutex_unlock(&lpm_mutex);
>  		}
> @@ -464,6 +466,8 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  				large_ldepth_route_table[j].depth) != 0) {
>  				printf("Failed to delete iteration %d, route#
> %d\n",
>  					i, j);
> +				pthread_mutex_unlock(&lpm_mutex);
> +				return -1;
>  			}
>  			pthread_mutex_unlock(&lpm_mutex);
>  		}
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
@ 2020-11-03  1:30     ` Honnappa Nagarahalli
  0 siblings, 0 replies; 52+ messages in thread
From: Honnappa Nagarahalli @ 2020-11-03  1:30 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin,
	Ruifeng Wang, Gavin Hu
  Cc: dev, nd, Dharmik Thakkar, stable, Honnappa Nagarahalli, nd

<snip>

> 
> Fix incorrect calculations for LPM adds, LPM deletes, and average cycles in
> RCU QSBR perf tests
> 
> Since, rcu qsbr tests run for 'RCU_ITERATIONS' and not 'ITERATIONS', replace
> 'ITERATIONS' with 'RCU_ITERATIONS'
> for calculating adds, deletes, and cycles.
> 
> Also, for multi-writer perf test, each writer only writes half of
> NUM_LDEPTH_ROUTE_ENTRIES.
> For 2 writers, total adds (or deletes) should be (RCU_ITERATIONS *
> NUM_LDEPTH_ROUTE_ENTRIES) instead of
> (2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES).
> 
> Since, for both the single and multi writer tests, total adds/deletes is equal to
> (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES), this has been replaced
> with a macro 'TOTAL_WRITES' and furthermore, 'g_writes' has been removed
> since it is always a fixed value equal to TOTAL_WRITES.
> 
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Looks good
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

> ---
>  app/test/test_lpm_perf.c | 45 ++++++++++++++--------------------------
>  1 file changed, 16 insertions(+), 29 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
> c5a238b9d1e8..45164b23214b 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv;  static volatile uint8_t
> writer_done;  static volatile uint32_t thr_id;  static uint64_t gwrite_cycles; -
> static uint64_t gwrites;
>  /* LPM APIs are not thread safe, use mutex to provide thread safety */
> static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
> 
> @@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries;  #define
> NUM_ROUTE_ENTRIES num_route_entries  #define
> NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries
> 
> +#define TOTAL_WRITES (RCU_ITERATIONS *
> NUM_LDEPTH_ROUTE_ENTRIES)
> +
>  enum {
>  	IP_CLASS_A,
>  	IP_CLASS_B,
> @@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>  	uint32_t next_hop_add = 0xAA;
> 
> -	RTE_SET_USED(arg);
>  	/* 2 writer threads are used */
>  	if (core_id % 2 == 0) {
>  		si = 0;
> @@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  	total_cycles = rte_rdtsc_precise() - begin;
> 
>  	__atomic_fetch_add(&gwrite_cycles, total_cycles,
> __ATOMIC_RELAXED);
> -	__atomic_fetch_add(&gwrites,
> -			2 * NUM_LDEPTH_ROUTE_ENTRIES *
> RCU_ITERATIONS,
> -			__ATOMIC_RELAXED);
> 
>  	return 0;
>  }
> @@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void)
> 
>  	writer_done = 0;
>  	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
> 
>  	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> 
> @@ -548,14 +544,11 @@ test_lpm_rcu_perf_multi_writer(void)
>  		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>  			goto error;
> 
> -	printf("Total LPM Adds: %d\n",
> -		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> -	printf("Total LPM Deletes: %d\n",
> -		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> +	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>  	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
> -			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
> -		);
> +		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> +		/ TOTAL_WRITES);
> 
>  	/* Wait and check return value from reader threads */
>  	writer_done = 1;
> @@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void)
> 
>  	writer_done = 0;
>  	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
>  	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> 
>  	/* Launch reader threads */
> @@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void)
>  		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>  			goto error;
> 
> -	printf("Total LPM Adds: %d\n",
> -		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> -	printf("Total LPM Deletes: %d\n",
> -		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> +	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>  	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
> -			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
> -		);
> +		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> +		/ TOTAL_WRITES);
> 
>  	writer_done = 1;
>  	/* Wait and check return value from reader threads */ @@ -711,11
> +700,10 @@ test_lpm_rcu_perf(void)
>  	}
>  	total_cycles = rte_rdtsc_precise() - begin;
> 
> -	printf("Total LPM Adds: %d\n", ITERATIONS *
> NUM_LDEPTH_ROUTE_ENTRIES);
> -	printf("Total LPM Deletes: %d\n",
> -		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> +	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>  	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES *
> ITERATIONS));
> +		(double)total_cycles / TOTAL_WRITES);
> 
>  	writer_done = 1;
>  	/* Wait and check return value from reader threads */ @@ -771,11
> +759,10 @@ test_lpm_rcu_perf(void)
>  	}
>  	total_cycles = rte_rdtsc_precise() - begin;
> 
> -	printf("Total LPM Adds: %d\n", ITERATIONS *
> NUM_LDEPTH_ROUTE_ENTRIES);
> -	printf("Total LPM Deletes: %d\n",
> -		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> +	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>  	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES *
> ITERATIONS));
> +		(double)total_cycles / TOTAL_WRITES);
> 
>  	writer_done = 1;
>  	/* Wait and check return value from reader threads */
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
@ 2020-11-03  4:21     ` Honnappa Nagarahalli
  2020-11-03  4:33       ` Dharmik Thakkar
  0 siblings, 1 reply; 52+ messages in thread
From: Honnappa Nagarahalli @ 2020-11-03  4:21 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin
  Cc: dev, nd, Dharmik Thakkar, Honnappa Nagarahalli, nd

<snip>
> 
> Avoid code duplication by combining single and multi threaded tests
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  app/test/test_lpm_perf.c | 362 ++++++++++-----------------------------
>  1 file changed, 91 insertions(+), 271 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
> 224c92fa3d65..229c835c23f7 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -67,6 +67,12 @@ enum {
>  	IP_CLASS_C
>  };
> 
> +enum {
> +	SINGLE_WRITER = 1,
> +	MULTI_WRITER_1,
> +	MULTI_WRITER_2
> +};
Do we need this? Can we use the number of cores instead?

> +
>  /* struct route_rule_count defines the total number of rules in following
> a/b/c
>   * each item in a[]/b[]/c[] is the number of common IP address class A/B/C,
> not
>   * including the ones for private local network.
> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg)  {
>  	unsigned int i, j, si, ei;
>  	uint64_t begin, total_cycles;
> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
> +	uint8_t writer_id = (uint8_t)((uintptr_t)arg);
>  	uint32_t next_hop_add = 0xAA;
> 
> -	/* 2 writer threads are used */
> -	if (core_id % 2 == 0) {
> +	/* Single writer (writer_id = 1) */
> +	if (writer_id == SINGLE_WRITER) {
> +		si = 0;
> +		ei = NUM_LDEPTH_ROUTE_ENTRIES;
> +	}
> +	/* 2 Writers (writer_id = 2/3)*/
> +	else if (writer_id == MULTI_WRITER_1) {
>  		si = 0;
>  		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>  	} else {
> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  	for (i = 0; i < RCU_ITERATIONS; i++) {
>  		/* Add all the entries */
>  		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (writer_id != SINGLE_WRITER)
> +				pthread_mutex_lock(&lpm_mutex);
>  			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>  					large_ldepth_route_table[j].depth,
>  					next_hop_add) != 0) {
>  				printf("Failed to add iteration %d, route#
> %d\n",
>  					i, j);
> -				pthread_mutex_unlock(&lpm_mutex);
> +				if (writer_id != SINGLE_WRITER)
> +
> 	pthread_mutex_unlock(&lpm_mutex);
>  				return -1;
>  			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (writer_id != SINGLE_WRITER)
> +				pthread_mutex_unlock(&lpm_mutex);
>  		}
> 
>  		/* Delete all the entries */
>  		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (writer_id != SINGLE_WRITER)
> +				pthread_mutex_lock(&lpm_mutex);
>  			if (rte_lpm_delete(lpm,
> large_ldepth_route_table[j].ip,
>  				large_ldepth_route_table[j].depth) != 0) {
>  				printf("Failed to delete iteration %d, route#
> %d\n",
>  					i, j);
> -				pthread_mutex_unlock(&lpm_mutex);
> +				if (writer_id != SINGLE_WRITER)
> +
> 	pthread_mutex_unlock(&lpm_mutex);
>  				return -1;
>  			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (writer_id != SINGLE_WRITER)
> +				pthread_mutex_unlock(&lpm_mutex);
>  		}
>  	}
> 
> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
> 
>  /*
>   * Functional test:
> - * 2 writers, rest are readers
> + * 1/2 writers, rest are readers
>   */
>  static int
> -test_lpm_rcu_perf_multi_writer(void)
> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>  {
>  	struct rte_lpm_config config;
>  	size_t sz;
> -	unsigned int i;
> +	unsigned int i, j;
>  	uint16_t core_id;
>  	struct rte_lpm_rcu_config rcu_cfg = {0};
> +	int (*reader_f)(void *arg) = NULL;
> 
>  	if (rte_lcore_count() < 3) {
>  		printf("Not enough cores for lpm_rcu_perf_autotest,
> expecting at least 3\n"); @@ -504,273 +522,76 @@
> test_lpm_rcu_perf_multi_writer(void)
>  		num_cores++;
>  	}
> 
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration
> enabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> -
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration
> disabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -
> -	return 0;
> -
> -error:
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	rte_eal_mp_wait_lcore();
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -
> -	return -1;
> -}
> -
> -/*
> - * Functional test:
> - * Single writer, rest are readers
> - */
> -static int
> -test_lpm_rcu_perf(void)
> -{
> -	struct rte_lpm_config config;
> -	uint64_t begin, total_cycles;
> -	size_t sz;
> -	unsigned int i, j;
> -	uint16_t core_id;
> -	uint32_t next_hop_add = 0xAA;
> -	struct rte_lpm_rcu_config rcu_cfg = {0};
> -
> -	if (rte_lcore_count() < 2) {
> -		printf("Not enough cores for lpm_rcu_perf_autotest,
> expecting at least 2\n");
> -		return TEST_SKIPPED;
> -	}
> -
> -	num_cores = 0;
> -	RTE_LCORE_FOREACH_WORKER(core_id) {
> -		enabled_core_ids[num_cores] = core_id;
> -		num_cores++;
> -	}
> -
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route#
> %d\n",
> -					i, j);
> +	for (j = 1; j < 3; j++) {
> +		if (use_rcu)
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration enabled\n", j, num_cores - j);
> +		else
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration disabled\n", j, num_cores - j);
> +
> +		/* Create LPM table */
> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.flags = 0;
> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> +		TEST_LPM_ASSERT(lpm != NULL);
> +
> +		/* Init RCU variable */
> +		if (use_rcu) {
> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> +
> 	RTE_CACHE_LINE_SIZE);
> +			rte_rcu_qsbr_init(rv, num_cores);
> +
> +			rcu_cfg.v = rv;
> +			/* Assign the RCU variable to LPM */
> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> +				printf("RCU variable assignment failed\n");
>  				goto error;
>  			}
> 
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm,
> large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route#
> %d\n",
> -					i, j);
> -				goto error;
> -			}
> -	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> +			reader_f = test_lpm_rcu_qsbr_reader;
> +		} else
> +			reader_f = test_lpm_reader;
> 
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> +		writer_done = 0;
> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> 
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> -
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration
> disabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> 
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> +		/* Launch reader threads */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_remote_launch(reader_f, NULL,
> +						enabled_core_ids[i]);
> 
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> +		/* Launch writer threads */
> +		for (i = 0; i < j; i++)
> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> +						(void *)(uintptr_t)(i + j),
This can be just 'j'?

> +						enabled_core_ids[i]);
> 
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route#
> %d\n",
> -					i, j);
> +		/* Wait for writer threads */
> +		for (i = 0; i < j; i++)
> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>  				goto error;
> -			}
> 
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm,
> large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route#
> %d\n",
> -					i, j);
> -				goto error;
> -			}
> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> +			__atomic_load_n(&gwrite_cycles,
> __ATOMIC_RELAXED)
> +			/ TOTAL_WRITES);
> +
> +		writer_done = 1;
> +		/* Wait until all readers have exited */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_wait_lcore(enabled_core_ids[i]);
> +
> +		rte_lpm_free(lpm);
> +		rte_free(rv);
> +		lpm = NULL;
> +		rv = NULL;
>  	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> 
>  	return 0;
> 
> @@ -946,9 +767,8 @@ test_lpm_perf(void)
>  	rte_lpm_delete_all(lpm);
>  	rte_lpm_free(lpm);
> 
> -	test_lpm_rcu_perf();
> -
> -	test_lpm_rcu_perf_multi_writer();
> +	test_lpm_rcu_perf_multi_writer(0);
> +	test_lpm_rcu_perf_multi_writer(1);
> 
>  	return 0;
>  }
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-03  4:21     ` Honnappa Nagarahalli
@ 2020-11-03  4:33       ` Dharmik Thakkar
  2020-11-03  5:32         ` Honnappa Nagarahalli
  0 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03  4:33 UTC (permalink / raw)
  To: Honnappa Nagarahalli; +Cc: Bruce Richardson, Vladimir Medvedkin, dev, nd



> On Nov 2, 2020, at 10:21 PM, Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> 
> <snip>
>> 
>> Avoid code duplication by combining single and multi threaded tests
>> 
>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>> ---
>> app/test/test_lpm_perf.c | 362 ++++++++++-----------------------------
>> 1 file changed, 91 insertions(+), 271 deletions(-)
>> 
>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
>> 224c92fa3d65..229c835c23f7 100644
>> --- a/app/test/test_lpm_perf.c
>> +++ b/app/test/test_lpm_perf.c
>> @@ -67,6 +67,12 @@ enum {
>> 	IP_CLASS_C
>> };
>> 
>> +enum {
>> +	SINGLE_WRITER = 1,
>> +	MULTI_WRITER_1,
>> +	MULTI_WRITER_2
>> +};
> Do we need this? Can we use the number of cores instead?
> 

There are 3 combinations of writes (adds/deletes):
1. Write all the entries - in case of a single writer
2. Write half of the entries - in case of multiple writers
3. Write remaining half of the entries - in case of multiple writers

So, I think this is required.

>> +
>> /* struct route_rule_count defines the total number of rules in following
>> a/b/c
>>  * each item in a[]/b[]/c[] is the number of common IP address class A/B/C,
>> not
>>  * including the ones for private local network.
>> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg)  {
>> 	unsigned int i, j, si, ei;
>> 	uint64_t begin, total_cycles;
>> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>> +	uint8_t writer_id = (uint8_t)((uintptr_t)arg);
>> 	uint32_t next_hop_add = 0xAA;
>> 
>> -	/* 2 writer threads are used */
>> -	if (core_id % 2 == 0) {
>> +	/* Single writer (writer_id = 1) */
>> +	if (writer_id == SINGLE_WRITER) {
>> +		si = 0;
>> +		ei = NUM_LDEPTH_ROUTE_ENTRIES;
>> +	}
>> +	/* 2 Writers (writer_id = 2/3)*/
>> +	else if (writer_id == MULTI_WRITER_1) {
>> 		si = 0;
>> 		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>> 	} else {
>> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg)
>> 	for (i = 0; i < RCU_ITERATIONS; i++) {
>> 		/* Add all the entries */
>> 		for (j = si; j < ei; j++) {
>> -			pthread_mutex_lock(&lpm_mutex);
>> +			if (writer_id != SINGLE_WRITER)
>> +				pthread_mutex_lock(&lpm_mutex);
>> 			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> 					large_ldepth_route_table[j].depth,
>> 					next_hop_add) != 0) {
>> 				printf("Failed to add iteration %d, route#
>> %d\n",
>> 					i, j);
>> -				pthread_mutex_unlock(&lpm_mutex);
>> +				if (writer_id != SINGLE_WRITER)
>> +
>> 	pthread_mutex_unlock(&lpm_mutex);
>> 				return -1;
>> 			}
>> -			pthread_mutex_unlock(&lpm_mutex);
>> +			if (writer_id != SINGLE_WRITER)
>> +				pthread_mutex_unlock(&lpm_mutex);
>> 		}
>> 
>> 		/* Delete all the entries */
>> 		for (j = si; j < ei; j++) {
>> -			pthread_mutex_lock(&lpm_mutex);
>> +			if (writer_id != SINGLE_WRITER)
>> +				pthread_mutex_lock(&lpm_mutex);
>> 			if (rte_lpm_delete(lpm,
>> large_ldepth_route_table[j].ip,
>> 				large_ldepth_route_table[j].depth) != 0) {
>> 				printf("Failed to delete iteration %d, route#
>> %d\n",
>> 					i, j);
>> -				pthread_mutex_unlock(&lpm_mutex);
>> +				if (writer_id != SINGLE_WRITER)
>> +
>> 	pthread_mutex_unlock(&lpm_mutex);
>> 				return -1;
>> 			}
>> -			pthread_mutex_unlock(&lpm_mutex);
>> +			if (writer_id != SINGLE_WRITER)
>> +				pthread_mutex_unlock(&lpm_mutex);
>> 		}
>> 	}
>> 
>> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
>> 
>> /*
>>  * Functional test:
>> - * 2 writers, rest are readers
>> + * 1/2 writers, rest are readers
>>  */
>> static int
>> -test_lpm_rcu_perf_multi_writer(void)
>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>> {
>> 	struct rte_lpm_config config;
>> 	size_t sz;
>> -	unsigned int i;
>> +	unsigned int i, j;
>> 	uint16_t core_id;
>> 	struct rte_lpm_rcu_config rcu_cfg = {0};
>> +	int (*reader_f)(void *arg) = NULL;
>> 
>> 	if (rte_lcore_count() < 3) {
>> 		printf("Not enough cores for lpm_rcu_perf_autotest,
>> expecting at least 3\n"); @@ -504,273 +522,76 @@
>> test_lpm_rcu_perf_multi_writer(void)
>> 		num_cores++;
>> 	}
>> 
>> -	printf("\nPerf test: 2 writers, %d readers, RCU integration
>> enabled\n",
>> -		num_cores - 2);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	/* Init RCU variable */
>> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
>> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> -						RTE_CACHE_LINE_SIZE);
>> -	rte_rcu_qsbr_init(rv, num_cores);
>> -
>> -	rcu_cfg.v = rv;
>> -	/* Assign the RCU variable to LPM */
>> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> -		printf("RCU variable assignment failed\n");
>> -		goto error;
>> -	}
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> -
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Launch writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> -					(void *)(uintptr_t)i,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Wait for writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> -		/ TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -	lpm = NULL;
>> -	rv = NULL;
>> -
>> -	/* Test without RCU integration */
>> -	printf("\nPerf test: 2 writers, %d readers, RCU integration
>> disabled\n",
>> -		num_cores - 2);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Launch writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> -					(void *)(uintptr_t)i,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Wait for writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> -		/ TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -
>> -	return 0;
>> -
>> -error:
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	rte_eal_mp_wait_lcore();
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -
>> -	return -1;
>> -}
>> -
>> -/*
>> - * Functional test:
>> - * Single writer, rest are readers
>> - */
>> -static int
>> -test_lpm_rcu_perf(void)
>> -{
>> -	struct rte_lpm_config config;
>> -	uint64_t begin, total_cycles;
>> -	size_t sz;
>> -	unsigned int i, j;
>> -	uint16_t core_id;
>> -	uint32_t next_hop_add = 0xAA;
>> -	struct rte_lpm_rcu_config rcu_cfg = {0};
>> -
>> -	if (rte_lcore_count() < 2) {
>> -		printf("Not enough cores for lpm_rcu_perf_autotest,
>> expecting at least 2\n");
>> -		return TEST_SKIPPED;
>> -	}
>> -
>> -	num_cores = 0;
>> -	RTE_LCORE_FOREACH_WORKER(core_id) {
>> -		enabled_core_ids[num_cores] = core_id;
>> -		num_cores++;
>> -	}
>> -
>> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
>> -		num_cores);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	/* Init RCU variable */
>> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
>> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> -						RTE_CACHE_LINE_SIZE);
>> -	rte_rcu_qsbr_init(rv, num_cores);
>> -
>> -	rcu_cfg.v = rv;
>> -	/* Assign the RCU variable to LPM */
>> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> -		printf("RCU variable assignment failed\n");
>> -		goto error;
>> -	}
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Measure add/delete. */
>> -	begin = rte_rdtsc_precise();
>> -	for (i = 0; i < RCU_ITERATIONS; i++) {
>> -		/* Add all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> -					large_ldepth_route_table[j].depth,
>> -					next_hop_add) != 0) {
>> -				printf("Failed to add iteration %d, route#
>> %d\n",
>> -					i, j);
>> +	for (j = 1; j < 3; j++) {
>> +		if (use_rcu)
>> +			printf("\nPerf test: %d writer(s), %d reader(s),"
>> +			       " RCU integration enabled\n", j, num_cores - j);
>> +		else
>> +			printf("\nPerf test: %d writer(s), %d reader(s),"
>> +			       " RCU integration disabled\n", j, num_cores - j);
>> +
>> +		/* Create LPM table */
>> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> +		config.flags = 0;
>> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> +		TEST_LPM_ASSERT(lpm != NULL);
>> +
>> +		/* Init RCU variable */
>> +		if (use_rcu) {
>> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
>> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> +
>> 	RTE_CACHE_LINE_SIZE);
>> +			rte_rcu_qsbr_init(rv, num_cores);
>> +
>> +			rcu_cfg.v = rv;
>> +			/* Assign the RCU variable to LPM */
>> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> +				printf("RCU variable assignment failed\n");
>> 				goto error;
>> 			}
>> 
>> -		/* Delete all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_delete(lpm,
>> large_ldepth_route_table[j].ip,
>> -				large_ldepth_route_table[j].depth) != 0) {
>> -				printf("Failed to delete iteration %d, route#
>> %d\n",
>> -					i, j);
>> -				goto error;
>> -			}
>> -	}
>> -	total_cycles = rte_rdtsc_precise() - begin;
>> +			reader_f = test_lpm_rcu_qsbr_reader;
>> +		} else
>> +			reader_f = test_lpm_reader;
>> 
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %g cycles\n",
>> -		(double)total_cycles / TOTAL_WRITES);
>> +		writer_done = 0;
>> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> 
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 0; i < num_cores; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -	lpm = NULL;
>> -	rv = NULL;
>> -
>> -	/* Test without RCU integration */
>> -	printf("\nPerf test: 1 writer, %d readers, RCU integration
>> disabled\n",
>> -		num_cores);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> 
>> -	writer_done = 0;
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> +		/* Launch reader threads */
>> +		for (i = j; i < num_cores; i++)
>> +			rte_eal_remote_launch(reader_f, NULL,
>> +						enabled_core_ids[i]);
>> 
>> -	/* Launch reader threads */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_reader, NULL,
>> -					enabled_core_ids[i]);
>> +		/* Launch writer threads */
>> +		for (i = 0; i < j; i++)
>> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> +						(void *)(uintptr_t)(i + j),
> This can be just 'j'?
> 
>> +						enabled_core_ids[i]);
>> 
>> -	/* Measure add/delete. */
>> -	begin = rte_rdtsc_precise();
>> -	for (i = 0; i < RCU_ITERATIONS; i++) {
>> -		/* Add all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> -					large_ldepth_route_table[j].depth,
>> -					next_hop_add) != 0) {
>> -				printf("Failed to add iteration %d, route#
>> %d\n",
>> -					i, j);
>> +		/* Wait for writer threads */
>> +		for (i = 0; i < j; i++)
>> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> 				goto error;
>> -			}
>> 
>> -		/* Delete all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_delete(lpm,
>> large_ldepth_route_table[j].ip,
>> -				large_ldepth_route_table[j].depth) != 0) {
>> -				printf("Failed to delete iteration %d, route#
>> %d\n",
>> -					i, j);
>> -				goto error;
>> -			}
>> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> +			__atomic_load_n(&gwrite_cycles,
>> __ATOMIC_RELAXED)
>> +			/ TOTAL_WRITES);
>> +
>> +		writer_done = 1;
>> +		/* Wait until all readers have exited */
>> +		for (i = j; i < num_cores; i++)
>> +			rte_eal_wait_lcore(enabled_core_ids[i]);
>> +
>> +		rte_lpm_free(lpm);
>> +		rte_free(rv);
>> +		lpm = NULL;
>> +		rv = NULL;
>> 	}
>> -	total_cycles = rte_rdtsc_precise() - begin;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %g cycles\n",
>> -		(double)total_cycles / TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> 
>> 	return 0;
>> 
>> @@ -946,9 +767,8 @@ test_lpm_perf(void)
>> 	rte_lpm_delete_all(lpm);
>> 	rte_lpm_free(lpm);
>> 
>> -	test_lpm_rcu_perf();
>> -
>> -	test_lpm_rcu_perf_multi_writer();
>> +	test_lpm_rcu_perf_multi_writer(0);
>> +	test_lpm_rcu_perf_multi_writer(1);
>> 
>> 	return 0;
>> }
>> --
>> 2.17.1


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure in rcu qsbr perf
  2020-11-03  1:28     ` Honnappa Nagarahalli
@ 2020-11-03  4:42       ` Dharmik Thakkar
  0 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03  4:42 UTC (permalink / raw)
  To: Honnappa Nagarahalli
  Cc: Bruce Richardson, Vladimir Medvedkin, Gavin Hu, Ruifeng Wang,
	dev, nd, stable



> On Nov 2, 2020, at 7:28 PM, Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> 
> <snip>
> 
>> 
>> Return error if Add/Delete fail in multiwriter perf test
>> 
>> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
>> Cc: honnappa.nagarahalli@arm.com
>> Cc: stable@dpdk.org
>> 
>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>> ---
>> app/test/test_lpm_perf.c | 4 ++++
>> 1 file changed, 4 insertions(+)
>> 
>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
>> 45164b23214b..55084816ab91 100644
>> --- a/app/test/test_lpm_perf.c
>> +++ b/app/test/test_lpm_perf.c
>> @@ -453,6 +453,8 @@ test_lpm_rcu_qsbr_writer(void *arg)
>> 					next_hop_add) != 0) {
>> 				printf("Failed to add iteration %d, route#
>> %d\n",
>> 					i, j);
>> +				pthread_mutex_unlock(&lpm_mutex);
>> +				return -1;
> Would be good to use the "goto error" method used in this file in other functions.

Yes, will update in the next version.

> 
>> 			}
>> 			pthread_mutex_unlock(&lpm_mutex);
>> 		}
>> @@ -464,6 +466,8 @@ test_lpm_rcu_qsbr_writer(void *arg)
>> 				large_ldepth_route_table[j].depth) != 0) {
>> 				printf("Failed to delete iteration %d, route#
>> %d\n",
>> 					i, j);
>> +				pthread_mutex_unlock(&lpm_mutex);
>> +				return -1;
>> 			}
>> 			pthread_mutex_unlock(&lpm_mutex);
>> 		}
>> --
>> 2.17.1


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking in rcu qsbr perf
  2020-11-03  1:21     ` Honnappa Nagarahalli
@ 2020-11-03  4:56       ` Dharmik Thakkar
  0 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03  4:56 UTC (permalink / raw)
  To: Honnappa Nagarahalli
  Cc: Bruce Richardson, Vladimir Medvedkin, Ruifeng Wang, Gavin Hu,
	dev, nd, stable



> On Nov 2, 2020, at 7:21 PM, Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> 
> <snip>
> 
>> 
>> Remove redundant error checking for reader threads since they never return
>> error.
>> 
>> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
>> Cc: honnappa.nagarahalli@arm.com
>> Cc: stable@dpdk.org
>> 
>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>> ---
>> app/test/test_lpm_perf.c | 21 ++++++++-------------
>> 1 file changed, 8 insertions(+), 13 deletions(-)
>> 
>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
>> 55084816ab91..224c92fa3d65 100644
>> --- a/app/test/test_lpm_perf.c
>> +++ b/app/test/test_lpm_perf.c
>> @@ -554,11 +554,10 @@ test_lpm_rcu_perf_multi_writer(void)
>> 		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> 		/ TOTAL_WRITES);
>> 
>> -	/* Wait and check return value from reader threads */
>> 	writer_done = 1;
>> +	/* Wait until all readers have exited */
>> 	for (i = 2; i < num_cores; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> +		rte_eal_wait_lcore(enabled_core_ids[i]);
>> 
>> 	rte_lpm_free(lpm);
>> 	rte_free(rv);
>> @@ -603,10 +602,9 @@ test_lpm_rcu_perf_multi_writer(void)
>> 		/ TOTAL_WRITES);
>> 
>> 	writer_done = 1;
>> -	/* Wait and check return value from reader threads */
>> +	/* Wait until all readers have exited */
>> 	for (i = 2; i < num_cores; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> +		rte_eal_wait_lcore(enabled_core_ids[i]);
>> 
>> 	rte_lpm_free(lpm);
>> 
>> @@ -710,10 +708,9 @@ test_lpm_rcu_perf(void)
>> 		(double)total_cycles / TOTAL_WRITES);
>> 
>> 	writer_done = 1;
>> -	/* Wait and check return value from reader threads */
>> +	/* Wait until all readers have exited */
>> 	for (i = 0; i < num_cores; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> +		if (rte_eal_wait_lcore(enabled_core_ids[i]);
>                             ^^ Do we need the 'if' statement?

No, will remove in the next version.

>> 
>> 	rte_lpm_free(lpm);
>> 	rte_free(rv);
>> @@ -769,11 +766,9 @@ test_lpm_rcu_perf(void)
>> 		(double)total_cycles / TOTAL_WRITES);
>> 
>> 	writer_done = 1;
>> -	/* Wait and check return value from reader threads */
>> +	/* Wait until all readers have exited */
>> 	for (i = 0; i < num_cores; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			printf("Warning: lcore %u not finished.\n",
>> -				enabled_core_ids[i]);
>> +		rte_eal_wait_lcore(enabled_core_ids[i]);
>> 
>> 	rte_lpm_free(lpm);
>> 
>> --
>> 2.17.1


^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test
  2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
                     ` (3 preceding siblings ...)
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
@ 2020-11-03  5:12   ` Dharmik Thakkar
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
                       ` (4 more replies)
  4 siblings, 5 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03  5:12 UTC (permalink / raw)
  Cc: dev, nd, Dharmik Thakkar

Fix LPM adds, LPM deletes, and cycle calculation.
Return error if LPM add/delete fails in multi-writer test.
Remove redundant error checking for readers.
Combine single and multi threaded test cases to avoid code duplication.
---
v3:
 - Add 'goto error'
 - Remove unnecessary if statement

v2:
 - Add more details about the fix to the commit message
 - Replace hard coded values with an enum
 - Remove lock acquire/release for single writer

Dharmik Thakkar (4):
  test/lpm: fix cycle calculation in rcu qsbr perf
  test/lpm: return error on failure in rcu qsbr perf
  test/lpm: remove error checking in rcu qsbr perf
  test/lpm: avoid code duplication in rcu qsbr perf

 app/test/test_lpm_perf.c | 381 ++++++++++-----------------------------
 1 file changed, 94 insertions(+), 287 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v3 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-11-03  5:12   ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
@ 2020-11-03  5:12     ` Dharmik Thakkar
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 2/4] test/lpm: return error on failure " Dharmik Thakkar
                       ` (3 subsequent siblings)
  4 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03  5:12 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Gavin Hu,
	Honnappa Nagarahalli, Ruifeng Wang
  Cc: dev, nd, Dharmik Thakkar, stable

Fix incorrect calculations for LPM adds, LPM deletes,
and average cycles in RCU QSBR perf tests

Since the RCU QSBR tests run for 'RCU_ITERATIONS' and not
'ITERATIONS', replace 'ITERATIONS' with 'RCU_ITERATIONS'
when calculating adds, deletes, and cycles.

Also, for multi-writer perf test, each writer only writes
half of NUM_LDEPTH_ROUTE_ENTRIES.
For 2 writers, total adds (or deletes) should be
(RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of
(2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES).

Since, for both the single and multi writer tests, the total number of
adds/deletes is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES),
this expression has been replaced with a macro 'TOTAL_WRITES'.
Furthermore, 'gwrites' has been removed since it is always a fixed value
equal to TOTAL_WRITES.

Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 app/test/test_lpm_perf.c | 45 ++++++++++++++--------------------------
 1 file changed, 16 insertions(+), 29 deletions(-)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index c5a238b9d1e8..45164b23214b 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv;
 static volatile uint8_t writer_done;
 static volatile uint32_t thr_id;
 static uint64_t gwrite_cycles;
-static uint64_t gwrites;
 /* LPM APIs are not thread safe, use mutex to provide thread safety */
 static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries;
 #define NUM_ROUTE_ENTRIES num_route_entries
 #define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries
 
+#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES)
+
 enum {
 	IP_CLASS_A,
 	IP_CLASS_B,
@@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	uint8_t core_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
 
-	RTE_SET_USED(arg);
 	/* 2 writer threads are used */
 	if (core_id % 2 == 0) {
 		si = 0;
@@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	total_cycles = rte_rdtsc_precise() - begin;
 
 	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
-	__atomic_fetch_add(&gwrites,
-			2 * NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS,
-			__ATOMIC_RELAXED);
 
 	return 0;
 }
@@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
@@ -548,14 +544,11 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
-		);
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+		/ TOTAL_WRITES);
 
 	/* Wait and check return value from reader threads */
 	writer_done = 1;
@@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
 	/* Launch reader threads */
@@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
-		);
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+		/ TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -711,11 +700,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -771,11 +759,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v3 2/4] test/lpm: return error on failure in rcu qsbr perf
  2020-11-03  5:12   ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
@ 2020-11-03  5:12     ` Dharmik Thakkar
  2020-11-03  5:21       ` Honnappa Nagarahalli
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 3/4] test/lpm: remove error checking " Dharmik Thakkar
                       ` (2 subsequent siblings)
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03  5:12 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Ruifeng Wang, Gavin Hu,
	Honnappa Nagarahalli
  Cc: dev, nd, Dharmik Thakkar, stable

Return error if Add/Delete fail in multiwriter perf test

Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 45164b23214b..fc4c9b60cbbc 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -453,6 +453,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
 					next_hop_add) != 0) {
 				printf("Failed to add iteration %d, route# %d\n",
 					i, j);
+				goto error;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
@@ -464,6 +465,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
+				goto error;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
@@ -474,6 +476,10 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
 
 	return 0;
+
+error:
+	pthread_mutex_unlock(&lpm_mutex);
+	return -1;
 }
 
 /*
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v3 3/4] test/lpm: remove error checking in rcu qsbr perf
  2020-11-03  5:12   ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 2/4] test/lpm: return error on failure " Dharmik Thakkar
@ 2020-11-03  5:12     ` Dharmik Thakkar
  2020-11-03  5:22       ` Honnappa Nagarahalli
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
  2020-11-03 22:23     ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03  5:12 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Gavin Hu,
	Honnappa Nagarahalli, Ruifeng Wang
  Cc: dev, nd, Dharmik Thakkar, stable

Remove redundant error checking for reader threads
since they never return error.

Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index fc4c9b60cbbc..fa6ebc4f7547 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -556,11 +556,10 @@ test_lpm_rcu_perf_multi_writer(void)
 		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
 		/ TOTAL_WRITES);
 
-	/* Wait and check return value from reader threads */
 	writer_done = 1;
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -605,10 +604,9 @@ test_lpm_rcu_perf_multi_writer(void)
 		/ TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
@@ -712,10 +710,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -771,11 +768,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			printf("Warning: lcore %u not finished.\n",
-				enabled_core_ids[i]);
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v3 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-03  5:12   ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
                       ` (2 preceding siblings ...)
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 3/4] test/lpm: remove error checking " Dharmik Thakkar
@ 2020-11-03  5:12     ` Dharmik Thakkar
  2020-11-03 22:23     ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  4 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03  5:12 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin; +Cc: dev, nd, Dharmik Thakkar

Avoid code duplication by combining single and multi threaded tests

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 359 ++++++++++-----------------------------
 1 file changed, 89 insertions(+), 270 deletions(-)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index fa6ebc4f7547..147801634210 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -67,6 +67,12 @@ enum {
 	IP_CLASS_C
 };
 
+enum {
+	SINGLE_WRITER = 1,
+	MULTI_WRITER_1,
+	MULTI_WRITER_2
+};
+
 /* struct route_rule_count defines the total number of rules in following a/b/c
  * each item in a[]/b[]/c[] is the number of common IP address class A/B/C, not
  * including the ones for private local network.
@@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg)
 {
 	unsigned int i, j, si, ei;
 	uint64_t begin, total_cycles;
-	uint8_t core_id = (uint8_t)((uintptr_t)arg);
+	uint8_t writer_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
 
-	/* 2 writer threads are used */
-	if (core_id % 2 == 0) {
+	/* Single writer (writer_id = 1) */
+	if (writer_id == SINGLE_WRITER) {
+		si = 0;
+		ei = NUM_LDEPTH_ROUTE_ENTRIES;
+	}
+	/* 2 Writers (writer_id = 2/3)*/
+	else if (writer_id == MULTI_WRITER_1) {
 		si = 0;
 		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
 	} else {
@@ -447,7 +458,8 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	for (i = 0; i < RCU_ITERATIONS; i++) {
 		/* Add all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
 					large_ldepth_route_table[j].depth,
 					next_hop_add) != 0) {
@@ -455,19 +467,22 @@ test_lpm_rcu_qsbr_writer(void *arg)
 					i, j);
 				goto error;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 
 		/* Delete all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
 				goto error;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 	}
 
@@ -478,22 +493,24 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	return 0;
 
 error:
-	pthread_mutex_unlock(&lpm_mutex);
+	if (writer_id != SINGLE_WRITER)
+		pthread_mutex_unlock(&lpm_mutex);
 	return -1;
 }
 
 /*
  * Functional test:
- * 2 writers, rest are readers
+ * 1/2 writers, rest are readers
  */
 static int
-test_lpm_rcu_perf_multi_writer(void)
+test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
 {
 	struct rte_lpm_config config;
 	size_t sz;
-	unsigned int i;
+	unsigned int i, j;
 	uint16_t core_id;
 	struct rte_lpm_rcu_config rcu_cfg = {0};
+	int (*reader_f)(void *arg) = NULL;
 
 	if (rte_lcore_count() < 3) {
 		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
@@ -506,273 +523,76 @@ test_lpm_rcu_perf_multi_writer(void)
 		num_cores++;
 	}
 
-	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
-
-	/* Test without RCU integration */
-	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-
-	return 0;
-
-error:
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	rte_eal_mp_wait_lcore();
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-
-	return -1;
-}
-
-/*
- * Functional test:
- * Single writer, rest are readers
- */
-static int
-test_lpm_rcu_perf(void)
-{
-	struct rte_lpm_config config;
-	uint64_t begin, total_cycles;
-	size_t sz;
-	unsigned int i, j;
-	uint16_t core_id;
-	uint32_t next_hop_add = 0xAA;
-	struct rte_lpm_rcu_config rcu_cfg = {0};
-
-	if (rte_lcore_count() < 2) {
-		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
-		return TEST_SKIPPED;
-	}
-
-	num_cores = 0;
-	RTE_LCORE_FOREACH_WORKER(core_id) {
-		enabled_core_ids[num_cores] = core_id;
-		num_cores++;
-	}
-
-	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+	for (j = 1; j < 3; j++) {
+		if (use_rcu)
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration enabled\n", j, num_cores - j);
+		else
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration disabled\n", j, num_cores - j);
+
+		/* Create LPM table */
+		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.flags = 0;
+		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
+		TEST_LPM_ASSERT(lpm != NULL);
+
+		/* Init RCU variable */
+		if (use_rcu) {
+			sz = rte_rcu_qsbr_get_memsize(num_cores);
+			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
+							RTE_CACHE_LINE_SIZE);
+			rte_rcu_qsbr_init(rv, num_cores);
+
+			rcu_cfg.v = rv;
+			/* Assign the RCU variable to LPM */
+			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
+				printf("RCU variable assignment failed\n");
 				goto error;
 			}
 
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
-	}
-	total_cycles = rte_rdtsc_precise() - begin;
+			reader_f = test_lpm_rcu_qsbr_reader;
+		} else
+			reader_f = test_lpm_reader;
 
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
+		writer_done = 0;
+		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
 
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
-
-	/* Test without RCU integration */
-	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
+		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
+		/* Launch reader threads */
+		for (i = j; i < num_cores; i++)
+			rte_eal_remote_launch(reader_f, NULL,
+						enabled_core_ids[i]);
 
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
+		/* Launch writer threads */
+		for (i = 0; i < j; i++)
+			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
+						(void *)(uintptr_t)(i + j),
+						enabled_core_ids[i]);
 
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+		/* Wait for writer threads */
+		for (i = 0; i < j; i++)
+			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 				goto error;
-			}
 
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
+		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
+		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
+			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+			/ TOTAL_WRITES);
+
+		writer_done = 1;
+		/* Wait until all readers have exited */
+		for (i = j; i < num_cores; i++)
+			rte_eal_wait_lcore(enabled_core_ids[i]);
+
+		rte_lpm_free(lpm);
+		rte_free(rv);
+		lpm = NULL;
+		rv = NULL;
 	}
-	total_cycles = rte_rdtsc_precise() - begin;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
 
 	return 0;
 
@@ -948,9 +768,8 @@ test_lpm_perf(void)
 	rte_lpm_delete_all(lpm);
 	rte_lpm_free(lpm);
 
-	test_lpm_rcu_perf();
-
-	test_lpm_rcu_perf_multi_writer();
+	test_lpm_rcu_perf_multi_writer(0);
+	test_lpm_rcu_perf_multi_writer(1);
 
 	return 0;
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v3 2/4] test/lpm: return error on failure in rcu qsbr perf
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 2/4] test/lpm: return error on failure " Dharmik Thakkar
@ 2020-11-03  5:21       ` Honnappa Nagarahalli
  0 siblings, 0 replies; 52+ messages in thread
From: Honnappa Nagarahalli @ 2020-11-03  5:21 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin,
	Ruifeng Wang, Gavin Hu
  Cc: dev, nd, Dharmik Thakkar, stable, Honnappa Nagarahalli, nd

<snip>
> 
> Return error if Add/Delete fail in multiwriter perf test
> 
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Looks good
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

> ---
>  app/test/test_lpm_perf.c | 6 ++++++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
> 45164b23214b..fc4c9b60cbbc 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -453,6 +453,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  					next_hop_add) != 0) {
>  				printf("Failed to add iteration %d, route#
> %d\n",
>  					i, j);
> +				goto error;
>  			}
>  			pthread_mutex_unlock(&lpm_mutex);
>  		}
> @@ -464,6 +465,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  				large_ldepth_route_table[j].depth) != 0) {
>  				printf("Failed to delete iteration %d, route#
> %d\n",
>  					i, j);
> +				goto error;
>  			}
>  			pthread_mutex_unlock(&lpm_mutex);
>  		}
> @@ -474,6 +476,10 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  	__atomic_fetch_add(&gwrite_cycles, total_cycles,
> __ATOMIC_RELAXED);
> 
>  	return 0;
> +
> +error:
> +	pthread_mutex_unlock(&lpm_mutex);
> +	return -1;
>  }
> 
>  /*
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v3 3/4] test/lpm: remove error checking in rcu qsbr perf
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 3/4] test/lpm: remove error checking " Dharmik Thakkar
@ 2020-11-03  5:22       ` Honnappa Nagarahalli
  0 siblings, 0 replies; 52+ messages in thread
From: Honnappa Nagarahalli @ 2020-11-03  5:22 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin, Gavin Hu,
	Ruifeng Wang
  Cc: dev, nd, Dharmik Thakkar, stable, Honnappa Nagarahalli, nd



> -----Original Message-----
> From: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Sent: Monday, November 2, 2020 11:12 PM
> To: Bruce Richardson <bruce.richardson@intel.com>; Vladimir Medvedkin
> <vladimir.medvedkin@intel.com>; Gavin Hu <Gavin.Hu@arm.com>;
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Dharmik Thakkar
> <Dharmik.Thakkar@arm.com>; stable@dpdk.org
> Subject: [PATCH v3 3/4] test/lpm: remove error checking in rcu qsbr perf
> 
> Remove redundant error checking for reader threads since they never return
> error.
> 
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Looks good
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

> ---
>  app/test/test_lpm_perf.c | 21 ++++++++-------------
>  1 file changed, 8 insertions(+), 13 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
> fc4c9b60cbbc..fa6ebc4f7547 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -556,11 +556,10 @@ test_lpm_rcu_perf_multi_writer(void)
>  		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>  		/ TOTAL_WRITES);
> 
> -	/* Wait and check return value from reader threads */
>  	writer_done = 1;
> +	/* Wait until all readers have exited */
>  	for (i = 2; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
> 
>  	rte_lpm_free(lpm);
>  	rte_free(rv);
> @@ -605,10 +604,9 @@ test_lpm_rcu_perf_multi_writer(void)
>  		/ TOTAL_WRITES);
> 
>  	writer_done = 1;
> -	/* Wait and check return value from reader threads */
> +	/* Wait until all readers have exited */
>  	for (i = 2; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
> 
>  	rte_lpm_free(lpm);
> 
> @@ -712,10 +710,9 @@ test_lpm_rcu_perf(void)
>  		(double)total_cycles / TOTAL_WRITES);
> 
>  	writer_done = 1;
> -	/* Wait and check return value from reader threads */
> +	/* Wait until all readers have exited */
>  	for (i = 0; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
> 
>  	rte_lpm_free(lpm);
>  	rte_free(rv);
> @@ -771,11 +768,9 @@ test_lpm_rcu_perf(void)
>  		(double)total_cycles / TOTAL_WRITES);
> 
>  	writer_done = 1;
> -	/* Wait and check return value from reader threads */
> +	/* Wait until all readers have exited */
>  	for (i = 0; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			printf("Warning: lcore %u not finished.\n",
> -				enabled_core_ids[i]);
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
> 
>  	rte_lpm_free(lpm);
> 
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-03  4:33       ` Dharmik Thakkar
@ 2020-11-03  5:32         ` Honnappa Nagarahalli
  2020-11-03 14:03           ` Dharmik Thakkar
  0 siblings, 1 reply; 52+ messages in thread
From: Honnappa Nagarahalli @ 2020-11-03  5:32 UTC (permalink / raw)
  To: Dharmik Thakkar
  Cc: Bruce Richardson, Vladimir Medvedkin, dev, nd, Honnappa Nagarahalli, nd

<snip>

> >>
> >> Avoid code duplication by combining single and multi threaded tests
> >>
> >> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> >> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> >> ---
> >> app/test/test_lpm_perf.c | 362
> >> ++++++++++-----------------------------
> >> 1 file changed, 91 insertions(+), 271 deletions(-)
> >>
> >> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> >> index
> >> 224c92fa3d65..229c835c23f7 100644
> >> --- a/app/test/test_lpm_perf.c
> >> +++ b/app/test/test_lpm_perf.c
> >> @@ -67,6 +67,12 @@ enum {
> >> IP_CLASS_C
> >> };
> >>
> >> +enum {
> >> +SINGLE_WRITER = 1,
> >> +MULTI_WRITER_1,
> >> +MULTI_WRITER_2
> >> +};
> > Do we need this? Can we use the number of cores instead?
> >
> 
> There are 3 combinations of writes (adds/deletes):
> 1. Write all the entries - in case of a single writer
> 2. Write half of the entries - in case of multiple writers
> 3. Write the remaining half of the entries - in case of multiple writers
> 
> So, I think this is required.
IMO, this is not scalable. Essentially, we need 2 parameters to divide the routes among the writer threads: 1) the total number of writers and 2) the core ID in the linear space.
Creating a structure with these 2 parameters and passing it to the writer thread would be better and more scalable.

> 
> >> +
> >> /* struct route_rule_count defines the total number of rules in
> >> following a/b/c
> >>  * each item in a[]/b[]/c[] is the number of common IP address class
> >> A/B/C, not
> >>  * including the ones for private local network.
> >> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg)  {
> unsigned
> >> int i, j, si, ei; uint64_t begin, total_cycles; -uint8_t core_id =
> >> (uint8_t)((uintptr_t)arg);
> >> +uint8_t writer_id = (uint8_t)((uintptr_t)arg);
> >> uint32_t next_hop_add = 0xAA;
> >>
> >> -/* 2 writer threads are used */
> >> -if (core_id % 2 == 0) {
> >> +/* Single writer (writer_id = 1) */
> >> +if (writer_id == SINGLE_WRITER) {
> >> +si = 0;
> >> +ei = NUM_LDEPTH_ROUTE_ENTRIES;
> >> +}
> >> +/* 2 Writers (writer_id = 2/3)*/
> >> +else if (writer_id == MULTI_WRITER_1) {
> >> si = 0;
> >> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> >> } else {
> >> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i = 0;
> >> i < RCU_ITERATIONS; i++) {
> >> /* Add all the entries */
> >> for (j = si; j < ei; j++) {
> >> -pthread_mutex_lock(&lpm_mutex);
> >> +if (writer_id != SINGLE_WRITER)
> >> +pthread_mutex_lock(&lpm_mutex);
> >> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> >> large_ldepth_route_table[j].depth,
> >> next_hop_add) != 0) {
> >> printf("Failed to add iteration %d, route# %d\n", i, j);
> >> -pthread_mutex_unlock(&lpm_mutex);
> >> +if (writer_id != SINGLE_WRITER)
> >> +
> >> pthread_mutex_unlock(&lpm_mutex);
> >> return -1;
> >> }
> >> -pthread_mutex_unlock(&lpm_mutex);
> >> +if (writer_id != SINGLE_WRITER)
> >> +pthread_mutex_unlock(&lpm_mutex);
> >> }
> >>
> >> /* Delete all the entries */
> >> for (j = si; j < ei; j++) {
> >> -pthread_mutex_lock(&lpm_mutex);
> >> +if (writer_id != SINGLE_WRITER)
> >> +pthread_mutex_lock(&lpm_mutex);
> >> if (rte_lpm_delete(lpm,
> >> large_ldepth_route_table[j].ip,
> >> large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete
> >> iteration %d, route# %d\n", i, j); -pthread_mutex_unlock(&lpm_mutex);
> >> +if (writer_id != SINGLE_WRITER)
> >> +
> >> pthread_mutex_unlock(&lpm_mutex);
> >> return -1;
> >> }
> >> -pthread_mutex_unlock(&lpm_mutex);
> >> +if (writer_id != SINGLE_WRITER)
> >> +pthread_mutex_unlock(&lpm_mutex);
> >> }
> >> }
> >>
> >> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
> >>
> >> /*
> >>  * Functional test:
> >> - * 2 writers, rest are readers
> >> + * 1/2 writers, rest are readers
> >>  */
> >> static int
> >> -test_lpm_rcu_perf_multi_writer(void)
> >> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
> >> {
> >> struct rte_lpm_config config;
> >> size_t sz;
> >> -unsigned int i;
> >> +unsigned int i, j;
> >> uint16_t core_id;
> >> struct rte_lpm_rcu_config rcu_cfg = {0};
> >> +int (*reader_f)(void *arg) = NULL;
> >>
> >> if (rte_lcore_count() < 3) {
> >> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
> >> least 3\n"); @@ -504,273 +522,76 @@
> >> test_lpm_rcu_perf_multi_writer(void)
> >> num_cores++;
> >> }
> >>
> >> -printf("\nPerf test: 2 writers, %d readers, RCU integration
> >> enabled\n", -num_cores - 2);
> >> -
> >> -/* Create LPM table */
> >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> config.number_tbl8s =
> >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >> -TEST_LPM_ASSERT(lpm != NULL);
> >> -
> >> -/* Init RCU variable */
> >> -sz = rte_rcu_qsbr_get_memsize(num_cores);
> >> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> >> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
> >> -
> >> -rcu_cfg.v = rv;
> >> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
> >> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
> >> error; -}
> >> -
> >> -writer_done = 0;
> >> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> >> -
> >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >> -
> >> -/* Launch reader threads */
> >> -for (i = 2; i < num_cores; i++)
> >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> >> -enabled_core_ids[i]);
> >> -
> >> -/* Launch writer threads */
> >> -for (i = 0; i < 2; i++)
> >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> >> -(void *)(uintptr_t)i,
> >> -enabled_core_ids[i]);
> >> -
> >> -/* Wait for writer threads */
> >> -for (i = 0; i < 2; i++)
> >> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
> >> -
> >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
> >> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
> >> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
> >> -
> >> -writer_done = 1;
> >> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
> >> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
> >> -
> >> -rte_lpm_free(lpm);
> >> -rte_free(rv);
> >> -lpm = NULL;
> >> -rv = NULL;
> >> -
> >> -/* Test without RCU integration */
> >> -printf("\nPerf test: 2 writers, %d readers, RCU integration
> >> disabled\n", -num_cores - 2);
> >> -
> >> -/* Create LPM table */
> >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> config.number_tbl8s =
> >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >> -TEST_LPM_ASSERT(lpm != NULL);
> >> -
> >> -writer_done = 0;
> >> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >> -
> >> -/* Launch reader threads */
> >> -for (i = 2; i < num_cores; i++)
> >> -rte_eal_remote_launch(test_lpm_reader, NULL, -enabled_core_ids[i]);
> >> -
> >> -/* Launch writer threads */
> >> -for (i = 0; i < 2; i++)
> >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> >> -(void *)(uintptr_t)i,
> >> -enabled_core_ids[i]);
> >> -
> >> -/* Wait for writer threads */
> >> -for (i = 0; i < 2; i++)
> >> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
> >> -
> >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
> >> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
> >> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
> >> -
> >> -writer_done = 1;
> >> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
> >> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
> >> -
> >> -rte_lpm_free(lpm);
> >> -
> >> -return 0;
> >> -
> >> -error:
> >> -writer_done = 1;
> >> -/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore();
> >> -
> >> -rte_lpm_free(lpm);
> >> -rte_free(rv);
> >> -
> >> -return -1;
> >> -}
> >> -
> >> -/*
> >> - * Functional test:
> >> - * Single writer, rest are readers
> >> - */
> >> -static int
> >> -test_lpm_rcu_perf(void)
> >> -{
> >> -struct rte_lpm_config config;
> >> -uint64_t begin, total_cycles;
> >> -size_t sz;
> >> -unsigned int i, j;
> >> -uint16_t core_id;
> >> -uint32_t next_hop_add = 0xAA;
> >> -struct rte_lpm_rcu_config rcu_cfg = {0};
> >> -
> >> -if (rte_lcore_count() < 2) {
> >> -printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
> >> least 2\n"); -return TEST_SKIPPED; -}
> >> -
> >> -num_cores = 0;
> >> -RTE_LCORE_FOREACH_WORKER(core_id) {
> >> -enabled_core_ids[num_cores] = core_id; -num_cores++; -}
> >> -
> >> -printf("\nPerf test: 1 writer, %d readers, RCU integration
> >> enabled\n", -num_cores);
> >> -
> >> -/* Create LPM table */
> >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> config.number_tbl8s =
> >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >> -TEST_LPM_ASSERT(lpm != NULL);
> >> -
> >> -/* Init RCU variable */
> >> -sz = rte_rcu_qsbr_get_memsize(num_cores);
> >> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> >> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
> >> -
> >> -rcu_cfg.v = rv;
> >> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
> >> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
> >> error; -}
> >> -
> >> -writer_done = 0;
> >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >> -
> >> -/* Launch reader threads */
> >> -for (i = 0; i < num_cores; i++)
> >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> >> -enabled_core_ids[i]);
> >> -
> >> -/* Measure add/delete. */
> >> -begin = rte_rdtsc_precise();
> >> -for (i = 0; i < RCU_ITERATIONS; i++) {
> >> -/* Add all the entries */
> >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if (rte_lpm_add(lpm,
> >> large_ldepth_route_table[j].ip, -large_ldepth_route_table[j].depth,
> >> -next_hop_add) != 0) {
> >> -printf("Failed to add iteration %d, route# %d\n", -i, j);
> >> +for (j = 1; j < 3; j++) {
> >> +if (use_rcu)
> >> +printf("\nPerf test: %d writer(s), %d reader(s),"
> >> +       " RCU integration enabled\n", j, num_cores - j); else
> >> +printf("\nPerf test: %d writer(s), %d reader(s),"
> >> +       " RCU integration disabled\n", j, num_cores - j);
> >> +
> >> +/* Create LPM table */
> >> +config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> config.number_tbl8s =
> >> +NUM_LDEPTH_ROUTE_ENTRIES; config.flags = 0; lpm =
> >> +rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >> +TEST_LPM_ASSERT(lpm != NULL);
> >> +
> >> +/* Init RCU variable */
> >> +if (use_rcu) {
> >> +sz = rte_rcu_qsbr_get_memsize(num_cores);
> >> +rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> >> +
> >> RTE_CACHE_LINE_SIZE);
> >> +rte_rcu_qsbr_init(rv, num_cores);
> >> +
> >> +rcu_cfg.v = rv;
> >> +/* Assign the RCU variable to LPM */ if (rte_lpm_rcu_qsbr_add(lpm,
> >> +&rcu_cfg) != 0) { printf("RCU variable assignment failed\n");
> >> goto error;
> >> }
> >>
> >> -/* Delete all the entries */
> >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
> >> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> >> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to delete
> >> iteration %d, route# %d\n", -i, j); -goto error; -} -} -total_cycles
> >> = rte_rdtsc_precise() - begin;
> >> +reader_f = test_lpm_rcu_qsbr_reader; } else reader_f =
> >> +test_lpm_reader;
> >>
> >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g
> >> cycles\n", -(double)total_cycles / TOTAL_WRITES);
> >> +writer_done = 0;
> >> +__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> >>
> >> -writer_done = 1;
> >> -/* Wait until all readers have exited */ -for (i = 0; i < num_cores;
> >> i++) -if (rte_eal_wait_lcore(enabled_core_ids[i]);
> >> -
> >> -rte_lpm_free(lpm);
> >> -rte_free(rv);
> >> -lpm = NULL;
> >> -rv = NULL;
> >> -
> >> -/* Test without RCU integration */
> >> -printf("\nPerf test: 1 writer, %d readers, RCU integration
> >> disabled\n", -num_cores);
> >> -
> >> -/* Create LPM table */
> >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> config.number_tbl8s =
> >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >> -TEST_LPM_ASSERT(lpm != NULL);
> >> +__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >>
> >> -writer_done = 0;
> >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >> +/* Launch reader threads */
> >> +for (i = j; i < num_cores; i++)
> >> +rte_eal_remote_launch(reader_f, NULL,
> >> +enabled_core_ids[i]);
> >>
> >> -/* Launch reader threads */
> >> -for (i = 0; i < num_cores; i++)
> >> -rte_eal_remote_launch(test_lpm_reader, NULL,
> >> -enabled_core_ids[i]);
> >> +/* Launch writer threads */
> >> +for (i = 0; i < j; i++)
> >> +rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> >> +(void *)(uintptr_t)(i + j),
> > This can be just 'j'?
> >
> >> +enabled_core_ids[i]);
> >>
> >> -/* Measure add/delete. */
> >> -begin = rte_rdtsc_precise();
> >> -for (i = 0; i < RCU_ITERATIONS; i++) {
> >> -/* Add all the entries */
> >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> >> -if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> >> -large_ldepth_route_table[j].depth,
> >> -next_hop_add) != 0) {
> >> -printf("Failed to add iteration %d, route#
> >> %d\n",
> >> -i, j);
> >> +/* Wait for writer threads */
> >> +for (i = 0; i < j; i++)
> >> +if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> >> goto error;
> >> -}
> >>
> >> -/* Delete all the entries */
> >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> >> -if (rte_lpm_delete(lpm,
> >> large_ldepth_route_table[j].ip,
> >> -large_ldepth_route_table[j].depth) != 0) {
> >> -printf("Failed to delete iteration %d, route#
> >> %d\n",
> >> -i, j);
> >> -goto error;
> >> -}
> >> +printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> >> +printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> >> +printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> >> +__atomic_load_n(&gwrite_cycles,
> >> __ATOMIC_RELAXED)
> >> +/ TOTAL_WRITES);
> >> +
> >> +writer_done = 1;
> >> +/* Wait until all readers have exited */
> >> +for (i = j; i < num_cores; i++)
> >> +rte_eal_wait_lcore(enabled_core_ids[i]);
> >> +
> >> +rte_lpm_free(lpm);
> >> +rte_free(rv);
> >> +lpm = NULL;
> >> +rv = NULL;
> >> }
> >> -total_cycles = rte_rdtsc_precise() - begin;
> >> -
> >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> >> -printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> >> -printf("Average LPM Add/Del: %g cycles\n",
> >> -(double)total_cycles / TOTAL_WRITES);
> >> -
> >> -writer_done = 1;
> >> -/* Wait until all readers have exited */
> >> -for (i = 0; i < num_cores; i++)
> >> -rte_eal_wait_lcore(enabled_core_ids[i]);
> >> -
> >> -rte_lpm_free(lpm);
> >>
> >> return 0;
> >>
> >> @@ -946,9 +767,8 @@ test_lpm_perf(void)
> >> rte_lpm_delete_all(lpm);
> >> rte_lpm_free(lpm);
> >>
> >> -test_lpm_rcu_perf();
> >> -
> >> -test_lpm_rcu_perf_multi_writer();
> >> +test_lpm_rcu_perf_multi_writer(0);
> >> +test_lpm_rcu_perf_multi_writer(1);
> >>
> >> return 0;
> >> }
> >> --
> >> 2.17.1
> 


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-03  5:32         ` Honnappa Nagarahalli
@ 2020-11-03 14:03           ` Dharmik Thakkar
  2020-11-03 14:51             ` Honnappa Nagarahalli
  2020-11-03 18:01             ` Medvedkin, Vladimir
  0 siblings, 2 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03 14:03 UTC (permalink / raw)
  To: Honnappa Nagarahalli; +Cc: Bruce Richardson, Vladimir Medvedkin, dev, nd



> On Nov 2, 2020, at 11:32 PM, Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> 
> <snip>
> 
>>>> 
>>>> Avoid code duplication by combining single and multi threaded tests
>>>> 
>>>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>>>> ---
>>>> app/test/test_lpm_perf.c | 362
>>>> ++++++++++-----------------------------
>>>> 1 file changed, 91 insertions(+), 271 deletions(-)
>>>> 
>>>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
>>>> index
>>>> 224c92fa3d65..229c835c23f7 100644
>>>> --- a/app/test/test_lpm_perf.c
>>>> +++ b/app/test/test_lpm_perf.c
>>>> @@ -67,6 +67,12 @@ enum {
>>>> IP_CLASS_C
>>>> };
>>>> 
>>>> +enum {
>>>> +SINGLE_WRITER = 1,
>>>> +MULTI_WRITER_1,
>>>> +MULTI_WRITER_2
>>>> +};
>>> Do we need this? Can we use the number of cores instead?
>>> 
>> 
>> There are 3 combinations of writes (adds/deletes):
>> 1. Write all the entries - in case of a single writer
>> 2. Write half of the entries - in case of multiple writers
>> 3. Write the remaining half of the entries - in case of multiple writers
>> 
>> So, I think this is required.
> IMO, this is not scalable. Essentially, we need 2 parameters to divide the routes among the writer threads: 1) the total number of writers and 2) the core ID in the linear space.
> Creating a structure with these 2 parameters and passing it to the writer thread would be better and more scalable.

Yes, agreed, this is only applicable to 2 writers. Currently, the multi-writer test is limited to a maximum of 2 writers.
To support more writers, we need something like this (which I believe is in line with your suggestion):
1. Calculate what each writer will write: single_insert = TOTAL_WRITES / num_writers
2. Pass core ID in linear space as an argument to the writer function: pos_core
3. Calculate si and ei in the writer function: si = pos_core * single_insert; ei = si + single_insert

I can update the patch to enable more than 2 writers.
Do you also suggest we expand the scope of the test to cover more than 2 writers?
This will increase the time the test takes to run (which is already significant even with 2 writers).

> 
>> 
>>>> +
>>>> /* struct route_rule_count defines the total number of rules in
>>>> following a/b/c
>>>> * each item in a[]/b[]/c[] is the number of common IP address class
>>>> A/B/C, not
>>>> * including the ones for private local network.
>>>> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg)  {
>> unsigned
>>>> int i, j, si, ei; uint64_t begin, total_cycles; -uint8_t core_id =
>>>> (uint8_t)((uintptr_t)arg);
>>>> +uint8_t writer_id = (uint8_t)((uintptr_t)arg);
>>>> uint32_t next_hop_add = 0xAA;
>>>> 
>>>> -/* 2 writer threads are used */
>>>> -if (core_id % 2 == 0) {
>>>> +/* Single writer (writer_id = 1) */
>>>> +if (writer_id == SINGLE_WRITER) {
>>>> +si = 0;
>>>> +ei = NUM_LDEPTH_ROUTE_ENTRIES;
>>>> +}
>>>> +/* 2 Writers (writer_id = 2/3)*/
>>>> +else if (writer_id == MULTI_WRITER_1) {
>>>> si = 0;
>>>> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>>>> } else {
>>>> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i = 0;
>>>> i < RCU_ITERATIONS; i++) {
>>>> /* Add all the entries */
>>>> for (j = si; j < ei; j++) {
>>>> -pthread_mutex_lock(&lpm_mutex);
>>>> +if (writer_id != SINGLE_WRITER)
>>>> +pthread_mutex_lock(&lpm_mutex);
>>>> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>>>> large_ldepth_route_table[j].depth,
>>>> next_hop_add) != 0) {
>>>> printf("Failed to add iteration %d, route# %d\n", i, j);
>>>> -pthread_mutex_unlock(&lpm_mutex);
>>>> +if (writer_id != SINGLE_WRITER)
>>>> +
>>>> pthread_mutex_unlock(&lpm_mutex);
>>>> return -1;
>>>> }
>>>> -pthread_mutex_unlock(&lpm_mutex);
>>>> +if (writer_id != SINGLE_WRITER)
>>>> +pthread_mutex_unlock(&lpm_mutex);
>>>> }
>>>> 
>>>> /* Delete all the entries */
>>>> for (j = si; j < ei; j++) {
>>>> -pthread_mutex_lock(&lpm_mutex);
>>>> +if (writer_id != SINGLE_WRITER)
>>>> +pthread_mutex_lock(&lpm_mutex);
>>>> if (rte_lpm_delete(lpm,
>>>> large_ldepth_route_table[j].ip,
>>>> large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete
>>>> iteration %d, route# %d\n", i, j); -pthread_mutex_unlock(&lpm_mutex);
>>>> +if (writer_id != SINGLE_WRITER)
>>>> +
>>>> pthread_mutex_unlock(&lpm_mutex);
>>>> return -1;
>>>> }
>>>> -pthread_mutex_unlock(&lpm_mutex);
>>>> +if (writer_id != SINGLE_WRITER)
>>>> +pthread_mutex_unlock(&lpm_mutex);
>>>> }
>>>> }
>>>> 
>>>> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>>> 
>>>> /*
>>>> * Functional test:
>>>> - * 2 writers, rest are readers
>>>> + * 1/2 writers, rest are readers
>>>> */
>>>> static int
>>>> -test_lpm_rcu_perf_multi_writer(void)
>>>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>>>> {
>>>> struct rte_lpm_config config;
>>>> size_t sz;
>>>> -unsigned int i;
>>>> +unsigned int i, j;
>>>> uint16_t core_id;
>>>> struct rte_lpm_rcu_config rcu_cfg = {0};
>>>> +int (*reader_f)(void *arg) = NULL;
>>>> 
>>>> if (rte_lcore_count() < 3) {
>>>> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
>>>> least 3\n"); @@ -504,273 +522,76 @@
>>>> test_lpm_rcu_perf_multi_writer(void)
>>>> num_cores++;
>>>> }
>>>> 
>>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration
>>>> enabled\n", -num_cores - 2);
>>>> -
>>>> -/* Create LPM table */
>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>> config.number_tbl8s =
>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>> -
>>>> -/* Init RCU variable */
>>>> -sz = rte_rcu_qsbr_get_memsize(num_cores);
>>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
>>>> -
>>>> -rcu_cfg.v = rv;
>>>> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
>>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
>>>> error; -}
>>>> -
>>>> -writer_done = 0;
>>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>>> -
>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>> -
>>>> -/* Launch reader threads */
>>>> -for (i = 2; i < num_cores; i++)
>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>>>> -enabled_core_ids[i]);
>>>> -
>>>> -/* Launch writer threads */
>>>> -for (i = 0; i < 2; i++)
>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>>>> -(void *)(uintptr_t)i,
>>>> -enabled_core_ids[i]);
>>>> -
>>>> -/* Wait for writer threads */
>>>> -for (i = 0; i < 2; i++)
>>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
>>>> -
>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
>>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
>>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
>>>> -
>>>> -writer_done = 1;
>>>> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
>>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
>>>> -
>>>> -rte_lpm_free(lpm);
>>>> -rte_free(rv);
>>>> -lpm = NULL;
>>>> -rv = NULL;
>>>> -
>>>> -/* Test without RCU integration */
>>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration
>>>> disabled\n", -num_cores - 2);
>>>> -
>>>> -/* Create LPM table */
>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>> config.number_tbl8s =
>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>> -
>>>> -writer_done = 0;
>>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>> -
>>>> -/* Launch reader threads */
>>>> -for (i = 2; i < num_cores; i++)
>>>> -rte_eal_remote_launch(test_lpm_reader, NULL, -enabled_core_ids[i]);
>>>> -
>>>> -/* Launch writer threads */
>>>> -for (i = 0; i < 2; i++)
>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>>>> -(void *)(uintptr_t)i,
>>>> -enabled_core_ids[i]);
>>>> -
>>>> -/* Wait for writer threads */
>>>> -for (i = 0; i < 2; i++)
>>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
>>>> -
>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
>>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
>>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
>>>> -
>>>> -writer_done = 1;
>>>> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
>>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
>>>> -
>>>> -rte_lpm_free(lpm);
>>>> -
>>>> -return 0;
>>>> -
>>>> -error:
>>>> -writer_done = 1;
>>>> -/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore();
>>>> -
>>>> -rte_lpm_free(lpm);
>>>> -rte_free(rv);
>>>> -
>>>> -return -1;
>>>> -}
>>>> -
>>>> -/*
>>>> - * Functional test:
>>>> - * Single writer, rest are readers
>>>> - */
>>>> -static int
>>>> -test_lpm_rcu_perf(void)
>>>> -{
>>>> -struct rte_lpm_config config;
>>>> -uint64_t begin, total_cycles;
>>>> -size_t sz;
>>>> -unsigned int i, j;
>>>> -uint16_t core_id;
>>>> -uint32_t next_hop_add = 0xAA;
>>>> -struct rte_lpm_rcu_config rcu_cfg = {0};
>>>> -
>>>> -if (rte_lcore_count() < 2) {
>>>> -printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
>>>> least 2\n"); -return TEST_SKIPPED; -}
>>>> -
>>>> -num_cores = 0;
>>>> -RTE_LCORE_FOREACH_WORKER(core_id) {
>>>> -enabled_core_ids[num_cores] = core_id; -num_cores++; -}
>>>> -
>>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration
>>>> enabled\n", -num_cores);
>>>> -
>>>> -/* Create LPM table */
>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>> config.number_tbl8s =
>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>> -
>>>> -/* Init RCU variable */
>>>> -sz = rte_rcu_qsbr_get_memsize(num_cores);
>>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
>>>> -
>>>> -rcu_cfg.v = rv;
>>>> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
>>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
>>>> error; -}
>>>> -
>>>> -writer_done = 0;
>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>> -
>>>> -/* Launch reader threads */
>>>> -for (i = 0; i < num_cores; i++)
>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>>>> -enabled_core_ids[i]);
>>>> -
>>>> -/* Measure add/delete. */
>>>> -begin = rte_rdtsc_precise();
>>>> -for (i = 0; i < RCU_ITERATIONS; i++) {
>>>> -/* Add all the entries */
>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if (rte_lpm_add(lpm,
>>>> large_ldepth_route_table[j].ip, -large_ldepth_route_table[j].depth,
>>>> -next_hop_add) != 0) {
>>>> -printf("Failed to add iteration %d, route# %d\n", -i, j);
>>>> +for (j = 1; j < 3; j++) {
>>>> +if (use_rcu)
>>>> +printf("\nPerf test: %d writer(s), %d reader(s),"
>>>> +       " RCU integration enabled\n", j, num_cores - j); else
>>>> +printf("\nPerf test: %d writer(s), %d reader(s),"
>>>> +       " RCU integration disabled\n", j, num_cores - j);
>>>> +
>>>> +/* Create LPM table */
>>>> +config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> config.number_tbl8s =
>>>> +NUM_LDEPTH_ROUTE_ENTRIES; config.flags = 0; lpm =
>>>> +rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>> +TEST_LPM_ASSERT(lpm != NULL);
>>>> +
>>>> +/* Init RCU variable */
>>>> +if (use_rcu) {
>>>> +sz = rte_rcu_qsbr_get_memsize(num_cores);
>>>> +rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>>>> +
>>>> RTE_CACHE_LINE_SIZE);
>>>> +rte_rcu_qsbr_init(rv, num_cores);
>>>> +
>>>> +rcu_cfg.v = rv;
>>>> +/* Assign the RCU variable to LPM */ if (rte_lpm_rcu_qsbr_add(lpm,
>>>> +&rcu_cfg) != 0) { printf("RCU variable assignment failed\n");
>>>> goto error;
>>>> }
>>>> 
>>>> -/* Delete all the entries */
>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
>>>> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>>>> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to delete
>>>> iteration %d, route# %d\n", -i, j); -goto error; -} -} -total_cycles
>>>> = rte_rdtsc_precise() - begin;
>>>> +reader_f = test_lpm_rcu_qsbr_reader; } else reader_f =
>>>> +test_lpm_reader;
>>>> 
>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g
>>>> cycles\n", -(double)total_cycles / TOTAL_WRITES);
>>>> +writer_done = 0;
>>>> +__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>>> 
>>>> -writer_done = 1;
>>>> -/* Wait until all readers have exited */ -for (i = 0; i < num_cores;
>>>> i++) -if (rte_eal_wait_lcore(enabled_core_ids[i]);
>>>> -
>>>> -rte_lpm_free(lpm);
>>>> -rte_free(rv);
>>>> -lpm = NULL;
>>>> -rv = NULL;
>>>> -
>>>> -/* Test without RCU integration */
>>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration
>>>> disabled\n", -num_cores);
>>>> -
>>>> -/* Create LPM table */
>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>> config.number_tbl8s =
>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>> +__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>> 
>>>> -writer_done = 0;
>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>> +/* Launch reader threads */
>>>> +for (i = j; i < num_cores; i++)
>>>> +rte_eal_remote_launch(reader_f, NULL,
>>>> +enabled_core_ids[i]);
>>>> 
>>>> -/* Launch reader threads */
>>>> -for (i = 0; i < num_cores; i++)
>>>> -rte_eal_remote_launch(test_lpm_reader, NULL,
>>>> -enabled_core_ids[i]);
>>>> +/* Launch writer threads */
>>>> +for (i = 0; i < j; i++)
>>>> +rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>>>> +(void *)(uintptr_t)(i + j),
>>> This can be just 'j'?
>>> 
>>>> +enabled_core_ids[i]);
>>>> 
>>>> -/* Measure add/delete. */
>>>> -begin = rte_rdtsc_precise();
>>>> -for (i = 0; i < RCU_ITERATIONS; i++) {
>>>> -/* Add all the entries */
>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>>>> -if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>>>> -large_ldepth_route_table[j].depth,
>>>> -next_hop_add) != 0) {
>>>> -printf("Failed to add iteration %d, route#
>>>> %d\n",
>>>> -i, j);
>>>> +/* Wait for writer threads */
>>>> +for (i = 0; i < j; i++)
>>>> +if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>>>> goto error;
>>>> -}
>>>> 
>>>> -/* Delete all the entries */
>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>>>> -if (rte_lpm_delete(lpm,
>>>> large_ldepth_route_table[j].ip,
>>>> -large_ldepth_route_table[j].depth) != 0) {
>>>> -printf("Failed to delete iteration %d, route#
>>>> %d\n",
>>>> -i, j);
>>>> -goto error;
>>>> -}
>>>> +printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>>>> +printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>>>> +printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>>>> +__atomic_load_n(&gwrite_cycles,
>>>> __ATOMIC_RELAXED)
>>>> +/ TOTAL_WRITES);
>>>> +
>>>> +writer_done = 1;
>>>> +/* Wait until all readers have exited */
>>>> +for (i = j; i < num_cores; i++)
>>>> +rte_eal_wait_lcore(enabled_core_ids[i]);
>>>> +
>>>> +rte_lpm_free(lpm);
>>>> +rte_free(rv);
>>>> +lpm = NULL;
>>>> +rv = NULL;
>>>> }
>>>> -total_cycles = rte_rdtsc_precise() - begin;
>>>> -
>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>>>> -printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>>>> -printf("Average LPM Add/Del: %g cycles\n",
>>>> -(double)total_cycles / TOTAL_WRITES);
>>>> -
>>>> -writer_done = 1;
>>>> -/* Wait until all readers have exited */
>>>> -for (i = 0; i < num_cores; i++)
>>>> -rte_eal_wait_lcore(enabled_core_ids[i]);
>>>> -
>>>> -rte_lpm_free(lpm);
>>>> 
>>>> return 0;
>>>> 
>>>> @@ -946,9 +767,8 @@ test_lpm_perf(void)
>>>> rte_lpm_delete_all(lpm);
>>>> rte_lpm_free(lpm);
>>>> 
>>>> -test_lpm_rcu_perf();
>>>> -
>>>> -test_lpm_rcu_perf_multi_writer();
>>>> +test_lpm_rcu_perf_multi_writer(0);
>>>> +test_lpm_rcu_perf_multi_writer(1);
>>>> 
>>>> return 0;
>>>> }
>>>> --
>>>> 2.17.1


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-03 14:03           ` Dharmik Thakkar
@ 2020-11-03 14:51             ` Honnappa Nagarahalli
  2020-11-03 18:01             ` Medvedkin, Vladimir
  1 sibling, 0 replies; 52+ messages in thread
From: Honnappa Nagarahalli @ 2020-11-03 14:51 UTC (permalink / raw)
  To: Dharmik Thakkar
  Cc: Bruce Richardson, Vladimir Medvedkin, dev, nd, Honnappa Nagarahalli, nd

<snip>

> >>>>
> >>>> Avoid code duplication by combining single and multi threaded tests
> >>>>
> >>>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> >>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> >>>> ---
> >>>> app/test/test_lpm_perf.c | 362
> >>>> ++++++++++-----------------------------
> >>>> 1 file changed, 91 insertions(+), 271 deletions(-)
> >>>>
> >>>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> >>>> index
> >>>> 224c92fa3d65..229c835c23f7 100644
> >>>> --- a/app/test/test_lpm_perf.c
> >>>> +++ b/app/test/test_lpm_perf.c
> >>>> @@ -67,6 +67,12 @@ enum {
> >>>> IP_CLASS_C
> >>>> };
> >>>>
> >>>> +enum {
> >>>> +SINGLE_WRITER = 1,
> >>>> +MULTI_WRITER_1,
> >>>> +MULTI_WRITER_2
> >>>> +};
> >>> Do we need this? Can we use the number of cores instead?
> >>>
> >>
> >> There are 3 combinations of writes (adds/deletes):
> >> 1. Write all the entries - in case of a single writer
> >> 2. Write half of the entries - in case of multiple writers
> >> 3. Write remaining half of the entries - in case of multiple writers
> >>
> >> So, I think this is required.
> > IMO, this is not scalable. Essentially, we need 2 parameters to divide the
> routes among each writer thread. We need 2 parameters, 1) total number of
> writers 2) the core ID in the linear space.
> > Creating a structure with these 2 and passing that to the writer thread
> would be better and scalable.
> 
> Yes, agreed this is only applicable for 2 writers. Currently, the multi writer
> test is only limited to a maximum of 2 writers.
> To support more number of writers, we need something like this (which I
> believe is in lines with your suggestion):
> 1. Calculate what each writer will write: single_insert = TOTAL_WRITES / num_writers
> 2. Pass core ID in linear space as an argument to the writer function: pos_core
> 3. Calculate si and ei in the writer function: si = pos_core * single_insert; ei = si + single_insert
> 
> I can update the patch to enable more than 2 writers.
> Do you also suggest we expand the scope of the test to test with more than
> 2 writers?
> This will increase the time for which the test is running (which currently is
> significant even with 2 writers).
Agreed: no to increasing the number of writers, yes to making the code more generic.

> 
> >
> >>
> >>>> +
> >>>> /* struct route_rule_count defines the total number of rules in
> >>>> following a/b/c
> >>>> * each item in a[]/b[]/c[] is the number of common IP address class
> >>>> A/B/C, not
> >>>> * including the ones for private local network.
> >>>> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg)  {
> >> unsigned
> >>>> int i, j, si, ei; uint64_t begin, total_cycles; -uint8_t core_id =
> >>>> (uint8_t)((uintptr_t)arg);
> >>>> +uint8_t writer_id = (uint8_t)((uintptr_t)arg);
> >>>> uint32_t next_hop_add = 0xAA;
> >>>>
> >>>> -/* 2 writer threads are used */
> >>>> -if (core_id % 2 == 0) {
> >>>> +/* Single writer (writer_id = 1) */ if (writer_id ==
> >>>> +SINGLE_WRITER) { si = 0; ei = NUM_LDEPTH_ROUTE_ENTRIES; }
> >>>> +/* 2 Writers (writer_id = 2/3)*/
> >>>> +else if (writer_id == MULTI_WRITER_1) {
> >>>> si = 0;
> >>>> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> >>>> } else {
> >>>> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i =
> >>>> 0; i < RCU_ITERATIONS; i++) {
> >>>> /* Add all the entries */
> >>>> for (j = si; j < ei; j++) {
> >>>> -pthread_mutex_lock(&lpm_mutex);
> >>>> +if (writer_id != SINGLE_WRITER)
> >>>> +pthread_mutex_lock(&lpm_mutex);
> >>>> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> >>>> large_ldepth_route_table[j].depth,
> >>>> next_hop_add) != 0) {
> >>>> printf("Failed to add iteration %d, route# %d\n", i, j);
> >>>> -pthread_mutex_unlock(&lpm_mutex);
> >>>> +if (writer_id != SINGLE_WRITER)
> >>>> +
> >>>> pthread_mutex_unlock(&lpm_mutex);
> >>>> return -1;
> >>>> }
> >>>> -pthread_mutex_unlock(&lpm_mutex);
> >>>> +if (writer_id != SINGLE_WRITER)
> >>>> +pthread_mutex_unlock(&lpm_mutex);
> >>>> }
> >>>>
> >>>> /* Delete all the entries */
> >>>> for (j = si; j < ei; j++) {
> >>>> -pthread_mutex_lock(&lpm_mutex);
> >>>> +if (writer_id != SINGLE_WRITER)
> >>>> +pthread_mutex_lock(&lpm_mutex);
> >>>> if (rte_lpm_delete(lpm,
> >>>> large_ldepth_route_table[j].ip,
> >>>> large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete
> >>>> iteration %d, route# %d\n", i, j);
> >>>> -pthread_mutex_unlock(&lpm_mutex);
> >>>> +if (writer_id != SINGLE_WRITER)
> >>>> +
> >>>> pthread_mutex_unlock(&lpm_mutex);
> >>>> return -1;
> >>>> }
> >>>> -pthread_mutex_unlock(&lpm_mutex);
> >>>> +if (writer_id != SINGLE_WRITER)
> >>>> +pthread_mutex_unlock(&lpm_mutex);
> >>>> }
> >>>> }
> >>>>
> >>>> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
> >>>>
> >>>> /*
> >>>> * Functional test:
> >>>> - * 2 writers, rest are readers
> >>>> + * 1/2 writers, rest are readers
> >>>> */
> >>>> static int
> >>>> -test_lpm_rcu_perf_multi_writer(void)
> >>>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
> >>>> {
> >>>> struct rte_lpm_config config;
> >>>> size_t sz;
> >>>> -unsigned int i;
> >>>> +unsigned int i, j;
> >>>> uint16_t core_id;
> >>>> struct rte_lpm_rcu_config rcu_cfg = {0};
> >>>> +int (*reader_f)(void *arg) = NULL;
> >>>>
> >>>> if (rte_lcore_count() < 3) {
> >>>> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
> >>>> least 3\n"); @@ -504,273 +522,76 @@
> >>>> test_lpm_rcu_perf_multi_writer(void)
> >>>> num_cores++;
> >>>> }
> >>>>
> >>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration
> >>>> enabled\n", -num_cores - 2);
> >>>> -
> >>>> -/* Create LPM table */
> >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> >> config.number_tbl8s =
> >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >>>> -TEST_LPM_ASSERT(lpm != NULL);
> >>>> -
> >>>> -/* Init RCU variable */
> >>>> -sz = rte_rcu_qsbr_get_memsize(num_cores);
> >>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> >>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
> >>>> -
> >>>> -rcu_cfg.v = rv;
> >>>> -/* Assign the RCU variable to LPM */ -if
> >>>> (rte_lpm_rcu_qsbr_add(lpm,
> >>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n");
> >>>> -goto error; -}
> >>>> -
> >>>> -writer_done = 0;
> >>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> >>>> -
> >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >>>> -
> >>>> -/* Launch reader threads */
> >>>> -for (i = 2; i < num_cores; i++)
> >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> >>>> -enabled_core_ids[i]);
> >>>> -
> >>>> -/* Launch writer threads */
> >>>> -for (i = 0; i < 2; i++)
> >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> >>>> -(void *)(uintptr_t)i,
> >>>> -enabled_core_ids[i]);
> >>>> -
> >>>> -/* Wait for writer threads */
> >>>> -for (i = 0; i < 2; i++)
> >>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
> >>>> -
> >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
> >>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
> >>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
> >>>> -
> >>>> -writer_done = 1;
> >>>> -/* Wait until all readers have exited */ -for (i = 2; i <
> >>>> num_cores;
> >>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
> >>>> -
> >>>> -rte_lpm_free(lpm);
> >>>> -rte_free(rv);
> >>>> -lpm = NULL;
> >>>> -rv = NULL;
> >>>> -
> >>>> -/* Test without RCU integration */ -printf("\nPerf test: 2
> >>>> writers, %d readers, RCU integration disabled\n", -num_cores - 2);
> >>>> -
> >>>> -/* Create LPM table */
> >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> >> config.number_tbl8s =
> >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >>>> -TEST_LPM_ASSERT(lpm != NULL);
> >>>> -
> >>>> -writer_done = 0;
> >>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >>>> -
> >>>> -/* Launch reader threads */
> >>>> -for (i = 2; i < num_cores; i++)
> >>>> -rte_eal_remote_launch(test_lpm_reader, NULL,
> >>>> -enabled_core_ids[i]);
> >>>> -
> >>>> -/* Launch writer threads */
> >>>> -for (i = 0; i < 2; i++)
> >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> >>>> -(void *)(uintptr_t)i,
> >>>> -enabled_core_ids[i]);
> >>>> -
> >>>> -/* Wait for writer threads */
> >>>> -for (i = 0; i < 2; i++)
> >>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
> >>>> -
> >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
> >>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
> >>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
> >>>> -
> >>>> -writer_done = 1;
> >>>> -/* Wait until all readers have exited */ -for (i = 2; i <
> >>>> num_cores;
> >>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
> >>>> -
> >>>> -rte_lpm_free(lpm);
> >>>> -
> >>>> -return 0;
> >>>> -
> >>>> -error:
> >>>> -writer_done = 1;
> >>>> -/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore();
> >>>> -
> >>>> -rte_lpm_free(lpm);
> >>>> -rte_free(rv);
> >>>> -
> >>>> -return -1;
> >>>> -}
> >>>> -
> >>>> -/*
> >>>> - * Functional test:
> >>>> - * Single writer, rest are readers
> >>>> - */
> >>>> -static int
> >>>> -test_lpm_rcu_perf(void)
> >>>> -{
> >>>> -struct rte_lpm_config config;
> >>>> -uint64_t begin, total_cycles;
> >>>> -size_t sz;
> >>>> -unsigned int i, j;
> >>>> -uint16_t core_id;
> >>>> -uint32_t next_hop_add = 0xAA;
> >>>> -struct rte_lpm_rcu_config rcu_cfg = {0};
> >>>> -
> >>>> -if (rte_lcore_count() < 2) {
> >>>> -printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
> >>>> least 2\n"); -return TEST_SKIPPED; -}
> >>>> -
> >>>> -num_cores = 0;
> >>>> -RTE_LCORE_FOREACH_WORKER(core_id) { -
> enabled_core_ids[num_cores] =
> >>>> core_id; -num_cores++; -}
> >>>> -
> >>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration
> >>>> enabled\n", -num_cores);
> >>>> -
> >>>> -/* Create LPM table */
> >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> >> config.number_tbl8s =
> >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >>>> -TEST_LPM_ASSERT(lpm != NULL);
> >>>> -
> >>>> -/* Init RCU variable */
> >>>> -sz = rte_rcu_qsbr_get_memsize(num_cores);
> >>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> >>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
> >>>> -
> >>>> -rcu_cfg.v = rv;
> >>>> -/* Assign the RCU variable to LPM */ -if
> >>>> (rte_lpm_rcu_qsbr_add(lpm,
> >>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n");
> >>>> -goto error; -}
> >>>> -
> >>>> -writer_done = 0;
> >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >>>> -
> >>>> -/* Launch reader threads */
> >>>> -for (i = 0; i < num_cores; i++)
> >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> >>>> -enabled_core_ids[i]);
> >>>> -
> >>>> -/* Measure add/delete. */
> >>>> -begin = rte_rdtsc_precise();
> >>>> -for (i = 0; i < RCU_ITERATIONS; i++) {
> >>>> -/* Add all the entries */
> >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
> >>>> (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> >>>> -large_ldepth_route_table[j].depth,
> >>>> -next_hop_add) != 0) {
> >>>> -printf("Failed to add iteration %d, route# %d\n", -i, j);
> >>>> +for (j = 1; j < 3; j++) {
> >>>> +if (use_rcu)
> >>>> +printf("\nPerf test: %d writer(s), %d reader(s),"
> >>>> +       " RCU integration enabled\n", j, num_cores - j); else
> >>>> +printf("\nPerf test: %d writer(s), %d reader(s),"
> >>>> +       " RCU integration disabled\n", j, num_cores - j);
> >>>> +
> >>>> +/* Create LPM table */
> >>>> +config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> >> config.number_tbl8s =
> >>>> +NUM_LDEPTH_ROUTE_ENTRIES; config.flags = 0; lpm =
> >>>> +rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >>>> +TEST_LPM_ASSERT(lpm != NULL);
> >>>> +
> >>>> +/* Init RCU variable */
> >>>> +if (use_rcu) {
> >>>> +sz = rte_rcu_qsbr_get_memsize(num_cores);
> >>>> +rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> >>>> +
> >>>> RTE_CACHE_LINE_SIZE);
> >>>> +rte_rcu_qsbr_init(rv, num_cores);
> >>>> +
> >>>> +rcu_cfg.v = rv;
> >>>> +/* Assign the RCU variable to LPM */ if (rte_lpm_rcu_qsbr_add(lpm,
> >>>> +&rcu_cfg) != 0) { printf("RCU variable assignment failed\n");
> >>>> goto error;
> >>>> }
> >>>>
> >>>> -/* Delete all the entries */
> >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
> >>>> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> >>>> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to
> >>>> delete iteration %d, route# %d\n", -i, j); -goto error; -} -}
> >>>> -total_cycles = rte_rdtsc_precise() - begin;
> >>>> +reader_f = test_lpm_rcu_qsbr_reader; } else reader_f =
> >>>> +test_lpm_reader;
> >>>>
> >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g
> >>>> cycles\n", -(double)total_cycles / TOTAL_WRITES);
> >>>> +writer_done = 0;
> >>>> +__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> >>>>
> >>>> -writer_done = 1;
> >>>> -/* Wait until all readers have exited */ -for (i = 0; i <
> >>>> num_cores;
> >>>> i++) -if (rte_eal_wait_lcore(enabled_core_ids[i]);
> >>>> -
> >>>> -rte_lpm_free(lpm);
> >>>> -rte_free(rv);
> >>>> -lpm = NULL;
> >>>> -rv = NULL;
> >>>> -
> >>>> -/* Test without RCU integration */ -printf("\nPerf test: 1 writer,
> >>>> %d readers, RCU integration disabled\n", -num_cores);
> >>>> -
> >>>> -/* Create LPM table */
> >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> >> config.number_tbl8s =
> >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >>>> -TEST_LPM_ASSERT(lpm != NULL);
> >>>> +__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >>>>
> >>>> -writer_done = 0;
> >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >>>> +/* Launch reader threads */
> >>>> +for (i = j; i < num_cores; i++)
> >>>> +rte_eal_remote_launch(reader_f, NULL, enabled_core_ids[i]);
> >>>>
> >>>> -/* Launch reader threads */
> >>>> -for (i = 0; i < num_cores; i++)
> >>>> -rte_eal_remote_launch(test_lpm_reader, NULL,
> >>>> -enabled_core_ids[i]);
> >>>> +/* Launch writer threads */
> >>>> +for (i = 0; i < j; i++)
> >>>> +rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> >>>> +(void *)(uintptr_t)(i + j),
> >>> This can be just 'j'?
> >>>
> >>>> +enabled_core_ids[i]);
> >>>>
> >>>> -/* Measure add/delete. */
> >>>> -begin = rte_rdtsc_precise();
> >>>> -for (i = 0; i < RCU_ITERATIONS; i++) {
> >>>> -/* Add all the entries */
> >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
> >>>> (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> >>>> -large_ldepth_route_table[j].depth,
> >>>> -next_hop_add) != 0) {
> >>>> -printf("Failed to add iteration %d, route# %d\n", -i, j);
> >>>> +/* Wait for writer threads */
> >>>> +for (i = 0; i < j; i++)
> >>>> +if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> >>>> goto error;
> >>>> -}
> >>>>
> >>>> -/* Delete all the entries */
> >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
> >>>> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> >>>> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to
> >>>> delete iteration %d, route# %d\n", -i, j); -goto error; -}
> >>>> +printf("Total LPM Adds: %d\n", TOTAL_WRITES); printf("Total LPM
> >>>> +Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del:
> >>>> +%"PRIu64" cycles\n", __atomic_load_n(&gwrite_cycles,
> >>>> __ATOMIC_RELAXED)
> >>>> +/ TOTAL_WRITES);
> >>>> +
> >>>> +writer_done = 1;
> >>>> +/* Wait until all readers have exited */ for (i = j; i <
> >>>> +num_cores; i++) rte_eal_wait_lcore(enabled_core_ids[i]);
> >>>> +
> >>>> +rte_lpm_free(lpm);
> >>>> +rte_free(rv);
> >>>> +lpm = NULL;
> >>>> +rv = NULL;
> >>>> }
> >>>> -total_cycles = rte_rdtsc_precise() - begin;
> >>>> -
> >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g
> >>>> cycles\n", -(double)total_cycles / TOTAL_WRITES);
> >>>> -
> >>>> -writer_done = 1;
> >>>> -/* Wait until all readers have exited */ -for (i = 0; i <
> >>>> num_cores; i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
> >>>> -
> >>>> -rte_lpm_free(lpm);
> >>>>
> >>>> return 0;
> >>>>
> >>>> @@ -946,9 +767,8 @@ test_lpm_perf(void) rte_lpm_delete_all(lpm);
> >>>> rte_lpm_free(lpm);
> >>>>
> >>>> -test_lpm_rcu_perf();
> >>>> -
> >>>> -test_lpm_rcu_perf_multi_writer();
> >>>> +test_lpm_rcu_perf_multi_writer(0);
> >>>> +test_lpm_rcu_perf_multi_writer(1);
> >>>>
> >>>> return 0;
> >>>> }
> >>>> --
> >>>> 2.17.1
> 


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-03 14:03           ` Dharmik Thakkar
  2020-11-03 14:51             ` Honnappa Nagarahalli
@ 2020-11-03 18:01             ` Medvedkin, Vladimir
  1 sibling, 0 replies; 52+ messages in thread
From: Medvedkin, Vladimir @ 2020-11-03 18:01 UTC (permalink / raw)
  To: Dharmik Thakkar, Honnappa Nagarahalli; +Cc: Bruce Richardson, dev, nd

Hi,

On 03/11/2020 14:03, Dharmik Thakkar wrote:
> 
> 
>> On Nov 2, 2020, at 11:32 PM, Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
>>
>> <snip>
>>
>>>>>
>>>>> Avoid code duplication by combining single and multi threaded tests
>>>>>
>>>>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>>>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>>>>> ---
>>>>> app/test/test_lpm_perf.c | 362
>>>>> ++++++++++-----------------------------
>>>>> 1 file changed, 91 insertions(+), 271 deletions(-)
>>>>>
>>>>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
>>>>> index
>>>>> 224c92fa3d65..229c835c23f7 100644
>>>>> --- a/app/test/test_lpm_perf.c
>>>>> +++ b/app/test/test_lpm_perf.c
>>>>> @@ -67,6 +67,12 @@ enum {
>>>>> IP_CLASS_C
>>>>> };
>>>>>
>>>>> +enum {
>>>>> +SINGLE_WRITER = 1,
>>>>> +MULTI_WRITER_1,
>>>>> +MULTI_WRITER_2
>>>>> +};
>>>> Do we need this? Can we use the number of cores instead?
>>>>
>>>
>>> There are 3 combinations of writes (adds/deletes):
>>> 1. Write all the entries - in case of a single writer
>>> 2. Write half of the entries - in case of multiple writers
>>> 3. Write remaining half of the entries - in case of multiple writers
>>>
>>> So, I think this is required.
>> IMO, this is not scalable. Essentially, we need 2 parameters to divide the routes among each writer thread. We need 2 parameters, 1) total number of writers 2) the core ID in the linear space.
>> Creating a structure with these 2 and passing that to the writer thread would be better and scalable.
> 
> Yes, agreed this is only applicable for 2 writers. Currently, the multi writer test is only limited to a maximum of 2 writers.
> To support more number of writers, we need something like this (which I believe is in lines with your suggestion):
> 1. Calculate what each writer will write: single_insert = TOTAL_WRITES / num_writers
> 2. Pass core ID in linear space as an argument to the writer function: pos_core
> 3. Calculate si and ei in the writer function: si = pos_core * single_insert; ei = si + single_insert
> 

I agree with Honnappa's suggestion; it looks good to me and is better than
the previous implementation.

> I can update the patch to enable more than 2 writers.
> Do you also suggest we expand the scope of the test to test with more than 2 writers?
> This will increase the time for which the test is running (which currently is significant even with 2 writers).
> 

I don't see any reason to increase the number of writers beyond 2.

>>
>>>
>>>>> +
>>>>> /* struct route_rule_count defines the total number of rules in
>>>>> following a/b/c
>>>>> * each item in a[]/b[]/c[] is the number of common IP address class
>>>>> A/B/C, not
>>>>> * including the ones for private local network.
>>>>> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg)  {
>>> unsigned
>>>>> int i, j, si, ei; uint64_t begin, total_cycles; -uint8_t core_id =
>>>>> (uint8_t)((uintptr_t)arg);
>>>>> +uint8_t writer_id = (uint8_t)((uintptr_t)arg);
>>>>> uint32_t next_hop_add = 0xAA;
>>>>>
>>>>> -/* 2 writer threads are used */
>>>>> -if (core_id % 2 == 0) {
>>>>> +/* Single writer (writer_id = 1) */
>>>>> +if (writer_id == SINGLE_WRITER) {
>>>>> +si = 0;
>>>>> +ei = NUM_LDEPTH_ROUTE_ENTRIES;
>>>>> +}
>>>>> +/* 2 Writers (writer_id = 2/3)*/
>>>>> +else if (writer_id == MULTI_WRITER_1) {
>>>>> si = 0;
>>>>> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>>>>> } else {
>>>>> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i = 0;
>>>>> i < RCU_ITERATIONS; i++) {
>>>>> /* Add all the entries */
>>>>> for (j = si; j < ei; j++) {
>>>>> -pthread_mutex_lock(&lpm_mutex);
>>>>> +if (writer_id != SINGLE_WRITER)
>>>>> +pthread_mutex_lock(&lpm_mutex);
>>>>> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>>>>> large_ldepth_route_table[j].depth,
>>>>> next_hop_add) != 0) {
>>>>> printf("Failed to add iteration %d, route# %d\n", i, j);
>>>>> -pthread_mutex_unlock(&lpm_mutex);
>>>>> +if (writer_id != SINGLE_WRITER)
>>>>> +
>>>>> pthread_mutex_unlock(&lpm_mutex);
>>>>> return -1;
>>>>> }
>>>>> -pthread_mutex_unlock(&lpm_mutex);
>>>>> +if (writer_id != SINGLE_WRITER)
>>>>> +pthread_mutex_unlock(&lpm_mutex);
>>>>> }
>>>>>
>>>>> /* Delete all the entries */
>>>>> for (j = si; j < ei; j++) {
>>>>> -pthread_mutex_lock(&lpm_mutex);
>>>>> +if (writer_id != SINGLE_WRITER)
>>>>> +pthread_mutex_lock(&lpm_mutex);
>>>>> if (rte_lpm_delete(lpm,
>>>>> large_ldepth_route_table[j].ip,
>>>>> large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete
>>>>> iteration %d, route# %d\n", i, j); -pthread_mutex_unlock(&lpm_mutex);
>>>>> +if (writer_id != SINGLE_WRITER)
>>>>> +
>>>>> pthread_mutex_unlock(&lpm_mutex);
>>>>> return -1;
>>>>> }
>>>>> -pthread_mutex_unlock(&lpm_mutex);
>>>>> +if (writer_id != SINGLE_WRITER)
>>>>> +pthread_mutex_unlock(&lpm_mutex);
>>>>> }
>>>>> }
>>>>>
>>>>> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>>>>
>>>>> /*
>>>>> * Functional test:
>>>>> - * 2 writers, rest are readers
>>>>> + * 1/2 writers, rest are readers
>>>>> */
>>>>> static int
>>>>> -test_lpm_rcu_perf_multi_writer(void)
>>>>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>>>>> {
>>>>> struct rte_lpm_config config;
>>>>> size_t sz;
>>>>> -unsigned int i;
>>>>> +unsigned int i, j;
>>>>> uint16_t core_id;
>>>>> struct rte_lpm_rcu_config rcu_cfg = {0};
>>>>> +int (*reader_f)(void *arg) = NULL;
>>>>>
>>>>> if (rte_lcore_count() < 3) {
>>>>> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
>>>>> least 3\n"); @@ -504,273 +522,76 @@
>>>>> test_lpm_rcu_perf_multi_writer(void)
>>>>> num_cores++;
>>>>> }
>>>>>
>>>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration
>>>>> enabled\n", -num_cores - 2);
>>>>> -
>>>>> -/* Create LPM table */
>>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>>> config.number_tbl8s =
>>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>>> -
>>>>> -/* Init RCU variable */
>>>>> -sz = rte_rcu_qsbr_get_memsize(num_cores);
>>>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>>>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
>>>>> -
>>>>> -rcu_cfg.v = rv;
>>>>> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
>>>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
>>>>> error; -}
>>>>> -
>>>>> -writer_done = 0;
>>>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>>>> -
>>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>>> -
>>>>> -/* Launch reader threads */
>>>>> -for (i = 2; i < num_cores; i++)
>>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>>>>> -enabled_core_ids[i]);
>>>>> -
>>>>> -/* Launch writer threads */
>>>>> -for (i = 0; i < 2; i++)
>>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>>>>> -(void *)(uintptr_t)i,
>>>>> -enabled_core_ids[i]);
>>>>> -
>>>>> -/* Wait for writer threads */
>>>>> -for (i = 0; i < 2; i++)
>>>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
>>>>> -
>>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
>>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
>>>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
>>>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
>>>>> -
>>>>> -writer_done = 1;
>>>>> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
>>>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
>>>>> -
>>>>> -rte_lpm_free(lpm);
>>>>> -rte_free(rv);
>>>>> -lpm = NULL;
>>>>> -rv = NULL;
>>>>> -
>>>>> -/* Test without RCU integration */
>>>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration
>>>>> disabled\n", -num_cores - 2);
>>>>> -
>>>>> -/* Create LPM table */
>>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>>> config.number_tbl8s =
>>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>>> -
>>>>> -writer_done = 0;
>>>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>>> -
>>>>> -/* Launch reader threads */
>>>>> -for (i = 2; i < num_cores; i++)
>>>>> -rte_eal_remote_launch(test_lpm_reader, NULL, -enabled_core_ids[i]);
>>>>> -
>>>>> -/* Launch writer threads */
>>>>> -for (i = 0; i < 2; i++)
>>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>>>>> -(void *)(uintptr_t)i,
>>>>> -enabled_core_ids[i]);
>>>>> -
>>>>> -/* Wait for writer threads */
>>>>> -for (i = 0; i < 2; i++)
>>>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
>>>>> -
>>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
>>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
>>>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
>>>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
>>>>> -
>>>>> -writer_done = 1;
>>>>> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
>>>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
>>>>> -
>>>>> -rte_lpm_free(lpm);
>>>>> -
>>>>> -return 0;
>>>>> -
>>>>> -error:
>>>>> -writer_done = 1;
>>>>> -/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore();
>>>>> -
>>>>> -rte_lpm_free(lpm);
>>>>> -rte_free(rv);
>>>>> -
>>>>> -return -1;
>>>>> -}
>>>>> -
>>>>> -/*
>>>>> - * Functional test:
>>>>> - * Single writer, rest are readers
>>>>> - */
>>>>> -static int
>>>>> -test_lpm_rcu_perf(void)
>>>>> -{
>>>>> -struct rte_lpm_config config;
>>>>> -uint64_t begin, total_cycles;
>>>>> -size_t sz;
>>>>> -unsigned int i, j;
>>>>> -uint16_t core_id;
>>>>> -uint32_t next_hop_add = 0xAA;
>>>>> -struct rte_lpm_rcu_config rcu_cfg = {0};
>>>>> -
>>>>> -if (rte_lcore_count() < 2) {
>>>>> -printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
>>>>> least 2\n"); -return TEST_SKIPPED; -}
>>>>> -
>>>>> -num_cores = 0;
>>>>> -RTE_LCORE_FOREACH_WORKER(core_id) {
>>>>> -enabled_core_ids[num_cores] = core_id; -num_cores++; -}
>>>>> -
>>>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration
>>>>> enabled\n", -num_cores);
>>>>> -
>>>>> -/* Create LPM table */
>>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>>> config.number_tbl8s =
>>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>>> -
>>>>> -/* Init RCU variable */
>>>>> -sz = rte_rcu_qsbr_get_memsize(num_cores);
>>>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>>>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
>>>>> -
>>>>> -rcu_cfg.v = rv;
>>>>> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
>>>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
>>>>> error; -}
>>>>> -
>>>>> -writer_done = 0;
>>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>>> -
>>>>> -/* Launch reader threads */
>>>>> -for (i = 0; i < num_cores; i++)
>>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>>>>> -enabled_core_ids[i]);
>>>>> -
>>>>> -/* Measure add/delete. */
>>>>> -begin = rte_rdtsc_precise();
>>>>> -for (i = 0; i < RCU_ITERATIONS; i++) {
>>>>> -/* Add all the entries */
>>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if (rte_lpm_add(lpm,
>>>>> large_ldepth_route_table[j].ip, -large_ldepth_route_table[j].depth,
>>>>> -next_hop_add) != 0) {
>>>>> -printf("Failed to add iteration %d, route# %d\n", -i, j);
>>>>> +for (j = 1; j < 3; j++) {
>>>>> +if (use_rcu)
>>>>> +printf("\nPerf test: %d writer(s), %d reader(s),"
>>>>> +       " RCU integration enabled\n", j, num_cores - j); else
>>>>> +printf("\nPerf test: %d writer(s), %d reader(s),"
>>>>> +       " RCU integration disabled\n", j, num_cores - j);
>>>>> +
>>>>> +/* Create LPM table */
>>>>> +config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>>> config.number_tbl8s =
>>>>> +NUM_LDEPTH_ROUTE_ENTRIES; config.flags = 0; lpm =
>>>>> +rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>>> +TEST_LPM_ASSERT(lpm != NULL);
>>>>> +
>>>>> +/* Init RCU variable */
>>>>> +if (use_rcu) {
>>>>> +sz = rte_rcu_qsbr_get_memsize(num_cores);
>>>>> +rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>>>>> +
>>>>> RTE_CACHE_LINE_SIZE);
>>>>> +rte_rcu_qsbr_init(rv, num_cores);
>>>>> +
>>>>> +rcu_cfg.v = rv;
>>>>> +/* Assign the RCU variable to LPM */ if (rte_lpm_rcu_qsbr_add(lpm,
>>>>> +&rcu_cfg) != 0) { printf("RCU variable assignment failed\n");
>>>>> goto error;
>>>>> }
>>>>>
>>>>> -/* Delete all the entries */
>>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
>>>>> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>>>>> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to delete
>>>>> iteration %d, route# %d\n", -i, j); -goto error; -} -} -total_cycles
>>>>> = rte_rdtsc_precise() - begin;
>>>>> +reader_f = test_lpm_rcu_qsbr_reader; } else reader_f =
>>>>> +test_lpm_reader;
>>>>>
>>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
>>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g
>>>>> cycles\n", -(double)total_cycles / TOTAL_WRITES);
>>>>> +writer_done = 0;
>>>>> +__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>>>>
>>>>> -writer_done = 1;
>>>>> -/* Wait until all readers have exited */ -for (i = 0; i < num_cores;
>>>>> i++) -if (rte_eal_wait_lcore(enabled_core_ids[i]);
>>>>> -
>>>>> -rte_lpm_free(lpm);
>>>>> -rte_free(rv);
>>>>> -lpm = NULL;
>>>>> -rv = NULL;
>>>>> -
>>>>> -/* Test without RCU integration */
>>>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration
>>>>> disabled\n", -num_cores);
>>>>> -
>>>>> -/* Create LPM table */
>>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>>> config.number_tbl8s =
>>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>>> +__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>>>
>>>>> -writer_done = 0;
>>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>>> +/* Launch reader threads */
>>>>> +for (i = j; i < num_cores; i++)
>>>>> +rte_eal_remote_launch(reader_f, NULL,
>>>>> +enabled_core_ids[i]);
>>>>>
>>>>> -/* Launch reader threads */
>>>>> -for (i = 0; i < num_cores; i++)
>>>>> -rte_eal_remote_launch(test_lpm_reader, NULL,
>>>>> -enabled_core_ids[i]);
>>>>> +/* Launch writer threads */
>>>>> +for (i = 0; i < j; i++)
>>>>> +rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>>>>> +(void *)(uintptr_t)(i + j),
>>>> This can be just 'j'?
>>>>
>>>>> +enabled_core_ids[i]);
>>>>>
>>>>> -/* Measure add/delete. */
>>>>> -begin = rte_rdtsc_precise();
>>>>> -for (i = 0; i < RCU_ITERATIONS; i++) {
>>>>> -/* Add all the entries */
>>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>>>>> -if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>>>>> -large_ldepth_route_table[j].depth,
>>>>> -next_hop_add) != 0) {
>>>>> -printf("Failed to add iteration %d, route#
>>>>> %d\n",
>>>>> -i, j);
>>>>> +/* Wait for writer threads */
>>>>> +for (i = 0; i < j; i++)
>>>>> +if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>>>>> goto error;
>>>>> -}
>>>>>
>>>>> -/* Delete all the entries */
>>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>>>>> -if (rte_lpm_delete(lpm,
>>>>> large_ldepth_route_table[j].ip,
>>>>> -large_ldepth_route_table[j].depth) != 0) {
>>>>> -printf("Failed to delete iteration %d, route#
>>>>> %d\n",
>>>>> -i, j);
>>>>> -goto error;
>>>>> -}
>>>>> +printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>>>>> +printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>>>>> +printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>>>>> +__atomic_load_n(&gwrite_cycles,
>>>>> __ATOMIC_RELAXED)
>>>>> +/ TOTAL_WRITES);
>>>>> +
>>>>> +writer_done = 1;
>>>>> +/* Wait until all readers have exited */
>>>>> +for (i = j; i < num_cores; i++)
>>>>> +rte_eal_wait_lcore(enabled_core_ids[i]);
>>>>> +
>>>>> +rte_lpm_free(lpm);
>>>>> +rte_free(rv);
>>>>> +lpm = NULL;
>>>>> +rv = NULL;
>>>>> }
>>>>> -total_cycles = rte_rdtsc_precise() - begin;
>>>>> -
>>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>>>>> -printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>>>>> -printf("Average LPM Add/Del: %g cycles\n",
>>>>> -(double)total_cycles / TOTAL_WRITES);
>>>>> -
>>>>> -writer_done = 1;
>>>>> -/* Wait until all readers have exited */
>>>>> -for (i = 0; i < num_cores; i++)
>>>>> -rte_eal_wait_lcore(enabled_core_ids[i]);
>>>>> -
>>>>> -rte_lpm_free(lpm);
>>>>>
>>>>> return 0;
>>>>>
>>>>> @@ -946,9 +767,8 @@ test_lpm_perf(void)
>>>>> rte_lpm_delete_all(lpm);
>>>>> rte_lpm_free(lpm);
>>>>>
>>>>> -test_lpm_rcu_perf();
>>>>> -
>>>>> -test_lpm_rcu_perf_multi_writer();
>>>>> +test_lpm_rcu_perf_multi_writer(0);
>>>>> +test_lpm_rcu_perf_multi_writer(1);
>>>>>
>>>>> return 0;
>>>>> }
>>>>> --
>>>>> 2.17.1
> 

-- 
Regards,
Vladimir

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test
  2020-11-03  5:12   ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
                       ` (3 preceding siblings ...)
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
@ 2020-11-03 22:23     ` Dharmik Thakkar
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
                         ` (4 more replies)
  4 siblings, 5 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03 22:23 UTC (permalink / raw)
  Cc: dev, nd, Dharmik Thakkar

Fix LPM adds, LPM deletes, and cycle calculation.
Return error if LPM add/delete fails in multi-writer test.
Return error if single or multi writer test fails.
Remove redundant error checking for readers.
Combine single and multi threaded test cases to avoid code duplication.
---
v4:
 - Return error if rcu qsbr test fails
 - Improve multi writer test to enable more than 2 writers

v3:
 - Add 'goto error'
 - Remove unnecessary if statement

v2:
 - Add more details about the fix to the commit message
 - Replace hard coded values with an enum
 - Remove lock acquire/release for single writer

Dharmik Thakkar (4):
  test/lpm: fix cycle calculation in rcu qsbr perf
  test/lpm: return error on failure in rcu qsbr perf
  test/lpm: remove error checking in rcu qsbr perf
  test/lpm: avoid code duplication in rcu qsbr perf

 app/test/test_lpm_perf.c | 383 ++++++++++-----------------------------
 1 file changed, 91 insertions(+), 292 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v4 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-11-03 22:23     ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
@ 2020-11-03 22:23       ` Dharmik Thakkar
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 2/4] test/lpm: return error on failure " Dharmik Thakkar
                         ` (3 subsequent siblings)
  4 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03 22:23 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Ruifeng Wang,
	Honnappa Nagarahalli, Gavin Hu
  Cc: dev, nd, Dharmik Thakkar, stable

Fix incorrect calculations for LPM adds, LPM deletes,
and average cycles in RCU QSBR perf tests.

Since the RCU QSBR tests run for 'RCU_ITERATIONS' and not
'ITERATIONS', replace 'ITERATIONS' with 'RCU_ITERATIONS'
when calculating adds, deletes, and cycles.

Also, for multi-writer perf test, each writer only writes
half of NUM_LDEPTH_ROUTE_ENTRIES.
For 2 writers, total adds (or deletes) should be
(RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of
(2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES).

Since the total adds/deletes for both the single and multi writer
tests is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES),
it has been replaced with the macro 'TOTAL_WRITES'. Furthermore,
'gwrites' has been removed since it is always a fixed value
equal to TOTAL_WRITES.

Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 app/test/test_lpm_perf.c | 45 ++++++++++++++--------------------------
 1 file changed, 16 insertions(+), 29 deletions(-)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index c5a238b9d1e8..45164b23214b 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv;
 static volatile uint8_t writer_done;
 static volatile uint32_t thr_id;
 static uint64_t gwrite_cycles;
-static uint64_t gwrites;
 /* LPM APIs are not thread safe, use mutex to provide thread safety */
 static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries;
 #define NUM_ROUTE_ENTRIES num_route_entries
 #define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries
 
+#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES)
+
 enum {
 	IP_CLASS_A,
 	IP_CLASS_B,
@@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	uint8_t core_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
 
-	RTE_SET_USED(arg);
 	/* 2 writer threads are used */
 	if (core_id % 2 == 0) {
 		si = 0;
@@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	total_cycles = rte_rdtsc_precise() - begin;
 
 	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
-	__atomic_fetch_add(&gwrites,
-			2 * NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS,
-			__ATOMIC_RELAXED);
 
 	return 0;
 }
@@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
@@ -548,14 +544,11 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
-		);
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+		/ TOTAL_WRITES);
 
 	/* Wait and check return value from reader threads */
 	writer_done = 1;
@@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
 	/* Launch reader threads */
@@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
-		);
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+		/ TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -711,11 +700,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -771,11 +759,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v4 2/4] test/lpm: return error on failure in rcu qsbr perf
  2020-11-03 22:23     ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
@ 2020-11-03 22:23       ` Dharmik Thakkar
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 3/4] test/lpm: remove error checking " Dharmik Thakkar
                         ` (2 subsequent siblings)
  4 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03 22:23 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Honnappa Nagarahalli,
	Ruifeng Wang, Gavin Hu
  Cc: dev, nd, Dharmik Thakkar, stable

Return error if LPM add/delete fails in multi-writer perf test.

Return error if single or multi writer test fails.

Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 app/test/test_lpm_perf.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 45164b23214b..873ecf511c97 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -453,6 +453,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
 					next_hop_add) != 0) {
 				printf("Failed to add iteration %d, route# %d\n",
 					i, j);
+				goto error;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
@@ -464,6 +465,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
+				goto error;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
@@ -474,6 +476,10 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
 
 	return 0;
+
+error:
+	pthread_mutex_unlock(&lpm_mutex);
+	return -1;
 }
 
 /*
@@ -947,9 +953,11 @@ test_lpm_perf(void)
 	rte_lpm_delete_all(lpm);
 	rte_lpm_free(lpm);
 
-	test_lpm_rcu_perf();
+	if (test_lpm_rcu_perf() < 0)
+		return -1;
 
-	test_lpm_rcu_perf_multi_writer();
+	if (test_lpm_rcu_perf_multi_writer() < 0)
+		return -1;
 
 	return 0;
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v4 3/4] test/lpm: remove error checking in rcu qsbr perf
  2020-11-03 22:23     ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 2/4] test/lpm: return error on failure " Dharmik Thakkar
@ 2020-11-03 22:23       ` Dharmik Thakkar
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
  2020-11-04 18:58       ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  4 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03 22:23 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Honnappa Nagarahalli,
	Gavin Hu, Ruifeng Wang
  Cc: dev, nd, Dharmik Thakkar, stable

Remove redundant error checking for reader threads
since they never return error.

Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 app/test/test_lpm_perf.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 873ecf511c97..c8e70ec89ff5 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -556,11 +556,10 @@ test_lpm_rcu_perf_multi_writer(void)
 		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
 		/ TOTAL_WRITES);
 
-	/* Wait and check return value from reader threads */
 	writer_done = 1;
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -605,10 +604,9 @@ test_lpm_rcu_perf_multi_writer(void)
 		/ TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
@@ -712,10 +710,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -771,11 +768,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			printf("Warning: lcore %u not finished.\n",
-				enabled_core_ids[i]);
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-03 22:23     ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
                         ` (2 preceding siblings ...)
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 3/4] test/lpm: remove error checking " Dharmik Thakkar
@ 2020-11-03 22:23       ` Dharmik Thakkar
  2020-11-03 22:35         ` Honnappa Nagarahalli
  2020-11-04 15:46         ` Medvedkin, Vladimir
  2020-11-04 18:58       ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  4 siblings, 2 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03 22:23 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin; +Cc: dev, nd, Dharmik Thakkar

Avoid code duplication by combining single and multi threaded tests

Also, enable support for more than 2 writers

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 359 +++++++++------------------------------
 1 file changed, 84 insertions(+), 275 deletions(-)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index c8e70ec89ff5..a1485e74e77f 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv;
 static volatile uint8_t writer_done;
 static volatile uint32_t thr_id;
 static uint64_t gwrite_cycles;
+static uint32_t single_insert;
 /* LPM APIs are not thread safe, use mutex to provide thread safety */
 static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -430,24 +431,21 @@ test_lpm_rcu_qsbr_writer(void *arg)
 {
 	unsigned int i, j, si, ei;
 	uint64_t begin, total_cycles;
-	uint8_t core_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
+	bool single_writer = (single_insert == NUM_LDEPTH_ROUTE_ENTRIES) ?
+				true : false;
+	uint8_t pos_core = (uint8_t)((uintptr_t)arg);
 
-	/* 2 writer threads are used */
-	if (core_id % 2 == 0) {
-		si = 0;
-		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
-	} else {
-		si = NUM_LDEPTH_ROUTE_ENTRIES / 2;
-		ei = NUM_LDEPTH_ROUTE_ENTRIES;
-	}
+	si = pos_core * single_insert;
+	ei = si + single_insert;
 
 	/* Measure add/delete. */
 	begin = rte_rdtsc_precise();
 	for (i = 0; i < RCU_ITERATIONS; i++) {
 		/* Add all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (!single_writer)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
 					large_ldepth_route_table[j].depth,
 					next_hop_add) != 0) {
@@ -455,19 +453,22 @@ test_lpm_rcu_qsbr_writer(void *arg)
 					i, j);
 				goto error;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (!single_writer)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 
 		/* Delete all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (!single_writer)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
 				goto error;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (!single_writer)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 	}
 
@@ -478,22 +479,24 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	return 0;
 
 error:
-	pthread_mutex_unlock(&lpm_mutex);
+	if (!single_writer)
+		pthread_mutex_unlock(&lpm_mutex);
 	return -1;
 }
 
 /*
  * Functional test:
- * 2 writers, rest are readers
+ * 1/2 writers, rest are readers
  */
 static int
-test_lpm_rcu_perf_multi_writer(void)
+test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
 {
 	struct rte_lpm_config config;
 	size_t sz;
-	unsigned int i;
+	unsigned int i, j;
 	uint16_t core_id;
 	struct rte_lpm_rcu_config rcu_cfg = {0};
+	int (*reader_f)(void *arg) = NULL;
 
 	if (rte_lcore_count() < 3) {
 		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
@@ -506,273 +509,79 @@ test_lpm_rcu_perf_multi_writer(void)
 		num_cores++;
 	}
 
-	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
-
-	/* Test without RCU integration */
-	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-
-	return 0;
-
-error:
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	rte_eal_mp_wait_lcore();
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-
-	return -1;
-}
-
-/*
- * Functional test:
- * Single writer, rest are readers
- */
-static int
-test_lpm_rcu_perf(void)
-{
-	struct rte_lpm_config config;
-	uint64_t begin, total_cycles;
-	size_t sz;
-	unsigned int i, j;
-	uint16_t core_id;
-	uint32_t next_hop_add = 0xAA;
-	struct rte_lpm_rcu_config rcu_cfg = {0};
-
-	if (rte_lcore_count() < 2) {
-		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
-		return TEST_SKIPPED;
-	}
-
-	num_cores = 0;
-	RTE_LCORE_FOREACH_WORKER(core_id) {
-		enabled_core_ids[num_cores] = core_id;
-		num_cores++;
-	}
-
-	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
-
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
+	for (j = 1; j < 3; j++) {
+		if (use_rcu)
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration enabled\n", j, num_cores - j);
+		else
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration disabled\n", j, num_cores - j);
+
+		/* Calculate writes by each writer */
+		single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;
+
+		/* Create LPM table */
+		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.flags = 0;
+		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
+		TEST_LPM_ASSERT(lpm != NULL);
+
+		/* Init RCU variable */
+		if (use_rcu) {
+			sz = rte_rcu_qsbr_get_memsize(num_cores);
+			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
+							RTE_CACHE_LINE_SIZE);
+			rte_rcu_qsbr_init(rv, num_cores);
+
+			rcu_cfg.v = rv;
+			/* Assign the RCU variable to LPM */
+			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
+				printf("RCU variable assignment failed\n");
 				goto error;
 			}
-	}
-	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
+			reader_f = test_lpm_rcu_qsbr_reader;
+		} else
+			reader_f = test_lpm_reader;
 
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
+		writer_done = 0;
+		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
 
-	/* Test without RCU integration */
-	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
+		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
+		/* Launch reader threads */
+		for (i = j; i < num_cores; i++)
+			rte_eal_remote_launch(reader_f, NULL,
+						enabled_core_ids[i]);
 
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
+		/* Launch writer threads */
+		for (i = 0; i < j; i++)
+			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
+						(void *)(uintptr_t)i,
+						enabled_core_ids[i]);
 
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+		/* Wait for writer threads */
+		for (i = 0; i < j; i++)
+			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 				goto error;
-			}
 
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
+		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
+		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
+			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+			/ TOTAL_WRITES);
+
+		writer_done = 1;
+		/* Wait until all readers have exited */
+		for (i = j; i < num_cores; i++)
+			rte_eal_wait_lcore(enabled_core_ids[i]);
+
+		rte_lpm_free(lpm);
+		rte_free(rv);
+		lpm = NULL;
+		rv = NULL;
 	}
-	total_cycles = rte_rdtsc_precise() - begin;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
 
 	return 0;
 
@@ -948,10 +757,10 @@ test_lpm_perf(void)
 	rte_lpm_delete_all(lpm);
 	rte_lpm_free(lpm);
 
-	if (test_lpm_rcu_perf() < 0)
+	if (test_lpm_rcu_perf_multi_writer(0) < 0)
 		return -1;
 
-	if (test_lpm_rcu_perf_multi_writer() < 0)
+	if (test_lpm_rcu_perf_multi_writer(1) < 0)
 		return -1;
 
 	return 0;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
@ 2020-11-03 22:35         ` Honnappa Nagarahalli
  2020-11-04 15:46         ` Medvedkin, Vladimir
  1 sibling, 0 replies; 52+ messages in thread
From: Honnappa Nagarahalli @ 2020-11-03 22:35 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin
  Cc: dev, nd, Dharmik Thakkar, Honnappa Nagarahalli, nd



> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Dharmik Thakkar
> Sent: Tuesday, November 3, 2020 4:24 PM
> To: Bruce Richardson <bruce.richardson@intel.com>; Vladimir Medvedkin
> <vladimir.medvedkin@intel.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Dharmik Thakkar
> <Dharmik.Thakkar@arm.com>
> Subject: [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication in rcu
> qsbr perf
> 
> Avoid code duplication by combining single and multi threaded tests
> 
> Also, enable support for more than 2 writers
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Looks good
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

> ---
>  app/test/test_lpm_perf.c | 359 +++++++++------------------------------
>  1 file changed, 84 insertions(+), 275 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
> c8e70ec89ff5..a1485e74e77f 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv;  static volatile uint8_t
> writer_done;  static volatile uint32_t thr_id;  static uint64_t gwrite_cycles;
> +static uint32_t single_insert;
>  /* LPM APIs are not thread safe, use mutex to provide thread safety */
> static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
> 
> @@ -430,24 +431,21 @@ test_lpm_rcu_qsbr_writer(void *arg)  {
>  	unsigned int i, j, si, ei;
>  	uint64_t begin, total_cycles;
> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>  	uint32_t next_hop_add = 0xAA;
> +	bool single_writer = (single_insert ==
> NUM_LDEPTH_ROUTE_ENTRIES) ?
> +				true : false;
> +	uint8_t pos_core = (uint8_t)((uintptr_t)arg);
> 
> -	/* 2 writer threads are used */
> -	if (core_id % 2 == 0) {
> -		si = 0;
> -		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> -	} else {
> -		si = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> -		ei = NUM_LDEPTH_ROUTE_ENTRIES;
> -	}
> +	si = pos_core * single_insert;
> +	ei = si + single_insert;
> 
>  	/* Measure add/delete. */
>  	begin = rte_rdtsc_precise();
>  	for (i = 0; i < RCU_ITERATIONS; i++) {
>  		/* Add all the entries */
>  		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_lock(&lpm_mutex);
>  			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>  					large_ldepth_route_table[j].depth,
>  					next_hop_add) != 0) {
> @@ -455,19 +453,22 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  					i, j);
>  				goto error;
>  			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_unlock(&lpm_mutex);
>  		}
> 
>  		/* Delete all the entries */
>  		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_lock(&lpm_mutex);
>  			if (rte_lpm_delete(lpm,
> large_ldepth_route_table[j].ip,
>  				large_ldepth_route_table[j].depth) != 0) {
>  				printf("Failed to delete iteration %d, route#
> %d\n",
>  					i, j);
>  				goto error;
>  			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_unlock(&lpm_mutex);
>  		}
>  	}
> 
> @@ -478,22 +479,24 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  	return 0;
> 
>  error:
> -	pthread_mutex_unlock(&lpm_mutex);
> +	if (!single_writer)
> +		pthread_mutex_unlock(&lpm_mutex);
>  	return -1;
>  }
> 
>  /*
>   * Functional test:
> - * 2 writers, rest are readers
> + * 1/2 writers, rest are readers
>   */
>  static int
> -test_lpm_rcu_perf_multi_writer(void)
> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>  {
>  	struct rte_lpm_config config;
>  	size_t sz;
> -	unsigned int i;
> +	unsigned int i, j;
>  	uint16_t core_id;
>  	struct rte_lpm_rcu_config rcu_cfg = {0};
> +	int (*reader_f)(void *arg) = NULL;
> 
>  	if (rte_lcore_count() < 3) {
>  		printf("Not enough cores for lpm_rcu_perf_autotest,
> expecting at least 3\n"); @@ -506,273 +509,79 @@
> test_lpm_rcu_perf_multi_writer(void)
>  		num_cores++;
>  	}
> 
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration
> enabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> -
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration
> disabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -
> -	return 0;
> -
> -error:
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	rte_eal_mp_wait_lcore();
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -
> -	return -1;
> -}
> -
> -/*
> - * Functional test:
> - * Single writer, rest are readers
> - */
> -static int
> -test_lpm_rcu_perf(void)
> -{
> -	struct rte_lpm_config config;
> -	uint64_t begin, total_cycles;
> -	size_t sz;
> -	unsigned int i, j;
> -	uint16_t core_id;
> -	uint32_t next_hop_add = 0xAA;
> -	struct rte_lpm_rcu_config rcu_cfg = {0};
> -
> -	if (rte_lcore_count() < 2) {
> -		printf("Not enough cores for lpm_rcu_perf_autotest,
> expecting at least 2\n");
> -		return TEST_SKIPPED;
> -	}
> -
> -	num_cores = 0;
> -	RTE_LCORE_FOREACH_WORKER(core_id) {
> -		enabled_core_ids[num_cores] = core_id;
> -		num_cores++;
> -	}
> -
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route#
> %d\n",
> -					i, j);
> -				goto error;
> -			}
> -
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm,
> large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route#
> %d\n",
> -					i, j);
> +	for (j = 1; j < 3; j++) {
> +		if (use_rcu)
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration enabled\n", j, num_cores - j);
> +		else
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration disabled\n", j, num_cores - j);
> +
> +		/* Calculate writes by each writer */
> +		single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;
> +
> +		/* Create LPM table */
> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.flags = 0;
> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> +		TEST_LPM_ASSERT(lpm != NULL);
> +
> +		/* Init RCU variable */
> +		if (use_rcu) {
> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> +
> 	RTE_CACHE_LINE_SIZE);
> +			rte_rcu_qsbr_init(rv, num_cores);
> +
> +			rcu_cfg.v = rv;
> +			/* Assign the RCU variable to LPM */
> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> +				printf("RCU variable assignment failed\n");
>  				goto error;
>  			}
> -	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> 
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> +			reader_f = test_lpm_rcu_qsbr_reader;
> +		} else
> +			reader_f = test_lpm_reader;
> 
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> +		writer_done = 0;
> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> 
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration
> disabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> 
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> +		/* Launch reader threads */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_remote_launch(reader_f, NULL,
> +						enabled_core_ids[i]);
> 
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> +		/* Launch writer threads */
> +		for (i = 0; i < j; i++)
> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> +						(void *)(uintptr_t)i,
> +						enabled_core_ids[i]);
> 
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route#
> %d\n",
> -					i, j);
> +		/* Wait for writer threads */
> +		for (i = 0; i < j; i++)
> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>  				goto error;
> -			}
> 
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm,
> large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route#
> %d\n",
> -					i, j);
> -				goto error;
> -			}
> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> +			__atomic_load_n(&gwrite_cycles,
> __ATOMIC_RELAXED)
> +			/ TOTAL_WRITES);
> +
> +		writer_done = 1;
> +		/* Wait until all readers have exited */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_wait_lcore(enabled_core_ids[i]);
> +
> +		rte_lpm_free(lpm);
> +		rte_free(rv);
> +		lpm = NULL;
> +		rv = NULL;
>  	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> 
>  	return 0;
> 
> @@ -948,10 +757,10 @@ test_lpm_perf(void)
>  	rte_lpm_delete_all(lpm);
>  	rte_lpm_free(lpm);
> 
> -	if (test_lpm_rcu_perf() < 0)
> +	if (test_lpm_rcu_perf_multi_writer(0) < 0)
>  		return -1;
> 
> -	if (test_lpm_rcu_perf_multi_writer() < 0)
> +	if (test_lpm_rcu_perf_multi_writer(1) < 0)
>  		return -1;
> 
>  	return 0;
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
  2020-11-03 22:35         ` Honnappa Nagarahalli
@ 2020-11-04 15:46         ` Medvedkin, Vladimir
  2020-11-04 16:49           ` Dharmik Thakkar
  1 sibling, 1 reply; 52+ messages in thread
From: Medvedkin, Vladimir @ 2020-11-04 15:46 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson; +Cc: dev, nd

Hi Thakkar,

On 03/11/2020 22:23, Dharmik Thakkar wrote:
> Avoid code duplication by combining single and multi threaded tests
> 
> Also, enable support for more than 2 writers
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>   app/test/test_lpm_perf.c | 359 +++++++++------------------------------
>   1 file changed, 84 insertions(+), 275 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> index c8e70ec89ff5..a1485e74e77f 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv;
>   static volatile uint8_t writer_done;
>   static volatile uint32_t thr_id;
>   static uint64_t gwrite_cycles;
> +static uint32_t single_insert;
>   /* LPM APIs are not thread safe, use mutex to provide thread safety */
>   static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
>   
> @@ -430,24 +431,21 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   {
>   	unsigned int i, j, si, ei;
>   	uint64_t begin, total_cycles;
> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>   	uint32_t next_hop_add = 0xAA;
> +	bool single_writer = (single_insert == NUM_LDEPTH_ROUTE_ENTRIES) ?
> +				true : false;
> +	uint8_t pos_core = (uint8_t)((uintptr_t)arg);
>   
> -	/* 2 writer threads are used */
> -	if (core_id % 2 == 0) {
> -		si = 0;
> -		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> -	} else {
> -		si = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> -		ei = NUM_LDEPTH_ROUTE_ENTRIES;
> -	}
> +	si = pos_core * single_insert;
> +	ei = si + single_insert;
> 

In this case, given that you are doing
           "single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;"
below, the number of ldepth routes must be a multiple of the number of
writers; otherwise the remainder of the routes will be skipped.
Consider something like:

number_of_writers = j;
...
si = (pos_core * NUM_LDEPTH_ROUTE_ENTRIES)/number_of_writers;
ei = ((pos_core + 1) * NUM_LDEPTH_ROUTE_ENTRIES)/number_of_writers;


>   	/* Measure add/delete. */
>   	begin = rte_rdtsc_precise();
>   	for (i = 0; i < RCU_ITERATIONS; i++) {
>   		/* Add all the entries */
>   		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_lock(&lpm_mutex);
>   			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>   					large_ldepth_route_table[j].depth,
>   					next_hop_add) != 0) {
> @@ -455,19 +453,22 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   					i, j);
>   				goto error;
>   			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_unlock(&lpm_mutex);
>   		}
>   
>   		/* Delete all the entries */
>   		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_lock(&lpm_mutex);
>   			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>   				large_ldepth_route_table[j].depth) != 0) {
>   				printf("Failed to delete iteration %d, route# %d\n",
>   					i, j);
>   				goto error;
>   			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_unlock(&lpm_mutex);
>   		}
>   	}
>   
> @@ -478,22 +479,24 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   	return 0;
>   
>   error:
> -	pthread_mutex_unlock(&lpm_mutex);
> +	if (!single_writer)
> +		pthread_mutex_unlock(&lpm_mutex);
>   	return -1;
>   }
>   
>   /*
>    * Functional test:
> - * 2 writers, rest are readers
> + * 1/2 writers, rest are readers
>    */
>   static int
> -test_lpm_rcu_perf_multi_writer(void)
> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>   {
>   	struct rte_lpm_config config;
>   	size_t sz;
> -	unsigned int i;
> +	unsigned int i, j;
>   	uint16_t core_id;
>   	struct rte_lpm_rcu_config rcu_cfg = {0};
> +	int (*reader_f)(void *arg) = NULL;
>   
>   	if (rte_lcore_count() < 3) {
>   		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
> @@ -506,273 +509,79 @@ test_lpm_rcu_perf_multi_writer(void)
>   		num_cores++;
>   	}
>   
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> -
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -
> -	return 0;
> -
> -error:
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	rte_eal_mp_wait_lcore();
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -
> -	return -1;
> -}
> -
> -/*
> - * Functional test:
> - * Single writer, rest are readers
> - */
> -static int
> -test_lpm_rcu_perf(void)
> -{
> -	struct rte_lpm_config config;
> -	uint64_t begin, total_cycles;
> -	size_t sz;
> -	unsigned int i, j;
> -	uint16_t core_id;
> -	uint32_t next_hop_add = 0xAA;
> -	struct rte_lpm_rcu_config rcu_cfg = {0};
> -
> -	if (rte_lcore_count() < 2) {
> -		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
> -		return TEST_SKIPPED;
> -	}
> -
> -	num_cores = 0;
> -	RTE_LCORE_FOREACH_WORKER(core_id) {
> -		enabled_core_ids[num_cores] = core_id;
> -		num_cores++;
> -	}
> -
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route# %d\n",
> -					i, j);
> -				goto error;
> -			}
> -
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route# %d\n",
> -					i, j);
> +	for (j = 1; j < 3; j++) {
> +		if (use_rcu)
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration enabled\n", j, num_cores - j);
> +		else
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration disabled\n", j, num_cores - j);
> +
> +		/* Calculate writes by each writer */
> +		single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;
> +
> +		/* Create LPM table */
> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.flags = 0;
> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> +		TEST_LPM_ASSERT(lpm != NULL);
> +
> +		/* Init RCU variable */
> +		if (use_rcu) {
> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> +							RTE_CACHE_LINE_SIZE);
> +			rte_rcu_qsbr_init(rv, num_cores);
> +
> +			rcu_cfg.v = rv;
> +			/* Assign the RCU variable to LPM */
> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> +				printf("RCU variable assignment failed\n");
>   				goto error;
>   			}
> -	}
> -	total_cycles = rte_rdtsc_precise() - begin;
>   
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> +			reader_f = test_lpm_rcu_qsbr_reader;
> +		} else
> +			reader_f = test_lpm_reader;
>   
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> +		writer_done = 0;
> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>   
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>   
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> +		/* Launch reader threads */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_remote_launch(reader_f, NULL,
> +						enabled_core_ids[i]);
>   
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> +		/* Launch writer threads */
> +		for (i = 0; i < j; i++)
> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> +						(void *)(uintptr_t)i,
> +						enabled_core_ids[i]);
>   
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route# %d\n",
> -					i, j);
> +		/* Wait for writer threads */
> +		for (i = 0; i < j; i++)
> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>   				goto error;
> -			}
>   
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route# %d\n",
> -					i, j);
> -				goto error;
> -			}
> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> +			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> +			/ TOTAL_WRITES);
> +
> +		writer_done = 1;
> +		/* Wait until all readers have exited */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_wait_lcore(enabled_core_ids[i]);
> +
> +		rte_lpm_free(lpm);
> +		rte_free(rv);
> +		lpm = NULL;
> +		rv = NULL;
>   	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
>   
>   	return 0;
>   
> @@ -948,10 +757,10 @@ test_lpm_perf(void)
>   	rte_lpm_delete_all(lpm);
>   	rte_lpm_free(lpm);
>   
> -	if (test_lpm_rcu_perf() < 0)
> +	if (test_lpm_rcu_perf_multi_writer(0) < 0)
>   		return -1;
>   
> -	if (test_lpm_rcu_perf_multi_writer() < 0)
> +	if (test_lpm_rcu_perf_multi_writer(1) < 0)
>   		return -1;
>   
>   	return 0;
> 

-- 
Regards,
Vladimir

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-04 15:46         ` Medvedkin, Vladimir
@ 2020-11-04 16:49           ` Dharmik Thakkar
  0 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-04 16:49 UTC (permalink / raw)
  To: Medvedkin, Vladimir; +Cc: Bruce Richardson, dev, nd



> On Nov 4, 2020, at 9:46 AM, Medvedkin, Vladimir <vladimir.medvedkin@intel.com> wrote:
> 
> Hi Thakkar,
> 
> On 03/11/2020 22:23, Dharmik Thakkar wrote:
>> Avoid code duplication by combining single and multi threaded tests
>> Also, enable support for more than 2 writers
>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>> ---
>>  app/test/test_lpm_perf.c | 359 +++++++++------------------------------
>>  1 file changed, 84 insertions(+), 275 deletions(-)
>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
>> index c8e70ec89ff5..a1485e74e77f 100644
>> --- a/app/test/test_lpm_perf.c
>> +++ b/app/test/test_lpm_perf.c
>> @@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv;
>>  static volatile uint8_t writer_done;
>>  static volatile uint32_t thr_id;
>>  static uint64_t gwrite_cycles;
>> +static uint32_t single_insert;
>>  /* LPM APIs are not thread safe, use mutex to provide thread safety */
>>  static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
>>  @@ -430,24 +431,21 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>  {
>>  	unsigned int i, j, si, ei;
>>  	uint64_t begin, total_cycles;
>> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>>  	uint32_t next_hop_add = 0xAA;
>> +	bool single_writer = (single_insert == NUM_LDEPTH_ROUTE_ENTRIES) ?
>> +				true : false;
>> +	uint8_t pos_core = (uint8_t)((uintptr_t)arg);
>>  -	/* 2 writer threads are used */
>> -	if (core_id % 2 == 0) {
>> -		si = 0;
>> -		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>> -	} else {
>> -		si = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>> -		ei = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	}
>> +	si = pos_core * single_insert;
>> +	ei = si + single_insert;
> 
> In this case, given that you are doing
>          "single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;"
> below, the number of ldepth_routes must be a multiple of the number of writers, so some number of routes can be skipped in the opposite case. Consider something like:
> 
> number_of_writers = j;
> ...
> si = (pos_core * NUM_LDEPTH_ROUTE_ENTRIES)/number_of_writers;
> ei = ((pos_core + 1 ) * NUM_LDEPTH_ROUTE_ENTRIES)/number_of_writers;
> 

Yes, agreed some routes can be skipped. I will update the patch with the above changes. Thanks!

> 
>>  	/* Measure add/delete. */
>>  	begin = rte_rdtsc_precise();
>>  	for (i = 0; i < RCU_ITERATIONS; i++) {
>>  		/* Add all the entries */
>>  		for (j = si; j < ei; j++) {
>> -			pthread_mutex_lock(&lpm_mutex);
>> +			if (!single_writer)
>> +				pthread_mutex_lock(&lpm_mutex);
>>  			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>>  					large_ldepth_route_table[j].depth,
>>  					next_hop_add) != 0) {
>> @@ -455,19 +453,22 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>  					i, j);
>>  				goto error;
>>  			}
>> -			pthread_mutex_unlock(&lpm_mutex);
>> +			if (!single_writer)
>> +				pthread_mutex_unlock(&lpm_mutex);
>>  		}
>>    		/* Delete all the entries */
>>  		for (j = si; j < ei; j++) {
>> -			pthread_mutex_lock(&lpm_mutex);
>> +			if (!single_writer)
>> +				pthread_mutex_lock(&lpm_mutex);
>>  			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>>  				large_ldepth_route_table[j].depth) != 0) {
>>  				printf("Failed to delete iteration %d, route# %d\n",
>>  					i, j);
>>  				goto error;
>>  			}
>> -			pthread_mutex_unlock(&lpm_mutex);
>> +			if (!single_writer)
>> +				pthread_mutex_unlock(&lpm_mutex);
>>  		}
>>  	}
>>  @@ -478,22 +479,24 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>  	return 0;
>>    error:
>> -	pthread_mutex_unlock(&lpm_mutex);
>> +	if (!single_writer)
>> +		pthread_mutex_unlock(&lpm_mutex);
>>  	return -1;
>>  }
>>    /*
>>   * Functional test:
>> - * 2 writers, rest are readers
>> + * 1/2 writers, rest are readers
>>   */
>>  static int
>> -test_lpm_rcu_perf_multi_writer(void)
>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>>  {
>>  	struct rte_lpm_config config;
>>  	size_t sz;
>> -	unsigned int i;
>> +	unsigned int i, j;
>>  	uint16_t core_id;
>>  	struct rte_lpm_rcu_config rcu_cfg = {0};
>> +	int (*reader_f)(void *arg) = NULL;
>>    	if (rte_lcore_count() < 3) {
>>  		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
>> @@ -506,273 +509,79 @@ test_lpm_rcu_perf_multi_writer(void)
>>  		num_cores++;
>>  	}
>>  -	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
>> -		num_cores - 2);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	/* Init RCU variable */
>> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
>> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> -						RTE_CACHE_LINE_SIZE);
>> -	rte_rcu_qsbr_init(rv, num_cores);
>> -
>> -	rcu_cfg.v = rv;
>> -	/* Assign the RCU variable to LPM */
>> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> -		printf("RCU variable assignment failed\n");
>> -		goto error;
>> -	}
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> -
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Launch writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> -					(void *)(uintptr_t)i,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Wait for writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> -		/ TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -	lpm = NULL;
>> -	rv = NULL;
>> -
>> -	/* Test without RCU integration */
>> -	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
>> -		num_cores - 2);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Launch writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> -					(void *)(uintptr_t)i,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Wait for writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> -		/ TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -
>> -	return 0;
>> -
>> -error:
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	rte_eal_mp_wait_lcore();
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -
>> -	return -1;
>> -}
>> -
>> -/*
>> - * Functional test:
>> - * Single writer, rest are readers
>> - */
>> -static int
>> -test_lpm_rcu_perf(void)
>> -{
>> -	struct rte_lpm_config config;
>> -	uint64_t begin, total_cycles;
>> -	size_t sz;
>> -	unsigned int i, j;
>> -	uint16_t core_id;
>> -	uint32_t next_hop_add = 0xAA;
>> -	struct rte_lpm_rcu_config rcu_cfg = {0};
>> -
>> -	if (rte_lcore_count() < 2) {
>> -		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
>> -		return TEST_SKIPPED;
>> -	}
>> -
>> -	num_cores = 0;
>> -	RTE_LCORE_FOREACH_WORKER(core_id) {
>> -		enabled_core_ids[num_cores] = core_id;
>> -		num_cores++;
>> -	}
>> -
>> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
>> -		num_cores);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	/* Init RCU variable */
>> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
>> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> -						RTE_CACHE_LINE_SIZE);
>> -	rte_rcu_qsbr_init(rv, num_cores);
>> -
>> -	rcu_cfg.v = rv;
>> -	/* Assign the RCU variable to LPM */
>> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> -		printf("RCU variable assignment failed\n");
>> -		goto error;
>> -	}
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Measure add/delete. */
>> -	begin = rte_rdtsc_precise();
>> -	for (i = 0; i < RCU_ITERATIONS; i++) {
>> -		/* Add all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> -					large_ldepth_route_table[j].depth,
>> -					next_hop_add) != 0) {
>> -				printf("Failed to add iteration %d, route# %d\n",
>> -					i, j);
>> -				goto error;
>> -			}
>> -
>> -		/* Delete all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>> -				large_ldepth_route_table[j].depth) != 0) {
>> -				printf("Failed to delete iteration %d, route# %d\n",
>> -					i, j);
>> +	for (j = 1; j < 3; j++) {
>> +		if (use_rcu)
>> +			printf("\nPerf test: %d writer(s), %d reader(s),"
>> +			       " RCU integration enabled\n", j, num_cores - j);
>> +		else
>> +			printf("\nPerf test: %d writer(s), %d reader(s),"
>> +			       " RCU integration disabled\n", j, num_cores - j);
>> +
>> +		/* Calculate writes by each writer */
>> +		single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;
>> +
>> +		/* Create LPM table */
>> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> +		config.flags = 0;
>> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> +		TEST_LPM_ASSERT(lpm != NULL);
>> +
>> +		/* Init RCU variable */
>> +		if (use_rcu) {
>> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
>> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> +							RTE_CACHE_LINE_SIZE);
>> +			rte_rcu_qsbr_init(rv, num_cores);
>> +
>> +			rcu_cfg.v = rv;
>> +			/* Assign the RCU variable to LPM */
>> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> +				printf("RCU variable assignment failed\n");
>>  				goto error;
>>  			}
>> -	}
>> -	total_cycles = rte_rdtsc_precise() - begin;
>>  -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %g cycles\n",
>> -		(double)total_cycles / TOTAL_WRITES);
>> +			reader_f = test_lpm_rcu_qsbr_reader;
>> +		} else
>> +			reader_f = test_lpm_reader;
>>  -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -	lpm = NULL;
>> -	rv = NULL;
>> +		writer_done = 0;
>> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>  -	/* Test without RCU integration */
>> -	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
>> -		num_cores);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>  -	writer_done = 0;
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> +		/* Launch reader threads */
>> +		for (i = j; i < num_cores; i++)
>> +			rte_eal_remote_launch(reader_f, NULL,
>> +						enabled_core_ids[i]);
>>  -	/* Launch reader threads */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_reader, NULL,
>> -					enabled_core_ids[i]);
>> +		/* Launch writer threads */
>> +		for (i = 0; i < j; i++)
>> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> +						(void *)(uintptr_t)i,
>> +						enabled_core_ids[i]);
>>  -	/* Measure add/delete. */
>> -	begin = rte_rdtsc_precise();
>> -	for (i = 0; i < RCU_ITERATIONS; i++) {
>> -		/* Add all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> -					large_ldepth_route_table[j].depth,
>> -					next_hop_add) != 0) {
>> -				printf("Failed to add iteration %d, route# %d\n",
>> -					i, j);
>> +		/* Wait for writer threads */
>> +		for (i = 0; i < j; i++)
>> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>>  				goto error;
>> -			}
>>  -		/* Delete all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>> -				large_ldepth_route_table[j].depth) != 0) {
>> -				printf("Failed to delete iteration %d, route# %d\n",
>> -					i, j);
>> -				goto error;
>> -			}
>> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> +			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> +			/ TOTAL_WRITES);
>> +
>> +		writer_done = 1;
>> +		/* Wait until all readers have exited */
>> +		for (i = j; i < num_cores; i++)
>> +			rte_eal_wait_lcore(enabled_core_ids[i]);
>> +
>> +		rte_lpm_free(lpm);
>> +		rte_free(rv);
>> +		lpm = NULL;
>> +		rv = NULL;
>>  	}
>> -	total_cycles = rte_rdtsc_precise() - begin;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %g cycles\n",
>> -		(double)total_cycles / TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>>    	return 0;
>>  @@ -948,10 +757,10 @@ test_lpm_perf(void)
>>  	rte_lpm_delete_all(lpm);
>>  	rte_lpm_free(lpm);
>>  -	if (test_lpm_rcu_perf() < 0)
>> +	if (test_lpm_rcu_perf_multi_writer(0) < 0)
>>  		return -1;
>>  -	if (test_lpm_rcu_perf_multi_writer() < 0)
>> +	if (test_lpm_rcu_perf_multi_writer(1) < 0)
>>  		return -1;
>>    	return 0;
> 
> -- 
> Regards,
> Vladimir


^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test
  2020-11-03 22:23     ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
                         ` (3 preceding siblings ...)
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
@ 2020-11-04 18:58       ` Dharmik Thakkar
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
                           ` (4 more replies)
  4 siblings, 5 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-04 18:58 UTC (permalink / raw)
  Cc: dev, nd, Dharmik Thakkar

Fix LPM adds, LPM deletes, and cycle calculation.
Return error if LPM add/delete fails in multi-writer test.
Return error if single or multi writer test fails.
Remove redundant error checking for readers.
Combine single and multi threaded test cases to avoid code duplication.
---
v5:
 - Update logic for routes inserted by each writer in multi writer
   test to avoid possibility of routes getting skipped

v4:
 - Return error if rcu qsbr test fails
 - Improve multi writer test to enable more than 2 writers

v3:
 - Add 'goto error'
 - Remove unnecessary if statement

v2:
 - Add more details about the fix to the commit message
 - Replace hard coded values with an enum
 - Remove lock acquire/release for single writer

Dharmik Thakkar (4):
  test/lpm: fix cycle calculation in rcu qsbr perf
  test/lpm: return error on failure in rcu qsbr perf
  test/lpm: remove error checking in rcu qsbr perf
  test/lpm: avoid code duplication in rcu qsbr perf

 app/test/test_lpm_perf.c | 380 +++++++++------------------------------
 1 file changed, 88 insertions(+), 292 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-11-04 18:58       ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
@ 2020-11-04 18:58         ` Dharmik Thakkar
  2020-11-04 19:34           ` Medvedkin, Vladimir
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 2/4] test/lpm: return error on failure " Dharmik Thakkar
                           ` (3 subsequent siblings)
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-04 18:58 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Gavin Hu, Ruifeng Wang,
	Honnappa Nagarahalli
  Cc: dev, nd, Dharmik Thakkar, stable

Fix incorrect calculations for LPM adds, LPM deletes,
and average cycles in RCU QSBR perf tests

Since the RCU QSBR tests run for 'RCU_ITERATIONS' and not
'ITERATIONS', replace 'ITERATIONS' with 'RCU_ITERATIONS'
for calculating adds, deletes, and cycles.

Also, for multi-writer perf test, each writer only writes
half of NUM_LDEPTH_ROUTE_ENTRIES.
For 2 writers, total adds (or deletes) should be
(RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of
(2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES).

Since the total adds/deletes for both the single and multi writer
tests is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES),
this expression has been replaced with the macro 'TOTAL_WRITES'.
Furthermore, 'gwrites' has been removed since it is always a fixed
value equal to TOTAL_WRITES.

Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 app/test/test_lpm_perf.c | 45 ++++++++++++++--------------------------
 1 file changed, 16 insertions(+), 29 deletions(-)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index c5a238b9d1e8..45164b23214b 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv;
 static volatile uint8_t writer_done;
 static volatile uint32_t thr_id;
 static uint64_t gwrite_cycles;
-static uint64_t gwrites;
 /* LPM APIs are not thread safe, use mutex to provide thread safety */
 static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries;
 #define NUM_ROUTE_ENTRIES num_route_entries
 #define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries
 
+#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES)
+
 enum {
 	IP_CLASS_A,
 	IP_CLASS_B,
@@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	uint8_t core_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
 
-	RTE_SET_USED(arg);
 	/* 2 writer threads are used */
 	if (core_id % 2 == 0) {
 		si = 0;
@@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	total_cycles = rte_rdtsc_precise() - begin;
 
 	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
-	__atomic_fetch_add(&gwrites,
-			2 * NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS,
-			__ATOMIC_RELAXED);
 
 	return 0;
 }
@@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
@@ -548,14 +544,11 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
-		);
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+		/ TOTAL_WRITES);
 
 	/* Wait and check return value from reader threads */
 	writer_done = 1;
@@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
 	/* Launch reader threads */
@@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
-		);
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+		/ TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -711,11 +700,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -771,11 +759,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v5 2/4] test/lpm: return error on failure in rcu qsbr perf
  2020-11-04 18:58       ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
@ 2020-11-04 18:58         ` Dharmik Thakkar
  2020-11-04 19:35           ` Medvedkin, Vladimir
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 3/4] test/lpm: remove error checking " Dharmik Thakkar
                           ` (2 subsequent siblings)
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-04 18:58 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Gavin Hu,
	Honnappa Nagarahalli, Ruifeng Wang
  Cc: dev, nd, Dharmik Thakkar, stable

Return error if Add/Delete fail in multiwriter perf test

Return error if single or multi writer test fails

Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 app/test/test_lpm_perf.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 45164b23214b..873ecf511c97 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -453,6 +453,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
 					next_hop_add) != 0) {
 				printf("Failed to add iteration %d, route# %d\n",
 					i, j);
+				goto error;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
@@ -464,6 +465,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
+				goto error;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
@@ -474,6 +476,10 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
 
 	return 0;
+
+error:
+	pthread_mutex_unlock(&lpm_mutex);
+	return -1;
 }
 
 /*
@@ -947,9 +953,11 @@ test_lpm_perf(void)
 	rte_lpm_delete_all(lpm);
 	rte_lpm_free(lpm);
 
-	test_lpm_rcu_perf();
+	if (test_lpm_rcu_perf() < 0)
+		return -1;
 
-	test_lpm_rcu_perf_multi_writer();
+	if (test_lpm_rcu_perf_multi_writer() < 0)
+		return -1;
 
 	return 0;
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v5 3/4] test/lpm: remove error checking in rcu qsbr perf
  2020-11-04 18:58       ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 2/4] test/lpm: return error on failure " Dharmik Thakkar
@ 2020-11-04 18:58         ` Dharmik Thakkar
  2020-11-04 19:35           ` Medvedkin, Vladimir
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
  2020-11-05 15:58         ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test David Marchand
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-04 18:58 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Honnappa Nagarahalli,
	Ruifeng Wang, Gavin Hu
  Cc: dev, nd, Dharmik Thakkar, stable

Remove redundant error checking for reader threads
since they never return error.

Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 app/test/test_lpm_perf.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 873ecf511c97..c8e70ec89ff5 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -556,11 +556,10 @@ test_lpm_rcu_perf_multi_writer(void)
 		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
 		/ TOTAL_WRITES);
 
-	/* Wait and check return value from reader threads */
 	writer_done = 1;
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -605,10 +604,9 @@ test_lpm_rcu_perf_multi_writer(void)
 		/ TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
@@ -712,10 +710,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -771,11 +768,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			printf("Warning: lcore %u not finished.\n",
-				enabled_core_ids[i]);
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [dpdk-dev] [PATCH v5 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-04 18:58       ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
                           ` (2 preceding siblings ...)
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 3/4] test/lpm: remove error checking " Dharmik Thakkar
@ 2020-11-04 18:58         ` Dharmik Thakkar
  2020-11-04 19:35           ` Medvedkin, Vladimir
  2020-11-05 15:58         ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test David Marchand
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-04 18:58 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin; +Cc: dev, nd, Dharmik Thakkar

Avoid code duplication by combining single and multi threaded tests

Also, enable support for more than 2 writers

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 app/test/test_lpm_perf.c | 356 +++++++++------------------------------
 1 file changed, 81 insertions(+), 275 deletions(-)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index c8e70ec89ff5..2bed00d0648f 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv;
 static volatile uint8_t writer_done;
 static volatile uint32_t thr_id;
 static uint64_t gwrite_cycles;
+static uint32_t num_writers;
 /* LPM APIs are not thread safe, use mutex to provide thread safety */
 static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -430,24 +431,19 @@ test_lpm_rcu_qsbr_writer(void *arg)
 {
 	unsigned int i, j, si, ei;
 	uint64_t begin, total_cycles;
-	uint8_t core_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
+	uint8_t pos_core = (uint8_t)((uintptr_t)arg);
 
-	/* 2 writer threads are used */
-	if (core_id % 2 == 0) {
-		si = 0;
-		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
-	} else {
-		si = NUM_LDEPTH_ROUTE_ENTRIES / 2;
-		ei = NUM_LDEPTH_ROUTE_ENTRIES;
-	}
+	si = (pos_core * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers;
+	ei = ((pos_core + 1) * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers;
 
 	/* Measure add/delete. */
 	begin = rte_rdtsc_precise();
 	for (i = 0; i < RCU_ITERATIONS; i++) {
 		/* Add all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (num_writers > 1)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
 					large_ldepth_route_table[j].depth,
 					next_hop_add) != 0) {
@@ -455,19 +451,22 @@ test_lpm_rcu_qsbr_writer(void *arg)
 					i, j);
 				goto error;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (num_writers > 1)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 
 		/* Delete all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (num_writers > 1)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
 				goto error;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (num_writers > 1)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 	}
 
@@ -478,22 +477,24 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	return 0;
 
 error:
-	pthread_mutex_unlock(&lpm_mutex);
+	if (num_writers > 1)
+		pthread_mutex_unlock(&lpm_mutex);
 	return -1;
 }
 
 /*
  * Functional test:
- * 2 writers, rest are readers
+ * 1/2 writers, rest are readers
  */
 static int
-test_lpm_rcu_perf_multi_writer(void)
+test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
 {
 	struct rte_lpm_config config;
 	size_t sz;
-	unsigned int i;
+	unsigned int i, j;
 	uint16_t core_id;
 	struct rte_lpm_rcu_config rcu_cfg = {0};
+	int (*reader_f)(void *arg) = NULL;
 
 	if (rte_lcore_count() < 3) {
 		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
@@ -506,273 +507,78 @@ test_lpm_rcu_perf_multi_writer(void)
 		num_cores++;
 	}
 
-	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
-
-	/* Test without RCU integration */
-	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-
-	return 0;
-
-error:
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	rte_eal_mp_wait_lcore();
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-
-	return -1;
-}
-
-/*
- * Functional test:
- * Single writer, rest are readers
- */
-static int
-test_lpm_rcu_perf(void)
-{
-	struct rte_lpm_config config;
-	uint64_t begin, total_cycles;
-	size_t sz;
-	unsigned int i, j;
-	uint16_t core_id;
-	uint32_t next_hop_add = 0xAA;
-	struct rte_lpm_rcu_config rcu_cfg = {0};
-
-	if (rte_lcore_count() < 2) {
-		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
-		return TEST_SKIPPED;
-	}
-
-	num_cores = 0;
-	RTE_LCORE_FOREACH_WORKER(core_id) {
-		enabled_core_ids[num_cores] = core_id;
-		num_cores++;
-	}
-
-	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
-
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
+	for (j = 1; j < 3; j++) {
+		if (use_rcu)
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration enabled\n", j, num_cores - j);
+		else
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration disabled\n", j, num_cores - j);
+
+		num_writers = j;
+
+		/* Create LPM table */
+		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.flags = 0;
+		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
+		TEST_LPM_ASSERT(lpm != NULL);
+
+		/* Init RCU variable */
+		if (use_rcu) {
+			sz = rte_rcu_qsbr_get_memsize(num_cores);
+			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
+							RTE_CACHE_LINE_SIZE);
+			rte_rcu_qsbr_init(rv, num_cores);
+
+			rcu_cfg.v = rv;
+			/* Assign the RCU variable to LPM */
+			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
+				printf("RCU variable assignment failed\n");
 				goto error;
 			}
-	}
-	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
+			reader_f = test_lpm_rcu_qsbr_reader;
+		} else
+			reader_f = test_lpm_reader;
 
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
+		writer_done = 0;
+		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
 
-	/* Test without RCU integration */
-	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
+		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
+		/* Launch reader threads */
+		for (i = j; i < num_cores; i++)
+			rte_eal_remote_launch(reader_f, NULL,
+						enabled_core_ids[i]);
 
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
+		/* Launch writer threads */
+		for (i = 0; i < j; i++)
+			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
+						(void *)(uintptr_t)i,
+						enabled_core_ids[i]);
 
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+		/* Wait for writer threads */
+		for (i = 0; i < j; i++)
+			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 				goto error;
-			}
 
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
+		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
+		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
+			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+			/ TOTAL_WRITES);
+
+		writer_done = 1;
+		/* Wait until all readers have exited */
+		for (i = j; i < num_cores; i++)
+			rte_eal_wait_lcore(enabled_core_ids[i]);
+
+		rte_lpm_free(lpm);
+		rte_free(rv);
+		lpm = NULL;
+		rv = NULL;
 	}
-	total_cycles = rte_rdtsc_precise() - begin;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
 
 	return 0;
 
@@ -948,10 +754,10 @@ test_lpm_perf(void)
 	rte_lpm_delete_all(lpm);
 	rte_lpm_free(lpm);
 
-	if (test_lpm_rcu_perf() < 0)
+	if (test_lpm_rcu_perf_multi_writer(0) < 0)
 		return -1;
 
-	if (test_lpm_rcu_perf_multi_writer() < 0)
+	if (test_lpm_rcu_perf_multi_writer(1) < 0)
 		return -1;
 
 	return 0;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
@ 2020-11-04 19:34           ` Medvedkin, Vladimir
  0 siblings, 0 replies; 52+ messages in thread
From: Medvedkin, Vladimir @ 2020-11-04 19:34 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Gavin Hu, Ruifeng Wang,
	Honnappa Nagarahalli
  Cc: dev, nd, stable



On 04/11/2020 18:58, Dharmik Thakkar wrote:
> Fix incorrect calculations for LPM adds, LPM deletes,
> and average cycles in RCU QSBR perf tests
> 
> Since the rcu qsbr tests run for 'RCU_ITERATIONS' and not
> 'ITERATIONS', replace 'ITERATIONS' with 'RCU_ITERATIONS'
> for calculating adds, deletes, and cycles.
> 
> Also, for multi-writer perf test, each writer only writes
> half of NUM_LDEPTH_ROUTE_ENTRIES.
> For 2 writers, total adds (or deletes) should be
> (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of
> (2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES).
> 
> Since for both the single and multi writer tests, total adds/deletes
> is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES),
> this has been replaced with a macro 'TOTAL_WRITES' and furthermore,
> 'gwrites' has been removed since it is always a fixed value
> equal to TOTAL_WRITES.
> 
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>   app/test/test_lpm_perf.c | 45 ++++++++++++++--------------------------
>   1 file changed, 16 insertions(+), 29 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> index c5a238b9d1e8..45164b23214b 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv;
>   static volatile uint8_t writer_done;
>   static volatile uint32_t thr_id;
>   static uint64_t gwrite_cycles;
> -static uint64_t gwrites;
>   /* LPM APIs are not thread safe, use mutex to provide thread safety */
>   static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
>   
> @@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries;
>   #define NUM_ROUTE_ENTRIES num_route_entries
>   #define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries
>   
> +#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES)
> +
>   enum {
>   	IP_CLASS_A,
>   	IP_CLASS_B,
> @@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>   	uint32_t next_hop_add = 0xAA;
>   
> -	RTE_SET_USED(arg);
>   	/* 2 writer threads are used */
>   	if (core_id % 2 == 0) {
>   		si = 0;
> @@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   	total_cycles = rte_rdtsc_precise() - begin;
>   
>   	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
> -	__atomic_fetch_add(&gwrites,
> -			2 * NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS,
> -			__ATOMIC_RELAXED);
>   
>   	return 0;
>   }
> @@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void)
>   
>   	writer_done = 0;
>   	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
>   
>   	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>   
> @@ -548,14 +544,11 @@ test_lpm_rcu_perf_multi_writer(void)
>   		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>   			goto error;
>   
> -	printf("Total LPM Adds: %d\n",
> -		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> -	printf("Total LPM Deletes: %d\n",
> -		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> +	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>   	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
> -			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
> -		);
> +		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> +		/ TOTAL_WRITES);
>   
>   	/* Wait and check return value from reader threads */
>   	writer_done = 1;
> @@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void)
>   
>   	writer_done = 0;
>   	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
>   	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>   
>   	/* Launch reader threads */
> @@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void)
>   		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>   			goto error;
>   
> -	printf("Total LPM Adds: %d\n",
> -		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> -	printf("Total LPM Deletes: %d\n",
> -		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> +	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>   	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
> -			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
> -		);
> +		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> +		/ TOTAL_WRITES);
>   
>   	writer_done = 1;
>   	/* Wait and check return value from reader threads */
> @@ -711,11 +700,10 @@ test_lpm_rcu_perf(void)
>   	}
>   	total_cycles = rte_rdtsc_precise() - begin;
>   
> -	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> -	printf("Total LPM Deletes: %d\n",
> -		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> +	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>   	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
> +		(double)total_cycles / TOTAL_WRITES);
>   
>   	writer_done = 1;
>   	/* Wait and check return value from reader threads */
> @@ -771,11 +759,10 @@ test_lpm_rcu_perf(void)
>   	}
>   	total_cycles = rte_rdtsc_precise() - begin;
>   
> -	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> -	printf("Total LPM Deletes: %d\n",
> -		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> +	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>   	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
> +		(double)total_cycles / TOTAL_WRITES);
>   
>   	writer_done = 1;
>   	/* Wait and check return value from reader threads */
> 

Acked-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>

-- 
Regards,
Vladimir

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v5 2/4] test/lpm: return error on failure in rcu qsbr perf
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 2/4] test/lpm: return error on failure " Dharmik Thakkar
@ 2020-11-04 19:35           ` Medvedkin, Vladimir
  0 siblings, 0 replies; 52+ messages in thread
From: Medvedkin, Vladimir @ 2020-11-04 19:35 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Gavin Hu,
	Honnappa Nagarahalli, Ruifeng Wang
  Cc: dev, nd, stable



On 04/11/2020 18:58, Dharmik Thakkar wrote:
> Return error if Add/Delete fail in multiwriter perf test
> 
> Return error if single or multi writer test fails
> 
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>   app/test/test_lpm_perf.c | 12 ++++++++++--
>   1 file changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> index 45164b23214b..873ecf511c97 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -453,6 +453,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   					next_hop_add) != 0) {
>   				printf("Failed to add iteration %d, route# %d\n",
>   					i, j);
> +				goto error;
>   			}
>   			pthread_mutex_unlock(&lpm_mutex);
>   		}
> @@ -464,6 +465,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   				large_ldepth_route_table[j].depth) != 0) {
>   				printf("Failed to delete iteration %d, route# %d\n",
>   					i, j);
> +				goto error;
>   			}
>   			pthread_mutex_unlock(&lpm_mutex);
>   		}
> @@ -474,6 +476,10 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
>   
>   	return 0;
> +
> +error:
> +	pthread_mutex_unlock(&lpm_mutex);
> +	return -1;
>   }
>   
>   /*
> @@ -947,9 +953,11 @@ test_lpm_perf(void)
>   	rte_lpm_delete_all(lpm);
>   	rte_lpm_free(lpm);
>   
> -	test_lpm_rcu_perf();
> +	if (test_lpm_rcu_perf() < 0)
> +		return -1;
>   
> -	test_lpm_rcu_perf_multi_writer();
> +	if (test_lpm_rcu_perf_multi_writer() < 0)
> +		return -1;
>   
>   	return 0;
>   }
> 

Acked-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>

-- 
Regards,
Vladimir

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v5 3/4] test/lpm: remove error checking in rcu qsbr perf
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 3/4] test/lpm: remove error checking " Dharmik Thakkar
@ 2020-11-04 19:35           ` Medvedkin, Vladimir
  0 siblings, 0 replies; 52+ messages in thread
From: Medvedkin, Vladimir @ 2020-11-04 19:35 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Honnappa Nagarahalli,
	Ruifeng Wang, Gavin Hu
  Cc: dev, nd, stable



On 04/11/2020 18:58, Dharmik Thakkar wrote:
> Remove redundant error checking for reader threads
> since they never return error.
> 
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>   app/test/test_lpm_perf.c | 21 ++++++++-------------
>   1 file changed, 8 insertions(+), 13 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> index 873ecf511c97..c8e70ec89ff5 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -556,11 +556,10 @@ test_lpm_rcu_perf_multi_writer(void)
>   		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>   		/ TOTAL_WRITES);
>   
> -	/* Wait and check return value from reader threads */
>   	writer_done = 1;
> +	/* Wait until all readers have exited */
>   	for (i = 2; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
>   
>   	rte_lpm_free(lpm);
>   	rte_free(rv);
> @@ -605,10 +604,9 @@ test_lpm_rcu_perf_multi_writer(void)
>   		/ TOTAL_WRITES);
>   
>   	writer_done = 1;
> -	/* Wait and check return value from reader threads */
> +	/* Wait until all readers have exited */
>   	for (i = 2; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
>   
>   	rte_lpm_free(lpm);
>   
> @@ -712,10 +710,9 @@ test_lpm_rcu_perf(void)
>   		(double)total_cycles / TOTAL_WRITES);
>   
>   	writer_done = 1;
> -	/* Wait and check return value from reader threads */
> +	/* Wait until all readers have exited */
>   	for (i = 0; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
>   
>   	rte_lpm_free(lpm);
>   	rte_free(rv);
> @@ -771,11 +768,9 @@ test_lpm_rcu_perf(void)
>   		(double)total_cycles / TOTAL_WRITES);
>   
>   	writer_done = 1;
> -	/* Wait and check return value from reader threads */
> +	/* Wait until all readers have exited */
>   	for (i = 0; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			printf("Warning: lcore %u not finished.\n",
> -				enabled_core_ids[i]);
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
>   
>   	rte_lpm_free(lpm);
>   
> 

Acked-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>

-- 
Regards,
Vladimir

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v5 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
@ 2020-11-04 19:35           ` Medvedkin, Vladimir
  0 siblings, 0 replies; 52+ messages in thread
From: Medvedkin, Vladimir @ 2020-11-04 19:35 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson; +Cc: dev, nd



On 04/11/2020 18:58, Dharmik Thakkar wrote:
> Avoid code duplication by combining the single-writer and multi-writer tests
> 
> Also, enable support for more than 2 writers
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>   app/test/test_lpm_perf.c | 356 +++++++++------------------------------
>   1 file changed, 81 insertions(+), 275 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> index c8e70ec89ff5..2bed00d0648f 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv;
>   static volatile uint8_t writer_done;
>   static volatile uint32_t thr_id;
>   static uint64_t gwrite_cycles;
> +static uint32_t num_writers;
>   /* LPM APIs are not thread safe, use mutex to provide thread safety */
>   static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
>   
> @@ -430,24 +431,19 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   {
>   	unsigned int i, j, si, ei;
>   	uint64_t begin, total_cycles;
> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>   	uint32_t next_hop_add = 0xAA;
> +	uint8_t pos_core = (uint8_t)((uintptr_t)arg);
>   
> -	/* 2 writer threads are used */
> -	if (core_id % 2 == 0) {
> -		si = 0;
> -		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> -	} else {
> -		si = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> -		ei = NUM_LDEPTH_ROUTE_ENTRIES;
> -	}
> +	si = (pos_core * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers;
> +	ei = ((pos_core + 1) * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers;
>   
>   	/* Measure add/delete. */
>   	begin = rte_rdtsc_precise();
>   	for (i = 0; i < RCU_ITERATIONS; i++) {
>   		/* Add all the entries */
>   		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (num_writers > 1)
> +				pthread_mutex_lock(&lpm_mutex);
>   			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>   					large_ldepth_route_table[j].depth,
>   					next_hop_add) != 0) {
> @@ -455,19 +451,22 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   					i, j);
>   				goto error;
>   			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (num_writers > 1)
> +				pthread_mutex_unlock(&lpm_mutex);
>   		}
>   
>   		/* Delete all the entries */
>   		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (num_writers > 1)
> +				pthread_mutex_lock(&lpm_mutex);
>   			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>   				large_ldepth_route_table[j].depth) != 0) {
>   				printf("Failed to delete iteration %d, route# %d\n",
>   					i, j);
>   				goto error;
>   			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (num_writers > 1)
> +				pthread_mutex_unlock(&lpm_mutex);
>   		}
>   	}
>   
> @@ -478,22 +477,24 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   	return 0;
>   
>   error:
> -	pthread_mutex_unlock(&lpm_mutex);
> +	if (num_writers > 1)
> +		pthread_mutex_unlock(&lpm_mutex);
>   	return -1;
>   }
>   
>   /*
>    * Functional test:
> - * 2 writers, rest are readers
> + * 1/2 writers, rest are readers
>    */
>   static int
> -test_lpm_rcu_perf_multi_writer(void)
> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>   {
>   	struct rte_lpm_config config;
>   	size_t sz;
> -	unsigned int i;
> +	unsigned int i, j;
>   	uint16_t core_id;
>   	struct rte_lpm_rcu_config rcu_cfg = {0};
> +	int (*reader_f)(void *arg) = NULL;
>   
>   	if (rte_lcore_count() < 3) {
>   		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
> @@ -506,273 +507,78 @@ test_lpm_rcu_perf_multi_writer(void)
>   		num_cores++;
>   	}
>   
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> -
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -
> -	return 0;
> -
> -error:
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	rte_eal_mp_wait_lcore();
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -
> -	return -1;
> -}
> -
> -/*
> - * Functional test:
> - * Single writer, rest are readers
> - */
> -static int
> -test_lpm_rcu_perf(void)
> -{
> -	struct rte_lpm_config config;
> -	uint64_t begin, total_cycles;
> -	size_t sz;
> -	unsigned int i, j;
> -	uint16_t core_id;
> -	uint32_t next_hop_add = 0xAA;
> -	struct rte_lpm_rcu_config rcu_cfg = {0};
> -
> -	if (rte_lcore_count() < 2) {
> -		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
> -		return TEST_SKIPPED;
> -	}
> -
> -	num_cores = 0;
> -	RTE_LCORE_FOREACH_WORKER(core_id) {
> -		enabled_core_ids[num_cores] = core_id;
> -		num_cores++;
> -	}
> -
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route# %d\n",
> -					i, j);
> -				goto error;
> -			}
> -
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route# %d\n",
> -					i, j);
> +	for (j = 1; j < 3; j++) {
> +		if (use_rcu)
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration enabled\n", j, num_cores - j);
> +		else
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration disabled\n", j, num_cores - j);
> +
> +		num_writers = j;
> +
> +		/* Create LPM table */
> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.flags = 0;
> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> +		TEST_LPM_ASSERT(lpm != NULL);
> +
> +		/* Init RCU variable */
> +		if (use_rcu) {
> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> +							RTE_CACHE_LINE_SIZE);
> +			rte_rcu_qsbr_init(rv, num_cores);
> +
> +			rcu_cfg.v = rv;
> +			/* Assign the RCU variable to LPM */
> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> +				printf("RCU variable assignment failed\n");
>   				goto error;
>   			}
> -	}
> -	total_cycles = rte_rdtsc_precise() - begin;
>   
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> +			reader_f = test_lpm_rcu_qsbr_reader;
> +		} else
> +			reader_f = test_lpm_reader;
>   
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> +		writer_done = 0;
> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>   
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>   
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> +		/* Launch reader threads */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_remote_launch(reader_f, NULL,
> +						enabled_core_ids[i]);
>   
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> +		/* Launch writer threads */
> +		for (i = 0; i < j; i++)
> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> +						(void *)(uintptr_t)i,
> +						enabled_core_ids[i]);
>   
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route# %d\n",
> -					i, j);
> +		/* Wait for writer threads */
> +		for (i = 0; i < j; i++)
> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>   				goto error;
> -			}
>   
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route# %d\n",
> -					i, j);
> -				goto error;
> -			}
> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> +			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> +			/ TOTAL_WRITES);
> +
> +		writer_done = 1;
> +		/* Wait until all readers have exited */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_wait_lcore(enabled_core_ids[i]);
> +
> +		rte_lpm_free(lpm);
> +		rte_free(rv);
> +		lpm = NULL;
> +		rv = NULL;
>   	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
>   
>   	return 0;
>   
> @@ -948,10 +754,10 @@ test_lpm_perf(void)
>   	rte_lpm_delete_all(lpm);
>   	rte_lpm_free(lpm);
>   
> -	if (test_lpm_rcu_perf() < 0)
> +	if (test_lpm_rcu_perf_multi_writer(0) < 0)
>   		return -1;
>   
> -	if (test_lpm_rcu_perf_multi_writer() < 0)
> +	if (test_lpm_rcu_perf_multi_writer(1) < 0)
>   		return -1;
>   
>   	return 0;
> 

Acked-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>

-- 
Regards,
Vladimir

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test
  2020-11-04 18:58       ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
                           ` (3 preceding siblings ...)
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
@ 2020-11-05 15:58         ` David Marchand
  4 siblings, 0 replies; 52+ messages in thread
From: David Marchand @ 2020-11-05 15:58 UTC (permalink / raw)
  To: Dharmik Thakkar; +Cc: dev, nd

On Wed, Nov 4, 2020 at 7:59 PM Dharmik Thakkar <dharmik.thakkar@arm.com> wrote:
>
> Fix LPM adds, LPM deletes, and cycle calculation.
> Return error if LPM add/delete fails in multi-writer test.
> Return error if single or multi writer test fails.
> Remove redundant error checking for readers.
> Combine single and multi threaded test cases to avoid code duplication.

Series applied, thanks Dharmik.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 52+ messages in thread

end of thread, other threads:[~2020-11-05 15:59 UTC | newest]

Thread overview: 52+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
2020-10-29 15:36 ` [dpdk-dev] [PATCH 2/4] test/lpm: return error on failure " Dharmik Thakkar
2020-10-29 15:36 ` [dpdk-dev] [PATCH 3/4] test/lpm: remove error checking " Dharmik Thakkar
2020-10-29 15:36 ` [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
2020-11-02 17:17   ` Medvedkin, Vladimir
2020-11-02 22:11     ` Dharmik Thakkar
2020-11-02 10:08 ` [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation " David Marchand
2020-11-02 15:11 ` Bruce Richardson
2020-11-02 16:58   ` Dharmik Thakkar
2020-11-02 17:21     ` Medvedkin, Vladimir
2020-11-02 17:33     ` Bruce Richardson
2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
2020-11-03  1:30     ` Honnappa Nagarahalli
2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure " Dharmik Thakkar
2020-11-03  1:28     ` Honnappa Nagarahalli
2020-11-03  4:42       ` Dharmik Thakkar
2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking " Dharmik Thakkar
2020-11-03  1:21     ` Honnappa Nagarahalli
2020-11-03  4:56       ` Dharmik Thakkar
2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
2020-11-03  4:21     ` Honnappa Nagarahalli
2020-11-03  4:33       ` Dharmik Thakkar
2020-11-03  5:32         ` Honnappa Nagarahalli
2020-11-03 14:03           ` Dharmik Thakkar
2020-11-03 14:51             ` Honnappa Nagarahalli
2020-11-03 18:01             ` Medvedkin, Vladimir
2020-11-03  5:12   ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 2/4] test/lpm: return error on failure " Dharmik Thakkar
2020-11-03  5:21       ` Honnappa Nagarahalli
2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 3/4] test/lpm: remove error checking " Dharmik Thakkar
2020-11-03  5:22       ` Honnappa Nagarahalli
2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
2020-11-03 22:23     ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 2/4] test/lpm: return error on failure " Dharmik Thakkar
2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 3/4] test/lpm: remove error checking " Dharmik Thakkar
2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
2020-11-03 22:35         ` Honnappa Nagarahalli
2020-11-04 15:46         ` Medvedkin, Vladimir
2020-11-04 16:49           ` Dharmik Thakkar
2020-11-04 18:58       ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
2020-11-04 19:34           ` Medvedkin, Vladimir
2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 2/4] test/lpm: return error on failure " Dharmik Thakkar
2020-11-04 19:35           ` Medvedkin, Vladimir
2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 3/4] test/lpm: remove error checking " Dharmik Thakkar
2020-11-04 19:35           ` Medvedkin, Vladimir
2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
2020-11-04 19:35           ` Medvedkin, Vladimir
2020-11-05 15:58         ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test David Marchand

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.