All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dharmik Thakkar <dharmik.thakkar@arm.com>
To: John McNamara <john.mcnamara@intel.com>
Cc: dev@dpdk.org, nd@arm.com, honnappa.nagarahalli@arm.com,
	ruifeng.wang@arm.com, joyce.kong@arm.com,
	dharmik.thakkar@arm.com
Subject: [dpdk-dev] [PATCH v3 4/8] examples/performance-thread: use compiler atomics for sync
Date: Wed, 13 Oct 2021 13:54:03 -0500	[thread overview]
Message-ID: <20211013185407.2841183-5-dharmik.thakkar@arm.com> (raw)
In-Reply-To: <20211013185407.2841183-1-dharmik.thakkar@arm.com>

From: Joyce Kong <joyce.kong@arm.com>

Convert rte_atomic API usages to compiler __atomic built-ins
for thread synchronization.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 examples/performance-thread/common/lthread.c  | 10 +++---
 .../performance-thread/common/lthread_diag.h  | 10 +++---
 .../performance-thread/common/lthread_int.h   |  1 -
 .../performance-thread/common/lthread_mutex.c | 26 +++++++-------
 .../performance-thread/common/lthread_mutex.h |  2 +-
 .../performance-thread/common/lthread_sched.c | 34 ++++++++-----------
 .../performance-thread/common/lthread_tls.c   |  5 +--
 .../performance-thread/l3fwd-thread/main.c    | 22 +++++-------
 8 files changed, 53 insertions(+), 57 deletions(-)

diff --git a/examples/performance-thread/common/lthread.c b/examples/performance-thread/common/lthread.c
index 3f1f48db433e..98123f34f840 100644
--- a/examples/performance-thread/common/lthread.c
+++ b/examples/performance-thread/common/lthread.c
@@ -357,9 +357,10 @@ void lthread_exit(void *ptr)
 	 *  - if exit before join then we suspend and resume on join
 	 *  - if join before exit then we resume the joining thread
 	 */
+	uint64_t join_initial = LT_JOIN_INITIAL;
 	if ((lt->join == LT_JOIN_INITIAL)
-	    && rte_atomic64_cmpset(&lt->join, LT_JOIN_INITIAL,
-				   LT_JOIN_EXITING)) {
+	    && __atomic_compare_exchange_n(&lt->join, &join_initial,
+		LT_JOIN_EXITING, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 
 		DIAG_EVENT(lt, LT_DIAG_LTHREAD_EXIT, 1, 0);
 		_suspend();
@@ -415,9 +416,10 @@ int lthread_join(struct lthread *lt, void **ptr)
 	 *  - if join before exit we suspend and will resume when exit is called
 	 *  - if exit before join we resume the exiting thread
 	 */
+	uint64_t join_initial = LT_JOIN_INITIAL;
 	if ((lt->join == LT_JOIN_INITIAL)
-	    && rte_atomic64_cmpset(&lt->join, LT_JOIN_INITIAL,
-				   LT_JOIN_THREAD_SET)) {
+	    && __atomic_compare_exchange_n(&lt->join, &join_initial,
+		LT_JOIN_THREAD_SET, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 
 		DIAG_EVENT(current, LT_DIAG_LTHREAD_JOIN, lt, 1);
 		_suspend();
diff --git a/examples/performance-thread/common/lthread_diag.h b/examples/performance-thread/common/lthread_diag.h
index e876dda6da3a..7ee89eef388d 100644
--- a/examples/performance-thread/common/lthread_diag.h
+++ b/examples/performance-thread/common/lthread_diag.h
@@ -78,11 +78,11 @@ extern uint64_t diag_mask;
 	}								\
 } while (0)
 
-#define DIAG_COUNT_DEFINE(x) rte_atomic64_t count_##x
-#define DIAG_COUNT_INIT(o, x) rte_atomic64_init(&((o)->count_##x))
-#define DIAG_COUNT_INC(o, x) rte_atomic64_inc(&((o)->count_##x))
-#define DIAG_COUNT_DEC(o, x) rte_atomic64_dec(&((o)->count_##x))
-#define DIAG_COUNT(o, x) rte_atomic64_read(&((o)->count_##x))
+#define DIAG_COUNT_DEFINE(x) uint64_t count_##x
+#define DIAG_COUNT_INIT(o, x) __atomic_store_n(&((o)->count_##x), 0, __ATOMIC_RELAXED)
+#define DIAG_COUNT_INC(o, x) __atomic_fetch_add(&((o)->count_##x), 1, __ATOMIC_RELAXED)
+#define DIAG_COUNT_DEC(o, x) __atomic_fetch_sub(&((o)->count_##x), 1, __ATOMIC_RELAXED)
+#define DIAG_COUNT(o, x) __atomic_load_n(&((o)->count_##x), __ATOMIC_RELAXED)
 
 #define DIAG_USED
 
diff --git a/examples/performance-thread/common/lthread_int.h b/examples/performance-thread/common/lthread_int.h
index a352f13b7568..d010126f1681 100644
--- a/examples/performance-thread/common/lthread_int.h
+++ b/examples/performance-thread/common/lthread_int.h
@@ -21,7 +21,6 @@ extern "C" {
 #include <rte_cycles.h>
 #include <rte_per_lcore.h>
 #include <rte_timer.h>
-#include <rte_atomic_64.h>
 #include <rte_spinlock.h>
 #include <ctx.h>
 
diff --git a/examples/performance-thread/common/lthread_mutex.c b/examples/performance-thread/common/lthread_mutex.c
index 01da6cad4f61..061fc5c19a6b 100644
--- a/examples/performance-thread/common/lthread_mutex.c
+++ b/examples/performance-thread/common/lthread_mutex.c
@@ -60,7 +60,7 @@ lthread_mutex_init(char *name, struct lthread_mutex **mutex,
 	m->root_sched = THIS_SCHED;
 	m->owner = NULL;
 
-	rte_atomic64_init(&m->count);
+	__atomic_store_n(&m->count, 0, __ATOMIC_RELAXED);
 
 	DIAG_CREATE_EVENT(m, LT_DIAG_MUTEX_CREATE);
 	/* success */
@@ -115,10 +115,11 @@ int lthread_mutex_lock(struct lthread_mutex *m)
 	}
 
 	for (;;) {
-		rte_atomic64_inc(&m->count);
+		__atomic_fetch_add(&m->count, 1, __ATOMIC_RELAXED);
 		do {
-			if (rte_atomic64_cmpset
-			    ((uint64_t *) &m->owner, 0, (uint64_t) lt)) {
+			uint64_t lt_init = 0;
+			if (__atomic_compare_exchange_n((uint64_t *) &m->owner, &lt_init,
+				(uint64_t) lt, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 				/* happy days, we got the lock */
 				DIAG_EVENT(m, LT_DIAG_MUTEX_LOCK, m, 0);
 				return 0;
@@ -126,7 +127,7 @@ int lthread_mutex_lock(struct lthread_mutex *m)
 			/* spin due to race with unlock when
 			* nothing was blocked
 			*/
-		} while ((rte_atomic64_read(&m->count) == 1) &&
+		} while ((__atomic_load_n(&m->count, __ATOMIC_RELAXED) == 1) &&
 				(m->owner == NULL));
 
 		/* queue the current thread in the blocked queue
@@ -160,16 +161,17 @@ int lthread_mutex_trylock(struct lthread_mutex *m)
 		return POSIX_ERRNO(EDEADLK);
 	}
 
-	rte_atomic64_inc(&m->count);
-	if (rte_atomic64_cmpset
-	    ((uint64_t *) &m->owner, (uint64_t) NULL, (uint64_t) lt)) {
+	__atomic_fetch_add(&m->count, 1, __ATOMIC_RELAXED);
+	uint64_t lt_init = 0;
+	if (__atomic_compare_exchange_n((uint64_t *) &m->owner, &lt_init,
+		(uint64_t) lt, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 		/* got the lock */
 		DIAG_EVENT(m, LT_DIAG_MUTEX_TRYLOCK, m, 0);
 		return 0;
 	}
 
 	/* failed so return busy */
-	rte_atomic64_dec(&m->count);
+	__atomic_fetch_sub(&m->count, 1, __ATOMIC_RELAXED);
 	DIAG_EVENT(m, LT_DIAG_MUTEX_TRYLOCK, m, POSIX_ERRNO(EBUSY));
 	return POSIX_ERRNO(EBUSY);
 }
@@ -193,13 +195,13 @@ int lthread_mutex_unlock(struct lthread_mutex *m)
 		return POSIX_ERRNO(EPERM);
 	}
 
-	rte_atomic64_dec(&m->count);
+	__atomic_fetch_sub(&m->count, 1, __ATOMIC_RELAXED);
 	/* if there are blocked threads then make one ready */
-	while (rte_atomic64_read(&m->count) > 0) {
+	while (__atomic_load_n(&m->count, __ATOMIC_RELAXED) > 0) {
 		unblocked = _lthread_queue_remove(m->blocked);
 
 		if (unblocked != NULL) {
-			rte_atomic64_dec(&m->count);
+			__atomic_fetch_sub(&m->count, 1, __ATOMIC_RELAXED);
 			DIAG_EVENT(m, LT_DIAG_MUTEX_UNLOCKED, m, unblocked);
 			RTE_ASSERT(unblocked->sched != NULL);
 			_ready_queue_insert((struct lthread_sched *)
diff --git a/examples/performance-thread/common/lthread_mutex.h b/examples/performance-thread/common/lthread_mutex.h
index cd866f87b889..730092bdf8bb 100644
--- a/examples/performance-thread/common/lthread_mutex.h
+++ b/examples/performance-thread/common/lthread_mutex.h
@@ -17,7 +17,7 @@ extern "C" {
 
 struct lthread_mutex {
 	struct lthread *owner;
-	rte_atomic64_t	count;
+	uint64_t count;
 	struct lthread_queue *blocked __rte_cache_aligned;
 	struct lthread_sched *root_sched;
 	char			name[MAX_MUTEX_NAME_SIZE];
diff --git a/examples/performance-thread/common/lthread_sched.c b/examples/performance-thread/common/lthread_sched.c
index 38ca0c45cbf8..3784b010c221 100644
--- a/examples/performance-thread/common/lthread_sched.c
+++ b/examples/performance-thread/common/lthread_sched.c
@@ -22,8 +22,6 @@
 
 #include <rte_prefetch.h>
 #include <rte_per_lcore.h>
-#include <rte_atomic.h>
-#include <rte_atomic_64.h>
 #include <rte_log.h>
 #include <rte_common.h>
 #include <rte_branch_prediction.h>
@@ -47,8 +45,8 @@
  * When a scheduler shuts down it is assumed that the application is terminating
  */
 
-static rte_atomic16_t num_schedulers;
-static rte_atomic16_t active_schedulers;
+static uint16_t num_schedulers;
+static uint16_t active_schedulers;
 
 /* one scheduler per lcore */
 RTE_DEFINE_PER_LCORE(struct lthread_sched *, this_sched) = NULL;
@@ -64,10 +62,8 @@ uint64_t diag_mask;
 RTE_INIT(lthread_sched_ctor)
 {
 	memset(schedcore, 0, sizeof(schedcore));
-	rte_atomic16_init(&num_schedulers);
-	rte_atomic16_set(&num_schedulers, 1);
-	rte_atomic16_init(&active_schedulers);
-	rte_atomic16_set(&active_schedulers, 0);
+	__atomic_store_n(&num_schedulers, 1, __ATOMIC_RELAXED);
+	__atomic_store_n(&active_schedulers, 0, __ATOMIC_RELAXED);
 	diag_cb = NULL;
 }
 
@@ -260,8 +256,8 @@ struct lthread_sched *_lthread_sched_create(size_t stack_size)
  */
 int lthread_num_schedulers_set(int num)
 {
-	rte_atomic16_set(&num_schedulers, num);
-	return (int)rte_atomic16_read(&num_schedulers);
+	__atomic_store_n(&num_schedulers, num, __ATOMIC_RELAXED);
+	return (int)__atomic_load_n(&num_schedulers, __ATOMIC_RELAXED);
 }
 
 /*
@@ -269,7 +265,7 @@ int lthread_num_schedulers_set(int num)
  */
 int lthread_active_schedulers(void)
 {
-	return (int)rte_atomic16_read(&active_schedulers);
+	return (int)__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED);
 }
 
 
@@ -299,8 +295,8 @@ void lthread_scheduler_shutdown_all(void)
 	 * for the possibility of a pthread wrapper on lthread_yield(),
 	 * something that is not possible unless the scheduler is running.
 	 */
-	while (rte_atomic16_read(&active_schedulers) <
-	       rte_atomic16_read(&num_schedulers))
+	while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) <
+	       __atomic_load_n(&num_schedulers, __ATOMIC_RELAXED))
 		sched_yield();
 
 	for (i = 0; i < LTHREAD_MAX_LCORES; i++) {
@@ -415,15 +411,15 @@ static inline int _lthread_sched_isdone(struct lthread_sched *sched)
  */
 static inline void _lthread_schedulers_sync_start(void)
 {
-	rte_atomic16_inc(&active_schedulers);
+	__atomic_fetch_add(&active_schedulers, 1, __ATOMIC_RELAXED);
 
 	/* wait for lthread schedulers
 	 * Note we use sched_yield() rather than pthread_yield() to allow
 	 * for the possibility of a pthread wrapper on lthread_yield(),
 	 * something that is not possible unless the scheduler is running.
 	 */
-	while (rte_atomic16_read(&active_schedulers) <
-	       rte_atomic16_read(&num_schedulers))
+	while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) <
+	       __atomic_load_n(&num_schedulers, __ATOMIC_RELAXED))
 		sched_yield();
 
 }
@@ -433,15 +429,15 @@ static inline void _lthread_schedulers_sync_start(void)
  */
 static inline void _lthread_schedulers_sync_stop(void)
 {
-	rte_atomic16_dec(&active_schedulers);
-	rte_atomic16_dec(&num_schedulers);
+	__atomic_fetch_sub(&active_schedulers, 1, __ATOMIC_RELAXED);
+	__atomic_fetch_sub(&num_schedulers, 1, __ATOMIC_RELAXED);
 
 	/* wait for schedulers
 	 * Note we use sched_yield() rather than pthread_yield() to allow
 	 * for the possibility of a pthread wrapper on lthread_yield(),
 	 * something that is not possible unless the scheduler is running.
 	 */
-	while (rte_atomic16_read(&active_schedulers) > 0)
+	while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) > 0)
 		sched_yield();
 
 }
diff --git a/examples/performance-thread/common/lthread_tls.c b/examples/performance-thread/common/lthread_tls.c
index 07de6cafabf5..4ab2e3558b1c 100644
--- a/examples/performance-thread/common/lthread_tls.c
+++ b/examples/performance-thread/common/lthread_tls.c
@@ -18,7 +18,6 @@
 #include <rte_malloc.h>
 #include <rte_log.h>
 #include <rte_ring.h>
-#include <rte_atomic_64.h>
 
 #include "lthread_tls.h"
 #include "lthread_queue.h"
@@ -52,8 +51,10 @@ void _lthread_key_pool_init(void)
 
 	bzero(key_table, sizeof(key_table));
 
+	uint64_t pool_init = 0;
 	/* only one lcore should do this */
-	if (rte_atomic64_cmpset(&key_pool_init, 0, 1)) {
+	if (__atomic_compare_exchange_n(&key_pool_init, &pool_init, 1, 0,
+			__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 
 		snprintf(name,
 			MAX_LTHREAD_NAME_SIZE,
diff --git a/examples/performance-thread/l3fwd-thread/main.c b/examples/performance-thread/l3fwd-thread/main.c
index 2905199743a7..50ecc4e820f6 100644
--- a/examples/performance-thread/l3fwd-thread/main.c
+++ b/examples/performance-thread/l3fwd-thread/main.c
@@ -26,7 +26,6 @@
 #include <rte_memcpy.h>
 #include <rte_eal.h>
 #include <rte_launch.h>
-#include <rte_atomic.h>
 #include <rte_cycles.h>
 #include <rte_prefetch.h>
 #include <rte_lcore.h>
@@ -570,8 +569,8 @@ RTE_DEFINE_PER_LCORE(struct lcore_conf *, lcore_conf);
  */
 static int lthreads_on = 1; /**< Use lthreads for processing*/
 
-rte_atomic16_t rx_counter;  /**< Number of spawned rx threads */
-rte_atomic16_t tx_counter;  /**< Number of spawned tx threads */
+uint16_t rx_counter;  /**< Number of spawned rx threads */
+uint16_t tx_counter;  /**< Number of spawned tx threads */
 
 struct thread_conf {
 	uint16_t lcore_id;      /**< Initial lcore for rx thread */
@@ -1910,11 +1909,8 @@ cpu_load_collector(__rte_unused void *arg) {
 	printf("Waiting for %d rx threads and %d tx threads\n", n_rx_thread,
 			n_tx_thread);
 
-	while (rte_atomic16_read(&rx_counter) < n_rx_thread)
-		rte_pause();
-
-	while (rte_atomic16_read(&tx_counter) < n_tx_thread)
-		rte_pause();
+	rte_wait_until_equal_16(&rx_counter, n_rx_thread, __ATOMIC_RELAXED);
+	rte_wait_until_equal_16(&tx_counter, n_tx_thread, __ATOMIC_RELAXED);
 
 	for (i = 0; i < n_rx_thread; i++) {
 
@@ -2036,7 +2032,7 @@ lthread_tx_per_ring(void *dummy)
 	RTE_LOG(INFO, L3FWD, "entering main tx loop on lcore %u\n", rte_lcore_id());
 
 	nb_rx = 0;
-	rte_atomic16_inc(&tx_counter);
+	__atomic_fetch_add(&tx_counter, 1, __ATOMIC_RELAXED);
 	while (1) {
 
 		/*
@@ -2161,7 +2157,7 @@ lthread_rx(void *dummy)
 	worker_id = 0;
 
 	rx_conf->conf.cpu_id = sched_getcpu();
-	rte_atomic16_inc(&rx_counter);
+	__atomic_fetch_add(&rx_counter, 1, __ATOMIC_RELAXED);
 	while (1) {
 
 		/*
@@ -2243,7 +2239,7 @@ lthread_spawner(__rte_unused void *arg)
 	 * scheduler as this lthread, yielding is required to let them to run and
 	 * prevent deadlock here.
 	 */
-	while (rte_atomic16_read(&rx_counter) < n_rx_thread)
+	while (__atomic_load_n(&rx_counter, __ATOMIC_RELAXED) < n_rx_thread)
 		lthread_sleep(100000);
 
 	/*
@@ -2323,7 +2319,7 @@ pthread_tx(void *dummy)
 	RTE_LOG(INFO, L3FWD, "Entering main Tx loop on lcore %u\n", rte_lcore_id());
 
 	tx_conf->conf.cpu_id = sched_getcpu();
-	rte_atomic16_inc(&tx_counter);
+	__atomic_fetch_add(&tx_counter, 1, __ATOMIC_RELAXED);
 	while (1) {
 
 		cur_tsc = rte_rdtsc();
@@ -2406,7 +2402,7 @@ pthread_rx(void *dummy)
 
 	worker_id = 0;
 	rx_conf->conf.cpu_id = sched_getcpu();
-	rte_atomic16_inc(&rx_counter);
+	__atomic_fetch_add(&rx_counter, 1, __ATOMIC_RELAXED);
 	while (1) {
 
 		/*
-- 
2.25.1


  parent reply	other threads:[~2021-10-13 18:54 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-23  5:49 [dpdk-dev] [PATCH v2 0/8] use compiler atomic builtins for examples Joyce Kong
2021-08-23  5:49 ` [dpdk-dev] [PATCH v2 1/8] examples/bbdev_app: use compiler atomics for flag sync Joyce Kong
2021-08-23  5:49 ` [dpdk-dev] [PATCH v2 2/8] examples/multi_process: use compiler atomics for sync Joyce Kong
2021-08-23  5:49 ` [dpdk-dev] [PATCH v2 3/8] examples/kni: use compiler atomics for status sync Joyce Kong
2021-08-23  5:49 ` [dpdk-dev] [PATCH v2 4/8] examples/performance-thread: use compiler atomics for sync Joyce Kong
2021-08-23  5:49 ` [dpdk-dev] [PATCH v2 5/8] examples/l2fwd-jobstats: use compiler atomics for stats sync Joyce Kong
2021-08-23  5:49 ` [dpdk-dev] [PATCH v2 6/8] examples/vm_power_manager: use compiler atomics for sync Joyce Kong
2021-08-23  5:49 ` [dpdk-dev] [PATCH v2 7/8] examples/server_node_efd: " Joyce Kong
2021-08-23  5:49 ` [dpdk-dev] [PATCH v2 8/8] examples: remove unnecessary include of atomic Joyce Kong
2021-08-23 11:29   ` Xia, Chenbo
2021-08-24  2:30     ` Joyce Kong
2021-10-13 18:53 ` [dpdk-dev] [PATCH v3 0/8] use compiler atomic builtins for examples Dharmik Thakkar
2021-10-13 18:54   ` [dpdk-dev] [PATCH v3 1/8] examples/bbdev_app: use compiler atomics for flag sync Dharmik Thakkar
2021-10-13 18:54   ` [dpdk-dev] [PATCH v3 2/8] examples/multi_process: use compiler atomics for sync Dharmik Thakkar
2021-10-13 18:54   ` [dpdk-dev] [PATCH v3 3/8] examples/kni: use compiler atomics for status sync Dharmik Thakkar
2021-10-13 18:54   ` Dharmik Thakkar [this message]
2021-10-13 18:54   ` [dpdk-dev] [PATCH v3 5/8] examples/l2fwd-jobstats: use compiler atomics for stats sync Dharmik Thakkar
2021-10-13 18:54   ` [dpdk-dev] [PATCH v3 6/8] examples/vm_power_manager: use compiler atomics for sync Dharmik Thakkar
2021-10-13 18:54   ` [dpdk-dev] [PATCH v3 7/8] examples/server_node_efd: " Dharmik Thakkar
2021-10-13 18:54   ` [dpdk-dev] [PATCH v3 8/8] examples: remove unnecessary include of atomic Dharmik Thakkar
2021-10-15 23:30     ` Dharmik Thakkar
2021-10-19 15:12       ` David Marchand
2021-10-19 15:12   ` [dpdk-dev] [PATCH v3 0/8] use compiler atomic builtins for examples David Marchand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211013185407.2841183-5-dharmik.thakkar@arm.com \
    --to=dharmik.thakkar@arm.com \
    --cc=dev@dpdk.org \
    --cc=honnappa.nagarahalli@arm.com \
    --cc=john.mcnamara@intel.com \
    --cc=joyce.kong@arm.com \
    --cc=nd@arm.com \
    --cc=ruifeng.wang@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.