DPDK-dev Archive on lore.kernel.org
 help / color / Atom feed
* [dpdk-dev] [PATCH 0/7] Register external threads as lcore
@ 2020-06-10 14:44 David Marchand
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 1/7] eal: relocate per thread symbols to common David Marchand
                   ` (12 more replies)
  0 siblings, 13 replies; 126+ messages in thread
From: David Marchand @ 2020-06-10 14:44 UTC (permalink / raw)
  To: dev

OVS and some other applications have been hacking into DPDK internals to
fake EAL threads and avoid performance penalty of only having non-EAL
threads.

This series proposes to add a new type of lcore and to map those external
threads to such lcores.
Those threads won't run the DPDK eal mainloop and as a consequence part of
the EAL threads API cannot work.

Having new lcores appearing during the process lifetime is not expected
by some DPDK components. This is addressed by notifying of such lcore
hotplug.

This patchset still needs some more work (like refusing the new lcore type
in incompatible EAL threads API, updating the documentation and adding unit
tests) but I am sending it anyway as I would like to get this in for
20.08.

For the interested parties, I have a patch [1] against dpdk-latest OVS
branch that makes use of this series.

1: https://github.com/david-marchand/ovs/commit/dpdk-latest-external-lcores

-- 
David Marchand

David Marchand (7):
  eal: relocate per thread symbols to common
  eal: fix multiple definition of per lcore thread id
  eal: introduce thread init helper
  eal: introduce thread uninit helper
  eal: register non-EAL threads as lcore
  eal: dump lcores
  eal: add lcore hotplug notifications

 doc/guides/howto/debug_troubleshoot.rst   |   5 +-
 lib/librte_eal/common/eal_common_lcore.c  | 167 +++++++++++++++++++++-
 lib/librte_eal/common/eal_common_thread.c | 143 ++++++++++++++----
 lib/librte_eal/common/eal_common_trace.c  |  49 ++++++-
 lib/librte_eal/common/eal_private.h       |  43 ++++++
 lib/librte_eal/common/eal_thread.h        |  26 +++-
 lib/librte_eal/common/eal_trace.h         |   1 +
 lib/librte_eal/freebsd/eal.c              |  16 ++-
 lib/librte_eal/freebsd/eal_thread.c       |  38 +----
 lib/librte_eal/include/rte_eal.h          |  12 +-
 lib/librte_eal/include/rte_lcore.h        | 112 +++++++++++++--
 lib/librte_eal/linux/eal.c                |  17 ++-
 lib/librte_eal/linux/eal_thread.c         |  38 +----
 lib/librte_eal/rte_eal_version.map        |  14 ++
 lib/librte_eal/windows/eal.c              |   3 +-
 lib/librte_eal/windows/eal_thread.c       |  14 +-
 16 files changed, 536 insertions(+), 162 deletions(-)

-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH 1/7] eal: relocate per thread symbols to common
  2020-06-10 14:44 [dpdk-dev] [PATCH 0/7] Register external threads as lcore David Marchand
@ 2020-06-10 14:45 ` David Marchand
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 2/7] eal: fix multiple definition of per lcore thread id David Marchand
                   ` (11 subsequent siblings)
  12 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-10 14:45 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Harini Ramakrishnan, Omar Cardona,
	Pallavi Kadam, Ranjit Menon

We have per lcore thread symbols scattered in OS implementations but
common code relies on them.
Move all of them into common code.

RTE_PER_LCORE(_socket_id) and RTE_PER_LCORE(_cpuset) have public
accessors and are not exported through the library map, so they can be
made static.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/librte_eal/common/eal_common_thread.c | 5 ++++-
 lib/librte_eal/freebsd/eal_thread.c       | 4 ----
 lib/librte_eal/include/rte_lcore.h        | 1 -
 lib/librte_eal/linux/eal_thread.c         | 4 ----
 lib/librte_eal/windows/eal_thread.c       | 4 ----
 5 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index f9f588c173..25200e5a99 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -23,7 +23,10 @@
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DECLARE_PER_LCORE(unsigned , _socket_id);
+RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
+static RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) =
+	(unsigned int)SOCKET_ID_ANY;
+static RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
 
 unsigned rte_socket_id(void)
 {
diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
index b52019782a..40676d9ef5 100644
--- a/lib/librte_eal/freebsd/eal_thread.c
+++ b/lib/librte_eal/freebsd/eal_thread.c
@@ -25,10 +25,6 @@
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
 /*
  * Send a message to a slave lcore identified by slave_id to call a
  * function f with argument arg. Once the execution is done, the
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 339046bc86..5c1d1926e9 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -23,7 +23,6 @@ extern "C" {
 #define LCORE_ID_ANY     UINT32_MAX       /**< Any lcore. */
 
 RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
-RTE_DECLARE_PER_LCORE(rte_cpuset_t, _cpuset); /**< Per thread "cpuset". */
 
 /**
  * Get a lcore's role.
diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
index cd9d6e0ebf..a52ebef3a4 100644
--- a/lib/librte_eal/linux/eal_thread.c
+++ b/lib/librte_eal/linux/eal_thread.c
@@ -25,10 +25,6 @@
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
 /*
  * Send a message to a slave lcore identified by slave_id to call a
  * function f with argument arg. Once the execution is done, the
diff --git a/lib/librte_eal/windows/eal_thread.c b/lib/librte_eal/windows/eal_thread.c
index e149199a6f..fc245ceb05 100644
--- a/lib/librte_eal/windows/eal_thread.c
+++ b/lib/librte_eal/windows/eal_thread.c
@@ -16,10 +16,6 @@
 #include "eal_private.h"
 #include "eal_windows.h"
 
-RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) = (unsigned int)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
 /*
  * Send a message to a slave lcore identified by slave_id to call a
  * function f with argument arg. Once the execution is done, the
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH 2/7] eal: fix multiple definition of per lcore thread id
  2020-06-10 14:44 [dpdk-dev] [PATCH 0/7] Register external threads as lcore David Marchand
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 1/7] eal: relocate per thread symbols to common David Marchand
@ 2020-06-10 14:45 ` David Marchand
  2020-06-15  6:46   ` Kinsella, Ray
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 3/7] eal: introduce thread init helper David Marchand
                   ` (10 subsequent siblings)
  12 siblings, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-06-10 14:45 UTC (permalink / raw)
  To: dev
  Cc: Ray Kinsella, Neil Horman, Cunming Liang, Konstantin Ananyev,
	Olivier Matz

Because of the inline accessor + static declaration in rte_gettid(),
we end up with multiple symbols for RTE_PER_LCORE(_thread_id).
Each compilation unit will pay a cost when accessing this information
for the first time.

$ nm build/app/dpdk-testpmd | grep per_lcore__thread_id
0000000000000054 d per_lcore__thread_id.5037
0000000000000040 d per_lcore__thread_id.5103
0000000000000048 d per_lcore__thread_id.5259
000000000000004c d per_lcore__thread_id.5259
0000000000000044 d per_lcore__thread_id.5933
0000000000000058 d per_lcore__thread_id.6261
0000000000000050 d per_lcore__thread_id.7378
000000000000005c d per_lcore__thread_id.7496
000000000000000c d per_lcore__thread_id.8016
0000000000000010 d per_lcore__thread_id.8431

Make it global as part of the DPDK_21 stable ABI.

Fixes: ef76436c6834 ("eal: get unique thread id")

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/librte_eal/common/eal_common_thread.c | 1 +
 lib/librte_eal/include/rte_eal.h          | 3 ++-
 lib/librte_eal/rte_eal_version.map        | 7 +++++++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 25200e5a99..f04d880880 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -24,6 +24,7 @@
 #include "eal_thread.h"
 
 RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
+RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
 static RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) =
 	(unsigned int)SOCKET_ID_ANY;
 static RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
index 2f9ed298de..2edf8c6556 100644
--- a/lib/librte_eal/include/rte_eal.h
+++ b/lib/librte_eal/include/rte_eal.h
@@ -447,6 +447,8 @@ enum rte_intr_mode rte_eal_vfio_intr_mode(void);
  */
 int rte_sys_gettid(void);
 
+RTE_DECLARE_PER_LCORE(int, _thread_id);
+
 /**
  * Get system unique thread id.
  *
@@ -456,7 +458,6 @@ int rte_sys_gettid(void);
  */
 static inline int rte_gettid(void)
 {
-	static RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
 	if (RTE_PER_LCORE(_thread_id) == -1)
 		RTE_PER_LCORE(_thread_id) = rte_sys_gettid();
 	return RTE_PER_LCORE(_thread_id);
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index d8038749a4..fdfc3f1a88 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -221,6 +221,13 @@ DPDK_20.0 {
 	local: *;
 };
 
+DPDK_21 {
+	global:
+
+	per_lcore__thread_id;
+
+} DPDK_20.0;
+
 EXPERIMENTAL {
 	global:
 
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH 3/7] eal: introduce thread init helper
  2020-06-10 14:44 [dpdk-dev] [PATCH 0/7] Register external threads as lcore David Marchand
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 1/7] eal: relocate per thread symbols to common David Marchand
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 2/7] eal: fix multiple definition of per lcore thread id David Marchand
@ 2020-06-10 14:45 ` David Marchand
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 4/7] eal: introduce thread uninit helper David Marchand
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-10 14:45 UTC (permalink / raw)
  To: dev
  Cc: Bruce Richardson, Harini Ramakrishnan, Omar Cardona,
	Pallavi Kadam, Ranjit Menon

Introduce a helper responsible for initialising the per thread context.
We can then have a unified context for EAL and non-EAL threads and
remove copy/paste'd OS-specific helpers.

Per EAL thread CPU affinity setting is separated from the thread init.
This accommodates the Windows EAL, where CPU affinity is not set at
the moment.
Besides, having affinity set by the master lcore in FreeBSD and Linux
will make it possible to detect errors rather than panic in the child
thread. But the cleanup when such an event happens is left for later.

Document Windows limitation wrt recursive locks not working because of
lack of gettid().

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/librte_eal/common/eal_common_thread.c | 58 ++++++++++++++---------
 lib/librte_eal/common/eal_thread.h        |  8 ++--
 lib/librte_eal/freebsd/eal.c              | 14 +++++-
 lib/librte_eal/freebsd/eal_thread.c       | 32 +------------
 lib/librte_eal/linux/eal.c                | 15 +++++-
 lib/librte_eal/linux/eal_thread.c         | 32 +------------
 lib/librte_eal/windows/eal.c              |  3 +-
 lib/librte_eal/windows/eal_thread.c       | 10 +---
 8 files changed, 72 insertions(+), 100 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index f04d880880..fd5c41a2af 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -73,20 +73,10 @@ eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
 	return socket_id;
 }
 
-int
-rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+static void
+thread_update_affinity(rte_cpuset_t *cpusetp)
 {
-	int s;
-	unsigned lcore_id;
-	pthread_t tid;
-
-	tid = pthread_self();
-
-	s = pthread_setaffinity_np(tid, sizeof(rte_cpuset_t), cpusetp);
-	if (s != 0) {
-		RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
-		return -1;
-	}
+	unsigned int lcore_id = rte_lcore_id();
 
 	/* store socket_id in TLS for quick access */
 	RTE_PER_LCORE(_socket_id) =
@@ -96,14 +86,24 @@ rte_thread_set_affinity(rte_cpuset_t *cpusetp)
 	memmove(&RTE_PER_LCORE(_cpuset), cpusetp,
 		sizeof(rte_cpuset_t));
 
-	lcore_id = rte_lcore_id();
 	if (lcore_id != (unsigned)LCORE_ID_ANY) {
 		/* EAL thread will update lcore_config */
 		lcore_config[lcore_id].socket_id = RTE_PER_LCORE(_socket_id);
 		memmove(&lcore_config[lcore_id].cpuset, cpusetp,
 			sizeof(rte_cpuset_t));
 	}
+}
+
+int
+rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+{
+	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+			cpusetp) != 0) {
+		RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
+		return -1;
+	}
 
+	thread_update_affinity(cpusetp);
 	return 0;
 }
 
@@ -149,6 +149,25 @@ eal_thread_dump_affinity(char *str, unsigned size)
 	return ret;
 }
 
+void
+rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
+{
+	/* set the lcore ID in per-lcore memory area */
+	RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+#ifndef RTE_EXEC_ENV_WINDOWS
+	/* acquire system unique id  */
+	rte_gettid();
+#else
+	/* FIXME: gettid unimplemented => recursive locks can't work */
+#endif
+
+	thread_update_affinity(cpuset);
+
+#ifndef RTE_EXEC_ENV_WINDOWS
+	__rte_trace_mem_per_thread_alloc();
+#endif
+}
 
 struct rte_thread_ctrl_params {
 	void *(*start_routine)(void *);
@@ -156,16 +175,14 @@ struct rte_thread_ctrl_params {
 	pthread_barrier_t configured;
 };
 
-static void *rte_thread_init(void *arg)
+static void *ctrl_thread_init(void *arg)
 {
 	int ret;
-	rte_cpuset_t *cpuset = &internal_config.ctrl_cpuset;
 	struct rte_thread_ctrl_params *params = arg;
 	void *(*start_routine)(void *) = params->start_routine;
 	void *routine_arg = params->arg;
 
-	/* Store cpuset in TLS for quick access */
-	memmove(&RTE_PER_LCORE(_cpuset), cpuset, sizeof(rte_cpuset_t));
+	rte_thread_init(rte_lcore_id(), &internal_config.ctrl_cpuset);
 
 	ret = pthread_barrier_wait(&params->configured);
 	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
@@ -173,9 +190,6 @@ static void *rte_thread_init(void *arg)
 		free(params);
 	}
 
-#ifndef RTE_EXEC_ENV_WINDOWS
-	__rte_trace_mem_per_thread_alloc();
-#endif
 	return start_routine(routine_arg);
 }
 
@@ -197,7 +211,7 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
 
 	pthread_barrier_init(&params->configured, NULL, 2);
 
-	ret = pthread_create(thread, attr, rte_thread_init, (void *)params);
+	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
 	if (ret != 0) {
 		free(params);
 		return -ret;
diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
index b40ed249ed..da5e7c93ba 100644
--- a/lib/librte_eal/common/eal_thread.h
+++ b/lib/librte_eal/common/eal_thread.h
@@ -16,12 +16,14 @@
 __rte_noreturn void *eal_thread_loop(void *arg);
 
 /**
- * Init per-lcore info for master thread
+ * Init per-lcore info in current thread.
  *
  * @param lcore_id
- *   identifier of master lcore
+ *   identifier of lcore.
+ * @param cpuset
+ *   CPU affinity for this thread.
  */
-void eal_thread_init_master(unsigned lcore_id);
+void rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset);
 
 /**
  * Get the NUMA socket id from cpu id.
diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
index c41f265fac..b5ea11df16 100644
--- a/lib/librte_eal/freebsd/eal.c
+++ b/lib/librte_eal/freebsd/eal.c
@@ -877,7 +877,14 @@ rte_eal_init(int argc, char **argv)
 
 	eal_check_mem_on_local_socket();
 
-	eal_thread_init_master(rte_config.master_lcore);
+	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+			&lcore_config[rte_config.master_lcore].cpuset) != 0) {
+		rte_eal_init_alert("Cannot set affinity");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	rte_thread_init(rte_config.master_lcore,
+		&lcore_config[rte_config.master_lcore].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
 
@@ -908,6 +915,11 @@ rte_eal_init(int argc, char **argv)
 		snprintf(thread_name, sizeof(thread_name),
 				"lcore-slave-%d", i);
 		rte_thread_setname(lcore_config[i].thread_id, thread_name);
+
+		ret = pthread_setaffinity_np(lcore_config[i].thread_id,
+			sizeof(rte_cpuset_t), &lcore_config[i].cpuset);
+		if (ret != 0)
+			rte_panic("Cannot set affinity\n");
 	}
 
 	/*
diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
index 40676d9ef5..c1fb8eb2d8 100644
--- a/lib/librte_eal/freebsd/eal_thread.c
+++ b/lib/librte_eal/freebsd/eal_thread.c
@@ -66,29 +66,6 @@ rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
 	return rc;
 }
 
-/* set affinity for current thread */
-static int
-eal_thread_set_affinity(void)
-{
-	unsigned lcore_id = rte_lcore_id();
-
-	/* acquire system unique id  */
-	rte_gettid();
-
-	/* update EAL thread core affinity */
-	return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
-}
-
-void eal_thread_init_master(unsigned lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
-}
-
 /* main loop of threads */
 __rte_noreturn void *
 eal_thread_loop(__rte_unused void *arg)
@@ -113,19 +90,12 @@ eal_thread_loop(__rte_unused void *arg)
 	m2s = lcore_config[lcore_id].pipe_master2slave[0];
 	s2m = lcore_config[lcore_id].pipe_slave2master[1];
 
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
+	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
-
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
 		lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
 
-	__rte_trace_mem_per_thread_alloc();
 	rte_eal_trace_thread_lcore_ready(lcore_id, cpuset);
 
 	/* read on our pipe to get commands */
diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
index f162124a37..8638376b8a 100644
--- a/lib/librte_eal/linux/eal.c
+++ b/lib/librte_eal/linux/eal.c
@@ -1205,10 +1205,16 @@ rte_eal_init(int argc, char **argv)
 
 	eal_check_mem_on_local_socket();
 
-	eal_thread_init_master(rte_config.master_lcore);
+	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+			&lcore_config[rte_config.master_lcore].cpuset) != 0) {
+		rte_eal_init_alert("Cannot set affinity");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	rte_thread_init(rte_config.master_lcore,
+		&lcore_config[rte_config.master_lcore].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
-
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		rte_config.master_lcore, (uintptr_t)thread_id, cpuset,
 		ret == 0 ? "" : "...");
@@ -1240,6 +1246,11 @@ rte_eal_init(int argc, char **argv)
 		if (ret != 0)
 			RTE_LOG(DEBUG, EAL,
 				"Cannot set name for lcore thread\n");
+
+		ret = pthread_setaffinity_np(lcore_config[i].thread_id,
+			sizeof(rte_cpuset_t), &lcore_config[i].cpuset);
+		if (ret != 0)
+			rte_panic("Cannot set affinity\n");
 	}
 
 	/*
diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
index a52ebef3a4..07aec0c44d 100644
--- a/lib/librte_eal/linux/eal_thread.c
+++ b/lib/librte_eal/linux/eal_thread.c
@@ -66,29 +66,6 @@ rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
 	return rc;
 }
 
-/* set affinity for current EAL thread */
-static int
-eal_thread_set_affinity(void)
-{
-	unsigned lcore_id = rte_lcore_id();
-
-	/* acquire system unique id  */
-	rte_gettid();
-
-	/* update EAL thread core affinity */
-	return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
-}
-
-void eal_thread_init_master(unsigned lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
-}
-
 /* main loop of threads */
 __rte_noreturn void *
 eal_thread_loop(__rte_unused void *arg)
@@ -113,19 +90,12 @@ eal_thread_loop(__rte_unused void *arg)
 	m2s = lcore_config[lcore_id].pipe_master2slave[0];
 	s2m = lcore_config[lcore_id].pipe_slave2master[1];
 
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
+	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
-
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		lcore_id, (uintptr_t)thread_id, cpuset, ret == 0 ? "" : "...");
 
-	__rte_trace_mem_per_thread_alloc();
 	rte_eal_trace_thread_lcore_ready(lcore_id, cpuset);
 
 	/* read on our pipe to get commands */
diff --git a/lib/librte_eal/windows/eal.c b/lib/librte_eal/windows/eal.c
index d084606a66..924a10d8d5 100644
--- a/lib/librte_eal/windows/eal.c
+++ b/lib/librte_eal/windows/eal.c
@@ -244,7 +244,8 @@ rte_eal_init(int argc, char **argv)
 	if (fctret < 0)
 		exit(1);
 
-	eal_thread_init_master(rte_config.master_lcore);
+	rte_thread_init(rte_config.master_lcore,
+		&lcore_config[rte_config.master_lcore].cpuset);
 
 	RTE_LCORE_FOREACH_SLAVE(i) {
 
diff --git a/lib/librte_eal/windows/eal_thread.c b/lib/librte_eal/windows/eal_thread.c
index fc245ceb05..4f39db713e 100644
--- a/lib/librte_eal/windows/eal_thread.c
+++ b/lib/librte_eal/windows/eal_thread.c
@@ -53,13 +53,6 @@ rte_eal_remote_launch(lcore_function_t *f, void *arg, unsigned int slave_id)
 	return 0;
 }
 
-void
-eal_thread_init_master(unsigned int lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-}
-
 static inline pthread_t
 eal_thread_self(void)
 {
@@ -90,8 +83,7 @@ eal_thread_loop(void *arg __rte_unused)
 	m2s = lcore_config[lcore_id].pipe_master2slave[0];
 	s2m = lcore_config[lcore_id].pipe_slave2master[1];
 
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
+	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s])\n",
 		lcore_id, (uintptr_t)thread_id, cpuset);
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH 4/7] eal: introduce thread uninit helper
  2020-06-10 14:44 [dpdk-dev] [PATCH 0/7] Register external threads as lcore David Marchand
                   ` (2 preceding siblings ...)
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 3/7] eal: introduce thread init helper David Marchand
@ 2020-06-10 14:45 ` David Marchand
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 5/7] eal: register non-EAL threads as lcore David Marchand
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-10 14:45 UTC (permalink / raw)
  To: dev; +Cc: Jerin Jacob, Sunil Kumar Kori

This is a preparation step for dynamically unregistering external
threads.

Since we explicitly allocate a per thread trace buffer in
rte_thread_init, add an internal helper to free this buffer.

Note: I preferred renaming the current internal function to free all
threads trace buffers (new name trace_mem_free()) and reuse the previous
name (trace_mem_per_thread_free()) when freeing this buffer for a given
thread.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/librte_eal/common/eal_common_thread.c |  9 +++++
 lib/librte_eal/common/eal_common_trace.c  | 49 +++++++++++++++++++----
 lib/librte_eal/common/eal_thread.h        |  5 +++
 lib/librte_eal/common/eal_trace.h         |  1 +
 4 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index fd5c41a2af..8a973ca8ac 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -22,6 +22,7 @@
 #include "eal_internal_cfg.h"
 #include "eal_private.h"
 #include "eal_thread.h"
+#include "eal_trace.h"
 
 RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
 RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
@@ -169,6 +170,14 @@ rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
 #endif
 }
 
+void
+rte_thread_uninit(void)
+{
+#ifndef RTE_EXEC_ENV_WINDOWS
+	trace_mem_per_thread_free();
+#endif
+}
+
 struct rte_thread_ctrl_params {
 	void *(*start_routine)(void *);
 	void *arg;
diff --git a/lib/librte_eal/common/eal_common_trace.c b/lib/librte_eal/common/eal_common_trace.c
index 875553d7e5..cd2d217b02 100644
--- a/lib/librte_eal/common/eal_common_trace.c
+++ b/lib/librte_eal/common/eal_common_trace.c
@@ -101,7 +101,7 @@ eal_trace_fini(void)
 {
 	if (!rte_trace_is_enabled())
 		return;
-	trace_mem_per_thread_free();
+	trace_mem_free();
 	trace_metadata_destroy();
 	eal_trace_args_free();
 }
@@ -370,24 +370,59 @@ __rte_trace_mem_per_thread_alloc(void)
 	rte_spinlock_unlock(&trace->lock);
 }
 
+static void
+trace_mem_per_thread_free_unlocked(struct thread_mem_meta *meta)
+{
+	if (meta->area == TRACE_AREA_HUGEPAGE)
+		eal_free_no_trace(meta->mem);
+	else if (meta->area == TRACE_AREA_HEAP)
+		free(meta->mem);
+}
+
 void
 trace_mem_per_thread_free(void)
+{
+	struct trace *trace = trace_obj_get();
+	struct __rte_trace_header *header;
+	uint32_t count;
+
+	if (RTE_PER_LCORE(trace_mem) == NULL)
+		return;
+
+	header = RTE_PER_LCORE(trace_mem);
+	rte_spinlock_lock(&trace->lock);
+	for (count = 0; count < trace->nb_trace_mem_list; count++) {
+		if (trace->lcore_meta[count].mem == header)
+			break;
+	}
+	if (count != trace->nb_trace_mem_list) {
+		struct thread_mem_meta *meta = &trace->lcore_meta[count];
+
+		trace_mem_per_thread_free_unlocked(meta);
+		if (count != trace->nb_trace_mem_list - 1) {
+			memmove(meta, meta + 1,
+				sizeof(*meta) *
+				 (trace->nb_trace_mem_list - count - 1));
+		}
+		trace->nb_trace_mem_list--;
+	}
+	rte_spinlock_unlock(&trace->lock);
+}
+
+void
+trace_mem_free(void)
 {
 	struct trace *trace = trace_obj_get();
 	uint32_t count;
-	void *mem;
 
 	if (!rte_trace_is_enabled())
 		return;
 
 	rte_spinlock_lock(&trace->lock);
 	for (count = 0; count < trace->nb_trace_mem_list; count++) {
-		mem = trace->lcore_meta[count].mem;
-		if (trace->lcore_meta[count].area == TRACE_AREA_HUGEPAGE)
-			eal_free_no_trace(mem);
-		else if (trace->lcore_meta[count].area == TRACE_AREA_HEAP)
-			free(mem);
+		trace_mem_per_thread_free_unlocked(&trace->lcore_meta[count]);
 	}
+	trace->nb_trace_mem_list = 0;
 	rte_spinlock_unlock(&trace->lock);
 }
 
diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
index da5e7c93ba..4ecd8fd53a 100644
--- a/lib/librte_eal/common/eal_thread.h
+++ b/lib/librte_eal/common/eal_thread.h
@@ -25,6 +25,11 @@ __rte_noreturn void *eal_thread_loop(void *arg);
  */
 void rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset);
 
+/**
+ * Uninitialize per-lcore info for current thread.
+ */
+void rte_thread_uninit(void);
+
 /**
  * Get the NUMA socket id from cpu id.
  * This function is private to EAL.
diff --git a/lib/librte_eal/common/eal_trace.h b/lib/librte_eal/common/eal_trace.h
index 8f60616156..92c5951c3a 100644
--- a/lib/librte_eal/common/eal_trace.h
+++ b/lib/librte_eal/common/eal_trace.h
@@ -106,6 +106,7 @@ int trace_metadata_create(void);
 void trace_metadata_destroy(void);
 int trace_mkdir(void);
 int trace_epoch_time_save(void);
+void trace_mem_free(void);
 void trace_mem_per_thread_free(void);
 
 /* EAL interface */
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH 5/7] eal: register non-EAL threads as lcore
  2020-06-10 14:44 [dpdk-dev] [PATCH 0/7] Register external threads as lcore David Marchand
                   ` (3 preceding siblings ...)
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 4/7] eal: introduce thread uninit helper David Marchand
@ 2020-06-10 14:45 ` David Marchand
  2020-06-15  6:43   ` Kinsella, Ray
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 6/7] eal: dump lcores David Marchand
                   ` (7 subsequent siblings)
  12 siblings, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-06-10 14:45 UTC (permalink / raw)
  To: dev; +Cc: John McNamara, Marko Kovacevic, Ray Kinsella, Neil Horman

DPDK allows calling some part of its API from a non-EAL thread but this
has some limitations.
OVS (and other applications) has its own thread management but still
wants to avoid such limitations by hacking RTE_PER_LCORE(_lcore_id) and
faking EAL threads that are potentially unknown to some DPDK components.

Introduce a new API to register non-EAL threads and associate them with a
free lcore with a new EXTERNAL role.
This role denotes lcores that do not run the DPDK mainloop and as such
prevents use of rte_eal_wait_lcore() and related functions.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 doc/guides/howto/debug_troubleshoot.rst   |  5 ++-
 lib/librte_eal/common/eal_common_lcore.c  | 44 +++++++++++++++++--
 lib/librte_eal/common/eal_common_thread.c | 45 ++++++++++++++++++++
 lib/librte_eal/common/eal_private.h       | 17 ++++++++
 lib/librte_eal/include/rte_eal.h          |  9 ----
 lib/librte_eal/include/rte_lcore.h        | 52 +++++++++++++++++------
 lib/librte_eal/rte_eal_version.map        |  4 ++
 7 files changed, 147 insertions(+), 29 deletions(-)

diff --git a/doc/guides/howto/debug_troubleshoot.rst b/doc/guides/howto/debug_troubleshoot.rst
index cef016b2fe..056f556a73 100644
--- a/doc/guides/howto/debug_troubleshoot.rst
+++ b/doc/guides/howto/debug_troubleshoot.rst
@@ -307,8 +307,9 @@ Custom worker function :numref:`dtg_distributor_worker`.
 
 #. Configuration issue isolation
 
-   * Identify core role using ``rte_eal_lcore_role`` to identify RTE, OFF and
-     SERVICE. Check performance functions are mapped to run on the cores.
+   * Identify core role using ``rte_eal_lcore_role`` to identify RTE, OFF,
+     SERVICE and EXTERNAL. Check performance functions are mapped to run on the
+     cores.
 
    * For high-performance execution logic ensure running it on correct NUMA
      and non-master core.
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 5404922a87..35d6c1295e 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -6,13 +6,15 @@
 #include <limits.h>
 #include <string.h>
 
-#include <rte_errno.h>
-#include <rte_log.h>
-#include <rte_eal.h>
-#include <rte_lcore.h>
 #include <rte_common.h>
 #include <rte_debug.h>
+#include <rte_eal.h>
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_spinlock.h>
 
+#include "eal_internal_cfg.h"
 #include "eal_private.h"
 #include "eal_thread.h"
 
@@ -209,3 +211,37 @@ rte_socket_id_by_idx(unsigned int idx)
 	}
 	return config->numa_nodes[idx];
 }
+
+rte_spinlock_t external_lcore_lock = RTE_SPINLOCK_INITIALIZER;
+
+unsigned int
+eal_lcore_external_reserve(void)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	unsigned int lcore_id;
+
+	rte_spinlock_lock(&external_lcore_lock);
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (rte_eal_lcore_role(lcore_id) != ROLE_OFF)
+			continue;
+		cfg->lcore_role[lcore_id] = ROLE_EXTERNAL;
+		cfg->lcore_count++;
+		break;
+	}
+	rte_spinlock_unlock(&external_lcore_lock);
+
+	return lcore_id;
+}
+
+void
+eal_lcore_external_release(unsigned int lcore_id)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+
+	rte_spinlock_lock(&external_lcore_lock);
+	if (rte_eal_lcore_role(lcore_id) == ROLE_EXTERNAL) {
+		cfg->lcore_role[lcore_id] = ROLE_OFF;
+		cfg->lcore_count--;
+	}
+	rte_spinlock_unlock(&external_lcore_lock);
+}
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 8a973ca8ac..d57a6ec797 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -29,6 +29,7 @@ RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
 static RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) =
 	(unsigned int)SOCKET_ID_ANY;
 static RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
+static RTE_DEFINE_PER_LCORE(bool, thread_registered);
 
 unsigned rte_socket_id(void)
 {
@@ -255,3 +256,47 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
 	pthread_join(*thread, NULL);
 	return -ret;
 }
+
+void
+rte_thread_register(void)
+{
+	unsigned int lcore_id;
+	rte_cpuset_t cpuset;
+
+	/* EAL init flushes all lcores, we can't register before. */
+	assert(internal_config.init_complete == 1);
+
+	if (RTE_PER_LCORE(thread_registered))
+		return;
+
+	if (pthread_getaffinity_np(pthread_self(), sizeof(cpuset),
+			&cpuset) != 0)
+		CPU_ZERO(&cpuset);
+
+	lcore_id = eal_lcore_external_reserve();
+	if (lcore_id >= RTE_MAX_LCORE)
+		lcore_id = LCORE_ID_ANY;
+
+	rte_thread_init(lcore_id, &cpuset);
+
+	RTE_LOG(DEBUG, EAL, "Registered thread as lcore %u.\n", lcore_id);
+	RTE_PER_LCORE(thread_registered) = true;
+}
+
+void
+rte_thread_unregister(void)
+{
+	unsigned int lcore_id;
+
+	if (!RTE_PER_LCORE(thread_registered))
+		return;
+
+	lcore_id = RTE_PER_LCORE(_lcore_id);
+	if (lcore_id != LCORE_ID_ANY)
+		eal_lcore_external_release(lcore_id);
+
+	rte_thread_uninit();
+
+	RTE_LOG(DEBUG, EAL, "Unregistered thread (was lcore %u).\n", lcore_id);
+	RTE_PER_LCORE(thread_registered) = false;
+}
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 869ce183ad..8dd850f68a 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -274,6 +274,23 @@ uint64_t get_tsc_freq(void);
  */
 uint64_t get_tsc_freq_arch(void);
 
+/**
+ * Ask for a free lcore to associate to a non-EAL thread.
+ *
+ * @return
+ *   The id of a lcore with role ROLE_EXTERNAL or RTE_MAX_LCORE if none was
+ *   available.
+ */
+unsigned int eal_lcore_external_reserve(void);
+
+/**
+ * Release an external lcore.
+ *
+ * @param lcore_id
+ *   The lcore with role ROLE_EXTERNAL to release.
+ */
+void eal_lcore_external_release(unsigned int lcore_id);
+
 /**
  * Prepare physical memory mapping
  * i.e. hugepages on Linux and
diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
index 2edf8c6556..0913d1947c 100644
--- a/lib/librte_eal/include/rte_eal.h
+++ b/lib/librte_eal/include/rte_eal.h
@@ -31,15 +31,6 @@ extern "C" {
 /* Maximum thread_name length. */
 #define RTE_MAX_THREAD_NAME_LEN 16
 
-/**
- * The lcore role (used in RTE or not).
- */
-enum rte_lcore_role_t {
-	ROLE_RTE,
-	ROLE_OFF,
-	ROLE_SERVICE,
-};
-
 /**
  * The type of process in a linux, multi-process setup
  */
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 5c1d1926e9..d59c4edc67 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -24,6 +24,16 @@ extern "C" {
 
 RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
 
+/**
+ * The lcore role (used in RTE or not).
+ */
+enum rte_lcore_role_t {
+	ROLE_RTE,
+	ROLE_OFF,
+	ROLE_SERVICE,
+	ROLE_EXTERNAL,
+};
+
 /**
  * Get a lcore's role.
  *
@@ -34,6 +44,20 @@ RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
  */
 enum rte_lcore_role_t rte_eal_lcore_role(unsigned int lcore_id);
 
+/**
+ * Test if the core supplied has a specific role
+ *
+ * @param lcore_id
+ *   The identifier of the lcore, which MUST be between 0 and
+ *   RTE_MAX_LCORE-1.
+ * @param role
+ *   The role to be checked against.
+ * @return
+ *   Boolean value: positive if test is true; otherwise returns 0.
+ */
+int
+rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
+
 /**
  * Return the Application thread ID of the execution unit.
  *
@@ -256,6 +280,20 @@ int rte_thread_setname(pthread_t id, const char *name);
 __rte_experimental
 int rte_thread_getname(pthread_t id, char *name, size_t len);
 
+/**
+ * Register current non-EAL thread as a lcore.
+ */
+__rte_experimental
+void
+rte_thread_register(void);
+
+/**
+ * Unregister current thread and release lcore if one was associated.
+ */
+__rte_experimental
+void
+rte_thread_unregister(void);
+
 /**
  * Create a control thread.
  *
@@ -283,20 +321,6 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
 		const pthread_attr_t *attr,
 		void *(*start_routine)(void *), void *arg);
 
-/**
- * Test if the core supplied has a specific role
- *
- * @param lcore_id
- *   The identifier of the lcore, which MUST be between 0 and
- *   RTE_MAX_LCORE-1.
- * @param role
- *   The role to be checked against.
- * @return
- *   Boolean value: positive if test is true; otherwise returns 0.
- */
-int
-rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index fdfc3f1a88..f5b68d2815 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -393,4 +393,8 @@ EXPERIMENTAL {
 	rte_trace_point_lookup;
 	rte_trace_regexp;
 	rte_trace_save;
+
+	# added in 20.08
+	rte_thread_register;
+	rte_thread_unregister;
 };
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH 6/7] eal: dump lcores
  2020-06-10 14:44 [dpdk-dev] [PATCH 0/7] Register external threads as lcore David Marchand
                   ` (4 preceding siblings ...)
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 5/7] eal: register non-EAL threads as lcore David Marchand
@ 2020-06-10 14:45 ` David Marchand
  2020-06-15  6:40   ` Kinsella, Ray
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 7/7] eal: add lcore hotplug notifications David Marchand
                   ` (6 subsequent siblings)
  12 siblings, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-06-10 14:45 UTC (permalink / raw)
  To: dev; +Cc: Bruce Richardson, Ray Kinsella, Neil Horman

Add a little helper to dump all lcores.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/librte_eal/common/eal_common_lcore.c  | 32 +++++++++++++++++++++++
 lib/librte_eal/common/eal_common_thread.c | 16 ++++++++----
 lib/librte_eal/common/eal_thread.h        | 13 +++++++--
 lib/librte_eal/freebsd/eal.c              |  2 +-
 lib/librte_eal/freebsd/eal_thread.c       |  2 +-
 lib/librte_eal/include/rte_lcore.h        | 10 +++++++
 lib/librte_eal/linux/eal.c                |  2 +-
 lib/librte_eal/linux/eal_thread.c         |  2 +-
 lib/librte_eal/rte_eal_version.map        |  1 +
 9 files changed, 69 insertions(+), 11 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 35d6c1295e..6aca1b2fee 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -245,3 +245,35 @@ eal_lcore_external_release(unsigned int lcore_id)
 	}
 	rte_spinlock_unlock(&external_lcore_lock);
 }
+
+void
+rte_lcore_dump(FILE *f)
+{
+	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+	unsigned int lcore_id;
+	const char *role;
+	int ret;
+
+	rte_spinlock_lock(&external_lcore_lock);
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		switch (rte_eal_lcore_role(lcore_id)) {
+		case ROLE_RTE:
+			role = "RTE";
+			break;
+		case ROLE_SERVICE:
+			role = "SERVICE";
+			break;
+		case ROLE_EXTERNAL:
+			role = "EXTERNAL";
+			break;
+		default:
+			continue;
+		}
+
+		ret = eal_thread_dump_affinity(&lcore_config[lcore_id].cpuset,
+			cpuset, sizeof(cpuset));
+		fprintf(f, "lcore %u, role %s, cpuset %s%s\n", lcore_id, role,
+			cpuset, ret == 0 ? "" : "...");
+	}
+	rte_spinlock_unlock(&external_lcore_lock);
+}
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index d57a6ec797..a81b192ff3 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -118,17 +118,14 @@ rte_thread_get_affinity(rte_cpuset_t *cpusetp)
 }
 
 int
-eal_thread_dump_affinity(char *str, unsigned size)
+eal_thread_dump_affinity(rte_cpuset_t *cpuset, char *str, unsigned int size)
 {
-	rte_cpuset_t cpuset;
 	unsigned cpu;
 	int ret;
 	unsigned int out = 0;
 
-	rte_thread_get_affinity(&cpuset);
-
 	for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
-		if (!CPU_ISSET(cpu, &cpuset))
+		if (!CPU_ISSET(cpu, cpuset))
 			continue;
 
 		ret = snprintf(str + out,
@@ -151,6 +148,15 @@ eal_thread_dump_affinity(char *str, unsigned size)
 	return ret;
 }
 
+int
+eal_thread_dump_current_affinity(char *str, unsigned int size)
+{
+	rte_cpuset_t cpuset;
+
+	rte_thread_get_affinity(&cpuset);
+	return eal_thread_dump_affinity(&cpuset, str, size);
+}
+
 void
 rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
 {
diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
index 4ecd8fd53a..13ec252e01 100644
--- a/lib/librte_eal/common/eal_thread.h
+++ b/lib/librte_eal/common/eal_thread.h
@@ -47,13 +47,15 @@ unsigned eal_cpu_socket_id(unsigned cpu_id);
 #define RTE_CPU_AFFINITY_STR_LEN            256
 
 /**
- * Dump the current pthread cpuset.
+ * Dump the cpuset as a human readable string.
  * This function is private to EAL.
  *
  * Note:
  *   If the dump size is greater than the size of given buffer,
  *   the string will be truncated and with '\0' at the end.
  *
+ * @param cpuset
+ *   The CPU affinity object to dump.
  * @param str
  *   The string buffer the cpuset will dump to.
  * @param size
@@ -62,6 +64,13 @@ unsigned eal_cpu_socket_id(unsigned cpu_id);
  *   0 for success, -1 if truncation happens.
  */
 int
-eal_thread_dump_affinity(char *str, unsigned size);
+eal_thread_dump_affinity(rte_cpuset_t *cpuset, char *str, unsigned int size);
+
+/**
+ * Dump the current thread cpuset.
+ * This is a wrapper on eal_thread_dump_affinity().
+ */
+int
+eal_thread_dump_current_affinity(char *str, unsigned int size);
 
 #endif /* EAL_THREAD_H */
diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
index b5ea11df16..69a6f7d8c4 100644
--- a/lib/librte_eal/freebsd/eal.c
+++ b/lib/librte_eal/freebsd/eal.c
@@ -886,7 +886,7 @@ rte_eal_init(int argc, char **argv)
 	rte_thread_init(rte_config.master_lcore,
 		&lcore_config[rte_config.master_lcore].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
 		rte_config.master_lcore, thread_id, cpuset,
diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
index c1fb8eb2d8..b1a3619f51 100644
--- a/lib/librte_eal/freebsd/eal_thread.c
+++ b/lib/librte_eal/freebsd/eal_thread.c
@@ -92,7 +92,7 @@ eal_thread_loop(__rte_unused void *arg)
 
 	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
 		lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
 
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index d59c4edc67..9cf34efef4 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -228,6 +228,16 @@ unsigned int rte_get_next_lcore(unsigned int i, int skip_master, int wrap);
 	     i<RTE_MAX_LCORE;						\
 	     i = rte_get_next_lcore(i, 1, 0))
 
+/**
+ * List all lcores.
+ *
+ * @param f
+ *   The output stream where the dump should be sent.
+ */
+__rte_experimental
+void
+rte_lcore_dump(FILE *f);
+
 /**
  * Set core affinity of the current thread.
  * Support both EAL and non-EAL thread and update TLS.
diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
index 8638376b8a..2f0efd7cd3 100644
--- a/lib/librte_eal/linux/eal.c
+++ b/lib/librte_eal/linux/eal.c
@@ -1214,7 +1214,7 @@ rte_eal_init(int argc, char **argv)
 	rte_thread_init(rte_config.master_lcore,
 		&lcore_config[rte_config.master_lcore].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		rte_config.master_lcore, (uintptr_t)thread_id, cpuset,
 		ret == 0 ? "" : "...");
diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
index 07aec0c44d..22d9bc8c01 100644
--- a/lib/librte_eal/linux/eal_thread.c
+++ b/lib/librte_eal/linux/eal_thread.c
@@ -92,7 +92,7 @@ eal_thread_loop(__rte_unused void *arg)
 
 	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		lcore_id, (uintptr_t)thread_id, cpuset, ret == 0 ? "" : "...");
 
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index f5b68d2815..6754d52543 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -395,6 +395,7 @@ EXPERIMENTAL {
 	rte_trace_save;
 
 	# added in 20.08
+	rte_lcore_dump;
 	rte_thread_register;
 	rte_thread_unregister;
 };
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH 7/7] eal: add lcore hotplug notifications
  2020-06-10 14:44 [dpdk-dev] [PATCH 0/7] Register external threads as lcore David Marchand
                   ` (5 preceding siblings ...)
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 6/7] eal: dump lcores David Marchand
@ 2020-06-10 14:45 ` David Marchand
  2020-06-15  6:34   ` Kinsella, Ray
  2020-06-10 15:09 ` [dpdk-dev] [PATCH 0/7] Register external threads as lcore Jerin Jacob
                   ` (5 subsequent siblings)
  12 siblings, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-06-10 14:45 UTC (permalink / raw)
  To: dev; +Cc: Ray Kinsella, Neil Horman

Now that lcores can be dynamically allocated/freed, we will have to
notify DPDK components and applications of such events for cases where
per lcore context must be allocated/initialised.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/librte_eal/common/eal_common_lcore.c  | 91 +++++++++++++++++++++++
 lib/librte_eal/common/eal_common_thread.c | 11 ++-
 lib/librte_eal/common/eal_private.h       | 26 +++++++
 lib/librte_eal/include/rte_lcore.h        | 49 ++++++++++++
 lib/librte_eal/rte_eal_version.map        |  2 +
 5 files changed, 178 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 6aca1b2fee..3a997d8115 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -212,6 +212,47 @@ rte_socket_id_by_idx(unsigned int idx)
 	return config->numa_nodes[idx];
 }
 
+struct lcore_notifier {
+	TAILQ_ENTRY(lcore_notifier) next;
+	rte_lcore_notifier_cb cb;
+	void *arg;
+};
+static TAILQ_HEAD(lcore_notifiers_head, lcore_notifier) lcore_notifiers =
+	TAILQ_HEAD_INITIALIZER(lcore_notifiers);
+static rte_spinlock_t lcore_notifiers_lock = RTE_SPINLOCK_INITIALIZER;
+
+void *
+rte_lcore_notifier_register(rte_lcore_notifier_cb cb, void *arg)
+{
+	struct lcore_notifier *notifier;
+
+	if (cb == NULL)
+		return NULL;
+
+	notifier = calloc(1, sizeof(*notifier));
+	if (notifier == NULL)
+		return NULL;
+
+	notifier->cb = cb;
+	notifier->arg = arg;
+	rte_spinlock_lock(&lcore_notifiers_lock);
+	TAILQ_INSERT_TAIL(&lcore_notifiers, notifier, next);
+	rte_spinlock_unlock(&lcore_notifiers_lock);
+
+	return notifier;
+}
+
+void
+rte_lcore_notifier_unregister(void *handle)
+{
+	struct lcore_notifier *notifier = handle;
+
+	rte_spinlock_lock(&lcore_notifiers_lock);
+	TAILQ_REMOVE(&lcore_notifiers, notifier, next);
+	rte_spinlock_unlock(&lcore_notifiers_lock);
+	free(notifier);
+}
+
 rte_spinlock_t external_lcore_lock = RTE_SPINLOCK_INITIALIZER;
 
 unsigned int
@@ -277,3 +318,53 @@ rte_lcore_dump(FILE *f)
 	}
 	rte_spinlock_unlock(&external_lcore_lock);
 }
+
+int
+eal_lcore_external_notify_allocated(unsigned int lcore_id)
+{
+	struct lcore_notifier *notifier;
+	int ret = 0;
+
+	RTE_LOG(DEBUG, EAL, "New lcore %u.\n", lcore_id);
+	rte_spinlock_lock(&lcore_notifiers_lock);
+	TAILQ_FOREACH(notifier, &lcore_notifiers, next) {
+		if (notifier->cb(lcore_id, RTE_LCORE_EVENT_NEW_EXTERNAL,
+				notifier->arg) == 0)
+			continue;
+
+		/* Some notifier refused the new lcore, inform all notifiers
+		 * that acked it.
+		 */
+		RTE_LOG(DEBUG, EAL, "A lcore notifier refused new lcore %u.\n",
+			lcore_id);
+
+		notifier = TAILQ_PREV(notifier, lcore_notifiers_head, next);
+		while (notifier != NULL) {
+			notifier->cb(lcore_id,
+				RTE_LCORE_EVENT_RELEASE_EXTERNAL,
+				notifier->arg);
+			notifier = TAILQ_PREV(notifier, lcore_notifiers_head,
+				next);
+		}
+		ret = -1;
+		break;
+	}
+	rte_spinlock_unlock(&lcore_notifiers_lock);
+
+	return ret;
+}
+
+void
+eal_lcore_external_notify_removed(unsigned int lcore_id)
+{
+	struct lcore_notifier *notifier;
+
+	RTE_LOG(DEBUG, EAL, "Released lcore %u.\n", lcore_id);
+	rte_spinlock_lock(&lcore_notifiers_lock);
+	TAILQ_FOREACH_REVERSE(notifier, &lcore_notifiers, lcore_notifiers_head,
+			next) {
+		notifier->cb(lcore_id, RTE_LCORE_EVENT_RELEASE_EXTERNAL,
+			notifier->arg);
+	}
+	rte_spinlock_unlock(&lcore_notifiers_lock);
+}
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index a81b192ff3..f66d1ccaef 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -285,6 +285,12 @@ rte_thread_register(void)
 
 	rte_thread_init(lcore_id, &cpuset);
 
+	if (lcore_id != LCORE_ID_ANY &&
+			eal_lcore_external_notify_allocated(lcore_id) < 0) {
+		eal_lcore_external_release(lcore_id);
+		RTE_PER_LCORE(_lcore_id) = lcore_id = LCORE_ID_ANY;
+	}
+
 	RTE_LOG(DEBUG, EAL, "Registered thread as lcore %u.\n", lcore_id);
 	RTE_PER_LCORE(thread_registered) = true;
 }
@@ -298,8 +304,11 @@ rte_thread_unregister(void)
 		return;
 
 	lcore_id = RTE_PER_LCORE(_lcore_id);
-	if (lcore_id != LCORE_ID_ANY)
+	if (lcore_id != LCORE_ID_ANY) {
+		eal_lcore_external_notify_removed(lcore_id);
 		eal_lcore_external_release(lcore_id);
+		RTE_PER_LCORE(_lcore_id) = LCORE_ID_ANY;
+	}
 
 	rte_thread_uninit();
 
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 8dd850f68a..649697c368 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -283,6 +283,21 @@ uint64_t get_tsc_freq_arch(void);
  */
 unsigned int eal_lcore_external_reserve(void);
 
+/**
+ * Evaluate all lcore notifiers with a RTE_LCORE_EVENT_NEW_EXTERNAL event for
+ * the passed lcore.
+ * If an error is returned by one of them, then this change is rolled back:
+ * all previous lcore notifiers that had acked the RTE_LCORE_EVENT_NEW_EXTERNAL
+ * event receive a RTE_LCORE_EVENT_RELEASE_EXTERNAL event for the passed lcore.
+ *
+ * @param lcore_id
+ *   The lcore to consider.
+ * @return
+ *   - 0 if all notifiers agreed on the new lcore
+ *   - -1 if one of them refused
+ */
+int eal_lcore_external_notify_allocated(unsigned int lcore_id);
+
 /**
  * Release an external lcore.
  *
@@ -291,6 +306,17 @@ unsigned int eal_lcore_external_reserve(void);
  */
 void eal_lcore_external_release(unsigned int lcore_id);
 
+/**
+ * Evaluate all lcore notifiers with a RTE_LCORE_EVENT_RELEASE_EXTERNAL event
+ * for the passed lcore.
+ * This function must be called with a lcore that successfully passed
+ * eal_lcore_external_notify_allocated().
+ *
+ * @param lcore_id
+ *   The lcore with role ROLE_EXTERNAL to release.
+ */
+void eal_lcore_external_notify_removed(unsigned int lcore_id);
+
 /**
  * Prepare physical memory mapping
  * i.e. hugepages on Linux and
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 9cf34efef4..e0fec33d5a 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -238,6 +238,55 @@ __rte_experimental
 void
 rte_lcore_dump(FILE *f);
 
+enum rte_lcore_event_type {
+	RTE_LCORE_EVENT_NEW_EXTERNAL,
+	RTE_LCORE_EVENT_RELEASE_EXTERNAL,
+};
+
+/**
+ * Callback prototype for getting lcore events.
+ *
+ * @param lcore_id
+ *   The lcore to consider for this event.
+ * @param event
+ *   The type of event on the lcore.
+ * @param arg
+ *   An opaque pointer passed at notifier registration.
+ * @return
+ *   - -1 when refusing this event,
+ *   - 0 otherwise.
+ */
+typedef int (*rte_lcore_notifier_cb)(unsigned int lcore_id,
+	enum rte_lcore_event_type event, void *arg);
+
+/**
+ * Register a lcore notifier.
+ *
+ * @param cb
+ *   The callback invoked for each lcore event with the arg argument.
+ *   See rte_lcore_notifier_cb description.
+ * @param arg
+ *   An optional argument that gets passed to the callback when it gets
+ *   invoked.
+ * @return
+ *   On success, returns an opaque pointer for the created notifier.
+ *   NULL on failure.
+ */
+__rte_experimental
+void *
+rte_lcore_notifier_register(rte_lcore_notifier_cb cb, void *arg);
+
+/**
+ * Unregister a lcore notifier.
+ *
+ * @param handle
+ *   The handle pointer returned by a former successful call to
+ *   rte_lcore_notifier_register.
+ */
+__rte_experimental
+void
+rte_lcore_notifier_unregister(void *handle);
+
 /**
  * Set core affinity of the current thread.
  * Support both EAL and non-EAL thread and update TLS.
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 6754d52543..1e6f2aaacc 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -396,6 +396,8 @@ EXPERIMENTAL {
 
 	# added in 20.08
 	rte_lcore_dump;
+	rte_lcore_notifier_register;
+	rte_lcore_notifier_unregister;
 	rte_thread_register;
 	rte_thread_unregister;
 };
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH 0/7] Register external threads as lcore
  2020-06-10 14:44 [dpdk-dev] [PATCH 0/7] Register external threads as lcore David Marchand
                   ` (6 preceding siblings ...)
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 7/7] eal: add lcore hotplug notifications David Marchand
@ 2020-06-10 15:09 ` Jerin Jacob
  2020-06-10 15:13   ` Bruce Richardson
  2020-06-10 15:18   ` David Marchand
  2020-06-19 16:22 ` [dpdk-dev] [PATCH v2 0/9] Register non-EAL " David Marchand
                   ` (4 subsequent siblings)
  12 siblings, 2 replies; 126+ messages in thread
From: Jerin Jacob @ 2020-06-10 15:09 UTC (permalink / raw)
  To: David Marchand; +Cc: dpdk-dev

On Wed, Jun 10, 2020 at 8:15 PM David Marchand
<david.marchand@redhat.com> wrote:
>
> OVS and some other applications have been hacking into DPDK internals to
> fake EAL threads and avoid performance penalty of only having non-EAL
> threads.
>
> This series proposes to add a new type of lcores and maps those external
> threads to such lcores.
> Those threads won't run the DPDK eal mainloop and as a consequence part of
> the EAL threads API cannot work.
>
> Having new lcores appearing during the process lifetime is not expected
> by some DPDK components. This is addressed by notifying of such lcore
> hotplug.
>
> This patchset has still some more work (like refusing new lcore type in
> incompatible EAL threads API, updating the documentation and adding unit
> tests) but I am sending it anyway as I would like to get this in for
> 20.08.

Cool feature.

Is mempool's lcore local cache working for external cores with this scheme?


>
> For the interested parties, I have a patch [1] against dpdk-latest OVS
> branch that makes use of this series.
>
> 1: https://github.com/david-marchand/ovs/commit/dpdk-latest-external-lcores

Usage is clear from the example code,
Looking forward to having documentation and UT for the new API
in the next version.



>
> --
> David Marchand
>
> David Marchand (7):
>   eal: relocate per thread symbols to common
>   eal: fix multiple definition of per lcore thread id
>   eal: introduce thread init helper
>   eal: introduce thread uninit helper
>   eal: register non-EAL threads as lcore
>   eal: dump lcores
>   eal: add lcore hotplug notifications
>
>  doc/guides/howto/debug_troubleshoot.rst   |   5 +-
>  lib/librte_eal/common/eal_common_lcore.c  | 167 +++++++++++++++++++++-
>  lib/librte_eal/common/eal_common_thread.c | 143 ++++++++++++++----
>  lib/librte_eal/common/eal_common_trace.c  |  49 ++++++-
>  lib/librte_eal/common/eal_private.h       |  43 ++++++
>  lib/librte_eal/common/eal_thread.h        |  26 +++-
>  lib/librte_eal/common/eal_trace.h         |   1 +
>  lib/librte_eal/freebsd/eal.c              |  16 ++-
>  lib/librte_eal/freebsd/eal_thread.c       |  38 +----
>  lib/librte_eal/include/rte_eal.h          |  12 +-
>  lib/librte_eal/include/rte_lcore.h        | 112 +++++++++++++--
>  lib/librte_eal/linux/eal.c                |  17 ++-
>  lib/librte_eal/linux/eal_thread.c         |  38 +----
>  lib/librte_eal/rte_eal_version.map        |  14 ++
>  lib/librte_eal/windows/eal.c              |   3 +-
>  lib/librte_eal/windows/eal_thread.c       |  14 +-
>  16 files changed, 536 insertions(+), 162 deletions(-)
>
> --
> 2.23.0
>

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH 0/7] Register external threads as lcore
  2020-06-10 15:09 ` [dpdk-dev] [PATCH 0/7] Register external threads as lcore Jerin Jacob
@ 2020-06-10 15:13   ` Bruce Richardson
  2020-06-10 15:18   ` David Marchand
  1 sibling, 0 replies; 126+ messages in thread
From: Bruce Richardson @ 2020-06-10 15:13 UTC (permalink / raw)
  To: Jerin Jacob; +Cc: David Marchand, dpdk-dev

On Wed, Jun 10, 2020 at 08:39:30PM +0530, Jerin Jacob wrote:
> On Wed, Jun 10, 2020 at 8:15 PM David Marchand
> <david.marchand@redhat.com> wrote:
> >
> > OVS and some other applications have been hacking into DPDK internals to
> > fake EAL threads and avoid performance penalty of only having non-EAL
> > threads.
> >
> > This series proposes to add a new type of lcores and maps those external
> > threads to such lcores.
> > Those threads won't run the DPDK eal mainloop and as a consequence part of
> > the EAL threads API cannot work.
> >
> > Having new lcores appearing during the process lifetime is not expected
> > by some DPDK components. This is addressed by notifying of such lcore
> > hotplug.
> >
> > This patchset has still some more work (like refusing new lcore type in
> > incompatible EAL threads API, updating the documentation and adding unit
> > tests) but I am sending it anyway as I would like to get this in for
> > 20.08.
> 
> Cool feature.
> 

+1 for this, this is something that I think has been needed for a long
time. Thanks!

> Is mempool's lcore local cache working for external cores with this scheme?
> 
> 
> >
> > For the interested parties, I have a patch [1] against dpdk-latest OVS
> > branch that makes use of this series.
> >
> > 1: https://github.com/david-marchand/ovs/commit/dpdk-latest-external-lcores
> 
> Usage is clear from the example code,
> Looking forward to having documentation and UT for the new API
> in the next version.
> 
Yes, I think we need a doc update explaining usage. The EAL chapter of the
programmers guide needs an update [1], and I think a description in the
howto would be good also [2].

[1] https://doc.dpdk.org/guides-20.05/prog_guide/env_abstraction_layer.html
[2] https://doc.dpdk.org/guides-20.05/howto/index.html

Regards,
/Bruce

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH 0/7] Register external threads as lcore
  2020-06-10 15:09 ` [dpdk-dev] [PATCH 0/7] Register external threads as lcore Jerin Jacob
  2020-06-10 15:13   ` Bruce Richardson
@ 2020-06-10 15:18   ` David Marchand
  2020-06-10 15:33     ` Jerin Jacob
  1 sibling, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-06-10 15:18 UTC (permalink / raw)
  To: Jerin Jacob; +Cc: dpdk-dev

On Wed, Jun 10, 2020 at 5:09 PM Jerin Jacob <jerinjacobk@gmail.com> wrote:
>
> On Wed, Jun 10, 2020 at 8:15 PM David Marchand
> <david.marchand@redhat.com> wrote:
> >
> > OVS and some other applications have been hacking into DPDK internals to
> > fake EAL threads and avoid performance penalty of only having non-EAL
> > threads.
> >
> > This series proposes to add a new type of lcores and maps those external
> > threads to such lcores.
> > Those threads won't run the DPDK eal mainloop and as a consequence part of
> > the EAL threads API cannot work.
> >
> > Having new lcores appearing during the process lifetime is not expected
> > by some DPDK components. This is addressed by notifying of such lcore
> > hotplug.
> >
> > This patchset has still some more work (like refusing new lcore type in
> > incompatible EAL threads API, updating the documentation and adding unit
> > tests) but I am sending it anyway as I would like to get this in for
> > 20.08.
>
> Cool feature.
>
> Is mempool's lcore local cache working for external cores with this scheme?

Yes, as it is stateless, all we need is a unique lcore_id in [0,
RTE_MAX_LCORE-1] range.
We could imagine flushing such caches on unregistering.

And we can fix other mempool drivers like the bucket driver.

-- 
David Marchand


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH 0/7] Register external threads as lcore
  2020-06-10 15:18   ` David Marchand
@ 2020-06-10 15:33     ` Jerin Jacob
  2020-06-15  7:11       ` David Marchand
  0 siblings, 1 reply; 126+ messages in thread
From: Jerin Jacob @ 2020-06-10 15:33 UTC (permalink / raw)
  To: David Marchand; +Cc: dpdk-dev

On Wed, Jun 10, 2020 at 8:48 PM David Marchand
<david.marchand@redhat.com> wrote:
>
> On Wed, Jun 10, 2020 at 5:09 PM Jerin Jacob <jerinjacobk@gmail.com> wrote:
> >
> > On Wed, Jun 10, 2020 at 8:15 PM David Marchand
> > <david.marchand@redhat.com> wrote:
> > >
> > > OVS and some other applications have been hacking into DPDK internals to
> > > fake EAL threads and avoid performance penalty of only having non-EAL
> > > threads.
> > >
> > > This series proposes to add a new type of lcores and maps those external
> > > threads to such lcores.
> > > Those threads won't run the DPDK eal mainloop and as a consequence part of
> > > the EAL threads API cannot work.
> > >
> > > Having new lcores appearing during the process lifetime is not expected
> > > by some DPDK components. This is addressed by notifying of such lcore
> > > hotplug.
> > >
> > > This patchset has still some more work (like refusing new lcore type in
> > > incompatible EAL threads API, updating the documentation and adding unit
> > > tests) but I am sending it anyway as I would like to get this in for
> > > 20.08.
> >
> > Cool feature.
> >
> > Is mempool's lcore local cache working for external cores with this scheme?
>
> Yes, as it is stateless, all we need is a unique lcore_id in [0,
> RTE_MAX_LCORE-1] range.

Is that the case when the lcore is registered and then the mempool is
created? What about the other case, where the mempool is created and then
the lcore is registered?


> We could imagine flushing such caches on unregistering.
>
> And we can fix other mempool drivers like the bucket driver.
>
> --
> David Marchand
>

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH 7/7] eal: add lcore hotplug notifications
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 7/7] eal: add lcore hotplug notifications David Marchand
@ 2020-06-15  6:34   ` Kinsella, Ray
  2020-06-15  7:13     ` David Marchand
  0 siblings, 1 reply; 126+ messages in thread
From: Kinsella, Ray @ 2020-06-15  6:34 UTC (permalink / raw)
  To: David Marchand, dev; +Cc: Neil Horman, Richardson, Bruce, Thomas Monjalon

From an ABI PoV, you are 100% fine.

Isn't the agreed term 'callback' rather than 'notifier'? For example: rte_dev_event_callback_register, rte_mem_event_callback_register.

I did wonder, however, if all these callbacks would be better handled through an EventDev event-notification-style approach.

Ray K

On 10/06/2020 15:45, David Marchand wrote:
> Now that lcores can be dynamically allocated/freed, we will have to
> notify DPDK components and applications of such events for cases where
> per lcore context must be allocated/initialised.
>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
>  lib/librte_eal/common/eal_common_lcore.c  | 91 +++++++++++++++++++++++
>  lib/librte_eal/common/eal_common_thread.c | 11 ++-
>  lib/librte_eal/common/eal_private.h       | 26 +++++++
>  lib/librte_eal/include/rte_lcore.h        | 49 ++++++++++++
>  lib/librte_eal/rte_eal_version.map        |  2 +
>  5 files changed, 178 insertions(+), 1 deletion(-)
>
> diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
> index 6aca1b2fee..3a997d8115 100644
> --- a/lib/librte_eal/common/eal_common_lcore.c
> +++ b/lib/librte_eal/common/eal_common_lcore.c
> @@ -212,6 +212,47 @@ rte_socket_id_by_idx(unsigned int idx)
>  	return config->numa_nodes[idx];
>  }
>  
> +struct lcore_notifier {
> +	TAILQ_ENTRY(lcore_notifier) next;
> +	rte_lcore_notifier_cb cb;
> +	void *arg;
> +};
> +static TAILQ_HEAD(lcore_notifiers_head, lcore_notifier) lcore_notifiers =
> +	TAILQ_HEAD_INITIALIZER(lcore_notifiers);
> +static rte_spinlock_t lcore_notifiers_lock = RTE_SPINLOCK_INITIALIZER;
> +
> +void *
> +rte_lcore_notifier_register(rte_lcore_notifier_cb cb, void *arg)
> +{
> +	struct lcore_notifier *notifier;
> +
> +	if (cb == NULL)
> +		return NULL;
> +
> +	notifier = calloc(1, sizeof(*notifier));
> +	if (notifier == NULL)
> +		return NULL;
> +
> +	notifier->cb = cb;
> +	notifier->arg = arg;
> +	rte_spinlock_lock(&lcore_notifiers_lock);
> +	TAILQ_INSERT_TAIL(&lcore_notifiers, notifier, next);
> +	rte_spinlock_unlock(&lcore_notifiers_lock);
> +
> +	return notifier;
> +}
> +
> +void
> +rte_lcore_notifier_unregister(void *handle)
> +{
> +	struct lcore_notifier *notifier = handle;
> +
> +	rte_spinlock_lock(&lcore_notifiers_lock);
> +	TAILQ_REMOVE(&lcore_notifiers, notifier, next);
> +	rte_spinlock_unlock(&lcore_notifiers_lock);
> +	free(notifier);
> +}
> +
>  rte_spinlock_t external_lcore_lock = RTE_SPINLOCK_INITIALIZER;
>  
>  unsigned int
> @@ -277,3 +318,53 @@ rte_lcore_dump(FILE *f)
>  	}
>  	rte_spinlock_unlock(&external_lcore_lock);
>  }
> +
> +int
> +eal_lcore_external_notify_allocated(unsigned int lcore_id)
> +{
> +	struct lcore_notifier *notifier;
> +	int ret = 0;
> +
> +	RTE_LOG(DEBUG, EAL, "New lcore %u.\n", lcore_id);
> +	rte_spinlock_lock(&lcore_notifiers_lock);
> +	TAILQ_FOREACH(notifier, &lcore_notifiers, next) {
> +		if (notifier->cb(lcore_id, RTE_LCORE_EVENT_NEW_EXTERNAL,
> +				notifier->arg) == 0)
> +			continue;
> +
> +		/* Some notifier refused the new lcore, inform all notifiers
> +		 * that acked it.
> +		 */
> +		RTE_LOG(DEBUG, EAL, "A lcore notifier refused new lcore %u.\n",
> +			lcore_id);
> +
> +		notifier = TAILQ_PREV(notifier, lcore_notifiers_head, next);
> +		while (notifier != NULL) {
> +			notifier->cb(lcore_id,
> +				RTE_LCORE_EVENT_RELEASE_EXTERNAL,
> +				notifier->arg);
> +			notifier = TAILQ_PREV(notifier, lcore_notifiers_head,
> +				next);
> +		}
> +		ret = -1;
> +		break;
> +	}
> +	rte_spinlock_unlock(&lcore_notifiers_lock);
> +
> +	return ret;
> +}
> +
> +void
> +eal_lcore_external_notify_removed(unsigned int lcore_id)
> +{
> +	struct lcore_notifier *notifier;
> +
> +	RTE_LOG(DEBUG, EAL, "Released lcore %u.\n", lcore_id);
> +	rte_spinlock_lock(&lcore_notifiers_lock);
> +	TAILQ_FOREACH_REVERSE(notifier, &lcore_notifiers, lcore_notifiers_head,
> +			next) {
> +		notifier->cb(lcore_id, RTE_LCORE_EVENT_RELEASE_EXTERNAL,
> +			notifier->arg);
> +	}
> +	rte_spinlock_unlock(&lcore_notifiers_lock);
> +}
> diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> index a81b192ff3..f66d1ccaef 100644
> --- a/lib/librte_eal/common/eal_common_thread.c
> +++ b/lib/librte_eal/common/eal_common_thread.c
> @@ -285,6 +285,12 @@ rte_thread_register(void)
>  
>  	rte_thread_init(lcore_id, &cpuset);
>  
> +	if (lcore_id != LCORE_ID_ANY &&
> +			eal_lcore_external_notify_allocated(lcore_id) < 0) {
> +		eal_lcore_external_release(lcore_id);
> +		RTE_PER_LCORE(_lcore_id) = lcore_id = LCORE_ID_ANY;
> +	}
> +
>  	RTE_LOG(DEBUG, EAL, "Registered thread as lcore %u.\n", lcore_id);
>  	RTE_PER_LCORE(thread_registered) = true;
>  }
> @@ -298,8 +304,11 @@ rte_thread_unregister(void)
>  		return;
>  
>  	lcore_id = RTE_PER_LCORE(_lcore_id);
> -	if (lcore_id != LCORE_ID_ANY)
> +	if (lcore_id != LCORE_ID_ANY) {
> +		eal_lcore_external_notify_removed(lcore_id);
>  		eal_lcore_external_release(lcore_id);
> +		RTE_PER_LCORE(_lcore_id) = LCORE_ID_ANY;
> +	}
>  
>  	rte_thread_uninit();
>  
> diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
> index 8dd850f68a..649697c368 100644
> --- a/lib/librte_eal/common/eal_private.h
> +++ b/lib/librte_eal/common/eal_private.h
> @@ -283,6 +283,21 @@ uint64_t get_tsc_freq_arch(void);
>   */
>  unsigned int eal_lcore_external_reserve(void);
>  
> +/**
> + * Evaluate all lcore notifiers with a RTE_LCORE_EVENT_NEW_EXTERNAL event for
> + * the passed lcore.
> + * If an error is returned by one of them, then this change is rolled back:
> + * all previous lcore notifiers that had acked the RTE_LCORE_EVENT_NEW_EXTERNAL
> + * event receive a RTE_LCORE_EVENT_RELEASE_EXTERNAL event for the passed lcore.
> + *
> + * @param lcore_id
> + *   The lcore to consider.
> + * @return
> + *   - 0 if all notifiers agreed on the new lcore
> + *   - -1 if one of them refused
> + */
> +int eal_lcore_external_notify_allocated(unsigned int lcore_id);
> +
>  /**
>   * Release an external lcore.
>   *
> @@ -291,6 +306,17 @@ unsigned int eal_lcore_external_reserve(void);
>   */
>  void eal_lcore_external_release(unsigned int lcore_id);
>  
> +/**
> + * Evaluate all lcore notifiers with a RTE_LCORE_EVENT_RELEASE_EXTERNAL event
> + * for the passed lcore.
> + * This function must be called with a lcore that successfully passed
> + * eal_lcore_external_notify_allocated().
> + *
> + * @param lcore_id
> + *   The lcore with role ROLE_EXTERNAL to release.
> + */
> +void eal_lcore_external_notify_removed(unsigned int lcore_id);
> +
>  /**
>   * Prepare physical memory mapping
>   * i.e. hugepages on Linux and
> diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
> index 9cf34efef4..e0fec33d5a 100644
> --- a/lib/librte_eal/include/rte_lcore.h
> +++ b/lib/librte_eal/include/rte_lcore.h
> @@ -238,6 +238,55 @@ __rte_experimental
>  void
>  rte_lcore_dump(FILE *f);
>  
> +enum rte_lcore_event_type {
> +	RTE_LCORE_EVENT_NEW_EXTERNAL,
> +	RTE_LCORE_EVENT_RELEASE_EXTERNAL,
> +};
> +
> +/**
> + * Callback prototype for getting lcore events.
> + *
> + * @param lcore_id
> + *   The lcore to consider for this event.
> + * @param event
> + *   The type of event on the lcore.
> + * @param arg
> + *   An opaque pointer passed at notifier registration.
> + * @return
> + *   - -1 when refusing this event,
> + *   - 0 otherwise.
> + */
> +typedef int (*rte_lcore_notifier_cb)(unsigned int lcore_id,
> +	enum rte_lcore_event_type event, void *arg);
> +
> +/**
> + * Register a lcore notifier.
> + *
> + * @param cb
> + *   The callback invoked for each lcore event with the arg argument.
> + *   See rte_lcore_notifier_cb description.
> + * @param arg
> + *   An optional argument that gets passed to the callback when it gets
> + *   invoked.
> + * @return
> + *   On success, returns an opaque pointer for the created notifier.
> + *   NULL on failure.
> + */
> +__rte_experimental
> +void *
> +rte_lcore_notifier_register(rte_lcore_notifier_cb cb, void *arg);
> +
> +/**
> + * Unregister a lcore notifier.
> + *
> + * @param handle
> + *   The handle pointer returned by a former successful call to
> + *   rte_lcore_notifier_register.
> + */
> +__rte_experimental
> +void
> +rte_lcore_notifier_unregister(void *handle);
> +
>  /**
>   * Set core affinity of the current thread.
>   * Support both EAL and non-EAL thread and update TLS.
> diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
> index 6754d52543..1e6f2aaacc 100644
> --- a/lib/librte_eal/rte_eal_version.map
> +++ b/lib/librte_eal/rte_eal_version.map
> @@ -396,6 +396,8 @@ EXPERIMENTAL {
>  
>  	# added in 20.08
>  	rte_lcore_dump;
> +	rte_lcore_notifier_register;
> +	rte_lcore_notifier_unregister;
>  	rte_thread_register;
>  	rte_thread_unregister;
>  };


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH 6/7] eal: dump lcores
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 6/7] eal: dump lcores David Marchand
@ 2020-06-15  6:40   ` Kinsella, Ray
  0 siblings, 0 replies; 126+ messages in thread
From: Kinsella, Ray @ 2020-06-15  6:40 UTC (permalink / raw)
  To: David Marchand, dev; +Cc: Bruce Richardson, Neil Horman


On 10/06/2020 15:45, David Marchand wrote:
> Add a little helper to dump all lcores.
>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
>  lib/librte_eal/common/eal_common_lcore.c  | 32 +++++++++++++++++++++++
>  lib/librte_eal/common/eal_common_thread.c | 16 ++++++++----
>  lib/librte_eal/common/eal_thread.h        | 13 +++++++--
>  lib/librte_eal/freebsd/eal.c              |  2 +-
>  lib/librte_eal/freebsd/eal_thread.c       |  2 +-
>  lib/librte_eal/include/rte_lcore.h        | 10 +++++++
>  lib/librte_eal/linux/eal.c                |  2 +-
>  lib/librte_eal/linux/eal_thread.c         |  2 +-
>  lib/librte_eal/rte_eal_version.map        |  1 +
>  9 files changed, 69 insertions(+), 11 deletions(-)
>
> diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
> index 35d6c1295e..6aca1b2fee 100644
> --- a/lib/librte_eal/common/eal_common_lcore.c
> +++ b/lib/librte_eal/common/eal_common_lcore.c
> @@ -245,3 +245,35 @@ eal_lcore_external_release(unsigned int lcore_id)
>  	}
>  	rte_spinlock_unlock(&external_lcore_lock);
>  }
> +
> +void
> +rte_lcore_dump(FILE *f)
> +{
> +	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
> +	unsigned int lcore_id;
> +	const char *role;
> +	int ret;
> +
> +	rte_spinlock_lock(&external_lcore_lock);
> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> +		switch (rte_eal_lcore_role(lcore_id)) {
> +		case ROLE_RTE:
> +			role = "RTE";
> +			break;
> +		case ROLE_SERVICE:
> +			role = "SERVICE";
> +			break;
> +		case ROLE_EXTERNAL:
> +			role = "EXTERNAL";
> +			break;
> +		default:
> +			continue;
> +		}
> +
> +		ret = eal_thread_dump_affinity(&lcore_config[lcore_id].cpuset,
> +			cpuset, sizeof(cpuset));
> +		fprintf(f, "lcore %u, role %s, cpuset %s%s\n", lcore_id, role,
> +			cpuset, ret == 0 ? "" : "...");

Out of interest - why fprintf instead of rte_log?

> +	}
> +	rte_spinlock_unlock(&external_lcore_lock);
> +}
> diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> index d57a6ec797..a81b192ff3 100644
> --- a/lib/librte_eal/common/eal_common_thread.c
> +++ b/lib/librte_eal/common/eal_common_thread.c
> @@ -118,17 +118,14 @@ rte_thread_get_affinity(rte_cpuset_t *cpusetp)
>  }
>  
>  int
> -eal_thread_dump_affinity(char *str, unsigned size)
> +eal_thread_dump_affinity(rte_cpuset_t *cpuset, char *str, unsigned int size)
>  {
> -	rte_cpuset_t cpuset;
>  	unsigned cpu;
>  	int ret;
>  	unsigned int out = 0;
>  
> -	rte_thread_get_affinity(&cpuset);
> -
>  	for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
> -		if (!CPU_ISSET(cpu, &cpuset))
> +		if (!CPU_ISSET(cpu, cpuset))
>  			continue;
>  
>  		ret = snprintf(str + out,
> @@ -151,6 +148,15 @@ eal_thread_dump_affinity(char *str, unsigned size)
>  	return ret;
>  }
>  
> +int
> +eal_thread_dump_current_affinity(char *str, unsigned int size)
> +{
> +	rte_cpuset_t cpuset;
> +
> +	rte_thread_get_affinity(&cpuset);
> +	return eal_thread_dump_affinity(&cpuset, str, size);
> +}
> +
>  void
>  rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
>  {
> diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
> index 4ecd8fd53a..13ec252e01 100644
> --- a/lib/librte_eal/common/eal_thread.h
> +++ b/lib/librte_eal/common/eal_thread.h
> @@ -47,13 +47,15 @@ unsigned eal_cpu_socket_id(unsigned cpu_id);
>  #define RTE_CPU_AFFINITY_STR_LEN            256
>  
>  /**
> - * Dump the current pthread cpuset.
> + * Dump the cpuset as a human readable string.
>   * This function is private to EAL.
>   *
>   * Note:
>   *   If the dump size is greater than the size of given buffer,
>   *   the string will be truncated and with '\0' at the end.
>   *
> + * @param cpuset
> + *   The CPU affinity object to dump.
>   * @param str
>   *   The string buffer the cpuset will dump to.
>   * @param size
> @@ -62,6 +64,13 @@ unsigned eal_cpu_socket_id(unsigned cpu_id);
>   *   0 for success, -1 if truncation happens.
>   */
>  int
> -eal_thread_dump_affinity(char *str, unsigned size);
> +eal_thread_dump_affinity(rte_cpuset_t *cpuset, char *str, unsigned int size);
> +
> +/**
> + * Dump the current thread cpuset.
> + * This is a wrapper on eal_thread_dump_affinity().
> + */
> +int
> +eal_thread_dump_current_affinity(char *str, unsigned int size);
>  
>  #endif /* EAL_THREAD_H */
> diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
> index b5ea11df16..69a6f7d8c4 100644
> --- a/lib/librte_eal/freebsd/eal.c
> +++ b/lib/librte_eal/freebsd/eal.c
> @@ -886,7 +886,7 @@ rte_eal_init(int argc, char **argv)
>  	rte_thread_init(rte_config.master_lcore,
>  		&lcore_config[rte_config.master_lcore].cpuset);
>  
> -	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
> +	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
>  
>  	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
>  		rte_config.master_lcore, thread_id, cpuset,
> diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
> index c1fb8eb2d8..b1a3619f51 100644
> --- a/lib/librte_eal/freebsd/eal_thread.c
> +++ b/lib/librte_eal/freebsd/eal_thread.c
> @@ -92,7 +92,7 @@ eal_thread_loop(__rte_unused void *arg)
>  
>  	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
>  
> -	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
> +	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
>  	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
>  		lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
>  
> diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
> index d59c4edc67..9cf34efef4 100644
> --- a/lib/librte_eal/include/rte_lcore.h
> +++ b/lib/librte_eal/include/rte_lcore.h
> @@ -228,6 +228,16 @@ unsigned int rte_get_next_lcore(unsigned int i, int skip_master, int wrap);
>  	     i<RTE_MAX_LCORE;						\
>  	     i = rte_get_next_lcore(i, 1, 0))
>  
> +/**
> + * List all lcores.
> + *
> + * @param f
> + *   The output stream where the dump should be sent.
> + */
> +__rte_experimental
> +void
> +rte_lcore_dump(FILE *f);
> +
>  /**
>   * Set core affinity of the current thread.
>   * Support both EAL and non-EAL thread and update TLS.
> diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
> index 8638376b8a..2f0efd7cd3 100644
> --- a/lib/librte_eal/linux/eal.c
> +++ b/lib/librte_eal/linux/eal.c
> @@ -1214,7 +1214,7 @@ rte_eal_init(int argc, char **argv)
>  	rte_thread_init(rte_config.master_lcore,
>  		&lcore_config[rte_config.master_lcore].cpuset);
>  
> -	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
> +	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
>  	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
>  		rte_config.master_lcore, (uintptr_t)thread_id, cpuset,
>  		ret == 0 ? "" : "...");
> diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
> index 07aec0c44d..22d9bc8c01 100644
> --- a/lib/librte_eal/linux/eal_thread.c
> +++ b/lib/librte_eal/linux/eal_thread.c
> @@ -92,7 +92,7 @@ eal_thread_loop(__rte_unused void *arg)
>  
>  	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
>  
> -	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
> +	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
>  	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
>  		lcore_id, (uintptr_t)thread_id, cpuset, ret == 0 ? "" : "...");
>  
> diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
> index f5b68d2815..6754d52543 100644
> --- a/lib/librte_eal/rte_eal_version.map
> +++ b/lib/librte_eal/rte_eal_version.map
> @@ -395,6 +395,7 @@ EXPERIMENTAL {
>  	rte_trace_save;
>  
>  	# added in 20.08
> +	rte_lcore_dump;
>  	rte_thread_register;
>  	rte_thread_unregister;
>  };

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH 5/7] eal: register non-EAL threads as lcore
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 5/7] eal: register non-EAL threads as lcore David Marchand
@ 2020-06-15  6:43   ` Kinsella, Ray
  0 siblings, 0 replies; 126+ messages in thread
From: Kinsella, Ray @ 2020-06-15  6:43 UTC (permalink / raw)
  To: David Marchand, dev; +Cc: John McNamara, Marko Kovacevic, Neil Horman


On 10/06/2020 15:45, David Marchand wrote:
> DPDK allows calling some part of its API from a non-EAL thread but this
> has some limitations.
> OVS (and other applications) has its own thread management but still
> want to avoid such limitations by hacking RTE_PER_LCORE(_lcore_id) and
> faking EAL threads potentially unknown of some DPDK component.
>
> Introduce a new API to register non-EAL thread and associate them to a
> free lcore with a new EXTERNAL role.
> This role denotes lcores that do not run DPDK mainloop and as such
> prevents use of rte_eal_wait_lcore() and consorts.
>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
>  doc/guides/howto/debug_troubleshoot.rst   |  5 ++-
>  lib/librte_eal/common/eal_common_lcore.c  | 44 +++++++++++++++++--
>  lib/librte_eal/common/eal_common_thread.c | 45 ++++++++++++++++++++
>  lib/librte_eal/common/eal_private.h       | 17 ++++++++
>  lib/librte_eal/include/rte_eal.h          |  9 ----
>  lib/librte_eal/include/rte_lcore.h        | 52 +++++++++++++++++------
>  lib/librte_eal/rte_eal_version.map        |  4 ++
>  7 files changed, 147 insertions(+), 29 deletions(-)
>
> diff --git a/doc/guides/howto/debug_troubleshoot.rst b/doc/guides/howto/debug_troubleshoot.rst
> index cef016b2fe..056f556a73 100644
> --- a/doc/guides/howto/debug_troubleshoot.rst
> +++ b/doc/guides/howto/debug_troubleshoot.rst
> @@ -307,8 +307,9 @@ Custom worker function :numref:`dtg_distributor_worker`.
>  
>  #. Configuration issue isolation
>  
> -   * Identify core role using ``rte_eal_lcore_role`` to identify RTE, OFF and
> -     SERVICE. Check performance functions are mapped to run on the cores.
> +   * Identify core role using ``rte_eal_lcore_role`` to identify RTE, OFF,
> +     SERVICE and EXTERNAL. Check performance functions are mapped to run on the
> +     cores.
>  
>     * For high-performance execution logic ensure running it on correct NUMA
>       and non-master core.
> diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
> index 5404922a87..35d6c1295e 100644
> --- a/lib/librte_eal/common/eal_common_lcore.c
> +++ b/lib/librte_eal/common/eal_common_lcore.c
> @@ -6,13 +6,15 @@
>  #include <limits.h>
>  #include <string.h>
>  
> -#include <rte_errno.h>
> -#include <rte_log.h>
> -#include <rte_eal.h>
> -#include <rte_lcore.h>
>  #include <rte_common.h>
>  #include <rte_debug.h>
> +#include <rte_eal.h>
> +#include <rte_errno.h>
> +#include <rte_lcore.h>
> +#include <rte_log.h>
> +#include <rte_spinlock.h>
>  
> +#include "eal_internal_cfg.h"
>  #include "eal_private.h"
>  #include "eal_thread.h"
>  
> @@ -209,3 +211,37 @@ rte_socket_id_by_idx(unsigned int idx)
>  	}
>  	return config->numa_nodes[idx];
>  }
> +
> +rte_spinlock_t external_lcore_lock = RTE_SPINLOCK_INITIALIZER;
> +
> +unsigned int
> +eal_lcore_external_reserve(void)
> +{
> +	struct rte_config *cfg = rte_eal_get_configuration();
> +	unsigned int lcore_id;
> +
> +	rte_spinlock_lock(&external_lcore_lock);
> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> +		if (rte_eal_lcore_role(lcore_id) != ROLE_OFF)
> +			continue;
> +		cfg->lcore_role[lcore_id] = ROLE_EXTERNAL;
> +		cfg->lcore_count++;
> +		break;
> +	}
> +	rte_spinlock_unlock(&external_lcore_lock);
> +
> +	return lcore_id;
> +}
> +
> +void
> +eal_lcore_external_release(unsigned int lcore_id)
> +{
> +	struct rte_config *cfg = rte_eal_get_configuration();
> +
> +	rte_spinlock_lock(&external_lcore_lock);
> +	if (rte_eal_lcore_role(lcore_id) == ROLE_EXTERNAL) {
> +		cfg->lcore_role[lcore_id] = ROLE_OFF;
> +		cfg->lcore_count--;
> +	}
> +	rte_spinlock_unlock(&external_lcore_lock);
> +}
> diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> index 8a973ca8ac..d57a6ec797 100644
> --- a/lib/librte_eal/common/eal_common_thread.c
> +++ b/lib/librte_eal/common/eal_common_thread.c
> @@ -29,6 +29,7 @@ RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
>  static RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) =
>  	(unsigned int)SOCKET_ID_ANY;
>  static RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
> +static RTE_DEFINE_PER_LCORE(bool, thread_registered);
>  
>  unsigned rte_socket_id(void)
>  {
> @@ -255,3 +256,47 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>  	pthread_join(*thread, NULL);
>  	return -ret;
>  }
> +
> +void
> +rte_thread_register(void)
> +{
> +	unsigned int lcore_id;
> +	rte_cpuset_t cpuset;
> +
> +	/* EAL init flushes all lcores, we can't register before. */
> +	assert(internal_config.init_complete == 1);
> +
> +	if (RTE_PER_LCORE(thread_registered))
> +		return;
> +
> +	if (pthread_getaffinity_np(pthread_self(), sizeof(cpuset),
> +			&cpuset) != 0)
> +		CPU_ZERO(&cpuset);
> +
> +	lcore_id = eal_lcore_external_reserve();
> +	if (lcore_id >= RTE_MAX_LCORE)
> +		lcore_id = LCORE_ID_ANY;
> +
> +	rte_thread_init(lcore_id, &cpuset);
> +
> +	RTE_LOG(DEBUG, EAL, "Registered thread as lcore %u.\n", lcore_id);
> +	RTE_PER_LCORE(thread_registered) = true;
> +}
> +
> +void
> +rte_thread_unregister(void)
> +{
> +	unsigned int lcore_id;
> +
> +	if (!RTE_PER_LCORE(thread_registered))
> +		return;
> +
> +	lcore_id = RTE_PER_LCORE(_lcore_id);
> +	if (lcore_id != LCORE_ID_ANY)
> +		eal_lcore_external_release(lcore_id);
> +
> +	rte_thread_uninit();
> +
> +	RTE_LOG(DEBUG, EAL, "Unregistered thread (was lcore %u).\n", lcore_id);
> +	RTE_PER_LCORE(thread_registered) = false;
> +}
> diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
> index 869ce183ad..8dd850f68a 100644
> --- a/lib/librte_eal/common/eal_private.h
> +++ b/lib/librte_eal/common/eal_private.h
> @@ -274,6 +274,23 @@ uint64_t get_tsc_freq(void);
>   */
>  uint64_t get_tsc_freq_arch(void);
>  
> +/**
> + * Ask for a free lcore to associate to a non-EAL thread.
> + *
> + * @return
> + *   The id of a lcore with role ROLE_EXTERNAL or RTE_MAX_LCORE if none was
> + *   available.
> + */
> +unsigned int eal_lcore_external_reserve(void);
> +
> +/**
> + * Release an external lcore.
> + *
> + * @param lcore_id
> + *   The lcore with role ROLE_EXTERNAL to release.
> + */
> +void eal_lcore_external_release(unsigned int lcore_id);
> +
>  /**
>   * Prepare physical memory mapping
>   * i.e. hugepages on Linux and
> diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
> index 2edf8c6556..0913d1947c 100644
> --- a/lib/librte_eal/include/rte_eal.h
> +++ b/lib/librte_eal/include/rte_eal.h
> @@ -31,15 +31,6 @@ extern "C" {
>  /* Maximum thread_name length. */
>  #define RTE_MAX_THREAD_NAME_LEN 16
>  
> -/**
> - * The lcore role (used in RTE or not).
> - */
> -enum rte_lcore_role_t {
> -	ROLE_RTE,
> -	ROLE_OFF,
> -	ROLE_SERVICE,
> -};
> -
>  /**
>   * The type of process in a linux, multi-process setup
>   */
> diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
> index 5c1d1926e9..d59c4edc67 100644
> --- a/lib/librte_eal/include/rte_lcore.h
> +++ b/lib/librte_eal/include/rte_lcore.h
> @@ -24,6 +24,16 @@ extern "C" {
>  
>  RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
>  
> +/**
> + * The lcore role (used in RTE or not).
> + */
> +enum rte_lcore_role_t {
> +	ROLE_RTE,
> +	ROLE_OFF,
> +	ROLE_SERVICE,
> +	ROLE_EXTERNAL,
> +};
> +
>  /**
>   * Get a lcore's role.
>   *
> @@ -34,6 +44,20 @@ RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
>   */
>  enum rte_lcore_role_t rte_eal_lcore_role(unsigned int lcore_id);
>  
> +/**
> + * Test if the core supplied has a specific role
> + *
> + * @param lcore_id
> + *   The identifier of the lcore, which MUST be between 0 and
> + *   RTE_MAX_LCORE-1.
> + * @param role
> + *   The role to be checked against.
> + * @return
> + *   Boolean value: positive if test is true; otherwise returns 0.
> + */
> +int
> +rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
> +
>  /**
>   * Return the Application thread ID of the execution unit.
>   *
> @@ -256,6 +280,20 @@ int rte_thread_setname(pthread_t id, const char *name);
>  __rte_experimental
>  int rte_thread_getname(pthread_t id, char *name, size_t len);
>  
> +/**
> + * Register current non-EAL thread as a lcore.
> + */
> +__rte_experimental
> +void
> +rte_thread_register(void);
> +
> +/**
> + * Unregister current thread and release lcore if one was associated.
> + */
> +__rte_experimental
> +void
> +rte_thread_unregister(void);
> +
>  /**
>   * Create a control thread.
>   *
> @@ -283,20 +321,6 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>  		const pthread_attr_t *attr,
>  		void *(*start_routine)(void *), void *arg);
>  
> -/**
> - * Test if the core supplied has a specific role
> - *
> - * @param lcore_id
> - *   The identifier of the lcore, which MUST be between 0 and
> - *   RTE_MAX_LCORE-1.
> - * @param role
> - *   The role to be checked against.
> - * @return
> - *   Boolean value: positive if test is true; otherwise returns 0.
> - */
> -int
> -rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
> -
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
> index fdfc3f1a88..f5b68d2815 100644
> --- a/lib/librte_eal/rte_eal_version.map
> +++ b/lib/librte_eal/rte_eal_version.map
> @@ -393,4 +393,8 @@ EXPERIMENTAL {
>  	rte_trace_point_lookup;
>  	rte_trace_regexp;
>  	rte_trace_save;
> +
> +	# added in 20.08
> +	rte_thread_register;
> +	rte_thread_unregister;
>  };
Acked-by: Ray Kinsella <mdr@ashroe.eu>

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH 2/7] eal: fix multiple definition of per lcore thread id
  2020-06-10 14:45 ` [dpdk-dev] [PATCH 2/7] eal: fix multiple definition of per lcore thread id David Marchand
@ 2020-06-15  6:46   ` Kinsella, Ray
  0 siblings, 0 replies; 126+ messages in thread
From: Kinsella, Ray @ 2020-06-15  6:46 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: Neil Horman, Cunming Liang, Konstantin Ananyev, Olivier Matz


On 10/06/2020 15:45, David Marchand wrote:
> Because of the inline accessor + static declaration in rte_gettid(),
> we end up with multiple symbols for RTE_PER_LCORE(_thread_id).
> Each compilation unit will pay a cost when accessing this information
> for the first time.
>
> $ nm build/app/dpdk-testpmd | grep per_lcore__thread_id
> 0000000000000054 d per_lcore__thread_id.5037
> 0000000000000040 d per_lcore__thread_id.5103
> 0000000000000048 d per_lcore__thread_id.5259
> 000000000000004c d per_lcore__thread_id.5259
> 0000000000000044 d per_lcore__thread_id.5933
> 0000000000000058 d per_lcore__thread_id.6261
> 0000000000000050 d per_lcore__thread_id.7378
> 000000000000005c d per_lcore__thread_id.7496
> 000000000000000c d per_lcore__thread_id.8016
> 0000000000000010 d per_lcore__thread_id.8431
>
> Make it global as part of the DPDK_21 stable ABI.
>
> Fixes: ef76436c6834 ("eal: get unique thread id")
>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
>  lib/librte_eal/common/eal_common_thread.c | 1 +
>  lib/librte_eal/include/rte_eal.h          | 3 ++-
>  lib/librte_eal/rte_eal_version.map        | 7 +++++++
>  3 files changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> index 25200e5a99..f04d880880 100644
> --- a/lib/librte_eal/common/eal_common_thread.c
> +++ b/lib/librte_eal/common/eal_common_thread.c
> @@ -24,6 +24,7 @@
>  #include "eal_thread.h"
>  
>  RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
> +RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
>  static RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) =
>  	(unsigned int)SOCKET_ID_ANY;
>  static RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
> diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
> index 2f9ed298de..2edf8c6556 100644
> --- a/lib/librte_eal/include/rte_eal.h
> +++ b/lib/librte_eal/include/rte_eal.h
> @@ -447,6 +447,8 @@ enum rte_intr_mode rte_eal_vfio_intr_mode(void);
>   */
>  int rte_sys_gettid(void);
>  
> +RTE_DECLARE_PER_LCORE(int, _thread_id);
> +
>  /**
>   * Get system unique thread id.
>   *
> @@ -456,7 +458,6 @@ int rte_sys_gettid(void);
>   */
>  static inline int rte_gettid(void)
>  {
> -	static RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
>  	if (RTE_PER_LCORE(_thread_id) == -1)
>  		RTE_PER_LCORE(_thread_id) = rte_sys_gettid();
>  	return RTE_PER_LCORE(_thread_id);
> diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
> index d8038749a4..fdfc3f1a88 100644
> --- a/lib/librte_eal/rte_eal_version.map
> +++ b/lib/librte_eal/rte_eal_version.map
> @@ -221,6 +221,13 @@ DPDK_20.0 {
>  	local: *;
>  };
>  
> +DPDK_21 {
> +	global:
> +
> +	per_lcore__thread_id;
> +
> +} DPDK_20.0;
> +
>  EXPERIMENTAL {
>  	global: 

Acked-by: Ray Kinsella <mdr@ashroe.eu>


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH 0/7] Register external threads as lcore
  2020-06-10 15:33     ` Jerin Jacob
@ 2020-06-15  7:11       ` David Marchand
  0 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-15  7:11 UTC (permalink / raw)
  To: Jerin Jacob; +Cc: dpdk-dev

On Wed, Jun 10, 2020 at 5:33 PM Jerin Jacob <jerinjacobk@gmail.com> wrote:
>
> On Wed, Jun 10, 2020 at 8:48 PM David Marchand
> <david.marchand@redhat.com> wrote:
> >
> > On Wed, Jun 10, 2020 at 5:09 PM Jerin Jacob <jerinjacobk@gmail.com> wrote:
> > >
> > > On Wed, Jun 10, 2020 at 8:15 PM David Marchand
> > > <david.marchand@redhat.com> wrote:
> > > >
> > > > OVS and some other applications have been hacking into DPDK internals to
> > > > fake EAL threads and avoid performance penalty of only having non-EAL
> > > > threads.
> > > >
> > > > This series proposes to add a new type of lcores and maps those external
> > > > threads to such lcores.
> > > > Those threads won't run the DPDK eal mainloop and as a consequence part of
> > > > the EAL threads API cannot work.
> > > >
> > > > Having new lcores appearing during the process lifetime is not expected
> > > > by some DPDK components. This is addressed by notifying of such lcore
> > > > hotplug.
> > > >
> > > > This patchset has still some more work (like refusing new lcore type in
> > > > incompatible EAL threads API, updating the documentation and adding unit
> > > > tests) but I am sending it anyway as I would like to get this in for
> > > > 20.08.
> > >
> > > Cool feature.
> > >
> > > Is mempool's lcore local cache working for external cores with this scheme?
> >
> > Yes, as it is stateless, all we need is a unique lcore_id in [0,
> > RTE_MAX_LCORE-1] range.
>
> Was it the case when lcore registered and then mempool created? What
> about other case, mempool created and then lcore registered?

- All caches are initialised for all possible lcores for each mempool.
https://git.dpdk.org/dpdk/tree/lib/librte_mempool/rte_mempool.c#n965
So any order is fine wrt the local mempool cache.

- If a mempool driver wants to initialise per lcore data on demand
[1], the driver has to register a lcore notifier per mempool.
1: https://git.dpdk.org/dpdk/tree/drivers/mempool/bucket/rte_mempool_bucket.c#n437

But this current series implementation does not handle registering in any order.
I will fix this in v2 (and rework the locking which is quite ugly)
hopefully this week.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH 7/7] eal: add lcore hotplug notifications
  2020-06-15  6:34   ` Kinsella, Ray
@ 2020-06-15  7:13     ` David Marchand
  0 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-15  7:13 UTC (permalink / raw)
  To: Kinsella, Ray; +Cc: dev, Neil Horman, Richardson, Bruce, Thomas Monjalon

On Mon, Jun 15, 2020 at 8:34 AM Kinsella, Ray <mdr@ashroe.eu> wrote:
>
> From ABI  PoV, you are 100%.
>
> Is the agreed term 'callback'?, not 'notifier' for example rte_dev_event_callback_register, rte_mem_event_callback_register
>
> I did wonder however, if all these cb's would be better handled through a EventDev event notification style approach.

I am reconsidering the term.
callback seems better yes and, actually, there is no need for a lcore
event framework.
Cooking a v2 for this week.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v2 0/9] Register non-EAL threads as lcore
  2020-06-10 14:44 [dpdk-dev] [PATCH 0/7] Register external threads as lcore David Marchand
                   ` (7 preceding siblings ...)
  2020-06-10 15:09 ` [dpdk-dev] [PATCH 0/7] Register external threads as lcore Jerin Jacob
@ 2020-06-19 16:22 ` David Marchand
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 1/9] eal: relocate per thread symbols to common David Marchand
                     ` (8 more replies)
  2020-06-22 13:25 ` [dpdk-dev] [PATCH v3 0/9] Register non-EAL threads as lcore David Marchand
                   ` (3 subsequent siblings)
  12 siblings, 9 replies; 126+ messages in thread
From: David Marchand @ 2020-06-19 16:22 UTC (permalink / raw)
  To: dev; +Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes, i.maximets

OVS and some other applications have been hacking into DPDK internals to
fake EAL threads and avoid performance penalty of only having non-EAL
threads.

This series proposes to add a new type of lcores and maps those threads
to such lcores.
non-EAL threads won't run the DPDK eal mainloop.
As a consequence, part of the EAL threads API cannot work.

Having new lcores appearing during the process lifetime is not expected
by some DPDK components. This is addressed by introducing init/uninit
callbacks invoked when hotplugging such lcores.

There is still some work:
- refuse new lcore role in incompatible EAL threads API (or document it
  only as those API were already incompatible?),
- finish unit tests in patch 7,
- think about deprecation notices for the existing RTE_LCORE_FOREACH
  macros and similar helpers; it is probably worth discussing how to
  iterate over lcores.

For the interested parties, I have a patch [1] against dpdk-latest OVS
branch that makes use of this series.

1: https://github.com/david-marchand/ovs/commit/dpdk-latest-external-lcores

Changes since v1:
- rebased on master (conflicts on merged Windows series),
- separated lcore role code cleanup in a patch,
- tried to use a single naming, so kept non-EAL threads as the main
  notion. non-EAL threads are then distinguished between registered and
  unregistered non-EAL threads,
- added unit tests (still missing some coverage, marked with a FIXME),
- reworked callbacks call under a common rwlock lock which protects
  lcores allocations and callbacks registration,
- introduced lcore iterators and converted the bucket mempool driver,

-- 
David Marchand

David Marchand (9):
  eal: relocate per thread symbols to common
  eal: fix multiple definition of per lcore thread id
  eal: introduce thread init helper
  eal: introduce thread uninit helper
  eal: move lcore role code
  eal: register non-EAL threads as lcores
  eal: add lcore init callbacks
  eal: add lcore iterators
  mempool/bucket: handle non-EAL lcores

 MAINTAINERS                                   |   1 +
 app/test/Makefile                             |   1 +
 app/test/autotest_data.py                     |   6 +
 app/test/meson.build                          |   2 +
 app/test/test_lcores.c                        | 234 +++++++++++++++++
 doc/guides/howto/debug_troubleshoot.rst       |   5 +-
 .../prog_guide/env_abstraction_layer.rst      |  22 +-
 doc/guides/prog_guide/mempool_lib.rst         |   2 +-
 drivers/mempool/bucket/rte_mempool_bucket.c   | 131 ++++++----
 lib/librte_eal/common/eal_common_lcore.c      | 241 +++++++++++++++++-
 lib/librte_eal/common/eal_common_thread.c     | 126 ++++++---
 lib/librte_eal/common/eal_common_trace.c      |  49 +++-
 lib/librte_eal/common/eal_private.h           |  19 ++
 lib/librte_eal/common/eal_thread.h            |  26 +-
 lib/librte_eal/common/eal_trace.h             |   1 +
 lib/librte_eal/freebsd/eal.c                  |  16 +-
 lib/librte_eal/freebsd/eal_thread.c           |  38 +--
 lib/librte_eal/include/rte_eal.h              |  12 +-
 lib/librte_eal/include/rte_lcore.h            | 167 ++++++++++--
 lib/librte_eal/linux/eal.c                    |  17 +-
 lib/librte_eal/linux/eal_thread.c             |  38 +--
 lib/librte_eal/rte_eal_version.map            |  15 ++
 lib/librte_eal/windows/eal.c                  |   3 +-
 lib/librte_eal/windows/eal_thread.c           |  14 +-
 lib/librte_mempool/rte_mempool.h              |  11 +-
 25 files changed, 960 insertions(+), 237 deletions(-)
 create mode 100644 app/test/test_lcores.c

-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v2 1/9] eal: relocate per thread symbols to common
  2020-06-19 16:22 ` [dpdk-dev] [PATCH v2 0/9] Register non-EAL " David Marchand
@ 2020-06-19 16:22   ` David Marchand
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 2/9] eal: fix multiple definition of per lcore thread id David Marchand
                     ` (7 subsequent siblings)
  8 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-19 16:22 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Harini Ramakrishnan, Omar Cardona, Pallavi Kadam,
	Ranjit Menon

We have per lcore thread symbols scattered in OS implementations but
common code relies on them.
Move all of them to common code.

RTE_PER_LCORE(_socket_id) and RTE_PER_LCORE(_cpuset) have public
accessors and are not exported through the library map, they can be
made static.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/librte_eal/common/eal_common_thread.c | 5 ++++-
 lib/librte_eal/freebsd/eal_thread.c       | 4 ----
 lib/librte_eal/include/rte_lcore.h        | 1 -
 lib/librte_eal/linux/eal_thread.c         | 4 ----
 lib/librte_eal/windows/eal_thread.c       | 4 ----
 5 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 370bb1b634..a5f67d811c 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -21,7 +21,10 @@
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DECLARE_PER_LCORE(unsigned , _socket_id);
+RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
+static RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) =
+	(unsigned int)SOCKET_ID_ANY;
+static RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
 
 unsigned rte_socket_id(void)
 {
diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
index b52019782a..40676d9ef5 100644
--- a/lib/librte_eal/freebsd/eal_thread.c
+++ b/lib/librte_eal/freebsd/eal_thread.c
@@ -25,10 +25,6 @@
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
 /*
  * Send a message to a slave lcore identified by slave_id to call a
  * function f with argument arg. Once the execution is done, the
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 339046bc86..5c1d1926e9 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -23,7 +23,6 @@ extern "C" {
 #define LCORE_ID_ANY     UINT32_MAX       /**< Any lcore. */
 
 RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
-RTE_DECLARE_PER_LCORE(rte_cpuset_t, _cpuset); /**< Per thread "cpuset". */
 
 /**
  * Get a lcore's role.
diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
index cd9d6e0ebf..a52ebef3a4 100644
--- a/lib/librte_eal/linux/eal_thread.c
+++ b/lib/librte_eal/linux/eal_thread.c
@@ -25,10 +25,6 @@
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
 /*
  * Send a message to a slave lcore identified by slave_id to call a
  * function f with argument arg. Once the execution is done, the
diff --git a/lib/librte_eal/windows/eal_thread.c b/lib/librte_eal/windows/eal_thread.c
index 3dd56519c9..f12a2ec6ad 100644
--- a/lib/librte_eal/windows/eal_thread.c
+++ b/lib/librte_eal/windows/eal_thread.c
@@ -16,10 +16,6 @@
 #include "eal_private.h"
 #include "eal_windows.h"
 
-RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) = (unsigned int)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
 /*
  * Send a message to a slave lcore identified by slave_id to call a
  * function f with argument arg. Once the execution is done, the
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v2 2/9] eal: fix multiple definition of per lcore thread id
  2020-06-19 16:22 ` [dpdk-dev] [PATCH v2 0/9] Register non-EAL " David Marchand
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 1/9] eal: relocate per thread symbols to common David Marchand
@ 2020-06-19 16:22   ` David Marchand
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 3/9] eal: introduce thread init helper David Marchand
                     ` (6 subsequent siblings)
  8 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-19 16:22 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Neil Horman, Cunming Liang, Konstantin Ananyev,
	Olivier Matz

Because of the inline accessor + static declaration in rte_gettid(),
we end up with multiple symbols for RTE_PER_LCORE(_thread_id).
Each compilation unit will pay a cost when accessing this information
for the first time.

$ nm build/app/dpdk-testpmd | grep per_lcore__thread_id
0000000000000054 d per_lcore__thread_id.5037
0000000000000040 d per_lcore__thread_id.5103
0000000000000048 d per_lcore__thread_id.5259
000000000000004c d per_lcore__thread_id.5259
0000000000000044 d per_lcore__thread_id.5933
0000000000000058 d per_lcore__thread_id.6261
0000000000000050 d per_lcore__thread_id.7378
000000000000005c d per_lcore__thread_id.7496
000000000000000c d per_lcore__thread_id.8016
0000000000000010 d per_lcore__thread_id.8431

Make it global as part of the DPDK_21 stable ABI.

Fixes: ef76436c6834 ("eal: get unique thread id")

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Ray Kinsella <mdr@ashroe.eu>
---
 lib/librte_eal/common/eal_common_thread.c | 1 +
 lib/librte_eal/include/rte_eal.h          | 3 ++-
 lib/librte_eal/rte_eal_version.map        | 7 +++++++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index a5f67d811c..280c64bb76 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -22,6 +22,7 @@
 #include "eal_thread.h"
 
 RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
+RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
 static RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) =
 	(unsigned int)SOCKET_ID_ANY;
 static RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
index 2f9ed298de..2edf8c6556 100644
--- a/lib/librte_eal/include/rte_eal.h
+++ b/lib/librte_eal/include/rte_eal.h
@@ -447,6 +447,8 @@ enum rte_intr_mode rte_eal_vfio_intr_mode(void);
  */
 int rte_sys_gettid(void);
 
+RTE_DECLARE_PER_LCORE(int, _thread_id);
+
 /**
  * Get system unique thread id.
  *
@@ -456,7 +458,6 @@ int rte_sys_gettid(void);
  */
 static inline int rte_gettid(void)
 {
-	static RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
 	if (RTE_PER_LCORE(_thread_id) == -1)
 		RTE_PER_LCORE(_thread_id) = rte_sys_gettid();
 	return RTE_PER_LCORE(_thread_id);
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 196eef5afa..0d42d44ce9 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -221,6 +221,13 @@ DPDK_20.0 {
 	local: *;
 };
 
+DPDK_21 {
+	global:
+
+	per_lcore__thread_id;
+
+} DPDK_20.0;
+
 EXPERIMENTAL {
 	global:
 
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v2 3/9] eal: introduce thread init helper
  2020-06-19 16:22 ` [dpdk-dev] [PATCH v2 0/9] Register non-EAL " David Marchand
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 1/9] eal: relocate per thread symbols to common David Marchand
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 2/9] eal: fix multiple definition of per lcore thread id David Marchand
@ 2020-06-19 16:22   ` David Marchand
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 4/9] eal: introduce thread uninit helper David Marchand
                     ` (5 subsequent siblings)
  8 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-19 16:22 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Harini Ramakrishnan, Omar Cardona, Pallavi Kadam,
	Ranjit Menon

Introduce a helper responsible for initialising the per thread context.
We can then have a unified context for EAL and non-EAL threads and
remove copy/paste'd OS-specific helpers.

Per EAL thread CPU affinity setting is separated from the thread init.
This accommodates the Windows EAL, where CPU affinity is not set at
the moment.
Besides, having affinity set by the master lcore in FreeBSD and Linux
will make it possible to detect errors rather than panic in the child
thread. But the cleanup when such an event happens is left for later.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v1:
- rebased on master, removed Windows workarounds wrt gettid and traces
  support,

---
 lib/librte_eal/common/eal_common_thread.c | 51 +++++++++++++----------
 lib/librte_eal/common/eal_thread.h        |  8 ++--
 lib/librte_eal/freebsd/eal.c              | 14 ++++++-
 lib/librte_eal/freebsd/eal_thread.c       | 32 +-------------
 lib/librte_eal/linux/eal.c                | 15 ++++++-
 lib/librte_eal/linux/eal_thread.c         | 32 +-------------
 lib/librte_eal/windows/eal.c              |  3 +-
 lib/librte_eal/windows/eal_thread.c       | 10 +----
 8 files changed, 66 insertions(+), 99 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 280c64bb76..afb30236c5 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -71,20 +71,10 @@ eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
 	return socket_id;
 }
 
-int
-rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+static void
+thread_update_affinity(rte_cpuset_t *cpusetp)
 {
-	int s;
-	unsigned lcore_id;
-	pthread_t tid;
-
-	tid = pthread_self();
-
-	s = pthread_setaffinity_np(tid, sizeof(rte_cpuset_t), cpusetp);
-	if (s != 0) {
-		RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
-		return -1;
-	}
+	unsigned int lcore_id = rte_lcore_id();
 
 	/* store socket_id in TLS for quick access */
 	RTE_PER_LCORE(_socket_id) =
@@ -94,14 +84,24 @@ rte_thread_set_affinity(rte_cpuset_t *cpusetp)
 	memmove(&RTE_PER_LCORE(_cpuset), cpusetp,
 		sizeof(rte_cpuset_t));
 
-	lcore_id = rte_lcore_id();
 	if (lcore_id != (unsigned)LCORE_ID_ANY) {
 		/* EAL thread will update lcore_config */
 		lcore_config[lcore_id].socket_id = RTE_PER_LCORE(_socket_id);
 		memmove(&lcore_config[lcore_id].cpuset, cpusetp,
 			sizeof(rte_cpuset_t));
 	}
+}
 
+int
+rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+{
+	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+			cpusetp) != 0) {
+		RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
+		return -1;
+	}
+
+	thread_update_affinity(cpusetp);
 	return 0;
 }
 
@@ -147,6 +147,19 @@ eal_thread_dump_affinity(char *str, unsigned size)
 	return ret;
 }
 
+void
+rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
+{
+	/* set the lcore ID in per-lcore memory area */
+	RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+	/* acquire system unique id  */
+	rte_gettid();
+
+	thread_update_affinity(cpuset);
+
+	__rte_trace_mem_per_thread_alloc();
+}
 
 struct rte_thread_ctrl_params {
 	void *(*start_routine)(void *);
@@ -154,16 +167,14 @@ struct rte_thread_ctrl_params {
 	pthread_barrier_t configured;
 };
 
-static void *rte_thread_init(void *arg)
+static void *ctrl_thread_init(void *arg)
 {
 	int ret;
-	rte_cpuset_t *cpuset = &internal_config.ctrl_cpuset;
 	struct rte_thread_ctrl_params *params = arg;
 	void *(*start_routine)(void *) = params->start_routine;
 	void *routine_arg = params->arg;
 
-	/* Store cpuset in TLS for quick access */
-	memmove(&RTE_PER_LCORE(_cpuset), cpuset, sizeof(rte_cpuset_t));
+	rte_thread_init(rte_lcore_id(), &internal_config.ctrl_cpuset);
 
 	ret = pthread_barrier_wait(&params->configured);
 	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
@@ -171,8 +182,6 @@ static void *rte_thread_init(void *arg)
 		free(params);
 	}
 
-	__rte_trace_mem_per_thread_alloc();
-
 	return start_routine(routine_arg);
 }
 
@@ -194,7 +203,7 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
 
 	pthread_barrier_init(&params->configured, NULL, 2);
 
-	ret = pthread_create(thread, attr, rte_thread_init, (void *)params);
+	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
 	if (ret != 0) {
 		free(params);
 		return -ret;
diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
index b40ed249ed..da5e7c93ba 100644
--- a/lib/librte_eal/common/eal_thread.h
+++ b/lib/librte_eal/common/eal_thread.h
@@ -16,12 +16,14 @@
 __rte_noreturn void *eal_thread_loop(void *arg);
 
 /**
- * Init per-lcore info for master thread
+ * Init per-lcore info in current thread.
  *
  * @param lcore_id
- *   identifier of master lcore
+ *   identifier of lcore.
+ * @param cpuset
+ *   CPU affinity for this thread.
  */
-void eal_thread_init_master(unsigned lcore_id);
+void rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset);
 
 /**
  * Get the NUMA socket id from cpu id.
diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
index c41f265fac..b5ea11df16 100644
--- a/lib/librte_eal/freebsd/eal.c
+++ b/lib/librte_eal/freebsd/eal.c
@@ -877,7 +877,14 @@ rte_eal_init(int argc, char **argv)
 
 	eal_check_mem_on_local_socket();
 
-	eal_thread_init_master(rte_config.master_lcore);
+	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+			&lcore_config[rte_config.master_lcore].cpuset) != 0) {
+		rte_eal_init_alert("Cannot set affinity");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	rte_thread_init(rte_config.master_lcore,
+		&lcore_config[rte_config.master_lcore].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
 
@@ -908,6 +915,11 @@ rte_eal_init(int argc, char **argv)
 		snprintf(thread_name, sizeof(thread_name),
 				"lcore-slave-%d", i);
 		rte_thread_setname(lcore_config[i].thread_id, thread_name);
+
+		ret = pthread_setaffinity_np(lcore_config[i].thread_id,
+			sizeof(rte_cpuset_t), &lcore_config[i].cpuset);
+		if (ret != 0)
+			rte_panic("Cannot set affinity\n");
 	}
 
 	/*
diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
index 40676d9ef5..c1fb8eb2d8 100644
--- a/lib/librte_eal/freebsd/eal_thread.c
+++ b/lib/librte_eal/freebsd/eal_thread.c
@@ -66,29 +66,6 @@ rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
 	return rc;
 }
 
-/* set affinity for current thread */
-static int
-eal_thread_set_affinity(void)
-{
-	unsigned lcore_id = rte_lcore_id();
-
-	/* acquire system unique id  */
-	rte_gettid();
-
-	/* update EAL thread core affinity */
-	return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
-}
-
-void eal_thread_init_master(unsigned lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
-}
-
 /* main loop of threads */
 __rte_noreturn void *
 eal_thread_loop(__rte_unused void *arg)
@@ -113,19 +90,12 @@ eal_thread_loop(__rte_unused void *arg)
 	m2s = lcore_config[lcore_id].pipe_master2slave[0];
 	s2m = lcore_config[lcore_id].pipe_slave2master[1];
 
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
+	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
-
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
 		lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
 
-	__rte_trace_mem_per_thread_alloc();
 	rte_eal_trace_thread_lcore_ready(lcore_id, cpuset);
 
 	/* read on our pipe to get commands */
diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
index f162124a37..8638376b8a 100644
--- a/lib/librte_eal/linux/eal.c
+++ b/lib/librte_eal/linux/eal.c
@@ -1205,10 +1205,16 @@ rte_eal_init(int argc, char **argv)
 
 	eal_check_mem_on_local_socket();
 
-	eal_thread_init_master(rte_config.master_lcore);
+	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+			&lcore_config[rte_config.master_lcore].cpuset) != 0) {
+		rte_eal_init_alert("Cannot set affinity");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	rte_thread_init(rte_config.master_lcore,
+		&lcore_config[rte_config.master_lcore].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
-
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		rte_config.master_lcore, (uintptr_t)thread_id, cpuset,
 		ret == 0 ? "" : "...");
@@ -1240,6 +1246,11 @@ rte_eal_init(int argc, char **argv)
 		if (ret != 0)
 			RTE_LOG(DEBUG, EAL,
 				"Cannot set name for lcore thread\n");
+
+		ret = pthread_setaffinity_np(lcore_config[i].thread_id,
+			sizeof(rte_cpuset_t), &lcore_config[i].cpuset);
+		if (ret != 0)
+			rte_panic("Cannot set affinity\n");
 	}
 
 	/*
diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
index a52ebef3a4..07aec0c44d 100644
--- a/lib/librte_eal/linux/eal_thread.c
+++ b/lib/librte_eal/linux/eal_thread.c
@@ -66,29 +66,6 @@ rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
 	return rc;
 }
 
-/* set affinity for current EAL thread */
-static int
-eal_thread_set_affinity(void)
-{
-	unsigned lcore_id = rte_lcore_id();
-
-	/* acquire system unique id  */
-	rte_gettid();
-
-	/* update EAL thread core affinity */
-	return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
-}
-
-void eal_thread_init_master(unsigned lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
-}
-
 /* main loop of threads */
 __rte_noreturn void *
 eal_thread_loop(__rte_unused void *arg)
@@ -113,19 +90,12 @@ eal_thread_loop(__rte_unused void *arg)
 	m2s = lcore_config[lcore_id].pipe_master2slave[0];
 	s2m = lcore_config[lcore_id].pipe_slave2master[1];
 
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
+	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
-
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		lcore_id, (uintptr_t)thread_id, cpuset, ret == 0 ? "" : "...");
 
-	__rte_trace_mem_per_thread_alloc();
 	rte_eal_trace_thread_lcore_ready(lcore_id, cpuset);
 
 	/* read on our pipe to get commands */
diff --git a/lib/librte_eal/windows/eal.c b/lib/librte_eal/windows/eal.c
index 427a5557fa..23de12ab43 100644
--- a/lib/librte_eal/windows/eal.c
+++ b/lib/librte_eal/windows/eal.c
@@ -354,7 +354,8 @@ rte_eal_init(int argc, char **argv)
 		return -1;
 	}
 
-	eal_thread_init_master(rte_config.master_lcore);
+	rte_thread_init(rte_config.master_lcore,
+		&lcore_config[rte_config.master_lcore].cpuset);
 
 	RTE_LCORE_FOREACH_SLAVE(i) {
 
diff --git a/lib/librte_eal/windows/eal_thread.c b/lib/librte_eal/windows/eal_thread.c
index f12a2ec6ad..4f01881240 100644
--- a/lib/librte_eal/windows/eal_thread.c
+++ b/lib/librte_eal/windows/eal_thread.c
@@ -53,13 +53,6 @@ rte_eal_remote_launch(lcore_function_t *f, void *arg, unsigned int slave_id)
 	return 0;
 }
 
-void
-eal_thread_init_master(unsigned int lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-}
-
 /* main loop of threads */
 void *
 eal_thread_loop(void *arg __rte_unused)
@@ -84,8 +77,7 @@ eal_thread_loop(void *arg __rte_unused)
 	m2s = lcore_config[lcore_id].pipe_master2slave[0];
 	s2m = lcore_config[lcore_id].pipe_slave2master[1];
 
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
+	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s])\n",
 		lcore_id, (uintptr_t)thread_id, cpuset);
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v2 4/9] eal: introduce thread uninit helper
  2020-06-19 16:22 ` [dpdk-dev] [PATCH v2 0/9] Register non-EAL " David Marchand
                     ` (2 preceding siblings ...)
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 3/9] eal: introduce thread init helper David Marchand
@ 2020-06-19 16:22   ` David Marchand
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 5/9] eal: move lcore role code David Marchand
                     ` (4 subsequent siblings)
  8 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-19 16:22 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Jerin Jacob, Sunil Kumar Kori

This is a preparation step for dynamically unregistering threads.

Since we explicitly allocate a per thread trace buffer in
rte_thread_init, add an internal helper to free this buffer.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Note: I preferred renaming the current internal function to free all
threads trace buffers (new name trace_mem_free()) and reuse the previous
name (trace_mem_per_thread_free()) when freeing this buffer for a given
thread.

Changes since v1:
- rebased on master, removed Windows workaround wrt traces support,

---
 lib/librte_eal/common/eal_common_thread.c |  9 +++++
 lib/librte_eal/common/eal_common_trace.c  | 49 +++++++++++++++++++----
 lib/librte_eal/common/eal_thread.h        |  5 +++
 lib/librte_eal/common/eal_trace.h         |  1 +
 4 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index afb30236c5..9b9af75dc1 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -20,6 +20,7 @@
 #include "eal_internal_cfg.h"
 #include "eal_private.h"
 #include "eal_thread.h"
+#include "eal_trace.h"
 
 RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
 RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
@@ -161,6 +162,14 @@ rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
 	__rte_trace_mem_per_thread_alloc();
 }
 
+void
+rte_thread_uninit(void)
+{
+	trace_mem_per_thread_free();
+
+	RTE_PER_LCORE(_lcore_id) = LCORE_ID_ANY;
+}
+
 struct rte_thread_ctrl_params {
 	void *(*start_routine)(void *);
 	void *arg;
diff --git a/lib/librte_eal/common/eal_common_trace.c b/lib/librte_eal/common/eal_common_trace.c
index 875553d7e5..cd2d217b02 100644
--- a/lib/librte_eal/common/eal_common_trace.c
+++ b/lib/librte_eal/common/eal_common_trace.c
@@ -101,7 +101,7 @@ eal_trace_fini(void)
 {
 	if (!rte_trace_is_enabled())
 		return;
-	trace_mem_per_thread_free();
+	trace_mem_free();
 	trace_metadata_destroy();
 	eal_trace_args_free();
 }
@@ -370,24 +370,59 @@ __rte_trace_mem_per_thread_alloc(void)
 	rte_spinlock_unlock(&trace->lock);
 }
 
+static void
+trace_mem_per_thread_free_unlocked(struct thread_mem_meta *meta)
+{
+	if (meta->area == TRACE_AREA_HUGEPAGE)
+		eal_free_no_trace(meta->mem);
+	else if (meta->area == TRACE_AREA_HEAP)
+		free(meta->mem);
+}
+
 void
 trace_mem_per_thread_free(void)
+{
+	struct trace *trace = trace_obj_get();
+	struct __rte_trace_header *header;
+	uint32_t count;
+
+	if (RTE_PER_LCORE(trace_mem) == NULL)
+		return;
+
+	header = RTE_PER_LCORE(trace_mem);
+	rte_spinlock_lock(&trace->lock);
+	for (count = 0; count < trace->nb_trace_mem_list; count++) {
+		if (trace->lcore_meta[count].mem == header)
+			break;
+	}
+	if (count != trace->nb_trace_mem_list) {
+		struct thread_mem_meta *meta = &trace->lcore_meta[count];
+
+		trace_mem_per_thread_free_unlocked(meta);
+		if (count != trace->nb_trace_mem_list - 1) {
+			memmove(meta, meta + 1,
+				sizeof(*meta) *
+				 (trace->nb_trace_mem_list - count - 1));
+		}
+		trace->nb_trace_mem_list--;
+	}
+	rte_spinlock_unlock(&trace->lock);
+}
+
+void
+trace_mem_free(void)
 {
 	struct trace *trace = trace_obj_get();
 	uint32_t count;
-	void *mem;
 
 	if (!rte_trace_is_enabled())
 		return;
 
 	rte_spinlock_lock(&trace->lock);
 	for (count = 0; count < trace->nb_trace_mem_list; count++) {
-		mem = trace->lcore_meta[count].mem;
-		if (trace->lcore_meta[count].area == TRACE_AREA_HUGEPAGE)
-			eal_free_no_trace(mem);
-		else if (trace->lcore_meta[count].area == TRACE_AREA_HEAP)
-			free(mem);
+		trace_mem_per_thread_free_unlocked(&trace->lcore_meta[count]);
 	}
+	trace->nb_trace_mem_list = 0;
 	rte_spinlock_unlock(&trace->lock);
 }
 
diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
index da5e7c93ba..4ecd8fd53a 100644
--- a/lib/librte_eal/common/eal_thread.h
+++ b/lib/librte_eal/common/eal_thread.h
@@ -25,6 +25,11 @@ __rte_noreturn void *eal_thread_loop(void *arg);
  */
 void rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset);
 
+/**
+ * Uninitialize per-lcore info for current thread.
+ */
+void rte_thread_uninit(void);
+
 /**
  * Get the NUMA socket id from cpu id.
  * This function is private to EAL.
diff --git a/lib/librte_eal/common/eal_trace.h b/lib/librte_eal/common/eal_trace.h
index 8f60616156..92c5951c3a 100644
--- a/lib/librte_eal/common/eal_trace.h
+++ b/lib/librte_eal/common/eal_trace.h
@@ -106,6 +106,7 @@ int trace_metadata_create(void);
 void trace_metadata_destroy(void);
 int trace_mkdir(void);
 int trace_epoch_time_save(void);
+void trace_mem_free(void);
 void trace_mem_per_thread_free(void);
 
 /* EAL interface */
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v2 5/9] eal: move lcore role code
  2020-06-19 16:22 ` [dpdk-dev] [PATCH v2 0/9] Register non-EAL " David Marchand
                     ` (3 preceding siblings ...)
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 4/9] eal: introduce thread uninit helper David Marchand
@ 2020-06-19 16:22   ` David Marchand
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 6/9] eal: register non-EAL threads as lcores David Marchand
                     ` (3 subsequent siblings)
  8 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-19 16:22 UTC (permalink / raw)
  To: dev; +Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes, i.maximets

For consistency's sake, move all lcore role code into the dedicated
compilation unit / header.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/librte_eal/common/eal_common_lcore.c  | 11 +++++++
 lib/librte_eal/common/eal_common_thread.c | 11 -------
 lib/librte_eal/include/rte_eal.h          |  9 ------
 lib/librte_eal/include/rte_lcore.h        | 37 ++++++++++++++---------
 4 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 5404922a87..86d32a3dd7 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -63,6 +63,17 @@ rte_eal_lcore_role(unsigned int lcore_id)
 	return cfg->lcore_role[lcore_id];
 }
 
+int
+rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+
+	if (lcore_id >= RTE_MAX_LCORE)
+		return -EINVAL;
+
+	return cfg->lcore_role[lcore_id] == role;
+}
+
 int rte_lcore_is_enabled(unsigned int lcore_id)
 {
 	struct rte_config *cfg = rte_eal_get_configuration();
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 9b9af75dc1..4f96c50424 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -33,17 +33,6 @@ unsigned rte_socket_id(void)
 	return RTE_PER_LCORE(_socket_id);
 }
 
-int
-rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role)
-{
-	struct rte_config *cfg = rte_eal_get_configuration();
-
-	if (lcore_id >= RTE_MAX_LCORE)
-		return -EINVAL;
-
-	return cfg->lcore_role[lcore_id] == role;
-}
-
 static int
 eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
 {
diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
index 2edf8c6556..0913d1947c 100644
--- a/lib/librte_eal/include/rte_eal.h
+++ b/lib/librte_eal/include/rte_eal.h
@@ -31,15 +31,6 @@ extern "C" {
 /* Maximum thread_name length. */
 #define RTE_MAX_THREAD_NAME_LEN 16
 
-/**
- * The lcore role (used in RTE or not).
- */
-enum rte_lcore_role_t {
-	ROLE_RTE,
-	ROLE_OFF,
-	ROLE_SERVICE,
-};
-
 /**
  * The type of process in a linux, multi-process setup
  */
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 5c1d1926e9..3968c40693 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -24,6 +24,15 @@ extern "C" {
 
 RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
 
+/**
+ * The lcore role (used in RTE or not).
+ */
+enum rte_lcore_role_t {
+	ROLE_RTE,
+	ROLE_OFF,
+	ROLE_SERVICE,
+};
+
 /**
  * Get a lcore's role.
  *
@@ -34,6 +43,20 @@ RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
  */
 enum rte_lcore_role_t rte_eal_lcore_role(unsigned int lcore_id);
 
+/**
+ * Test if the core supplied has a specific role
+ *
+ * @param lcore_id
+ *   The identifier of the lcore, which MUST be between 0 and
+ *   RTE_MAX_LCORE-1.
+ * @param role
+ *   The role to be checked against.
+ * @return
+ *   Boolean value: positive if test is true; otherwise returns 0.
+ */
+int
+rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
+
 /**
  * Return the Application thread ID of the execution unit.
  *
@@ -283,20 +306,6 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
 		const pthread_attr_t *attr,
 		void *(*start_routine)(void *), void *arg);
 
-/**
- * Test if the core supplied has a specific role
- *
- * @param lcore_id
- *   The identifier of the lcore, which MUST be between 0 and
- *   RTE_MAX_LCORE-1.
- * @param role
- *   The role to be checked against.
- * @return
- *   Boolean value: positive if test is true; otherwise returns 0.
- */
-int
-rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
-
 #ifdef __cplusplus
 }
 #endif
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v2 6/9] eal: register non-EAL threads as lcores
  2020-06-19 16:22 ` [dpdk-dev] [PATCH v2 0/9] Register non-EAL " David Marchand
                     ` (4 preceding siblings ...)
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 5/9] eal: move lcore role code David Marchand
@ 2020-06-19 16:22   ` David Marchand
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 7/9] eal: add lcore init callbacks David Marchand
                     ` (2 subsequent siblings)
  8 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-19 16:22 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Thomas Monjalon, John McNamara, Marko Kovacevic,
	Anatoly Burakov, Olivier Matz, Andrew Rybchenko, Neil Horman

DPDK allows calling some parts of its API from a non-EAL thread but this
has some limitations.
OVS (and other applications) has its own thread management but still
wants to avoid such limitations by hacking RTE_PER_LCORE(_lcore_id) and
faking EAL threads that are potentially unknown to some DPDK components.

Introduce a new API to register non-EAL threads and associate them with a
free lcore with a new NON_EAL role.
This role denotes lcores that do not run the DPDK mainloop and as such
prevents use of rte_eal_wait_lcore() and similar functions.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v1:
- moved cleanup on lcore role code in patch 5,
- added unit test,
- updated documentation,
- changed naming from "external thread" to "registered non-EAL thread"

---
 MAINTAINERS                                   |   1 +
 app/test/Makefile                             |   1 +
 app/test/autotest_data.py                     |   6 +
 app/test/meson.build                          |   2 +
 app/test/test_lcores.c                        | 139 ++++++++++++++++++
 doc/guides/howto/debug_troubleshoot.rst       |   5 +-
 .../prog_guide/env_abstraction_layer.rst      |  22 +--
 doc/guides/prog_guide/mempool_lib.rst         |   2 +-
 lib/librte_eal/common/eal_common_lcore.c      |  44 +++++-
 lib/librte_eal/common/eal_common_thread.c     |  33 +++++
 lib/librte_eal/common/eal_private.h           |  18 +++
 lib/librte_eal/include/rte_lcore.h            |  18 ++-
 lib/librte_eal/rte_eal_version.map            |   4 +
 lib/librte_mempool/rte_mempool.h              |  11 +-
 14 files changed, 284 insertions(+), 22 deletions(-)
 create mode 100644 app/test/test_lcores.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 816696caf2..fe9e74ffbc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -183,6 +183,7 @@ F: app/test/test_debug.c
 F: app/test/test_eal*
 F: app/test/test_errno.c
 F: app/test/test_interrupts.c
+F: app/test/test_lcores.c
 F: app/test/test_logs.c
 F: app/test/test_memcpy*
 F: app/test/test_per_lcore.c
diff --git a/app/test/Makefile b/app/test/Makefile
index 7b96a03a64..4a8dea2425 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -97,6 +97,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY) += test_flow_classify.c
 endif
 
 SRCS-y += test_rwlock.c
+SRCS-y += test_lcores.c
 
 SRCS-$(CONFIG_RTE_LIBRTE_STACK) += test_stack.c
 SRCS-$(CONFIG_RTE_LIBRTE_STACK) += test_stack_perf.c
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index fc3fcc159e..600b130966 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -62,6 +62,12 @@
         "Func":    rwlock_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Lcores autotest",
+        "Command": "lcores_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Logs autotest",
         "Command": "logs_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index 5233ead46e..a57477b7cc 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -67,6 +67,7 @@ test_sources = files('commands.c',
 	'test_ipsec_perf.c',
 	'test_kni.c',
 	'test_kvargs.c',
+	'test_lcores.c',
 	'test_logs.c',
 	'test_lpm.c',
 	'test_lpm6.c',
@@ -206,6 +207,7 @@ fast_tests = [
         ['hash_autotest', true],
         ['interrupt_autotest', true],
         ['ipfrag_autotest', false],
+        ['lcores_autotest', true],
         ['logs_autotest', true],
         ['lpm_autotest', true],
         ['lpm6_autotest', true],
diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
new file mode 100644
index 0000000000..155b255b94
--- /dev/null
+++ b/app/test/test_lcores.c
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2020 Red Hat, Inc.
+ */
+
+#include <pthread.h>
+#include <string.h>
+
+#include <rte_lcore.h>
+
+#include "test.h"
+
+struct thread_context {
+	enum { INIT, ERROR, DONE } state;
+	bool lcore_id_any;
+	pthread_t id;
+	unsigned int *registered_count;
+};
+static void *thread_loop(void *arg)
+{
+	struct thread_context *t = arg;
+	unsigned int lcore_id;
+
+	lcore_id = rte_lcore_id();
+	if (lcore_id != LCORE_ID_ANY) {
+		printf("Incorrect lcore id for new thread %u\n", lcore_id);
+		t->state = ERROR;
+	}
+	rte_thread_register();
+	lcore_id = rte_lcore_id();
+	if ((t->lcore_id_any && lcore_id != LCORE_ID_ANY) ||
+			(!t->lcore_id_any && lcore_id == LCORE_ID_ANY)) {
+		printf("Could not register new thread, got %u while %sexpecting %u\n",
+			lcore_id, t->lcore_id_any ? "" : "not ", LCORE_ID_ANY);
+		t->state = ERROR;
+	}
+	/* Report register happened to the control thread. */
+	__atomic_add_fetch(t->registered_count, 1, __ATOMIC_RELEASE);
+
+	/* Wait for release from the control thread. */
+	while (__atomic_load_n(t->registered_count, __ATOMIC_ACQUIRE) != 0)
+		;
+	rte_thread_unregister();
+	lcore_id = rte_lcore_id();
+	if (lcore_id != LCORE_ID_ANY) {
+		printf("Could not unregister new thread, %u still assigned\n",
+			lcore_id);
+		t->state = ERROR;
+	}
+
+	if (t->state != ERROR)
+		t->state = DONE;
+
+	return NULL;
+}
+
+static int
+test_non_eal_lcores(unsigned int eal_threads_count)
+{
+	struct thread_context thread_contexts[RTE_MAX_LCORE];
+	unsigned int non_eal_threads_count;
+	struct thread_context *t;
+	unsigned int registered_count;
+	unsigned int i;
+	int ret;
+
+	non_eal_threads_count = 0;
+	registered_count = 0;
+
+	/* Try to create as many threads as possible. */
+	for (i = 0; i < RTE_MAX_LCORE - eal_threads_count; i++) {
+		t = &thread_contexts[i];
+		t->state = INIT;
+		t->registered_count = &registered_count;
+		t->lcore_id_any = false;
+		if (pthread_create(&t->id, NULL, thread_loop, t) != 0)
+			break;
+		non_eal_threads_count++;
+	}
+	printf("non-EAL threads count: %u\n", non_eal_threads_count);
+	/* Wait all non-EAL threads to register. */
+	while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+			non_eal_threads_count)
+		;
+
+	/* We managed to create the max number of threads, let's try to create
+	 * one more. This will allow one more check.
+	 */
+	if (eal_threads_count + non_eal_threads_count < RTE_MAX_LCORE)
+		goto skip_lcore_any;
+	t = &thread_contexts[non_eal_threads_count];
+	t->state = INIT;
+	t->registered_count = &registered_count;
+	t->lcore_id_any = true;
+	if (pthread_create(&t->id, NULL, thread_loop, t) == 0) {
+		non_eal_threads_count++;
+		printf("non-EAL threads count: %u\n", non_eal_threads_count);
+		while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+				non_eal_threads_count)
+			;
+	}
+
+skip_lcore_any:
+	/* Release all threads, and check their states. */
+	__atomic_store_n(&registered_count, 0, __ATOMIC_RELEASE);
+	ret = 0;
+	for (i = 0; i < non_eal_threads_count; i++) {
+		t = &thread_contexts[i];
+		pthread_join(t->id, NULL);
+		if (t->state != DONE)
+			ret = -1;
+	}
+
+	return ret;
+}
+
+static int
+test_lcores(void)
+{
+	unsigned int eal_threads_count = 0;
+	unsigned int i;
+
+	for (i = 0; i < RTE_MAX_LCORE; i++) {
+		if (!rte_lcore_has_role(i, ROLE_OFF))
+			eal_threads_count++;
+	}
+	if (eal_threads_count == 0) {
+		printf("Something is broken, no EAL thread detected.\n");
+		return TEST_FAILED;
+	}
+	printf("EAL threads count: %u, RTE_MAX_LCORE=%u\n", eal_threads_count,
+		RTE_MAX_LCORE);
+
+	if (test_non_eal_lcores(eal_threads_count) < 0)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(lcores_autotest, test_lcores);
diff --git a/doc/guides/howto/debug_troubleshoot.rst b/doc/guides/howto/debug_troubleshoot.rst
index cef016b2fe..5a46f5fba3 100644
--- a/doc/guides/howto/debug_troubleshoot.rst
+++ b/doc/guides/howto/debug_troubleshoot.rst
@@ -307,8 +307,9 @@ Custom worker function :numref:`dtg_distributor_worker`.
 
 #. Configuration issue isolation
 
-   * Identify core role using ``rte_eal_lcore_role`` to identify RTE, OFF and
-     SERVICE. Check performance functions are mapped to run on the cores.
+   * Identify core role using ``rte_eal_lcore_role`` to identify RTE, OFF,
+     SERVICE and NON_EAL. Check performance functions are mapped to run on the
+     cores.
 
    * For high-performance execution logic ensure running it on correct NUMA
      and non-master core.
diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
index 48a2fec066..f64ae953d1 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -564,9 +564,13 @@ It's also compatible with the pattern of corelist('-l') option.
 non-EAL pthread support
 ~~~~~~~~~~~~~~~~~~~~~~~
 
-It is possible to use the DPDK execution context with any user pthread (aka. Non-EAL pthreads).
-In a non-EAL pthread, the *_lcore_id* is always LCORE_ID_ANY which identifies that it is not an EAL thread with a valid, unique, *_lcore_id*.
-Some libraries will use an alternative unique ID (e.g. TID), some will not be impacted at all, and some will work but with limitations (e.g. timer and mempool libraries).
+It is possible to use the DPDK execution context with any user pthread (aka. non-EAL pthreads).
+There are two kinds of non-EAL pthreads:
+
+- a registered non-EAL pthread with a valid *_lcore_id* that was successfully assigned by calling ``rte_thread_register()``,
+- a non registered non-EAL pthread with a LCORE_ID_ANY,
+
+For non registered non-EAL pthread (with a LCORE_ID_ANY *_lcore_id*), some libraries will use an alternative unique ID (e.g. TID), some will not be impacted at all, and some will work but with limitations (e.g. timer and mempool libraries).
 
 All these impacts are mentioned in :ref:`known_issue_label` section.
 
@@ -613,9 +617,9 @@ Known Issues
 + rte_mempool
 
   The rte_mempool uses a per-lcore cache inside the mempool.
-  For non-EAL pthreads, ``rte_lcore_id()`` will not return a valid number.
-  So for now, when rte_mempool is used with non-EAL pthreads, the put/get operations will bypass the default mempool cache and there is a performance penalty because of this bypass.
-  Only user-owned external caches can be used in a non-EAL context in conjunction with ``rte_mempool_generic_put()`` and ``rte_mempool_generic_get()`` that accept an explicit cache parameter.
+  For unregistered non-EAL pthreads, ``rte_lcore_id()`` will not return a valid number.
+  So for now, when rte_mempool is used with unregistered non-EAL pthreads, the put/get operations will bypass the default mempool cache and there is a performance penalty because of this bypass.
+  Only user-owned external caches can be used in an unregistered non-EAL context in conjunction with ``rte_mempool_generic_put()`` and ``rte_mempool_generic_get()`` that accept an explicit cache parameter.
 
 + rte_ring
 
@@ -660,15 +664,15 @@ Known Issues
 
 + rte_timer
 
-  Running  ``rte_timer_manage()`` on a non-EAL pthread is not allowed. However, resetting/stopping the timer from a non-EAL pthread is allowed.
+  Running  ``rte_timer_manage()`` on an unregistered non-EAL pthread is not allowed. However, resetting/stopping the timer from a non-EAL pthread is allowed.
 
 + rte_log
 
-  In non-EAL pthreads, there is no per thread loglevel and logtype, global loglevels are used.
+  In unregistered non-EAL pthreads, there is no per thread loglevel and logtype, global loglevels are used.
 
 + misc
 
-  The debug statistics of rte_ring, rte_mempool and rte_timer are not supported in a non-EAL pthread.
+  The debug statistics of rte_ring, rte_mempool and rte_timer are not supported in an unregistered non-EAL pthread.
 
 cgroup control
 ~~~~~~~~~~~~~~
diff --git a/doc/guides/prog_guide/mempool_lib.rst b/doc/guides/prog_guide/mempool_lib.rst
index f8b430d656..e3e1f940be 100644
--- a/doc/guides/prog_guide/mempool_lib.rst
+++ b/doc/guides/prog_guide/mempool_lib.rst
@@ -103,7 +103,7 @@ The maximum size of the cache is static and is defined at compilation time (CONF
 Alternatively to the internal default per-lcore local cache, an application can create and manage external caches through the ``rte_mempool_cache_create()``, ``rte_mempool_cache_free()`` and ``rte_mempool_cache_flush()`` calls.
 These user-owned caches can be explicitly passed to ``rte_mempool_generic_put()`` and ``rte_mempool_generic_get()``.
 The ``rte_mempool_default_cache()`` call returns the default internal cache if any.
-In contrast to the default caches, user-owned caches can be used by non-EAL threads too.
+In contrast to the default caches, user-owned caches can be used by unregistered non-EAL threads too.
 
 Mempool Handlers
 ------------------------
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 86d32a3dd7..7db05428e7 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -6,12 +6,13 @@
 #include <limits.h>
 #include <string.h>
 
-#include <rte_errno.h>
-#include <rte_log.h>
-#include <rte_eal.h>
-#include <rte_lcore.h>
 #include <rte_common.h>
 #include <rte_debug.h>
+#include <rte_eal.h>
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_spinlock.h>
 
 #include "eal_private.h"
 #include "eal_thread.h"
@@ -220,3 +221,38 @@ rte_socket_id_by_idx(unsigned int idx)
 	}
 	return config->numa_nodes[idx];
 }
+
+static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
+
+unsigned int
+eal_lcore_non_eal_allocate(void)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	unsigned int lcore_id;
+
+	rte_spinlock_lock(&lcore_lock);
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (cfg->lcore_role[lcore_id] != ROLE_OFF)
+			continue;
+		cfg->lcore_role[lcore_id] = ROLE_NON_EAL;
+		cfg->lcore_count++;
+		break;
+	}
+	if (lcore_id == RTE_MAX_LCORE)
+		RTE_LOG(DEBUG, EAL, "No lcore available.\n");
+	rte_spinlock_unlock(&lcore_lock);
+	return lcore_id;
+}
+
+void
+eal_lcore_non_eal_release(unsigned int lcore_id)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+
+	rte_spinlock_lock(&lcore_lock);
+	if (cfg->lcore_role[lcore_id] == ROLE_NON_EAL) {
+		cfg->lcore_role[lcore_id] = ROLE_OFF;
+		cfg->lcore_count--;
+	}
+	rte_spinlock_unlock(&lcore_lock);
+}
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 4f96c50424..338194832b 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -236,3 +236,36 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
 	pthread_join(*thread, NULL);
 	return -ret;
 }
+
+void
+rte_thread_register(void)
+{
+	unsigned int lcore_id;
+	rte_cpuset_t cpuset;
+
+	/* EAL init flushes all lcores, we can't register before. */
+	assert(internal_config.init_complete == 1);
+	if (pthread_getaffinity_np(pthread_self(), sizeof(cpuset),
+			&cpuset) != 0)
+		CPU_ZERO(&cpuset);
+	lcore_id = eal_lcore_non_eal_allocate();
+	if (lcore_id >= RTE_MAX_LCORE)
+		lcore_id = LCORE_ID_ANY;
+	rte_thread_init(lcore_id, &cpuset);
+	if (lcore_id != LCORE_ID_ANY)
+		RTE_LOG(DEBUG, EAL, "Registered non-EAL thread as lcore %u.\n",
+			lcore_id);
+}
+
+void
+rte_thread_unregister(void)
+{
+	unsigned int lcore_id = rte_lcore_id();
+
+	if (lcore_id != LCORE_ID_ANY)
+		eal_lcore_non_eal_release(lcore_id);
+	rte_thread_uninit();
+	if (lcore_id != LCORE_ID_ANY)
+		RTE_LOG(DEBUG, EAL, "Unregistered non-EAL thread (was lcore %u).\n",
+			lcore_id);
+}
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 0592fcd694..73238ff157 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -396,6 +396,24 @@ uint64_t get_tsc_freq(void);
  */
 uint64_t get_tsc_freq_arch(void);
 
+/**
+ * Allocate a free lcore to associate to a non-EAL thread.
+ *
+ * @return
+ *   - the id of a lcore with role ROLE_NON_EAL on success.
+ *   - RTE_MAX_LCORE if none was available.
+ */
+unsigned int eal_lcore_non_eal_allocate(void);
+
+/**
+ * Release the lcore used by a non-EAL thread.
+ * Counterpart of eal_lcore_non_eal_allocate().
+ *
+ * @param lcore_id
+ *   The lcore with role ROLE_NON_EAL to release.
+ */
+void eal_lcore_non_eal_release(unsigned int lcore_id);
+
 /**
  * Prepare physical memory mapping
  * i.e. hugepages on Linux and
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 3968c40693..ea86220394 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -31,6 +31,7 @@ enum rte_lcore_role_t {
 	ROLE_RTE,
 	ROLE_OFF,
 	ROLE_SERVICE,
+	ROLE_NON_EAL,
 };
 
 /**
@@ -67,7 +68,8 @@ rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
  *   to run threads with lcore IDs 0, 1, 2 and 3 on physical core 10..
  *
  * @return
- *  Logical core ID (in EAL thread) or LCORE_ID_ANY (in non-EAL thread)
+ *  Logical core ID (in EAL thread or registered non-EAL thread) or
+ *  LCORE_ID_ANY (in unregistered non-EAL thread)
  */
 static inline unsigned
 rte_lcore_id(void)
@@ -279,6 +281,20 @@ int rte_thread_setname(pthread_t id, const char *name);
 __rte_experimental
 int rte_thread_getname(pthread_t id, char *name, size_t len);
 
+/**
+ * Register current non-EAL thread as a lcore.
+ */
+__rte_experimental
+void
+rte_thread_register(void);
+
+/**
+ * Unregister current thread and release lcore if one was associated.
+ */
+__rte_experimental
+void
+rte_thread_unregister(void);
+
 /**
  * Create a control thread.
  *
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 0d42d44ce9..5503dd7620 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -393,6 +393,10 @@ EXPERIMENTAL {
 	rte_trace_point_lookup;
 	rte_trace_regexp;
 	rte_trace_save;
+
+	# added in 20.08
+	rte_thread_register;
+	rte_thread_unregister;
 };
 
 INTERNAL {
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 652d19f9f1..9e0ee052b3 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -28,9 +28,9 @@
  * rte_mempool_get() or rte_mempool_put() are designed to be called from an EAL
  * thread due to the internal per-lcore cache. Due to the lack of caching,
  * rte_mempool_get() or rte_mempool_put() performance will suffer when called
- * by non-EAL threads. Instead, non-EAL threads should call
- * rte_mempool_generic_get() or rte_mempool_generic_put() with a user cache
- * created with rte_mempool_cache_create().
+ * by unregistered non-EAL threads. Instead, unregistered non-EAL threads
+ * should call rte_mempool_generic_get() or rte_mempool_generic_put() with a
+ * user cache created with rte_mempool_cache_create().
  */
 
 #include <stdio.h>
@@ -1233,7 +1233,7 @@ void rte_mempool_dump(FILE *f, struct rte_mempool *mp);
 /**
  * Create a user-owned mempool cache.
  *
- * This can be used by non-EAL threads to enable caching when they
+ * This can be used by unregistered non-EAL threads to enable caching when they
  * interact with a mempool.
  *
  * @param size
@@ -1264,7 +1264,8 @@ rte_mempool_cache_free(struct rte_mempool_cache *cache);
  * @param lcore_id
  *   The logical core id.
  * @return
- *   A pointer to the mempool cache or NULL if disabled or non-EAL thread.
+ *   A pointer to the mempool cache or NULL if disabled or unregistered non-EAL
+ *   thread.
  */
 static __rte_always_inline struct rte_mempool_cache *
 rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v2 7/9] eal: add lcore init callbacks
  2020-06-19 16:22 ` [dpdk-dev] [PATCH v2 0/9] Register non-EAL " David Marchand
                     ` (5 preceding siblings ...)
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 6/9] eal: register non-EAL threads as lcores David Marchand
@ 2020-06-19 16:22   ` David Marchand
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 8/9] eal: add lcore iterators David Marchand
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 9/9] mempool/bucket: handle non-EAL lcores David Marchand
  8 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-19 16:22 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Neil Horman

DPDK components and applications can have their say when a new lcore is
initialized. For this, they can register a callback for initializing and
releasing their private data.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v1:
- added unit test (some coverage is still missing, to be completed in v3),
- preferred callback and removed mention of notification,

---
 app/test/test_lcores.c                   |  95 ++++++++++++++++
 lib/librte_eal/common/eal_common_lcore.c | 139 ++++++++++++++++++++++-
 lib/librte_eal/common/eal_private.h      |   3 +-
 lib/librte_eal/include/rte_lcore.h       |  68 +++++++++++
 lib/librte_eal/rte_eal_version.map       |   2 +
 5 files changed, 301 insertions(+), 6 deletions(-)

diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
index 155b255b94..cf2439f71d 100644
--- a/app/test/test_lcores.c
+++ b/app/test/test_lcores.c
@@ -5,6 +5,7 @@
 #include <pthread.h>
 #include <string.h>
 
+#include <rte_common.h>
 #include <rte_lcore.h>
 
 #include "test.h"
@@ -113,6 +114,95 @@ test_non_eal_lcores(unsigned int eal_threads_count)
 	return ret;
 }
 
+struct limit_lcore_context {
+	unsigned int init;
+	unsigned int max;
+	unsigned int uninit;
+};
+static int
+limit_lcores_init(unsigned int lcore_id __rte_unused, void *arg)
+{
+	struct limit_lcore_context *l = arg;
+
+	l->init++;
+	if (l->init > l->max)
+		return -1;
+	return 0;
+}
+static void
+limit_lcores_uninit(unsigned int lcore_id __rte_unused, void *arg)
+{
+	struct limit_lcore_context *l = arg;
+
+	l->uninit++;
+}
+
+static int
+test_lcores_callback(unsigned int eal_threads_count)
+{
+	struct limit_lcore_context l;
+	void *handle;
+
+	/* Refuse last lcore => callback register error. */
+	memset(&l, 0, sizeof(l));
+	l.max = eal_threads_count - 1;
+	handle = rte_lcore_callback_register("limit", limit_lcores_init,
+		limit_lcores_uninit, &l);
+	if (handle != NULL) {
+		printf("lcore callback register should have failed\n");
+		goto error;
+	}
+	/* Refusal happens at the n th call to the init callback.
+	 * Besides, n - 1 were accepted, so we expect as many uninit calls when
+	 * the rollback happens.
+	 */
+	if (l.init != eal_threads_count) {
+		printf("lcore init calls failed: expected %u, got %u\n",
+			eal_threads_count, l.init);
+		goto error;
+	}
+	if (l.uninit != eal_threads_count - 1) {
+		printf("lcore uninit calls failed: expected %u, got %u\n",
+			eal_threads_count - 1, l.uninit);
+		goto error;
+	}
+
+	/* Accept all lcore and unregister. */
+	memset(&l, 0, sizeof(l));
+	l.max = eal_threads_count;
+	handle = rte_lcore_callback_register("limit", limit_lcores_init,
+		limit_lcores_uninit, &l);
+	if (handle == NULL) {
+		printf("lcore callback register failed\n");
+		goto error;
+	}
+	if (l.uninit != 0) {
+		printf("lcore uninit got called %u times during register\n",
+			l.uninit);
+		goto error;
+	}
+	rte_lcore_callback_unregister(handle);
+	handle = NULL;
+	if (l.init != eal_threads_count) {
+		printf("lcore init got called %u times during unregister (expected %u)\n",
+			l.init, eal_threads_count);
+		goto error;
+	}
+	if (l.uninit != eal_threads_count) {
+		printf("lcore uninit calls failed: expected %u, got %u\n",
+			eal_threads_count, l.uninit);
+		goto error;
+	}
+
+	return 0;
+
+error:
+	if (handle != NULL)
+		rte_lcore_callback_unregister(handle);
+
+	return -1;
+}
+
 static int
 test_lcores(void)
 {
@@ -133,6 +223,11 @@ test_lcores(void)
 	if (test_non_eal_lcores(eal_threads_count) < 0)
 		return TEST_FAILED;
 
+	if (test_lcores_callback(eal_threads_count) < 0)
+		return TEST_FAILED;
+
+	/* FIXME: missing a test on callback + registering non-EAL threads */
+
 	return TEST_SUCCESS;
 }
 
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 7db05428e7..5140026b6c 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -223,11 +223,114 @@ rte_socket_id_by_idx(unsigned int idx)
 }
 
 static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
+struct lcore_callback {
+	TAILQ_ENTRY(lcore_callback) next;
+	char *name;
+	rte_lcore_init_cb init;
+	rte_lcore_uninit_cb uninit;
+	void *arg;
+};
+static TAILQ_HEAD(lcore_callbacks_head, lcore_callback) lcore_callbacks =
+	TAILQ_HEAD_INITIALIZER(lcore_callbacks);
+
+static int
+callback_init(struct lcore_callback *callback, unsigned int lcore_id)
+{
+	if (callback->init == NULL)
+		return 0;
+	RTE_LOG(DEBUG, EAL, "Call init for lcore callback %s, lcore_id %u\n",
+		callback->name, lcore_id);
+	return callback->init(lcore_id, callback->arg);
+}
+
+static void
+callback_uninit(struct lcore_callback *callback, unsigned int lcore_id)
+{
+	if (callback->uninit == NULL)
+		return;
+	RTE_LOG(DEBUG, EAL, "Call uninit for lcore callback %s, lcore_id %u\n",
+		callback->name, lcore_id);
+	callback->uninit(lcore_id, callback->arg);
+}
+
+void *
+rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
+	rte_lcore_uninit_cb uninit, void *arg)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	struct lcore_callback *callback;
+	unsigned int lcore_id;
+
+	callback = calloc(1, sizeof(*callback));
+	if (callback == NULL)
+		return NULL;
+	if (asprintf(&callback->name, "%s-%p", name, arg) == -1) {
+		free(callback);
+		return NULL;
+	}
+	callback->init = init;
+	callback->uninit = uninit;
+	callback->arg = arg;
+	rte_spinlock_lock(&lcore_lock);
+	if (callback->init == NULL)
+		goto no_init;
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (cfg->lcore_role[lcore_id] == ROLE_OFF)
+			continue;
+		if (callback_init(callback, lcore_id) == 0)
+			continue;
+		/* Init refused for this lcore: roll back the lcores that were
+		 * already initialized, then release the callback and its name.
+		 */
+		for (; lcore_id != 0; lcore_id--) {
+			if (cfg->lcore_role[lcore_id - 1] != ROLE_OFF)
+				callback_uninit(callback, lcore_id - 1);
+		}
+		free(callback->name);
+		free(callback);
+		callback = NULL;
+		goto out;
+	}
+no_init:
+	TAILQ_INSERT_TAIL(&lcore_callbacks, callback, next);
+	RTE_LOG(DEBUG, EAL, "Registered new lcore callback %s (%sinit, %suninit).\n",
+		callback->name, callback->init == NULL ? "NO " : "",
+		callback->uninit == NULL ? "NO " : "");
+out:
+	rte_spinlock_unlock(&lcore_lock);
+	return callback;
+}
+
+void
+rte_lcore_callback_unregister(void *handle)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	struct lcore_callback *callback = handle;
+	unsigned int lcore_id;
+
+	rte_spinlock_lock(&lcore_lock);
+	if (callback->uninit == NULL)
+		goto no_uninit;
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (cfg->lcore_role[lcore_id] == ROLE_OFF)
+			continue;
+		callback_uninit(callback, lcore_id);
+	}
+no_uninit:
+	TAILQ_REMOVE(&lcore_callbacks, callback, next);
+	rte_spinlock_unlock(&lcore_lock);
+	RTE_LOG(DEBUG, EAL, "Unregistered lcore callback %s-%p.\n",
+		callback->name, callback->arg);
+	free(callback->name);
+	free(callback);
+}
 
 unsigned int
 eal_lcore_non_eal_allocate(void)
 {
 	struct rte_config *cfg = rte_eal_get_configuration();
+	struct lcore_callback *callback;
+	struct lcore_callback *prev;
 	unsigned int lcore_id;
 
 	rte_spinlock_lock(&lcore_lock);
@@ -238,8 +341,30 @@ eal_lcore_non_eal_allocate(void)
 		cfg->lcore_count++;
 		break;
 	}
-	if (lcore_id == RTE_MAX_LCORE)
+	if (lcore_id == RTE_MAX_LCORE) {
 		RTE_LOG(DEBUG, EAL, "No lcore available.\n");
+		goto out;
+	}
+	TAILQ_FOREACH(callback, &lcore_callbacks, next) {
+		if (callback_init(callback, lcore_id) == 0)
+			continue;
+		/* Callback refused init for this lcore, call uninit for all
+		 * previous callbacks, walking the list backwards from the
+		 * refusing callback.
+		 */
+		prev = TAILQ_PREV(callback, lcore_callbacks_head, next);
+		while (prev != NULL) {
+			callback_uninit(prev, lcore_id);
+			prev = TAILQ_PREV(prev, lcore_callbacks_head, next);
+		}
+		RTE_LOG(DEBUG, EAL, "Initialization refused for lcore %u.\n",
+			lcore_id);
+		cfg->lcore_role[lcore_id] = ROLE_OFF;
+		cfg->lcore_count--;
+		lcore_id = RTE_MAX_LCORE;
+		goto out;
+	}
+out:
 	rte_spinlock_unlock(&lcore_lock);
 	return lcore_id;
 }
@@ -248,11 +373,15 @@ void
 eal_lcore_non_eal_release(unsigned int lcore_id)
 {
 	struct rte_config *cfg = rte_eal_get_configuration();
+	struct lcore_callback *callback;
 
 	rte_spinlock_lock(&lcore_lock);
-	if (cfg->lcore_role[lcore_id] == ROLE_NON_EAL) {
-		cfg->lcore_role[lcore_id] = ROLE_OFF;
-		cfg->lcore_count--;
-	}
+	if (cfg->lcore_role[lcore_id] != ROLE_NON_EAL)
+		goto out;
+	TAILQ_FOREACH(callback, &lcore_callbacks, next)
+		callback_uninit(callback, lcore_id);
+	cfg->lcore_role[lcore_id] = ROLE_OFF;
+	cfg->lcore_count--;
+out:
 	rte_spinlock_unlock(&lcore_lock);
 }
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 73238ff157..fc79564111 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -401,7 +401,8 @@ uint64_t get_tsc_freq_arch(void);
  *
  * @return
  *   - the id of a lcore with role ROLE_NON_EAL on success.
- *   - RTE_MAX_LCORE if none was available.
+ *   - RTE_MAX_LCORE if none was available or initializing was refused (see
+ *     rte_lcore_callback_register).
  */
 unsigned int eal_lcore_non_eal_allocate(void);
 
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index ea86220394..27b29a1f87 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -229,6 +229,74 @@ unsigned int rte_get_next_lcore(unsigned int i, int skip_master, int wrap);
 	     i<RTE_MAX_LCORE;						\
 	     i = rte_get_next_lcore(i, 1, 0))
 
+/**
+ * Callback prototype for initializing lcores.
+ *
+ * @param lcore_id
+ *   The lcore to consider.
+ * @param arg
+ *   An opaque pointer passed at callback registration.
+ * @return
+ *   - -1 when refusing this operation,
+ *   - 0 otherwise.
+ */
+typedef int (*rte_lcore_init_cb)(unsigned int lcore_id, void *arg);
+
+/**
+ * Callback prototype for uninitializing lcores.
+ *
+ * @param lcore_id
+ *   The lcore to consider.
+ * @param arg
+ *   An opaque pointer passed at callback registration.
+ */
+typedef void (*rte_lcore_uninit_cb)(unsigned int lcore_id, void *arg);
+
+/**
+ * Register callbacks invoked when initializing and uninitializing a lcore.
+ *
+ * This function calls the init callback with all initialized lcores.
+ * Any error reported by the init callback triggers a rollback calling the
+ * uninit callback for each lcore.
+ * If this step succeeds, the callbacks are put in the lcore callbacks list
+ * that will get called for each lcore allocation/release.
+ *
+ * Note: callbacks execution is serialised under a lock protecting the lcores
+ * and callbacks list.
+ *
+ * @param name
+ *   A name serving as a small description for this callback.
+ * @param init
+ *   The callback invoked when a lcore_id is initialized.
+ * @param uninit
+ *   The callback invoked when a lcore_id is uninitialized.
+ * @param arg
+ *   An optional argument that gets passed to the callback when it gets
+ *   invoked.
+ * @return
+ *   On success, returns an opaque pointer for the registered object.
+ *   On failure (either memory allocation issue in the function itself or an
+ *   error is returned by the init callback itself), returns NULL.
+ */
+__rte_experimental
+void *
+rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
+	rte_lcore_uninit_cb uninit, void *arg);
+
+/**
+ * Unregister callbacks previously registered with rte_lcore_callback_register.
+ *
+ * This function calls the uninit callback with all initialized lcores.
+ * The callbacks are then removed from the lcore callbacks list.
+ *
+ * @param handle
+ *   The handle pointer returned by a former successful call to
+ *   rte_lcore_callback_register.
+ */
+__rte_experimental
+void
+rte_lcore_callback_unregister(void *handle);
+
 /**
  * Set core affinity of the current thread.
  * Support both EAL and non-EAL thread and update TLS.
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 5503dd7620..c3e762c1d9 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -395,6 +395,8 @@ EXPERIMENTAL {
 	rte_trace_save;
 
 	# added in 20.08
+	rte_lcore_callback_register;
+	rte_lcore_callback_unregister;
 	rte_thread_register;
 	rte_thread_unregister;
 };
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v2 8/9] eal: add lcore iterators
  2020-06-19 16:22 ` [dpdk-dev] [PATCH v2 0/9] Register non-EAL " David Marchand
                     ` (6 preceding siblings ...)
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 7/9] eal: add lcore init callbacks David Marchand
@ 2020-06-19 16:22   ` David Marchand
  2020-06-20  2:21     ` Stephen Hemminger
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 9/9] mempool/bucket: handle non-EAL lcores David Marchand
  8 siblings, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-06-19 16:22 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Neil Horman

Add a helper to iterate all lcores.
The iterator callback is read-only wrt the lcores list.

Implement a dump function on top of this for debugging.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v1:
- introduced lcore iterators and implemented rte_lcore_dump,
  this iterator mechanism can then be used outside of EAL,

---
 lib/librte_eal/common/eal_common_lcore.c  | 77 ++++++++++++++++++++---
 lib/librte_eal/common/eal_common_thread.c | 16 +++--
 lib/librte_eal/common/eal_thread.h        | 13 +++-
 lib/librte_eal/freebsd/eal.c              |  2 +-
 lib/librte_eal/freebsd/eal_thread.c       |  2 +-
 lib/librte_eal/include/rte_lcore.h        | 47 +++++++++++++-
 lib/librte_eal/linux/eal.c                |  2 +-
 lib/librte_eal/linux/eal_thread.c         |  2 +-
 lib/librte_eal/rte_eal_version.map        |  2 +
 9 files changed, 140 insertions(+), 23 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 5140026b6c..2524356a88 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -12,7 +12,7 @@
 #include <rte_errno.h>
 #include <rte_lcore.h>
 #include <rte_log.h>
-#include <rte_spinlock.h>
+#include <rte_rwlock.h>
 
 #include "eal_private.h"
 #include "eal_thread.h"
@@ -222,7 +222,7 @@ rte_socket_id_by_idx(unsigned int idx)
 	return config->numa_nodes[idx];
 }
 
-static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
+static rte_rwlock_t lcore_lock = RTE_RWLOCK_INITIALIZER;
 struct lcore_callback {
 	TAILQ_ENTRY(lcore_callback) next;
 	char *name;
@@ -271,7 +271,7 @@ rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
 	callback->init = init;
 	callback->uninit = uninit;
 	callback->arg = arg;
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	if (callback->init == NULL)
 		goto no_init;
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
@@ -297,7 +297,7 @@ rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
 		callback->name, callback->init == NULL ? "NO " : "",
 		callback->uninit == NULL ? "NO " : "");
 out:
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
 	return callback;
 }
 
@@ -308,7 +308,7 @@ rte_lcore_callback_unregister(void *handle)
 	struct lcore_callback *callback = handle;
 	unsigned int lcore_id;
 
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	if (callback->uninit == NULL)
 		goto no_uninit;
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
@@ -318,7 +318,7 @@ rte_lcore_callback_unregister(void *handle)
 	}
 no_uninit:
 	TAILQ_REMOVE(&lcore_callbacks, callback, next);
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
 	RTE_LOG(DEBUG, EAL, "Unregistered lcore callback %s-%p.\n",
 		callback->name, callback->arg);
 	free(callback->name);
@@ -333,7 +333,7 @@ eal_lcore_non_eal_allocate(void)
 	struct lcore_callback *prev;
 	unsigned int lcore_id;
 
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		if (cfg->lcore_role[lcore_id] != ROLE_OFF)
 			continue;
@@ -365,7 +365,7 @@ eal_lcore_non_eal_allocate(void)
 		goto out;
 	}
 out:
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
 	return lcore_id;
 }
 
@@ -375,7 +375,7 @@ eal_lcore_non_eal_release(unsigned int lcore_id)
 	struct rte_config *cfg = rte_eal_get_configuration();
 	struct lcore_callback *callback;
 
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	if (cfg->lcore_role[lcore_id] != ROLE_NON_EAL)
 		goto out;
 	TAILQ_FOREACH(callback, &lcore_callbacks, next)
@@ -383,5 +383,62 @@ eal_lcore_non_eal_release(unsigned int lcore_id)
 	cfg->lcore_role[lcore_id] = ROLE_OFF;
 	cfg->lcore_count--;
 out:
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
+}
+
+int
+rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	unsigned int lcore_id;
+	int ret = 0;
+
+	rte_rwlock_read_lock(&lcore_lock);
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (cfg->lcore_role[lcore_id] == ROLE_OFF)
+			continue;
+		ret = cb(lcore_id, arg);
+		if (ret != 0)
+			break;
+	}
+	rte_rwlock_read_unlock(&lcore_lock);
+	return ret;
+}
+
+static int
+lcore_dump_cb(unsigned int lcore_id, void *arg)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+	const char *role;
+	FILE *f = arg;
+	int ret;
+
+	switch (cfg->lcore_role[lcore_id]) {
+	case ROLE_RTE:
+		role = "RTE";
+		break;
+	case ROLE_SERVICE:
+		role = "SERVICE";
+		break;
+	case ROLE_NON_EAL:
+		role = "NON_EAL";
+		break;
+	default:
+		role = "UNKNOWN";
+		break;
+	}
+
+	ret = eal_thread_dump_affinity(&lcore_config[lcore_id].cpuset, cpuset,
+		sizeof(cpuset));
+	fprintf(f, "lcore %u, socket %u, role %s, cpuset %s%s\n", lcore_id,
+		rte_lcore_to_socket_id(lcore_id), role, cpuset,
+		ret == 0 ? "" : "...");
+	return 0;
+}
+
+void
+rte_lcore_dump(FILE *f)
+{
+	rte_lcore_iterate(lcore_dump_cb, f);
 }
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 338194832b..5dc0b12f42 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -104,17 +104,14 @@ rte_thread_get_affinity(rte_cpuset_t *cpusetp)
 }
 
 int
-eal_thread_dump_affinity(char *str, unsigned size)
+eal_thread_dump_affinity(rte_cpuset_t *cpuset, char *str, unsigned int size)
 {
-	rte_cpuset_t cpuset;
 	unsigned cpu;
 	int ret;
 	unsigned int out = 0;
 
-	rte_thread_get_affinity(&cpuset);
-
 	for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
-		if (!CPU_ISSET(cpu, &cpuset))
+		if (!CPU_ISSET(cpu, cpuset))
 			continue;
 
 		ret = snprintf(str + out,
@@ -137,6 +134,15 @@ eal_thread_dump_affinity(char *str, unsigned size)
 	return ret;
 }
 
+int
+eal_thread_dump_current_affinity(char *str, unsigned int size)
+{
+	rte_cpuset_t cpuset;
+
+	rte_thread_get_affinity(&cpuset);
+	return eal_thread_dump_affinity(&cpuset, str, size);
+}
+
 void
 rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
 {
diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
index 4ecd8fd53a..13ec252e01 100644
--- a/lib/librte_eal/common/eal_thread.h
+++ b/lib/librte_eal/common/eal_thread.h
@@ -47,13 +47,15 @@ unsigned eal_cpu_socket_id(unsigned cpu_id);
 #define RTE_CPU_AFFINITY_STR_LEN            256
 
 /**
- * Dump the current pthread cpuset.
+ * Dump the cpuset as a human readable string.
  * This function is private to EAL.
  *
  * Note:
  *   If the dump size is greater than the size of given buffer,
  *   the string will be truncated and with '\0' at the end.
  *
+ * @param cpuset
+ *   The CPU affinity object to dump.
  * @param str
  *   The string buffer the cpuset will dump to.
  * @param size
@@ -62,6 +64,13 @@ unsigned eal_cpu_socket_id(unsigned cpu_id);
  *   0 for success, -1 if truncation happens.
  */
 int
-eal_thread_dump_affinity(char *str, unsigned size);
+eal_thread_dump_affinity(rte_cpuset_t *cpuset, char *str, unsigned int size);
+
+/**
+ * Dump the current thread cpuset.
+ * This is a wrapper on eal_thread_dump_affinity().
+ */
+int
+eal_thread_dump_current_affinity(char *str, unsigned int size);
 
 #endif /* EAL_THREAD_H */
diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
index b5ea11df16..69a6f7d8c4 100644
--- a/lib/librte_eal/freebsd/eal.c
+++ b/lib/librte_eal/freebsd/eal.c
@@ -886,7 +886,7 @@ rte_eal_init(int argc, char **argv)
 	rte_thread_init(rte_config.master_lcore,
 		&lcore_config[rte_config.master_lcore].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
 		rte_config.master_lcore, thread_id, cpuset,
diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
index c1fb8eb2d8..b1a3619f51 100644
--- a/lib/librte_eal/freebsd/eal_thread.c
+++ b/lib/librte_eal/freebsd/eal_thread.c
@@ -92,7 +92,7 @@ eal_thread_loop(__rte_unused void *arg)
 
 	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
 		lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
 
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 27b29a1f87..4dee7cbcd7 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -261,8 +261,8 @@ typedef void (*rte_lcore_uninit_cb)(unsigned int lcore_id, void *arg);
  * If this step succeeds, the callbacks are put in the lcore callbacks list
  * that will get called for each lcore allocation/release.
  *
- * Note: callbacks execution is serialised under a lock protecting the lcores
- * and callbacks list.
+ * Note: callbacks execution is serialised under a write lock protecting the
+ * lcores and callbacks list.
  *
  * @param name
  *   A name serving as a small description for this callback.
@@ -297,6 +297,49 @@ __rte_experimental
 void
 rte_lcore_callback_unregister(void *handle);
 
+/**
+ * Callback prototype for iterating over lcores.
+ *
+ * @param lcore_id
+ *   The lcore to consider.
+ * @param arg
+ *   An opaque pointer coming from the caller.
+ * @return
+ *   - 0 lets the iteration continue.
+ *   - !0 makes the iteration stop.
+ */
+typedef int (*rte_lcore_iterate_cb)(unsigned int lcore_id, void *arg);
+
+/**
+ * Iterate on all active lcores (ROLE_RTE, ROLE_SERVICE and ROLE_NON_EAL).
+ * No modification on the lcore states is allowed in the callback.
+ *
+ * Note: as opposed to init/uninit callbacks, iteration callbacks can be
+ * invoked in parallel as they are run under a read lock protecting the lcores
+ * and callbacks list.
+ *
+ * @param cb
+ *   The callback that gets passed each lcore.
+ * @param arg
+ *   An opaque pointer passed to cb.
+ * @return
+ *   Same return code as the callback last invocation (see rte_lcore_iterate_cb
+ *   description).
+ */
+__rte_experimental
+int
+rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg);
+
+/**
+ * List all lcores.
+ *
+ * @param f
+ *   The output stream where the dump should be sent.
+ */
+__rte_experimental
+void
+rte_lcore_dump(FILE *f);
+
 /**
  * Set core affinity of the current thread.
  * Support both EAL and non-EAL thread and update TLS.
diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
index 8638376b8a..2f0efd7cd3 100644
--- a/lib/librte_eal/linux/eal.c
+++ b/lib/librte_eal/linux/eal.c
@@ -1214,7 +1214,7 @@ rte_eal_init(int argc, char **argv)
 	rte_thread_init(rte_config.master_lcore,
 		&lcore_config[rte_config.master_lcore].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		rte_config.master_lcore, (uintptr_t)thread_id, cpuset,
 		ret == 0 ? "" : "...");
diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
index 07aec0c44d..22d9bc8c01 100644
--- a/lib/librte_eal/linux/eal_thread.c
+++ b/lib/librte_eal/linux/eal_thread.c
@@ -92,7 +92,7 @@ eal_thread_loop(__rte_unused void *arg)
 
 	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		lcore_id, (uintptr_t)thread_id, cpuset, ret == 0 ? "" : "...");
 
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index c3e762c1d9..3aeb5b11ab 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -397,6 +397,8 @@ EXPERIMENTAL {
 	# added in 20.08
 	rte_lcore_callback_register;
 	rte_lcore_callback_unregister;
+	rte_lcore_dump;
+	rte_lcore_iterate;
 	rte_thread_register;
 	rte_thread_unregister;
 };
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v2 9/9] mempool/bucket: handle non-EAL lcores
  2020-06-19 16:22 ` [dpdk-dev] [PATCH v2 0/9] Register non-EAL " David Marchand
                     ` (7 preceding siblings ...)
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 8/9] eal: add lcore iterators David Marchand
@ 2020-06-19 16:22   ` David Marchand
  8 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-19 16:22 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Artem V. Andreev, Andrew Rybchenko

Convert to new lcore API to support non-EAL lcores.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 drivers/mempool/bucket/rte_mempool_bucket.c | 131 ++++++++++++--------
 1 file changed, 82 insertions(+), 49 deletions(-)

diff --git a/drivers/mempool/bucket/rte_mempool_bucket.c b/drivers/mempool/bucket/rte_mempool_bucket.c
index 5ce1ef16fb..0b4f42d330 100644
--- a/drivers/mempool/bucket/rte_mempool_bucket.c
+++ b/drivers/mempool/bucket/rte_mempool_bucket.c
@@ -55,6 +55,7 @@ struct bucket_data {
 	struct rte_ring *shared_orphan_ring;
 	struct rte_mempool *pool;
 	unsigned int bucket_mem_size;
+	void *lcore_callback_handle;
 };
 
 static struct bucket_stack *
@@ -345,6 +346,22 @@ bucket_dequeue_contig_blocks(struct rte_mempool *mp, void **first_obj_table,
 	return 0;
 }
 
+struct bucket_per_lcore_ctx {
+	const struct bucket_data *bd;
+	unsigned int count;
+};
+
+static int
+count_per_lcore(unsigned int lcore_id, void *arg)
+{
+	struct bucket_per_lcore_ctx *ctx = arg;
+
+	ctx->count += ctx->bd->obj_per_bucket *
+		ctx->bd->buckets[lcore_id]->top;
+	ctx->count += rte_ring_count(ctx->bd->adoption_buffer_rings[lcore_id]);
+	return 0;
+}
+
 static void
 count_underfilled_buckets(struct rte_mempool *mp,
 			  void *opaque,
@@ -373,23 +390,66 @@ count_underfilled_buckets(struct rte_mempool *mp,
 static unsigned int
 bucket_get_count(const struct rte_mempool *mp)
 {
-	const struct bucket_data *bd = mp->pool_data;
-	unsigned int count =
-		bd->obj_per_bucket * rte_ring_count(bd->shared_bucket_ring) +
-		rte_ring_count(bd->shared_orphan_ring);
-	unsigned int i;
+	struct bucket_per_lcore_ctx ctx;
 
-	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		if (!rte_lcore_is_enabled(i))
-			continue;
-		count += bd->obj_per_bucket * bd->buckets[i]->top +
-			rte_ring_count(bd->adoption_buffer_rings[i]);
-	}
+	ctx.bd = mp->pool_data;
+	ctx.count = ctx.bd->obj_per_bucket *
+		rte_ring_count(ctx.bd->shared_bucket_ring);
+	ctx.count += rte_ring_count(ctx.bd->shared_orphan_ring);
 
+	rte_lcore_iterate(count_per_lcore, &ctx);
 	rte_mempool_mem_iter((struct rte_mempool *)(uintptr_t)mp,
-			     count_underfilled_buckets, &count);
+			     count_underfilled_buckets, &ctx.count);
+
+	return ctx.count;
+}
+
+static int
+bucket_init_per_lcore(unsigned int lcore_id, void *arg)
+{
+	char rg_name[RTE_RING_NAMESIZE];
+	struct bucket_data *bd = arg;
+	struct rte_mempool *mp;
+	int rg_flags;
+	int rc;
+
+	mp = bd->pool;
+	bd->buckets[lcore_id] = bucket_stack_create(mp,
+		mp->size / bd->obj_per_bucket);
+	if (bd->buckets[lcore_id] == NULL)
+		goto error;
+
+	rc = snprintf(rg_name, sizeof(rg_name), RTE_MEMPOOL_MZ_FORMAT ".a%u",
+		mp->name, lcore_id);
+	if (rc < 0 || rc >= (int)sizeof(rg_name))
+		goto error;
+
+	rg_flags = RING_F_SC_DEQ;
+	if (mp->flags & MEMPOOL_F_SP_PUT)
+		rg_flags |= RING_F_SP_ENQ;
+	if (mp->flags & MEMPOOL_F_SC_GET)
+		rg_flags |= RING_F_SC_DEQ;
+	bd->adoption_buffer_rings[lcore_id] = rte_ring_create(rg_name,
+		rte_align32pow2(mp->size + 1), mp->socket_id, rg_flags);
+	if (bd->adoption_buffer_rings[lcore_id] == NULL)
+		goto error;
 
-	return count;
+	return 0;
+error:
+	rte_free(bd->buckets[lcore_id]);
+	bd->buckets[lcore_id] = NULL;
+	return -1;
+}
+
+static void
+bucket_uninit_per_lcore(unsigned int lcore_id, void *arg)
+{
+	struct bucket_data *bd = arg;
+
+	rte_ring_free(bd->adoption_buffer_rings[lcore_id]);
+	bd->adoption_buffer_rings[lcore_id] = NULL;
+	rte_free(bd->buckets[lcore_id]);
+	bd->buckets[lcore_id] = NULL;
 }
 
 static int
@@ -399,7 +459,6 @@ bucket_alloc(struct rte_mempool *mp)
 	int rc = 0;
 	char rg_name[RTE_RING_NAMESIZE];
 	struct bucket_data *bd;
-	unsigned int i;
 	unsigned int bucket_header_size;
 	size_t pg_sz;
 
@@ -429,36 +488,17 @@ bucket_alloc(struct rte_mempool *mp)
 	/* eventually this should be a tunable parameter */
 	bd->bucket_stack_thresh = (mp->size / bd->obj_per_bucket) * 4 / 3;
 
+	bd->lcore_callback_handle = rte_lcore_callback_register("bucket",
+		bucket_init_per_lcore, bucket_uninit_per_lcore, bd);
+	if (bd->lcore_callback_handle == NULL) {
+		rc = -ENOMEM;
+		goto no_mem_for_stacks;
+	}
+
 	if (mp->flags & MEMPOOL_F_SP_PUT)
 		rg_flags |= RING_F_SP_ENQ;
 	if (mp->flags & MEMPOOL_F_SC_GET)
 		rg_flags |= RING_F_SC_DEQ;
-
-	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		if (!rte_lcore_is_enabled(i))
-			continue;
-		bd->buckets[i] =
-			bucket_stack_create(mp, mp->size / bd->obj_per_bucket);
-		if (bd->buckets[i] == NULL) {
-			rc = -ENOMEM;
-			goto no_mem_for_stacks;
-		}
-		rc = snprintf(rg_name, sizeof(rg_name),
-			      RTE_MEMPOOL_MZ_FORMAT ".a%u", mp->name, i);
-		if (rc < 0 || rc >= (int)sizeof(rg_name)) {
-			rc = -ENAMETOOLONG;
-			goto no_mem_for_stacks;
-		}
-		bd->adoption_buffer_rings[i] =
-			rte_ring_create(rg_name, rte_align32pow2(mp->size + 1),
-					mp->socket_id,
-					rg_flags | RING_F_SC_DEQ);
-		if (bd->adoption_buffer_rings[i] == NULL) {
-			rc = -rte_errno;
-			goto no_mem_for_stacks;
-		}
-	}
-
 	rc = snprintf(rg_name, sizeof(rg_name),
 		      RTE_MEMPOOL_MZ_FORMAT ".0", mp->name);
 	if (rc < 0 || rc >= (int)sizeof(rg_name)) {
@@ -498,11 +538,8 @@ bucket_alloc(struct rte_mempool *mp)
 	rte_ring_free(bd->shared_orphan_ring);
 cannot_create_shared_orphan_ring:
 invalid_shared_orphan_ring:
+	rte_lcore_callback_unregister(bd->lcore_callback_handle);
 no_mem_for_stacks:
-	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		rte_free(bd->buckets[i]);
-		rte_ring_free(bd->adoption_buffer_rings[i]);
-	}
 	rte_free(bd);
 no_mem_for_data:
 	rte_errno = -rc;
@@ -512,16 +549,12 @@ bucket_alloc(struct rte_mempool *mp)
 static void
 bucket_free(struct rte_mempool *mp)
 {
-	unsigned int i;
 	struct bucket_data *bd = mp->pool_data;
 
 	if (bd == NULL)
 		return;
 
-	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		rte_free(bd->buckets[i]);
-		rte_ring_free(bd->adoption_buffer_rings[i]);
-	}
+	rte_lcore_callback_unregister(bd->lcore_callback_handle);
 
 	rte_ring_free(bd->shared_orphan_ring);
 	rte_ring_free(bd->shared_bucket_ring);
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v2 8/9] eal: add lcore iterators
  2020-06-19 16:22   ` [dpdk-dev] [PATCH v2 8/9] eal: add lcore iterators David Marchand
@ 2020-06-20  2:21     ` Stephen Hemminger
  0 siblings, 0 replies; 126+ messages in thread
From: Stephen Hemminger @ 2020-06-20  2:21 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Neil Horman

On Fri, 19 Jun 2020 18:22:43 +0200
David Marchand <david.marchand@redhat.com> wrote:

> +	rte_rwlock_read_lock(&lcore_lock);

I see you converted a spin lock to a reader-writer lock.
Are you sure this is a good idea? Although conceptually faster,
the implementation on most CPUs is slower than a simple spin lock.

https://www.kernel.org/doc/htmldocs/kernel-locking/Efficiency.html

	If your code divides neatly along reader/writer lines (as our
	cache code does), and the lock is held by readers for significant
	lengths of time, using these locks can help. They are slightly slower
	than the normal locks though, so in practice rwlock_t is not usually
	worthwhile.



^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v3 0/9] Register non-EAL threads as lcore
  2020-06-10 14:44 [dpdk-dev] [PATCH 0/7] Register external threads as lcore David Marchand
                   ` (8 preceding siblings ...)
  2020-06-19 16:22 ` [dpdk-dev] [PATCH v2 0/9] Register non-EAL " David Marchand
@ 2020-06-22 13:25 ` David Marchand
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 1/9] eal: relocate per thread symbols to common David Marchand
                     ` (8 more replies)
  2020-06-26 14:47 ` [dpdk-dev] [PATCH v4 0/9] Register non-EAL threads as lcore David Marchand
                   ` (2 subsequent siblings)
  12 siblings, 9 replies; 126+ messages in thread
From: David Marchand @ 2020-06-22 13:25 UTC (permalink / raw)
  To: dev; +Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes, i.maximets

OVS and some other applications have been hacking into DPDK internals to
fake EAL threads and avoid performance penalty of only having non-EAL
threads.

This series proposes to add a new type of lcores and maps those threads
to such lcores.
non-EAL threads won't run the DPDK eal mainloop.
As a consequence, part of the EAL threads API cannot work.

Having new lcores appearing during the process lifetime is not expected
by some DPDK components. This is addressed by introducing init/uninit
callbacks invoked when hotplugging such lcores.

There is still some work/discussion:
- refuse the new lcore role in incompatible EAL threads API (or only
  document it, as those APIs were already incompatible?),
- think about deprecation notices for existing RTE_FOREACH_LCORE macros
  and the like; it is probably worth discussing how to iterate over
  lcores,

For the interested parties, I have a patch [1] against dpdk-latest OVS
branch that makes use of this series.

1: https://github.com/david-marchand/ovs/commit/dpdk-latest-external-lcores

Changes since v2:
- fixed windows build error due to missing trace stub,
- fixed bug when rolling back on lcore register,

Changes since v1:
- rebased on master (conflicts on merged Windows series),
- separated lcore role code cleanup in a patch,
- tried to use a single naming, so kept non-EAL threads as the main
  notion. non-EAL threads are then distinguished between registered and
  unregistered non-EAL threads,
- added unit tests (still missing some coverage, marked with a FIXME),
- reworked callbacks call under a common rwlock lock which protects
  lcores allocations and callbacks registration,
- introduced lcore iterators and converted the bucket mempool driver,

-- 
David Marchand

David Marchand (9):
  eal: relocate per thread symbols to common
  eal: fix multiple definition of per lcore thread id
  eal: introduce thread init helper
  eal: introduce thread uninit helper
  eal: move lcore role code
  eal: register non-EAL threads as lcores
  eal: add lcore init callbacks
  eal: add lcore iterators
  mempool/bucket: handle non-EAL lcores

 MAINTAINERS                                   |   1 +
 app/test/Makefile                             |   1 +
 app/test/autotest_data.py                     |   6 +
 app/test/meson.build                          |   2 +
 app/test/test_lcores.c                        | 372 ++++++++++++++++++
 doc/guides/howto/debug_troubleshoot.rst       |   5 +-
 .../prog_guide/env_abstraction_layer.rst      |  22 +-
 doc/guides/prog_guide/mempool_lib.rst         |   2 +-
 drivers/mempool/bucket/rte_mempool_bucket.c   | 131 +++---
 lib/librte_eal/common/eal_common_lcore.c      | 240 ++++++++++-
 lib/librte_eal/common/eal_common_thread.c     | 126 ++++--
 lib/librte_eal/common/eal_common_trace.c      |  51 ++-
 lib/librte_eal/common/eal_private.h           |  19 +
 lib/librte_eal/common/eal_thread.h            |  26 +-
 lib/librte_eal/common/eal_trace.h             |   2 +-
 lib/librte_eal/freebsd/eal.c                  |  16 +-
 lib/librte_eal/freebsd/eal_thread.c           |  38 +-
 lib/librte_eal/include/rte_eal.h              |  12 +-
 lib/librte_eal/include/rte_lcore.h            | 167 +++++++-
 lib/librte_eal/include/rte_trace_point.h      |   9 +
 lib/librte_eal/linux/eal.c                    |  17 +-
 lib/librte_eal/linux/eal_thread.c             |  38 +-
 lib/librte_eal/rte_eal_version.map            |  16 +
 lib/librte_eal/windows/eal.c                  |   8 +-
 lib/librte_eal/windows/eal_thread.c           |  14 +-
 lib/librte_mempool/rte_mempool.h              |  11 +-
 26 files changed, 1113 insertions(+), 239 deletions(-)
 create mode 100644 app/test/test_lcores.c

-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v3 1/9] eal: relocate per thread symbols to common
  2020-06-22 13:25 ` [dpdk-dev] [PATCH v3 0/9] Register non-EAL threads as lcore David Marchand
@ 2020-06-22 13:25   ` David Marchand
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 2/9] eal: fix multiple definition of per lcore thread id David Marchand
                     ` (7 subsequent siblings)
  8 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-22 13:25 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Harini Ramakrishnan, Omar Cardona, Pallavi Kadam,
	Ranjit Menon

We have per lcore thread symbols scattered in OS implementations but
common code relies on them.
Move all of them to the common code.

RTE_PER_LCORE(_socket_id) and RTE_PER_LCORE(_cpuset) have public
accessors and are not exported through the library map, they can be
made static.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/librte_eal/common/eal_common_thread.c | 5 ++++-
 lib/librte_eal/freebsd/eal_thread.c       | 4 ----
 lib/librte_eal/include/rte_lcore.h        | 1 -
 lib/librte_eal/linux/eal_thread.c         | 4 ----
 lib/librte_eal/windows/eal_thread.c       | 4 ----
 5 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 370bb1b634..a5f67d811c 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -21,7 +21,10 @@
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DECLARE_PER_LCORE(unsigned , _socket_id);
+RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
+static RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) =
+	(unsigned int)SOCKET_ID_ANY;
+static RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
 
 unsigned rte_socket_id(void)
 {
diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
index b52019782a..40676d9ef5 100644
--- a/lib/librte_eal/freebsd/eal_thread.c
+++ b/lib/librte_eal/freebsd/eal_thread.c
@@ -25,10 +25,6 @@
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
 /*
  * Send a message to a slave lcore identified by slave_id to call a
  * function f with argument arg. Once the execution is done, the
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 339046bc86..5c1d1926e9 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -23,7 +23,6 @@ extern "C" {
 #define LCORE_ID_ANY     UINT32_MAX       /**< Any lcore. */
 
 RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
-RTE_DECLARE_PER_LCORE(rte_cpuset_t, _cpuset); /**< Per thread "cpuset". */
 
 /**
  * Get a lcore's role.
diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
index cd9d6e0ebf..a52ebef3a4 100644
--- a/lib/librte_eal/linux/eal_thread.c
+++ b/lib/librte_eal/linux/eal_thread.c
@@ -25,10 +25,6 @@
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
 /*
  * Send a message to a slave lcore identified by slave_id to call a
  * function f with argument arg. Once the execution is done, the
diff --git a/lib/librte_eal/windows/eal_thread.c b/lib/librte_eal/windows/eal_thread.c
index 3dd56519c9..f12a2ec6ad 100644
--- a/lib/librte_eal/windows/eal_thread.c
+++ b/lib/librte_eal/windows/eal_thread.c
@@ -16,10 +16,6 @@
 #include "eal_private.h"
 #include "eal_windows.h"
 
-RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) = (unsigned int)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
 /*
  * Send a message to a slave lcore identified by slave_id to call a
  * function f with argument arg. Once the execution is done, the
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v3 2/9] eal: fix multiple definition of per lcore thread id
  2020-06-22 13:25 ` [dpdk-dev] [PATCH v3 0/9] Register non-EAL threads as lcore David Marchand
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 1/9] eal: relocate per thread symbols to common David Marchand
@ 2020-06-22 13:25   ` David Marchand
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 3/9] eal: introduce thread init helper David Marchand
                     ` (6 subsequent siblings)
  8 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-22 13:25 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Neil Horman, Cunming Liang, Konstantin Ananyev,
	Olivier Matz

Because of the inline accessor + static declaration in rte_gettid(),
we end up with multiple symbols for RTE_PER_LCORE(_thread_id).
Each compilation unit will pay a cost when accessing this information
for the first time.

$ nm build/app/dpdk-testpmd | grep per_lcore__thread_id
0000000000000054 d per_lcore__thread_id.5037
0000000000000040 d per_lcore__thread_id.5103
0000000000000048 d per_lcore__thread_id.5259
000000000000004c d per_lcore__thread_id.5259
0000000000000044 d per_lcore__thread_id.5933
0000000000000058 d per_lcore__thread_id.6261
0000000000000050 d per_lcore__thread_id.7378
000000000000005c d per_lcore__thread_id.7496
000000000000000c d per_lcore__thread_id.8016
0000000000000010 d per_lcore__thread_id.8431

Make it global as part of the DPDK_21 stable ABI.

Fixes: ef76436c6834 ("eal: get unique thread id")

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Ray Kinsella <mdr@ashroe.eu>
---
 lib/librte_eal/common/eal_common_thread.c | 1 +
 lib/librte_eal/include/rte_eal.h          | 3 ++-
 lib/librte_eal/rte_eal_version.map        | 7 +++++++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index a5f67d811c..280c64bb76 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -22,6 +22,7 @@
 #include "eal_thread.h"
 
 RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
+RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
 static RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) =
 	(unsigned int)SOCKET_ID_ANY;
 static RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
index 2f9ed298de..2edf8c6556 100644
--- a/lib/librte_eal/include/rte_eal.h
+++ b/lib/librte_eal/include/rte_eal.h
@@ -447,6 +447,8 @@ enum rte_intr_mode rte_eal_vfio_intr_mode(void);
  */
 int rte_sys_gettid(void);
 
+RTE_DECLARE_PER_LCORE(int, _thread_id);
+
 /**
  * Get system unique thread id.
  *
@@ -456,7 +458,6 @@ int rte_sys_gettid(void);
  */
 static inline int rte_gettid(void)
 {
-	static RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
 	if (RTE_PER_LCORE(_thread_id) == -1)
 		RTE_PER_LCORE(_thread_id) = rte_sys_gettid();
 	return RTE_PER_LCORE(_thread_id);
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 196eef5afa..0d42d44ce9 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -221,6 +221,13 @@ DPDK_20.0 {
 	local: *;
 };
 
+DPDK_21 {
+	global:
+
+	per_lcore__thread_id;
+
+} DPDK_20.0;
+
 EXPERIMENTAL {
 	global:
 
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v3 3/9] eal: introduce thread init helper
  2020-06-22 13:25 ` [dpdk-dev] [PATCH v3 0/9] Register non-EAL threads as lcore David Marchand
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 1/9] eal: relocate per thread symbols to common David Marchand
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 2/9] eal: fix multiple definition of per lcore thread id David Marchand
@ 2020-06-22 13:25   ` David Marchand
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 4/9] eal: introduce thread uninit helper David Marchand
                     ` (5 subsequent siblings)
  8 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-22 13:25 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Harini Ramakrishnan, Omar Cardona, Pallavi Kadam,
	Ranjit Menon

Introduce a helper responsible for initialising the per thread context.
We can then have a unified context for EAL and non-EAL threads and
remove copy/paste'd OS-specific helpers.

Per EAL thread CPU affinity setting is separated from the thread init.
This accommodates the Windows EAL, where CPU affinity is not set at
the moment.
Besides, having affinity set by the master lcore in FreeBSD and Linux
will make it possible to detect errors rather than panic in the child
thread. But the cleanup when such an event happens is left for later.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v1:
- rebased on master, removed Windows workarounds wrt gettid and traces
  support,

---
 lib/librte_eal/common/eal_common_thread.c | 51 +++++++++++++----------
 lib/librte_eal/common/eal_thread.h        |  8 ++--
 lib/librte_eal/freebsd/eal.c              | 14 ++++++-
 lib/librte_eal/freebsd/eal_thread.c       | 32 +-------------
 lib/librte_eal/linux/eal.c                | 15 ++++++-
 lib/librte_eal/linux/eal_thread.c         | 32 +-------------
 lib/librte_eal/windows/eal.c              |  3 +-
 lib/librte_eal/windows/eal_thread.c       | 10 +----
 8 files changed, 66 insertions(+), 99 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 280c64bb76..afb30236c5 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -71,20 +71,10 @@ eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
 	return socket_id;
 }
 
-int
-rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+static void
+thread_update_affinity(rte_cpuset_t *cpusetp)
 {
-	int s;
-	unsigned lcore_id;
-	pthread_t tid;
-
-	tid = pthread_self();
-
-	s = pthread_setaffinity_np(tid, sizeof(rte_cpuset_t), cpusetp);
-	if (s != 0) {
-		RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
-		return -1;
-	}
+	unsigned int lcore_id = rte_lcore_id();
 
 	/* store socket_id in TLS for quick access */
 	RTE_PER_LCORE(_socket_id) =
@@ -94,14 +84,24 @@ rte_thread_set_affinity(rte_cpuset_t *cpusetp)
 	memmove(&RTE_PER_LCORE(_cpuset), cpusetp,
 		sizeof(rte_cpuset_t));
 
-	lcore_id = rte_lcore_id();
 	if (lcore_id != (unsigned)LCORE_ID_ANY) {
 		/* EAL thread will update lcore_config */
 		lcore_config[lcore_id].socket_id = RTE_PER_LCORE(_socket_id);
 		memmove(&lcore_config[lcore_id].cpuset, cpusetp,
 			sizeof(rte_cpuset_t));
 	}
+}
 
+int
+rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+{
+	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+			cpusetp) != 0) {
+		RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
+		return -1;
+	}
+
+	thread_update_affinity(cpusetp);
 	return 0;
 }
 
@@ -147,6 +147,19 @@ eal_thread_dump_affinity(char *str, unsigned size)
 	return ret;
 }
 
+void
+rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
+{
+	/* set the lcore ID in per-lcore memory area */
+	RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+	/* acquire system unique id  */
+	rte_gettid();
+
+	thread_update_affinity(cpuset);
+
+	__rte_trace_mem_per_thread_alloc();
+}
 
 struct rte_thread_ctrl_params {
 	void *(*start_routine)(void *);
@@ -154,16 +167,14 @@ struct rte_thread_ctrl_params {
 	pthread_barrier_t configured;
 };
 
-static void *rte_thread_init(void *arg)
+static void *ctrl_thread_init(void *arg)
 {
 	int ret;
-	rte_cpuset_t *cpuset = &internal_config.ctrl_cpuset;
 	struct rte_thread_ctrl_params *params = arg;
 	void *(*start_routine)(void *) = params->start_routine;
 	void *routine_arg = params->arg;
 
-	/* Store cpuset in TLS for quick access */
-	memmove(&RTE_PER_LCORE(_cpuset), cpuset, sizeof(rte_cpuset_t));
+	rte_thread_init(rte_lcore_id(), &internal_config.ctrl_cpuset);
 
 	ret = pthread_barrier_wait(&params->configured);
 	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
@@ -171,8 +182,6 @@ static void *rte_thread_init(void *arg)
 		free(params);
 	}
 
-	__rte_trace_mem_per_thread_alloc();
-
 	return start_routine(routine_arg);
 }
 
@@ -194,7 +203,7 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
 
 	pthread_barrier_init(&params->configured, NULL, 2);
 
-	ret = pthread_create(thread, attr, rte_thread_init, (void *)params);
+	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
 	if (ret != 0) {
 		free(params);
 		return -ret;
diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
index b40ed249ed..da5e7c93ba 100644
--- a/lib/librte_eal/common/eal_thread.h
+++ b/lib/librte_eal/common/eal_thread.h
@@ -16,12 +16,14 @@
 __rte_noreturn void *eal_thread_loop(void *arg);
 
 /**
- * Init per-lcore info for master thread
+ * Init per-lcore info in current thread.
  *
  * @param lcore_id
- *   identifier of master lcore
+ *   identifier of lcore.
+ * @param cpuset
+ *   CPU affinity for this thread.
  */
-void eal_thread_init_master(unsigned lcore_id);
+void rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset);
 
 /**
  * Get the NUMA socket id from cpu id.
diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
index c41f265fac..b5ea11df16 100644
--- a/lib/librte_eal/freebsd/eal.c
+++ b/lib/librte_eal/freebsd/eal.c
@@ -877,7 +877,14 @@ rte_eal_init(int argc, char **argv)
 
 	eal_check_mem_on_local_socket();
 
-	eal_thread_init_master(rte_config.master_lcore);
+	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+			&lcore_config[rte_config.master_lcore].cpuset) != 0) {
+		rte_eal_init_alert("Cannot set affinity");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	rte_thread_init(rte_config.master_lcore,
+		&lcore_config[rte_config.master_lcore].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
 
@@ -908,6 +915,11 @@ rte_eal_init(int argc, char **argv)
 		snprintf(thread_name, sizeof(thread_name),
 				"lcore-slave-%d", i);
 		rte_thread_setname(lcore_config[i].thread_id, thread_name);
+
+		ret = pthread_setaffinity_np(lcore_config[i].thread_id,
+			sizeof(rte_cpuset_t), &lcore_config[i].cpuset);
+		if (ret != 0)
+			rte_panic("Cannot set affinity\n");
 	}
 
 	/*
diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
index 40676d9ef5..c1fb8eb2d8 100644
--- a/lib/librte_eal/freebsd/eal_thread.c
+++ b/lib/librte_eal/freebsd/eal_thread.c
@@ -66,29 +66,6 @@ rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
 	return rc;
 }
 
-/* set affinity for current thread */
-static int
-eal_thread_set_affinity(void)
-{
-	unsigned lcore_id = rte_lcore_id();
-
-	/* acquire system unique id  */
-	rte_gettid();
-
-	/* update EAL thread core affinity */
-	return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
-}
-
-void eal_thread_init_master(unsigned lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
-}
-
 /* main loop of threads */
 __rte_noreturn void *
 eal_thread_loop(__rte_unused void *arg)
@@ -113,19 +90,12 @@ eal_thread_loop(__rte_unused void *arg)
 	m2s = lcore_config[lcore_id].pipe_master2slave[0];
 	s2m = lcore_config[lcore_id].pipe_slave2master[1];
 
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
+	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
-
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
 		lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
 
-	__rte_trace_mem_per_thread_alloc();
 	rte_eal_trace_thread_lcore_ready(lcore_id, cpuset);
 
 	/* read on our pipe to get commands */
diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
index f162124a37..8638376b8a 100644
--- a/lib/librte_eal/linux/eal.c
+++ b/lib/librte_eal/linux/eal.c
@@ -1205,10 +1205,16 @@ rte_eal_init(int argc, char **argv)
 
 	eal_check_mem_on_local_socket();
 
-	eal_thread_init_master(rte_config.master_lcore);
+	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+			&lcore_config[rte_config.master_lcore].cpuset) != 0) {
+		rte_eal_init_alert("Cannot set affinity");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	rte_thread_init(rte_config.master_lcore,
+		&lcore_config[rte_config.master_lcore].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
-
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		rte_config.master_lcore, (uintptr_t)thread_id, cpuset,
 		ret == 0 ? "" : "...");
@@ -1240,6 +1246,11 @@ rte_eal_init(int argc, char **argv)
 		if (ret != 0)
 			RTE_LOG(DEBUG, EAL,
 				"Cannot set name for lcore thread\n");
+
+		ret = pthread_setaffinity_np(lcore_config[i].thread_id,
+			sizeof(rte_cpuset_t), &lcore_config[i].cpuset);
+		if (ret != 0)
+			rte_panic("Cannot set affinity\n");
 	}
 
 	/*
diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
index a52ebef3a4..07aec0c44d 100644
--- a/lib/librte_eal/linux/eal_thread.c
+++ b/lib/librte_eal/linux/eal_thread.c
@@ -66,29 +66,6 @@ rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
 	return rc;
 }
 
-/* set affinity for current EAL thread */
-static int
-eal_thread_set_affinity(void)
-{
-	unsigned lcore_id = rte_lcore_id();
-
-	/* acquire system unique id  */
-	rte_gettid();
-
-	/* update EAL thread core affinity */
-	return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
-}
-
-void eal_thread_init_master(unsigned lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
-}
-
 /* main loop of threads */
 __rte_noreturn void *
 eal_thread_loop(__rte_unused void *arg)
@@ -113,19 +90,12 @@ eal_thread_loop(__rte_unused void *arg)
 	m2s = lcore_config[lcore_id].pipe_master2slave[0];
 	s2m = lcore_config[lcore_id].pipe_slave2master[1];
 
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
+	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
-
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		lcore_id, (uintptr_t)thread_id, cpuset, ret == 0 ? "" : "...");
 
-	__rte_trace_mem_per_thread_alloc();
 	rte_eal_trace_thread_lcore_ready(lcore_id, cpuset);
 
 	/* read on our pipe to get commands */
diff --git a/lib/librte_eal/windows/eal.c b/lib/librte_eal/windows/eal.c
index 427a5557fa..23de12ab43 100644
--- a/lib/librte_eal/windows/eal.c
+++ b/lib/librte_eal/windows/eal.c
@@ -354,7 +354,8 @@ rte_eal_init(int argc, char **argv)
 		return -1;
 	}
 
-	eal_thread_init_master(rte_config.master_lcore);
+	rte_thread_init(rte_config.master_lcore,
+		&lcore_config[rte_config.master_lcore].cpuset);
 
 	RTE_LCORE_FOREACH_SLAVE(i) {
 
diff --git a/lib/librte_eal/windows/eal_thread.c b/lib/librte_eal/windows/eal_thread.c
index f12a2ec6ad..4f01881240 100644
--- a/lib/librte_eal/windows/eal_thread.c
+++ b/lib/librte_eal/windows/eal_thread.c
@@ -53,13 +53,6 @@ rte_eal_remote_launch(lcore_function_t *f, void *arg, unsigned int slave_id)
 	return 0;
 }
 
-void
-eal_thread_init_master(unsigned int lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-}
-
 /* main loop of threads */
 void *
 eal_thread_loop(void *arg __rte_unused)
@@ -84,8 +77,7 @@ eal_thread_loop(void *arg __rte_unused)
 	m2s = lcore_config[lcore_id].pipe_master2slave[0];
 	s2m = lcore_config[lcore_id].pipe_slave2master[1];
 
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
+	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s])\n",
 		lcore_id, (uintptr_t)thread_id, cpuset);
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v3 4/9] eal: introduce thread uninit helper
  2020-06-22 13:25 ` [dpdk-dev] [PATCH v3 0/9] Register non-EAL threads as lcore David Marchand
                     ` (2 preceding siblings ...)
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 3/9] eal: introduce thread init helper David Marchand
@ 2020-06-22 13:25   ` David Marchand
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 5/9] eal: move lcore role code David Marchand
                     ` (4 subsequent siblings)
  8 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-22 13:25 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Jerin Jacob, Sunil Kumar Kori, Neil Horman,
	Harini Ramakrishnan, Omar Cardona, Pallavi Kadam, Ranjit Menon

This is a preparation step for dynamically unregistering threads.

Since we explicitly allocate a per thread trace buffer in
rte_thread_init, add an internal helper to free this buffer.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Note: I preferred renaming the current internal function to free all
threads trace buffers (new name trace_mem_free()) and reuse the previous
name (trace_mem_per_thread_free()) when freeing this buffer for a given
thread.

Changes since v2:
- added missing stub for windows tracing support,
- moved free symbol to exported (experimental) ABI as a counterpart of
  the alloc symbol we already had,

Changes since v1:
- rebased on master, removed Windows workaround wrt traces support,

---
 lib/librte_eal/common/eal_common_thread.c |  9 ++++
 lib/librte_eal/common/eal_common_trace.c  | 51 +++++++++++++++++++----
 lib/librte_eal/common/eal_thread.h        |  5 +++
 lib/librte_eal/common/eal_trace.h         |  2 +-
 lib/librte_eal/include/rte_trace_point.h  |  9 ++++
 lib/librte_eal/rte_eal_version.map        |  3 ++
 lib/librte_eal/windows/eal.c              |  5 +++
 7 files changed, 75 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index afb30236c5..3b30cc99d9 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -20,6 +20,7 @@
 #include "eal_internal_cfg.h"
 #include "eal_private.h"
 #include "eal_thread.h"
+#include "eal_trace.h"
 
 RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
 RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
@@ -161,6 +162,14 @@ rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
 	__rte_trace_mem_per_thread_alloc();
 }
 
+void
+rte_thread_uninit(void)
+{
+	__rte_trace_mem_per_thread_free();
+
+	RTE_PER_LCORE(_lcore_id) = LCORE_ID_ANY;
+}
+
 struct rte_thread_ctrl_params {
 	void *(*start_routine)(void *);
 	void *arg;
diff --git a/lib/librte_eal/common/eal_common_trace.c b/lib/librte_eal/common/eal_common_trace.c
index 875553d7e5..3e620d76ed 100644
--- a/lib/librte_eal/common/eal_common_trace.c
+++ b/lib/librte_eal/common/eal_common_trace.c
@@ -101,7 +101,7 @@ eal_trace_fini(void)
 {
 	if (!rte_trace_is_enabled())
 		return;
-	trace_mem_per_thread_free();
+	trace_mem_free();
 	trace_metadata_destroy();
 	eal_trace_args_free();
 }
@@ -370,24 +370,59 @@ __rte_trace_mem_per_thread_alloc(void)
 	rte_spinlock_unlock(&trace->lock);
 }
 
+static void
+trace_mem_per_thread_free_unlocked(struct thread_mem_meta *meta)
+{
+	if (meta->area == TRACE_AREA_HUGEPAGE)
+		eal_free_no_trace(meta->mem);
+	else if (meta->area == TRACE_AREA_HEAP)
+		free(meta->mem);
+}
+
+void
+__rte_trace_mem_per_thread_free(void)
+{
+	struct trace *trace = trace_obj_get();
+	struct __rte_trace_header *header;
+	uint32_t count;
+
+	if (RTE_PER_LCORE(trace_mem) == NULL)
+		return;
+
+	header = RTE_PER_LCORE(trace_mem);
+	rte_spinlock_lock(&trace->lock);
+	for (count = 0; count < trace->nb_trace_mem_list; count++) {
+		if (trace->lcore_meta[count].mem == header)
+			break;
+	}
+	if (count != trace->nb_trace_mem_list) {
+		struct thread_mem_meta *meta = &trace->lcore_meta[count];
+
+		trace_mem_per_thread_free_unlocked(meta);
+		if (count != trace->nb_trace_mem_list - 1) {
+			memmove(meta, meta + 1,
+				sizeof(*meta) *
+				 (trace->nb_trace_mem_list - count - 1));
+		}
+		trace->nb_trace_mem_list--;
+	}
+	rte_spinlock_unlock(&trace->lock);
+}
+
 void
-trace_mem_per_thread_free(void)
+trace_mem_free(void)
 {
 	struct trace *trace = trace_obj_get();
 	uint32_t count;
-	void *mem;
 
 	if (!rte_trace_is_enabled())
 		return;
 
 	rte_spinlock_lock(&trace->lock);
 	for (count = 0; count < trace->nb_trace_mem_list; count++) {
-		mem = trace->lcore_meta[count].mem;
-		if (trace->lcore_meta[count].area == TRACE_AREA_HUGEPAGE)
-			eal_free_no_trace(mem);
-		else if (trace->lcore_meta[count].area == TRACE_AREA_HEAP)
-			free(mem);
+		trace_mem_per_thread_free_unlocked(&trace->lcore_meta[count]);
 	}
+	trace->nb_trace_mem_list = 0;
 	rte_spinlock_unlock(&trace->lock);
 }
 
diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
index da5e7c93ba..4ecd8fd53a 100644
--- a/lib/librte_eal/common/eal_thread.h
+++ b/lib/librte_eal/common/eal_thread.h
@@ -25,6 +25,11 @@ __rte_noreturn void *eal_thread_loop(void *arg);
  */
 void rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset);
 
+/**
+ * Uninitialize per-lcore info for current thread.
+ */
+void rte_thread_uninit(void);
+
 /**
  * Get the NUMA socket id from cpu id.
  * This function is private to EAL.
diff --git a/lib/librte_eal/common/eal_trace.h b/lib/librte_eal/common/eal_trace.h
index 8f60616156..bbb6e1645c 100644
--- a/lib/librte_eal/common/eal_trace.h
+++ b/lib/librte_eal/common/eal_trace.h
@@ -106,7 +106,7 @@ int trace_metadata_create(void);
 void trace_metadata_destroy(void);
 int trace_mkdir(void);
 int trace_epoch_time_save(void);
-void trace_mem_per_thread_free(void);
+void trace_mem_free(void);
 
 /* EAL interface */
 int eal_trace_init(void);
diff --git a/lib/librte_eal/include/rte_trace_point.h b/lib/librte_eal/include/rte_trace_point.h
index 377c2414aa..686b86fdb1 100644
--- a/lib/librte_eal/include/rte_trace_point.h
+++ b/lib/librte_eal/include/rte_trace_point.h
@@ -230,6 +230,15 @@ __rte_trace_point_fp_is_enabled(void)
 __rte_experimental
 void __rte_trace_mem_per_thread_alloc(void);
 
+/**
+ * @internal
+ *
+ * Free trace memory buffer per thread.
+ *
+ */
+__rte_experimental
+void __rte_trace_mem_per_thread_free(void);
+
 /**
  * @internal
  *
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 0d42d44ce9..5831eea4b0 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -393,6 +393,9 @@ EXPERIMENTAL {
 	rte_trace_point_lookup;
 	rte_trace_regexp;
 	rte_trace_save;
+
+	# added in 20.08
+	__rte_trace_mem_per_thread_free;
 };
 
 INTERNAL {
diff --git a/lib/librte_eal/windows/eal.c b/lib/librte_eal/windows/eal.c
index 23de12ab43..09cc1aef63 100644
--- a/lib/librte_eal/windows/eal.c
+++ b/lib/librte_eal/windows/eal.c
@@ -255,6 +255,11 @@ __rte_trace_mem_per_thread_alloc(void)
 {
 }
 
+void
+__rte_trace_mem_per_thread_free(void)
+{
+}
+
 void
 __rte_trace_point_emit_field(size_t sz, const char *field,
 	const char *type)
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v3 5/9] eal: move lcore role code
  2020-06-22 13:25 ` [dpdk-dev] [PATCH v3 0/9] Register non-EAL threads as lcore David Marchand
                     ` (3 preceding siblings ...)
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 4/9] eal: introduce thread uninit helper David Marchand
@ 2020-06-22 13:25   ` David Marchand
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores David Marchand
                     ` (3 subsequent siblings)
  8 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-22 13:25 UTC (permalink / raw)
  To: dev; +Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes, i.maximets

For consistency's sake, move all lcore role code to the dedicated
compilation unit / header.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/librte_eal/common/eal_common_lcore.c  | 11 +++++++
 lib/librte_eal/common/eal_common_thread.c | 11 -------
 lib/librte_eal/include/rte_eal.h          |  9 ------
 lib/librte_eal/include/rte_lcore.h        | 37 ++++++++++++++---------
 4 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 5404922a87..86d32a3dd7 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -63,6 +63,17 @@ rte_eal_lcore_role(unsigned int lcore_id)
 	return cfg->lcore_role[lcore_id];
 }
 
+int
+rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+
+	if (lcore_id >= RTE_MAX_LCORE)
+		return -EINVAL;
+
+	return cfg->lcore_role[lcore_id] == role;
+}
+
 int rte_lcore_is_enabled(unsigned int lcore_id)
 {
 	struct rte_config *cfg = rte_eal_get_configuration();
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 3b30cc99d9..a7ae0691bf 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -33,17 +33,6 @@ unsigned rte_socket_id(void)
 	return RTE_PER_LCORE(_socket_id);
 }
 
-int
-rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role)
-{
-	struct rte_config *cfg = rte_eal_get_configuration();
-
-	if (lcore_id >= RTE_MAX_LCORE)
-		return -EINVAL;
-
-	return cfg->lcore_role[lcore_id] == role;
-}
-
 static int
 eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
 {
diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
index 2edf8c6556..0913d1947c 100644
--- a/lib/librte_eal/include/rte_eal.h
+++ b/lib/librte_eal/include/rte_eal.h
@@ -31,15 +31,6 @@ extern "C" {
 /* Maximum thread_name length. */
 #define RTE_MAX_THREAD_NAME_LEN 16
 
-/**
- * The lcore role (used in RTE or not).
- */
-enum rte_lcore_role_t {
-	ROLE_RTE,
-	ROLE_OFF,
-	ROLE_SERVICE,
-};
-
 /**
  * The type of process in a linux, multi-process setup
  */
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 5c1d1926e9..3968c40693 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -24,6 +24,15 @@ extern "C" {
 
 RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
 
+/**
+ * The lcore role (used in RTE or not).
+ */
+enum rte_lcore_role_t {
+	ROLE_RTE,
+	ROLE_OFF,
+	ROLE_SERVICE,
+};
+
 /**
  * Get a lcore's role.
  *
@@ -34,6 +43,20 @@ RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
  */
 enum rte_lcore_role_t rte_eal_lcore_role(unsigned int lcore_id);
 
+/**
+ * Test if the core supplied has a specific role
+ *
+ * @param lcore_id
+ *   The identifier of the lcore, which MUST be between 0 and
+ *   RTE_MAX_LCORE-1.
+ * @param role
+ *   The role to be checked against.
+ * @return
+ *   Boolean value: positive if test is true; otherwise returns 0.
+ */
+int
+rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
+
 /**
  * Return the Application thread ID of the execution unit.
  *
@@ -283,20 +306,6 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
 		const pthread_attr_t *attr,
 		void *(*start_routine)(void *), void *arg);
 
-/**
- * Test if the core supplied has a specific role
- *
- * @param lcore_id
- *   The identifier of the lcore, which MUST be between 0 and
- *   RTE_MAX_LCORE-1.
- * @param role
- *   The role to be checked against.
- * @return
- *   Boolean value: positive if test is true; otherwise returns 0.
- */
-int
-rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
-
 #ifdef __cplusplus
 }
 #endif
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-22 13:25 ` [dpdk-dev] [PATCH v3 0/9] Register non-EAL threads as lcore David Marchand
                     ` (4 preceding siblings ...)
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 5/9] eal: move lcore role code David Marchand
@ 2020-06-22 13:25   ` David Marchand
  2020-06-22 15:49     ` Ananyev, Konstantin
  2020-06-23 17:02     ` Andrew Rybchenko
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 7/9] eal: add lcore init callbacks David Marchand
                     ` (2 subsequent siblings)
  8 siblings, 2 replies; 126+ messages in thread
From: David Marchand @ 2020-06-22 13:25 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Thomas Monjalon, John McNamara, Marko Kovacevic,
	Anatoly Burakov, Olivier Matz, Andrew Rybchenko, Neil Horman

DPDK allows calling some part of its API from a non-EAL thread but this
has some limitations.
OVS (and other applications) has its own thread management but still
wants to avoid such limitations by hacking RTE_PER_LCORE(_lcore_id) and
faking EAL threads that are potentially unknown to some DPDK components.

Introduce a new API to register non-EAL threads and associate each of them
with a free lcore that gets a new NON_EAL role.
This role denotes lcores that do not run the DPDK mainloop and, as such,
prevents use of rte_eal_wait_lcore() and related functions.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v1:
- moved cleanup on lcore role code in patch 5,
- added unit test,
- updated documentation,
- changed naming from "external thread" to "registered non-EAL thread"

---
 MAINTAINERS                                   |   1 +
 app/test/Makefile                             |   1 +
 app/test/autotest_data.py                     |   6 +
 app/test/meson.build                          |   2 +
 app/test/test_lcores.c                        | 139 ++++++++++++++++++
 doc/guides/howto/debug_troubleshoot.rst       |   5 +-
 .../prog_guide/env_abstraction_layer.rst      |  22 +--
 doc/guides/prog_guide/mempool_lib.rst         |   2 +-
 lib/librte_eal/common/eal_common_lcore.c      |  44 +++++-
 lib/librte_eal/common/eal_common_thread.c     |  33 +++++
 lib/librte_eal/common/eal_private.h           |  18 +++
 lib/librte_eal/include/rte_lcore.h            |  18 ++-
 lib/librte_eal/rte_eal_version.map            |   2 +
 lib/librte_mempool/rte_mempool.h              |  11 +-
 14 files changed, 282 insertions(+), 22 deletions(-)
 create mode 100644 app/test/test_lcores.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 816696caf2..fe9e74ffbc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -183,6 +183,7 @@ F: app/test/test_debug.c
 F: app/test/test_eal*
 F: app/test/test_errno.c
 F: app/test/test_interrupts.c
+F: app/test/test_lcores.c
 F: app/test/test_logs.c
 F: app/test/test_memcpy*
 F: app/test/test_per_lcore.c
diff --git a/app/test/Makefile b/app/test/Makefile
index 7b96a03a64..4a8dea2425 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -97,6 +97,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY) += test_flow_classify.c
 endif
 
 SRCS-y += test_rwlock.c
+SRCS-y += test_lcores.c
 
 SRCS-$(CONFIG_RTE_LIBRTE_STACK) += test_stack.c
 SRCS-$(CONFIG_RTE_LIBRTE_STACK) += test_stack_perf.c
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index fc3fcc159e..600b130966 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -62,6 +62,12 @@
         "Func":    rwlock_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Lcores autotest",
+        "Command": "lcores_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Logs autotest",
         "Command": "logs_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index 5233ead46e..a57477b7cc 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -67,6 +67,7 @@ test_sources = files('commands.c',
 	'test_ipsec_perf.c',
 	'test_kni.c',
 	'test_kvargs.c',
+	'test_lcores.c',
 	'test_logs.c',
 	'test_lpm.c',
 	'test_lpm6.c',
@@ -206,6 +207,7 @@ fast_tests = [
         ['hash_autotest', true],
         ['interrupt_autotest', true],
         ['ipfrag_autotest', false],
+        ['lcores_autotest', true],
         ['logs_autotest', true],
         ['lpm_autotest', true],
         ['lpm6_autotest', true],
diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
new file mode 100644
index 0000000000..864bcbade7
--- /dev/null
+++ b/app/test/test_lcores.c
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2020 Red Hat, Inc.
+ */
+
+#include <pthread.h>
+#include <string.h>
+
+#include <rte_lcore.h>
+
+#include "test.h"
+
+struct thread_context {
+	enum { INIT, ERROR, DONE } state;
+	bool lcore_id_any;
+	pthread_t id;
+	unsigned int *registered_count;
+};
+static void *thread_loop(void *arg)
+{
+	struct thread_context *t = arg;
+	unsigned int lcore_id;
+
+	lcore_id = rte_lcore_id();
+	if (lcore_id != LCORE_ID_ANY) {
+		printf("Incorrect lcore id for new thread %u\n", lcore_id);
+		t->state = ERROR;
+	}
+	rte_thread_register();
+	lcore_id = rte_lcore_id();
+	if ((t->lcore_id_any && lcore_id != LCORE_ID_ANY) ||
+			(!t->lcore_id_any && lcore_id == LCORE_ID_ANY)) {
+		printf("Could not register new thread, got %u while %sexpecting %u\n",
+			lcore_id, t->lcore_id_any ? "" : "not ", LCORE_ID_ANY);
+		t->state = ERROR;
+	}
+	/* Report register happened to the control thread. */
+	__atomic_add_fetch(t->registered_count, 1, __ATOMIC_RELEASE);
+
+	/* Wait for release from the control thread. */
+	while (__atomic_load_n(t->registered_count, __ATOMIC_ACQUIRE) != 0)
+		;
+	rte_thread_unregister();
+	lcore_id = rte_lcore_id();
+	if (lcore_id != LCORE_ID_ANY) {
+		printf("Could not unregister new thread, %u still assigned\n",
+			lcore_id);
+		t->state = ERROR;
+	}
+
+	if (t->state != ERROR)
+		t->state = DONE;
+
+	return NULL;
+}
+
+static int
+test_non_eal_lcores(unsigned int eal_threads_count)
+{
+	struct thread_context thread_contexts[RTE_MAX_LCORE];
+	unsigned int non_eal_threads_count;
+	unsigned int registered_count;
+	struct thread_context *t;
+	unsigned int i;
+	int ret;
+
+	non_eal_threads_count = 0;
+	registered_count = 0;
+
+	/* Try to create as many threads as possible. */
+	for (i = 0; i < RTE_MAX_LCORE - eal_threads_count; i++) {
+		t = &thread_contexts[i];
+		t->state = INIT;
+		t->registered_count = &registered_count;
+		t->lcore_id_any = false;
+		if (pthread_create(&t->id, NULL, thread_loop, t) != 0)
+			break;
+		non_eal_threads_count++;
+	}
+	printf("non-EAL threads count: %u\n", non_eal_threads_count);
+	/* Wait all non-EAL threads to register. */
+	while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+			non_eal_threads_count)
+		;
+
+	/* We managed to create the max number of threads, let's try to create
+	 * one more. This will allow one more check.
+	 */
+	if (eal_threads_count + non_eal_threads_count < RTE_MAX_LCORE)
+		goto skip_lcore_any;
+	t = &thread_contexts[non_eal_threads_count];
+	t->state = INIT;
+	t->registered_count = &registered_count;
+	t->lcore_id_any = true;
+	if (pthread_create(&t->id, NULL, thread_loop, t) == 0) {
+		non_eal_threads_count++;
+		printf("non-EAL threads count: %u\n", non_eal_threads_count);
+		while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+				non_eal_threads_count)
+			;
+	}
+
+skip_lcore_any:
+	/* Release all threads, and check their states. */
+	__atomic_store_n(&registered_count, 0, __ATOMIC_RELEASE);
+	ret = 0;
+	for (i = 0; i < non_eal_threads_count; i++) {
+		t = &thread_contexts[i];
+		pthread_join(t->id, NULL);
+		if (t->state != DONE)
+			ret = -1;
+	}
+
+	return ret;
+}
+
+static int
+test_lcores(void)
+{
+	unsigned int eal_threads_count = 0;
+	unsigned int i;
+
+	for (i = 0; i < RTE_MAX_LCORE; i++) {
+		if (!rte_lcore_has_role(i, ROLE_OFF))
+			eal_threads_count++;
+	}
+	if (eal_threads_count == 0) {
+		printf("Something is broken, no EAL thread detected.\n");
+		return TEST_FAILED;
+	}
+	printf("EAL threads count: %u, RTE_MAX_LCORE=%u\n", eal_threads_count,
+		RTE_MAX_LCORE);
+
+	if (test_non_eal_lcores(eal_threads_count) < 0)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(lcores_autotest, test_lcores);
diff --git a/doc/guides/howto/debug_troubleshoot.rst b/doc/guides/howto/debug_troubleshoot.rst
index cef016b2fe..5a46f5fba3 100644
--- a/doc/guides/howto/debug_troubleshoot.rst
+++ b/doc/guides/howto/debug_troubleshoot.rst
@@ -307,8 +307,9 @@ Custom worker function :numref:`dtg_distributor_worker`.
 
 #. Configuration issue isolation
 
-   * Identify core role using ``rte_eal_lcore_role`` to identify RTE, OFF and
-     SERVICE. Check performance functions are mapped to run on the cores.
+   * Identify core role using ``rte_eal_lcore_role`` to identify RTE, OFF,
+     SERVICE and NON_EAL. Check performance functions are mapped to run on the
+     cores.
 
    * For high-performance execution logic ensure running it on correct NUMA
      and non-master core.
diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
index 48a2fec066..f64ae953d1 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -564,9 +564,13 @@ It's also compatible with the pattern of corelist('-l') option.
 non-EAL pthread support
 ~~~~~~~~~~~~~~~~~~~~~~~
 
-It is possible to use the DPDK execution context with any user pthread (aka. Non-EAL pthreads).
-In a non-EAL pthread, the *_lcore_id* is always LCORE_ID_ANY which identifies that it is not an EAL thread with a valid, unique, *_lcore_id*.
-Some libraries will use an alternative unique ID (e.g. TID), some will not be impacted at all, and some will work but with limitations (e.g. timer and mempool libraries).
+It is possible to use the DPDK execution context with any user pthread (aka. non-EAL pthreads).
+There are two kinds of non-EAL pthreads:
+
+- a registered non-EAL pthread with a valid *_lcore_id* that was successfully assigned by calling ``rte_thread_register()``,
+- an unregistered non-EAL pthread whose *_lcore_id* is LCORE_ID_ANY.
+
+For unregistered non-EAL pthreads (with a LCORE_ID_ANY *_lcore_id*), some libraries will use an alternative unique ID (e.g. TID), some will not be impacted at all, and some will work but with limitations (e.g. timer and mempool libraries).
 
 All these impacts are mentioned in :ref:`known_issue_label` section.
 
@@ -613,9 +617,9 @@ Known Issues
 + rte_mempool
 
   The rte_mempool uses a per-lcore cache inside the mempool.
-  For non-EAL pthreads, ``rte_lcore_id()`` will not return a valid number.
-  So for now, when rte_mempool is used with non-EAL pthreads, the put/get operations will bypass the default mempool cache and there is a performance penalty because of this bypass.
-  Only user-owned external caches can be used in a non-EAL context in conjunction with ``rte_mempool_generic_put()`` and ``rte_mempool_generic_get()`` that accept an explicit cache parameter.
+  For unregistered non-EAL pthreads, ``rte_lcore_id()`` will not return a valid number.
+  So for now, when rte_mempool is used with unregistered non-EAL pthreads, the put/get operations will bypass the default mempool cache and there is a performance penalty because of this bypass.
+  Only user-owned external caches can be used in an unregistered non-EAL context in conjunction with ``rte_mempool_generic_put()`` and ``rte_mempool_generic_get()`` that accept an explicit cache parameter.
 
 + rte_ring
 
@@ -660,15 +664,15 @@ Known Issues
 
 + rte_timer
 
-  Running  ``rte_timer_manage()`` on a non-EAL pthread is not allowed. However, resetting/stopping the timer from a non-EAL pthread is allowed.
+  Running  ``rte_timer_manage()`` on an unregistered non-EAL pthread is not allowed. However, resetting/stopping the timer from a non-EAL pthread is allowed.
 
 + rte_log
 
-  In non-EAL pthreads, there is no per thread loglevel and logtype, global loglevels are used.
+  In unregistered non-EAL pthreads, there is no per thread loglevel and logtype, global loglevels are used.
 
 + misc
 
-  The debug statistics of rte_ring, rte_mempool and rte_timer are not supported in a non-EAL pthread.
+  The debug statistics of rte_ring, rte_mempool and rte_timer are not supported in an unregistered non-EAL pthread.
 
 cgroup control
 ~~~~~~~~~~~~~~
diff --git a/doc/guides/prog_guide/mempool_lib.rst b/doc/guides/prog_guide/mempool_lib.rst
index f8b430d656..e3e1f940be 100644
--- a/doc/guides/prog_guide/mempool_lib.rst
+++ b/doc/guides/prog_guide/mempool_lib.rst
@@ -103,7 +103,7 @@ The maximum size of the cache is static and is defined at compilation time (CONF
 Alternatively to the internal default per-lcore local cache, an application can create and manage external caches through the ``rte_mempool_cache_create()``, ``rte_mempool_cache_free()`` and ``rte_mempool_cache_flush()`` calls.
 These user-owned caches can be explicitly passed to ``rte_mempool_generic_put()`` and ``rte_mempool_generic_get()``.
 The ``rte_mempool_default_cache()`` call returns the default internal cache if any.
-In contrast to the default caches, user-owned caches can be used by non-EAL threads too.
+In contrast to the default caches, user-owned caches can be used by unregistered non-EAL threads too.
 
 Mempool Handlers
 ------------------------
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 86d32a3dd7..7db05428e7 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -6,12 +6,13 @@
 #include <limits.h>
 #include <string.h>
 
-#include <rte_errno.h>
-#include <rte_log.h>
-#include <rte_eal.h>
-#include <rte_lcore.h>
 #include <rte_common.h>
 #include <rte_debug.h>
+#include <rte_eal.h>
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_spinlock.h>
 
 #include "eal_private.h"
 #include "eal_thread.h"
@@ -220,3 +221,38 @@ rte_socket_id_by_idx(unsigned int idx)
 	}
 	return config->numa_nodes[idx];
 }
+
+static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
+
+unsigned int
+eal_lcore_non_eal_allocate(void)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	unsigned int lcore_id;
+
+	rte_spinlock_lock(&lcore_lock);
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (cfg->lcore_role[lcore_id] != ROLE_OFF)
+			continue;
+		cfg->lcore_role[lcore_id] = ROLE_NON_EAL;
+		cfg->lcore_count++;
+		break;
+	}
+	if (lcore_id == RTE_MAX_LCORE)
+		RTE_LOG(DEBUG, EAL, "No lcore available.\n");
+	rte_spinlock_unlock(&lcore_lock);
+	return lcore_id;
+}
+
+void
+eal_lcore_non_eal_release(unsigned int lcore_id)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+
+	rte_spinlock_lock(&lcore_lock);
+	if (cfg->lcore_role[lcore_id] == ROLE_NON_EAL) {
+		cfg->lcore_role[lcore_id] = ROLE_OFF;
+		cfg->lcore_count--;
+	}
+	rte_spinlock_unlock(&lcore_lock);
+}
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index a7ae0691bf..1cbddc4b5b 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -236,3 +236,36 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
 	pthread_join(*thread, NULL);
 	return -ret;
 }
+
+void
+rte_thread_register(void)
+{
+	unsigned int lcore_id;
+	rte_cpuset_t cpuset;
+
+	/* EAL init flushes all lcores, we can't register before. */
+	assert(internal_config.init_complete == 1);
+	if (pthread_getaffinity_np(pthread_self(), sizeof(cpuset),
+			&cpuset) != 0)
+		CPU_ZERO(&cpuset);
+	lcore_id = eal_lcore_non_eal_allocate();
+	if (lcore_id >= RTE_MAX_LCORE)
+		lcore_id = LCORE_ID_ANY;
+	rte_thread_init(lcore_id, &cpuset);
+	if (lcore_id != LCORE_ID_ANY)
+		RTE_LOG(DEBUG, EAL, "Registered non-EAL thread as lcore %u.\n",
+			lcore_id);
+}
+
+void
+rte_thread_unregister(void)
+{
+	unsigned int lcore_id = rte_lcore_id();
+
+	if (lcore_id != LCORE_ID_ANY)
+		eal_lcore_non_eal_release(lcore_id);
+	rte_thread_uninit();
+	if (lcore_id != LCORE_ID_ANY)
+		RTE_LOG(DEBUG, EAL, "Unregistered non-EAL thread (was lcore %u).\n",
+			lcore_id);
+}
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 0592fcd694..73238ff157 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -396,6 +396,24 @@ uint64_t get_tsc_freq(void);
  */
 uint64_t get_tsc_freq_arch(void);
 
+/**
+ * Allocate a free lcore to associate to a non-EAL thread.
+ *
+ * @return
+ *   - the id of a lcore with role ROLE_NON_EAL on success.
+ *   - RTE_MAX_LCORE if none was available.
+ */
+unsigned int eal_lcore_non_eal_allocate(void);
+
+/**
+ * Release the lcore used by a non-EAL thread.
+ * Counterpart of eal_lcore_non_eal_allocate().
+ *
+ * @param lcore_id
+ *   The lcore with role ROLE_NON_EAL to release.
+ */
+void eal_lcore_non_eal_release(unsigned int lcore_id);
+
 /**
  * Prepare physical memory mapping
  * i.e. hugepages on Linux and
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 3968c40693..ea86220394 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -31,6 +31,7 @@ enum rte_lcore_role_t {
 	ROLE_RTE,
 	ROLE_OFF,
 	ROLE_SERVICE,
+	ROLE_NON_EAL,
 };
 
 /**
@@ -67,7 +68,8 @@ rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
  *   to run threads with lcore IDs 0, 1, 2 and 3 on physical core 10..
  *
  * @return
- *  Logical core ID (in EAL thread) or LCORE_ID_ANY (in non-EAL thread)
+ *  Logical core ID (in EAL thread or registered non-EAL thread) or
+ *  LCORE_ID_ANY (in unregistered non-EAL thread)
  */
 static inline unsigned
 rte_lcore_id(void)
@@ -279,6 +281,20 @@ int rte_thread_setname(pthread_t id, const char *name);
 __rte_experimental
 int rte_thread_getname(pthread_t id, char *name, size_t len);
 
+/**
+ * Register current non-EAL thread as a lcore.
+ */
+__rte_experimental
+void
+rte_thread_register(void);
+
+/**
+ * Unregister current thread and release lcore if one was associated.
+ */
+__rte_experimental
+void
+rte_thread_unregister(void);
+
 /**
  * Create a control thread.
  *
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 5831eea4b0..39c41d445d 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -396,6 +396,8 @@ EXPERIMENTAL {
 
 	# added in 20.08
 	__rte_trace_mem_per_thread_free;
+	rte_thread_register;
+	rte_thread_unregister;
 };
 
 INTERNAL {
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 652d19f9f1..9e0ee052b3 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -28,9 +28,9 @@
  * rte_mempool_get() or rte_mempool_put() are designed to be called from an EAL
  * thread due to the internal per-lcore cache. Due to the lack of caching,
  * rte_mempool_get() or rte_mempool_put() performance will suffer when called
- * by non-EAL threads. Instead, non-EAL threads should call
- * rte_mempool_generic_get() or rte_mempool_generic_put() with a user cache
- * created with rte_mempool_cache_create().
+ * by unregistered non-EAL threads. Instead, unregistered non-EAL threads
+ * should call rte_mempool_generic_get() or rte_mempool_generic_put() with a
+ * user cache created with rte_mempool_cache_create().
  */
 
 #include <stdio.h>
@@ -1233,7 +1233,7 @@ void rte_mempool_dump(FILE *f, struct rte_mempool *mp);
 /**
  * Create a user-owned mempool cache.
  *
- * This can be used by non-EAL threads to enable caching when they
+ * This can be used by unregistered non-EAL threads to enable caching when they
  * interact with a mempool.
  *
  * @param size
@@ -1264,7 +1264,8 @@ rte_mempool_cache_free(struct rte_mempool_cache *cache);
  * @param lcore_id
  *   The logical core id.
  * @return
- *   A pointer to the mempool cache or NULL if disabled or non-EAL thread.
+ *   A pointer to the mempool cache or NULL if disabled or unregistered non-EAL
+ *   thread.
  */
 static __rte_always_inline struct rte_mempool_cache *
 rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v3 7/9] eal: add lcore init callbacks
  2020-06-22 13:25 ` [dpdk-dev] [PATCH v3 0/9] Register non-EAL threads as lcore David Marchand
                     ` (5 preceding siblings ...)
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores David Marchand
@ 2020-06-22 13:25   ` David Marchand
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 8/9] eal: add lcore iterators David Marchand
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 9/9] mempool/bucket: handle non-EAL lcores David Marchand
  8 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-22 13:25 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Neil Horman

DPDK components and applications can have their say when a new lcore is
initialized. For this, they can register a callback for initializing and
releasing their private data.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v2:
- added missing test,
- fixed rollback on lcore register,

Changes since v1:
- added unit test (since missing some coverage, for v3),
- preferred callback and removed mention of notification,

---
 app/test/test_lcores.c                   | 230 +++++++++++++++++++++++
 lib/librte_eal/common/eal_common_lcore.c | 138 +++++++++++++-
 lib/librte_eal/common/eal_private.h      |   3 +-
 lib/librte_eal/include/rte_lcore.h       |  68 +++++++
 lib/librte_eal/rte_eal_version.map       |   2 +
 5 files changed, 435 insertions(+), 6 deletions(-)

diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
index 864bcbade7..e36dceedf9 100644
--- a/app/test/test_lcores.c
+++ b/app/test/test_lcores.c
@@ -5,6 +5,7 @@
 #include <pthread.h>
 #include <string.h>
 
+#include <rte_common.h>
 #include <rte_lcore.h>
 
 #include "test.h"
@@ -113,6 +114,229 @@ test_non_eal_lcores(unsigned int eal_threads_count)
 	return ret;
 }
 
+struct limit_lcore_context {
+	unsigned int init;
+	unsigned int max;
+	unsigned int uninit;
+};
+static int
+limit_lcores_init(unsigned int lcore_id __rte_unused, void *arg)
+{
+	struct limit_lcore_context *l = arg;
+
+	l->init++;
+	if (l->init > l->max)
+		return -1;
+	return 0;
+}
+static void
+limit_lcores_uninit(unsigned int lcore_id __rte_unused, void *arg)
+{
+	struct limit_lcore_context *l = arg;
+
+	l->uninit++;
+}
+
+static int
+test_lcores_callback(unsigned int eal_threads_count)
+{
+	struct limit_lcore_context l;
+	void *handle;
+
+	/* Refuse last lcore => callback register error. */
+	memset(&l, 0, sizeof(l));
+	l.max = eal_threads_count - 1;
+	handle = rte_lcore_callback_register("limit", limit_lcores_init,
+		limit_lcores_uninit, &l);
+	if (handle != NULL) {
+		printf("lcore callback register should have failed\n");
+		goto error;
+	}
+	/* Refusal happens at the n th call to the init callback.
+	 * Besides, n - 1 were accepted, so we expect as many uninit calls when
+	 * the rollback happens.
+	 */
+	if (l.init != eal_threads_count) {
+		printf("lcore callback register failed but incorrect init calls, expected %u, got %u\n",
+			eal_threads_count, l.init);
+		goto error;
+	}
+	if (l.uninit != eal_threads_count - 1) {
+		printf("lcore callback register failed but incorrect uninit calls, expected %u, got %u\n",
+			eal_threads_count - 1, l.uninit);
+		goto error;
+	}
+
+	/* Accept all lcore and unregister. */
+	memset(&l, 0, sizeof(l));
+	l.max = eal_threads_count;
+	handle = rte_lcore_callback_register("limit", limit_lcores_init,
+		limit_lcores_uninit, &l);
+	if (handle == NULL) {
+		printf("lcore callback register failed\n");
+		goto error;
+	}
+	if (l.uninit != 0) {
+		printf("lcore callback register succeeded but incorrect uninit calls, expected 0, got %u\n",
+			l.uninit);
+		goto error;
+	}
+	rte_lcore_callback_unregister(handle);
+	handle = NULL;
+	if (l.init != eal_threads_count) {
+		printf("lcore callback unregister done but incorrect init calls, expected %u, got %u\n",
+			eal_threads_count, l.init);
+		goto error;
+	}
+	if (l.uninit != eal_threads_count) {
+		printf("lcore callback unregister done but incorrect uninit calls, expected %u, got %u\n",
+			eal_threads_count, l.uninit);
+		goto error;
+	}
+
+	return 0;
+
+error:
+	if (handle != NULL)
+		rte_lcore_callback_unregister(handle);
+
+	return -1;
+}
+
+static int
+test_non_eal_lcores_callback(unsigned int eal_threads_count)
+{
+	struct thread_context thread_contexts[2];
+	unsigned int non_eal_threads_count;
+	struct limit_lcore_context l[2];
+	unsigned int registered_count;
+	struct thread_context *t;
+	void *handle[2];
+	unsigned int i;
+	int ret;
+
+	memset(l, 0, sizeof(l));
+	handle[0] = handle[1] = NULL;
+	non_eal_threads_count = 0;
+	registered_count = 0;
+
+	/* This test requires two empty slots to be sure lcore init refusal is
+	 * because of callback execution.
+	 */
+	if (eal_threads_count + 2 >= RTE_MAX_LCORE)
+		return 0;
+
+	/* Register two callbacks:
+	 * - first one accepts any lcore,
+	 * - second one accepts all EAL lcore + one more for the first non-EAL
+	 *   thread, then refuses the next lcore.
+	 */
+	l[0].max = UINT_MAX;
+	handle[0] = rte_lcore_callback_register("no_limit", limit_lcores_init,
+		limit_lcores_uninit, &l[0]);
+	if (handle[0] == NULL) {
+		printf("lcore callback [0] register failed\n");
+		goto error;
+	}
+	l[1].max = eal_threads_count + 1;
+	handle[1] = rte_lcore_callback_register("limit", limit_lcores_init,
+		limit_lcores_uninit, &l[1]);
+	if (handle[1] == NULL) {
+		printf("lcore callback [1] register failed\n");
+		goto error;
+	}
+	if (l[0].init != eal_threads_count || l[1].init != eal_threads_count) {
+		printf("lcore callbacks register succeeded but incorrect init calls, expected %u, %u, got %u, %u\n",
+			eal_threads_count, eal_threads_count,
+			l[0].init, l[1].init);
+		goto error;
+	}
+	if (l[0].uninit != 0 || l[1].uninit != 0) {
+		printf("lcore callbacks register succeeded but incorrect uninit calls, expected 0, 0, got %u, %u\n",
+			l[0].uninit, l[1].uninit);
+		goto error;
+	}
+	/* First thread that expects a valid lcore id. */
+	t = &thread_contexts[0];
+	t->state = INIT;
+	t->registered_count = &registered_count;
+	t->lcore_id_any = false;
+	if (pthread_create(&t->id, NULL, thread_loop, t) != 0)
+		goto cleanup_threads;
+	non_eal_threads_count++;
+	while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+			non_eal_threads_count)
+		;
+	if (l[0].init != eal_threads_count + 1 ||
+			l[1].init != eal_threads_count + 1) {
+		printf("Incorrect init calls, expected %u, %u, got %u, %u\n",
+			eal_threads_count + 1, eal_threads_count + 1,
+			l[0].init, l[1].init);
+		goto cleanup_threads;
+	}
+	if (l[0].uninit != 0 || l[1].uninit != 0) {
+		printf("Incorrect uninit calls, expected 0, 0, got %u, %u\n",
+			l[0].uninit, l[1].uninit);
+		goto cleanup_threads;
+	}
+	/* Second thread, that expects LCORE_ID_ANY because of init refusal. */
+	t = &thread_contexts[1];
+	t->state = INIT;
+	t->registered_count = &registered_count;
+	t->lcore_id_any = true;
+	if (pthread_create(&t->id, NULL, thread_loop, t) != 0)
+		goto cleanup_threads;
+	non_eal_threads_count++;
+	while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+			non_eal_threads_count)
+		;
+	if (l[0].init != eal_threads_count + 2 ||
+			l[1].init != eal_threads_count + 2) {
+		printf("Incorrect init calls, expected %u, %u, got %u, %u\n",
+			eal_threads_count + 2, eal_threads_count + 2,
+			l[0].init, l[1].init);
+		goto cleanup_threads;
+	}
+	if (l[0].uninit != 1 || l[1].uninit != 0) {
+		printf("Incorrect uninit calls, expected 1, 0, got %u, %u\n",
+			l[0].uninit, l[1].uninit);
+		goto cleanup_threads;
+	}
+	/* Release all threads, and check their states. */
+	__atomic_store_n(&registered_count, 0, __ATOMIC_RELEASE);
+	ret = 0;
+	for (i = 0; i < non_eal_threads_count; i++) {
+		t = &thread_contexts[i];
+		pthread_join(t->id, NULL);
+		if (t->state != DONE)
+			ret = -1;
+	}
+	if (ret < 0)
+		goto error;
+	if (l[0].uninit != 2 || l[1].uninit != 1) {
+		printf("Threads reported having successfully registered and unregistered, but incorrect uninit calls, expected 2, 1, got %u, %u\n",
+			l[0].uninit, l[1].uninit);
+		goto error;
+	}
+	rte_lcore_callback_unregister(handle[0]);
+	rte_lcore_callback_unregister(handle[1]);
+	return 0;
+
+cleanup_threads:
+	/* Release all threads */
+	__atomic_store_n(&registered_count, 0, __ATOMIC_RELEASE);
+	for (i = 0; i < non_eal_threads_count; i++) {
+		t = &thread_contexts[i];
+		pthread_join(t->id, NULL);
+	}
+error:
+	if (handle[1] != NULL)
+		rte_lcore_callback_unregister(handle[1]);
+	if (handle[0] != NULL)
+		rte_lcore_callback_unregister(handle[0]);
+	return -1;
+}
+
 static int
 test_lcores(void)
 {
@@ -133,6 +357,12 @@ test_lcores(void)
 	if (test_non_eal_lcores(eal_threads_count) < 0)
 		return TEST_FAILED;
 
+	if (test_lcores_callback(eal_threads_count) < 0)
+		return TEST_FAILED;
+
+	if (test_non_eal_lcores_callback(eal_threads_count) < 0)
+		return TEST_FAILED;
+
 	return TEST_SUCCESS;
 }
 
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 7db05428e7..3737a75ece 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -223,11 +223,114 @@ rte_socket_id_by_idx(unsigned int idx)
 }
 
 static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
+struct lcore_callback {
+	TAILQ_ENTRY(lcore_callback) next;
+	char *name;
+	rte_lcore_init_cb init;
+	rte_lcore_uninit_cb uninit;
+	void *arg;
+};
+static TAILQ_HEAD(lcore_callbacks_head, lcore_callback) lcore_callbacks =
+	TAILQ_HEAD_INITIALIZER(lcore_callbacks);
+
+static int
+callback_init(struct lcore_callback *callback, unsigned int lcore_id)
+{
+	if (callback->init == NULL)
+		return 0;
+	RTE_LOG(DEBUG, EAL, "Call init for lcore callback %s, lcore_id %u\n",
+		callback->name, lcore_id);
+	return callback->init(lcore_id, callback->arg);
+}
+
+static void
+callback_uninit(struct lcore_callback *callback, unsigned int lcore_id)
+{
+	if (callback->uninit == NULL)
+		return;
+	RTE_LOG(DEBUG, EAL, "Call uninit for lcore callback %s, lcore_id %u\n",
+		callback->name, lcore_id);
+	callback->uninit(lcore_id, callback->arg);
+}
+
+void *
+rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
+	rte_lcore_uninit_cb uninit, void *arg)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	struct lcore_callback *callback;
+	unsigned int lcore_id;
+
+	callback = calloc(1, sizeof(*callback));
+	if (callback == NULL)
+		return NULL;
+	if (asprintf(&callback->name, "%s-%p", name, arg) == -1) {
+		free(callback);
+		return NULL;
+	}
+	callback->init = init;
+	callback->uninit = uninit;
+	callback->arg = arg;
+	rte_spinlock_lock(&lcore_lock);
+	if (callback->init == NULL)
+		goto no_init;
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (cfg->lcore_role[lcore_id] == ROLE_OFF)
+			continue;
+		if (callback_init(callback, lcore_id) == 0)
+			continue;
+		/* Init refused for this lcore: uninitialize the lcores already
+		 * accepted, then free the callback and its asprintf'd name. */
+		for (; lcore_id != 0; lcore_id--) {
+			if (cfg->lcore_role[lcore_id - 1] == ROLE_OFF)
+				continue;
+			callback_uninit(callback, lcore_id - 1);
+		}
+		free(callback->name);
+		free(callback);
+		callback = NULL;
+		goto out;
+	}
+no_init:
+	TAILQ_INSERT_TAIL(&lcore_callbacks, callback, next);
+	RTE_LOG(DEBUG, EAL, "Registered new lcore callback %s (%sinit, %suninit).\n",
+		callback->name, callback->init == NULL ? "NO " : "",
+		callback->uninit == NULL ? "NO " : "");
+out:
+	rte_spinlock_unlock(&lcore_lock);
+	return callback;
+}
+
+void
+rte_lcore_callback_unregister(void *handle)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	struct lcore_callback *callback = handle;
+	unsigned int lcore_id;
+
+	rte_spinlock_lock(&lcore_lock);
+	if (callback->uninit == NULL)
+		goto no_uninit;
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (cfg->lcore_role[lcore_id] == ROLE_OFF)
+			continue;
+		callback_uninit(callback, lcore_id);
+	}
+no_uninit:
+	TAILQ_REMOVE(&lcore_callbacks, callback, next);
+	rte_spinlock_unlock(&lcore_lock);
+	RTE_LOG(DEBUG, EAL, "Unregistered lcore callback %s.\n",
+		callback->name);
+	free(callback->name);
+	free(callback);
+}
 
 unsigned int
 eal_lcore_non_eal_allocate(void)
 {
 	struct rte_config *cfg = rte_eal_get_configuration();
+	struct lcore_callback *callback;
+	struct lcore_callback *prev;
 	unsigned int lcore_id;
 
 	rte_spinlock_lock(&lcore_lock);
@@ -238,8 +341,29 @@ eal_lcore_non_eal_allocate(void)
 		cfg->lcore_count++;
 		break;
 	}
-	if (lcore_id == RTE_MAX_LCORE)
+	if (lcore_id == RTE_MAX_LCORE) {
 		RTE_LOG(DEBUG, EAL, "No lcore available.\n");
+		goto out;
+	}
+	TAILQ_FOREACH(callback, &lcore_callbacks, next) {
+		if (callback_init(callback, lcore_id) == 0)
+			continue;
+		/* Callback refused init for this lcore, call uninit for all
+		 * previous callbacks.
+		 */
+		prev = TAILQ_PREV(callback, lcore_callbacks_head, next);
+		while (prev != NULL) {
+			callback_uninit(prev, lcore_id);
+			prev = TAILQ_PREV(prev, lcore_callbacks_head, next);
+		}
+		RTE_LOG(DEBUG, EAL, "Initialization refused for lcore %u.\n",
+			lcore_id);
+		cfg->lcore_role[lcore_id] = ROLE_OFF;
+		cfg->lcore_count--;
+		lcore_id = RTE_MAX_LCORE;
+		goto out;
+	}
+out:
 	rte_spinlock_unlock(&lcore_lock);
 	return lcore_id;
 }
@@ -248,11 +372,15 @@ void
 eal_lcore_non_eal_release(unsigned int lcore_id)
 {
 	struct rte_config *cfg = rte_eal_get_configuration();
+	struct lcore_callback *callback;
 
 	rte_spinlock_lock(&lcore_lock);
-	if (cfg->lcore_role[lcore_id] == ROLE_NON_EAL) {
-		cfg->lcore_role[lcore_id] = ROLE_OFF;
-		cfg->lcore_count--;
-	}
+	if (cfg->lcore_role[lcore_id] != ROLE_NON_EAL)
+		goto out;
+	TAILQ_FOREACH(callback, &lcore_callbacks, next)
+		callback_uninit(callback, lcore_id);
+	cfg->lcore_role[lcore_id] = ROLE_OFF;
+	cfg->lcore_count--;
+out:
 	rte_spinlock_unlock(&lcore_lock);
 }
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 73238ff157..fc79564111 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -401,7 +401,8 @@ uint64_t get_tsc_freq_arch(void);
  *
  * @return
  *   - the id of a lcore with role ROLE_NON_EAL on success.
- *   - RTE_MAX_LCORE if none was available.
+ *   - RTE_MAX_LCORE if none was available or initializing was refused (see
+ *     rte_lcore_callback_register).
  */
 unsigned int eal_lcore_non_eal_allocate(void);
 
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index ea86220394..27b29a1f87 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -229,6 +229,74 @@ unsigned int rte_get_next_lcore(unsigned int i, int skip_master, int wrap);
 	     i<RTE_MAX_LCORE;						\
 	     i = rte_get_next_lcore(i, 1, 0))
 
+/**
+ * Callback prototype for initializing lcores.
+ *
+ * @param lcore_id
+ *   The lcore to consider.
+ * @param arg
+ *   An opaque pointer passed at callback registration.
+ * @return
+ *   - -1 when refusing this operation,
+ *   - 0 otherwise.
+ */
+typedef int (*rte_lcore_init_cb)(unsigned int lcore_id, void *arg);
+
+/**
+ * Callback prototype for uninitializing lcores.
+ *
+ * @param lcore_id
+ *   The lcore to consider.
+ * @param arg
+ *   An opaque pointer passed at callback registration.
+ */
+typedef void (*rte_lcore_uninit_cb)(unsigned int lcore_id, void *arg);
+
+/**
+ * Register callbacks invoked when initializing and uninitializing a lcore.
+ *
+ * This function calls the init callback with all initialized lcores.
+ * Any error reported by the init callback triggers a rollback calling the
+ * uninit callback for each lcore.
+ * If this step succeeds, the callbacks are put in the lcore callbacks list
+ * that will get called for each lcore allocation/release.
+ *
+ * Note: callbacks execution is serialised under a lock protecting the lcores
+ * and callbacks list.
+ *
+ * @param name
+ *   A name serving as a small description for this callback.
+ * @param init
+ *   The callback invoked when a lcore_id is initialized.
+ * @param uninit
+ *   The callback invoked when a lcore_id is uninitialized.
+ * @param arg
+ *   An optional argument that gets passed to the callback when it gets
+ *   invoked.
+ * @return
+ *   On success, returns an opaque pointer for the registered object.
+ *   On failure (either memory allocation issue in the function itself or an
+ *   error is returned by the init callback itself), returns NULL.
+ */
+__rte_experimental
+void *
+rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
+	rte_lcore_uninit_cb uninit, void *arg);
+
+/**
+ * Unregister callbacks previously registered with rte_lcore_callback_register.
+ *
+ * This function calls the uninit callback with all initialized lcores.
+ * The callbacks are then removed from the lcore callbacks list.
+ *
+ * @param handle
+ *   The handle pointer returned by a former successful call to
+ *   rte_lcore_callback_register.
+ */
+__rte_experimental
+void
+rte_lcore_callback_unregister(void *handle);
+
 /**
  * Set core affinity of the current thread.
  * Support both EAL and non-EAL thread and update TLS.
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 39c41d445d..aeee7cf431 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -396,6 +396,8 @@ EXPERIMENTAL {
 
 	# added in 20.08
 	__rte_trace_mem_per_thread_free;
+	rte_lcore_callback_register;
+	rte_lcore_callback_unregister;
 	rte_thread_register;
 	rte_thread_unregister;
 };
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v3 8/9] eal: add lcore iterators
  2020-06-22 13:25 ` [dpdk-dev] [PATCH v3 0/9] Register non-EAL threads as lcore David Marchand
                     ` (6 preceding siblings ...)
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 7/9] eal: add lcore init callbacks David Marchand
@ 2020-06-22 13:25   ` David Marchand
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 9/9] mempool/bucket: handle non-EAL lcores David Marchand
  8 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-22 13:25 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Neil Horman

Add a helper to iterate all lcores.
The iterator callback is read-only with respect to the lcores list.

Implement a dump function on top of this for debugging.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v2:
- added rte_lcore_dump calls in unit test, for basic check,

Changes since v1:
- introduced lcore iterators and implemented rte_lcore_dump,
  this iterator mechanism can then be used outside of EAL,

---
 app/test/test_lcores.c                    |  3 +
 lib/librte_eal/common/eal_common_lcore.c  | 77 ++++++++++++++++++++---
 lib/librte_eal/common/eal_common_thread.c | 16 +++--
 lib/librte_eal/common/eal_thread.h        | 13 +++-
 lib/librte_eal/freebsd/eal.c              |  2 +-
 lib/librte_eal/freebsd/eal_thread.c       |  2 +-
 lib/librte_eal/include/rte_lcore.h        | 47 +++++++++++++-
 lib/librte_eal/linux/eal.c                |  2 +-
 lib/librte_eal/linux/eal_thread.c         |  2 +-
 lib/librte_eal/rte_eal_version.map        |  2 +
 10 files changed, 143 insertions(+), 23 deletions(-)

diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
index e36dceedf9..dd6fa466c8 100644
--- a/app/test/test_lcores.c
+++ b/app/test/test_lcores.c
@@ -302,6 +302,7 @@ test_non_eal_lcores_callback(unsigned int eal_threads_count)
 			l[0].uninit, l[1].uninit);
 		goto cleanup_threads;
 	}
+	rte_lcore_dump(stdout);
 	/* Release all threads, and check their states. */
 	__atomic_store_n(&registered_count, 0, __ATOMIC_RELEASE);
 	ret = 0;
@@ -313,6 +314,7 @@ test_non_eal_lcores_callback(unsigned int eal_threads_count)
 	}
 	if (ret < 0)
 		goto error;
+	rte_lcore_dump(stdout);
 	if (l[0].uninit != 2 || l[1].uninit != 1) {
 		printf("Threads reported having successfully registered and unregistered, but incorrect uninit calls, expected 2, 1, got %u, %u\n",
 			l[0].uninit, l[1].uninit);
@@ -353,6 +355,7 @@ test_lcores(void)
 	}
 	printf("EAL threads count: %u, RTE_MAX_LCORE=%u\n", eal_threads_count,
 		RTE_MAX_LCORE);
+	rte_lcore_dump(stdout);
 
 	if (test_non_eal_lcores(eal_threads_count) < 0)
 		return TEST_FAILED;
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 3737a75ece..5a54b18fd6 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -12,7 +12,7 @@
 #include <rte_errno.h>
 #include <rte_lcore.h>
 #include <rte_log.h>
-#include <rte_spinlock.h>
+#include <rte_rwlock.h>
 
 #include "eal_private.h"
 #include "eal_thread.h"
@@ -222,7 +222,7 @@ rte_socket_id_by_idx(unsigned int idx)
 	return config->numa_nodes[idx];
 }
 
-static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
+static rte_rwlock_t lcore_lock = RTE_RWLOCK_INITIALIZER;
 struct lcore_callback {
 	TAILQ_ENTRY(lcore_callback) next;
 	char *name;
@@ -271,7 +271,7 @@ rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
 	callback->init = init;
 	callback->uninit = uninit;
 	callback->arg = arg;
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	if (callback->init == NULL)
 		goto no_init;
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
@@ -297,7 +297,7 @@ rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
 		callback->name, callback->init == NULL ? "NO " : "",
 		callback->uninit == NULL ? "NO " : "");
 out:
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
 	return callback;
 }
 
@@ -308,7 +308,7 @@ rte_lcore_callback_unregister(void *handle)
 	struct lcore_callback *callback = handle;
 	unsigned int lcore_id;
 
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	if (callback->uninit == NULL)
 		goto no_uninit;
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
@@ -318,7 +318,7 @@ rte_lcore_callback_unregister(void *handle)
 	}
 no_uninit:
 	TAILQ_REMOVE(&lcore_callbacks, callback, next);
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
 	RTE_LOG(DEBUG, EAL, "Unregistered lcore callback %s-%p.\n",
 		callback->name, callback->arg);
 	free(callback->name);
@@ -333,7 +333,7 @@ eal_lcore_non_eal_allocate(void)
 	struct lcore_callback *prev;
 	unsigned int lcore_id;
 
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		if (cfg->lcore_role[lcore_id] != ROLE_OFF)
 			continue;
@@ -364,7 +364,7 @@ eal_lcore_non_eal_allocate(void)
 		goto out;
 	}
 out:
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
 	return lcore_id;
 }
 
@@ -374,7 +374,7 @@ eal_lcore_non_eal_release(unsigned int lcore_id)
 	struct rte_config *cfg = rte_eal_get_configuration();
 	struct lcore_callback *callback;
 
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	if (cfg->lcore_role[lcore_id] != ROLE_NON_EAL)
 		goto out;
 	TAILQ_FOREACH(callback, &lcore_callbacks, next)
@@ -382,5 +382,62 @@ eal_lcore_non_eal_release(unsigned int lcore_id)
 	cfg->lcore_role[lcore_id] = ROLE_OFF;
 	cfg->lcore_count--;
 out:
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
+}
+
+int
+rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	unsigned int lcore_id;
+	int ret = 0;
+
+	rte_rwlock_read_lock(&lcore_lock);
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (cfg->lcore_role[lcore_id] == ROLE_OFF)
+			continue;
+		ret = cb(lcore_id, arg);
+		if (ret != 0)
+			break;
+	}
+	rte_rwlock_read_unlock(&lcore_lock);
+	return ret;
+}
+
+static int
+lcore_dump_cb(unsigned int lcore_id, void *arg)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+	const char *role;
+	FILE *f = arg;
+	int ret;
+
+	switch (cfg->lcore_role[lcore_id]) {
+	case ROLE_RTE:
+		role = "RTE";
+		break;
+	case ROLE_SERVICE:
+		role = "SERVICE";
+		break;
+	case ROLE_NON_EAL:
+		role = "NON_EAL";
+		break;
+	default:
+		role = "UNKNOWN";
+		break;
+	}
+
+	ret = eal_thread_dump_affinity(&lcore_config[lcore_id].cpuset, cpuset,
+		sizeof(cpuset));
+	fprintf(f, "lcore %u, socket %u, role %s, cpuset %s%s\n", lcore_id,
+		rte_lcore_to_socket_id(lcore_id), role, cpuset,
+		ret == 0 ? "" : "...");
+	return 0;
+}
+
+void
+rte_lcore_dump(FILE *f)
+{
+	rte_lcore_iterate(lcore_dump_cb, f);
 }
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 1cbddc4b5b..e00374b06f 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -104,17 +104,14 @@ rte_thread_get_affinity(rte_cpuset_t *cpusetp)
 }
 
 int
-eal_thread_dump_affinity(char *str, unsigned size)
+eal_thread_dump_affinity(rte_cpuset_t *cpuset, char *str, unsigned int size)
 {
-	rte_cpuset_t cpuset;
 	unsigned cpu;
 	int ret;
 	unsigned int out = 0;
 
-	rte_thread_get_affinity(&cpuset);
-
 	for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
-		if (!CPU_ISSET(cpu, &cpuset))
+		if (!CPU_ISSET(cpu, cpuset))
 			continue;
 
 		ret = snprintf(str + out,
@@ -137,6 +134,15 @@ eal_thread_dump_affinity(char *str, unsigned size)
 	return ret;
 }
 
+int
+eal_thread_dump_current_affinity(char *str, unsigned int size)
+{
+	rte_cpuset_t cpuset;
+
+	rte_thread_get_affinity(&cpuset);
+	return eal_thread_dump_affinity(&cpuset, str, size);
+}
+
 void
 rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
 {
diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
index 4ecd8fd53a..13ec252e01 100644
--- a/lib/librte_eal/common/eal_thread.h
+++ b/lib/librte_eal/common/eal_thread.h
@@ -47,13 +47,15 @@ unsigned eal_cpu_socket_id(unsigned cpu_id);
 #define RTE_CPU_AFFINITY_STR_LEN            256
 
 /**
- * Dump the current pthread cpuset.
+ * Dump the cpuset as a human readable string.
  * This function is private to EAL.
  *
  * Note:
  *   If the dump size is greater than the size of given buffer,
  *   the string will be truncated and with '\0' at the end.
  *
+ * @param cpuset
+ *   The CPU affinity object to dump.
  * @param str
  *   The string buffer the cpuset will dump to.
  * @param size
@@ -62,6 +64,13 @@ unsigned eal_cpu_socket_id(unsigned cpu_id);
  *   0 for success, -1 if truncation happens.
  */
 int
-eal_thread_dump_affinity(char *str, unsigned size);
+eal_thread_dump_affinity(rte_cpuset_t *cpuset, char *str, unsigned int size);
+
+/**
+ * Dump the current thread cpuset.
+ * This is a wrapper on eal_thread_dump_affinity().
+ */
+int
+eal_thread_dump_current_affinity(char *str, unsigned int size);
 
 #endif /* EAL_THREAD_H */
diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
index b5ea11df16..69a6f7d8c4 100644
--- a/lib/librte_eal/freebsd/eal.c
+++ b/lib/librte_eal/freebsd/eal.c
@@ -886,7 +886,7 @@ rte_eal_init(int argc, char **argv)
 	rte_thread_init(rte_config.master_lcore,
 		&lcore_config[rte_config.master_lcore].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
 		rte_config.master_lcore, thread_id, cpuset,
diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
index c1fb8eb2d8..b1a3619f51 100644
--- a/lib/librte_eal/freebsd/eal_thread.c
+++ b/lib/librte_eal/freebsd/eal_thread.c
@@ -92,7 +92,7 @@ eal_thread_loop(__rte_unused void *arg)
 
 	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
 		lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
 
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 27b29a1f87..4dee7cbcd7 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -261,8 +261,8 @@ typedef void (*rte_lcore_uninit_cb)(unsigned int lcore_id, void *arg);
  * If this step succeeds, the callbacks are put in the lcore callbacks list
  * that will get called for each lcore allocation/release.
  *
- * Note: callbacks execution is serialised under a lock protecting the lcores
- * and callbacks list.
+ * Note: callbacks execution is serialised under a write lock protecting the
+ * lcores and callbacks list.
  *
  * @param name
  *   A name serving as a small description for this callback.
@@ -297,6 +297,49 @@ __rte_experimental
 void
 rte_lcore_callback_unregister(void *handle);
 
+/**
+ * Callback prototype for iterating over lcores.
+ *
+ * @param lcore_id
+ *   The lcore to consider.
+ * @param arg
+ *   An opaque pointer coming from the caller.
+ * @return
+ *   - 0 lets the iteration continue.
+ *   - !0 makes the iteration stop.
+ */
+typedef int (*rte_lcore_iterate_cb)(unsigned int lcore_id, void *arg);
+
+/**
+ * Iterate on all active lcores (ROLE_RTE, ROLE_SERVICE and ROLE_NON_EAL).
+ * No modification on the lcore states is allowed in the callback.
+ *
+ * Note: as opposed to init/uninit callbacks, iteration callbacks can be
+ * invoked in parallel as they are run under a read lock protecting the lcores
+ * and callbacks list.
+ *
+ * @param cb
+ *   The callback that gets passed each lcore.
+ * @param arg
+ *   An opaque pointer passed to cb.
+ * @return
+ *   Same return code as the callback last invocation (see rte_lcore_iterate_cb
+ *   description).
+ */
+__rte_experimental
+int
+rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg);
+
+/**
+ * List all lcores.
+ *
+ * @param f
+ *   The output stream where the dump should be sent.
+ */
+__rte_experimental
+void
+rte_lcore_dump(FILE *f);
+
 /**
  * Set core affinity of the current thread.
  * Support both EAL and non-EAL thread and update TLS.
diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
index 8638376b8a..2f0efd7cd3 100644
--- a/lib/librte_eal/linux/eal.c
+++ b/lib/librte_eal/linux/eal.c
@@ -1214,7 +1214,7 @@ rte_eal_init(int argc, char **argv)
 	rte_thread_init(rte_config.master_lcore,
 		&lcore_config[rte_config.master_lcore].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		rte_config.master_lcore, (uintptr_t)thread_id, cpuset,
 		ret == 0 ? "" : "...");
diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
index 07aec0c44d..22d9bc8c01 100644
--- a/lib/librte_eal/linux/eal_thread.c
+++ b/lib/librte_eal/linux/eal_thread.c
@@ -92,7 +92,7 @@ eal_thread_loop(__rte_unused void *arg)
 
 	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		lcore_id, (uintptr_t)thread_id, cpuset, ret == 0 ? "" : "...");
 
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index aeee7cf431..23a1565dfb 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -398,6 +398,8 @@ EXPERIMENTAL {
 	__rte_trace_mem_per_thread_free;
 	rte_lcore_callback_register;
 	rte_lcore_callback_unregister;
+	rte_lcore_dump;
+	rte_lcore_iterate;
 	rte_thread_register;
 	rte_thread_unregister;
 };
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v3 9/9] mempool/bucket: handle non-EAL lcores
  2020-06-22 13:25 ` [dpdk-dev] [PATCH v3 0/9] Register non-EAL threads as lcore David Marchand
                     ` (7 preceding siblings ...)
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 8/9] eal: add lcore iterators David Marchand
@ 2020-06-22 13:25   ` David Marchand
  2020-06-23 17:28     ` Andrew Rybchenko
  8 siblings, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-06-22 13:25 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Artem V. Andreev, Andrew Rybchenko

Convert to new lcore API to support non-EAL lcores.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 drivers/mempool/bucket/rte_mempool_bucket.c | 131 ++++++++++++--------
 1 file changed, 82 insertions(+), 49 deletions(-)

diff --git a/drivers/mempool/bucket/rte_mempool_bucket.c b/drivers/mempool/bucket/rte_mempool_bucket.c
index 5ce1ef16fb..0b4f42d330 100644
--- a/drivers/mempool/bucket/rte_mempool_bucket.c
+++ b/drivers/mempool/bucket/rte_mempool_bucket.c
@@ -55,6 +55,7 @@ struct bucket_data {
 	struct rte_ring *shared_orphan_ring;
 	struct rte_mempool *pool;
 	unsigned int bucket_mem_size;
+	void *lcore_callback_handle;
 };
 
 static struct bucket_stack *
@@ -345,6 +346,22 @@ bucket_dequeue_contig_blocks(struct rte_mempool *mp, void **first_obj_table,
 	return 0;
 }
 
+struct bucket_per_lcore_ctx {
+	const struct bucket_data *bd;
+	unsigned int count;
+};
+
+static int
+count_per_lcore(unsigned int lcore_id, void *arg)
+{
+	struct bucket_per_lcore_ctx *ctx = arg;
+
+	ctx->count += ctx->bd->obj_per_bucket *
+		ctx->bd->buckets[lcore_id]->top;
+	ctx->count += rte_ring_count(ctx->bd->adoption_buffer_rings[lcore_id]);
+	return 0;
+}
+
 static void
 count_underfilled_buckets(struct rte_mempool *mp,
 			  void *opaque,
@@ -373,23 +390,66 @@ count_underfilled_buckets(struct rte_mempool *mp,
 static unsigned int
 bucket_get_count(const struct rte_mempool *mp)
 {
-	const struct bucket_data *bd = mp->pool_data;
-	unsigned int count =
-		bd->obj_per_bucket * rte_ring_count(bd->shared_bucket_ring) +
-		rte_ring_count(bd->shared_orphan_ring);
-	unsigned int i;
+	struct bucket_per_lcore_ctx ctx;
 
-	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		if (!rte_lcore_is_enabled(i))
-			continue;
-		count += bd->obj_per_bucket * bd->buckets[i]->top +
-			rte_ring_count(bd->adoption_buffer_rings[i]);
-	}
+	ctx.bd = mp->pool_data;
+	ctx.count = ctx.bd->obj_per_bucket *
+		rte_ring_count(ctx.bd->shared_bucket_ring);
+	ctx.count += rte_ring_count(ctx.bd->shared_orphan_ring);
 
+	rte_lcore_iterate(count_per_lcore, &ctx);
 	rte_mempool_mem_iter((struct rte_mempool *)(uintptr_t)mp,
-			     count_underfilled_buckets, &count);
+			     count_underfilled_buckets, &ctx.count);
+
+	return ctx.count;
+}
+
+static int
+bucket_init_per_lcore(unsigned int lcore_id, void *arg)
+{
+	char rg_name[RTE_RING_NAMESIZE];
+	struct bucket_data *bd = arg;
+	struct rte_mempool *mp;
+	int rg_flags;
+	int rc;
+
+	mp = bd->pool;
+	bd->buckets[lcore_id] = bucket_stack_create(mp,
+		mp->size / bd->obj_per_bucket);
+	if (bd->buckets[lcore_id] == NULL)
+		goto error;
+
+	rc = snprintf(rg_name, sizeof(rg_name), RTE_MEMPOOL_MZ_FORMAT ".a%u",
+		mp->name, lcore_id);
+	if (rc < 0 || rc >= (int)sizeof(rg_name))
+		goto error;
+
+	rg_flags = RING_F_SC_DEQ;
+	if (mp->flags & MEMPOOL_F_SP_PUT)
+		rg_flags |= RING_F_SP_ENQ;
+	if (mp->flags & MEMPOOL_F_SC_GET)
+		rg_flags |= RING_F_SC_DEQ;
+	bd->adoption_buffer_rings[lcore_id] = rte_ring_create(rg_name,
+		rte_align32pow2(mp->size + 1), mp->socket_id, rg_flags);
+	if (bd->adoption_buffer_rings[lcore_id] == NULL)
+		goto error;
 
-	return count;
+	return 0;
+error:
+	rte_free(bd->buckets[lcore_id]);
+	bd->buckets[lcore_id] = NULL;
+	return -1;
+}
+
+static void
+bucket_uninit_per_lcore(unsigned int lcore_id, void *arg)
+{
+	struct bucket_data *bd = arg;
+
+	rte_ring_free(bd->adoption_buffer_rings[lcore_id]);
+	bd->adoption_buffer_rings[lcore_id] = NULL;
+	rte_free(bd->buckets[lcore_id]);
+	bd->buckets[lcore_id] = NULL;
 }
 
 static int
@@ -399,7 +459,6 @@ bucket_alloc(struct rte_mempool *mp)
 	int rc = 0;
 	char rg_name[RTE_RING_NAMESIZE];
 	struct bucket_data *bd;
-	unsigned int i;
 	unsigned int bucket_header_size;
 	size_t pg_sz;
 
@@ -429,36 +488,17 @@ bucket_alloc(struct rte_mempool *mp)
 	/* eventually this should be a tunable parameter */
 	bd->bucket_stack_thresh = (mp->size / bd->obj_per_bucket) * 4 / 3;
 
+	bd->lcore_callback_handle = rte_lcore_callback_register("bucket",
+		bucket_init_per_lcore, bucket_uninit_per_lcore, bd);
+	if (bd->lcore_callback_handle == NULL) {
+		rc = -ENOMEM;
+		goto no_mem_for_stacks;
+	}
+
 	if (mp->flags & MEMPOOL_F_SP_PUT)
 		rg_flags |= RING_F_SP_ENQ;
 	if (mp->flags & MEMPOOL_F_SC_GET)
 		rg_flags |= RING_F_SC_DEQ;
-
-	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		if (!rte_lcore_is_enabled(i))
-			continue;
-		bd->buckets[i] =
-			bucket_stack_create(mp, mp->size / bd->obj_per_bucket);
-		if (bd->buckets[i] == NULL) {
-			rc = -ENOMEM;
-			goto no_mem_for_stacks;
-		}
-		rc = snprintf(rg_name, sizeof(rg_name),
-			      RTE_MEMPOOL_MZ_FORMAT ".a%u", mp->name, i);
-		if (rc < 0 || rc >= (int)sizeof(rg_name)) {
-			rc = -ENAMETOOLONG;
-			goto no_mem_for_stacks;
-		}
-		bd->adoption_buffer_rings[i] =
-			rte_ring_create(rg_name, rte_align32pow2(mp->size + 1),
-					mp->socket_id,
-					rg_flags | RING_F_SC_DEQ);
-		if (bd->adoption_buffer_rings[i] == NULL) {
-			rc = -rte_errno;
-			goto no_mem_for_stacks;
-		}
-	}
-
 	rc = snprintf(rg_name, sizeof(rg_name),
 		      RTE_MEMPOOL_MZ_FORMAT ".0", mp->name);
 	if (rc < 0 || rc >= (int)sizeof(rg_name)) {
@@ -498,11 +538,8 @@ bucket_alloc(struct rte_mempool *mp)
 	rte_ring_free(bd->shared_orphan_ring);
 cannot_create_shared_orphan_ring:
 invalid_shared_orphan_ring:
+	rte_lcore_callback_unregister(bd->lcore_callback_handle);
 no_mem_for_stacks:
-	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		rte_free(bd->buckets[i]);
-		rte_ring_free(bd->adoption_buffer_rings[i]);
-	}
 	rte_free(bd);
 no_mem_for_data:
 	rte_errno = -rc;
@@ -512,16 +549,12 @@ bucket_alloc(struct rte_mempool *mp)
 static void
 bucket_free(struct rte_mempool *mp)
 {
-	unsigned int i;
 	struct bucket_data *bd = mp->pool_data;
 
 	if (bd == NULL)
 		return;
 
-	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		rte_free(bd->buckets[i]);
-		rte_ring_free(bd->adoption_buffer_rings[i]);
-	}
+	rte_lcore_callback_unregister(bd->lcore_callback_handle);
 
 	rte_ring_free(bd->shared_orphan_ring);
 	rte_ring_free(bd->shared_bucket_ring);
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores David Marchand
@ 2020-06-22 15:49     ` Ananyev, Konstantin
  2020-06-22 16:37       ` Ananyev, Konstantin
  2020-06-23  7:49       ` David Marchand
  2020-06-23 17:02     ` Andrew Rybchenko
  1 sibling, 2 replies; 126+ messages in thread
From: Ananyev, Konstantin @ 2020-06-22 15:49 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: jerinjacobk, Richardson, Bruce, mdr, ktraynor, Stokes, Ian,
	i.maximets, Thomas Monjalon, Mcnamara, John, Kovacevic, Marko,
	Burakov, Anatoly, Olivier Matz, Andrew Rybchenko, Neil Horman


Hi David,

> diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
> index 86d32a3dd7..7db05428e7 100644
> --- a/lib/librte_eal/common/eal_common_lcore.c
> +++ b/lib/librte_eal/common/eal_common_lcore.c
> @@ -6,12 +6,13 @@
>  #include <limits.h>
>  #include <string.h>
> 
> -#include <rte_errno.h>
> -#include <rte_log.h>
> -#include <rte_eal.h>
> -#include <rte_lcore.h>
>  #include <rte_common.h>
>  #include <rte_debug.h>
> +#include <rte_eal.h>
> +#include <rte_errno.h>
> +#include <rte_lcore.h>
> +#include <rte_log.h>
> +#include <rte_spinlock.h>
> 
>  #include "eal_private.h"
>  #include "eal_thread.h"
> @@ -220,3 +221,38 @@ rte_socket_id_by_idx(unsigned int idx)
>  	}
>  	return config->numa_nodes[idx];
>  }
> +
> +static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
> +
> +unsigned int
> +eal_lcore_non_eal_allocate(void)
> +{
> +	struct rte_config *cfg = rte_eal_get_configuration();
> +	unsigned int lcore_id;
> +
> +	rte_spinlock_lock(&lcore_lock);

I think it will break current DPDK MP modes.
The problem here - rte_config (and lcore_role[]) is in shared memory,
while the lock is local.
Simplest way probably to move lcore_lock to rte_config.

> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> +		if (cfg->lcore_role[lcore_id] != ROLE_OFF)
> +			continue;
> +		cfg->lcore_role[lcore_id] = ROLE_NON_EAL;
> +		cfg->lcore_count++;
> +		break;
> +	}
> +	if (lcore_id == RTE_MAX_LCORE)
> +		RTE_LOG(DEBUG, EAL, "No lcore available.\n");
> +	rte_spinlock_unlock(&lcore_lock);
> +	return lcore_id;
> +}
> +
> +void
> +eal_lcore_non_eal_release(unsigned int lcore_id)
> +{
> +	struct rte_config *cfg = rte_eal_get_configuration();
> +
> +	rte_spinlock_lock(&lcore_lock);
> +	if (cfg->lcore_role[lcore_id] == ROLE_NON_EAL) {
> +		cfg->lcore_role[lcore_id] = ROLE_OFF;
> +		cfg->lcore_count--;
> +	}
> +	rte_spinlock_unlock(&lcore_lock);
> +}
> diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> index a7ae0691bf..1cbddc4b5b 100644
> --- a/lib/librte_eal/common/eal_common_thread.c
> +++ b/lib/librte_eal/common/eal_common_thread.c
> @@ -236,3 +236,36 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>  	pthread_join(*thread, NULL);
>  	return -ret;
>  }
> +
> +void
> +rte_thread_register(void)
> +{
> +	unsigned int lcore_id;
> +	rte_cpuset_t cpuset;
> +
> +	/* EAL init flushes all lcores, we can't register before. */
> +	assert(internal_config.init_complete == 1);
> +	if (pthread_getaffinity_np(pthread_self(), sizeof(cpuset),
> +			&cpuset) != 0)
> +		CPU_ZERO(&cpuset);
> +	lcore_id = eal_lcore_non_eal_allocate();
> +	if (lcore_id >= RTE_MAX_LCORE)
> +		lcore_id = LCORE_ID_ANY;
> +	rte_thread_init(lcore_id, &cpuset);

So we are just setting the affinity to the same value, right?
Not a big deal, but it might be easier to allow rte_thread_init()
to accept cpuset == NULL (and just not change the thread affinity in that case).

> +	if (lcore_id != LCORE_ID_ANY)
> +		RTE_LOG(DEBUG, EAL, "Registered non-EAL thread as lcore %u.\n",
> +			lcore_id);
> +}
> +
> +void
> +rte_thread_unregister(void)
> +{
> +	unsigned int lcore_id = rte_lcore_id();
> +
> +	if (lcore_id != LCORE_ID_ANY)
> +		eal_lcore_non_eal_release(lcore_id);
> +	rte_thread_uninit();
> +	if (lcore_id != LCORE_ID_ANY)
> +		RTE_LOG(DEBUG, EAL, "Unregistered non-EAL thread (was lcore %u).\n",
> +			lcore_id);
> +}
> diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
> index 0592fcd694..73238ff157 100644
> --- a/lib/librte_eal/common/eal_private.h
> +++ b/lib/librte_eal/common/eal_private.h
> @@ -396,6 +396,24 @@ uint64_t get_tsc_freq(void);
>   */
>  uint64_t get_tsc_freq_arch(void);
> 
> +/**
> + * Allocate a free lcore to associate to a non-EAL thread.
> + *
> + * @return
> + *   - the id of a lcore with role ROLE_NON_EAL on success.
> + *   - RTE_MAX_LCORE if none was available.
> + */
> +unsigned int eal_lcore_non_eal_allocate(void);
> +
> +/**
> + * Release the lcore used by a non-EAL thread.
> + * Counterpart of eal_lcore_non_eal_allocate().
> + *
> + * @param lcore_id
> + *   The lcore with role ROLE_NON_EAL to release.
> + */
> +void eal_lcore_non_eal_release(unsigned int lcore_id);
> +
>  /**
>   * Prepare physical memory mapping
>   * i.e. hugepages on Linux and
> diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
> index 3968c40693..ea86220394 100644
> --- a/lib/librte_eal/include/rte_lcore.h
> +++ b/lib/librte_eal/include/rte_lcore.h
> @@ -31,6 +31,7 @@ enum rte_lcore_role_t {
>  	ROLE_RTE,
>  	ROLE_OFF,
>  	ROLE_SERVICE,
> +	ROLE_NON_EAL,
>  };
> 
>  /**
> @@ -67,7 +68,8 @@ rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
>   *   to run threads with lcore IDs 0, 1, 2 and 3 on physical core 10..
>   *
>   * @return
> - *  Logical core ID (in EAL thread) or LCORE_ID_ANY (in non-EAL thread)
> + *  Logical core ID (in EAL thread or registered non-EAL thread) or
> + *  LCORE_ID_ANY (in unregistered non-EAL thread)
>   */
>  static inline unsigned
>  rte_lcore_id(void)
> @@ -279,6 +281,20 @@ int rte_thread_setname(pthread_t id, const char *name);
>  __rte_experimental
>  int rte_thread_getname(pthread_t id, char *name, size_t len);
> 
> +/**
> + * Register current non-EAL thread as a lcore.
> + */
> +__rte_experimental
> +void
> +rte_thread_register(void);
> +
> +/**
> + * Unregister current thread and release lcore if one was associated.
> + */
> +__rte_experimental
> +void
> +rte_thread_unregister(void);
> +
>  /**
>   * Create a control thread.
>   *
> diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
> index 5831eea4b0..39c41d445d 100644
> --- a/lib/librte_eal/rte_eal_version.map
> +++ b/lib/librte_eal/rte_eal_version.map
> @@ -396,6 +396,8 @@ EXPERIMENTAL {
> 
>  	# added in 20.08
>  	__rte_trace_mem_per_thread_free;
> +	rte_thread_register;
> +	rte_thread_unregister;
>  };
> 
>  INTERNAL {
> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
> index 652d19f9f1..9e0ee052b3 100644
> --- a/lib/librte_mempool/rte_mempool.h
> +++ b/lib/librte_mempool/rte_mempool.h
> @@ -28,9 +28,9 @@
>   * rte_mempool_get() or rte_mempool_put() are designed to be called from an EAL
>   * thread due to the internal per-lcore cache. Due to the lack of caching,
>   * rte_mempool_get() or rte_mempool_put() performance will suffer when called
> - * by non-EAL threads. Instead, non-EAL threads should call
> - * rte_mempool_generic_get() or rte_mempool_generic_put() with a user cache
> - * created with rte_mempool_cache_create().
> + * by unregistered non-EAL threads. Instead, unregistered non-EAL threads
> + * should call rte_mempool_generic_get() or rte_mempool_generic_put() with a
> + * user cache created with rte_mempool_cache_create().
>   */
> 
>  #include <stdio.h>
> @@ -1233,7 +1233,7 @@ void rte_mempool_dump(FILE *f, struct rte_mempool *mp);
>  /**
>   * Create a user-owned mempool cache.
>   *
> - * This can be used by non-EAL threads to enable caching when they
> + * This can be used by unregistered non-EAL threads to enable caching when they
>   * interact with a mempool.
>   *
>   * @param size
> @@ -1264,7 +1264,8 @@ rte_mempool_cache_free(struct rte_mempool_cache *cache);
>   * @param lcore_id
>   *   The logical core id.
>   * @return
> - *   A pointer to the mempool cache or NULL if disabled or non-EAL thread.
> + *   A pointer to the mempool cache or NULL if disabled or unregistered non-EAL
> + *   thread.
>   */
>  static __rte_always_inline struct rte_mempool_cache *
>  rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
> --
> 2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-22 15:49     ` Ananyev, Konstantin
@ 2020-06-22 16:37       ` Ananyev, Konstantin
  2020-06-23  7:49       ` David Marchand
  1 sibling, 0 replies; 126+ messages in thread
From: Ananyev, Konstantin @ 2020-06-22 16:37 UTC (permalink / raw)
  To: Ananyev, Konstantin, David Marchand, dev
  Cc: jerinjacobk, Richardson, Bruce, mdr, ktraynor, Stokes, Ian,
	i.maximets, Thomas Monjalon, Mcnamara, John, Kovacevic, Marko,
	Burakov, Anatoly, Olivier Matz, Andrew Rybchenko, Neil Horman


> 
> Hi David,
> 
> > diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
> > index 86d32a3dd7..7db05428e7 100644
> > --- a/lib/librte_eal/common/eal_common_lcore.c
> > +++ b/lib/librte_eal/common/eal_common_lcore.c
> > @@ -6,12 +6,13 @@
> >  #include <limits.h>
> >  #include <string.h>
> >
> > -#include <rte_errno.h>
> > -#include <rte_log.h>
> > -#include <rte_eal.h>
> > -#include <rte_lcore.h>
> >  #include <rte_common.h>
> >  #include <rte_debug.h>
> > +#include <rte_eal.h>
> > +#include <rte_errno.h>
> > +#include <rte_lcore.h>
> > +#include <rte_log.h>
> > +#include <rte_spinlock.h>
> >
> >  #include "eal_private.h"
> >  #include "eal_thread.h"
> > @@ -220,3 +221,38 @@ rte_socket_id_by_idx(unsigned int idx)
> >  	}
> >  	return config->numa_nodes[idx];
> >  }
> > +
> > +static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
> > +
> > +unsigned int
> > +eal_lcore_non_eal_allocate(void)
> > +{
> > +	struct rte_config *cfg = rte_eal_get_configuration();
> > +	unsigned int lcore_id;
> > +
> > +	rte_spinlock_lock(&lcore_lock);
> 
> I think it will break current DPDK MP modes.
> The problem here - rte_config (and lcore_role[]) is in shared memory,
> while the lock is local.
> Simplest way probably to move lcore_lock to rte_config.

Actually sorry, I was wrong - rte_config is local.
So having lcore_lock local seems ok here.
Though then, I think another issue arises:
In the MP case, two processes might get the same lcore_id via this function.
And, as I remember, the rte_mempool cache is by default located in shared memory.
So two threads might end up racing for the same mempool cache slot.
The same probably applies to other shared data that uses lcore_id as an index.

> 
> > +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> > +		if (cfg->lcore_role[lcore_id] != ROLE_OFF)
> > +			continue;
> > +		cfg->lcore_role[lcore_id] = ROLE_NON_EAL;
> > +		cfg->lcore_count++;
> > +		break;
> > +	}
> > +	if (lcore_id == RTE_MAX_LCORE)
> > +		RTE_LOG(DEBUG, EAL, "No lcore available.\n");
> > +	rte_spinlock_unlock(&lcore_lock);
> > +	return lcore_id;
> > +}
> > +
> > +void
> > +eal_lcore_non_eal_release(unsigned int lcore_id)
> > +{
> > +	struct rte_config *cfg = rte_eal_get_configuration();
> > +
> > +	rte_spinlock_lock(&lcore_lock);
> > +	if (cfg->lcore_role[lcore_id] == ROLE_NON_EAL) {
> > +		cfg->lcore_role[lcore_id] = ROLE_OFF;
> > +		cfg->lcore_count--;
> > +	}
> > +	rte_spinlock_unlock(&lcore_lock);
> > +}

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-22 15:49     ` Ananyev, Konstantin
  2020-06-22 16:37       ` Ananyev, Konstantin
@ 2020-06-23  7:49       ` David Marchand
  2020-06-23  9:14         ` Bruce Richardson
  2020-06-23 13:15         ` Ananyev, Konstantin
  1 sibling, 2 replies; 126+ messages in thread
From: David Marchand @ 2020-06-23  7:49 UTC (permalink / raw)
  To: Ananyev, Konstantin
  Cc: dev, jerinjacobk, Richardson, Bruce, mdr, ktraynor, Stokes, Ian,
	i.maximets, Thomas Monjalon, Mcnamara, John, Kovacevic, Marko,
	Burakov, Anatoly, Olivier Matz, Andrew Rybchenko, Neil Horman

Hello Konstantin,

On Mon, Jun 22, 2020 at 5:49 PM Ananyev, Konstantin
<konstantin.ananyev@intel.com> wrote:
> > diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
> > index 86d32a3dd7..7db05428e7 100644
> > --- a/lib/librte_eal/common/eal_common_lcore.c
> > +++ b/lib/librte_eal/common/eal_common_lcore.c
> > @@ -220,3 +221,38 @@ rte_socket_id_by_idx(unsigned int idx)
> >       }
> >       return config->numa_nodes[idx];
> >  }
> > +
> > +static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
> > +
> > +unsigned int
> > +eal_lcore_non_eal_allocate(void)
> > +{
> > +     struct rte_config *cfg = rte_eal_get_configuration();
> > +     unsigned int lcore_id;
> > +
> > +     rte_spinlock_lock(&lcore_lock);
>
> I think it will break current DPDK MP modes.
> The problem here - rte_config (and lcore_role[]) is in shared memory,
> while the lock is local.
> Simplest way probably to move lcore_lock to rte_config.

Even before this series, MP has no protection on lcore placing between
primary and secondary processes.
Personally, I have no use for DPDK MP and marking MP as not supporting
this new feature is tempting for a first phase.
If this is a strong requirement, I can look at it in a second phase.
What do you think?


>
> > +     for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> > +             if (cfg->lcore_role[lcore_id] != ROLE_OFF)
> > +                     continue;
> > +             cfg->lcore_role[lcore_id] = ROLE_NON_EAL;
> > +             cfg->lcore_count++;
> > +             break;
> > +     }
> > +     if (lcore_id == RTE_MAX_LCORE)
> > +             RTE_LOG(DEBUG, EAL, "No lcore available.\n");
> > +     rte_spinlock_unlock(&lcore_lock);
> > +     return lcore_id;
> > +}
> > +
> > +void
> > +eal_lcore_non_eal_release(unsigned int lcore_id)
> > +{
> > +     struct rte_config *cfg = rte_eal_get_configuration();
> > +
> > +     rte_spinlock_lock(&lcore_lock);
> > +     if (cfg->lcore_role[lcore_id] == ROLE_NON_EAL) {
> > +             cfg->lcore_role[lcore_id] = ROLE_OFF;
> > +             cfg->lcore_count--;
> > +     }
> > +     rte_spinlock_unlock(&lcore_lock);
> > +}
> > diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> > index a7ae0691bf..1cbddc4b5b 100644
> > --- a/lib/librte_eal/common/eal_common_thread.c
> > +++ b/lib/librte_eal/common/eal_common_thread.c
> > @@ -236,3 +236,36 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
> >       pthread_join(*thread, NULL);
> >       return -ret;
> >  }
> > +
> > +void
> > +rte_thread_register(void)
> > +{
> > +     unsigned int lcore_id;
> > +     rte_cpuset_t cpuset;
> > +
> > +     /* EAL init flushes all lcores, we can't register before. */
> > +     assert(internal_config.init_complete == 1);
> > +     if (pthread_getaffinity_np(pthread_self(), sizeof(cpuset),
> > +                     &cpuset) != 0)
> > +             CPU_ZERO(&cpuset);
> > +     lcore_id = eal_lcore_non_eal_allocate();
> > +     if (lcore_id >= RTE_MAX_LCORE)
> > +             lcore_id = LCORE_ID_ANY;
> > +     rte_thread_init(lcore_id, &cpuset);
>
> So we just setting affinity to the same value, right?
> Not a big deal, but might be easier to allow rte_thread_init()
> to accept cpuset==NULL (and just don't change thread affinity in that case)

rte_thread_init does not change the thread cpu affinity, it handles
per thread (TLS included) variables initialization.

So do you mean accepting cpuset == NULL and do the getaffinity in this case?
rte_thread_init is EAL private for now.
That saves us some code in this function, but we will call with a !=
NULL cpuset in all other EAL code.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-23  7:49       ` David Marchand
@ 2020-06-23  9:14         ` Bruce Richardson
  2020-06-23 12:49           ` David Marchand
  2020-06-23 13:15         ` Ananyev, Konstantin
  1 sibling, 1 reply; 126+ messages in thread
From: Bruce Richardson @ 2020-06-23  9:14 UTC (permalink / raw)
  To: David Marchand
  Cc: Ananyev, Konstantin, dev, jerinjacobk, mdr, ktraynor, Stokes,
	Ian, i.maximets, Thomas Monjalon, Mcnamara, John, Kovacevic,
	Marko, Burakov, Anatoly, Olivier Matz, Andrew Rybchenko,
	Neil Horman

On Tue, Jun 23, 2020 at 09:49:18AM +0200, David Marchand wrote:
> Hello Konstantin,
> 
> On Mon, Jun 22, 2020 at 5:49 PM Ananyev, Konstantin
> <konstantin.ananyev@intel.com> wrote:
> > > diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
> > > index 86d32a3dd7..7db05428e7 100644
> > > --- a/lib/librte_eal/common/eal_common_lcore.c
> > > +++ b/lib/librte_eal/common/eal_common_lcore.c
> > > @@ -220,3 +221,38 @@ rte_socket_id_by_idx(unsigned int idx)
> > >       }
> > >       return config->numa_nodes[idx];
> > >  }
> > > +
> > > +static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
> > > +
> > > +unsigned int
> > > +eal_lcore_non_eal_allocate(void)
> > > +{
> > > +     struct rte_config *cfg = rte_eal_get_configuration();
> > > +     unsigned int lcore_id;
> > > +
> > > +     rte_spinlock_lock(&lcore_lock);
> >
> > I think it will break current DPDK MP modes.
> > The problem here - rte_config (and lcore_role[]) is in shared memory,
> > while the lock is local.
> > Simplest way probably to move lcore_lock to rte_config.
> 
> Even before this series, MP has no protection on lcore placing between
> primary and secondary processes.
> Personally, I have no use for DPDK MP and marking MP as not supporting
> this new feature is tempting for a first phase.
> If this is a strong requirement, I can look at it in a second phase.
> What do you think?
> 
I think that is reasonable for a new feature. I suspect those wanting to
dynamically manage their own threads probably do not care about
multi-process mode. 

However, this limitation probably needs to be clearly called out in the
docs.

/Bruce

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-23  9:14         ` Bruce Richardson
@ 2020-06-23 12:49           ` David Marchand
  0 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-23 12:49 UTC (permalink / raw)
  To: Bruce Richardson
  Cc: Ananyev, Konstantin, dev, jerinjacobk, mdr, ktraynor, Stokes,
	Ian, i.maximets, Thomas Monjalon, Mcnamara, John, Kovacevic,
	Marko, Burakov, Anatoly, Olivier Matz, Andrew Rybchenko,
	Neil Horman

On Tue, Jun 23, 2020 at 11:14 AM Bruce Richardson
<bruce.richardson@intel.com> wrote:
>
> On Tue, Jun 23, 2020 at 09:49:18AM +0200, David Marchand wrote:
> > Hello Konstantin,
> >
> > On Mon, Jun 22, 2020 at 5:49 PM Ananyev, Konstantin
> > <konstantin.ananyev@intel.com> wrote:
> > > > diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
> > > > index 86d32a3dd7..7db05428e7 100644
> > > > --- a/lib/librte_eal/common/eal_common_lcore.c
> > > > +++ b/lib/librte_eal/common/eal_common_lcore.c
> > > > @@ -220,3 +221,38 @@ rte_socket_id_by_idx(unsigned int idx)
> > > >       }
> > > >       return config->numa_nodes[idx];
> > > >  }
> > > > +
> > > > +static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
> > > > +
> > > > +unsigned int
> > > > +eal_lcore_non_eal_allocate(void)
> > > > +{
> > > > +     struct rte_config *cfg = rte_eal_get_configuration();
> > > > +     unsigned int lcore_id;
> > > > +
> > > > +     rte_spinlock_lock(&lcore_lock);
> > >
> > > I think it will break current DPDK MP modes.
> > > The problem here - rte_config (and lcore_role[]) is in shared memory,
> > > while the lock is local.
> > > Simplest way probably to move lcore_lock to rte_config.
> >
> > Even before this series, MP has no protection on lcore placing between
> > primary and secondary processes.
> > Personally, I have no use for DPDK MP and marking MP as not supporting
> > this new feature is tempting for a first phase.
> > If this is a strong requirement, I can look at it in a second phase.
> > What do you think?
> >
> I think that is reasonable for a new feature. I suspect those wanting to
> dynamically manage their own threads probably do not care about
> multi-process mode.
>
> However, this limitation probably needs to be clearly called out in the
> docs.

Again, *disclaimer* I am not a user of the MP feature.
But I suppose users of such a feature are relying on DPDK init and
threads management, and I would not expect them to use this new API.


I will add a note in rte_thread_register() doxygen.
But I wonder if adding some check when a secondary attaches would make sense...
Like how we have a version check, I could "taint" the dpdk primary
process: a variable in shared memory could do the trick.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-23  7:49       ` David Marchand
  2020-06-23  9:14         ` Bruce Richardson
@ 2020-06-23 13:15         ` Ananyev, Konstantin
  2020-06-24  9:23           ` David Marchand
  1 sibling, 1 reply; 126+ messages in thread
From: Ananyev, Konstantin @ 2020-06-23 13:15 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, jerinjacobk, Richardson, Bruce, mdr, ktraynor, Stokes, Ian,
	i.maximets, Thomas Monjalon, Mcnamara, John, Kovacevic, Marko,
	Burakov, Anatoly, Olivier Matz, Andrew Rybchenko, Neil Horman

Hi David,

> > > diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
> > > index 86d32a3dd7..7db05428e7 100644
> > > --- a/lib/librte_eal/common/eal_common_lcore.c
> > > +++ b/lib/librte_eal/common/eal_common_lcore.c
> > > @@ -220,3 +221,38 @@ rte_socket_id_by_idx(unsigned int idx)
> > >       }
> > >       return config->numa_nodes[idx];
> > >  }
> > > +
> > > +static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
> > > +
> > > +unsigned int
> > > +eal_lcore_non_eal_allocate(void)
> > > +{
> > > +     struct rte_config *cfg = rte_eal_get_configuration();
> > > +     unsigned int lcore_id;
> > > +
> > > +     rte_spinlock_lock(&lcore_lock);
> >
> > I think it will break current DPDK MP modes.
> > The problem here - rte_config (and lcore_role[]) is in shared memory,
> > while the lock is local.
> > Simplest way probably to move lcore_lock to rte_config.
> 
> Even before this series, MP has no protection on lcore placing between
> primary and secondary processes.

Agree, it is not a new problem, it has been there for a while.
Though making lcore assignment dynamic will make it more noticeable and harder to avoid.
With static only lcore distribution it is much easier to control things.
 
> Personally, I have no use for DPDK MP and marking MP as not supporting
> this new feature is tempting for a first phase.
> If this is a strong requirement, I can look at it in a second phase.
> What do you think?

In theory it is possible to mark this new API as not supported for MP,
at least for now. Though I think it is a sort of temporary solution.
AFAIK, MP is used by customers, so sooner or later someone will hit that problem.
For example, we do have the pdump app/library in our mainline.
As far as I can see, it will be affected when users start using this new dynamic lcore API
inside their apps.

> 
> >
> > > +     for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> > > +             if (cfg->lcore_role[lcore_id] != ROLE_OFF)
> > > +                     continue;
> > > +             cfg->lcore_role[lcore_id] = ROLE_NON_EAL;
> > > +             cfg->lcore_count++;
> > > +             break;
> > > +     }
> > > +     if (lcore_id == RTE_MAX_LCORE)
> > > +             RTE_LOG(DEBUG, EAL, "No lcore available.\n");
> > > +     rte_spinlock_unlock(&lcore_lock);
> > > +     return lcore_id;
> > > +}
> > > +
> > > +void
> > > +eal_lcore_non_eal_release(unsigned int lcore_id)
> > > +{
> > > +     struct rte_config *cfg = rte_eal_get_configuration();
> > > +
> > > +     rte_spinlock_lock(&lcore_lock);
> > > +     if (cfg->lcore_role[lcore_id] == ROLE_NON_EAL) {
> > > +             cfg->lcore_role[lcore_id] = ROLE_OFF;
> > > +             cfg->lcore_count--;
> > > +     }
> > > +     rte_spinlock_unlock(&lcore_lock);
> > > +}
> > > diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> > > index a7ae0691bf..1cbddc4b5b 100644
> > > --- a/lib/librte_eal/common/eal_common_thread.c
> > > +++ b/lib/librte_eal/common/eal_common_thread.c
> > > @@ -236,3 +236,36 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
> > >       pthread_join(*thread, NULL);
> > >       return -ret;
> > >  }
> > > +
> > > +void
> > > +rte_thread_register(void)
> > > +{
> > > +     unsigned int lcore_id;
> > > +     rte_cpuset_t cpuset;
> > > +
> > > +     /* EAL init flushes all lcores, we can't register before. */
> > > +     assert(internal_config.init_complete == 1);
> > > +     if (pthread_getaffinity_np(pthread_self(), sizeof(cpuset),
> > > +                     &cpuset) != 0)
> > > +             CPU_ZERO(&cpuset);
> > > +     lcore_id = eal_lcore_non_eal_allocate();
> > > +     if (lcore_id >= RTE_MAX_LCORE)
> > > +             lcore_id = LCORE_ID_ANY;
> > > +     rte_thread_init(lcore_id, &cpuset);
> >
> > So we just setting affinity to the same value, right?
> > Not a big deal, but might be easier to allow rte_thread_init()
> > to accept cpuset==NULL (and just don't change thread affinity in that case)
> 
> rte_thread_init does not change the thread cpu affinity, it handles
> per thread (TLS included) variables initialization.

Right, didn't read the code properly.
Please scratch that comment.

> 
> So do you mean accepting cpuset == NULL and do the getaffinity in this case?
> rte_thread_init is EAL private for now.
> That saves us some code in this function, but we will call with a !=
> NULL cpuset in all other EAL code.

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores David Marchand
  2020-06-22 15:49     ` Ananyev, Konstantin
@ 2020-06-23 17:02     ` Andrew Rybchenko
  1 sibling, 0 replies; 126+ messages in thread
From: Andrew Rybchenko @ 2020-06-23 17:02 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Thomas Monjalon, John McNamara, Marko Kovacevic,
	Anatoly Burakov, Olivier Matz, Neil Horman

On 6/22/20 4:25 PM, David Marchand wrote:
> DPDK allows calling some part of its API from a non-EAL thread but this
> has some limitations.
> OVS (and other applications) has its own thread management but still
> want to avoid such limitations by hacking RTE_PER_LCORE(_lcore_id) and
> faking EAL threads potentially unknown of some DPDK component.
> 
> Introduce a new API to register non-EAL thread and associate them to a
> free lcore with a new NON_EAL role.
> This role denotes lcores that do not run DPDK mainloop and as such
> prevents use of rte_eal_wait_lcore() and consorts.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>

Acked-by: Andrew Rybchenko <arybchenko@solarflare.com>


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 9/9] mempool/bucket: handle non-EAL lcores
  2020-06-22 13:25   ` [dpdk-dev] [PATCH v3 9/9] mempool/bucket: handle non-EAL lcores David Marchand
@ 2020-06-23 17:28     ` Andrew Rybchenko
  2020-06-26 14:13       ` David Marchand
  0 siblings, 1 reply; 126+ messages in thread
From: Andrew Rybchenko @ 2020-06-23 17:28 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: jerinjacobk, bruce.richardson, mdr, ktraynor, ian.stokes,
	i.maximets, Artem V. Andreev

On 6/22/20 4:25 PM, David Marchand wrote:
> Convert to new lcore API to support non-EAL lcores.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
>  drivers/mempool/bucket/rte_mempool_bucket.c | 131 ++++++++++++--------
>  1 file changed, 82 insertions(+), 49 deletions(-)
> 
> diff --git a/drivers/mempool/bucket/rte_mempool_bucket.c b/drivers/mempool/bucket/rte_mempool_bucket.c
> index 5ce1ef16fb..0b4f42d330 100644
> --- a/drivers/mempool/bucket/rte_mempool_bucket.c
> +++ b/drivers/mempool/bucket/rte_mempool_bucket.c
> @@ -55,6 +55,7 @@ struct bucket_data {
>  	struct rte_ring *shared_orphan_ring;
>  	struct rte_mempool *pool;
>  	unsigned int bucket_mem_size;
> +	void *lcore_callback_handle;
>  };
>  
>  static struct bucket_stack *
> @@ -345,6 +346,22 @@ bucket_dequeue_contig_blocks(struct rte_mempool *mp, void **first_obj_table,
>  	return 0;
>  }
>  
> +struct bucket_per_lcore_ctx {

The structure is not used in the per-lcore init and uninit
functions. So, it is better to add _count to its name to make it
count-specific, i.e. bucket_count_per_lcore_ctx.

> +	const struct bucket_data *bd;
> +	unsigned int count;
> +};
> +
> +static int
> +count_per_lcore(unsigned int lcore_id, void *arg)
> +{
> +	struct bucket_per_lcore_ctx *ctx = arg;
> +
> +	ctx->count += ctx->bd->obj_per_bucket *
> +		ctx->bd->buckets[lcore_id]->top;
> +	ctx->count += rte_ring_count(ctx->bd->adoption_buffer_rings[lcore_id]);
> +	return 0;
> +}
> +
>  static void
>  count_underfilled_buckets(struct rte_mempool *mp,
>  			  void *opaque,
> @@ -373,23 +390,66 @@ count_underfilled_buckets(struct rte_mempool *mp,
>  static unsigned int
>  bucket_get_count(const struct rte_mempool *mp)
>  {
> -	const struct bucket_data *bd = mp->pool_data;
> -	unsigned int count =
> -		bd->obj_per_bucket * rte_ring_count(bd->shared_bucket_ring) +
> -		rte_ring_count(bd->shared_orphan_ring);
> -	unsigned int i;
> +	struct bucket_per_lcore_ctx ctx;

Just a nit, but I think that ctx is too generic.
(some time ago bucket_data bd was ctx in fact :) )
Maybe bplc? Up to you.

>  
> -	for (i = 0; i < RTE_MAX_LCORE; i++) {
> -		if (!rte_lcore_is_enabled(i))
> -			continue;
> -		count += bd->obj_per_bucket * bd->buckets[i]->top +
> -			rte_ring_count(bd->adoption_buffer_rings[i]);
> -	}
> +	ctx.bd = mp->pool_data;
> +	ctx.count = ctx.bd->obj_per_bucket *
> +		rte_ring_count(ctx.bd->shared_bucket_ring);
> +	ctx.count += rte_ring_count(ctx.bd->shared_orphan_ring);
>  
> +	rte_lcore_iterate(count_per_lcore, &ctx);
>  	rte_mempool_mem_iter((struct rte_mempool *)(uintptr_t)mp,
> -			     count_underfilled_buckets, &count);
> +			     count_underfilled_buckets, &ctx.count);
> +
> +	return ctx.count;
> +}
> +
> +static int
> +bucket_init_per_lcore(unsigned int lcore_id, void *arg)

There should either be no bucket_ prefix here, or there should be a
bucket_ prefix above on count_per_lcore.

> +{
> +	char rg_name[RTE_RING_NAMESIZE];
> +	struct bucket_data *bd = arg;
> +	struct rte_mempool *mp;
> +	int rg_flags;
> +	int rc;
> +
> +	mp = bd->pool;
> +	bd->buckets[lcore_id] = bucket_stack_create(mp,
> +		mp->size / bd->obj_per_bucket);
> +	if (bd->buckets[lcore_id] == NULL)
> +		goto error;
> +
> +	rc = snprintf(rg_name, sizeof(rg_name), RTE_MEMPOOL_MZ_FORMAT ".a%u",
> +		mp->name, lcore_id);
> +	if (rc < 0 || rc >= (int)sizeof(rg_name))
> +		goto error;
> +
> +	rg_flags = RING_F_SC_DEQ;
> +	if (mp->flags & MEMPOOL_F_SP_PUT)
> +		rg_flags |= RING_F_SP_ENQ;
> +	if (mp->flags & MEMPOOL_F_SC_GET)
> +		rg_flags |= RING_F_SC_DEQ;

There is no point in having the two lines above, since
RING_F_SC_DEQ is always set.

> +	bd->adoption_buffer_rings[lcore_id] = rte_ring_create(rg_name,
> +		rte_align32pow2(mp->size + 1), mp->socket_id, rg_flags);
> +	if (bd->adoption_buffer_rings[lcore_id] == NULL)
> +		goto error;
>  
> -	return count;
> +	return 0;
> +error:
> +	rte_free(bd->buckets[lcore_id]);
> +	bd->buckets[lcore_id] = NULL;
> +	return -1;

Why does the API collapse all negative errnos into -1?
(I don't think it is critical, just want to know why).

> +}
> +
> +static void
> +bucket_uninit_per_lcore(unsigned int lcore_id, void *arg)

Same note about bucket_ prefix.


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-23 13:15         ` Ananyev, Konstantin
@ 2020-06-24  9:23           ` David Marchand
  2020-06-24  9:56             ` Bruce Richardson
  2020-06-24 10:39             ` Ananyev, Konstantin
  0 siblings, 2 replies; 126+ messages in thread
From: David Marchand @ 2020-06-24  9:23 UTC (permalink / raw)
  To: Ananyev, Konstantin
  Cc: dev, jerinjacobk, Richardson, Bruce, mdr, ktraynor, Stokes, Ian,
	i.maximets, Thomas Monjalon, Mcnamara, John, Kovacevic, Marko,
	Burakov, Anatoly, Olivier Matz, Andrew Rybchenko, Neil Horman

On Tue, Jun 23, 2020 at 3:16 PM Ananyev, Konstantin
<konstantin.ananyev@intel.com> wrote:
> > Even before this series, MP has no protection on lcore placing between
> > primary and secondary processes.
>
> Agree, it is not a new problem, it has been there for a while.
> Though making lcore assignment dynamic will make it more noticeable and harder to avoid.
> With static only lcore distribution it is much easier to control things.
>
> > Personally, I have no use for DPDK MP and marking MP as not supporting
> > this new feature is tempting for a first phase.
> > If this is a strong requirement, I can look at it in a second phase.
> > What do you think?
>
> In theory it is possible to mark this new API as not supported for MP.
> At least for now. Though I think it is sort of temporal solution.
> AFAIK, MP is used by customers, so sooner or later someone will hit that problem.

I understand this argument.
But then we don't see those customers giving feedback.


> Let say, we do have pdump app/library in our mainline.
> As I can see - it will be affected when users will start using this new dynamic lcore API
> inside their apps.

Supporting lcore allocation in MP requires exchanges between
primary/secondary processes like what we have for memory allocations.
It will be quite a beast to get to work fine, while not even knowing
if people actually want to use both.

For v4, I added a check to exclude MP and the new API.
I am still willing to help if people do care about using both features together.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-24  9:23           ` David Marchand
@ 2020-06-24  9:56             ` Bruce Richardson
  2020-06-24 10:08               ` Thomas Monjalon
  2020-06-24 10:39             ` Ananyev, Konstantin
  1 sibling, 1 reply; 126+ messages in thread
From: Bruce Richardson @ 2020-06-24  9:56 UTC (permalink / raw)
  To: David Marchand
  Cc: Ananyev, Konstantin, dev, jerinjacobk, mdr, ktraynor, Stokes,
	Ian, i.maximets, Thomas Monjalon, Mcnamara, John, Kovacevic,
	Marko, Burakov, Anatoly, Olivier Matz, Andrew Rybchenko,
	Neil Horman

On Wed, Jun 24, 2020 at 11:23:55AM +0200, David Marchand wrote:
> On Tue, Jun 23, 2020 at 3:16 PM Ananyev, Konstantin
> <konstantin.ananyev@intel.com> wrote:
> > > Even before this series, MP has no protection on lcore placing between
> > > primary and secondary processes.
> >
> > Agree, it is not a new problem, it has been there for a while.
> > Though making lcore assignment dynamic will make it more noticeable and harder to avoid.
> > With static only lcore distribution it is much easier to control things.
> >
> > > Personally, I have no use for DPDK MP and marking MP as not supporting
> > > this new feature is tempting for a first phase.
> > > If this is a strong requirement, I can look at it in a second phase.
> > > What do you think?
> >
> > In theory it is possible to mark this new API as not supported for MP.
> > At least for now. Though I think it is sort of temporal solution.
> > AFAIK, MP is used by customers, so sooner or later someone will hit that problem.
> 
> I understand this argument.
> But then we don't see those customers giving feedback.
> 
> 
> > Let say, we do have pdump app/library in our mainline.
> > As I can see - it will be affected when users will start using this new dynamic lcore API
> > inside their apps.
> 
> Supporting lcore allocation in MP requires exchanges between
> primary/secondary processes like what we have for memory allocations.
> It will be quite a beast to get to work fine, while not even knowing
> if people actually want to use both.
> 
> For v4, I added a check to exclude MP and the new API.
> I am still willing to help if people do care about using both features together.

I wonder how much we could simplify DPDK generally if we had to enable a
specific runtime flag to enable multi-process support and it was off by
default. This would break proc_info I think, but maybe we could provide
telemetry callbacks to provide the same data, but beyond that it would just
allow us to know whether a DPDK app is actually using MP, or just running
as a single process.

/Bruce

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-24  9:56             ` Bruce Richardson
@ 2020-06-24 10:08               ` Thomas Monjalon
  2020-06-24 10:45                 ` Ananyev, Konstantin
  0 siblings, 1 reply; 126+ messages in thread
From: Thomas Monjalon @ 2020-06-24 10:08 UTC (permalink / raw)
  To: David Marchand, Bruce Richardson
  Cc: Ananyev, Konstantin, dev, jerinjacobk, mdr, ktraynor, Stokes,
	Ian, i.maximets, Mcnamara, John, Kovacevic, Marko, Burakov,
	Anatoly, Olivier Matz, Andrew Rybchenko, Neil Horman

24/06/2020 11:56, Bruce Richardson:
> On Wed, Jun 24, 2020 at 11:23:55AM +0200, David Marchand wrote:
> > On Tue, Jun 23, 2020 at 3:16 PM Ananyev, Konstantin
> > <konstantin.ananyev@intel.com> wrote:
> > > > Even before this series, MP has no protection on lcore placing between
> > > > primary and secondary processes.
> > >
> > > Agree, it is not a new problem, it has been there for a while.
> > > Though making lcore assignment dynamic will make it more noticeable and harder to avoid.
> > > With static only lcore distribution it is much easier to control things.
> > >
> > > > Personally, I have no use for DPDK MP and marking MP as not supporting
> > > > this new feature is tempting for a first phase.
> > > > If this is a strong requirement, I can look at it in a second phase.
> > > > What do you think?
> > >
> > > In theory it is possible to mark this new API as not supported for MP.
> > > At least for now. Though I think it is sort of temporal solution.
> > > AFAIK, MP is used by customers, so sooner or later someone will hit that problem.
> > 
> > I understand this argument.
> > But then we don't see those customers giving feedback.
> > 
> > 
> > > Let say, we do have pdump app/library in our mainline.
> > > As I can see - it will be affected when users will start using this new dynamic lcore API
> > > inside their apps.
> > 
> > Supporting lcore allocation in MP requires exchanges between
> > primary/secondary processes like what we have for memory allocations.
> > It will be quite a beast to get to work fine, while not even knowing
> > if people actually want to use both.
> > 
> > For v4, I added a check to exclude MP and the new API.
> > I am still willing to help if people do care about using both features together.
> 
> I wonder how much we could simplify DPDK generally if we had to enable a
> specific runtime flag to enable multi-process support and it was off by
> default. This would break proc_info I think, but maybe we could provide
> telemetry callbacks to provide the same data, but beyond that it would just
> allow us to know whether a DPDK app is actually using MP, or just running
> as a single process.

Same thought here.
I like the idea of a "mode flag" when multi-process is in use.
Should it be an explicit user flag or an automatic one?



^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-24  9:23           ` David Marchand
  2020-06-24  9:56             ` Bruce Richardson
@ 2020-06-24 10:39             ` Ananyev, Konstantin
  2020-06-24 10:48               ` David Marchand
  1 sibling, 1 reply; 126+ messages in thread
From: Ananyev, Konstantin @ 2020-06-24 10:39 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, jerinjacobk, Richardson, Bruce, mdr, ktraynor, Stokes, Ian,
	i.maximets, Thomas Monjalon, Mcnamara, John, Kovacevic, Marko,
	Burakov, Anatoly, Olivier Matz, Andrew Rybchenko, Neil Horman



> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Wednesday, June 24, 2020 10:24 AM
> To: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> Cc: dev@dpdk.org; jerinjacobk@gmail.com; Richardson, Bruce <bruce.richardson@intel.com>; mdr@ashroe.eu; ktraynor@redhat.com;
> Stokes, Ian <ian.stokes@intel.com>; i.maximets@ovn.org; Thomas Monjalon <thomas@monjalon.net>; Mcnamara, John
> <john.mcnamara@intel.com>; Kovacevic, Marko <marko.kovacevic@intel.com>; Burakov, Anatoly <anatoly.burakov@intel.com>; Olivier
> Matz <olivier.matz@6wind.com>; Andrew Rybchenko <arybchenko@solarflare.com>; Neil Horman <nhorman@tuxdriver.com>
> Subject: Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
> 
> On Tue, Jun 23, 2020 at 3:16 PM Ananyev, Konstantin
> <konstantin.ananyev@intel.com> wrote:
> > > Even before this series, MP has no protection on lcore placing between
> > > primary and secondary processes.
> >
> > Agree, it is not a new problem, it has been there for a while.
> > Though making lcore assignment dynamic will make it more noticeable and harder to avoid.
> > With static only lcore distribution it is much easier to control things.
> >
> > > Personally, I have no use for DPDK MP and marking MP as not supporting
> > > this new feature is tempting for a first phase.
> > > If this is a strong requirement, I can look at it in a second phase.
> > > What do you think?
> >
> > In theory it is possible to mark this new API as not supported for MP.
> > At least for now. Though I think it is sort of temporal solution.
> > AFAIK, MP is used by customers, so sooner or later someone will hit that problem.
> 
> I understand this argument.
> But then we don't see those customers giving feedback.
> 
> 
> > Let say, we do have pdump app/library in our mainline.
> > As I can see - it will be affected when users will start using this new dynamic lcore API
> > inside their apps.
> 
> Supporting lcore allocation in MP requires exchanges between
> primary/secondary processes like what we have for memory allocations.
> It will be quite a beast to get to work fine, while not even knowing
> if people actually want to use both.

I don't think we need to re-implement RPC as we did for memory subsystem.
One relatively simple approach - move lcore_role[] and related lock into
shared memory (separate memzone or so).
I think it should help a lot and will solve majority of the problems.
One limitation - init/fini callbacks can be static only.
As the drawback, it will introduce change in current behaviour:
secondary process with lcore-mask that intersects with master lcore-mask
will fail to start.
Second approach - make lcore_id a process-local entity:
prohibit indexing by lcore_id in shared data structures.
Let's say for mempool - make the cache local (per process).
While that approach is probably more elegant and consistent,
it would require more work and would cause ABI (maybe also API) breakage.
 
> For v4, I added a check to exclude MP and the new API.

Do you mean - make this new dynamic-lcore API return an error if called
from a secondary process?

> I am still willing to help if people do care about using both features together.



^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-24 10:08               ` Thomas Monjalon
@ 2020-06-24 10:45                 ` Ananyev, Konstantin
  0 siblings, 0 replies; 126+ messages in thread
From: Ananyev, Konstantin @ 2020-06-24 10:45 UTC (permalink / raw)
  To: Thomas Monjalon, David Marchand, Richardson, Bruce
  Cc: dev, jerinjacobk, mdr, ktraynor, Stokes, Ian, i.maximets,
	Mcnamara, John, Kovacevic, Marko, Burakov, Anatoly, Olivier Matz,
	Andrew Rybchenko, Neil Horman

> 
> 24/06/2020 11:56, Bruce Richardson:
> > On Wed, Jun 24, 2020 at 11:23:55AM +0200, David Marchand wrote:
> > > On Tue, Jun 23, 2020 at 3:16 PM Ananyev, Konstantin
> > > <konstantin.ananyev@intel.com> wrote:
> > > > > Even before this series, MP has no protection on lcore placing between
> > > > > primary and secondary processes.
> > > >
> > > > Agree, it is not a new problem, it has been there for a while.
> > > > Though making lcore assignment dynamic will make it more noticeable and harder to avoid.
> > > > With static only lcore distribution it is much easier to control things.
> > > >
> > > > > Personally, I have no use for DPDK MP and marking MP as not supporting
> > > > > this new feature is tempting for a first phase.
> > > > > If this is a strong requirement, I can look at it in a second phase.
> > > > > What do you think?
> > > >
> > > > In theory it is possible to mark this new API as not supported for MP.
> > > > At least for now. Though I think it is sort of temporal solution.
> > > > AFAIK, MP is used by customers, so sooner or later someone will hit that problem.
> > >
> > > I understand this argument.
> > > But then we don't see those customers giving feedback.
> > >
> > >
> > > > Let say, we do have pdump app/library in our mainline.
> > > > As I can see - it will be affected when users will start using this new dynamic lcore API
> > > > inside their apps.
> > >
> > > Supporting lcore allocation in MP requires exchanges between
> > > primary/secondary processes like what we have for memory allocations.
> > > It will be quite a beast to get to work fine, while not even knowing
> > > if people actually want to use both.
> > >
> > > For v4, I added a check to exclude MP and the new API.
> > > I am still willing to help if people do care about using both features together.
> >
> > I wonder how much we could simplify DPDK generally if we had to enable a
> > specific runtime flag to enable multi-process support and it was off by
> > default. This would break proc_info I think, but maybe we could provide
> > telemetry callbacks to provide the same data, but beyond that it would just
> > allow us to know whether a DPDK app is actually using MP, or just running
> > as a single process.
> 
> Same thought here.
> I like the idea of a "mode flag" when multi-process is in use.
> Should it be an user explicit flag or an automatic one?

It is probably possible, but that would mean we will need to start splitting
core parts of DPDK code into two paths: standalone/MP, right?
I wonder how much effort it would require in terms of code rework, testing,
maintenance, etc. At first glance it seems quite substantial.



^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-24 10:39             ` Ananyev, Konstantin
@ 2020-06-24 10:48               ` David Marchand
  2020-06-24 11:59                 ` Ananyev, Konstantin
  0 siblings, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-06-24 10:48 UTC (permalink / raw)
  To: Ananyev, Konstantin
  Cc: dev, jerinjacobk, Richardson, Bruce, mdr, ktraynor, Stokes, Ian,
	i.maximets, Thomas Monjalon, Mcnamara, John, Kovacevic, Marko,
	Burakov, Anatoly, Olivier Matz, Andrew Rybchenko, Neil Horman

On Wed, Jun 24, 2020 at 12:40 PM Ananyev, Konstantin
<konstantin.ananyev@intel.com> wrote:
> > Supporting lcore allocation in MP requires exchanges between
> > primary/secondary processes like what we have for memory allocations.
> > It will be quite a beast to get to work fine, while not even knowing
> > if people actually want to use both.
>
> I don't think we need to re-implement RPC as we did for memory subsystem.
> One relatively simple approach - move lcore_role[] and related lock into
> shared memory (separate memzone or so).
> I think it should help a lot and will solve majority of the problems.
> One limitation - init/fini callbacks can be static only.
> As the drawback, it will introduce change in current behaviour:
> secondary process with lcore-mask that intersects with master lcore-mask
> will fail to start.
> Second approach - make lcore_id local process entity:
> prohibit indexing by lcore_id in shared data structures.
> Let say for mempool - make cache local (per process).
> While that approach is probably more elegant and consistent,
> it would require more work and will cause ABI (maybe API also) breakage.

In all scenarios, this is quite some work.


>
> > For v4, I added a check to exclude MP and the new API.
>
> Do you mean - make this new dynamic-lcore API return an error if callied
> from secondary process?
>

Yes, and prohibiting from attaching a secondary process if dynamic
lcore API has been used in primary.
I intend to squash in patch 6:
https://github.com/david-marchand/dpdk/commit/e5861ee734bfe2e4dc23d9b919b0db2a32a58aee


-- 
David Marchand


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-24 10:48               ` David Marchand
@ 2020-06-24 11:59                 ` Ananyev, Konstantin
  2020-06-26 14:43                   ` David Marchand
  0 siblings, 1 reply; 126+ messages in thread
From: Ananyev, Konstantin @ 2020-06-24 11:59 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, jerinjacobk, Richardson, Bruce, mdr, ktraynor, Stokes, Ian,
	i.maximets, Thomas Monjalon, Mcnamara, John, Kovacevic, Marko,
	Burakov, Anatoly, Olivier Matz, Andrew Rybchenko, Neil Horman


> 
> On Wed, Jun 24, 2020 at 12:40 PM Ananyev, Konstantin
> <konstantin.ananyev@intel.com> wrote:
> > > Supporting lcore allocation in MP requires exchanges between
> > > primary/secondary processes like what we have for memory allocations.
> > > It will be quite a beast to get to work fine, while not even knowing
> > > if people actually want to use both.
> >
> > I don't think we need to re-implement RPC as we did for memory subsystem.
> > One relatively simple approach - move lcore_role[] and related lock into
> > shared memory (separate memzone or so).
> > I think it should help a lot and will solve majority of the problems.
> > One limitation - init/fini callbacks can be static only.
> > As the drawback, it will introduce change in current behaviour:
> > secondary process with lcore-mask that intersects with master lcore-mask
> > will fail to start.
> > Second approach - make lcore_id local process entity:
> > prohibit indexing by lcore_id in shared data structures.
> > Let say for mempool - make cache local (per process).
> > While that approach is probably more elegant and consistent,
> > it would require more work and will cause ABI (maybe API also) breakage.
> 
> In all scenarii, this is quite some work.
> 
> 
> >
> > > For v4, I added a check to exclude MP and the new API.
> >
> > Do you mean - make this new dynamic-lcore API return an error if callied
> > from secondary process?
> >
> 
> Yes, and prohibiting from attaching a secondary process if dynamic
> lcore API has been used in primary.
> I intend to squash in patch 6:
> https://github.com/david-marchand/dpdk/commit/e5861ee734bfe2e4dc23d9b919b0db2a32a58aee

But a secondary process can attach before lcore_register, so we'll have some sort of inconsistency in behaviour.
If we really want to go ahead with such a workaround -
it is probably better to introduce an explicit EAL flag (--single-process or so),
as Thomas and Bruce suggested, if I understood them properly.




^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 9/9] mempool/bucket: handle non-EAL lcores
  2020-06-23 17:28     ` Andrew Rybchenko
@ 2020-06-26 14:13       ` David Marchand
  2020-06-26 14:34         ` Andrew Rybchenko
  0 siblings, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-06-26 14:13 UTC (permalink / raw)
  To: Andrew Rybchenko
  Cc: dev, Jerin Jacob, Bruce Richardson, Ray Kinsella, Kevin Traynor,
	Ian Stokes, Ilya Maximets, Artem V. Andreev

On Tue, Jun 23, 2020 at 7:28 PM Andrew Rybchenko
<arybchenko@solarflare.com> wrote:
>
> On 6/22/20 4:25 PM, David Marchand wrote:
> > Convert to new lcore API to support non-EAL lcores.
> >
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > ---
> >  drivers/mempool/bucket/rte_mempool_bucket.c | 131 ++++++++++++--------
> >  1 file changed, 82 insertions(+), 49 deletions(-)
> >
> > diff --git a/drivers/mempool/bucket/rte_mempool_bucket.c b/drivers/mempool/bucket/rte_mempool_bucket.c
> > index 5ce1ef16fb..0b4f42d330 100644
> > --- a/drivers/mempool/bucket/rte_mempool_bucket.c
> > +++ b/drivers/mempool/bucket/rte_mempool_bucket.c
> > @@ -55,6 +55,7 @@ struct bucket_data {
> >       struct rte_ring *shared_orphan_ring;
> >       struct rte_mempool *pool;
> >       unsigned int bucket_mem_size;
> > +     void *lcore_callback_handle;
> >  };
> >
> >  static struct bucket_stack *
> > @@ -345,6 +346,22 @@ bucket_dequeue_contig_blocks(struct rte_mempool *mp, void **first_obj_table,
> >       return 0;
> >  }
> >
> > +struct bucket_per_lcore_ctx {
>
> The structure is not used in per-lcore init and uninit
> functions. So, it is better to add _count to make it
> count specified. I.e. bucket_count_per_lcore_ctx.

Yes, and this aligns with its only user being renamed from
count_per_lcore to bucket_count_per_lcore.

>
> > +     const struct bucket_data *bd;
> > +     unsigned int count;
> > +};
> > +
> > +static int
> > +count_per_lcore(unsigned int lcore_id, void *arg)
> > +{
> > +     struct bucket_per_lcore_ctx *ctx = arg;
> > +
> > +     ctx->count += ctx->bd->obj_per_bucket *
> > +             ctx->bd->buckets[lcore_id]->top;
> > +     ctx->count += rte_ring_count(ctx->bd->adoption_buffer_rings[lcore_id]);
> > +     return 0;
> > +}
> > +
> >  static void
> >  count_underfilled_buckets(struct rte_mempool *mp,
> >                         void *opaque,
> > @@ -373,23 +390,66 @@ count_underfilled_buckets(struct rte_mempool *mp,
> >  static unsigned int
> >  bucket_get_count(const struct rte_mempool *mp)
> >  {
> > -     const struct bucket_data *bd = mp->pool_data;
> > -     unsigned int count =
> > -             bd->obj_per_bucket * rte_ring_count(bd->shared_bucket_ring) +
> > -             rte_ring_count(bd->shared_orphan_ring);
> > -     unsigned int i;
> > +     struct bucket_per_lcore_ctx ctx;
>
> Just a nit, but I think that ctx is too generic.
> (some time ago bucket_data bd was ctx in fact :) )
> May be bplc? Up to you.

Ack.


>
> >
> > -     for (i = 0; i < RTE_MAX_LCORE; i++) {
> > -             if (!rte_lcore_is_enabled(i))
> > -                     continue;
> > -             count += bd->obj_per_bucket * bd->buckets[i]->top +
> > -                     rte_ring_count(bd->adoption_buffer_rings[i]);
> > -     }
> > +     ctx.bd = mp->pool_data;
> > +     ctx.count = ctx.bd->obj_per_bucket *
> > +             rte_ring_count(ctx.bd->shared_bucket_ring);
> > +     ctx.count += rte_ring_count(ctx.bd->shared_orphan_ring);
> >
> > +     rte_lcore_iterate(count_per_lcore, &ctx);
> >       rte_mempool_mem_iter((struct rte_mempool *)(uintptr_t)mp,
> > -                          count_underfilled_buckets, &count);
> > +                          count_underfilled_buckets, &ctx.count);
> > +
> > +     return ctx.count;
> > +}
> > +
> > +static int
> > +bucket_init_per_lcore(unsigned int lcore_id, void *arg)
>
> It should be no bucket_ prefix here, or it should be bucket_
> prefix above in count_per_lcore.

As mentioned before, ack.


>
> > +{
> > +     char rg_name[RTE_RING_NAMESIZE];
> > +     struct bucket_data *bd = arg;
> > +     struct rte_mempool *mp;
> > +     int rg_flags;
> > +     int rc;
> > +
> > +     mp = bd->pool;
> > +     bd->buckets[lcore_id] = bucket_stack_create(mp,
> > +             mp->size / bd->obj_per_bucket);
> > +     if (bd->buckets[lcore_id] == NULL)
> > +             goto error;
> > +
> > +     rc = snprintf(rg_name, sizeof(rg_name), RTE_MEMPOOL_MZ_FORMAT ".a%u",
> > +             mp->name, lcore_id);
> > +     if (rc < 0 || rc >= (int)sizeof(rg_name))
> > +             goto error;
> > +
> > +     rg_flags = RING_F_SC_DEQ;
> > +     if (mp->flags & MEMPOOL_F_SP_PUT)
> > +             rg_flags |= RING_F_SP_ENQ;
> > +     if (mp->flags & MEMPOOL_F_SC_GET)
> > +             rg_flags |= RING_F_SC_DEQ;
>
> There is not point to have two above lines here, since
> RING_F_SC_DEQ is always set.

Ah yes, I did not realise when moving the code.


>
> > +     bd->adoption_buffer_rings[lcore_id] = rte_ring_create(rg_name,
> > +             rte_align32pow2(mp->size + 1), mp->socket_id, rg_flags);
> > +     if (bd->adoption_buffer_rings[lcore_id] == NULL)
> > +             goto error;
> >
> > -     return count;
> > +     return 0;
> > +error:
> > +     rte_free(bd->buckets[lcore_id]);
> > +     bd->buckets[lcore_id] = NULL;
> > +     return -1;
>
> Why does the API collapse all negative errnos into -1?
> (I don't think it is critical, just want to know why).

I collapsed everything into a single error as we only have a partial idea of
what went wrong when calling this callback with all lcores at
registration.
We could get a specific error reported by this callback, but then we
would not know on which lcore (programmatically).
And in the end, all errors boil down to a lack of resources; I do
not expect a need for input validation.

Maybe you have other use cases in mind?


Thanks for the review.

-- 
David Marchand


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 9/9] mempool/bucket: handle non-EAL lcores
  2020-06-26 14:13       ` David Marchand
@ 2020-06-26 14:34         ` Andrew Rybchenko
  0 siblings, 0 replies; 126+ messages in thread
From: Andrew Rybchenko @ 2020-06-26 14:34 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, Jerin Jacob, Bruce Richardson, Ray Kinsella, Kevin Traynor,
	Ian Stokes, Ilya Maximets, Artem V. Andreev

On 6/26/20 5:13 PM, David Marchand wrote:
> On Tue, Jun 23, 2020 at 7:28 PM Andrew Rybchenko
> <arybchenko@solarflare.com> wrote:
>>
>> On 6/22/20 4:25 PM, David Marchand wrote:
>>> Convert to new lcore API to support non-EAL lcores.
>>>
>>> Signed-off-by: David Marchand <david.marchand@redhat.com>
>>> ---

[snip]

>>> +     bd->adoption_buffer_rings[lcore_id] = rte_ring_create(rg_name,
>>> +             rte_align32pow2(mp->size + 1), mp->socket_id, rg_flags);
>>> +     if (bd->adoption_buffer_rings[lcore_id] == NULL)
>>> +             goto error;
>>>
>>> -     return count;
>>> +     return 0;
>>> +error:
>>> +     rte_free(bd->buckets[lcore_id]);
>>> +     bd->buckets[lcore_id] = NULL;
>>> +     return -1;
>>
>> Why does the API collapse all negative errnos into -1?
>> (I don't think it is critical, just want to know why).
> 
> I collapsed everything as a single error as we have a partial idea of
> what went wrong when calling this callback with all lcores at
> registration.
> We could get a specific error reported by this callback, but then we
> would not know on which lcore (programmatically).
> And in the end, all errors will summarize as a lack of resources, I do
> not expect a need for input validation.

Makes sense. Thanks for explanations.

> Maybe you have other use cases in mind?

No.

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-24 11:59                 ` Ananyev, Konstantin
@ 2020-06-26 14:43                   ` David Marchand
  2020-06-30 10:35                     ` Thomas Monjalon
  0 siblings, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-06-26 14:43 UTC (permalink / raw)
  To: Ananyev, Konstantin
  Cc: dev, jerinjacobk, Richardson, Bruce, mdr, ktraynor, Stokes, Ian,
	i.maximets, Thomas Monjalon, Mcnamara, John, Kovacevic, Marko,
	Burakov, Anatoly, Olivier Matz, Andrew Rybchenko, Neil Horman

On Wed, Jun 24, 2020 at 1:59 PM Ananyev, Konstantin
<konstantin.ananyev@intel.com> wrote:
> > > Do you mean - make this new dynamic-lcore API return an error if callied
> > > from secondary process?
> > >
> >
> > Yes, and prohibiting from attaching a secondary process if dynamic
> > lcore API has been used in primary.
> > I intend to squash in patch 6:
> > https://github.com/david-marchand/dpdk/commit/e5861ee734bfe2e4dc23d9b919b0db2a32a58aee
>
> But secondary process can attach before lcore_register, so we'll have some sort of inconsistency in behaviour.

If the developer tries to use both features, he gets an ERROR log in
both init paths.
So whatever the order at runtime, we inform the developer (who did not
read/understand the rte_thread_register() documentation) that what he
is doing is unsupported.


> If we really  want to go ahead with such workaround -
> probably better to introduce explicit EAL flag ( --single-process or so).
> As Thomas and  Bruce suggested, if I understood them properly.

An EAL flag is a stable API from the start, as there is nothing
describing how we can remove one.
So a new EAL flag for an experimental API/feature seems contradictory.

Going with a new features status API... I think it is beyond this series.

Thomas seems to suggest an automatic resolution when a feature conflict
happens?

I'll send the v4, let's discuss it there if you want.
Thanks.

-- 
David Marchand


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v4 0/9] Register non-EAL threads as lcore
  2020-06-10 14:44 [dpdk-dev] [PATCH 0/7] Register external threads as lcore David Marchand
                   ` (9 preceding siblings ...)
  2020-06-22 13:25 ` [dpdk-dev] [PATCH v3 0/9] Register non-EAL threads as lcore David Marchand
@ 2020-06-26 14:47 ` David Marchand
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 1/9] eal: relocate per thread symbols to common David Marchand
                     ` (8 more replies)
  2020-07-06 14:15 ` [dpdk-dev] [PATCH v5 00/10] Register non-EAL threads as lcore David Marchand
  2020-07-06 20:52 ` [dpdk-dev] [PATCH v6 00/10] Register non-EAL threads as lcore David Marchand
  12 siblings, 9 replies; 126+ messages in thread
From: David Marchand @ 2020-06-26 14:47 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets

OVS and some other applications have been hacking into DPDK internals to
fake EAL threads and avoid performance penalty of only having non-EAL
threads.

This series proposes to add a new type of lcore and to map those threads
to such lcores.
Non-EAL threads won't run the DPDK EAL main loop.
As a consequence, part of the EAL threads API cannot work.

Having new lcores appearing during the process lifetime is not expected
by some DPDK components. This is addressed by introducing init/uninit
callbacks invoked when hotplugging such lcores.

There is still some work/discussion pending:
- refuse the new lcore role in incompatible EAL threads APIs (or only
  document it, as those APIs were already incompatible?),
- think about deprecation notices for the existing RTE_LCORE_FOREACH macros
  and the like; it is probably worth discussing how to iterate over
  lcores.

For the interested parties, I have a patch [1] against dpdk-latest OVS
branch that makes use of this series.

1: https://patchwork.ozlabs.org/project/openvswitch/patch/20200626123017.28555-1-david.marchand@redhat.com/

Changes since v3:
- added init failure when trying to use in conjunction with multiprocess,
- addressed Andrew comments,

Changes since v2:
- fixed windows build error due to missing trace stub,
- fixed bug when rolling back on lcore register,

Changes since v1:
- rebased on master (conflicts on merged Windows series),
- separated lcore role code cleanup in a patch,
- tried to use a single naming, so kept non-EAL threads as the main
  notion. non-EAL threads are then distinguished between registered and
  unregistered non-EAL threads,
- added unit tests (still missing some coverage, marked with a FIXME),
- reworked callbacks call under a common rwlock lock which protects
  lcores allocations and callbacks registration,
- introduced lcore iterators and converted the bucket mempool driver,

-- 
David Marchand

David Marchand (9):
  eal: relocate per thread symbols to common
  eal: fix multiple definition of per lcore thread id
  eal: introduce thread init helper
  eal: introduce thread uninit helper
  eal: move lcore role code
  eal: register non-EAL threads as lcores
  eal: add lcore init callbacks
  eal: add lcore iterators
  mempool/bucket: handle non-EAL lcores

 MAINTAINERS                                   |   1 +
 app/test/Makefile                             |   1 +
 app/test/autotest_data.py                     |   6 +
 app/test/meson.build                          |   2 +
 app/test/test_lcores.c                        | 372 ++++++++++++++++++
 doc/guides/howto/debug_troubleshoot.rst       |   5 +-
 .../prog_guide/env_abstraction_layer.rst      |  22 +-
 doc/guides/prog_guide/mempool_lib.rst         |   2 +-
 drivers/mempool/bucket/rte_mempool_bucket.c   | 130 +++---
 lib/librte_eal/common/eal_common_lcore.c      | 246 +++++++++++-
 lib/librte_eal/common/eal_common_mcfg.c       |  36 ++
 lib/librte_eal/common/eal_common_thread.c     | 126 ++++--
 lib/librte_eal/common/eal_common_trace.c      |  51 ++-
 lib/librte_eal/common/eal_memcfg.h            |  10 +
 lib/librte_eal/common/eal_private.h           |  19 +
 lib/librte_eal/common/eal_thread.h            |  26 +-
 lib/librte_eal/common/eal_trace.h             |   2 +-
 lib/librte_eal/freebsd/eal.c                  |  20 +-
 lib/librte_eal/freebsd/eal_thread.c           |  38 +-
 lib/librte_eal/include/rte_eal.h              |  12 +-
 lib/librte_eal/include/rte_lcore.h            | 174 +++++++-
 lib/librte_eal/include/rte_trace_point.h      |   9 +
 lib/librte_eal/linux/eal.c                    |  21 +-
 lib/librte_eal/linux/eal_thread.c             |  38 +-
 lib/librte_eal/rte_eal_version.map            |  16 +
 lib/librte_eal/windows/eal.c                  |   8 +-
 lib/librte_eal/windows/eal_thread.c           |  14 +-
 lib/librte_mempool/rte_mempool.h              |  11 +-
 28 files changed, 1179 insertions(+), 239 deletions(-)
 create mode 100644 app/test/test_lcores.c

-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v4 1/9] eal: relocate per thread symbols to common
  2020-06-26 14:47 ` [dpdk-dev] [PATCH v4 0/9] Register non-EAL threads as lcore David Marchand
@ 2020-06-26 14:47   ` David Marchand
  2020-06-30  9:33     ` Olivier Matz
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 2/9] eal: fix multiple definition of per lcore thread id David Marchand
                     ` (7 subsequent siblings)
  8 siblings, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-06-26 14:47 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, Harini Ramakrishnan, Omar Cardona,
	Pallavi Kadam, Ranjit Menon

We have per lcore thread symbols scattered in OS implementations but
common code relies on them.
Move all of them to the common code.

RTE_PER_LCORE(_socket_id) and RTE_PER_LCORE(_cpuset) have public
accessors and are not exported through the library map, they can be
made static.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/librte_eal/common/eal_common_thread.c | 5 ++++-
 lib/librte_eal/freebsd/eal_thread.c       | 4 ----
 lib/librte_eal/include/rte_lcore.h        | 1 -
 lib/librte_eal/linux/eal_thread.c         | 4 ----
 lib/librte_eal/windows/eal_thread.c       | 4 ----
 5 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 370bb1b634..a5f67d811c 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -21,7 +21,10 @@
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DECLARE_PER_LCORE(unsigned , _socket_id);
+RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
+static RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) =
+	(unsigned int)SOCKET_ID_ANY;
+static RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
 
 unsigned rte_socket_id(void)
 {
diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
index b52019782a..40676d9ef5 100644
--- a/lib/librte_eal/freebsd/eal_thread.c
+++ b/lib/librte_eal/freebsd/eal_thread.c
@@ -25,10 +25,6 @@
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
 /*
  * Send a message to a slave lcore identified by slave_id to call a
  * function f with argument arg. Once the execution is done, the
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 339046bc86..5c1d1926e9 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -23,7 +23,6 @@ extern "C" {
 #define LCORE_ID_ANY     UINT32_MAX       /**< Any lcore. */
 
 RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
-RTE_DECLARE_PER_LCORE(rte_cpuset_t, _cpuset); /**< Per thread "cpuset". */
 
 /**
  * Get a lcore's role.
diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
index cd9d6e0ebf..a52ebef3a4 100644
--- a/lib/librte_eal/linux/eal_thread.c
+++ b/lib/librte_eal/linux/eal_thread.c
@@ -25,10 +25,6 @@
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
 /*
  * Send a message to a slave lcore identified by slave_id to call a
  * function f with argument arg. Once the execution is done, the
diff --git a/lib/librte_eal/windows/eal_thread.c b/lib/librte_eal/windows/eal_thread.c
index 3dd56519c9..f12a2ec6ad 100644
--- a/lib/librte_eal/windows/eal_thread.c
+++ b/lib/librte_eal/windows/eal_thread.c
@@ -16,10 +16,6 @@
 #include "eal_private.h"
 #include "eal_windows.h"
 
-RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) = (unsigned int)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
 /*
  * Send a message to a slave lcore identified by slave_id to call a
  * function f with argument arg. Once the execution is done, the
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v4 2/9] eal: fix multiple definition of per lcore thread id
  2020-06-26 14:47 ` [dpdk-dev] [PATCH v4 0/9] Register non-EAL threads as lcore David Marchand
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 1/9] eal: relocate per thread symbols to common David Marchand
@ 2020-06-26 14:47   ` David Marchand
  2020-06-30  9:34     ` Olivier Matz
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 3/9] eal: introduce thread init helper David Marchand
                     ` (6 subsequent siblings)
  8 siblings, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-06-26 14:47 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, Neil Horman, Cunming Liang,
	Konstantin Ananyev, Olivier Matz

Because of the inline accessor + static declaration in rte_gettid(),
we end up with multiple symbols for RTE_PER_LCORE(_thread_id).
Each compilation unit will pay a cost when accessing this information
for the first time.

$ nm build/app/dpdk-testpmd | grep per_lcore__thread_id
0000000000000054 d per_lcore__thread_id.5037
0000000000000040 d per_lcore__thread_id.5103
0000000000000048 d per_lcore__thread_id.5259
000000000000004c d per_lcore__thread_id.5259
0000000000000044 d per_lcore__thread_id.5933
0000000000000058 d per_lcore__thread_id.6261
0000000000000050 d per_lcore__thread_id.7378
000000000000005c d per_lcore__thread_id.7496
000000000000000c d per_lcore__thread_id.8016
0000000000000010 d per_lcore__thread_id.8431

Make it global as part of the DPDK_21 stable ABI.

Fixes: ef76436c6834 ("eal: get unique thread id")

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Ray Kinsella <mdr@ashroe.eu>
---
 lib/librte_eal/common/eal_common_thread.c | 1 +
 lib/librte_eal/include/rte_eal.h          | 3 ++-
 lib/librte_eal/rte_eal_version.map        | 7 +++++++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index a5f67d811c..280c64bb76 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -22,6 +22,7 @@
 #include "eal_thread.h"
 
 RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
+RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
 static RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) =
 	(unsigned int)SOCKET_ID_ANY;
 static RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
index 2f9ed298de..2edf8c6556 100644
--- a/lib/librte_eal/include/rte_eal.h
+++ b/lib/librte_eal/include/rte_eal.h
@@ -447,6 +447,8 @@ enum rte_intr_mode rte_eal_vfio_intr_mode(void);
  */
 int rte_sys_gettid(void);
 
+RTE_DECLARE_PER_LCORE(int, _thread_id);
+
 /**
  * Get system unique thread id.
  *
@@ -456,7 +458,6 @@ int rte_sys_gettid(void);
  */
 static inline int rte_gettid(void)
 {
-	static RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
 	if (RTE_PER_LCORE(_thread_id) == -1)
 		RTE_PER_LCORE(_thread_id) = rte_sys_gettid();
 	return RTE_PER_LCORE(_thread_id);
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 196eef5afa..0d42d44ce9 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -221,6 +221,13 @@ DPDK_20.0 {
 	local: *;
 };
 
+DPDK_21 {
+	global:
+
+	per_lcore__thread_id;
+
+} DPDK_20.0;
+
 EXPERIMENTAL {
 	global:
 
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v4 3/9] eal: introduce thread init helper
  2020-06-26 14:47 ` [dpdk-dev] [PATCH v4 0/9] Register non-EAL threads as lcore David Marchand
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 1/9] eal: relocate per thread symbols to common David Marchand
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 2/9] eal: fix multiple definition of per lcore thread id David Marchand
@ 2020-06-26 14:47   ` David Marchand
  2020-06-30  9:37     ` Olivier Matz
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 4/9] eal: introduce thread uninit helper David Marchand
                     ` (5 subsequent siblings)
  8 siblings, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-06-26 14:47 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, Harini Ramakrishnan, Omar Cardona,
	Pallavi Kadam, Ranjit Menon

Introduce a helper responsible for initialising the per thread context.
We can then have a unified context for EAL and non-EAL threads and
remove copy/paste'd OS-specific helpers.

Per EAL thread CPU affinity setting is separated from the thread init.
This is to accommodate Windows EAL, where CPU affinity is not set at
the moment.
Besides, having affinity set by the master lcore in FreeBSD and Linux
will make it possible to detect errors rather than panic in the child
thread. But the cleanup when such an event happens is left for later.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v1:
- rebased on master, removed Windows workarounds wrt gettid and traces
  support,

---
 lib/librte_eal/common/eal_common_thread.c | 51 +++++++++++++----------
 lib/librte_eal/common/eal_thread.h        |  8 ++--
 lib/librte_eal/freebsd/eal.c              | 14 ++++++-
 lib/librte_eal/freebsd/eal_thread.c       | 32 +-------------
 lib/librte_eal/linux/eal.c                | 15 ++++++-
 lib/librte_eal/linux/eal_thread.c         | 32 +-------------
 lib/librte_eal/windows/eal.c              |  3 +-
 lib/librte_eal/windows/eal_thread.c       | 10 +----
 8 files changed, 66 insertions(+), 99 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 280c64bb76..afb30236c5 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -71,20 +71,10 @@ eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
 	return socket_id;
 }
 
-int
-rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+static void
+thread_update_affinity(rte_cpuset_t *cpusetp)
 {
-	int s;
-	unsigned lcore_id;
-	pthread_t tid;
-
-	tid = pthread_self();
-
-	s = pthread_setaffinity_np(tid, sizeof(rte_cpuset_t), cpusetp);
-	if (s != 0) {
-		RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
-		return -1;
-	}
+	unsigned int lcore_id = rte_lcore_id();
 
 	/* store socket_id in TLS for quick access */
 	RTE_PER_LCORE(_socket_id) =
@@ -94,14 +84,24 @@ rte_thread_set_affinity(rte_cpuset_t *cpusetp)
 	memmove(&RTE_PER_LCORE(_cpuset), cpusetp,
 		sizeof(rte_cpuset_t));
 
-	lcore_id = rte_lcore_id();
 	if (lcore_id != (unsigned)LCORE_ID_ANY) {
 		/* EAL thread will update lcore_config */
 		lcore_config[lcore_id].socket_id = RTE_PER_LCORE(_socket_id);
 		memmove(&lcore_config[lcore_id].cpuset, cpusetp,
 			sizeof(rte_cpuset_t));
 	}
+}
 
+int
+rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+{
+	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+			cpusetp) != 0) {
+		RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
+		return -1;
+	}
+
+	thread_update_affinity(cpusetp);
 	return 0;
 }
 
@@ -147,6 +147,19 @@ eal_thread_dump_affinity(char *str, unsigned size)
 	return ret;
 }
 
+void
+rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
+{
+	/* set the lcore ID in per-lcore memory area */
+	RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+	/* acquire system unique id  */
+	rte_gettid();
+
+	thread_update_affinity(cpuset);
+
+	__rte_trace_mem_per_thread_alloc();
+}
 
 struct rte_thread_ctrl_params {
 	void *(*start_routine)(void *);
@@ -154,16 +167,14 @@ struct rte_thread_ctrl_params {
 	pthread_barrier_t configured;
 };
 
-static void *rte_thread_init(void *arg)
+static void *ctrl_thread_init(void *arg)
 {
 	int ret;
-	rte_cpuset_t *cpuset = &internal_config.ctrl_cpuset;
 	struct rte_thread_ctrl_params *params = arg;
 	void *(*start_routine)(void *) = params->start_routine;
 	void *routine_arg = params->arg;
 
-	/* Store cpuset in TLS for quick access */
-	memmove(&RTE_PER_LCORE(_cpuset), cpuset, sizeof(rte_cpuset_t));
+	rte_thread_init(rte_lcore_id(), &internal_config.ctrl_cpuset);
 
 	ret = pthread_barrier_wait(&params->configured);
 	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
@@ -171,8 +182,6 @@ static void *rte_thread_init(void *arg)
 		free(params);
 	}
 
-	__rte_trace_mem_per_thread_alloc();
-
 	return start_routine(routine_arg);
 }
 
@@ -194,7 +203,7 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
 
 	pthread_barrier_init(&params->configured, NULL, 2);
 
-	ret = pthread_create(thread, attr, rte_thread_init, (void *)params);
+	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
 	if (ret != 0) {
 		free(params);
 		return -ret;
diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
index b40ed249ed..da5e7c93ba 100644
--- a/lib/librte_eal/common/eal_thread.h
+++ b/lib/librte_eal/common/eal_thread.h
@@ -16,12 +16,14 @@
 __rte_noreturn void *eal_thread_loop(void *arg);
 
 /**
- * Init per-lcore info for master thread
+ * Init per-lcore info in current thread.
  *
  * @param lcore_id
- *   identifier of master lcore
+ *   identifier of lcore.
+ * @param cpuset
+ *   CPU affinity for this thread.
  */
-void eal_thread_init_master(unsigned lcore_id);
+void rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset);
 
 /**
  * Get the NUMA socket id from cpu id.
diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
index 00fc66bf7f..13e5de006f 100644
--- a/lib/librte_eal/freebsd/eal.c
+++ b/lib/librte_eal/freebsd/eal.c
@@ -877,7 +877,14 @@ rte_eal_init(int argc, char **argv)
 
 	eal_check_mem_on_local_socket();
 
-	eal_thread_init_master(rte_config.master_lcore);
+	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+			&lcore_config[rte_config.master_lcore].cpuset) != 0) {
+		rte_eal_init_alert("Cannot set affinity");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	rte_thread_init(rte_config.master_lcore,
+		&lcore_config[rte_config.master_lcore].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
 
@@ -908,6 +915,11 @@ rte_eal_init(int argc, char **argv)
 		snprintf(thread_name, sizeof(thread_name),
 				"lcore-slave-%d", i);
 		rte_thread_setname(lcore_config[i].thread_id, thread_name);
+
+		ret = pthread_setaffinity_np(lcore_config[i].thread_id,
+			sizeof(rte_cpuset_t), &lcore_config[i].cpuset);
+		if (ret != 0)
+			rte_panic("Cannot set affinity\n");
 	}
 
 	/*
diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
index 40676d9ef5..c1fb8eb2d8 100644
--- a/lib/librte_eal/freebsd/eal_thread.c
+++ b/lib/librte_eal/freebsd/eal_thread.c
@@ -66,29 +66,6 @@ rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
 	return rc;
 }
 
-/* set affinity for current thread */
-static int
-eal_thread_set_affinity(void)
-{
-	unsigned lcore_id = rte_lcore_id();
-
-	/* acquire system unique id  */
-	rte_gettid();
-
-	/* update EAL thread core affinity */
-	return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
-}
-
-void eal_thread_init_master(unsigned lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
-}
-
 /* main loop of threads */
 __rte_noreturn void *
 eal_thread_loop(__rte_unused void *arg)
@@ -113,19 +90,12 @@ eal_thread_loop(__rte_unused void *arg)
 	m2s = lcore_config[lcore_id].pipe_master2slave[0];
 	s2m = lcore_config[lcore_id].pipe_slave2master[1];
 
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
+	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
-
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
 		lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
 
-	__rte_trace_mem_per_thread_alloc();
 	rte_eal_trace_thread_lcore_ready(lcore_id, cpuset);
 
 	/* read on our pipe to get commands */
diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
index 28a8b78517..8894cea50a 100644
--- a/lib/librte_eal/linux/eal.c
+++ b/lib/librte_eal/linux/eal.c
@@ -1205,10 +1205,16 @@ rte_eal_init(int argc, char **argv)
 
 	eal_check_mem_on_local_socket();
 
-	eal_thread_init_master(rte_config.master_lcore);
+	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+			&lcore_config[rte_config.master_lcore].cpuset) != 0) {
+		rte_eal_init_alert("Cannot set affinity");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	rte_thread_init(rte_config.master_lcore,
+		&lcore_config[rte_config.master_lcore].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
-
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		rte_config.master_lcore, (uintptr_t)thread_id, cpuset,
 		ret == 0 ? "" : "...");
@@ -1240,6 +1246,11 @@ rte_eal_init(int argc, char **argv)
 		if (ret != 0)
 			RTE_LOG(DEBUG, EAL,
 				"Cannot set name for lcore thread\n");
+
+		ret = pthread_setaffinity_np(lcore_config[i].thread_id,
+			sizeof(rte_cpuset_t), &lcore_config[i].cpuset);
+		if (ret != 0)
+			rte_panic("Cannot set affinity\n");
 	}
 
 	/*
diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
index a52ebef3a4..07aec0c44d 100644
--- a/lib/librte_eal/linux/eal_thread.c
+++ b/lib/librte_eal/linux/eal_thread.c
@@ -66,29 +66,6 @@ rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
 	return rc;
 }
 
-/* set affinity for current EAL thread */
-static int
-eal_thread_set_affinity(void)
-{
-	unsigned lcore_id = rte_lcore_id();
-
-	/* acquire system unique id  */
-	rte_gettid();
-
-	/* update EAL thread core affinity */
-	return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
-}
-
-void eal_thread_init_master(unsigned lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
-}
-
 /* main loop of threads */
 __rte_noreturn void *
 eal_thread_loop(__rte_unused void *arg)
@@ -113,19 +90,12 @@ eal_thread_loop(__rte_unused void *arg)
 	m2s = lcore_config[lcore_id].pipe_master2slave[0];
 	s2m = lcore_config[lcore_id].pipe_slave2master[1];
 
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
+	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
-
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		lcore_id, (uintptr_t)thread_id, cpuset, ret == 0 ? "" : "...");
 
-	__rte_trace_mem_per_thread_alloc();
 	rte_eal_trace_thread_lcore_ready(lcore_id, cpuset);
 
 	/* read on our pipe to get commands */
diff --git a/lib/librte_eal/windows/eal.c b/lib/librte_eal/windows/eal.c
index 97c8427c73..adfaa00275 100644
--- a/lib/librte_eal/windows/eal.c
+++ b/lib/librte_eal/windows/eal.c
@@ -367,7 +367,8 @@ rte_eal_init(int argc, char **argv)
 		return -1;
 	}
 
-	eal_thread_init_master(rte_config.master_lcore);
+	rte_thread_init(rte_config.master_lcore,
+		&lcore_config[rte_config.master_lcore].cpuset);
 
 	RTE_LCORE_FOREACH_SLAVE(i) {
 
diff --git a/lib/librte_eal/windows/eal_thread.c b/lib/librte_eal/windows/eal_thread.c
index f12a2ec6ad..4f01881240 100644
--- a/lib/librte_eal/windows/eal_thread.c
+++ b/lib/librte_eal/windows/eal_thread.c
@@ -53,13 +53,6 @@ rte_eal_remote_launch(lcore_function_t *f, void *arg, unsigned int slave_id)
 	return 0;
 }
 
-void
-eal_thread_init_master(unsigned int lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-}
-
 /* main loop of threads */
 void *
 eal_thread_loop(void *arg __rte_unused)
@@ -84,8 +77,7 @@ eal_thread_loop(void *arg __rte_unused)
 	m2s = lcore_config[lcore_id].pipe_master2slave[0];
 	s2m = lcore_config[lcore_id].pipe_slave2master[1];
 
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
+	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s])\n",
 		lcore_id, (uintptr_t)thread_id, cpuset);
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v4 4/9] eal: introduce thread uninit helper
  2020-06-26 14:47 ` [dpdk-dev] [PATCH v4 0/9] Register non-EAL threads as lcore David Marchand
                     ` (2 preceding siblings ...)
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 3/9] eal: introduce thread init helper David Marchand
@ 2020-06-26 14:47   ` David Marchand
  2020-06-26 15:00     ` Jerin Jacob
                       ` (2 more replies)
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 5/9] eal: move lcore role code David Marchand
                     ` (4 subsequent siblings)
  8 siblings, 3 replies; 126+ messages in thread
From: David Marchand @ 2020-06-26 14:47 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, Jerin Jacob, Sunil Kumar Kori,
	Neil Horman, Harini Ramakrishnan, Omar Cardona, Pallavi Kadam,
	Ranjit Menon

This is a preparation step for dynamically unregistering threads.

Since we explicitly allocate a per thread trace buffer in
rte_thread_init, add an internal helper to free this buffer.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Note: I preferred renaming the current internal function that frees all
threads' trace buffers (new name trace_mem_free()) and reusing the previous
name (trace_mem_per_thread_free()) for freeing the buffer of a given
thread.

Changes since v2:
- added missing stub for windows tracing support,
- moved free symbol to exported (experimental) ABI as a counterpart of
  the alloc symbol we already had,

Changes since v1:
- rebased on master, removed Windows workaround wrt traces support,

---
 lib/librte_eal/common/eal_common_thread.c |  9 ++++
 lib/librte_eal/common/eal_common_trace.c  | 51 +++++++++++++++++++----
 lib/librte_eal/common/eal_thread.h        |  5 +++
 lib/librte_eal/common/eal_trace.h         |  2 +-
 lib/librte_eal/include/rte_trace_point.h  |  9 ++++
 lib/librte_eal/rte_eal_version.map        |  3 ++
 lib/librte_eal/windows/eal.c              |  5 +++
 7 files changed, 75 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index afb30236c5..3b30cc99d9 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -20,6 +20,7 @@
 #include "eal_internal_cfg.h"
 #include "eal_private.h"
 #include "eal_thread.h"
+#include "eal_trace.h"
 
 RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
 RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
@@ -161,6 +162,14 @@ rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
 	__rte_trace_mem_per_thread_alloc();
 }
 
+void
+rte_thread_uninit(void)
+{
+	__rte_trace_mem_per_thread_free();
+
+	RTE_PER_LCORE(_lcore_id) = LCORE_ID_ANY;
+}
+
 struct rte_thread_ctrl_params {
 	void *(*start_routine)(void *);
 	void *arg;
diff --git a/lib/librte_eal/common/eal_common_trace.c b/lib/librte_eal/common/eal_common_trace.c
index 875553d7e5..3e620d76ed 100644
--- a/lib/librte_eal/common/eal_common_trace.c
+++ b/lib/librte_eal/common/eal_common_trace.c
@@ -101,7 +101,7 @@ eal_trace_fini(void)
 {
 	if (!rte_trace_is_enabled())
 		return;
-	trace_mem_per_thread_free();
+	trace_mem_free();
 	trace_metadata_destroy();
 	eal_trace_args_free();
 }
@@ -370,24 +370,59 @@ __rte_trace_mem_per_thread_alloc(void)
 	rte_spinlock_unlock(&trace->lock);
 }
 
+static void
+trace_mem_per_thread_free_unlocked(struct thread_mem_meta *meta)
+{
+	if (meta->area == TRACE_AREA_HUGEPAGE)
+		eal_free_no_trace(meta->mem);
+	else if (meta->area == TRACE_AREA_HEAP)
+		free(meta->mem);
+}
+
+void
+__rte_trace_mem_per_thread_free(void)
+{
+	struct trace *trace = trace_obj_get();
+	struct __rte_trace_header *header;
+	uint32_t count;
+
+	if (RTE_PER_LCORE(trace_mem) == NULL)
+		return;
+
+	header = RTE_PER_LCORE(trace_mem);
+	rte_spinlock_lock(&trace->lock);
+	for (count = 0; count < trace->nb_trace_mem_list; count++) {
+		if (trace->lcore_meta[count].mem == header)
+			break;
+	}
+	if (count != trace->nb_trace_mem_list) {
+		struct thread_mem_meta *meta = &trace->lcore_meta[count];
+
+		trace_mem_per_thread_free_unlocked(meta);
+		if (count != trace->nb_trace_mem_list - 1) {
+			memmove(meta, meta + 1,
+				sizeof(*meta) *
+				 (trace->nb_trace_mem_list - count - 1));
+		}
+		trace->nb_trace_mem_list--;
+	}
+	rte_spinlock_unlock(&trace->lock);
+}
+
 void
-trace_mem_per_thread_free(void)
+trace_mem_free(void)
 {
 	struct trace *trace = trace_obj_get();
 	uint32_t count;
-	void *mem;
 
 	if (!rte_trace_is_enabled())
 		return;
 
 	rte_spinlock_lock(&trace->lock);
 	for (count = 0; count < trace->nb_trace_mem_list; count++) {
-		mem = trace->lcore_meta[count].mem;
-		if (trace->lcore_meta[count].area == TRACE_AREA_HUGEPAGE)
-			eal_free_no_trace(mem);
-		else if (trace->lcore_meta[count].area == TRACE_AREA_HEAP)
-			free(mem);
+		trace_mem_per_thread_free_unlocked(&trace->lcore_meta[count]);
 	}
+	trace->nb_trace_mem_list = 0;
 	rte_spinlock_unlock(&trace->lock);
 }
 
diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
index da5e7c93ba..4ecd8fd53a 100644
--- a/lib/librte_eal/common/eal_thread.h
+++ b/lib/librte_eal/common/eal_thread.h
@@ -25,6 +25,11 @@ __rte_noreturn void *eal_thread_loop(void *arg);
  */
 void rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset);
 
+/**
+ * Uninitialize per-lcore info for current thread.
+ */
+void rte_thread_uninit(void);
+
 /**
  * Get the NUMA socket id from cpu id.
  * This function is private to EAL.
diff --git a/lib/librte_eal/common/eal_trace.h b/lib/librte_eal/common/eal_trace.h
index 8f60616156..bbb6e1645c 100644
--- a/lib/librte_eal/common/eal_trace.h
+++ b/lib/librte_eal/common/eal_trace.h
@@ -106,7 +106,7 @@ int trace_metadata_create(void);
 void trace_metadata_destroy(void);
 int trace_mkdir(void);
 int trace_epoch_time_save(void);
-void trace_mem_per_thread_free(void);
+void trace_mem_free(void);
 
 /* EAL interface */
 int eal_trace_init(void);
diff --git a/lib/librte_eal/include/rte_trace_point.h b/lib/librte_eal/include/rte_trace_point.h
index 377c2414aa..686b86fdb1 100644
--- a/lib/librte_eal/include/rte_trace_point.h
+++ b/lib/librte_eal/include/rte_trace_point.h
@@ -230,6 +230,15 @@ __rte_trace_point_fp_is_enabled(void)
 __rte_experimental
 void __rte_trace_mem_per_thread_alloc(void);
 
+/**
+ * @internal
+ *
+ * Free trace memory buffer per thread.
+ *
+ */
+__rte_experimental
+void __rte_trace_mem_per_thread_free(void);
+
 /**
  * @internal
  *
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 0d42d44ce9..5831eea4b0 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -393,6 +393,9 @@ EXPERIMENTAL {
 	rte_trace_point_lookup;
 	rte_trace_regexp;
 	rte_trace_save;
+
+	# added in 20.08
+	__rte_trace_mem_per_thread_free;
 };
 
 INTERNAL {
diff --git a/lib/librte_eal/windows/eal.c b/lib/librte_eal/windows/eal.c
index adfaa00275..27a44c49ff 100644
--- a/lib/librte_eal/windows/eal.c
+++ b/lib/librte_eal/windows/eal.c
@@ -255,6 +255,11 @@ __rte_trace_mem_per_thread_alloc(void)
 {
 }
 
+void
+__rte_trace_mem_per_thread_free(void)
+{
+}
+
 void
 __rte_trace_point_emit_field(size_t sz, const char *field,
 	const char *type)
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v4 5/9] eal: move lcore role code
  2020-06-26 14:47 ` [dpdk-dev] [PATCH v4 0/9] Register non-EAL threads as lcore David Marchand
                     ` (3 preceding siblings ...)
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 4/9] eal: introduce thread uninit helper David Marchand
@ 2020-06-26 14:47   ` David Marchand
  2020-06-30  9:45     ` Olivier Matz
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 6/9] eal: register non-EAL threads as lcores David Marchand
                     ` (3 subsequent siblings)
  8 siblings, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-06-26 14:47 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets

For consistency's sake, move all lcore role code into the dedicated
compilation unit / header.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/librte_eal/common/eal_common_lcore.c  | 11 +++++++
 lib/librte_eal/common/eal_common_thread.c | 11 -------
 lib/librte_eal/include/rte_eal.h          |  9 ------
 lib/librte_eal/include/rte_lcore.h        | 37 ++++++++++++++---------
 4 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 5404922a87..86d32a3dd7 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -63,6 +63,17 @@ rte_eal_lcore_role(unsigned int lcore_id)
 	return cfg->lcore_role[lcore_id];
 }
 
+int
+rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+
+	if (lcore_id >= RTE_MAX_LCORE)
+		return -EINVAL;
+
+	return cfg->lcore_role[lcore_id] == role;
+}
+
 int rte_lcore_is_enabled(unsigned int lcore_id)
 {
 	struct rte_config *cfg = rte_eal_get_configuration();
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 3b30cc99d9..a7ae0691bf 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -33,17 +33,6 @@ unsigned rte_socket_id(void)
 	return RTE_PER_LCORE(_socket_id);
 }
 
-int
-rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role)
-{
-	struct rte_config *cfg = rte_eal_get_configuration();
-
-	if (lcore_id >= RTE_MAX_LCORE)
-		return -EINVAL;
-
-	return cfg->lcore_role[lcore_id] == role;
-}
-
 static int
 eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
 {
diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
index 2edf8c6556..0913d1947c 100644
--- a/lib/librte_eal/include/rte_eal.h
+++ b/lib/librte_eal/include/rte_eal.h
@@ -31,15 +31,6 @@ extern "C" {
 /* Maximum thread_name length. */
 #define RTE_MAX_THREAD_NAME_LEN 16
 
-/**
- * The lcore role (used in RTE or not).
- */
-enum rte_lcore_role_t {
-	ROLE_RTE,
-	ROLE_OFF,
-	ROLE_SERVICE,
-};
-
 /**
  * The type of process in a linux, multi-process setup
  */
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 5c1d1926e9..3968c40693 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -24,6 +24,15 @@ extern "C" {
 
 RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
 
+/**
+ * The lcore role (used in RTE or not).
+ */
+enum rte_lcore_role_t {
+	ROLE_RTE,
+	ROLE_OFF,
+	ROLE_SERVICE,
+};
+
 /**
  * Get a lcore's role.
  *
@@ -34,6 +43,20 @@ RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
  */
 enum rte_lcore_role_t rte_eal_lcore_role(unsigned int lcore_id);
 
+/**
+ * Test if the core supplied has a specific role
+ *
+ * @param lcore_id
+ *   The identifier of the lcore, which MUST be between 0 and
+ *   RTE_MAX_LCORE-1.
+ * @param role
+ *   The role to be checked against.
+ * @return
+ *   Boolean value: positive if test is true; otherwise returns 0.
+ */
+int
+rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
+
 /**
  * Return the Application thread ID of the execution unit.
  *
@@ -283,20 +306,6 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
 		const pthread_attr_t *attr,
 		void *(*start_routine)(void *), void *arg);
 
-/**
- * Test if the core supplied has a specific role
- *
- * @param lcore_id
- *   The identifier of the lcore, which MUST be between 0 and
- *   RTE_MAX_LCORE-1.
- * @param role
- *   The role to be checked against.
- * @return
- *   Boolean value: positive if test is true; otherwise returns 0.
- */
-int
-rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
-
 #ifdef __cplusplus
 }
 #endif
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v4 6/9] eal: register non-EAL threads as lcores
  2020-06-26 14:47 ` [dpdk-dev] [PATCH v4 0/9] Register non-EAL threads as lcore David Marchand
                     ` (4 preceding siblings ...)
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 5/9] eal: move lcore role code David Marchand
@ 2020-06-26 14:47   ` David Marchand
  2020-06-29 14:27     ` Ananyev, Konstantin
  2020-06-30 10:07     ` Olivier Matz
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 7/9] eal: add lcore init callbacks David Marchand
                     ` (2 subsequent siblings)
  8 siblings, 2 replies; 126+ messages in thread
From: David Marchand @ 2020-06-26 14:47 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, John McNamara, Marko Kovacevic,
	Anatoly Burakov, Olivier Matz, Neil Horman

DPDK allows calling some parts of its API from a non-EAL thread but this
has some limitations.
OVS (and other applications) has its own thread management but still
wants to avoid such limitations by hacking RTE_PER_LCORE(_lcore_id) and
faking EAL threads potentially unknown to some DPDK components.

Introduce a new API to register non-EAL threads and associate them with a
free lcore with a new NON_EAL role.
This role denotes lcores that do not run the DPDK mainloop and as such
prevents use of rte_eal_wait_lcore() and the like.

Multiprocess is not supported as the need for cohabitation with this new
feature is unclear at the moment.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Andrew Rybchenko <arybchenko@solarflare.com>
---
Changes since v2:
- refused multiprocess init once rte_thread_register got called, and
  vice versa,
- added warning on multiprocess in rte_thread_register doxygen,

Changes since v1:
- moved cleanup on lcore role code in patch 5,
- added unit test,
- updated documentation,
- changed naming from "external thread" to "registered non-EAL thread"

---
 MAINTAINERS                                   |   1 +
 app/test/Makefile                             |   1 +
 app/test/autotest_data.py                     |   6 +
 app/test/meson.build                          |   2 +
 app/test/test_lcores.c                        | 139 ++++++++++++++++++
 doc/guides/howto/debug_troubleshoot.rst       |   5 +-
 .../prog_guide/env_abstraction_layer.rst      |  22 +--
 doc/guides/prog_guide/mempool_lib.rst         |   2 +-
 lib/librte_eal/common/eal_common_lcore.c      |  50 ++++++-
 lib/librte_eal/common/eal_common_mcfg.c       |  36 +++++
 lib/librte_eal/common/eal_common_thread.c     |  33 +++++
 lib/librte_eal/common/eal_memcfg.h            |  10 ++
 lib/librte_eal/common/eal_private.h           |  18 +++
 lib/librte_eal/freebsd/eal.c                  |   4 +
 lib/librte_eal/include/rte_lcore.h            |  25 +++-
 lib/librte_eal/linux/eal.c                    |   4 +
 lib/librte_eal/rte_eal_version.map            |   2 +
 lib/librte_mempool/rte_mempool.h              |  11 +-
 18 files changed, 349 insertions(+), 22 deletions(-)
 create mode 100644 app/test/test_lcores.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 45b6c3a990..88c6d85e07 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -182,6 +182,7 @@ F: app/test/test_cycles.c
 F: app/test/test_debug.c
 F: app/test/test_eal*
 F: app/test/test_errno.c
+F: app/test/test_lcores.c
 F: app/test/test_logs.c
 F: app/test/test_memcpy*
 F: app/test/test_per_lcore.c
diff --git a/app/test/Makefile b/app/test/Makefile
index 7b96a03a64..4a8dea2425 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -97,6 +97,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY) += test_flow_classify.c
 endif
 
 SRCS-y += test_rwlock.c
+SRCS-y += test_lcores.c
 
 SRCS-$(CONFIG_RTE_LIBRTE_STACK) += test_stack.c
 SRCS-$(CONFIG_RTE_LIBRTE_STACK) += test_stack_perf.c
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 238ab631b4..4b7da45e09 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -62,6 +62,12 @@
         "Func":    rwlock_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Lcores autotest",
+        "Command": "lcores_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Logs autotest",
         "Command": "logs_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index 5233ead46e..a57477b7cc 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -67,6 +67,7 @@ test_sources = files('commands.c',
 	'test_ipsec_perf.c',
 	'test_kni.c',
 	'test_kvargs.c',
+	'test_lcores.c',
 	'test_logs.c',
 	'test_lpm.c',
 	'test_lpm6.c',
@@ -206,6 +207,7 @@ fast_tests = [
         ['hash_autotest', true],
         ['interrupt_autotest', true],
         ['ipfrag_autotest', false],
+        ['lcores_autotest', true],
         ['logs_autotest', true],
         ['lpm_autotest', true],
         ['lpm6_autotest', true],
diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
new file mode 100644
index 0000000000..864bcbade7
--- /dev/null
+++ b/app/test/test_lcores.c
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2020 Red Hat, Inc.
+ */
+
+#include <pthread.h>
+#include <string.h>
+
+#include <rte_lcore.h>
+
+#include "test.h"
+
+struct thread_context {
+	enum { INIT, ERROR, DONE } state; /* outcome set by thread_loop() */
+	bool lcore_id_any; /* expect LCORE_ID_ANY after registering */
+	pthread_t id;
+	unsigned int *registered_count; /* shared with the control thread */
+};
+static void *thread_loop(void *arg)
+{
+	struct thread_context *t = arg;
+	unsigned int lcore_id;
+
+	lcore_id = rte_lcore_id(); /* must be LCORE_ID_ANY before register */
+	if (lcore_id != LCORE_ID_ANY) {
+		printf("Incorrect lcore id for new thread %u\n", lcore_id);
+		t->state = ERROR;
+	}
+	rte_thread_register();
+	lcore_id = rte_lcore_id(); /* compare with what the test expects */
+	if ((t->lcore_id_any && lcore_id != LCORE_ID_ANY) ||
+			(!t->lcore_id_any && lcore_id == LCORE_ID_ANY)) {
+		printf("Could not register new thread, got %u while %sexpecting %u\n",
+			lcore_id, t->lcore_id_any ? "" : "not ", LCORE_ID_ANY);
+		t->state = ERROR;
+	}
+	/* Tell the control thread that the register attempt is over. */
+	__atomic_add_fetch(t->registered_count, 1, __ATOMIC_RELEASE);
+
+	/* Spin until the control thread resets the counter to 0. */
+	while (__atomic_load_n(t->registered_count, __ATOMIC_ACQUIRE) != 0)
+		;
+	rte_thread_unregister();
+	lcore_id = rte_lcore_id(); /* must be LCORE_ID_ANY again */
+	if (lcore_id != LCORE_ID_ANY) {
+		printf("Could not unregister new thread, %u still assigned\n",
+			lcore_id);
+		t->state = ERROR;
+	}
+
+	if (t->state != ERROR)
+		t->state = DONE; /* no check failed above */
+
+	return NULL;
+}
+
+static int
+test_non_eal_lcores(unsigned int eal_threads_count)
+{
+	struct thread_context thread_contexts[RTE_MAX_LCORE];
+	unsigned int non_eal_threads_count;
+	unsigned int registered_count;
+	struct thread_context *t;
+	unsigned int i;
+	int ret;
+
+	non_eal_threads_count = 0;
+	registered_count = 0;
+
+	/* Try to create one thread per lcore that is not already in use. */
+	for (i = 0; i < RTE_MAX_LCORE - eal_threads_count; i++) {
+		t = &thread_contexts[i];
+		t->state = INIT;
+		t->registered_count = &registered_count;
+		t->lcore_id_any = false;
+		if (pthread_create(&t->id, NULL, thread_loop, t) != 0)
+			break;
+		non_eal_threads_count++;
+	}
+	printf("non-EAL threads count: %u\n", non_eal_threads_count);
+	/* Wait for all non-EAL threads to register. */
+	while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+			non_eal_threads_count)
+		;
+
+	/* If every lcore is now taken, start one more thread: it is expected
+	 * to end up with LCORE_ID_ANY (lcore_id_any = true below).
+	 */
+	if (eal_threads_count + non_eal_threads_count < RTE_MAX_LCORE)
+		goto skip_lcore_any;
+	t = &thread_contexts[non_eal_threads_count];
+	t->state = INIT;
+	t->registered_count = &registered_count;
+	t->lcore_id_any = true;
+	if (pthread_create(&t->id, NULL, thread_loop, t) == 0) {
+		non_eal_threads_count++;
+		printf("non-EAL threads count: %u\n", non_eal_threads_count);
+		while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+				non_eal_threads_count)
+			;
+	}
+
+skip_lcore_any:
+	/* Release all threads (counter back to 0), and check their states. */
+	__atomic_store_n(&registered_count, 0, __ATOMIC_RELEASE);
+	ret = 0;
+	for (i = 0; i < non_eal_threads_count; i++) {
+		t = &thread_contexts[i];
+		pthread_join(t->id, NULL);
+		if (t->state != DONE)
+			ret = -1; /* keep joining the remaining threads */
+	}
+
+	return ret;
+}
+
+static int
+test_lcores(void)
+{
+	unsigned int eal_threads_count = 0;
+	unsigned int i;
+
+	for (i = 0; i < RTE_MAX_LCORE; i++) {
+		if (!rte_lcore_has_role(i, ROLE_OFF))
+			eal_threads_count++; /* any role but ROLE_OFF */
+	}
+	if (eal_threads_count == 0) {
+		printf("Something is broken, no EAL thread detected.\n");
+		return TEST_FAILED;
+	}
+	printf("EAL threads count: %u, RTE_MAX_LCORE=%u\n", eal_threads_count,
+		RTE_MAX_LCORE);
+
+	if (test_non_eal_lcores(eal_threads_count) < 0)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(lcores_autotest, test_lcores);
diff --git a/doc/guides/howto/debug_troubleshoot.rst b/doc/guides/howto/debug_troubleshoot.rst
index cef016b2fe..5a46f5fba3 100644
--- a/doc/guides/howto/debug_troubleshoot.rst
+++ b/doc/guides/howto/debug_troubleshoot.rst
@@ -307,8 +307,9 @@ Custom worker function :numref:`dtg_distributor_worker`.
 
 #. Configuration issue isolation
 
-   * Identify core role using ``rte_eal_lcore_role`` to identify RTE, OFF and
-     SERVICE. Check performance functions are mapped to run on the cores.
+   * Identify core role using ``rte_eal_lcore_role`` to identify RTE, OFF,
+     SERVICE and NON_EAL. Check performance functions are mapped to run on the
+     cores.
 
    * For high-performance execution logic ensure running it on correct NUMA
      and non-master core.
diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
index 48a2fec066..f64ae953d1 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -564,9 +564,13 @@ It's also compatible with the pattern of corelist('-l') option.
 non-EAL pthread support
 ~~~~~~~~~~~~~~~~~~~~~~~
 
-It is possible to use the DPDK execution context with any user pthread (aka. Non-EAL pthreads).
-In a non-EAL pthread, the *_lcore_id* is always LCORE_ID_ANY which identifies that it is not an EAL thread with a valid, unique, *_lcore_id*.
-Some libraries will use an alternative unique ID (e.g. TID), some will not be impacted at all, and some will work but with limitations (e.g. timer and mempool libraries).
+It is possible to use the DPDK execution context with any user pthread (aka. non-EAL pthreads).
+There are two kinds of non-EAL pthreads:
+
+- a registered non-EAL pthread with a valid *_lcore_id* that was successfully assigned by calling ``rte_thread_register()``,
+- a non registered non-EAL pthread with a LCORE_ID_ANY *_lcore_id*.
+
+For non registered non-EAL pthreads (with a LCORE_ID_ANY *_lcore_id*), some libraries will use an alternative unique ID (e.g. TID), some will not be impacted at all, and some will work but with limitations (e.g. timer and mempool libraries).
 
 All these impacts are mentioned in :ref:`known_issue_label` section.
 
@@ -613,9 +617,9 @@ Known Issues
 + rte_mempool
 
   The rte_mempool uses a per-lcore cache inside the mempool.
-  For non-EAL pthreads, ``rte_lcore_id()`` will not return a valid number.
-  So for now, when rte_mempool is used with non-EAL pthreads, the put/get operations will bypass the default mempool cache and there is a performance penalty because of this bypass.
-  Only user-owned external caches can be used in a non-EAL context in conjunction with ``rte_mempool_generic_put()`` and ``rte_mempool_generic_get()`` that accept an explicit cache parameter.
+  For unregistered non-EAL pthreads, ``rte_lcore_id()`` will not return a valid number.
+  So for now, when rte_mempool is used with unregistered non-EAL pthreads, the put/get operations will bypass the default mempool cache and there is a performance penalty because of this bypass.
+  Only user-owned external caches can be used in an unregistered non-EAL context in conjunction with ``rte_mempool_generic_put()`` and ``rte_mempool_generic_get()`` that accept an explicit cache parameter.
 
 + rte_ring
 
@@ -660,15 +664,15 @@ Known Issues
 
 + rte_timer
 
-  Running  ``rte_timer_manage()`` on a non-EAL pthread is not allowed. However, resetting/stopping the timer from a non-EAL pthread is allowed.
+  Running ``rte_timer_manage()`` on an unregistered non-EAL pthread is not allowed. However, resetting/stopping the timer from a non-EAL pthread is allowed.
 
 + rte_log
 
-  In non-EAL pthreads, there is no per thread loglevel and logtype, global loglevels are used.
+  In unregistered non-EAL pthreads, there is no per thread loglevel and logtype, global loglevels are used.
 
 + misc
 
-  The debug statistics of rte_ring, rte_mempool and rte_timer are not supported in a non-EAL pthread.
+  The debug statistics of rte_ring, rte_mempool and rte_timer are not supported in an unregistered non-EAL pthread.
 
 cgroup control
 ~~~~~~~~~~~~~~
diff --git a/doc/guides/prog_guide/mempool_lib.rst b/doc/guides/prog_guide/mempool_lib.rst
index f8b430d656..e3e1f940be 100644
--- a/doc/guides/prog_guide/mempool_lib.rst
+++ b/doc/guides/prog_guide/mempool_lib.rst
@@ -103,7 +103,7 @@ The maximum size of the cache is static and is defined at compilation time (CONF
 Alternatively to the internal default per-lcore local cache, an application can create and manage external caches through the ``rte_mempool_cache_create()``, ``rte_mempool_cache_free()`` and ``rte_mempool_cache_flush()`` calls.
 These user-owned caches can be explicitly passed to ``rte_mempool_generic_put()`` and ``rte_mempool_generic_get()``.
 The ``rte_mempool_default_cache()`` call returns the default internal cache if any.
-In contrast to the default caches, user-owned caches can be used by non-EAL threads too.
+In contrast to the default caches, user-owned caches can be used by unregistered non-EAL threads too.
 
 Mempool Handlers
 ------------------------
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 86d32a3dd7..a61824a779 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -6,13 +6,15 @@
 #include <limits.h>
 #include <string.h>
 
-#include <rte_errno.h>
-#include <rte_log.h>
-#include <rte_eal.h>
-#include <rte_lcore.h>
 #include <rte_common.h>
 #include <rte_debug.h>
+#include <rte_eal.h>
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_spinlock.h>
 
+#include "eal_memcfg.h"
 #include "eal_private.h"
 #include "eal_thread.h"
 
@@ -220,3 +222,43 @@ rte_socket_id_by_idx(unsigned int idx)
 	}
 	return config->numa_nodes[idx];
 }
+
+static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
+
+unsigned int
+eal_lcore_non_eal_allocate(void)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	unsigned int lcore_id;
+
+	if (cfg->process_type == RTE_PROC_SECONDARY ||
+			!eal_mcfg_forbid_multiprocess()) {
+		RTE_LOG(ERR, EAL, "Multiprocess in use, cannot allocate new lcore.\n");
+		return RTE_MAX_LCORE;
+	}
+	rte_spinlock_lock(&lcore_lock);
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (cfg->lcore_role[lcore_id] != ROLE_OFF)
+			continue; /* slot already in use */
+		cfg->lcore_role[lcore_id] = ROLE_NON_EAL;
+		cfg->lcore_count++;
+		break; /* lowest free slot is taken */
+	}
+	if (lcore_id == RTE_MAX_LCORE)
+		RTE_LOG(DEBUG, EAL, "No lcore available.\n");
+	rte_spinlock_unlock(&lcore_lock);
+	return lcore_id; /* RTE_MAX_LCORE when no slot was free */
+}
+
+void
+eal_lcore_non_eal_release(unsigned int lcore_id)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+
+	rte_spinlock_lock(&lcore_lock); /* same lock as the allocator */
+	if (cfg->lcore_role[lcore_id] == ROLE_NON_EAL) {
+		cfg->lcore_role[lcore_id] = ROLE_OFF; /* slot is free again */
+		cfg->lcore_count--;
+	}
+	rte_spinlock_unlock(&lcore_lock);
+}
diff --git a/lib/librte_eal/common/eal_common_mcfg.c b/lib/librte_eal/common/eal_common_mcfg.c
index 49d3ed0ce5..5b42d454e2 100644
--- a/lib/librte_eal/common/eal_common_mcfg.c
+++ b/lib/librte_eal/common/eal_common_mcfg.c
@@ -44,6 +44,42 @@ eal_mcfg_check_version(void)
 	return 0;
 }
 
+enum mp_status {
+	MP_UNKNOWN, /* only state eal_mcfg_set_mp_status() moves away from */
+	MP_FORBIDDEN, /* requested by eal_mcfg_forbid_multiprocess() */
+	MP_ENABLED, /* requested by eal_mcfg_enable_multiprocess() */
+};
+
+static bool
+eal_mcfg_set_mp_status(enum mp_status status)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	uint8_t expected;
+	uint8_t desired;
+
+	expected = MP_UNKNOWN; /* only an undecided status may be changed */
+	desired = status;
+	if (__atomic_compare_exchange_n(&mcfg->mp_status, &expected, desired,
+			false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
+		return true;
+	/* Already decided: succeed only if it matches the request. */
+	return __atomic_load_n(&mcfg->mp_status, __ATOMIC_RELAXED) == desired;
+}
+
+bool
+eal_mcfg_forbid_multiprocess(void)
+{
+	assert(rte_eal_get_configuration()->process_type == RTE_PROC_PRIMARY);
+	return eal_mcfg_set_mp_status(MP_FORBIDDEN); /* false if MP_ENABLED */
+}
+
+bool
+eal_mcfg_enable_multiprocess(void)
+{
+	assert(rte_eal_get_configuration()->process_type == RTE_PROC_SECONDARY);
+	return eal_mcfg_set_mp_status(MP_ENABLED); /* false if MP_FORBIDDEN */
+}
+
 void
 eal_mcfg_update_internal(void)
 {
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index a7ae0691bf..1cbddc4b5b 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -236,3 +236,36 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
 	pthread_join(*thread, NULL);
 	return -ret;
 }
+
+void
+rte_thread_register(void)
+{
+	unsigned int lcore_id;
+	rte_cpuset_t cpuset;
+
+	/* EAL init flushes all lcores, so registering earlier is not allowed. */
+	assert(internal_config.init_complete == 1);
+	if (pthread_getaffinity_np(pthread_self(), sizeof(cpuset),
+			&cpuset) != 0)
+		CPU_ZERO(&cpuset); /* affinity unknown: pass an empty set */
+	lcore_id = eal_lcore_non_eal_allocate();
+	if (lcore_id >= RTE_MAX_LCORE)
+		lcore_id = LCORE_ID_ANY; /* no lcore could be allocated */
+	rte_thread_init(lcore_id, &cpuset);
+	if (lcore_id != LCORE_ID_ANY)
+		RTE_LOG(DEBUG, EAL, "Registered non-EAL thread as lcore %u.\n",
+			lcore_id);
+}
+
+void
+rte_thread_unregister(void)
+{
+	unsigned int lcore_id = rte_lcore_id(); /* read before uninit */
+
+	if (lcore_id != LCORE_ID_ANY)
+		eal_lcore_non_eal_release(lcore_id);
+	rte_thread_uninit(); /* called even when no lcore was held */
+	if (lcore_id != LCORE_ID_ANY)
+		RTE_LOG(DEBUG, EAL, "Unregistered non-EAL thread (was lcore %u).\n",
+			lcore_id);
+}
diff --git a/lib/librte_eal/common/eal_memcfg.h b/lib/librte_eal/common/eal_memcfg.h
index 583fcb5953..340e523c6a 100644
--- a/lib/librte_eal/common/eal_memcfg.h
+++ b/lib/librte_eal/common/eal_memcfg.h
@@ -41,6 +41,8 @@ struct rte_mem_config {
 	rte_rwlock_t memory_hotplug_lock;
 	/**< Indicates whether memory hotplug request is in progress. */
 
+	uint8_t mp_status; /**< Indicates whether multiprocess can be used. */
+
 	/* memory segments and zones */
 	struct rte_fbarray memzones; /**< Memzone descriptors. */
 
@@ -91,6 +93,14 @@ eal_mcfg_wait_complete(void);
 int
 eal_mcfg_check_version(void);
 
+/* mark primary process as not supporting multi-process. */
+bool
+eal_mcfg_forbid_multiprocess(void);
+
+/* instruct primary process that a secondary process attached once. */
+bool
+eal_mcfg_enable_multiprocess(void);
+
 /* set mem config as complete */
 void
 eal_mcfg_complete(void);
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 0592fcd694..73238ff157 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -396,6 +396,24 @@ uint64_t get_tsc_freq(void);
  */
 uint64_t get_tsc_freq_arch(void);
 
+/**
+ * Allocate a free lcore to associate to a non-EAL thread.
+ *
+ * @return
+ *   - the id of a lcore with role ROLE_NON_EAL on success.
+ *   - RTE_MAX_LCORE if none was available.
+ */
+unsigned int eal_lcore_non_eal_allocate(void);
+
+/**
+ * Release the lcore used by a non-EAL thread.
+ * Counterpart of eal_lcore_non_eal_allocate().
+ *
+ * @param lcore_id
+ *   The lcore with role ROLE_NON_EAL to release.
+ */
+void eal_lcore_non_eal_release(unsigned int lcore_id);
+
 /**
  * Prepare physical memory mapping
  * i.e. hugepages on Linux and
diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
index 13e5de006f..32a3d999b8 100644
--- a/lib/librte_eal/freebsd/eal.c
+++ b/lib/librte_eal/freebsd/eal.c
@@ -424,6 +424,10 @@ rte_config_init(void)
 		}
 		if (rte_eal_config_reattach() < 0)
 			return -1;
+		if (!eal_mcfg_enable_multiprocess()) {
+			RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
+			return -1;
+		}
 		eal_mcfg_update_internal();
 		break;
 	case RTE_PROC_AUTO:
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 3968c40693..43747e88df 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -31,6 +31,7 @@ enum rte_lcore_role_t {
 	ROLE_RTE,
 	ROLE_OFF,
 	ROLE_SERVICE,
+	ROLE_NON_EAL,
 };
 
 /**
@@ -67,7 +68,8 @@ rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
  *   to run threads with lcore IDs 0, 1, 2 and 3 on physical core 10..
  *
  * @return
- *  Logical core ID (in EAL thread) or LCORE_ID_ANY (in non-EAL thread)
+ *  Logical core ID (in EAL thread or registered non-EAL thread) or
+ *  LCORE_ID_ANY (in unregistered non-EAL thread)
  */
 static inline unsigned
 rte_lcore_id(void)
@@ -279,6 +281,27 @@ int rte_thread_setname(pthread_t id, const char *name);
 __rte_experimental
 int rte_thread_getname(pthread_t id, char *name, size_t len);
 
+/**
+ * Register current non-EAL thread as a lcore.
+ *
+ * @note This API is not compatible with the multi-process feature:
+ * - if a primary process registers a non-EAL thread, then no secondary process
+ *   will initialise.
+ * - if a secondary process initialises successfully, trying to register a
+ *   non-EAL thread from either primary or secondary processes will always end
+ *   up with the thread getting LCORE_ID_ANY as lcore.
+ */
+__rte_experimental
+void
+rte_thread_register(void);
+
+/**
+ * Unregister current thread and release lcore if one was associated.
+ */
+__rte_experimental
+void
+rte_thread_unregister(void);
+
 /**
  * Create a control thread.
  *
diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
index 8894cea50a..1d90d1c0e3 100644
--- a/lib/librte_eal/linux/eal.c
+++ b/lib/librte_eal/linux/eal.c
@@ -514,6 +514,10 @@ rte_config_init(void)
 		}
 		if (rte_eal_config_reattach() < 0)
 			return -1;
+		if (!eal_mcfg_enable_multiprocess()) {
+			RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
+			return -1;
+		}
 		eal_mcfg_update_internal();
 		break;
 	case RTE_PROC_AUTO:
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 5831eea4b0..39c41d445d 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -396,6 +396,8 @@ EXPERIMENTAL {
 
 	# added in 20.08
 	__rte_trace_mem_per_thread_free;
+	rte_thread_register;
+	rte_thread_unregister;
 };
 
 INTERNAL {
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 652d19f9f1..9e0ee052b3 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -28,9 +28,9 @@
  * rte_mempool_get() or rte_mempool_put() are designed to be called from an EAL
  * thread due to the internal per-lcore cache. Due to the lack of caching,
  * rte_mempool_get() or rte_mempool_put() performance will suffer when called
- * by non-EAL threads. Instead, non-EAL threads should call
- * rte_mempool_generic_get() or rte_mempool_generic_put() with a user cache
- * created with rte_mempool_cache_create().
+ * by unregistered non-EAL threads. Instead, unregistered non-EAL threads
+ * should call rte_mempool_generic_get() or rte_mempool_generic_put() with a
+ * user cache created with rte_mempool_cache_create().
  */
 
 #include <stdio.h>
@@ -1233,7 +1233,7 @@ void rte_mempool_dump(FILE *f, struct rte_mempool *mp);
 /**
  * Create a user-owned mempool cache.
  *
- * This can be used by non-EAL threads to enable caching when they
+ * This can be used by unregistered non-EAL threads to enable caching when they
  * interact with a mempool.
  *
  * @param size
@@ -1264,7 +1264,8 @@ rte_mempool_cache_free(struct rte_mempool_cache *cache);
  * @param lcore_id
  *   The logical core id.
  * @return
- *   A pointer to the mempool cache or NULL if disabled or non-EAL thread.
+ *   A pointer to the mempool cache or NULL if disabled or unregistered non-EAL
+ *   thread.
  */
 static __rte_always_inline struct rte_mempool_cache *
 rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v4 7/9] eal: add lcore init callbacks
  2020-06-26 14:47 ` [dpdk-dev] [PATCH v4 0/9] Register non-EAL threads as lcore David Marchand
                     ` (5 preceding siblings ...)
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 6/9] eal: register non-EAL threads as lcores David Marchand
@ 2020-06-26 14:47   ` David Marchand
  2020-06-29 12:46     ` Ananyev, Konstantin
                       ` (2 more replies)
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 8/9] eal: add lcore iterators David Marchand
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 9/9] mempool/bucket: handle non-EAL lcores David Marchand
  8 siblings, 3 replies; 126+ messages in thread
From: David Marchand @ 2020-06-26 14:47 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, Neil Horman

DPDK components and applications can have their say when a new lcore is
initialized. For this, they can register a callback for initializing and
releasing their private data.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v2:
- added missing test,
- fixed rollback on lcore register,

Changes since v1:
- added unit test (since missing some coverage, for v3),
- preferred callback and removed mention of notification,

---
 app/test/test_lcores.c                   | 230 +++++++++++++++++++++++
 lib/librte_eal/common/eal_common_lcore.c | 138 +++++++++++++-
 lib/librte_eal/common/eal_private.h      |   3 +-
 lib/librte_eal/include/rte_lcore.h       |  68 +++++++
 lib/librte_eal/rte_eal_version.map       |   2 +
 5 files changed, 435 insertions(+), 6 deletions(-)

diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
index 864bcbade7..e36dceedf9 100644
--- a/app/test/test_lcores.c
+++ b/app/test/test_lcores.c
@@ -5,6 +5,7 @@
 #include <pthread.h>
 #include <string.h>
 
+#include <rte_common.h>
 #include <rte_lcore.h>
 
 #include "test.h"
@@ -113,6 +114,229 @@ test_non_eal_lcores(unsigned int eal_threads_count)
 	return ret;
 }
 
+struct limit_lcore_context {
+	unsigned int init; /* number of init callback invocations */
+	unsigned int max; /* init calls beyond this count are refused */
+	unsigned int uninit; /* number of uninit callback invocations */
+};
+static int
+limit_lcores_init(unsigned int lcore_id __rte_unused, void *arg)
+{
+	struct limit_lcore_context *l = arg;
+
+	l->init++; /* refused calls are counted too */
+	if (l->init > l->max)
+		return -1; /* refuse this lcore */
+	return 0;
+}
+static void
+limit_lcores_uninit(unsigned int lcore_id __rte_unused, void *arg)
+{
+	struct limit_lcore_context *l = arg;
+
+	l->uninit++; /* only counts, nothing to release */
+}
+
+static int
+test_lcores_callback(unsigned int eal_threads_count)
+{
+	struct limit_lcore_context l;
+	void *handle;
+
+	/* Refuse last lcore => callback register error. */
+	memset(&l, 0, sizeof(l));
+	l.max = eal_threads_count - 1;
+	handle = rte_lcore_callback_register("limit", limit_lcores_init,
+		limit_lcores_uninit, &l);
+	if (handle != NULL) {
+		printf("lcore callback register should have failed\n");
+		goto error;
+	}
+	/* Refusal happens at the n-th call to the init callback.
+	 * Besides, n - 1 were accepted, so we expect as many uninit calls when
+	 * the rollback happens.
+	 */
+	if (l.init != eal_threads_count) {
+		printf("lcore callback register failed but incorrect init calls, expected %u, got %u\n",
+			eal_threads_count, l.init);
+		goto error;
+	}
+	if (l.uninit != eal_threads_count - 1) {
+		printf("lcore callback register failed but incorrect uninit calls, expected %u, got %u\n",
+			eal_threads_count - 1, l.uninit);
+		goto error;
+	}
+
+	/* Accept all lcores, then unregister. */
+	memset(&l, 0, sizeof(l));
+	l.max = eal_threads_count;
+	handle = rte_lcore_callback_register("limit", limit_lcores_init,
+		limit_lcores_uninit, &l);
+	if (handle == NULL) {
+		printf("lcore callback register failed\n");
+		goto error;
+	}
+	if (l.uninit != 0) {
+		printf("lcore callback register succeeded but incorrect uninit calls, expected 0, got %u\n",
+			l.uninit);
+		goto error;
+	}
+	rte_lcore_callback_unregister(handle);
+	handle = NULL; /* so the error path does not unregister twice */
+	if (l.init != eal_threads_count) {
+		printf("lcore callback unregister done but incorrect init calls, expected %u, got %u\n",
+			eal_threads_count, l.init);
+		goto error;
+	}
+	if (l.uninit != eal_threads_count) {
+		printf("lcore callback unregister done but incorrect uninit calls, expected %u, got %u\n",
+			eal_threads_count, l.uninit);
+		goto error;
+	}
+
+	return 0;
+
+error:
+	if (handle != NULL)
+		rte_lcore_callback_unregister(handle);
+
+	return -1;
+}
+
+static int
+test_non_eal_lcores_callback(unsigned int eal_threads_count)
+{
+	struct thread_context thread_contexts[2];
+	unsigned int non_eal_threads_count;
+	struct limit_lcore_context l[2];
+	unsigned int registered_count;
+	struct thread_context *t;
+	void *handle[2];
+	unsigned int i;
+	int ret;
+
+	memset(l, 0, sizeof(l));
+	handle[0] = handle[1] = NULL;
+	non_eal_threads_count = 0;
+	registered_count = 0;
+
+	/* This test requires two empty slots to be sure lcore init refusal is
+	 * because of callback execution.
+	 */
+	if (eal_threads_count + 2 >= RTE_MAX_LCORE)
+		return 0; /* not enough free lcores: skip silently */
+
+	/* Register two callbacks:
+	 * - first one accepts any lcore,
+	 * - second one accepts all EAL lcore + one more for the first non-EAL
+	 *   thread, then refuses the next lcore.
+	 */
+	l[0].max = UINT_MAX;
+	handle[0] = rte_lcore_callback_register("no_limit", limit_lcores_init,
+		limit_lcores_uninit, &l[0]);
+	if (handle[0] == NULL) {
+		printf("lcore callback [0] register failed\n");
+		goto error;
+	}
+	l[1].max = eal_threads_count + 1;
+	handle[1] = rte_lcore_callback_register("limit", limit_lcores_init,
+		limit_lcores_uninit, &l[1]);
+	if (handle[1] == NULL) {
+		printf("lcore callback [1] register failed\n");
+		goto error;
+	}
+	if (l[0].init != eal_threads_count || l[1].init != eal_threads_count) {
+		printf("lcore callbacks register succeeded but incorrect init calls, expected %u, %u, got %u, %u\n",
+			eal_threads_count, eal_threads_count,
+			l[0].init, l[1].init);
+		goto error;
+	}
+	if (l[0].uninit != 0 || l[1].uninit != 0) {
+		printf("lcore callbacks register succeeded but incorrect uninit calls, expected 0, 0, got %u, %u\n",
+			l[0].uninit, l[1].uninit);
+		goto error;
+	}
+	/* First thread that expects a valid lcore id. */
+	t = &thread_contexts[0];
+	t->state = INIT;
+	t->registered_count = &registered_count;
+	t->lcore_id_any = false;
+	if (pthread_create(&t->id, NULL, thread_loop, t) != 0)
+		goto cleanup_threads;
+	non_eal_threads_count++;
+	while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+			non_eal_threads_count)
+		;
+	if (l[0].init != eal_threads_count + 1 ||
+			l[1].init != eal_threads_count + 1) {
+		printf("Incorrect init calls, expected %u, %u, got %u, %u\n",
+			eal_threads_count + 1, eal_threads_count + 1,
+			l[0].init, l[1].init);
+		goto cleanup_threads;
+	}
+	if (l[0].uninit != 0 || l[1].uninit != 0) {
+		printf("Incorrect uninit calls, expected 0, 0, got %u, %u\n",
+			l[0].uninit, l[1].uninit);
+		goto cleanup_threads;
+	}
+	/* Second thread, that expects LCORE_ID_ANY because of init refusal. */
+	t = &thread_contexts[1];
+	t->state = INIT;
+	t->registered_count = &registered_count;
+	t->lcore_id_any = true;
+	if (pthread_create(&t->id, NULL, thread_loop, t) != 0)
+		goto cleanup_threads;
+	non_eal_threads_count++;
+	while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+			non_eal_threads_count)
+		;
+	if (l[0].init != eal_threads_count + 2 ||
+			l[1].init != eal_threads_count + 2) {
+		printf("Incorrect init calls, expected %u, %u, got %u, %u\n",
+			eal_threads_count + 2, eal_threads_count + 2,
+			l[0].init, l[1].init);
+		goto cleanup_threads;
+	}
+	if (l[0].uninit != 1 || l[1].uninit != 0) {
+		printf("Incorrect uninit calls, expected 1, 0, got %u, %u\n",
+			l[0].uninit, l[1].uninit);
+		goto cleanup_threads;
+	}
+	/* Release all threads, and check their states. */
+	__atomic_store_n(&registered_count, 0, __ATOMIC_RELEASE);
+	ret = 0;
+	for (i = 0; i < non_eal_threads_count; i++) {
+		t = &thread_contexts[i];
+		pthread_join(t->id, NULL);
+		if (t->state != DONE)
+			ret = -1;
+	}
+	if (ret < 0)
+		goto error;
+	if (l[0].uninit != 2 || l[1].uninit != 1) {
+		printf("Threads reported having successfully registered and unregistered, but incorrect uninit calls, expected 2, 1, got %u, %u\n",
+			l[0].uninit, l[1].uninit);
+		goto error;
+	}
+	rte_lcore_callback_unregister(handle[0]);
+	rte_lcore_callback_unregister(handle[1]);
+	return 0;
+
+cleanup_threads:
+	/* Release all threads that were started, then join them. */
+	__atomic_store_n(&registered_count, 0, __ATOMIC_RELEASE);
+	for (i = 0; i < non_eal_threads_count; i++) {
+		t = &thread_contexts[i];
+		pthread_join(t->id, NULL);
+	}
+error:
+	if (handle[1] != NULL)
+		rte_lcore_callback_unregister(handle[1]);
+	if (handle[0] != NULL)
+		rte_lcore_callback_unregister(handle[0]);
+	return -1;
+}
+
 static int
 test_lcores(void)
 {
@@ -133,6 +357,12 @@ test_lcores(void)
 	if (test_non_eal_lcores(eal_threads_count) < 0)
 		return TEST_FAILED;
 
+	if (test_lcores_callback(eal_threads_count) < 0)
+		return TEST_FAILED;
+
+	if (test_non_eal_lcores_callback(eal_threads_count) < 0)
+		return TEST_FAILED;
+
 	return TEST_SUCCESS;
 }
 
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index a61824a779..52c46a4cea 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -224,11 +224,114 @@ rte_socket_id_by_idx(unsigned int idx)
 }
 
 static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
+struct lcore_callback {
+	TAILQ_ENTRY(lcore_callback) next;
+	char *name; /* allocated with asprintf() at register time */
+	rte_lcore_init_cb init; /* may be NULL */
+	rte_lcore_uninit_cb uninit; /* may be NULL */
+	void *arg; /* handed back to init and uninit */
+};
+static TAILQ_HEAD(lcore_callbacks_head, lcore_callback) lcore_callbacks =
+	TAILQ_HEAD_INITIALIZER(lcore_callbacks);
+
+static int
+callback_init(struct lcore_callback *callback, unsigned int lcore_id)
+{
+	if (callback->init == NULL)
+		return 0; /* no init hook: the lcore is accepted */
+	RTE_LOG(DEBUG, EAL, "Call init for lcore callback %s, lcore_id %u\n",
+		callback->name, lcore_id);
+	return callback->init(lcore_id, callback->arg);
+}
+
+static void
+callback_uninit(struct lcore_callback *callback, unsigned int lcore_id)
+{
+	if (callback->uninit == NULL)
+		return; /* no uninit hook registered */
+	RTE_LOG(DEBUG, EAL, "Call uninit for lcore callback %s, lcore_id %u\n",
+		callback->name, lcore_id);
+	callback->uninit(lcore_id, callback->arg);
+}
+
+void *
+rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
+	rte_lcore_uninit_cb uninit, void *arg)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	struct lcore_callback *callback;
+	unsigned int lcore_id;
+
+	callback = calloc(1, sizeof(*callback));
+	if (callback == NULL)
+		return NULL;
+	if (asprintf(&callback->name, "%s-%p", name, arg) == -1) {
+		free(callback);
+		return NULL;
+	}
+	callback->init = init;
+	callback->uninit = uninit;
+	callback->arg = arg;
+	rte_spinlock_lock(&lcore_lock); /* also taken by lcore allocate/release */
+	if (callback->init == NULL)
+		goto no_init;
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (cfg->lcore_role[lcore_id] == ROLE_OFF)
+			continue;
+		if (callback_init(callback, lcore_id) == 0)
+			continue;
+		/* Init refused: uninit the lcores already initialized, then
+		 * release the callback and its name; NULL is returned. */
+		for (; lcore_id != 0; lcore_id--) {
+			if (cfg->lcore_role[lcore_id - 1] == ROLE_OFF)
+				continue;
+			callback_uninit(callback, lcore_id - 1);
+		}
+		free(callback->name);
+		free(callback);
+		callback = NULL;
+		goto out;
+	}
+no_init:
+	TAILQ_INSERT_TAIL(&lcore_callbacks, callback, next);
+	RTE_LOG(DEBUG, EAL, "Registered new lcore callback %s (%sinit, %suninit).\n",
+		callback->name, callback->init == NULL ? "NO " : "",
+		callback->uninit == NULL ? "NO " : "");
+out:
+	rte_spinlock_unlock(&lcore_lock);
+	return callback; /* opaque handle for rte_lcore_callback_unregister() */
+}
+
+void
+rte_lcore_callback_unregister(void *handle)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	struct lcore_callback *callback = handle;
+	unsigned int lcore_id;
+
+	rte_spinlock_lock(&lcore_lock);
+	if (callback->uninit == NULL)
+		goto no_uninit;
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (cfg->lcore_role[lcore_id] == ROLE_OFF)
+			continue;
+		callback_uninit(callback, lcore_id); /* every lcore in use */
+	}
+no_uninit:
+	TAILQ_REMOVE(&lcore_callbacks, callback, next);
+	rte_spinlock_unlock(&lcore_lock);
+	RTE_LOG(DEBUG, EAL, "Unregistered lcore callback %s.\n",
+		callback->name); /* name already ends with "-<arg>" */
+	free(callback->name);
+	free(callback);
+}
 
 unsigned int
 eal_lcore_non_eal_allocate(void)
 {
 	struct rte_config *cfg = rte_eal_get_configuration();
+	struct lcore_callback *callback;
+	struct lcore_callback *prev;
 	unsigned int lcore_id;
 
 	if (cfg->process_type == RTE_PROC_SECONDARY ||
@@ -244,8 +347,29 @@ eal_lcore_non_eal_allocate(void)
 		cfg->lcore_count++;
 		break;
 	}
-	if (lcore_id == RTE_MAX_LCORE)
+	if (lcore_id == RTE_MAX_LCORE) {
 		RTE_LOG(DEBUG, EAL, "No lcore available.\n");
+		goto out;
+	}
+	TAILQ_FOREACH(callback, &lcore_callbacks, next) {
+		if (callback_init(callback, lcore_id) == 0)
+			continue;
+		/* Callback refused init for this lcore, call uninit for all
+		 * previous callbacks.
+		 */
+		prev = TAILQ_PREV(callback, lcore_callbacks_head, next);
+		while (prev != NULL) {
+			callback_uninit(prev, lcore_id);
+			prev = TAILQ_PREV(prev, lcore_callbacks_head, next);
+		}
+		RTE_LOG(DEBUG, EAL, "Initialization refused for lcore %u.\n",
+			lcore_id);
+		cfg->lcore_role[lcore_id] = ROLE_OFF;
+		cfg->lcore_count--;
+		lcore_id = RTE_MAX_LCORE;
+		goto out;
+	}
+out:
 	rte_spinlock_unlock(&lcore_lock);
 	return lcore_id;
 }
@@ -254,11 +378,15 @@ void
 eal_lcore_non_eal_release(unsigned int lcore_id)
 {
 	struct rte_config *cfg = rte_eal_get_configuration();
+	struct lcore_callback *callback;
 
 	rte_spinlock_lock(&lcore_lock);
-	if (cfg->lcore_role[lcore_id] == ROLE_NON_EAL) {
-		cfg->lcore_role[lcore_id] = ROLE_OFF;
-		cfg->lcore_count--;
-	}
+	if (cfg->lcore_role[lcore_id] != ROLE_NON_EAL)
+		goto out;
+	TAILQ_FOREACH(callback, &lcore_callbacks, next)
+		callback_uninit(callback, lcore_id);
+	cfg->lcore_role[lcore_id] = ROLE_OFF;
+	cfg->lcore_count--;
+out:
 	rte_spinlock_unlock(&lcore_lock);
 }
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 73238ff157..fc79564111 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -401,7 +401,8 @@ uint64_t get_tsc_freq_arch(void);
  *
  * @return
  *   - the id of a lcore with role ROLE_NON_EAL on success.
- *   - RTE_MAX_LCORE if none was available.
+ *   - RTE_MAX_LCORE if none was available or initializing was refused (see
+ *     rte_lcore_callback_register).
  */
 unsigned int eal_lcore_non_eal_allocate(void);
 
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 43747e88df..5a2d6ca7af 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -229,6 +229,74 @@ unsigned int rte_get_next_lcore(unsigned int i, int skip_master, int wrap);
 	     i<RTE_MAX_LCORE;						\
 	     i = rte_get_next_lcore(i, 1, 0))
 
+/**
+ * Callback prototype for initializing lcores.
+ *
+ * @param lcore_id
+ *   The lcore to consider.
+ * @param arg
+ *   An opaque pointer passed at callback registration.
+ * @return
+ *   - -1 when refusing this operation,
+ *   - 0 otherwise.
+ */
+typedef int (*rte_lcore_init_cb)(unsigned int lcore_id, void *arg);
+
+/**
+ * Callback prototype for uninitializing lcores.
+ *
+ * @param lcore_id
+ *   The lcore to consider.
+ * @param arg
+ *   An opaque pointer passed at callback registration.
+ */
+typedef void (*rte_lcore_uninit_cb)(unsigned int lcore_id, void *arg);
+
+/**
+ * Register callbacks invoked when initializing and uninitializing a lcore.
+ *
+ * This function calls the init callback with all initialized lcores.
+ * Any error reported by the init callback triggers a rollback calling the
+ * uninit callback for each lcore.
+ * If this step succeeds, the callbacks are put in the lcore callbacks list
+ * that will get called for each lcore allocation/release.
+ *
+ * Note: callbacks execution is serialised under a lock protecting the lcores
+ * and callbacks list.
+ *
+ * @param name
+ *   A name serving as a small description for this callback.
+ * @param init
+ *   The callback invoked when a lcore_id is initialized.
+ * @param uninit
+ *   The callback invoked when a lcore_id is uninitialized.
+ * @param arg
+ *   An optional argument that gets passed to the callback when it gets
+ *   invoked.
+ * @return
+ *   On success, returns an opaque pointer for the registered object.
+ *   On failure (either memory allocation issue in the function itself or an
+ *   error is returned by the init callback itself), returns NULL.
+ */
+__rte_experimental
+void *
+rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
+	rte_lcore_uninit_cb uninit, void *arg);
+
+/**
+ * Unregister callbacks previously registered with rte_lcore_callback_register.
+ *
+ * This function calls the uninit callback with all initialized lcores.
+ * The callbacks are then removed from the lcore callbacks list.
+ *
+ * @param handle
+ *   The handle pointer returned by a former successful call to
+ *   rte_lcore_callback_register.
+ */
+__rte_experimental
+void
+rte_lcore_callback_unregister(void *handle);
+
 /**
  * Set core affinity of the current thread.
  * Support both EAL and non-EAL thread and update TLS.
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 39c41d445d..aeee7cf431 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -396,6 +396,8 @@ EXPERIMENTAL {
 
 	# added in 20.08
 	__rte_trace_mem_per_thread_free;
+	rte_lcore_callback_register;
+	rte_lcore_callback_unregister;
 	rte_thread_register;
 	rte_thread_unregister;
 };
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v4 8/9] eal: add lcore iterators
  2020-06-26 14:47 ` [dpdk-dev] [PATCH v4 0/9] Register non-EAL threads as lcore David Marchand
                     ` (6 preceding siblings ...)
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 7/9] eal: add lcore init callbacks David Marchand
@ 2020-06-26 14:47   ` David Marchand
  2020-06-30 10:11     ` Olivier Matz
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 9/9] mempool/bucket: handle non-EAL lcores David Marchand
  8 siblings, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-06-26 14:47 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, Neil Horman

Add a helper to iterate all lcores.
The iterator callback is read-only with respect to the lcores list.

Implement a dump function on top of this for debugging.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v2:
- added rte_lcore_dump calls in unit test, for basic check,

Changes since v1:
- introduced lcore iterators and implemented rte_lcore_dump,
  this iterator mechanism can then be used outside of EAL,

---
 app/test/test_lcores.c                    |  3 +
 lib/librte_eal/common/eal_common_lcore.c  | 77 ++++++++++++++++++++---
 lib/librte_eal/common/eal_common_thread.c | 16 +++--
 lib/librte_eal/common/eal_thread.h        | 13 +++-
 lib/librte_eal/freebsd/eal.c              |  2 +-
 lib/librte_eal/freebsd/eal_thread.c       |  2 +-
 lib/librte_eal/include/rte_lcore.h        | 47 +++++++++++++-
 lib/librte_eal/linux/eal.c                |  2 +-
 lib/librte_eal/linux/eal_thread.c         |  2 +-
 lib/librte_eal/rte_eal_version.map        |  2 +
 10 files changed, 143 insertions(+), 23 deletions(-)

diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
index e36dceedf9..dd6fa466c8 100644
--- a/app/test/test_lcores.c
+++ b/app/test/test_lcores.c
@@ -302,6 +302,7 @@ test_non_eal_lcores_callback(unsigned int eal_threads_count)
 			l[0].uninit, l[1].uninit);
 		goto cleanup_threads;
 	}
+	rte_lcore_dump(stdout);
 	/* Release all threads, and check their states. */
 	__atomic_store_n(&registered_count, 0, __ATOMIC_RELEASE);
 	ret = 0;
@@ -313,6 +314,7 @@ test_non_eal_lcores_callback(unsigned int eal_threads_count)
 	}
 	if (ret < 0)
 		goto error;
+	rte_lcore_dump(stdout);
 	if (l[0].uninit != 2 || l[1].uninit != 1) {
 		printf("Threads reported having successfully registered and unregistered, but incorrect uninit calls, expected 2, 1, got %u, %u\n",
 			l[0].uninit, l[1].uninit);
@@ -353,6 +355,7 @@ test_lcores(void)
 	}
 	printf("EAL threads count: %u, RTE_MAX_LCORE=%u\n", eal_threads_count,
 		RTE_MAX_LCORE);
+	rte_lcore_dump(stdout);
 
 	if (test_non_eal_lcores(eal_threads_count) < 0)
 		return TEST_FAILED;
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 52c46a4cea..a801cc537a 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -12,7 +12,7 @@
 #include <rte_errno.h>
 #include <rte_lcore.h>
 #include <rte_log.h>
-#include <rte_spinlock.h>
+#include <rte_rwlock.h>
 
 #include "eal_memcfg.h"
 #include "eal_private.h"
@@ -223,7 +223,7 @@ rte_socket_id_by_idx(unsigned int idx)
 	return config->numa_nodes[idx];
 }
 
-static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
+static rte_rwlock_t lcore_lock = RTE_RWLOCK_INITIALIZER;
 struct lcore_callback {
 	TAILQ_ENTRY(lcore_callback) next;
 	char *name;
@@ -272,7 +272,7 @@ rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
 	callback->init = init;
 	callback->uninit = uninit;
 	callback->arg = arg;
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	if (callback->init == NULL)
 		goto no_init;
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
@@ -298,7 +298,7 @@ rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
 		callback->name, callback->init == NULL ? "NO " : "",
 		callback->uninit == NULL ? "NO " : "");
 out:
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
 	return callback;
 }
 
@@ -309,7 +309,7 @@ rte_lcore_callback_unregister(void *handle)
 	struct lcore_callback *callback = handle;
 	unsigned int lcore_id;
 
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	if (callback->uninit == NULL)
 		goto no_uninit;
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
@@ -319,7 +319,7 @@ rte_lcore_callback_unregister(void *handle)
 	}
 no_uninit:
 	TAILQ_REMOVE(&lcore_callbacks, callback, next);
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
 	RTE_LOG(DEBUG, EAL, "Unregistered lcore callback %s-%p.\n",
 		callback->name, callback->arg);
 	free(callback->name);
@@ -339,7 +339,7 @@ eal_lcore_non_eal_allocate(void)
 		RTE_LOG(ERR, EAL, "Multiprocess in use, cannot allocate new lcore.\n");
 		return RTE_MAX_LCORE;
 	}
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		if (cfg->lcore_role[lcore_id] != ROLE_OFF)
 			continue;
@@ -370,7 +370,7 @@ eal_lcore_non_eal_allocate(void)
 		goto out;
 	}
 out:
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
 	return lcore_id;
 }
 
@@ -380,7 +380,7 @@ eal_lcore_non_eal_release(unsigned int lcore_id)
 	struct rte_config *cfg = rte_eal_get_configuration();
 	struct lcore_callback *callback;
 
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	if (cfg->lcore_role[lcore_id] != ROLE_NON_EAL)
 		goto out;
 	TAILQ_FOREACH(callback, &lcore_callbacks, next)
@@ -388,5 +388,62 @@ eal_lcore_non_eal_release(unsigned int lcore_id)
 	cfg->lcore_role[lcore_id] = ROLE_OFF;
 	cfg->lcore_count--;
 out:
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
+}
+
+int
+rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	unsigned int lcore_id;
+	int ret = 0;
+
+	rte_rwlock_read_lock(&lcore_lock);
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (cfg->lcore_role[lcore_id] == ROLE_OFF)
+			continue;
+		ret = cb(lcore_id, arg);
+		if (ret != 0)
+			break;
+	}
+	rte_rwlock_read_unlock(&lcore_lock);
+	return ret;
+}
+
+static int
+lcore_dump_cb(unsigned int lcore_id, void *arg)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+	const char *role;
+	FILE *f = arg;
+	int ret;
+
+	switch (cfg->lcore_role[lcore_id]) {
+	case ROLE_RTE:
+		role = "RTE";
+		break;
+	case ROLE_SERVICE:
+		role = "SERVICE";
+		break;
+	case ROLE_NON_EAL:
+		role = "NON_EAL";
+		break;
+	default:
+		role = "UNKNOWN";
+		break;
+	}
+
+	ret = eal_thread_dump_affinity(&lcore_config[lcore_id].cpuset, cpuset,
+		sizeof(cpuset));
+	fprintf(f, "lcore %u, socket %u, role %s, cpuset %s%s\n", lcore_id,
+		rte_lcore_to_socket_id(lcore_id), role, cpuset,
+		ret == 0 ? "" : "...");
+	return 0;
+}
+
+void
+rte_lcore_dump(FILE *f)
+{
+	rte_lcore_iterate(lcore_dump_cb, f);
 }
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 1cbddc4b5b..e00374b06f 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -104,17 +104,14 @@ rte_thread_get_affinity(rte_cpuset_t *cpusetp)
 }
 
 int
-eal_thread_dump_affinity(char *str, unsigned size)
+eal_thread_dump_affinity(rte_cpuset_t *cpuset, char *str, unsigned int size)
 {
-	rte_cpuset_t cpuset;
 	unsigned cpu;
 	int ret;
 	unsigned int out = 0;
 
-	rte_thread_get_affinity(&cpuset);
-
 	for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
-		if (!CPU_ISSET(cpu, &cpuset))
+		if (!CPU_ISSET(cpu, cpuset))
 			continue;
 
 		ret = snprintf(str + out,
@@ -137,6 +134,15 @@ eal_thread_dump_affinity(char *str, unsigned size)
 	return ret;
 }
 
+int
+eal_thread_dump_current_affinity(char *str, unsigned int size)
+{
+	rte_cpuset_t cpuset;
+
+	rte_thread_get_affinity(&cpuset);
+	return eal_thread_dump_affinity(&cpuset, str, size);
+}
+
 void
 rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
 {
diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
index 4ecd8fd53a..13ec252e01 100644
--- a/lib/librte_eal/common/eal_thread.h
+++ b/lib/librte_eal/common/eal_thread.h
@@ -47,13 +47,15 @@ unsigned eal_cpu_socket_id(unsigned cpu_id);
 #define RTE_CPU_AFFINITY_STR_LEN            256
 
 /**
- * Dump the current pthread cpuset.
+ * Dump the cpuset as a human readable string.
  * This function is private to EAL.
  *
  * Note:
  *   If the dump size is greater than the size of given buffer,
  *   the string will be truncated and with '\0' at the end.
  *
+ * @param cpuset
+ *   The CPU affinity object to dump.
  * @param str
  *   The string buffer the cpuset will dump to.
  * @param size
@@ -62,6 +64,13 @@ unsigned eal_cpu_socket_id(unsigned cpu_id);
  *   0 for success, -1 if truncation happens.
  */
 int
-eal_thread_dump_affinity(char *str, unsigned size);
+eal_thread_dump_affinity(rte_cpuset_t *cpuset, char *str, unsigned int size);
+
+/**
+ * Dump the current thread cpuset.
+ * This is a wrapper on eal_thread_dump_affinity().
+ */
+int
+eal_thread_dump_current_affinity(char *str, unsigned int size);
 
 #endif /* EAL_THREAD_H */
diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
index 32a3d999b8..b93c6fa909 100644
--- a/lib/librte_eal/freebsd/eal.c
+++ b/lib/librte_eal/freebsd/eal.c
@@ -890,7 +890,7 @@ rte_eal_init(int argc, char **argv)
 	rte_thread_init(rte_config.master_lcore,
 		&lcore_config[rte_config.master_lcore].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
 		rte_config.master_lcore, thread_id, cpuset,
diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
index c1fb8eb2d8..b1a3619f51 100644
--- a/lib/librte_eal/freebsd/eal_thread.c
+++ b/lib/librte_eal/freebsd/eal_thread.c
@@ -92,7 +92,7 @@ eal_thread_loop(__rte_unused void *arg)
 
 	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
 		lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
 
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 5a2d6ca7af..a9bcbbc25d 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -261,8 +261,8 @@ typedef void (*rte_lcore_uninit_cb)(unsigned int lcore_id, void *arg);
  * If this step succeeds, the callbacks are put in the lcore callbacks list
  * that will get called for each lcore allocation/release.
  *
- * Note: callbacks execution is serialised under a lock protecting the lcores
- * and callbacks list.
+ * Note: callbacks execution is serialised under a write lock protecting the
+ * lcores and callbacks list.
  *
  * @param name
  *   A name serving as a small description for this callback.
@@ -297,6 +297,49 @@ __rte_experimental
 void
 rte_lcore_callback_unregister(void *handle);
 
+/**
+ * Callback prototype for iterating over lcores.
+ *
+ * @param lcore_id
+ *   The lcore to consider.
+ * @param arg
+ *   An opaque pointer coming from the caller.
+ * @return
+ *   - 0 lets the iteration continue.
+ *   - !0 makes the iteration stop.
+ */
+typedef int (*rte_lcore_iterate_cb)(unsigned int lcore_id, void *arg);
+
+/**
+ * Iterate on all active lcores (ROLE_RTE, ROLE_SERVICE and ROLE_NON_EAL).
+ * No modification on the lcore states is allowed in the callback.
+ *
+ * Note: as opposed to init/uninit callbacks, iteration callbacks can be
+ * invoked in parallel as they are run under a read lock protecting the lcores
+ * and callbacks list.
+ *
+ * @param cb
+ *   The callback that gets passed each lcore.
+ * @param arg
+ *   An opaque pointer passed to cb.
+ * @return
+ *   Same return code as the callback last invocation (see rte_lcore_iterate_cb
+ *   description).
+ */
+__rte_experimental
+int
+rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg);
+
+/**
+ * List all lcores.
+ *
+ * @param f
+ *   The output stream where the dump should be sent.
+ */
+__rte_experimental
+void
+rte_lcore_dump(FILE *f);
+
 /**
  * Set core affinity of the current thread.
  * Support both EAL and non-EAL thread and update TLS.
diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
index 1d90d1c0e3..dfa92b11a1 100644
--- a/lib/librte_eal/linux/eal.c
+++ b/lib/librte_eal/linux/eal.c
@@ -1218,7 +1218,7 @@ rte_eal_init(int argc, char **argv)
 	rte_thread_init(rte_config.master_lcore,
 		&lcore_config[rte_config.master_lcore].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		rte_config.master_lcore, (uintptr_t)thread_id, cpuset,
 		ret == 0 ? "" : "...");
diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
index 07aec0c44d..22d9bc8c01 100644
--- a/lib/librte_eal/linux/eal_thread.c
+++ b/lib/librte_eal/linux/eal_thread.c
@@ -92,7 +92,7 @@ eal_thread_loop(__rte_unused void *arg)
 
 	rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		lcore_id, (uintptr_t)thread_id, cpuset, ret == 0 ? "" : "...");
 
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index aeee7cf431..23a1565dfb 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -398,6 +398,8 @@ EXPERIMENTAL {
 	__rte_trace_mem_per_thread_free;
 	rte_lcore_callback_register;
 	rte_lcore_callback_unregister;
+	rte_lcore_dump;
+	rte_lcore_iterate;
 	rte_thread_register;
 	rte_thread_unregister;
 };
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v4 9/9] mempool/bucket: handle non-EAL lcores
  2020-06-26 14:47 ` [dpdk-dev] [PATCH v4 0/9] Register non-EAL threads as lcore David Marchand
                     ` (7 preceding siblings ...)
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 8/9] eal: add lcore iterators David Marchand
@ 2020-06-26 14:47   ` David Marchand
  2020-06-26 14:52     ` Andrew Rybchenko
  8 siblings, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-06-26 14:47 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, Artem V. Andreev

Convert to new lcore API to support non-EAL lcores.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v3:
- addressed Andrew's comments,

---
 drivers/mempool/bucket/rte_mempool_bucket.c | 130 ++++++++++++--------
 1 file changed, 81 insertions(+), 49 deletions(-)

diff --git a/drivers/mempool/bucket/rte_mempool_bucket.c b/drivers/mempool/bucket/rte_mempool_bucket.c
index 5ce1ef16fb..8b9daa9782 100644
--- a/drivers/mempool/bucket/rte_mempool_bucket.c
+++ b/drivers/mempool/bucket/rte_mempool_bucket.c
@@ -55,6 +55,7 @@ struct bucket_data {
 	struct rte_ring *shared_orphan_ring;
 	struct rte_mempool *pool;
 	unsigned int bucket_mem_size;
+	void *lcore_callback_handle;
 };
 
 static struct bucket_stack *
@@ -345,6 +346,23 @@ bucket_dequeue_contig_blocks(struct rte_mempool *mp, void **first_obj_table,
 	return 0;
 }
 
+struct bucket_count_per_lcore_ctx {
+	const struct bucket_data *bd;
+	unsigned int count;
+};
+
+static int
+bucket_count_per_lcore(unsigned int lcore_id, void *arg)
+{
+	struct bucket_count_per_lcore_ctx *bplc = arg;
+
+	bplc->count += bplc->bd->obj_per_bucket *
+		bplc->bd->buckets[lcore_id]->top;
+	bplc->count +=
+		rte_ring_count(bplc->bd->adoption_buffer_rings[lcore_id]);
+	return 0;
+}
+
 static void
 count_underfilled_buckets(struct rte_mempool *mp,
 			  void *opaque,
@@ -373,23 +391,64 @@ count_underfilled_buckets(struct rte_mempool *mp,
 static unsigned int
 bucket_get_count(const struct rte_mempool *mp)
 {
-	const struct bucket_data *bd = mp->pool_data;
-	unsigned int count =
-		bd->obj_per_bucket * rte_ring_count(bd->shared_bucket_ring) +
-		rte_ring_count(bd->shared_orphan_ring);
-	unsigned int i;
+	struct bucket_count_per_lcore_ctx bplc;
 
-	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		if (!rte_lcore_is_enabled(i))
-			continue;
-		count += bd->obj_per_bucket * bd->buckets[i]->top +
-			rte_ring_count(bd->adoption_buffer_rings[i]);
-	}
+	bplc.bd = mp->pool_data;
+	bplc.count = bplc.bd->obj_per_bucket *
+		rte_ring_count(bplc.bd->shared_bucket_ring);
+	bplc.count += rte_ring_count(bplc.bd->shared_orphan_ring);
 
+	rte_lcore_iterate(bucket_count_per_lcore, &bplc);
 	rte_mempool_mem_iter((struct rte_mempool *)(uintptr_t)mp,
-			     count_underfilled_buckets, &count);
+			     count_underfilled_buckets, &bplc.count);
+
+	return bplc.count;
+}
+
+static int
+bucket_init_per_lcore(unsigned int lcore_id, void *arg)
+{
+	char rg_name[RTE_RING_NAMESIZE];
+	struct bucket_data *bd = arg;
+	struct rte_mempool *mp;
+	int rg_flags;
+	int rc;
+
+	mp = bd->pool;
+	bd->buckets[lcore_id] = bucket_stack_create(mp,
+		mp->size / bd->obj_per_bucket);
+	if (bd->buckets[lcore_id] == NULL)
+		goto error;
+
+	rc = snprintf(rg_name, sizeof(rg_name), RTE_MEMPOOL_MZ_FORMAT ".a%u",
+		mp->name, lcore_id);
+	if (rc < 0 || rc >= (int)sizeof(rg_name))
+		goto error;
+
+	rg_flags = RING_F_SC_DEQ;
+	if (mp->flags & MEMPOOL_F_SP_PUT)
+		rg_flags |= RING_F_SP_ENQ;
+	bd->adoption_buffer_rings[lcore_id] = rte_ring_create(rg_name,
+		rte_align32pow2(mp->size + 1), mp->socket_id, rg_flags);
+	if (bd->adoption_buffer_rings[lcore_id] == NULL)
+		goto error;
 
-	return count;
+	return 0;
+error:
+	rte_free(bd->buckets[lcore_id]);
+	bd->buckets[lcore_id] = NULL;
+	return -1;
+}
+
+static void
+bucket_uninit_per_lcore(unsigned int lcore_id, void *arg)
+{
+	struct bucket_data *bd = arg;
+
+	rte_ring_free(bd->adoption_buffer_rings[lcore_id]);
+	bd->adoption_buffer_rings[lcore_id] = NULL;
+	rte_free(bd->buckets[lcore_id]);
+	bd->buckets[lcore_id] = NULL;
 }
 
 static int
@@ -399,7 +458,6 @@ bucket_alloc(struct rte_mempool *mp)
 	int rc = 0;
 	char rg_name[RTE_RING_NAMESIZE];
 	struct bucket_data *bd;
-	unsigned int i;
 	unsigned int bucket_header_size;
 	size_t pg_sz;
 
@@ -429,36 +487,17 @@ bucket_alloc(struct rte_mempool *mp)
 	/* eventually this should be a tunable parameter */
 	bd->bucket_stack_thresh = (mp->size / bd->obj_per_bucket) * 4 / 3;
 
+	bd->lcore_callback_handle = rte_lcore_callback_register("bucket",
+		bucket_init_per_lcore, bucket_uninit_per_lcore, bd);
+	if (bd->lcore_callback_handle == NULL) {
+		rc = -ENOMEM;
+		goto no_mem_for_stacks;
+	}
+
 	if (mp->flags & MEMPOOL_F_SP_PUT)
 		rg_flags |= RING_F_SP_ENQ;
 	if (mp->flags & MEMPOOL_F_SC_GET)
 		rg_flags |= RING_F_SC_DEQ;
-
-	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		if (!rte_lcore_is_enabled(i))
-			continue;
-		bd->buckets[i] =
-			bucket_stack_create(mp, mp->size / bd->obj_per_bucket);
-		if (bd->buckets[i] == NULL) {
-			rc = -ENOMEM;
-			goto no_mem_for_stacks;
-		}
-		rc = snprintf(rg_name, sizeof(rg_name),
-			      RTE_MEMPOOL_MZ_FORMAT ".a%u", mp->name, i);
-		if (rc < 0 || rc >= (int)sizeof(rg_name)) {
-			rc = -ENAMETOOLONG;
-			goto no_mem_for_stacks;
-		}
-		bd->adoption_buffer_rings[i] =
-			rte_ring_create(rg_name, rte_align32pow2(mp->size + 1),
-					mp->socket_id,
-					rg_flags | RING_F_SC_DEQ);
-		if (bd->adoption_buffer_rings[i] == NULL) {
-			rc = -rte_errno;
-			goto no_mem_for_stacks;
-		}
-	}
-
 	rc = snprintf(rg_name, sizeof(rg_name),
 		      RTE_MEMPOOL_MZ_FORMAT ".0", mp->name);
 	if (rc < 0 || rc >= (int)sizeof(rg_name)) {
@@ -498,11 +537,8 @@ bucket_alloc(struct rte_mempool *mp)
 	rte_ring_free(bd->shared_orphan_ring);
 cannot_create_shared_orphan_ring:
 invalid_shared_orphan_ring:
+	rte_lcore_callback_unregister(bd->lcore_callback_handle);
 no_mem_for_stacks:
-	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		rte_free(bd->buckets[i]);
-		rte_ring_free(bd->adoption_buffer_rings[i]);
-	}
 	rte_free(bd);
 no_mem_for_data:
 	rte_errno = -rc;
@@ -512,16 +548,12 @@ bucket_alloc(struct rte_mempool *mp)
 static void
 bucket_free(struct rte_mempool *mp)
 {
-	unsigned int i;
 	struct bucket_data *bd = mp->pool_data;
 
 	if (bd == NULL)
 		return;
 
-	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		rte_free(bd->buckets[i]);
-		rte_ring_free(bd->adoption_buffer_rings[i]);
-	}
+	rte_lcore_callback_unregister(bd->lcore_callback_handle);
 
 	rte_ring_free(bd->shared_orphan_ring);
 	rte_ring_free(bd->shared_bucket_ring);
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v4 9/9] mempool/bucket: handle non-EAL lcores
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 9/9] mempool/bucket: handle non-EAL lcores David Marchand
@ 2020-06-26 14:52     ` Andrew Rybchenko
  0 siblings, 0 replies; 126+ messages in thread
From: Andrew Rybchenko @ 2020-06-26 14:52 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, ktraynor, ian.stokes,
	i.maximets, Artem V. Andreev

On 6/26/20 5:47 PM, David Marchand wrote:
> Convert to new lcore API to support non-EAL lcores.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>

Reviewed-by: Andrew Rybchenko <arybchenko@solarflare.com>



^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v4 4/9] eal: introduce thread uninit helper
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 4/9] eal: introduce thread uninit helper David Marchand
@ 2020-06-26 15:00     ` Jerin Jacob
  2020-06-29  9:07       ` David Marchand
  2020-06-29  8:59     ` [dpdk-dev] [EXT] " Sunil Kumar Kori
  2020-06-30  9:42     ` [dpdk-dev] " Olivier Matz
  2 siblings, 1 reply; 126+ messages in thread
From: Jerin Jacob @ 2020-06-26 15:00 UTC (permalink / raw)
  To: David Marchand
  Cc: dpdk-dev, Richardson, Bruce, Ray Kinsella, Thomas Monjalon,
	Andrew Rybchenko, Kevin Traynor, ian.stokes, i.maximets,
	Jerin Jacob, Sunil Kumar Kori, Neil Horman, Harini Ramakrishnan,
	Omar Cardona, Pallavi Kadam, Ranjit Menon

On Fri, Jun 26, 2020 at 8:18 PM David Marchand
<david.marchand@redhat.com> wrote:
>
> This is a preparation step for dynamically unregistering threads.
>
> Since we explicitly allocate a per thread trace buffer in
> rte_thread_init, add an internal helper to free this buffer.
>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
> Note: I preferred renaming the current internal function to free all
> threads trace buffers (new name trace_mem_free()) and reuse the previous
> name (trace_mem_per_thread_free()) when freeing this buffer for a given
> thread.
>
> Changes since v2:
> - added missing stub for windows tracing support,
> - moved free symbol to exported (experimental) ABI as a counterpart of
>   the alloc symbol we already had,
>
> Changes since v1:
> - rebased on master, removed Windows workaround wrt traces support,

> +/**
> + * Uninitialize per-lcore info for current thread.
> + */
> +void rte_thread_uninit(void);
> +

Is it a public API? I guess not, as it is not added to the .map file.
If it is a private API, isn't it better to rename it with an eal_thread_
prefix, like the other private APIs in eal_thread.h?

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [EXT] [PATCH v4 4/9] eal: introduce thread uninit helper
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 4/9] eal: introduce thread uninit helper David Marchand
  2020-06-26 15:00     ` Jerin Jacob
@ 2020-06-29  8:59     ` Sunil Kumar Kori
  2020-06-29  9:25       ` David Marchand
  2020-06-30  9:42     ` [dpdk-dev] " Olivier Matz
  2 siblings, 1 reply; 126+ messages in thread
From: Sunil Kumar Kori @ 2020-06-29  8:59 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, Jerin Jacob Kollanukkaran, Neil Horman,
	Harini Ramakrishnan, Omar Cardona, Pallavi Kadam, Ranjit Menon

>-----Original Message-----
>From: David Marchand <david.marchand@redhat.com>
>Sent: Friday, June 26, 2020 8:18 PM
>To: dev@dpdk.org
>Cc: jerinjacobk@gmail.com; bruce.richardson@intel.com; mdr@ashroe.eu;
>thomas@monjalon.net; arybchenko@solarflare.com; ktraynor@redhat.com;
>ian.stokes@intel.com; i.maximets@ovn.org; Jerin Jacob Kollanukkaran
><jerinj@marvell.com>; Sunil Kumar Kori <skori@marvell.com>; Neil Horman
><nhorman@tuxdriver.com>; Harini Ramakrishnan
><harini.ramakrishnan@microsoft.com>; Omar Cardona
><ocardona@microsoft.com>; Pallavi Kadam <pallavi.kadam@intel.com>;
>Ranjit Menon <ranjit.menon@intel.com>
>Subject: [EXT] [PATCH v4 4/9] eal: introduce thread uninit helper
>
>External Email
>
>----------------------------------------------------------------------
>This is a preparation step for dynamically unregistering threads.
>
>Since we explicitly allocate a per thread trace buffer in rte_thread_init, add an
>internal helper to free this buffer.
>
>Signed-off-by: David Marchand <david.marchand@redhat.com>
>---
>Note: I preferred renaming the current internal function to free all threads
>trace buffers (new name trace_mem_free()) and reuse the previous name
>(trace_mem_per_thread_free()) when freeing this buffer for a given thread.
>
>Changes since v2:
>- added missing stub for windows tracing support,
>- moved free symbol to exported (experimental) ABI as a counterpart of
>  the alloc symbol we already had,
>
>Changes since v1:
>- rebased on master, removed Windows workaround wrt traces support,
>
>---
> lib/librte_eal/common/eal_common_thread.c |  9 ++++
>lib/librte_eal/common/eal_common_trace.c  | 51 +++++++++++++++++++----
> lib/librte_eal/common/eal_thread.h        |  5 +++
> lib/librte_eal/common/eal_trace.h         |  2 +-
> lib/librte_eal/include/rte_trace_point.h  |  9 ++++
> lib/librte_eal/rte_eal_version.map        |  3 ++
> lib/librte_eal/windows/eal.c              |  5 +++
> 7 files changed, 75 insertions(+), 9 deletions(-)
>
>diff --git a/lib/librte_eal/common/eal_common_thread.c
>b/lib/librte_eal/common/eal_common_thread.c
>index afb30236c5..3b30cc99d9 100644
>--- a/lib/librte_eal/common/eal_common_thread.c
>+++ b/lib/librte_eal/common/eal_common_thread.c
>@@ -20,6 +20,7 @@
> #include "eal_internal_cfg.h"
> #include "eal_private.h"
> #include "eal_thread.h"
>+#include "eal_trace.h"
>
> RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
>RTE_DEFINE_PER_LCORE(int, _thread_id) = -1; @@ -161,6 +162,14 @@
>rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
> 	__rte_trace_mem_per_thread_alloc();
> }
>
>+void
>+rte_thread_uninit(void)
>+{

Need to check whether trace is enabled or not, similar to trace_mem_free().
>+	__rte_trace_mem_per_thread_free();
>+
>+	RTE_PER_LCORE(_lcore_id) = LCORE_ID_ANY; }
>+
> struct rte_thread_ctrl_params {
> 	void *(*start_routine)(void *);
> 	void *arg;

[snipped]

>2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v4 4/9] eal: introduce thread uninit helper
  2020-06-26 15:00     ` Jerin Jacob
@ 2020-06-29  9:07       ` David Marchand
  0 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-29  9:07 UTC (permalink / raw)
  To: Jerin Jacob
  Cc: dpdk-dev, Richardson, Bruce, Ray Kinsella, Thomas Monjalon,
	Andrew Rybchenko, Kevin Traynor, Ian Stokes, Ilya Maximets,
	Jerin Jacob, Sunil Kumar Kori, Neil Horman, Harini Ramakrishnan,
	Omar Cardona, Pallavi Kadam, Ranjit Menon

On Fri, Jun 26, 2020 at 5:00 PM Jerin Jacob <jerinjacobk@gmail.com> wrote:
>
> On Fri, Jun 26, 2020 at 8:18 PM David Marchand
> <david.marchand@redhat.com> wrote:
> >
> > This is a preparation step for dynamically unregistering threads.
> >
> > Since we explicitly allocate a per thread trace buffer in
> > rte_thread_init, add an internal helper to free this buffer.
> >
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > ---
> > Note: I preferred renaming the current internal function to free all
> > threads trace buffers (new name trace_mem_free()) and reuse the previous
> > name (trace_mem_per_thread_free()) when freeing this buffer for a given
> > thread.
> >
> > Changes since v2:
> > - added missing stub for windows tracing support,
> > - moved free symbol to exported (experimental) ABI as a counterpart of
> >   the alloc symbol we already had,
> >
> > Changes since v1:
> > - rebased on master, removed Windows workaround wrt traces support,
>
> > +/**
> > + * Uninitialize per-lcore info for current thread.
> > + */
> > +void rte_thread_uninit(void);
> > +
>
> Is it a public API? I guess not, as it is not added to the .map file.
> If it is a private API, isn't it better to rename it to eal_thread_, like
> the other private APIs in eal_thread.h?

Before this series, we have:
- rte_thread_ public APIs for both EAL and non-EAL threads (declared
in rte_eal_interrupts.h and rte_lcore.h),
- eal_thread_ internal APIs that apply to EAL threads (declared in
eal_thread.h),

I guess __rte_thread_ could do the trick and I will move this to eal_private.h.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [EXT] [PATCH v4 4/9] eal: introduce thread uninit helper
  2020-06-29  8:59     ` [dpdk-dev] [EXT] " Sunil Kumar Kori
@ 2020-06-29  9:25       ` David Marchand
  0 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-29  9:25 UTC (permalink / raw)
  To: Sunil Kumar Kori
  Cc: dev, jerinjacobk, bruce.richardson, mdr, thomas, arybchenko,
	ktraynor, ian.stokes, i.maximets, Jerin Jacob Kollanukkaran,
	Neil Horman, Harini Ramakrishnan, Omar Cardona, Pallavi Kadam,
	Ranjit Menon

On Mon, Jun 29, 2020 at 10:59 AM Sunil Kumar Kori <skori@marvell.com> wrote:
> >diff --git a/lib/librte_eal/common/eal_common_thread.c
> >b/lib/librte_eal/common/eal_common_thread.c
> >index afb30236c5..3b30cc99d9 100644
> >--- a/lib/librte_eal/common/eal_common_thread.c
> >+++ b/lib/librte_eal/common/eal_common_thread.c
> >@@ -20,6 +20,7 @@
> > #include "eal_internal_cfg.h"
> > #include "eal_private.h"
> > #include "eal_thread.h"
> >+#include "eal_trace.h"
> >
> > RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
> > RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
> >@@ -161,6 +162,14 @@ rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
> >       __rte_trace_mem_per_thread_alloc();
> > }
> >
> >+void
> >+rte_thread_uninit(void)
> >+{
>
> Need to check whether trace is enabled or not similar to trace_mem_free().

The internal trace api abstracts this.
It should be in the trace code itself, in the same way it is done for
allocating the trace buffer.

But is an additional check needed?
The check on the trace buffer being initialised in
__rte_trace_mem_per_thread_free should be enough.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v4 7/9] eal: add lcore init callbacks
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 7/9] eal: add lcore init callbacks David Marchand
@ 2020-06-29 12:46     ` Ananyev, Konstantin
  2020-06-30 10:09     ` Olivier Matz
  2020-06-30 10:15     ` Olivier Matz
  2 siblings, 0 replies; 126+ messages in thread
From: Ananyev, Konstantin @ 2020-06-29 12:46 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: jerinjacobk, Richardson, Bruce, mdr, thomas, arybchenko,
	ktraynor, Stokes, Ian, i.maximets, Neil Horman


 
> diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
> index a61824a779..52c46a4cea 100644
> --- a/lib/librte_eal/common/eal_common_lcore.c
> +++ b/lib/librte_eal/common/eal_common_lcore.c
> @@ -224,11 +224,114 @@ rte_socket_id_by_idx(unsigned int idx)
>  }
> 
>  static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
> +struct lcore_callback {
> +	TAILQ_ENTRY(lcore_callback) next;
> +	char *name;
> +	rte_lcore_init_cb init;
> +	rte_lcore_uninit_cb uninit;
> +	void *arg;
> +};
> +static TAILQ_HEAD(lcore_callbacks_head, lcore_callback) lcore_callbacks =
> +	TAILQ_HEAD_INITIALIZER(lcore_callbacks);
> +
> +static int
> +callback_init(struct lcore_callback *callback, unsigned int lcore_id)
> +{
> +	if (callback->init == NULL)
> +		return 0;
> +	RTE_LOG(DEBUG, EAL, "Call init for lcore callback %s, lcore_id %u\n",
> +		callback->name, lcore_id);
> +	return callback->init(lcore_id, callback->arg);
> +}
> +
> +static void
> +callback_uninit(struct lcore_callback *callback, unsigned int lcore_id)
> +{
> +	if (callback->uninit == NULL)
> +		return;
> +	RTE_LOG(DEBUG, EAL, "Call uninit for lcore callback %s, lcore_id %u\n",
> +		callback->name, lcore_id);
> +	callback->uninit(lcore_id, callback->arg);
> +}
> +
> +void *
> +rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
> +	rte_lcore_uninit_cb uninit, void *arg)
> +{
> +	struct rte_config *cfg = rte_eal_get_configuration();
> +	struct lcore_callback *callback;
> +	unsigned int lcore_id;
> +
> +	callback = calloc(1, sizeof(*callback));
> +	if (callback == NULL)
> +		return NULL;
> +	if (asprintf(&callback->name, "%s-%p", name, arg) == -1) {

If name is NULL, I presume asprintf() would segfault, right?
As this is a public (and control-plane) API, I think it is worth
adding formal checking of the input parameters.
Same for the other new public functions.

> +		free(callback);
> +		return NULL;
> +	}
> +	callback->init = init;
> +	callback->uninit = uninit;
> +	callback->arg = arg;
> +	rte_spinlock_lock(&lcore_lock);
> +	if (callback->init == NULL)
> +		goto no_init;
> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> +		if (cfg->lcore_role[lcore_id] == ROLE_OFF)
> +			continue;
> +		if (callback_init(callback, lcore_id) == 0)
> +			continue;
> +		/* Callback refused init for this lcore, uninitialize all
> +		 * previous lcore.
> +		 */
> +		for (; lcore_id != 0; lcore_id--) {
> +			if (cfg->lcore_role[lcore_id - 1] == ROLE_OFF)
> +				continue;
> +			callback_uninit(callback, lcore_id - 1);
> +		}

Just as a nit, you can do here instead:
while (lcore_id-- != 0) { if (cfg->lcore_role[lcore_id] == ROLE_OFF) ...}
To avoid 'lcore_id -1' inside loop body.
Might look a bit nicer.

> +		free(callback);

I think you forgot:
free(callback->name);
here.
It might be worth having a separate helper function around these 2 frees.

> +		callback = NULL;
> +		goto out;
> +	}
> +no_init:
> +	TAILQ_INSERT_TAIL(&lcore_callbacks, callback, next);
> +	RTE_LOG(DEBUG, EAL, "Registered new lcore callback %s (%sinit, %suninit).\n",
> +		callback->name, callback->init == NULL ? "NO " : "",
> +		callback->uninit == NULL ? "NO " : "");
> +out:
> +	rte_spinlock_unlock(&lcore_lock);
> +	return callback;
> +}
> +
> +void
> +rte_lcore_callback_unregister(void *handle)
> +{
> +	struct rte_config *cfg = rte_eal_get_configuration();
> +	struct lcore_callback *callback = handle;
> +	unsigned int lcore_id;
> +
> +	rte_spinlock_lock(&lcore_lock);
> +	if (callback->uninit == NULL)
> +		goto no_uninit;
> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> +		if (cfg->lcore_role[lcore_id] == ROLE_OFF)
> +			continue;
> +		callback_uninit(callback, lcore_id);
> +	}
> +no_uninit:
> +	TAILQ_REMOVE(&lcore_callbacks, callback, next);
> +	rte_spinlock_unlock(&lcore_lock);
> +	RTE_LOG(DEBUG, EAL, "Unregistered lcore callback %s-%p.\n",
> +		callback->name, callback->arg);
> +	free(callback->name);
> +	free(callback);
> +}
> 
>  unsigned int
>  eal_lcore_non_eal_allocate(void)
>  {
>  	struct rte_config *cfg = rte_eal_get_configuration();
> +	struct lcore_callback *callback;
> +	struct lcore_callback *prev;
>  	unsigned int lcore_id;
> 
>  	if (cfg->process_type == RTE_PROC_SECONDARY ||
> @@ -244,8 +347,29 @@ eal_lcore_non_eal_allocate(void)
>  		cfg->lcore_count++;
>  		break;
>  	}
> -	if (lcore_id == RTE_MAX_LCORE)
> +	if (lcore_id == RTE_MAX_LCORE) {
>  		RTE_LOG(DEBUG, EAL, "No lcore available.\n");
> +		goto out;
> +	}
> +	TAILQ_FOREACH(callback, &lcore_callbacks, next) {
> +		if (callback_init(callback, lcore_id) == 0)
> +			continue;
> +		/* Callback refused init for this lcore, call uninit for all
> +		 * previous callbacks.
> +		 */
> +		prev = TAILQ_PREV(callback, lcore_callbacks_head, next);
> +		while (prev != NULL) {
> +			callback_uninit(prev, lcore_id);
> +			prev = TAILQ_PREV(prev, lcore_callbacks_head, next);
> +		}
> +		RTE_LOG(DEBUG, EAL, "Initialization refused for lcore %u.\n",
> +			lcore_id);
> +		cfg->lcore_role[lcore_id] = ROLE_OFF;
> +		cfg->lcore_count--;
> +		lcore_id = RTE_MAX_LCORE;
> +		goto out;
> +	}
> +out:
>  	rte_spinlock_unlock(&lcore_lock);
>  	return lcore_id;
>  }
> @@ -254,11 +378,15 @@ void
>  eal_lcore_non_eal_release(unsigned int lcore_id)
>  {
>  	struct rte_config *cfg = rte_eal_get_configuration();
> +	struct lcore_callback *callback;
> 
>  	rte_spinlock_lock(&lcore_lock);
> -	if (cfg->lcore_role[lcore_id] == ROLE_NON_EAL) {
> -		cfg->lcore_role[lcore_id] = ROLE_OFF;
> -		cfg->lcore_count--;
> -	}
> +	if (cfg->lcore_role[lcore_id] != ROLE_NON_EAL)
> +		goto out;
> +	TAILQ_FOREACH(callback, &lcore_callbacks, next)
> +		callback_uninit(callback, lcore_id);
> +	cfg->lcore_role[lcore_id] = ROLE_OFF;
> +	cfg->lcore_count--;
> +out:
>  	rte_spinlock_unlock(&lcore_lock);
>  }

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v4 6/9] eal: register non-EAL threads as lcores
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 6/9] eal: register non-EAL threads as lcores David Marchand
@ 2020-06-29 14:27     ` Ananyev, Konstantin
  2020-06-30 10:07     ` Olivier Matz
  1 sibling, 0 replies; 126+ messages in thread
From: Ananyev, Konstantin @ 2020-06-29 14:27 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: jerinjacobk, Richardson, Bruce, mdr, thomas, arybchenko,
	ktraynor, Stokes, Ian, i.maximets, Mcnamara, John, Kovacevic,
	Marko, Burakov, Anatoly, Olivier Matz, Neil Horman

> diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
> index 86d32a3dd7..a61824a779 100644
> --- a/lib/librte_eal/common/eal_common_lcore.c
> +++ b/lib/librte_eal/common/eal_common_lcore.c
> @@ -6,13 +6,15 @@
>  #include <limits.h>
>  #include <string.h>
> 
> -#include <rte_errno.h>
> -#include <rte_log.h>
> -#include <rte_eal.h>
> -#include <rte_lcore.h>
>  #include <rte_common.h>
>  #include <rte_debug.h>
> +#include <rte_eal.h>
> +#include <rte_errno.h>
> +#include <rte_lcore.h>
> +#include <rte_log.h>
> +#include <rte_spinlock.h>
> 
> +#include "eal_memcfg.h"
>  #include "eal_private.h"
>  #include "eal_thread.h"
> 
> @@ -220,3 +222,43 @@ rte_socket_id_by_idx(unsigned int idx)
>  	}
>  	return config->numa_nodes[idx];
>  }
> +
> +static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
> +
> +unsigned int
> +eal_lcore_non_eal_allocate(void)
> +{
> +	struct rte_config *cfg = rte_eal_get_configuration();
> +	unsigned int lcore_id;
> +
> +	if (cfg->process_type == RTE_PROC_SECONDARY ||
> +			!eal_mcfg_forbid_multiprocess()) {
> +		RTE_LOG(ERR, EAL, "Multiprocess in use, cannot allocate new lcore.\n");
> +		return RTE_MAX_LCORE;
> +	}
> +	rte_spinlock_lock(&lcore_lock);
> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> +		if (cfg->lcore_role[lcore_id] != ROLE_OFF)
> +			continue;
> +		cfg->lcore_role[lcore_id] = ROLE_NON_EAL;
> +		cfg->lcore_count++;
> +		break;
> +	}
> +	if (lcore_id == RTE_MAX_LCORE)
> +		RTE_LOG(DEBUG, EAL, "No lcore available.\n");
> +	rte_spinlock_unlock(&lcore_lock);
> +	return lcore_id;
> +}
> +
> +void
> +eal_lcore_non_eal_release(unsigned int lcore_id)
> +{
> +	struct rte_config *cfg = rte_eal_get_configuration();
> +
> +	rte_spinlock_lock(&lcore_lock);
> +	if (cfg->lcore_role[lcore_id] == ROLE_NON_EAL) {
> +		cfg->lcore_role[lcore_id] = ROLE_OFF;
> +		cfg->lcore_count--;
> +	}
> +	rte_spinlock_unlock(&lcore_lock);
> +}
> diff --git a/lib/librte_eal/common/eal_common_mcfg.c b/lib/librte_eal/common/eal_common_mcfg.c
> index 49d3ed0ce5..5b42d454e2 100644
> --- a/lib/librte_eal/common/eal_common_mcfg.c
> +++ b/lib/librte_eal/common/eal_common_mcfg.c
> @@ -44,6 +44,42 @@ eal_mcfg_check_version(void)
>  	return 0;
>  }
> 
> +enum mp_status {
> +	MP_UNKNOWN,
> +	MP_FORBIDDEN,
> +	MP_ENABLED,
> +};
> +
> +static bool
> +eal_mcfg_set_mp_status(enum mp_status status)
> +{
> +	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
> +	uint8_t expected;
> +	uint8_t desired;
> +
> +	expected = MP_UNKNOWN;
> +	desired = status;
> +	if (__atomic_compare_exchange_n(&mcfg->mp_status, &expected, desired,
> +			false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
> +		return true;
> +
> +	return __atomic_load_n(&mcfg->mp_status, __ATOMIC_RELAXED) == desired;
> +}
> +
> +bool
> +eal_mcfg_forbid_multiprocess(void)
> +{
> +	assert(rte_eal_get_configuration()->process_type == RTE_PROC_PRIMARY);
> +	return eal_mcfg_set_mp_status(MP_FORBIDDEN);
> +}
> +
> +bool
> +eal_mcfg_enable_multiprocess(void)
> +{
> +	assert(rte_eal_get_configuration()->process_type == RTE_PROC_SECONDARY);
> +	return eal_mcfg_set_mp_status(MP_ENABLED);
> +}

I still don't think it is a good idea to allow changing the primary proc behaviour
(allow/forbid secondary procs to attach) on the fly.
Imagine the situation - there is a primary proc (supposed to run forever)
that does rte_thread_register/rte_thread_unregister during its lifetime.
Plus, from time to time, the user runs some secondary process to collect stats/debug
the primary one (proc-info or so).
Now the behaviour of such a system will be completely non-deterministic:
In some runs the primary proc will do rte_thread_register() first, and then the secondary
proc will never be able to attach.
In other cases the secondary will win the race, and then for the primary
eal_lcore_non_eal_allocate() will always fail.
Which means different behaviour, significantly varying performance, etc.

I am not a big fan of introducing such a workaround at all, but at least a startup flag
will guarantee consistent behaviour: the secondary proc will always fail to attach
and eal_lcore_non_eal_allocate() will always succeed
(as long as there are free lcore_ids, of course).

From your previous mail:
> A EAL flag is a stable API from the start, as there is nothing
> describing how we can remove one.
> So a new EAL flag for an experimental API/feature seems contradictory.

Hm, yes, there is a gap, but why can't an EAL flag also be an experimental one?
What would be the difference between a flag and an API call here?
We can still reserve the right to remove/change it at any time.

As another thought about startup parameters -
would it make sense to have a new one: --lcore-allow-list=...?
That would limit the lcore_ids available for the process.
Without this new parameter specified,
the allowed lcore list would be equal to the startup lcore list (static ones),
and no dynamic lcore allocations would be allowed.
As an example:
dpdk_app --lcores=6,7 --lcore-allow-list=0-100
will reserve lcore_ids 6,7 at startup (same as we do now),
and leave [0-5] and [8-100] available for dynamic usage.
 
>  void
>  eal_mcfg_update_internal(void)
>  {
> diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> index a7ae0691bf..1cbddc4b5b 100644
> --- a/lib/librte_eal/common/eal_common_thread.c
> +++ b/lib/librte_eal/common/eal_common_thread.c
> @@ -236,3 +236,36 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>  	pthread_join(*thread, NULL);
>  	return -ret;
>  }
> +
> +void
> +rte_thread_register(void)
> +{
> +	unsigned int lcore_id;
> +	rte_cpuset_t cpuset;
> +
> +	/* EAL init flushes all lcores, we can't register before. */
> +	assert(internal_config.init_complete == 1);
> +	if (pthread_getaffinity_np(pthread_self(), sizeof(cpuset),
> +			&cpuset) != 0)
> +		CPU_ZERO(&cpuset);
> +	lcore_id = eal_lcore_non_eal_allocate();
> +	if (lcore_id >= RTE_MAX_LCORE)
> +		lcore_id = LCORE_ID_ANY;
> +	rte_thread_init(lcore_id, &cpuset);
> +	if (lcore_id != LCORE_ID_ANY)
> +		RTE_LOG(DEBUG, EAL, "Registered non-EAL thread as lcore %u.\n",
> +			lcore_id);
> +}
> +
> +void
> +rte_thread_unregister(void)
> +{
> +	unsigned int lcore_id = rte_lcore_id();
> +
> +	if (lcore_id != LCORE_ID_ANY)
> +		eal_lcore_non_eal_release(lcore_id);
> +	rte_thread_uninit();
> +	if (lcore_id != LCORE_ID_ANY)
> +		RTE_LOG(DEBUG, EAL, "Unregistered non-EAL thread (was lcore %u).\n",
> +			lcore_id);
> +}
> diff --git a/lib/librte_eal/common/eal_memcfg.h b/lib/librte_eal/common/eal_memcfg.h
> index 583fcb5953..340e523c6a 100644
> --- a/lib/librte_eal/common/eal_memcfg.h
> +++ b/lib/librte_eal/common/eal_memcfg.h
> @@ -41,6 +41,8 @@ struct rte_mem_config {
>  	rte_rwlock_t memory_hotplug_lock;
>  	/**< Indicates whether memory hotplug request is in progress. */
> 
> +	uint8_t mp_status; /**< Indicates whether multiprocess can be used. */
> +
>  	/* memory segments and zones */
>  	struct rte_fbarray memzones; /**< Memzone descriptors. */
> 
> @@ -91,6 +93,14 @@ eal_mcfg_wait_complete(void);
>  int
>  eal_mcfg_check_version(void);
> 
> +/* mark primary process as not supporting multi-process. */
> +bool
> +eal_mcfg_forbid_multiprocess(void);
> +
> +/* instruct primary process that a secondary process attached once. */
> +bool
> +eal_mcfg_enable_multiprocess(void);
> +
>  /* set mem config as complete */
>  void
>  eal_mcfg_complete(void);
> diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
> index 0592fcd694..73238ff157 100644
> --- a/lib/librte_eal/common/eal_private.h
> +++ b/lib/librte_eal/common/eal_private.h
> @@ -396,6 +396,24 @@ uint64_t get_tsc_freq(void);
>   */
>  uint64_t get_tsc_freq_arch(void);
> 
> +/**
> + * Allocate a free lcore to associate to a non-EAL thread.
> + *
> + * @return
> + *   - the id of a lcore with role ROLE_NON_EAL on success.
> + *   - RTE_MAX_LCORE if none was available.
> + */
> +unsigned int eal_lcore_non_eal_allocate(void);
> +
> +/**
> + * Release the lcore used by a non-EAL thread.
> + * Counterpart of eal_lcore_non_eal_allocate().
> + *
> + * @param lcore_id
> + *   The lcore with role ROLE_NON_EAL to release.
> + */
> +void eal_lcore_non_eal_release(unsigned int lcore_id);
> +
>  /**
>   * Prepare physical memory mapping
>   * i.e. hugepages on Linux and
> diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
> index 13e5de006f..32a3d999b8 100644
> --- a/lib/librte_eal/freebsd/eal.c
> +++ b/lib/librte_eal/freebsd/eal.c
> @@ -424,6 +424,10 @@ rte_config_init(void)
>  		}
>  		if (rte_eal_config_reattach() < 0)
>  			return -1;
> +		if (!eal_mcfg_enable_multiprocess()) {
> +			RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
> +			return -1;
> +		}
>  		eal_mcfg_update_internal();
>  		break;
>  	case RTE_PROC_AUTO:
> diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
> index 3968c40693..43747e88df 100644
> --- a/lib/librte_eal/include/rte_lcore.h
> +++ b/lib/librte_eal/include/rte_lcore.h
> @@ -31,6 +31,7 @@ enum rte_lcore_role_t {
>  	ROLE_RTE,
>  	ROLE_OFF,
>  	ROLE_SERVICE,
> +	ROLE_NON_EAL,
>  };
> 
>  /**
> @@ -67,7 +68,8 @@ rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
>   *   to run threads with lcore IDs 0, 1, 2 and 3 on physical core 10..
>   *
>   * @return
> - *  Logical core ID (in EAL thread) or LCORE_ID_ANY (in non-EAL thread)
> + *  Logical core ID (in EAL thread or registered non-EAL thread) or
> + *  LCORE_ID_ANY (in unregistered non-EAL thread)
>   */
>  static inline unsigned
>  rte_lcore_id(void)
> @@ -279,6 +281,27 @@ int rte_thread_setname(pthread_t id, const char *name);
>  __rte_experimental
>  int rte_thread_getname(pthread_t id, char *name, size_t len);
> 
> +/**
> + * Register current non-EAL thread as a lcore.
> + *
> + * @note This API is not compatible with the multi-process feature:
> + * - if a primary process registers a non-EAL thread, then no secondary process
> + *   will initialise.
> + * - if a secondary process initialises successfully, trying to register a
> + *   non-EAL thread from either primary or secondary processes will always end
> + *   up with the thread getting LCORE_ID_ANY as lcore.
> + */
> +__rte_experimental
> +void
> +rte_thread_register(void);
> +
> +/**
> + * Unregister current thread and release lcore if one was associated.
> + */
> +__rte_experimental
> +void
> +rte_thread_unregister(void);
> +
>  /**
>   * Create a control thread.
>   *
> diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
> index 8894cea50a..1d90d1c0e3 100644
> --- a/lib/librte_eal/linux/eal.c
> +++ b/lib/librte_eal/linux/eal.c
> @@ -514,6 +514,10 @@ rte_config_init(void)
>  		}
>  		if (rte_eal_config_reattach() < 0)
>  			return -1;
> +		if (!eal_mcfg_enable_multiprocess()) {
> +			RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
> +			return -1;
> +		}
>  		eal_mcfg_update_internal();
>  		break;
>  	case RTE_PROC_AUTO:
> diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
> index 5831eea4b0..39c41d445d 100644
> --- a/lib/librte_eal/rte_eal_version.map
> +++ b/lib/librte_eal/rte_eal_version.map
> @@ -396,6 +396,8 @@ EXPERIMENTAL {
> 
>  	# added in 20.08
>  	__rte_trace_mem_per_thread_free;
> +	rte_thread_register;
> +	rte_thread_unregister;
>  };
> 
>  INTERNAL {
> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
> index 652d19f9f1..9e0ee052b3 100644
> --- a/lib/librte_mempool/rte_mempool.h
> +++ b/lib/librte_mempool/rte_mempool.h
> @@ -28,9 +28,9 @@
>   * rte_mempool_get() or rte_mempool_put() are designed to be called from an EAL
>   * thread due to the internal per-lcore cache. Due to the lack of caching,
>   * rte_mempool_get() or rte_mempool_put() performance will suffer when called
> - * by non-EAL threads. Instead, non-EAL threads should call
> - * rte_mempool_generic_get() or rte_mempool_generic_put() with a user cache
> - * created with rte_mempool_cache_create().
> + * by unregistered non-EAL threads. Instead, unregistered non-EAL threads
> + * should call rte_mempool_generic_get() or rte_mempool_generic_put() with a
> + * user cache created with rte_mempool_cache_create().
>   */
> 
>  #include <stdio.h>
> @@ -1233,7 +1233,7 @@ void rte_mempool_dump(FILE *f, struct rte_mempool *mp);
>  /**
>   * Create a user-owned mempool cache.
>   *
> - * This can be used by non-EAL threads to enable caching when they
> + * This can be used by unregistered non-EAL threads to enable caching when they
>   * interact with a mempool.
>   *
>   * @param size
> @@ -1264,7 +1264,8 @@ rte_mempool_cache_free(struct rte_mempool_cache *cache);
>   * @param lcore_id
>   *   The logical core id.
>   * @return
> - *   A pointer to the mempool cache or NULL if disabled or non-EAL thread.
> + *   A pointer to the mempool cache or NULL if disabled or unregistered non-EAL
> + *   thread.
>   */
>  static __rte_always_inline struct rte_mempool_cache *
>  rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
> --
> 2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/9] eal: relocate per thread symbols to common
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 1/9] eal: relocate per thread symbols to common David Marchand
@ 2020-06-30  9:33     ` Olivier Matz
  0 siblings, 0 replies; 126+ messages in thread
From: Olivier Matz @ 2020-06-30  9:33 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, jerinjacobk, bruce.richardson, mdr, thomas, arybchenko,
	ktraynor, ian.stokes, i.maximets, Harini Ramakrishnan,
	Omar Cardona, Pallavi Kadam, Ranjit Menon

On Fri, Jun 26, 2020 at 04:47:28PM +0200, David Marchand wrote:
> We have per lcore thread symbols scattered in OS implementations but
> common code relies on them.
> Move all of them in common.
> 
> RTE_PER_LCORE(_socket_id) and RTE_PER_LCORE(_cpuset) have public
> accessors and are not exported through the library map, they can be
> made static.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>

Reviewed-by: Olivier Matz <olivier.matz@6wind.com>

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/9] eal: fix multiple definition of per lcore thread id
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 2/9] eal: fix multiple definition of per lcore thread id David Marchand
@ 2020-06-30  9:34     ` Olivier Matz
  0 siblings, 0 replies; 126+ messages in thread
From: Olivier Matz @ 2020-06-30  9:34 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, jerinjacobk, bruce.richardson, mdr, thomas, arybchenko,
	ktraynor, ian.stokes, i.maximets, Neil Horman, Cunming Liang,
	Konstantin Ananyev

On Fri, Jun 26, 2020 at 04:47:29PM +0200, David Marchand wrote:
> Because of the inline accessor + static declaration in rte_gettid(),
> we end up with multiple symbols for RTE_PER_LCORE(_thread_id).
> Each compilation unit will pay a cost when accessing this information
> for the first time.
> 
> $ nm build/app/dpdk-testpmd | grep per_lcore__thread_id
> 0000000000000054 d per_lcore__thread_id.5037
> 0000000000000040 d per_lcore__thread_id.5103
> 0000000000000048 d per_lcore__thread_id.5259
> 000000000000004c d per_lcore__thread_id.5259
> 0000000000000044 d per_lcore__thread_id.5933
> 0000000000000058 d per_lcore__thread_id.6261
> 0000000000000050 d per_lcore__thread_id.7378
> 000000000000005c d per_lcore__thread_id.7496
> 000000000000000c d per_lcore__thread_id.8016
> 0000000000000010 d per_lcore__thread_id.8431
> 
> Make it global as part of the DPDK_21 stable ABI.
> 
> Fixes: ef76436c6834 ("eal: get unique thread id")
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> Acked-by: Ray Kinsella <mdr@ashroe.eu>

Reviewed-by: Olivier Matz <olivier.matz@6wind.com>

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v4 3/9] eal: introduce thread init helper
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 3/9] eal: introduce thread init helper David Marchand
@ 2020-06-30  9:37     ` Olivier Matz
  2020-06-30 12:04       ` David Marchand
  0 siblings, 1 reply; 126+ messages in thread
From: Olivier Matz @ 2020-06-30  9:37 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, jerinjacobk, bruce.richardson, mdr, thomas, arybchenko,
	ktraynor, ian.stokes, i.maximets, Harini Ramakrishnan,
	Omar Cardona, Pallavi Kadam, Ranjit Menon

Hi David,

one minor comment below

On Fri, Jun 26, 2020 at 04:47:30PM +0200, David Marchand wrote:
> Introduce a helper responsible for initialising the per thread context.
> We can then have a unified context for EAL and non-EAL threads and
> remove copy/paste'd OS-specific helpers.
> 
> Per EAL thread CPU affinity setting is separated from the thread init.
> It is to accommodate with Windows EAL where CPU affinity is not set at
> the moment.
> Besides, having affinity set by the master lcore in FreeBSD and Linux
> will make it possible to detect errors rather than panic in the child
> thread. But the cleanup when such an event happens is left for later.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
> Changes since v1:
> - rebased on master, removed Windows workarounds wrt gettid and traces
>   support,
> 
> ---
>  lib/librte_eal/common/eal_common_thread.c | 51 +++++++++++++----------
>  lib/librte_eal/common/eal_thread.h        |  8 ++--
>  lib/librte_eal/freebsd/eal.c              | 14 ++++++-
>  lib/librte_eal/freebsd/eal_thread.c       | 32 +-------------
>  lib/librte_eal/linux/eal.c                | 15 ++++++-
>  lib/librte_eal/linux/eal_thread.c         | 32 +-------------
>  lib/librte_eal/windows/eal.c              |  3 +-
>  lib/librte_eal/windows/eal_thread.c       | 10 +----
>  8 files changed, 66 insertions(+), 99 deletions(-)
> 
> diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> index 280c64bb76..afb30236c5 100644
> --- a/lib/librte_eal/common/eal_common_thread.c
> +++ b/lib/librte_eal/common/eal_common_thread.c
> @@ -71,20 +71,10 @@ eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
>  	return socket_id;
>  }
>  
> -int
> -rte_thread_set_affinity(rte_cpuset_t *cpusetp)
> +static void
> +thread_update_affinity(rte_cpuset_t *cpusetp)
>  {
> -	int s;
> -	unsigned lcore_id;
> -	pthread_t tid;
> -
> -	tid = pthread_self();
> -
> -	s = pthread_setaffinity_np(tid, sizeof(rte_cpuset_t), cpusetp);
> -	if (s != 0) {
> -		RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
> -		return -1;
> -	}
> +	unsigned int lcore_id = rte_lcore_id();
>  
>  	/* store socket_id in TLS for quick access */
>  	RTE_PER_LCORE(_socket_id) =
> @@ -94,14 +84,24 @@ rte_thread_set_affinity(rte_cpuset_t *cpusetp)
>  	memmove(&RTE_PER_LCORE(_cpuset), cpusetp,
>  		sizeof(rte_cpuset_t));
>  
> -	lcore_id = rte_lcore_id();
>  	if (lcore_id != (unsigned)LCORE_ID_ANY) {
>  		/* EAL thread will update lcore_config */
>  		lcore_config[lcore_id].socket_id = RTE_PER_LCORE(_socket_id);
>  		memmove(&lcore_config[lcore_id].cpuset, cpusetp,
>  			sizeof(rte_cpuset_t));
>  	}
> +}
>  
> +int
> +rte_thread_set_affinity(rte_cpuset_t *cpusetp)
> +{
> +	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
> +			cpusetp) != 0) {
> +		RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
> +		return -1;
> +	}
> +
> +	thread_update_affinity(cpusetp);
>  	return 0;
>  }
>  
> @@ -147,6 +147,19 @@ eal_thread_dump_affinity(char *str, unsigned size)
>  	return ret;
>  }
>  
> +void
> +rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
> +{
> +	/* set the lcore ID in per-lcore memory area */
> +	RTE_PER_LCORE(_lcore_id) = lcore_id;
> +
> +	/* acquire system unique id  */
> +	rte_gettid();

If I understand properly, rte_gettid() is now also called for control
threads. I don't think this behavior change can break anything, but it may
be good to highlight it in the commit log.

also, there are 2 spaces before "*/"

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v4 4/9] eal: introduce thread uninit helper
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 4/9] eal: introduce thread uninit helper David Marchand
  2020-06-26 15:00     ` Jerin Jacob
  2020-06-29  8:59     ` [dpdk-dev] [EXT] " Sunil Kumar Kori
@ 2020-06-30  9:42     ` Olivier Matz
  2020-07-01  8:00       ` David Marchand
  2 siblings, 1 reply; 126+ messages in thread
From: Olivier Matz @ 2020-06-30  9:42 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, jerinjacobk, bruce.richardson, mdr, thomas, arybchenko,
	ktraynor, ian.stokes, i.maximets, Jerin Jacob, Sunil Kumar Kori,
	Neil Horman, Harini Ramakrishnan, Omar Cardona, Pallavi Kadam,
	Ranjit Menon

On Fri, Jun 26, 2020 at 04:47:31PM +0200, David Marchand wrote:
> This is a preparation step for dynamically unregistering threads.
> 
> Since we explicitly allocate a per thread trace buffer in
> rte_thread_init, add an internal helper to free this buffer.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
> Note: I preferred renaming the current internal function to free all
> threads trace buffers (new name trace_mem_free()) and reuse the previous
> name (trace_mem_per_thread_free()) when freeing this buffer for a given
> thread.
> 
> Changes since v2:
> - added missing stub for windows tracing support,
> - moved free symbol to exported (experimental) ABI as a counterpart of
>   the alloc symbol we already had,
> 
> Changes since v1:
> - rebased on master, removed Windows workaround wrt traces support,
> 
> ---
>  lib/librte_eal/common/eal_common_thread.c |  9 ++++
>  lib/librte_eal/common/eal_common_trace.c  | 51 +++++++++++++++++++----
>  lib/librte_eal/common/eal_thread.h        |  5 +++
>  lib/librte_eal/common/eal_trace.h         |  2 +-
>  lib/librte_eal/include/rte_trace_point.h  |  9 ++++
>  lib/librte_eal/rte_eal_version.map        |  3 ++
>  lib/librte_eal/windows/eal.c              |  5 +++
>  7 files changed, 75 insertions(+), 9 deletions(-)

[...]

> diff --git a/lib/librte_eal/common/eal_common_trace.c b/lib/librte_eal/common/eal_common_trace.c
> index 875553d7e5..3e620d76ed 100644
> --- a/lib/librte_eal/common/eal_common_trace.c
> +++ b/lib/librte_eal/common/eal_common_trace.c
> @@ -101,7 +101,7 @@ eal_trace_fini(void)
>  {
>  	if (!rte_trace_is_enabled())
>  		return;
> -	trace_mem_per_thread_free();
> +	trace_mem_free();
>  	trace_metadata_destroy();
>  	eal_trace_args_free();
>  }
> @@ -370,24 +370,59 @@ __rte_trace_mem_per_thread_alloc(void)
>  	rte_spinlock_unlock(&trace->lock);
>  }
>  
> +static void
> +trace_mem_per_thread_free_unlocked(struct thread_mem_meta *meta)
> +{
> +	if (meta->area == TRACE_AREA_HUGEPAGE)
> +		eal_free_no_trace(meta->mem);
> +	else if (meta->area == TRACE_AREA_HEAP)
> +		free(meta->mem);
> +}
> +
> +void
> +__rte_trace_mem_per_thread_free(void)
> +{
> +	struct trace *trace = trace_obj_get();
> +	struct __rte_trace_header *header;
> +	uint32_t count;
> +
> +	if (RTE_PER_LCORE(trace_mem) == NULL)
> +		return;
> +
> +	header = RTE_PER_LCORE(trace_mem);

nit:

	header = RTE_PER_LCORE(trace_mem);
	if (header == NULL)
		return;

[...]

> diff --git a/lib/librte_eal/include/rte_trace_point.h b/lib/librte_eal/include/rte_trace_point.h
> index 377c2414aa..686b86fdb1 100644
> --- a/lib/librte_eal/include/rte_trace_point.h
> +++ b/lib/librte_eal/include/rte_trace_point.h
> @@ -230,6 +230,15 @@ __rte_trace_point_fp_is_enabled(void)
>  __rte_experimental
>  void __rte_trace_mem_per_thread_alloc(void);
>  
> +/**
> + * @internal
> + *
> + * Free trace memory buffer per thread.
> + *
> + */
> +__rte_experimental
> +void __rte_trace_mem_per_thread_free(void);

Maybe the doc comment could be reworded a bit
(and the empty line can be removed by the way).

> +
>  /**
>   * @internal
>   *
> diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
> index 0d42d44ce9..5831eea4b0 100644
> --- a/lib/librte_eal/rte_eal_version.map
> +++ b/lib/librte_eal/rte_eal_version.map
> @@ -393,6 +393,9 @@ EXPERIMENTAL {
>  	rte_trace_point_lookup;
>  	rte_trace_regexp;
>  	rte_trace_save;
> +
> +	# added in 20.08
> +	__rte_trace_mem_per_thread_free;

Is it really needed to export this function?


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v4 5/9] eal: move lcore role code
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 5/9] eal: move lcore role code David Marchand
@ 2020-06-30  9:45     ` Olivier Matz
  0 siblings, 0 replies; 126+ messages in thread
From: Olivier Matz @ 2020-06-30  9:45 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, jerinjacobk, bruce.richardson, mdr, thomas, arybchenko,
	ktraynor, ian.stokes, i.maximets

On Fri, Jun 26, 2020 at 04:47:32PM +0200, David Marchand wrote:
> For consistency sake, move all lcore role code in the dedicated
> compilation unit / header.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>

Reviewed-by: Olivier Matz <olivier.matz@6wind.com>

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v4 6/9] eal: register non-EAL threads as lcores
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 6/9] eal: register non-EAL threads as lcores David Marchand
  2020-06-29 14:27     ` Ananyev, Konstantin
@ 2020-06-30 10:07     ` Olivier Matz
  2020-07-01  7:13       ` David Marchand
  1 sibling, 1 reply; 126+ messages in thread
From: Olivier Matz @ 2020-06-30 10:07 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, jerinjacobk, bruce.richardson, mdr, thomas, arybchenko,
	ktraynor, ian.stokes, i.maximets, John McNamara, Marko Kovacevic,
	Anatoly Burakov, Neil Horman

On Fri, Jun 26, 2020 at 04:47:33PM +0200, David Marchand wrote:
> DPDK allows calling some part of its API from a non-EAL thread but this
> has some limitations.
> OVS (and other applications) has its own thread management but still
> want to avoid such limitations by hacking RTE_PER_LCORE(_lcore_id) and
> faking EAL threads potentially unknown of some DPDK component.
> 
> Introduce a new API to register non-EAL thread and associate them to a
> free lcore with a new NON_EAL role.
> This role denotes lcores that do not run DPDK mainloop and as such
> prevents use of rte_eal_wait_lcore() and consorts.
> 
> Multiprocess is not supported as the need for cohabitation with this new
> feature is unclear at the moment.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> Acked-by: Andrew Rybchenko <arybchenko@solarflare.com>
> ---
> Changes since v2:
> - refused multiprocess init once rte_thread_register got called, and
>   vice versa,
> - added warning on multiprocess in rte_thread_register doxygen,
> 
> Changes since v1:
> - moved cleanup on lcore role code in patch 5,
> - added unit test,
> - updated documentation,
> - changed naming from "external thread" to "registered non-EAL thread"
> 
> ---
>  MAINTAINERS                                   |   1 +
>  app/test/Makefile                             |   1 +
>  app/test/autotest_data.py                     |   6 +
>  app/test/meson.build                          |   2 +
>  app/test/test_lcores.c                        | 139 ++++++++++++++++++
>  doc/guides/howto/debug_troubleshoot.rst       |   5 +-
>  .../prog_guide/env_abstraction_layer.rst      |  22 +--
>  doc/guides/prog_guide/mempool_lib.rst         |   2 +-
>  lib/librte_eal/common/eal_common_lcore.c      |  50 ++++++-
>  lib/librte_eal/common/eal_common_mcfg.c       |  36 +++++
>  lib/librte_eal/common/eal_common_thread.c     |  33 +++++
>  lib/librte_eal/common/eal_memcfg.h            |  10 ++
>  lib/librte_eal/common/eal_private.h           |  18 +++
>  lib/librte_eal/freebsd/eal.c                  |   4 +
>  lib/librte_eal/include/rte_lcore.h            |  25 +++-
>  lib/librte_eal/linux/eal.c                    |   4 +
>  lib/librte_eal/rte_eal_version.map            |   2 +
>  lib/librte_mempool/rte_mempool.h              |  11 +-
>  18 files changed, 349 insertions(+), 22 deletions(-)
>  create mode 100644 app/test/test_lcores.c
> 

[...]

> diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
> new file mode 100644
> index 0000000000..864bcbade7
> --- /dev/null
> +++ b/app/test/test_lcores.c
> @@ -0,0 +1,139 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright (c) 2020 Red Hat, Inc.
> + */
> +
> +#include <pthread.h>
> +#include <string.h>
> +
> +#include <rte_lcore.h>
> +
> +#include "test.h"
> +
> +struct thread_context {
> +	enum { INIT, ERROR, DONE } state;
> +	bool lcore_id_any;
> +	pthread_t id;
> +	unsigned int *registered_count;
> +};
> +static void *thread_loop(void *arg)
> +{

missing an empty line here

> +	struct thread_context *t = arg;
> +	unsigned int lcore_id;
> +
> +	lcore_id = rte_lcore_id();
> +	if (lcore_id != LCORE_ID_ANY) {
> +		printf("Incorrect lcore id for new thread %u\n", lcore_id);
> +		t->state = ERROR;
> +	}
> +	rte_thread_register();
> +	lcore_id = rte_lcore_id();
> +	if ((t->lcore_id_any && lcore_id != LCORE_ID_ANY) ||
> +			(!t->lcore_id_any && lcore_id == LCORE_ID_ANY)) {
> +		printf("Could not register new thread, got %u while %sexpecting %u\n",
> +			lcore_id, t->lcore_id_any ? "" : "not ", LCORE_ID_ANY);
> +		t->state = ERROR;
> +	}

To check if rte_thread_register() succeeded, we need to look at
lcore_id. I wonder if rte_thread_register() shouldn't return the lcore
id on success, and -1 on error (rte_errno could be set to give some
info on the error).

The same could be done for rte_thread_init()

[...]

> diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> index a7ae0691bf..1cbddc4b5b 100644
> --- a/lib/librte_eal/common/eal_common_thread.c
> +++ b/lib/librte_eal/common/eal_common_thread.c
> @@ -236,3 +236,36 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>  	pthread_join(*thread, NULL);
>  	return -ret;
>  }
> +
> +void
> +rte_thread_register(void)
> +{
> +	unsigned int lcore_id;
> +	rte_cpuset_t cpuset;
> +
> +	/* EAL init flushes all lcores, we can't register before. */
> +	assert(internal_config.init_complete == 1);
> +	if (pthread_getaffinity_np(pthread_self(), sizeof(cpuset),
> +			&cpuset) != 0)
> +		CPU_ZERO(&cpuset);
> +	lcore_id = eal_lcore_non_eal_allocate();
> +	if (lcore_id >= RTE_MAX_LCORE)
> +		lcore_id = LCORE_ID_ANY;
> +	rte_thread_init(lcore_id, &cpuset);
> +	if (lcore_id != LCORE_ID_ANY)
> +		RTE_LOG(DEBUG, EAL, "Registered non-EAL thread as lcore %u.\n",
> +			lcore_id);
> +}

So, in this case, the affinity of the pthread is kept and saved, in other
words there is no link between the lcore id and the affinity. It means we
are allowing an application to register lcores for dataplane with conflicting
affinities.

I wonder if it could be useful to have an API that automatically sets
the affinity according to the lcore_id. Or a function that creates a
pthread using the specified lcore id, and setting the correct affinity.
It could simplify the work for applications that want to create/destroy
dataplane threads dynamically.

This could be done later however, just an idea.

[...]
> diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
> index 13e5de006f..32a3d999b8 100644
> --- a/lib/librte_eal/freebsd/eal.c
> +++ b/lib/librte_eal/freebsd/eal.c
> @@ -424,6 +424,10 @@ rte_config_init(void)
>  		}
>  		if (rte_eal_config_reattach() < 0)
>  			return -1;
> +		if (!eal_mcfg_enable_multiprocess()) {
> +			RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
> +			return -1;
> +		}
>  		eal_mcfg_update_internal();
>  		break;
>  	case RTE_PROC_AUTO:
> diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
> index 3968c40693..43747e88df 100644
> --- a/lib/librte_eal/include/rte_lcore.h
> +++ b/lib/librte_eal/include/rte_lcore.h
> @@ -31,6 +31,7 @@ enum rte_lcore_role_t {
>  	ROLE_RTE,
>  	ROLE_OFF,
>  	ROLE_SERVICE,
> +	ROLE_NON_EAL,
>  };

I find the name ROLE_NON_EAL a bit heavy (this was also my impression
when reading the doc part).

I understand that there are several types of threads:

- eal (pthread created by eal): ROLE_RTE and ROLE_SERVICE
- unregistered (pthread not created by eal, and not registered): ROLE_OFF
  (note that ROLE_OFF also applies to nonexistent threads)
- dynamic: pthread not created by eal, but registered

What about using ROLE_DYN ? I'm not sure about this name either, it's just
to open the discussion :)


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v4 7/9] eal: add lcore init callbacks
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 7/9] eal: add lcore init callbacks David Marchand
  2020-06-29 12:46     ` Ananyev, Konstantin
@ 2020-06-30 10:09     ` Olivier Matz
  2020-06-30 10:15     ` Olivier Matz
  2 siblings, 0 replies; 126+ messages in thread
From: Olivier Matz @ 2020-06-30 10:09 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, jerinjacobk, bruce.richardson, mdr, thomas, arybchenko,
	ktraynor, ian.stokes, i.maximets, Neil Horman

On Fri, Jun 26, 2020 at 04:47:34PM +0200, David Marchand wrote:
> DPDK components and applications can have their say when a new lcore is
> initialized. For this, they can register a callback for initializing and
> releasing their private data.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>

[...]

> +/**
> + * Register callbacks invoked when initializing and uninitializing a lcore.
> + *
> + * This function calls the init callback with all initialized lcores.
> + * Any error reported by the init callback triggers a rollback calling the
> + * uninit callback for each lcore.
> + * If this step succeeds, the callbacks are put in the lcore callbacks list
> + * that will get called for each lcore allocation/release.
> + *
> + * Note: callbacks execution is serialised under a lock protecting the lcores
> + * and callbacks list.
> + *
> + * @param name
> + *   A name serving as a small description for this callback.
> + * @param init
> + *   The callback invoked when a lcore_id is initialized.
> + * @param uninit
> + *   The callback invoked when a lcore_id is uninitialized.

nit: it could be highlighted that init or uninit can be NULL.

> + * @param arg
> + *   An optional argument that gets passed to the callback when it gets
> + *   invoked.
> + * @return
> + *   On success, returns an opaque pointer for the registered object.
> + *   On failure (either memory allocation issue in the function itself or an
> + *   error is returned by the init callback itself), returns NULL.
> + */
> +__rte_experimental
> +void *
> +rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
> +	rte_lcore_uninit_cb uninit, void *arg);

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v4 8/9] eal: add lcore iterators
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 8/9] eal: add lcore iterators David Marchand
@ 2020-06-30 10:11     ` Olivier Matz
  0 siblings, 0 replies; 126+ messages in thread
From: Olivier Matz @ 2020-06-30 10:11 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, jerinjacobk, bruce.richardson, mdr, thomas, arybchenko,
	ktraynor, ian.stokes, i.maximets, Neil Horman

On Fri, Jun 26, 2020 at 04:47:35PM +0200, David Marchand wrote:
> Add a helper to iterate all lcores.
> The iterator callback is read-only wrt the lcores list.
> 
> Implement a dump function on top of this for debugging.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>

Reviewed-by: Olivier Matz <olivier.matz@6wind.com>

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v4 7/9] eal: add lcore init callbacks
  2020-06-26 14:47   ` [dpdk-dev] [PATCH v4 7/9] eal: add lcore init callbacks David Marchand
  2020-06-29 12:46     ` Ananyev, Konstantin
  2020-06-30 10:09     ` Olivier Matz
@ 2020-06-30 10:15     ` Olivier Matz
  2 siblings, 0 replies; 126+ messages in thread
From: Olivier Matz @ 2020-06-30 10:15 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, jerinjacobk, bruce.richardson, mdr, thomas, arybchenko,
	ktraynor, ian.stokes, i.maximets, Neil Horman

On Fri, Jun 26, 2020 at 04:47:34PM +0200, David Marchand wrote:
> DPDK components and applications can have their say when a new lcore is
> initialized. For this, they can register a callback for initializing and
> releasing their private data.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>

2 more minor comments.

> diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
> index 864bcbade7..e36dceedf9 100644
> --- a/app/test/test_lcores.c
> +++ b/app/test/test_lcores.c
> @@ -5,6 +5,7 @@
>  #include <pthread.h>
>  #include <string.h>
>  
> +#include <rte_common.h>
>  #include <rte_lcore.h>
>  
>  #include "test.h"
> @@ -113,6 +114,229 @@ test_non_eal_lcores(unsigned int eal_threads_count)
>  	return ret;
>  }
>  
> +struct limit_lcore_context {
> +	unsigned int init;
> +	unsigned int max;
> +	unsigned int uninit;
> +};
> +static int
> +limit_lcores_init(unsigned int lcore_id __rte_unused, void *arg)
> +{
> +	struct limit_lcore_context *l = arg;
> +
> +	l->init++;
> +	if (l->init > l->max)
> +		return -1;
> +	return 0;
> +}
> +static void
> +limit_lcores_uninit(unsigned int lcore_id __rte_unused, void *arg)
> +{
> +	struct limit_lcore_context *l = arg;
> +
> +	l->uninit++;
> +}

missing empty lines

[...]

> +static int
> +test_non_eal_lcores_callback(unsigned int eal_threads_count)
> +{
> +	struct thread_context thread_contexts[2];
> +	unsigned int non_eal_threads_count;
> +	struct limit_lcore_context l[2];
> +	unsigned int registered_count;
> +	struct thread_context *t;
> +	void *handle[2];
> +	unsigned int i;
> +	int ret;
> +
> +	memset(l, 0, sizeof(l));
> +	handle[0] = handle[1] = NULL;
> +	non_eal_threads_count = 0;
> +	registered_count = 0;
> +

what about initializing it at declaration?

	struct thread_context thread_contexts[2] = {};
	struct limit_lcore_context l[2] = {};
	void *handle[2] = {};


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-26 14:43                   ` David Marchand
@ 2020-06-30 10:35                     ` Thomas Monjalon
  2020-06-30 12:07                       ` Ananyev, Konstantin
  0 siblings, 1 reply; 126+ messages in thread
From: Thomas Monjalon @ 2020-06-30 10:35 UTC (permalink / raw)
  To: Ananyev, Konstantin, David Marchand
  Cc: dev, jerinjacobk, Richardson, Bruce, mdr, ktraynor, Stokes, Ian,
	i.maximets, Mcnamara, John, Kovacevic, Marko, Burakov, Anatoly,
	Olivier Matz, Andrew Rybchenko, Neil Horman

26/06/2020 16:43, David Marchand:
> On Wed, Jun 24, 2020 at 1:59 PM Ananyev, Konstantin
> <konstantin.ananyev@intel.com> wrote:
> > > > Do you mean - make this new dynamic-lcore API return an error if callied
> > > > from secondary process?
> > >
> > > Yes, and prohibiting from attaching a secondary process if dynamic
> > > lcore API has been used in primary.
> > > I intend to squash in patch 6:
> > > https://github.com/david-marchand/dpdk/commit/e5861ee734bfe2e4dc23d9b919b0db2a32a58aee
> >
> > But secondary process can attach before lcore_register, so we'll have some sort of inconsistency in behaviour.
> 
> If the developer tries to use both features, he gets an ERROR log in
> the two init path.
> So whatever the order at runtime, we inform the developer (who did not
> read/understand the rte_thread_register() documentation) that what he
> is doing is unsupported.

I agree.
Before this patch, pinning a thread on a random core can
trigger some issues.
After this patch, registering an external thread will
take care of logging errors in case of inconsistencies.
So the user will know he is doing something not supported
by the app.

It is a nice improvement.

> > If we really  want to go ahead with such workaround -

It is not a workaround.
It is fixing some old issues and making clear what is really impossible.

> > probably better to introduce explicit EAL flag ( --single-process or so).
> > As Thomas and  Bruce suggested, if I understood them properly.

No I was thinking to maintain the tri-state information:
	- secondary is possible
	- secondary is attached
	- secondary is forbidden

Asking the user to use an option to forbid attaching a secondary process
is the same as telling him it is forbidden.
The error log is enough in my opinion.

> A EAL flag is a stable API from the start, as there is nothing
> describing how we can remove one.
> So a new EAL flag for an experimental API/feature seems contradictory.
> 
> Going with a new features status API... I think it is beyond this series.
> 
> Thomas seems to suggest an automatic resolution when features conflict
> happens.. ?

I suggest allowing the maximum and raise an error when usage conflicts.
It seems this is what you did in v4.

> I'll send the v4, let's discuss it there if you want.



^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v4 3/9] eal: introduce thread init helper
  2020-06-30  9:37     ` Olivier Matz
@ 2020-06-30 12:04       ` David Marchand
  0 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-06-30 12:04 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Jerin Jacob, Bruce Richardson, Ray Kinsella,
	Thomas Monjalon, Andrew Rybchenko, Kevin Traynor, Ian Stokes,
	Ilya Maximets, Harini Ramakrishnan, Omar Cardona, Pallavi Kadam,
	Ranjit Menon

On Tue, Jun 30, 2020 at 11:37 AM Olivier Matz <olivier.matz@6wind.com> wrote:
> >
> > +void
> > +rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
> > +{
> > +     /* set the lcore ID in per-lcore memory area */
> > +     RTE_PER_LCORE(_lcore_id) = lcore_id;
> > +
> > +     /* acquire system unique id  */
> > +     rte_gettid();
>
> If I understand properly, rte_gettid() is now also called for control
> thread. I don't think this behavior change can break anything, but it may
> be good to highlight it in the commit log.

Control thread could not use recursive locks before, because this
rte_gettid() was missing.
Worth mentioning yes.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-30 10:35                     ` Thomas Monjalon
@ 2020-06-30 12:07                       ` Ananyev, Konstantin
  2020-06-30 12:44                         ` Olivier Matz
  2020-06-30 14:35                         ` Thomas Monjalon
  0 siblings, 2 replies; 126+ messages in thread
From: Ananyev, Konstantin @ 2020-06-30 12:07 UTC (permalink / raw)
  To: Thomas Monjalon, David Marchand
  Cc: dev, jerinjacobk, Richardson, Bruce, mdr, ktraynor, Stokes, Ian,
	i.maximets, Mcnamara, John, Kovacevic, Marko, Burakov, Anatoly,
	Olivier Matz, Andrew Rybchenko, Neil Horman

> 
> 26/06/2020 16:43, David Marchand:
> > On Wed, Jun 24, 2020 at 1:59 PM Ananyev, Konstantin
> > <konstantin.ananyev@intel.com> wrote:
> > > > > Do you mean - make this new dynamic-lcore API return an error if callied
> > > > > from secondary process?
> > > >
> > > > Yes, and prohibiting from attaching a secondary process if dynamic
> > > > lcore API has been used in primary.
> > > > I intend to squash in patch 6:
> > > > https://github.com/david-marchand/dpdk/commit/e5861ee734bfe2e4dc23d9b919b0db2a32a58aee
> > >
> > > But secondary process can attach before lcore_register, so we'll have some sort of inconsistency in behaviour.
> >
> > If the developer tries to use both features, he gets an ERROR log in
> > the two init path.
> > So whatever the order at runtime, we inform the developer (who did not
> > read/understand the rte_thread_register() documentation) that what he
> > is doing is unsupported.
> 
> I agree.
> Before this patch, pinning a thread on a random core can
> trigger some issues.
> After this patch, register an external thread will
> take care of logging errors in case of inconsistencies.
> So the user will know he is doing something not supported
> by the app.

I understand that, and returning a meaningful error is definitely
better than a silent crash or memory corruption.
The problem with that approach, as I said before, is that MP group
behaviour becomes non-deterministic.

> 
> It is an nice improvement.
> 
> > > If we really  want to go ahead with such workaround -
> 
> It is not a workaround.
> It is fixing some old issues and making clear what is really impossible.

The root cause of the problem is in our MP model design decisions:
from one side we treat lcore_id as process local data, from other side
in some shared data-structures we use lcore_id as an index.
I think to fix it properly we need either: 
make lcore_id data shared or stop using lcore_id as an index for shared data. 
So from my perspective this approach is just one of possible workarounds.
BTW, there is nothing wrong to have a workaround for the problem
we are not ready to fix right now.
 
> > > probably better to introduce explicit EAL flag ( --single-process or so).
> > > As Thomas and  Bruce suggested, if I understood them properly.
> 
> No I was thinking to maintain the tri-state information:
> 	- secondary is possible
> 	- secondary is attached
> 	- secondary is forbidden

Ok, then I misunderstood you.
 
> Asking the user to use an option to forbid attaching a secondary process
> is the same as telling him it is forbidden.

I don't think it is the same.
On a live and complex system, the user can't always predict whether the
primary proc will use dynamic lcores and, if so, at what particular moment.
Same for secondary process launching - user might never start it,
might start it straight after the primary one,
or might be after several hours. 

> The error log is enough in my opinion.

I think it is better than nothing, but probably not the best one.
Apart from possible non-consistent behaviour, it is quite restrictive:
dynamic lcore_id wouldn't be available on any DPDK MP deployment.
Which is a pity - I think it is a cool and useful feature.
 
What do you guys think about different approach:
introduce new optional EAL parameter to restrict lcore_id
values available for the process.

#let say to start primary proc that can use lcore_id=[0-99] only:
dpdk_primary --lcore-allow=0-99 ... --file-prefix=xz1

#to start secondary one for it with allowed lcore_id=[100-109]:
dpdk_secondary --lcore-allow=100-109 ... --file-prefix=xz1 --proc-type=secondary  
 
It is still a workaround, but that way we don't need to
add any new limitations for dynamic lcores and secondary process usage. 
Now it is up to the user to decide whether multiple processes would use the
same shared data and if so - split the lcore_id space properly among them
(same as he has to do now with static lcores).

> > A EAL flag is a stable API from the start, as there is nothing
> > describing how we can remove one.
> > So a new EAL flag for an experimental API/feature seems contradictory.
> >
> > Going with a new features status API... I think it is beyond this series.
> >
> > Thomas seems to suggest an automatic resolution when features conflict
> > happens.. ?
> 
> I suggest allowing the maximum and raise an error when usage conflicts.
> It seems this is what you did in v4.
> 
> > I'll send the v4, let's discuss it there if you want.
> 


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-30 12:07                       ` Ananyev, Konstantin
@ 2020-06-30 12:44                         ` Olivier Matz
  2020-06-30 14:37                           ` Thomas Monjalon
  2020-06-30 19:02                           ` Ananyev, Konstantin
  2020-06-30 14:35                         ` Thomas Monjalon
  1 sibling, 2 replies; 126+ messages in thread
From: Olivier Matz @ 2020-06-30 12:44 UTC (permalink / raw)
  To: Ananyev, Konstantin
  Cc: Thomas Monjalon, David Marchand, dev, jerinjacobk, Richardson,
	Bruce, mdr, ktraynor, Stokes, Ian, i.maximets, Mcnamara, John,
	Kovacevic, Marko, Burakov, Anatoly, Andrew Rybchenko,
	Neil Horman

On Tue, Jun 30, 2020 at 12:07:32PM +0000, Ananyev, Konstantin wrote:
> > 
> > 26/06/2020 16:43, David Marchand:
> > > On Wed, Jun 24, 2020 at 1:59 PM Ananyev, Konstantin
> > > <konstantin.ananyev@intel.com> wrote:
> > > > > > Do you mean - make this new dynamic-lcore API return an error if callied
> > > > > > from secondary process?
> > > > >
> > > > > Yes, and prohibiting from attaching a secondary process if dynamic
> > > > > lcore API has been used in primary.
> > > > > I intend to squash in patch 6:
> > > > > https://github.com/david-marchand/dpdk/commit/e5861ee734bfe2e4dc23d9b919b0db2a32a58aee
> > > >
> > > > But secondary process can attach before lcore_register, so we'll have some sort of inconsistency in behaviour.
> > >
> > > If the developer tries to use both features, he gets an ERROR log in
> > > the two init path.
> > > So whatever the order at runtime, we inform the developer (who did not
> > > read/understand the rte_thread_register() documentation) that what he
> > > is doing is unsupported.
> > 
> > I agree.
> > Before this patch, pinning a thread on a random core can
> > trigger some issues.
> > After this patch, register an external thread will
> > take care of logging errors in case of inconsistencies.
> > So the user will know he is doing something not supported
> > by the app.
> 
> I understand that, and return a meaningful error is definitely
> better the silent crash or memory corruption.
> The problem with that approach, as I said before, MP group
> behaviour becomes non-deterministic. 
> 
> > 
> > It is an nice improvement.
> > 
> > > > If we really  want to go ahead with such workaround -
> > 
> > It is not a workaround.
> > It is fixing some old issues and making clear what is really impossible.
> 
> The root cause of the problem is in our MP model design decisions:
> from one side we treat lcore_id as process local data, from other side
> in some shared data-structures we use lcore_id as an index.
> I think to fix it properly we need either: 
> make lcore_id data shared or stop using lcore_id as an index for shared data. 
> So from my perspective this approach is just one of possible workarounds.
> BTW, there is nothing wrong to have a workaround for the problem
> we are not ready to fix right now.
>  
> > > > probably better to introduce explicit EAL flag ( --single-process or so).
> > > > As Thomas and  Bruce suggested, if I understood them properly.
> > 
> > No I was thinking to maintain the tri-state information:
> > 	- secondary is possible
> > 	- secondary is attached
> > 	- secondary is forbidden
> 
> Ok, then I misunderstood you.
>  
> > Asking the user to use an option to forbid attaching a secondary process
> > is the same as telling him it is forbidden.
> 
> I don't think it is the same.
> On a live and complex system user can't always predict will the primary proc 
> use dynamic lcore and if it will at what particular moment.
> Same for secondary process launching - user might never start it,
> might start it straight after the primary one,
> or might be after several hours. 
> 
> > The error log is enough in my opinion.
> 
> I think it is better than nothing, but probably not the best one.
> Apart from possible non-consistent behaviour, it is quite restrictive:
> dynamic lcore_id wouldn't be available on any DPDK MP deployment.
> Which is a pity - I think it is a cool and useful feature.
>  
> What do you guys think about different approach:
> introduce new optional EAL parameter to restrict lcore_id
> values available for the process.
> 
> #let say to start primary proc that can use lcore_id=[0-99] only:
> dpdk_primary --lcore-allow=0-99 ... --file-prefix=xz1
> 
> #to start secondary one for it with allowed lcore_id=[100-109]:
> dpdk_secondary --lcore-allow=100-109 ... --file-prefix=xz1 --proc-type=secondary  
>  
> It is still a workaround, but that way we don't need to
> add any new limitations for dynamic lcores and secondary process usage. 
> Now it is up to user to decide would multiple-process use the same shared data
> and if so - split lcore_id space properly among them
> (same as he has to do now with static lcores).

A simpler variant of your approach could be to add
"--proc-type=standalone" to explicitly disable MP and enable dynamic thread
registration.



> > > A EAL flag is a stable API from the start, as there is nothing
> > > describing how we can remove one.
> > > So a new EAL flag for an experimental API/feature seems contradictory.
> > >
> > > Going with a new features status API... I think it is beyond this series.
> > >
> > > Thomas seems to suggest an automatic resolution when features conflict
> > > happens.. ?
> > 
> > I suggest allowing the maximum and raise an error when usage conflicts.
> > It seems this is what you did in v4.
> > 
> > > I'll send the v4, let's discuss it there if you want.
> > 
> 

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-30 12:07                       ` Ananyev, Konstantin
  2020-06-30 12:44                         ` Olivier Matz
@ 2020-06-30 14:35                         ` Thomas Monjalon
  2020-06-30 18:57                           ` Ananyev, Konstantin
  1 sibling, 1 reply; 126+ messages in thread
From: Thomas Monjalon @ 2020-06-30 14:35 UTC (permalink / raw)
  To: David Marchand, Ananyev, Konstantin
  Cc: dev, jerinjacobk, Richardson, Bruce, mdr, ktraynor, Stokes, Ian,
	i.maximets, Mcnamara, John, Kovacevic, Marko, Burakov, Anatoly,
	Olivier Matz, Andrew Rybchenko, Neil Horman

30/06/2020 14:07, Ananyev, Konstantin:
> > 26/06/2020 16:43, David Marchand:
> > > On Wed, Jun 24, 2020 at 1:59 PM Ananyev, Konstantin
> > > <konstantin.ananyev@intel.com> wrote:
> > > > > > Do you mean - make this new dynamic-lcore API return an error if callied
> > > > > > from secondary process?
> > > > >
> > > > > Yes, and prohibiting from attaching a secondary process if dynamic
> > > > > lcore API has been used in primary.
> > > > > I intend to squash in patch 6:
> > > > > https://github.com/david-marchand/dpdk/commit/e5861ee734bfe2e4dc23d9b919b0db2a32a58aee
> > > >
> > > > But secondary process can attach before lcore_register, so we'll have some sort of inconsistency in behaviour.
> > >
> > > If the developer tries to use both features, he gets an ERROR log in
> > > the two init path.
> > > So whatever the order at runtime, we inform the developer (who did not
> > > read/understand the rte_thread_register() documentation) that what he
> > > is doing is unsupported.
> > 
> > I agree.
> > Before this patch, pinning a thread on a random core can
> > trigger some issues.
> > After this patch, register an external thread will
> > take care of logging errors in case of inconsistencies.
> > So the user will know he is doing something not supported
> > by the app.
> 
> I understand that, and return a meaningful error is definitely
> better the silent crash or memory corruption.
> The problem with that approach, as I said before, MP group
> behaviour becomes non-deterministic.

It was already non-deterministic before these patches.

> > It is an nice improvement.
> > 
> > > > If we really  want to go ahead with such workaround -
> > 
> > It is not a workaround.
> > It is fixing some old issues and making clear what is really impossible.
> 
> The root cause of the problem is in our MP model design decisions:
> from one side we treat lcore_id as process local data, from other side
> in some shared data-structures we use lcore_id as an index.
> I think to fix it properly we need either: 
> make lcore_id data shared or stop using lcore_id as an index for shared data. 
> So from my perspective this approach is just one of possible workarounds.
> BTW, there is nothing wrong to have a workaround for the problem
> we are not ready to fix right now.

I think you are trying to fix multi-process handling.
This patch is not about multi-process, it only highlights incompatibilities.

> > > > probably better to introduce explicit EAL flag ( --single-process or so).
> > > > As Thomas and  Bruce suggested, if I understood them properly.
> > 
> > No I was thinking to maintain the tri-state information:
> > 	- secondary is possible
> > 	- secondary is attached
> > 	- secondary is forbidden
> 
> Ok, then I misunderstood you.
>  
> > Asking the user to use an option to forbid attaching a secondary process
> > is the same as telling him it is forbidden.
> 
> I don't think it is the same.
> On a live and complex system user can't always predict will the primary proc 
> use dynamic lcore and if it will at what particular moment.
> Same for secondary process launching - user might never start it,
> might start it straight after the primary one,
> or might be after several hours. 

I don't see the difference.
An app which register external threads is not compatible
with multi-process. It needs to be clear.
If the user tries to do it anyway, there can be some error, OK.

> > The error log is enough in my opinion.
> 
> I think it is better than nothing, but probably not the best one.
> Apart from possible non-consistent behaviour, it is quite restrictive:
> dynamic lcore_id wouldn't be available on any DPDK MP deployment.
> Which is a pity - I think it is a cool and useful feature.

So you are asking to extend the feature.
Honestly, I'm not a fan of multi-process,
so I would not push any feature for it.

If we don't add any new option now, and restrict MP handling
to error messages, it would not prevent from extending
in future, right?


> What do you guys think about different approach:
> introduce new optional EAL parameter to restrict lcore_id
> values available for the process.
> 
> #let say to start primary proc that can use lcore_id=[0-99] only:
> dpdk_primary --lcore-allow=0-99 ... --file-prefix=xz1
> 
> #to start secondary one for it with allowed lcore_id=[100-109]:
> dpdk_secondary --lcore-allow=100-109 ... --file-prefix=xz1 --proc-type=secondary  
>  
> It is still a workaround, but that way we don't need to
> add any new limitations for dynamic lcores and secondary process usage. 
> Now it is up to user to decide would multiple-process use the same shared data
> and if so - split lcore_id space properly among them
> (same as he has to do now with static lcores).

Isn't it pushing too much to the user?


> > > A EAL flag is a stable API from the start, as there is nothing
> > > describing how we can remove one.
> > > So a new EAL flag for an experimental API/feature seems contradictory.
> > >
> > > Going with a new features status API... I think it is beyond this series.
> > >
> > > Thomas seems to suggest an automatic resolution when features conflict
> > > happens.. ?
> > 
> > I suggest allowing the maximum and raise an error when usage conflicts.
> > It seems this is what you did in v4.
> > 
> > > I'll send the v4, let's discuss it there if you want.




^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-30 12:44                         ` Olivier Matz
@ 2020-06-30 14:37                           ` Thomas Monjalon
  2020-06-30 19:02                           ` Ananyev, Konstantin
  1 sibling, 0 replies; 126+ messages in thread
From: Thomas Monjalon @ 2020-06-30 14:37 UTC (permalink / raw)
  To: Olivier Matz
  Cc: Ananyev, Konstantin, David Marchand, dev, jerinjacobk,
	Richardson, Bruce, mdr, ktraynor, Stokes, Ian, i.maximets,
	Mcnamara, John, Kovacevic, Marko, Burakov, Anatoly,
	Andrew Rybchenko, Neil Horman

30/06/2020 14:44, Olivier Matz:
> On Tue, Jun 30, 2020 at 12:07:32PM +0000, Ananyev, Konstantin wrote:
> > I think it is better than nothing, but probably not the best one.
> > Apart from possible non-consistent behaviour, it is quite restrictive:
> > dynamic lcore_id wouldn't be available on any DPDK MP deployment.
> > Which is a pity - I think it is a cool and useful feature.
> >  
> > What do you guys think about different approach:
> > introduce new optional EAL parameter to restrict lcore_id
> > values available for the process.
> > 
> > #let say to start primary proc that can use lcore_id=[0-99] only:
> > dpdk_primary --lcore-allow=0-99 ... --file-prefix=xz1
> > 
> > #to start secondary one for it with allowed lcore_id=[100-109]:
> > dpdk_secondary --lcore-allow=100-109 ... --file-prefix=xz1 --proc-type=secondary  
> >  
> > It is still a workaround, but that way we don't need to
> > add any new limitations for dynamic lcores and secondary process usage. 
> > Now it is up to user to decide would multiple-process use the same shared data
> > and if so - split lcore_id space properly among them
> > (same as he has to do now with static lcores).
> 
> A variant (more simple) of your approach could be to add
> "--proc-type=standalone" to explicitly disable MP and enable dynamic thread
> registration.

I don't think adding a restriction from user input is adding a feature.
Konstantin wants to support multi-process with non-EAL threads,
which is the opposite of your proposal, Olivier :-)



^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-30 14:35                         ` Thomas Monjalon
@ 2020-06-30 18:57                           ` Ananyev, Konstantin
  2020-07-01  7:48                             ` David Marchand
  0 siblings, 1 reply; 126+ messages in thread
From: Ananyev, Konstantin @ 2020-06-30 18:57 UTC (permalink / raw)
  To: Thomas Monjalon, David Marchand
  Cc: dev, jerinjacobk, Richardson, Bruce, mdr, ktraynor, Stokes, Ian,
	i.maximets, Mcnamara, John, Kovacevic, Marko, Burakov, Anatoly,
	Olivier Matz, Andrew Rybchenko, Neil Horman


 
> 30/06/2020 14:07, Ananyev, Konstantin:
> > > 26/06/2020 16:43, David Marchand:
> > > > On Wed, Jun 24, 2020 at 1:59 PM Ananyev, Konstantin
> > > > <konstantin.ananyev@intel.com> wrote:
> > > > > > > Do you mean - make this new dynamic-lcore API return an error if callied
> > > > > > > from secondary process?
> > > > > >
> > > > > > Yes, and prohibiting from attaching a secondary process if dynamic
> > > > > > lcore API has been used in primary.
> > > > > > I intend to squash in patch 6:
> > > > > > https://github.com/david-marchand/dpdk/commit/e5861ee734bfe2e4dc23d9b919b0db2a32a58aee
> > > > >
> > > > > But secondary process can attach before lcore_register, so we'll have some sort of inconsistency in behaviour.
> > > >
> > > > If the developer tries to use both features, he gets an ERROR log in
> > > > the two init path.
> > > > So whatever the order at runtime, we inform the developer (who did not
> > > > read/understand the rte_thread_register() documentation) that what he
> > > > is doing is unsupported.
> > >
> > > I agree.
> > > Before this patch, pinning a thread on a random core can
> > > trigger some issues.
> > > After this patch, register an external thread will
> > > take care of logging errors in case of inconsistencies.
> > > So the user will know he is doing something not supported
> > > by the app.
> >
> > I understand that, and return a meaningful error is definitely
> > better the silent crash or memory corruption.
> > The problem with that approach, as I said before, MP group
> > behaviour becomes non-deterministic.
> 
> It was already non-deterministic before these patches.
> > > It is an nice improvement.
> > >
> > > > > If we really  want to go ahead with such workaround -
> > >
> > > It is not a workaround.
> > > It is fixing some old issues and making clear what is really impossible.
> >
> > The root cause of the problem is in our MP model design decisions:
> > from one side we treat lcore_id as process local data, from other side
> > in some shared data-structures we use lcore_id as an index.
> > I think to fix it properly we need either:
> > make lcore_id data shared or stop using lcore_id as an index for shared data.
> > So from my perspective this approach is just one of possible workarounds.
> > BTW, there is nothing wrong to have a workaround for the problem
> > we are not ready to fix right now.
> 
> I think you are trying to fix multi-process handling.
> This patch is not about multi-process, it only highlight incompatibilities.

Yes, the problem has been there for a while.
David's patch just made it more visible.
We are discussing different workarounds for the problem.

> > > > > probably better to introduce explicit EAL flag ( --single-process or so).
> > > > > As Thomas and  Bruce suggested, if I understood them properly.
> > >
> > > No I was thinking to maintain the tri-state information:
> > > 	- secondary is possible
> > > 	- secondary is attached
> > > 	- secondary is forbidden
> >
> > Ok, then I misunderstood you.
> >
> > > Asking the user to use an option to forbid attaching a secondary process
> > > is the same as telling him it is forbidden.
> >
> > I don't think it is the same.
> > On a live and complex system user can't always predict will the primary proc
> > use dynamic lcore and if it will at what particular moment.
> > Same for secondary process launching - user might never start it,
> > might start it straight after the primary one,
> > or might be after several hours.
> 
> I don't see the difference.
> An app which register external threads is not compatible
> with multi-process. It needs to be clear.
> If the user tries to do it anyway, there can be some error, OK.

Copying from other mail thread:
Imagine the situation - there is a primary proc (supposed to run forever)
that does  rte_thread_register/rte_thread_unregister during its lifetime.
Plus from time to time user runs some secondary process to collect stats/debug
the primary one (proc-info or so).
Now behaviour of such system will be non-deterministic:
In some runs primary proc will do rte_thread_register() first,
and then secondary proc will be never able to attach.
In other cases - secondary will win the race, and then for primary 
eal_lcore_non_eal_allocate() will always fail.
Which means different behaviour between runs, varying performance, etc.

> > > The error log is enough in my opinion.
> >
> > I think it is better than nothing, but probably not the best one.
> > Apart from possible non-consistent behaviour, it is quite restrictive:
> > dynamic lcore_id wouldn't be available on any DPDK MP deployment.
> > Which is a pity - I think it is a cool and useful feature.
> 
> So you are asking to extend the feature.

I am asking for a solution that would guarantee deterministic behaviour to the user.
If dynamic lcores and MP support need to be mutually exclusive,
then there should be a clean way for the user to *always* enable
one and disable the other.
"--proc-type=standalone" will at least guarantee such consistent behaviour between runs:
secondary proc will always fail to attach and  eal_lcore_non_eal_allocate() will always succeed
(as long as there are free lcore_ids, of course).
Though I think even better would be not to make them mutually exclusive,
but instead let user to split lcore_id space accordingly.
Let me list the options currently under discussion:

a)   New EAL parameter '--lcore-allow=...'
	Explicit EAL parameter to enable dyn-lcore=Y
	Consistent behaviour between runs=Y
	DYN-lcores/MP-support are mutually exclusive=N 

b)  Extend '--proc-type' EAL parameter with new 'standalone' type
	Explicit EAL parameter to enable dyn-lcore =Y
	Consistent behaviour between runs=Y
	Dyn lcores/MP-support are mutually exclusive=Y

c) dynamic allow/forbid dynamic-lcore/MP support
	Explicit EAL parameter=N
	Consistent behaviour between runs=N
	Dyn lcores/MP-support are mutually exclusive=Y

My preference list (from top to bottom): a, b, c.

> Honestly, I'm not a fan of multi-process,
> so I would not push any feature for it.

Me too, but as we can't drop it, we probably have no
choice but to live with it. 

> 
> If we don't add any new option now, and restrict MP handling
> to error messages, it would not prevent from extending
> in future, right?

It shouldn't I think.
Though what is the urgency to push this feature without having an
agreement first?
 
> 
> > What do you guys think about different approach:
> > introduce new optional EAL parameter to restrict lcore_id
> > values available for the process.
> >
> > #let say to start primary proc that can use lcore_id=[0-99] only:
> > dpdk_primary --lcore-allow=0-99 ... --file-prefix=xz1
> >
> > #to start secondary one for it with allowed lcore_id=[100-109]:
> > dpdk_secondary --lcore-allow=100-109 ... --file-prefix=xz1 --proc-type=secondary
> >
> > It is still a workaround, but that way we don't need to
> > add any new limitations for dynamic lcores and secondary process usage.
> > Now it is up to user to decide would multiple-process use the same shared data
> > and if so - split lcore_id space properly among them
> > (same as he has to do now with static lcores).
> 
> Isn't it pushing too much to the user?

User has to do the similar thing with static lcores right now.
 
> 
> > > > A EAL flag is a stable API from the start, as there is nothing
> > > > describing how we can remove one.
> > > > So a new EAL flag for an experimental API/feature seems contradictory.
> > > >
> > > > Going with a new features status API... I think it is beyond this series.
> > > >
> > > > Thomas seems to suggest an automatic resolution when features conflict
> > > > happens.. ?
> > >
> > > I suggest allowing the maximum and raise an error when usage conflicts.
> > > It seems this is what you did in v4.
> > >
> > > > I'll send the v4, let's discuss it there if you want.
> 
> 


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-30 12:44                         ` Olivier Matz
  2020-06-30 14:37                           ` Thomas Monjalon
@ 2020-06-30 19:02                           ` Ananyev, Konstantin
  1 sibling, 0 replies; 126+ messages in thread
From: Ananyev, Konstantin @ 2020-06-30 19:02 UTC (permalink / raw)
  To: Olivier Matz
  Cc: Thomas Monjalon, David Marchand, dev, jerinjacobk, Richardson,
	Bruce, mdr, ktraynor, Stokes, Ian, i.maximets, Mcnamara, John,
	Kovacevic, Marko, Burakov, Anatoly, Andrew Rybchenko,
	Neil Horman

> 
> On Tue, Jun 30, 2020 at 12:07:32PM +0000, Ananyev, Konstantin wrote:
> > >
> > > 26/06/2020 16:43, David Marchand:
> > > > On Wed, Jun 24, 2020 at 1:59 PM Ananyev, Konstantin
> > > > <konstantin.ananyev@intel.com> wrote:
> > > > > > > Do you mean - make this new dynamic-lcore API return an error if callied
> > > > > > > from secondary process?
> > > > > >
> > > > > > Yes, and prohibiting from attaching a secondary process if dynamic
> > > > > > lcore API has been used in primary.
> > > > > > I intend to squash in patch 6:
> > > > > > https://github.com/david-marchand/dpdk/commit/e5861ee734bfe2e4dc23d9b919b0db2a32a58aee
> > > > >
> > > > > But secondary process can attach before lcore_register, so we'll have some sort of inconsistency in behaviour.
> > > >
> > > > If the developer tries to use both features, he gets an ERROR log in
> > > > the two init path.
> > > > So whatever the order at runtime, we inform the developer (who did not
> > > > read/understand the rte_thread_register() documentation) that what he
> > > > is doing is unsupported.
> > >
> > > I agree.
> > > Before this patch, pinning a thread on a random core can
> > > trigger some issues.
> > > After this patch, register an external thread will
> > > take care of logging errors in case of inconsistencies.
> > > So the user will know he is doing something not supported
> > > by the app.
> >
> > I understand that, and return a meaningful error is definitely
> > better the silent crash or memory corruption.
> > The problem with that approach, as I said before, MP group
> > behaviour becomes non-deterministic.
> >
> > >
> > > It is an nice improvement.
> > >
> > > > > If we really  want to go ahead with such workaround -
> > >
> > > It is not a workaround.
> > > It is fixing some old issues and making clear what is really impossible.
> >
> > The root cause of the problem is in our MP model design decisions:
> > from one side we treat lcore_id as process local data, from other side
> > in some shared data-structures we use lcore_id as an index.
> > I think to fix it properly we need either:
> > make lcore_id data shared or stop using lcore_id as an index for shared data.
> > So from my perspective this approach is just one of possible workarounds.
> > BTW, there is nothing wrong to have a workaround for the problem
> > we are not ready to fix right now.
> >
> > > > > probably better to introduce explicit EAL flag ( --single-process or so).
> > > > > As Thomas and  Bruce suggested, if I understood them properly.
> > >
> > > No I was thinking to maintain the tri-state information:
> > > 	- secondary is possible
> > > 	- secondary is attached
> > > 	- secondary is forbidden
> >
> > Ok, then I misunderstood you.
> >
> > > Asking the user to use an option to forbid attaching a secondary process
> > > is the same as telling him it is forbidden.
> >
> > I don't think it is the same.
> > On a live and complex system user can't always predict will the primary proc
> > use dynamic lcore and if it will at what particular moment.
> > Same for secondary process launching - user might never start it,
> > might start it straight after the primary one,
> > or might be after several hours.
> >
> > > The error log is enough in my opinion.
> >
> > I think it is better than nothing, but probably not the best one.
> > Apart from possible non-consistent behaviour, it is quite restrictive:
> > dynamic lcore_id wouldn't be available on any DPDK MP deployment.
> > Which is a pity - I think it is a cool and useful feature.
> >
> > What do you guys think about different approach:
> > introduce new optional EAL parameter to restrict lcore_id
> > values available for the process.
> >
> > #let say to start primary proc that can use lcore_id=[0-99] only:
> > dpdk_primary --lcore-allow=0-99 ... --file-prefix=xz1
> >
> > #to start secondary one for it with allowed lcore_id=[100-109]:
> > dpdk_secondary --lcore-allow=100-109 ... --file-prefix=xz1 --proc-type=secondary
> >
> > It is still a workaround, but that way we don't need to
> > add any new limitations for dynamic lcores and secondary process usage.
> > Now it is up to user to decide would multiple-process use the same shared data
> > and if so - split lcore_id space properly among them
> > (same as he has to do now with static lcores).
> 
> A variant (more simple) of your approach could be to add
> "--proc-type=standalone" to explicitly disable MP and enable dynamic thread
> registration.
> 

For me it is a bit too restrictive, but yes it is a possible option,
and from my perspective - a better one than disabling secondary proc support on the fly.
I tried to summarize different options under discussion in another mail of this thread.
Please have a look. 

> 
> > > > A EAL flag is a stable API from the start, as there is nothing
> > > > describing how we can remove one.
> > > > So a new EAL flag for an experimental API/feature seems contradictory.
> > > >
> > > > Going with a new features status API... I think it is beyond this series.
> > > >
> > > > Thomas seems to suggest an automatic resolution when features conflict
> > > > happens.. ?
> > >
> > > I suggest allowing the maximum and raise an error when usage conflicts.
> > > It seems this is what you did in v4.
> > >
> > > > I'll send the v4, let's discuss it there if you want.
> > >
> >

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v4 6/9] eal: register non-EAL threads as lcores
  2020-06-30 10:07     ` Olivier Matz
@ 2020-07-01  7:13       ` David Marchand
  2020-07-01  9:11         ` Olivier Matz
  0 siblings, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-07-01  7:13 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Jerin Jacob, Bruce Richardson, Ray Kinsella,
	Thomas Monjalon, Andrew Rybchenko, Kevin Traynor, Ian Stokes,
	Ilya Maximets, John McNamara, Marko Kovacevic, Anatoly Burakov,
	Neil Horman

On Tue, Jun 30, 2020 at 12:07 PM Olivier Matz <olivier.matz@6wind.com> wrote:
>
> On Fri, Jun 26, 2020 at 04:47:33PM +0200, David Marchand wrote:
> > DPDK allows calling some part of its API from a non-EAL thread but this
> > has some limitations.
> > OVS (and other applications) has its own thread management but still
> > want to avoid such limitations by hacking RTE_PER_LCORE(_lcore_id) and
> > faking EAL threads potentially unknown of some DPDK component.
> >
> > Introduce a new API to register non-EAL thread and associate them to a
> > free lcore with a new NON_EAL role.
> > This role denotes lcores that do not run DPDK mainloop and as such
> > prevents use of rte_eal_wait_lcore() and consorts.
> >
> > Multiprocess is not supported as the need for cohabitation with this new
> > feature is unclear at the moment.
> >
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > Acked-by: Andrew Rybchenko <arybchenko@solarflare.com>
> > ---
> > Changes since v2:
> > - refused multiprocess init once rte_thread_register got called, and
> >   vice versa,
> > - added warning on multiprocess in rte_thread_register doxygen,
> >
> > Changes since v1:
> > - moved cleanup on lcore role code in patch 5,
> > - added unit test,
> > - updated documentation,
> > - changed naming from "external thread" to "registered non-EAL thread"
> >
> > ---
> >  MAINTAINERS                                   |   1 +
> >  app/test/Makefile                             |   1 +
> >  app/test/autotest_data.py                     |   6 +
> >  app/test/meson.build                          |   2 +
> >  app/test/test_lcores.c                        | 139 ++++++++++++++++++
> >  doc/guides/howto/debug_troubleshoot.rst       |   5 +-
> >  .../prog_guide/env_abstraction_layer.rst      |  22 +--
> >  doc/guides/prog_guide/mempool_lib.rst         |   2 +-
> >  lib/librte_eal/common/eal_common_lcore.c      |  50 ++++++-
> >  lib/librte_eal/common/eal_common_mcfg.c       |  36 +++++
> >  lib/librte_eal/common/eal_common_thread.c     |  33 +++++
> >  lib/librte_eal/common/eal_memcfg.h            |  10 ++
> >  lib/librte_eal/common/eal_private.h           |  18 +++
> >  lib/librte_eal/freebsd/eal.c                  |   4 +
> >  lib/librte_eal/include/rte_lcore.h            |  25 +++-
> >  lib/librte_eal/linux/eal.c                    |   4 +
> >  lib/librte_eal/rte_eal_version.map            |   2 +
> >  lib/librte_mempool/rte_mempool.h              |  11 +-
> >  18 files changed, 349 insertions(+), 22 deletions(-)
> >  create mode 100644 app/test/test_lcores.c
> >
>
> [...]
>
> > diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
> > new file mode 100644
> > index 0000000000..864bcbade7
> > --- /dev/null
> > +++ b/app/test/test_lcores.c
> > @@ -0,0 +1,139 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright (c) 2020 Red Hat, Inc.
> > + */
> > +
> > +#include <pthread.h>
> > +#include <string.h>
> > +
> > +#include <rte_lcore.h>
> > +
> > +#include "test.h"
> > +
> > +struct thread_context {
> > +     enum { INIT, ERROR, DONE } state;
> > +     bool lcore_id_any;
> > +     pthread_t id;
> > +     unsigned int *registered_count;
> > +};
> > +static void *thread_loop(void *arg)
> > +{
>
> missing an empty line here
>
> > +     struct thread_context *t = arg;
> > +     unsigned int lcore_id;
> > +
> > +     lcore_id = rte_lcore_id();
> > +     if (lcore_id != LCORE_ID_ANY) {
> > +             printf("Incorrect lcore id for new thread %u\n", lcore_id);
> > +             t->state = ERROR;
> > +     }
> > +     rte_thread_register();
> > +     lcore_id = rte_lcore_id();
> > +     if ((t->lcore_id_any && lcore_id != LCORE_ID_ANY) ||
> > +                     (!t->lcore_id_any && lcore_id == LCORE_ID_ANY)) {
> > +             printf("Could not register new thread, got %u while %sexpecting %u\n",
> > +                     lcore_id, t->lcore_id_any ? "" : "not ", LCORE_ID_ANY);
> > +             t->state = ERROR;
> > +     }
>
> To check if rte_thread_register() succedeed, we need to look at
> lcore_id. I wonder if rte_thread_register() shouldn't return the lcore
> id on success, and -1 on error (rte_errno could be set to give some
> info on the error).

lcore_id are unsigned integers with the special value LCORE_ID_ANY
mapped to UINT32_MAX (should be UINT_MAX? anyway...).

rte_thread_register could return an error code as there are no ERROR
level logs about why a lcore allocation failed.
We could then distinguish a shortage of lcore (or init callback
refusal) from an invalid call before rte_eal_init() or when mp is in
use.

About returning the lcore_id as part of the return code, this would
map to -1 for LCORE_ID_ANY.
This is probably not a problem but still odd.


>
> The same could be done for rte_thread_init()

?
Not sure where this one could fail.


>
> [...]
>
> > diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> > index a7ae0691bf..1cbddc4b5b 100644
> > --- a/lib/librte_eal/common/eal_common_thread.c
> > +++ b/lib/librte_eal/common/eal_common_thread.c
> > @@ -236,3 +236,36 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
> >       pthread_join(*thread, NULL);
> >       return -ret;
> >  }
> > +
> > +void
> > +rte_thread_register(void)
> > +{
> > +     unsigned int lcore_id;
> > +     rte_cpuset_t cpuset;
> > +
> > +     /* EAL init flushes all lcores, we can't register before. */
> > +     assert(internal_config.init_complete == 1);
> > +     if (pthread_getaffinity_np(pthread_self(), sizeof(cpuset),
> > +                     &cpuset) != 0)
> > +             CPU_ZERO(&cpuset);
> > +     lcore_id = eal_lcore_non_eal_allocate();
> > +     if (lcore_id >= RTE_MAX_LCORE)
> > +             lcore_id = LCORE_ID_ANY;
> > +     rte_thread_init(lcore_id, &cpuset);
> > +     if (lcore_id != LCORE_ID_ANY)
> > +             RTE_LOG(DEBUG, EAL, "Registered non-EAL thread as lcore %u.\n",
> > +                     lcore_id);
> > +}
>
> So, in this case, the affinity of the pthread is kept and saved, in other
> words there is no link between the lcore id and the affinity. It means we
> are allowing an application to register lcores for dataplane with conflicting
> affinities.

This is not something new, applications using --lcores option already
live with this.
We have warnings in the documentation about non-EAL threads and about
the dangers of conflicting affinities.
Hopefully, the users of this API know what they are doing since they
chose not to use EAL threads.


>
> I wonder if it could be useful to have an API that automatically sets
> the affinity according to the lcore_id. Or a function that creates a
> pthread using the specified lcore id, and setting the correct affinity.
> I could simplify the work for applications that want to create/destroy
> dataplane threads dynamically.

Do you mean EAL threads dynamic creation/suppression?


>
> This could be done later however, just an idea.

For now, I don't see the need.


>
> [...]
> > diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
> > index 13e5de006f..32a3d999b8 100644
> > --- a/lib/librte_eal/freebsd/eal.c
> > +++ b/lib/librte_eal/freebsd/eal.c
> > @@ -424,6 +424,10 @@ rte_config_init(void)
> >               }
> >               if (rte_eal_config_reattach() < 0)
> >                       return -1;
> > +             if (!eal_mcfg_enable_multiprocess()) {
> > +                     RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
> > +                     return -1;
> > +             }
> >               eal_mcfg_update_internal();
> >               break;
> >       case RTE_PROC_AUTO:
> > diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
> > index 3968c40693..43747e88df 100644
> > --- a/lib/librte_eal/include/rte_lcore.h
> > +++ b/lib/librte_eal/include/rte_lcore.h
> > @@ -31,6 +31,7 @@ enum rte_lcore_role_t {
> >       ROLE_RTE,
> >       ROLE_OFF,
> >       ROLE_SERVICE,
> > +     ROLE_NON_EAL,
> >  };
>
> If find the name ROLE_NON_EAL a bit heavy (this was also my impression
> when reading the doc part).
>
> I understand that there are several types of threads:
>
> - eal (pthread created by eal): ROLE_RTE and ROLE_SERVICE
> - unregistered (pthread not created by eal, and not registered): ROLE_OFF
>   (note that ROLE_OFF also applies for unexistant threads)
> - dynamic: pthread not created by eal, but registered

Last two cases both are non-EAL threads as described in the doc so far.


>
> What about using ROLE_DYN ? I'm not sure about this name either, it's just
> to open the discussion :)
>

Well, at the moment, all those new lcores are mapped only to non-EAL threads.
A dynamic role feels like you want to take dynamic EAL threads into
account from the start.
I prefer to stick to non-EAL.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-06-30 18:57                           ` Ananyev, Konstantin
@ 2020-07-01  7:48                             ` David Marchand
  2020-07-01 11:58                               ` Ananyev, Konstantin
  0 siblings, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-07-01  7:48 UTC (permalink / raw)
  To: Ananyev, Konstantin
  Cc: Thomas Monjalon, dev, jerinjacobk, Richardson, Bruce, mdr,
	ktraynor, Stokes, Ian, i.maximets, Mcnamara, John, Kovacevic,
	Marko, Burakov, Anatoly, Olivier Matz, Andrew Rybchenko,
	Neil Horman

On Tue, Jun 30, 2020 at 8:57 PM Ananyev, Konstantin
<konstantin.ananyev@intel.com> wrote:
> Imagine the situation - there is a primary proc (supposed to run forever)
> that does  rte_thread_register/rte_thread_unregister during its lifetime.
> Plus from time to time user runs some secondary process to collect stats/debug
> the primary one (proc-info or so).
> Now behaviour of such system will be non-deterministic:
> In some runs primary proc will do rte_thread_register() first,
> and then secondary proc will be never able to attach.
> In other cases - secondary will win the race, and then for primary
> eal_lcore_non_eal_allocate() will always fail.
> Which means different behaviour between runs, varying performance, etc.

If the final users finally hit the situation you describe, it means
that the multiprocess had been in use so far and was known to be in
use (*hopefully*).
So is it not a problem of design/non-regression testing when
integrating the new API in the first place?


> > If we don't add any new option now, and restrict MP handling
> > to error messages, it would not prevent from extending
> > in future, right?
>
> It shouldn't I think.
> Though what is the urgency to push this feature without having an
> agreement first?

I waited to see others' opinions (and pinged some OVS-DPDK people).
I'd like an agreement too.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v4 4/9] eal: introduce thread uninit helper
  2020-06-30  9:42     ` [dpdk-dev] " Olivier Matz
@ 2020-07-01  8:00       ` David Marchand
  0 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-07-01  8:00 UTC (permalink / raw)
  To: Olivier Matz, Jerin Jacob, Sunil Kumar Kori
  Cc: dev, Bruce Richardson, Ray Kinsella, Thomas Monjalon,
	Andrew Rybchenko, Kevin Traynor, Ian Stokes, Ilya Maximets,
	Neil Horman, Harini Ramakrishnan, Omar Cardona, Pallavi Kadam,
	Ranjit Menon

On Tue, Jun 30, 2020 at 11:42 AM Olivier Matz <olivier.matz@6wind.com> wrote:
> > diff --git a/lib/librte_eal/include/rte_trace_point.h b/lib/librte_eal/include/rte_trace_point.h
> > index 377c2414aa..686b86fdb1 100644
> > --- a/lib/librte_eal/include/rte_trace_point.h
> > +++ b/lib/librte_eal/include/rte_trace_point.h
> > @@ -230,6 +230,15 @@ __rte_trace_point_fp_is_enabled(void)
> >  __rte_experimental
> >  void __rte_trace_mem_per_thread_alloc(void);
> >
> > +/**
> > + * @internal
> > + *
> > + * Free trace memory buffer per thread.
> > + *
> > + */
> > +__rte_experimental
> > +void __rte_trace_mem_per_thread_free(void);
>
> Maybe the doc comment could be reworded a bit
> (and the empty line can be removed by the way).

Copy/paste.
If we keep this symbol, I'll reword.


>
> > +
> >  /**
> >   * @internal
> >   *
> > diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
> > index 0d42d44ce9..5831eea4b0 100644
> > --- a/lib/librte_eal/rte_eal_version.map
> > +++ b/lib/librte_eal/rte_eal_version.map
> > @@ -393,6 +393,9 @@ EXPERIMENTAL {
> >       rte_trace_point_lookup;
> >       rte_trace_regexp;
> >       rte_trace_save;
> > +
> > +     # added in 20.08
> > +     __rte_trace_mem_per_thread_free;
>
> Is it really needed to export this function?
>

The series itself does not need it.

When an application non-EAL thread (not talking about threads that
dpdk is aware of) calls a tracepoint callback, there is an implicit
call to _alloc.
We end up with a memory leak and the application has no way to fix this.
I left this symbol exported, but this is not documented properly.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v4 6/9] eal: register non-EAL threads as lcores
  2020-07-01  7:13       ` David Marchand
@ 2020-07-01  9:11         ` Olivier Matz
  0 siblings, 0 replies; 126+ messages in thread
From: Olivier Matz @ 2020-07-01  9:11 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, Jerin Jacob, Bruce Richardson, Ray Kinsella,
	Thomas Monjalon, Andrew Rybchenko, Kevin Traynor, Ian Stokes,
	Ilya Maximets, John McNamara, Marko Kovacevic, Anatoly Burakov,
	Neil Horman

On Wed, Jul 01, 2020 at 09:13:36AM +0200, David Marchand wrote:
> On Tue, Jun 30, 2020 at 12:07 PM Olivier Matz <olivier.matz@6wind.com> wrote:
> >
> > On Fri, Jun 26, 2020 at 04:47:33PM +0200, David Marchand wrote:
> > > DPDK allows calling some part of its API from a non-EAL thread but this
> > > has some limitations.
> > > OVS (and other applications) has its own thread management but still
> > > want to avoid such limitations by hacking RTE_PER_LCORE(_lcore_id) and
> > > faking EAL threads potentially unknown of some DPDK component.
> > >
> > > Introduce a new API to register non-EAL thread and associate them to a
> > > free lcore with a new NON_EAL role.
> > > This role denotes lcores that do not run DPDK mainloop and as such
> > > prevents use of rte_eal_wait_lcore() and consorts.
> > >
> > > Multiprocess is not supported as the need for cohabitation with this new
> > > feature is unclear at the moment.
> > >
> > > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > > Acked-by: Andrew Rybchenko <arybchenko@solarflare.com>
> > > ---
> > > Changes since v2:
> > > - refused multiprocess init once rte_thread_register got called, and
> > >   vice versa,
> > > - added warning on multiprocess in rte_thread_register doxygen,
> > >
> > > Changes since v1:
> > > - moved cleanup on lcore role code in patch 5,
> > > - added unit test,
> > > - updated documentation,
> > > - changed naming from "external thread" to "registered non-EAL thread"
> > >
> > > ---
> > >  MAINTAINERS                                   |   1 +
> > >  app/test/Makefile                             |   1 +
> > >  app/test/autotest_data.py                     |   6 +
> > >  app/test/meson.build                          |   2 +
> > >  app/test/test_lcores.c                        | 139 ++++++++++++++++++
> > >  doc/guides/howto/debug_troubleshoot.rst       |   5 +-
> > >  .../prog_guide/env_abstraction_layer.rst      |  22 +--
> > >  doc/guides/prog_guide/mempool_lib.rst         |   2 +-
> > >  lib/librte_eal/common/eal_common_lcore.c      |  50 ++++++-
> > >  lib/librte_eal/common/eal_common_mcfg.c       |  36 +++++
> > >  lib/librte_eal/common/eal_common_thread.c     |  33 +++++
> > >  lib/librte_eal/common/eal_memcfg.h            |  10 ++
> > >  lib/librte_eal/common/eal_private.h           |  18 +++
> > >  lib/librte_eal/freebsd/eal.c                  |   4 +
> > >  lib/librte_eal/include/rte_lcore.h            |  25 +++-
> > >  lib/librte_eal/linux/eal.c                    |   4 +
> > >  lib/librte_eal/rte_eal_version.map            |   2 +
> > >  lib/librte_mempool/rte_mempool.h              |  11 +-
> > >  18 files changed, 349 insertions(+), 22 deletions(-)
> > >  create mode 100644 app/test/test_lcores.c
> > >
> >
> > [...]
> >
> > > diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
> > > new file mode 100644
> > > index 0000000000..864bcbade7
> > > --- /dev/null
> > > +++ b/app/test/test_lcores.c
> > > @@ -0,0 +1,139 @@
> > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > + * Copyright (c) 2020 Red Hat, Inc.
> > > + */
> > > +
> > > +#include <pthread.h>
> > > +#include <string.h>
> > > +
> > > +#include <rte_lcore.h>
> > > +
> > > +#include "test.h"
> > > +
> > > +struct thread_context {
> > > +     enum { INIT, ERROR, DONE } state;
> > > +     bool lcore_id_any;
> > > +     pthread_t id;
> > > +     unsigned int *registered_count;
> > > +};
> > > +static void *thread_loop(void *arg)
> > > +{
> >
> > missing an empty line here
> >
> > > +     struct thread_context *t = arg;
> > > +     unsigned int lcore_id;
> > > +
> > > +     lcore_id = rte_lcore_id();
> > > +     if (lcore_id != LCORE_ID_ANY) {
> > > +             printf("Incorrect lcore id for new thread %u\n", lcore_id);
> > > +             t->state = ERROR;
> > > +     }
> > > +     rte_thread_register();
> > > +     lcore_id = rte_lcore_id();
> > > +     if ((t->lcore_id_any && lcore_id != LCORE_ID_ANY) ||
> > > +                     (!t->lcore_id_any && lcore_id == LCORE_ID_ANY)) {
> > > +             printf("Could not register new thread, got %u while %sexpecting %u\n",
> > > +                     lcore_id, t->lcore_id_any ? "" : "not ", LCORE_ID_ANY);
> > > +             t->state = ERROR;
> > > +     }
> >
> > To check if rte_thread_register() succedeed, we need to look at
> > lcore_id. I wonder if rte_thread_register() shouldn't return the lcore
> > id on success, and -1 on error (rte_errno could be set to give some
> > info on the error).
> 
> lcore_id are unsigned integers with the special value LCORE_ID_ANY
> mapped to UINT32_MAX (should be UINT_MAX? anyway...).
> 
> rte_thread_register could return an error code as there are no ERROR
> level logs about why a lcore allocation failed.
> We could then distinguish a shortage of lcore (or init callback
> refusal) from an invalid call before rte_eal_init() or when mp is in
> use.
> 
> About returning the lcore_id as part of the return code, this would
> map to -1 for LCORE_ID_ANY.
> This is probably not a problem but still odd.

Yes, it would be a bit odd like this. What about changing the definition of
LCORE_ID_ANY to ((unsigned int)-1) ? I think it does not change the effective
value on any architecture, but would make the above change clearer.


> >
> > The same could be done for rte_thread_init()
> 
> ?
> Not sure where this one could fail.

I was thinking about __rte_trace_mem_per_thread_alloc(), but maybe it's not
needed.

> >
> > [...]
> >
> > > diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> > > index a7ae0691bf..1cbddc4b5b 100644
> > > --- a/lib/librte_eal/common/eal_common_thread.c
> > > +++ b/lib/librte_eal/common/eal_common_thread.c
> > > @@ -236,3 +236,36 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
> > >       pthread_join(*thread, NULL);
> > >       return -ret;
> > >  }
> > > +
> > > +void
> > > +rte_thread_register(void)
> > > +{
> > > +     unsigned int lcore_id;
> > > +     rte_cpuset_t cpuset;
> > > +
> > > +     /* EAL init flushes all lcores, we can't register before. */
> > > +     assert(internal_config.init_complete == 1);
> > > +     if (pthread_getaffinity_np(pthread_self(), sizeof(cpuset),
> > > +                     &cpuset) != 0)
> > > +             CPU_ZERO(&cpuset);
> > > +     lcore_id = eal_lcore_non_eal_allocate();
> > > +     if (lcore_id >= RTE_MAX_LCORE)
> > > +             lcore_id = LCORE_ID_ANY;
> > > +     rte_thread_init(lcore_id, &cpuset);
> > > +     if (lcore_id != LCORE_ID_ANY)
> > > +             RTE_LOG(DEBUG, EAL, "Registered non-EAL thread as lcore %u.\n",
> > > +                     lcore_id);
> > > +}
> >
> > So, in this case, the affinity of the pthread is kept and saved, in other
> > words there is no link between the lcore id and the affinity. It means we
> > are allowing an application to register lcores for dataplane with conflicting
> > affinities.
> 
> This is not something new, applications using --lcores option already
> live with this.
> We have warnings in the documentation about non-EAL threads and about
> the dangers of conflicting affinities.
> Hopefully, the users of this API know what they are doing since they
> chose not to use EAL threads.
> 
> 
> >
> > I wonder if it could be useful to have an API that automatically sets
> > the affinity according to the lcore_id. Or a function that creates a
> > pthread using the specified lcore id, and setting the correct affinity.
> > I could simplify the work for applications that want to create/destroy
> > dataplane threads dynamically.
> 
> Do you mean EAL threads dynamic creation/suppression?
> 
> 
> >
> > This could be done later however, just an idea.
> 
> For now, I don't see the need.
> 
> 
> >
> > [...]
> > > diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
> > > index 13e5de006f..32a3d999b8 100644
> > > --- a/lib/librte_eal/freebsd/eal.c
> > > +++ b/lib/librte_eal/freebsd/eal.c
> > > @@ -424,6 +424,10 @@ rte_config_init(void)
> > >               }
> > >               if (rte_eal_config_reattach() < 0)
> > >                       return -1;
> > > +             if (!eal_mcfg_enable_multiprocess()) {
> > > +                     RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
> > > +                     return -1;
> > > +             }
> > >               eal_mcfg_update_internal();
> > >               break;
> > >       case RTE_PROC_AUTO:
> > > diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
> > > index 3968c40693..43747e88df 100644
> > > --- a/lib/librte_eal/include/rte_lcore.h
> > > +++ b/lib/librte_eal/include/rte_lcore.h
> > > @@ -31,6 +31,7 @@ enum rte_lcore_role_t {
> > >       ROLE_RTE,
> > >       ROLE_OFF,
> > >       ROLE_SERVICE,
> > > +     ROLE_NON_EAL,
> > >  };
> >
> > If find the name ROLE_NON_EAL a bit heavy (this was also my impression
> > when reading the doc part).
> >
> > I understand that there are several types of threads:
> >
> > - eal (pthread created by eal): ROLE_RTE and ROLE_SERVICE
> > - unregistered (pthread not created by eal, and not registered): ROLE_OFF
> >   (note that ROLE_OFF also applies for unexistant threads)
> > - dynamic: pthread not created by eal, but registered
> 
> Last two cases both are non-EAL threads as described in the doc so far.

Yes, but only the last case has the NON_EAL role.
I feel that currently role != thread_type, so it may be a bit confusing to
use a pthread_type name in the role enum.

> 
> 
> >
> > What about using ROLE_DYN ? I'm not sure about this name either, it's just
> > to open the discussion :)
> >
> 
> Well, at the moment, all those new lcores are mapped only to non-EAL threads.
> A dynamic role feels like you want to take dynamic EAL threads into
> account from the start.
> I prefer to stick to non-EAL.
> 
> 
> -- 
> David Marchand
> 

^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-07-01  7:48                             ` David Marchand
@ 2020-07-01 11:58                               ` Ananyev, Konstantin
  2020-07-02 13:06                                 ` David Marchand
  0 siblings, 1 reply; 126+ messages in thread
From: Ananyev, Konstantin @ 2020-07-01 11:58 UTC (permalink / raw)
  To: David Marchand
  Cc: Thomas Monjalon, dev, jerinjacobk, Richardson, Bruce, mdr,
	ktraynor, Stokes, Ian, i.maximets, Mcnamara, John, Kovacevic,
	Marko, Burakov, Anatoly, Olivier Matz, Andrew Rybchenko,
	Neil Horman


> 
> On Tue, Jun 30, 2020 at 8:57 PM Ananyev, Konstantin
> <konstantin.ananyev@intel.com> wrote:
> > Imagine the situation - there is a primary proc (supposed to run forever)
> > that does  rte_thread_register/rte_thread_unregister during its lifetime.
> > Plus from time to time user runs some secondary process to collect stats/debug
> > the primary one (proc-info or so).
> > Now behaviour of such system will be non-deterministic:
> > In some runs primary proc will do rte_thread_register() first,
> > and then secondary proc will be never able to attach.
> > In other cases - secondary will win the race, and then for primary
> > eal_lcore_non_eal_allocate() will always fail.
> > Which means different behaviour between runs, varying performance, etc.
> 
> If the final users finally hit the situation you describe, it means
> that the multiprocess had been in use so far and was known to be in
> use (*hopefully*).

Yes. 

> So is it not a problem of design/non-regression testing when
> integrating the new API in the first place?

Not sure I understand you here...
If you are saying that for SP benchmarking/testing the current approach
is sufficient, then - yes it is.
Or are you saying it would be hard to create a test-case to
reproduce such a problematic scenario?

> 
> > > If we don't add any new option now, and restrict MP handling
> > > to error messages, it would not prevent from extending
> > > in future, right?
> >
> > It shouldn't I think.
> > Though what is the urgency to push this feature without having an
> > agreement first?
> 
> I waited to see others' opinions (and pinged some OVS-DPDK people).
> I'd like an agreement too.
> 
> 
> --
> David Marchand


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-07-01 11:58                               ` Ananyev, Konstantin
@ 2020-07-02 13:06                                 ` David Marchand
  2020-07-03 15:15                                   ` Thomas Monjalon
  2020-07-03 16:40                                   ` Ananyev, Konstantin
  0 siblings, 2 replies; 126+ messages in thread
From: David Marchand @ 2020-07-02 13:06 UTC (permalink / raw)
  To: Ananyev, Konstantin
  Cc: Thomas Monjalon, dev, jerinjacobk, Richardson, Bruce, mdr,
	ktraynor, Stokes, Ian, i.maximets, Mcnamara, John, Kovacevic,
	Marko, Burakov, Anatoly, Olivier Matz, Andrew Rybchenko,
	Neil Horman

On Wed, Jul 1, 2020 at 1:58 PM Ananyev, Konstantin
<konstantin.ananyev@intel.com> wrote:
> > If the final users finally hit the situation you describe, it means
> > that the multiprocess had been in use so far and was known to be in
> > use (*hopefully*).
>
> Yes.
>
> > So is it not a problem of design/non-regression testing when
> > integrating the new API in the first place?
>
> Not sure I understand you here...
> If you saying that for SP benchmarking/testing current approach
> is sufficient, then - yes it is.
> Or are you saying it would be hard to create a test-case to
> reproduce such problematic scenario?

I am saying that getting to a problematic scenario that only the final
users get, would be a failure in the development, documentation and
validation of the application.

When the developer integrates this new API, the developer will read
the API description.

- If the limitation on mp is understood and accepted, the application
documentation will be updated to reflect this.
Users can then know mp is not available.
If the users still try to use it, it can be a support issue.
The users will then report to support people who should be aware this
is not supported (the documentation says so).

- If the application needs mp support because of X, Y reasons, the
developer integrating the new API, should complain upstream that the
new API requires mp support.
If the developer does not complain but still uses the API.. well too
bad (or it falls through to the following point).

- The application needs mp support, the developer did not catch the
warning in the API (the kids are home, hard to concentrate)...
The new API will be used for datapath processing threads, so non
regression perf tests will be run.
On the other hand, the application uses mp for X, Y reasons, so there
will be associated test cases.
I can't tell for sure, but I find it hard to believe a validation team
would never do tests that combine both.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-07-02 13:06                                 ` David Marchand
@ 2020-07-03 15:15                                   ` Thomas Monjalon
  2020-07-03 16:40                                   ` Ananyev, Konstantin
  1 sibling, 0 replies; 126+ messages in thread
From: Thomas Monjalon @ 2020-07-03 15:15 UTC (permalink / raw)
  To: Ananyev, Konstantin, David Marchand
  Cc: dev, jerinjacobk, Richardson, Bruce, mdr, ktraynor, Stokes, Ian,
	i.maximets, Mcnamara, John, Kovacevic, Marko, Burakov, Anatoly,
	Olivier Matz, Andrew Rybchenko, Neil Horman

02/07/2020 15:06, David Marchand:
> On Wed, Jul 1, 2020 at 1:58 PM Ananyev, Konstantin
> <konstantin.ananyev@intel.com> wrote:
> > > If the final users finally hit the situation you describe, it means
> > > that the multiprocess had been in use so far and was known to be in
> > > use (*hopefully*).
> >
> > Yes.
> >
> > > So is it not a problem of design/non-regression testing when
> > > integrating the new API in the first place?
> >
> > Not sure I understand you here...
> > If you saying that for SP benchmarking/testing current approach
> > is sufficient, then - yes it is.
> > Or are you saying it would be hard to create a test-case to
> > reproduce such problematic scenario?
> 
> I am saying that getting to a problematic scenario that only the final
> users get, would be a failure in the development, documentation and
> validation of the application.
> 
> When the developer integrates this new API, the developer will read
> the API description.
> 
> - If the limitation on mp is understood and accepted, the application
> documentation will be updated to reflect this.
> Users can then know mp is not available.
> If the users still try to use it, it can be a support issue.
> The users will then report to support people who should be aware this
> is not supported (the documentation says so).
> 
> - If the application needs mp support because of X, Y reasons, the
> developer integrating the new API, should complain upstream that the
> new API requires mp support.
> If the developer does not complain but still uses the API.. well too
> bad (or it falls through to the following point).
> 
> - The application needs mp support, the developer did not catch the
> warning in the API (the kids are home, hard to concentrate)...
> The new API will be used for datapath processing threads, so non
> regression perf tests will be run.
> On the other hand, the application uses mp for X, Y reasons, so there
> will be associated test cases.
> I can't tell for sure, but I find it hard to believe a validation team
> would never do tests that combine both.

Please let's conclude.

The proposed API for thread registration does not support multi-process.
This limitation is documented and there are some runtime checks.
If this API is used in an application design, it should be clear
that attaching a secondary process is forbidden.
If a user tries to attach a secondary process before the application
registers a thread, then future thread registration will fail.
If a user tries to attach a secondary process after a thread registration,
then the secondary process will fail.
It means that depending on when the user *wrongly* attaches a secondary
process (despite the documented limitation of the application),
the failure will happen in a different context.
I think it is OK.

The alternative is to introduce a new EAL flag or a new API to make sure
the failure will happen when attaching a secondary process,
even if no thread is registered yet.
I think such an addition would be weird from a user or API perspective.

Please note that accepting the thread registration API does not prevent
from adding an EAL flag or a new API later.
That's why I vote for merging this series.

Acked-by: Thomas Monjalon <thomas@monjalon.net>



^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-07-02 13:06                                 ` David Marchand
  2020-07-03 15:15                                   ` Thomas Monjalon
@ 2020-07-03 16:40                                   ` Ananyev, Konstantin
  2020-07-04 15:00                                     ` David Marchand
  1 sibling, 1 reply; 126+ messages in thread
From: Ananyev, Konstantin @ 2020-07-03 16:40 UTC (permalink / raw)
  To: David Marchand
  Cc: Thomas Monjalon, dev, jerinjacobk, Richardson, Bruce, mdr,
	ktraynor, Stokes, Ian, i.maximets, Mcnamara, John, Kovacevic,
	Marko, Burakov, Anatoly, Olivier Matz, Andrew Rybchenko,
	Neil Horman



> > > If the final users finally hit the situation you describe, it means
> > > that the multiprocess had been in use so far and was known to be in
> > > use (*hopefully*).
> >
> > Yes.
> >
> > > So is it not a problem of design/non-regression testing when
> > > integrating the new API in the first place?
> >
> > Not sure I understand you here...
> > If you saying that for SP benchmarking/testing current approach
> > is sufficient, then - yes it is.
> > Or are you saying it would be hard to create a test-case to
> > reproduce such problematic scenario?
> 
> I am saying that getting to a problematic scenario that only the final
> users get, would be a failure in the development, documentation and
> validation of the application.
> 
> When the developer integrates this new API, the developer will read
> the API description.
> 
> - If the limitation on mp is understood and accepted, the application
> documentation will be updated to reflect this.
> Users can then know mp is not available.
> If the users still try to use it, it can be a support issue.
> The users will then report to support people who should be aware this
> is not supported (the documentation says so).
> 
> - If the application needs mp support because of X, Y reasons, the
> developer integrating the new API, should complain upstream that the
> new API requires mp support.
> If the developer does not complain but still uses the API.. well too
> bad (or it falls through to the following point).
> 
> - The application needs mp support, the developer did not catch the
> warning in the API (the kids are home, hard to concentrate)...
> The new API will be used for datapath processing threads, so non
> regression perf tests will be run.
> On the other hand, the application uses mp for X, Y reasons, so there
> will be associated test cases.
> I can't tell for sure, but I find it hard to believe a validation team
> would never do tests that combine both.

If there is one team(/organization) doing everything:
development, deployment and support - then yes things
are more or less straightforward.   
Though it is not always the case - one app(/lib) can be used in
a dozen different deployments by various organizations and
the development team might not always be aware of all
possible deployment and usage scenarios.
Let's say, right now dpdk app/proc-info can be used with any
primary dpdk proc, even if it was designed as a standalone app.
With this patch, it will not be the case anymore.
It might be ok by itself, but what looks like a problem to me -
there is no clear and easy way for the user to determine
when he can use proc-info on the given dpdk proc
without causing problems for this app, and when he can't.
Actually, sort of an opposite question -
what are the advantages of current approach (forbid MP support on the fly)
over explicit start-up parameters (either --proc-type=single or --lcore-allow=...)? 
Why do you think it is a better one?





^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-07-03 16:40                                   ` Ananyev, Konstantin
@ 2020-07-04 15:00                                     ` David Marchand
  2020-07-04 21:24                                       ` Ananyev, Konstantin
  0 siblings, 1 reply; 126+ messages in thread
From: David Marchand @ 2020-07-04 15:00 UTC (permalink / raw)
  To: Ananyev, Konstantin, Thomas Monjalon
  Cc: Stokes, Ian, dev, jerinjacobk, Richardson, Bruce, mdr, ktraynor,
	i.maximets, Mcnamara, John, Kovacevic, Marko, Burakov, Anatoly,
	Olivier Matz, Andrew Rybchenko, Neil Horman

On Fri, Jul 3, 2020 at 6:40 PM Ananyev, Konstantin
<konstantin.ananyev@intel.com> wrote:
> what are the advantages of current approach (forbid MP support on the fly)
> over explicit start-up parameters (either --proc-type=single or --lcore-allow=...)?
> Why do you think it is a better one?

I don't want to perpetuate the "please carefully set your command line" habit.
This feature is added through a C API, with documentation and flagged
as experimental, it should be enough.

How about moving the mp disable in rte_thread_register() as a separate API?
Then a developer must call rte_mp_disable() before attempting
rte_thread_register().
That would be equivalent to --proc-type=single.

Why not convert lcore-allow into an API?
This would force us to put something in the init so that external
users see how the application has been started and adjust the
secondary commandline.
On the other hand, rte_mp_disable() is easy to do with my current v4 +
I am running out of time for rc1.


We can still revisit this experimental API.

-- 
David Marchand


^ permalink raw reply	[flat|nested] 126+ messages in thread

* Re: [dpdk-dev] [PATCH v3 6/9] eal: register non-EAL threads as lcores
  2020-07-04 15:00                                     ` David Marchand
@ 2020-07-04 21:24                                       ` Ananyev, Konstantin
  0 siblings, 0 replies; 126+ messages in thread
From: Ananyev, Konstantin @ 2020-07-04 21:24 UTC (permalink / raw)
  To: David Marchand, Thomas Monjalon
  Cc: Stokes, Ian, dev, jerinjacobk, Richardson, Bruce, mdr, ktraynor,
	i.maximets, Mcnamara, John, Kovacevic,  Marko, Burakov, Anatoly,
	Olivier Matz, Andrew Rybchenko, Neil Horman


> 
> On Fri, Jul 3, 2020 at 6:40 PM Ananyev, Konstantin
> <konstantin.ananyev@intel.com> wrote:
> > what are the advantages of current approach (forbid MP support on the fly)
> > over explicit start-up parameters (either --proc-type=single or --lcore-allow=...)?
> > Why do you think it is a better one?
> 
> I don't want to perpetuate the "please carefully set your command line" habit.
> This feature is added through a C API, with documentation and flagged
> as experimental, it should be enough.
> 
> How about moving the mp disable in rte_thread_register() as a separate API?
> Then a developer must call rte_mp_disable() before attempting
> rte_thread_register().
> That would be equivalent to --proc-type=single.

It wouldn't be exactly the same thing, but yes,
I agree the user can call it as the first thing after rte_eal_init()
and it should help to prevent inconsistency in behaviour.
I think it is a good compromise.

> 
> Why not convert lcore-allow into an API?
> This would force us to put something in the init so that external
> users see how the application has been started and adjust the
> secondary commandline.

Not sure I understand you here...
If we make lcore-allow dynamic it is basically the same as moving
lcore_role[] (or some similar structure) into shared memory.
I am ok with that, but I thought you stated that it would require
quite a lot of work.

> On the other hand, rte_mp_disable() is easy to do with my current v4 +
> I am running out of time for rc1.

Yes, as I said above such approach seems good enough to me
(at least for now).

Konstantin


 

^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v5 00/10] Register non-EAL threads as lcore
  2020-06-10 14:44 [dpdk-dev] [PATCH 0/7] Register external threads as lcore David Marchand
                   ` (10 preceding siblings ...)
  2020-06-26 14:47 ` [dpdk-dev] [PATCH v4 0/9] Register non-EAL threads as lcore David Marchand
@ 2020-07-06 14:15 ` David Marchand
  2020-07-06 14:15   ` [dpdk-dev] [PATCH v5 01/10] eal: relocate per thread symbols to common David Marchand
                     ` (9 more replies)
  2020-07-06 20:52 ` [dpdk-dev] [PATCH v6 00/10] Register non-EAL threads as lcore David Marchand
  12 siblings, 10 replies; 126+ messages in thread
From: David Marchand @ 2020-07-06 14:15 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, olivier.matz, konstantin.ananyev

OVS and some other applications have been hacking into DPDK internals to
fake EAL threads and avoid performance penalty of only having non-EAL
threads.

This series proposes to add a new type of lcores and maps those threads
to such lcores.
non-EAL threads won't run the DPDK eal mainloop.
As a consequence, part of the EAL threads API cannot work.

Having new lcores appearing during the process lifetime is not expected
by some DPDK components. This is addressed by introducing init/uninit
callbacks invoked when such lcores are hotplugged.

There is still some work/discussion:
- refuse new lcore role in incompatible EAL threads API (or document it
  only as those API were already incompatible?),
- think about deprecation notices for existing RTE_FOREACH_LCORE macros
  and the like; it is probably worth discussing how to iterate over
  lcores,

For the interested parties, I have a patch [1] against dpdk-latest OVS
branch that makes use of this series (this patch probably won't work with
v5, it will be rebased once dpdk side is ready).

1: https://patchwork.ozlabs.org/project/openvswitch/patch/20200626123017.28555-1-david.marchand@redhat.com/

Changes since v4:
- added separate API to control mp feature activation,
- addressed Konstantin and Olivier comments,

Changes since v3:
- added init failure when trying to use in conjunction with multiprocess,
- addressed Andrew comments,

Changes since v2:
- fixed windows build error due to missing trace stub,
- fixed bug when rolling back on lcore register,

Changes since v1:
- rebased on master (conflicts on merged Windows series),
- separated lcore role code cleanup in a patch,
- tried to use a single naming, so kept non-EAL threads as the main
  notion. non-EAL threads are then distinguished between registered and
  unregistered non-EAL threads,
- added unit tests (still missing some coverage, marked with a FIXME),
- reworked callbacks call under a common rwlock lock which protects
  lcores allocations and callbacks registration,
- introduced lcore iterators and converted the bucket mempool driver,

-- 
David Marchand
David Marchand (10):
  eal: relocate per thread symbols to common
  eal: fix multiple definition of per lcore thread id
  eal: introduce thread init helper
  eal: introduce thread uninit helper
  eal: move lcore role code
  eal: register non-EAL threads as lcores
  eal: add lcore init callbacks
  eal: add lcore iterators
  mempool/bucket: handle non-EAL lcores
  eal: add multiprocess disable API

 MAINTAINERS                                   |   1 +
 app/test/Makefile                             |   1 +
 app/test/autotest_data.py                     |   6 +
 app/test/meson.build                          |   2 +
 app/test/test_lcores.c                        | 373 ++++++++++++++++++
 app/test/test_mp_secondary.c                  |   7 +
 doc/guides/howto/debug_troubleshoot.rst       |   5 +-
 .../prog_guide/env_abstraction_layer.rst      |  22 +-
 doc/guides/prog_guide/mempool_lib.rst         |   2 +-
 drivers/mempool/bucket/rte_mempool_bucket.c   | 130 +++---
 lib/librte_eal/common/eal_common_lcore.c      | 249 +++++++++++-
 lib/librte_eal/common/eal_common_proc.c       |  41 ++
 lib/librte_eal/common/eal_common_thread.c     | 141 +++++--
 lib/librte_eal/common/eal_common_trace.c      |  49 ++-
 lib/librte_eal/common/eal_memcfg.h            |   2 +
 lib/librte_eal/common/eal_private.h           |  39 ++
 lib/librte_eal/common/eal_thread.h            |  21 +-
 lib/librte_eal/common/eal_trace.h             |   1 +
 lib/librte_eal/freebsd/eal.c                  |  20 +-
 lib/librte_eal/freebsd/eal_thread.c           |  38 +-
 lib/librte_eal/include/rte_eal.h              |  26 +-
 lib/librte_eal/include/rte_lcore.h            | 179 ++++++++-
 lib/librte_eal/linux/eal.c                    |  21 +-
 lib/librte_eal/linux/eal_thread.c             |  38 +-
 lib/librte_eal/rte_eal_version.map            |  16 +
 lib/librte_eal/windows/eal.c                  |   8 +-
 lib/librte_eal/windows/eal_thread.c           |  14 +-
 lib/librte_mempool/rte_mempool.h              |  11 +-
 28 files changed, 1221 insertions(+), 242 deletions(-)
 create mode 100644 app/test/test_lcores.c

-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v5 01/10] eal: relocate per thread symbols to common
  2020-07-06 14:15 ` [dpdk-dev] [PATCH v5 00/10] Register non-EAL threads as lcore David Marchand
@ 2020-07-06 14:15   ` David Marchand
  2020-07-06 14:15   ` [dpdk-dev] [PATCH v5 02/10] eal: fix multiple definition of per lcore thread id David Marchand
                     ` (8 subsequent siblings)
  9 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-07-06 14:15 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, olivier.matz, konstantin.ananyev,
	Harini Ramakrishnan, Omar Cardona, Pallavi Kadam, Ranjit Menon

We have per lcore thread symbols scattered in OS implementations but
common code relies on them.
Move all of them to common.

RTE_PER_LCORE(_socket_id) and RTE_PER_LCORE(_cpuset) have public
accessors and are not exported through the library map, they can be
made static.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Reviewed-by: Olivier Matz <olivier.matz@6wind.com>
---
 lib/librte_eal/common/eal_common_thread.c | 5 ++++-
 lib/librte_eal/freebsd/eal_thread.c       | 4 ----
 lib/librte_eal/include/rte_lcore.h        | 1 -
 lib/librte_eal/linux/eal_thread.c         | 4 ----
 lib/librte_eal/windows/eal_thread.c       | 4 ----
 5 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index a341070926..7be80c292e 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -21,7 +21,10 @@
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DECLARE_PER_LCORE(unsigned , _socket_id);
+RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
+static RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) =
+	(unsigned int)SOCKET_ID_ANY;
+static RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
 
 unsigned rte_socket_id(void)
 {
diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
index b52019782a..40676d9ef5 100644
--- a/lib/librte_eal/freebsd/eal_thread.c
+++ b/lib/librte_eal/freebsd/eal_thread.c
@@ -25,10 +25,6 @@
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
 /*
  * Send a message to a slave lcore identified by slave_id to call a
  * function f with argument arg. Once the execution is done, the
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 339046bc86..5c1d1926e9 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -23,7 +23,6 @@ extern "C" {
 #define LCORE_ID_ANY     UINT32_MAX       /**< Any lcore. */
 
 RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
-RTE_DECLARE_PER_LCORE(rte_cpuset_t, _cpuset); /**< Per thread "cpuset". */
 
 /**
  * Get a lcore's role.
diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
index cd9d6e0ebf..a52ebef3a4 100644
--- a/lib/librte_eal/linux/eal_thread.c
+++ b/lib/librte_eal/linux/eal_thread.c
@@ -25,10 +25,6 @@
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
 /*
  * Send a message to a slave lcore identified by slave_id to call a
  * function f with argument arg. Once the execution is done, the
diff --git a/lib/librte_eal/windows/eal_thread.c b/lib/librte_eal/windows/eal_thread.c
index 3dd56519c9..f12a2ec6ad 100644
--- a/lib/librte_eal/windows/eal_thread.c
+++ b/lib/librte_eal/windows/eal_thread.c
@@ -16,10 +16,6 @@
 #include "eal_private.h"
 #include "eal_windows.h"
 
-RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) = (unsigned int)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
 /*
  * Send a message to a slave lcore identified by slave_id to call a
  * function f with argument arg. Once the execution is done, the
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v5 02/10] eal: fix multiple definition of per lcore thread id
  2020-07-06 14:15 ` [dpdk-dev] [PATCH v5 00/10] Register non-EAL threads as lcore David Marchand
  2020-07-06 14:15   ` [dpdk-dev] [PATCH v5 01/10] eal: relocate per thread symbols to common David Marchand
@ 2020-07-06 14:15   ` David Marchand
  2020-07-06 14:15   ` [dpdk-dev] [PATCH v5 03/10] eal: introduce thread init helper David Marchand
                     ` (7 subsequent siblings)
  9 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-07-06 14:15 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, olivier.matz, konstantin.ananyev,
	Neil Horman, Cunming Liang

Because of the inline accessor + static declaration in rte_gettid(),
we end up with multiple symbols for RTE_PER_LCORE(_thread_id).
Each compilation unit will pay a cost when accessing this information
for the first time.

$ nm build/app/dpdk-testpmd | grep per_lcore__thread_id
0000000000000054 d per_lcore__thread_id.5037
0000000000000040 d per_lcore__thread_id.5103
0000000000000048 d per_lcore__thread_id.5259
000000000000004c d per_lcore__thread_id.5259
0000000000000044 d per_lcore__thread_id.5933
0000000000000058 d per_lcore__thread_id.6261
0000000000000050 d per_lcore__thread_id.7378
000000000000005c d per_lcore__thread_id.7496
000000000000000c d per_lcore__thread_id.8016
0000000000000010 d per_lcore__thread_id.8431

Make it global as part of the DPDK_21 stable ABI.

Fixes: ef76436c6834 ("eal: get unique thread id")

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Ray Kinsella <mdr@ashroe.eu>
Reviewed-by: Olivier Matz <olivier.matz@6wind.com>
---
 lib/librte_eal/common/eal_common_thread.c | 1 +
 lib/librte_eal/include/rte_eal.h          | 3 ++-
 lib/librte_eal/rte_eal_version.map        | 7 +++++++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 7be80c292e..fd13453fee 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -22,6 +22,7 @@
 #include "eal_thread.h"
 
 RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
+RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
 static RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) =
 	(unsigned int)SOCKET_ID_ANY;
 static RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
index 2f9ed298de..2edf8c6556 100644
--- a/lib/librte_eal/include/rte_eal.h
+++ b/lib/librte_eal/include/rte_eal.h
@@ -447,6 +447,8 @@ enum rte_intr_mode rte_eal_vfio_intr_mode(void);
  */
 int rte_sys_gettid(void);
 
+RTE_DECLARE_PER_LCORE(int, _thread_id);
+
 /**
  * Get system unique thread id.
  *
@@ -456,7 +458,6 @@ int rte_sys_gettid(void);
  */
 static inline int rte_gettid(void)
 {
-	static RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
 	if (RTE_PER_LCORE(_thread_id) == -1)
 		RTE_PER_LCORE(_thread_id) = rte_sys_gettid();
 	return RTE_PER_LCORE(_thread_id);
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 196eef5afa..0d42d44ce9 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -221,6 +221,13 @@ DPDK_20.0 {
 	local: *;
 };
 
+DPDK_21 {
+	global:
+
+	per_lcore__thread_id;
+
+} DPDK_20.0;
+
 EXPERIMENTAL {
 	global:
 
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v5 03/10] eal: introduce thread init helper
  2020-07-06 14:15 ` [dpdk-dev] [PATCH v5 00/10] Register non-EAL threads as lcore David Marchand
  2020-07-06 14:15   ` [dpdk-dev] [PATCH v5 01/10] eal: relocate per thread symbols to common David Marchand
  2020-07-06 14:15   ` [dpdk-dev] [PATCH v5 02/10] eal: fix multiple definition of per lcore thread id David Marchand
@ 2020-07-06 14:15   ` David Marchand
  2020-07-06 14:16   ` [dpdk-dev] [PATCH v5 04/10] eal: introduce thread uninit helper David Marchand
                     ` (6 subsequent siblings)
  9 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-07-06 14:15 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, olivier.matz, konstantin.ananyev,
	Harini Ramakrishnan, Omar Cardona, Pallavi Kadam, Ranjit Menon

Introduce a helper responsible for initialising the per thread context.
We can then have a unified context for EAL and non-EAL threads and
remove copy/paste'd OS-specific helpers.

Per EAL thread CPU affinity setting is separated from the thread init.
This is to accommodate the Windows EAL, where CPU affinity is not set
at the moment.
Besides, having affinity set by the master lcore in FreeBSD and Linux
will make it possible to detect errors rather than panic in the child
thread. But the cleanup when such an event happens is left for later.

A side-effect of this patch is that control threads can now use
recursive locks (rte_gettid() was not called before).

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v4:
- renamed rte_thread_init as __rte_thread_init and moved to
  eal_private.h,

Changes since v1:
- rebased on master, removed Windows workarounds wrt gettid and traces
  support,

---
 lib/librte_eal/common/eal_common_thread.c | 50 ++++++++++++++---------
 lib/librte_eal/common/eal_private.h       | 10 +++++
 lib/librte_eal/common/eal_thread.h        |  8 ----
 lib/librte_eal/freebsd/eal.c              | 14 ++++++-
 lib/librte_eal/freebsd/eal_thread.c       | 32 +--------------
 lib/librte_eal/linux/eal.c                | 15 ++++++-
 lib/librte_eal/linux/eal_thread.c         | 32 +--------------
 lib/librte_eal/windows/eal.c              |  3 +-
 lib/librte_eal/windows/eal_thread.c       | 10 +----
 9 files changed, 71 insertions(+), 103 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index fd13453fee..fb06f8f802 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -71,20 +71,10 @@ eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
 	return socket_id;
 }
 
-int
-rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+static void
+thread_update_affinity(rte_cpuset_t *cpusetp)
 {
-	int s;
-	unsigned lcore_id;
-	pthread_t tid;
-
-	tid = pthread_self();
-
-	s = pthread_setaffinity_np(tid, sizeof(rte_cpuset_t), cpusetp);
-	if (s != 0) {
-		RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
-		return -1;
-	}
+	unsigned int lcore_id = rte_lcore_id();
 
 	/* store socket_id in TLS for quick access */
 	RTE_PER_LCORE(_socket_id) =
@@ -94,14 +84,24 @@ rte_thread_set_affinity(rte_cpuset_t *cpusetp)
 	memmove(&RTE_PER_LCORE(_cpuset), cpusetp,
 		sizeof(rte_cpuset_t));
 
-	lcore_id = rte_lcore_id();
 	if (lcore_id != (unsigned)LCORE_ID_ANY) {
 		/* EAL thread will update lcore_config */
 		lcore_config[lcore_id].socket_id = RTE_PER_LCORE(_socket_id);
 		memmove(&lcore_config[lcore_id].cpuset, cpusetp,
 			sizeof(rte_cpuset_t));
 	}
+}
+
+int
+rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+{
+	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+			cpusetp) != 0) {
+		RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
+		return -1;
+	}
 
+	thread_update_affinity(cpusetp);
 	return 0;
 }
 
@@ -147,6 +147,19 @@ eal_thread_dump_affinity(char *str, unsigned size)
 	return ret;
 }
 
+void
+__rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
+{
+	/* set the lcore ID in per-lcore memory area */
+	RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+	/* acquire system unique id */
+	rte_gettid();
+
+	thread_update_affinity(cpuset);
+
+	__rte_trace_mem_per_thread_alloc();
+}
 
 struct rte_thread_ctrl_params {
 	void *(*start_routine)(void *);
@@ -154,7 +167,7 @@ struct rte_thread_ctrl_params {
 	pthread_barrier_t configured;
 };
 
-static void *rte_thread_init(void *arg)
+static void *ctrl_thread_init(void *arg)
 {
 	int ret;
 	struct internal_config *internal_conf =
@@ -164,8 +177,7 @@ static void *rte_thread_init(void *arg)
 	void *(*start_routine)(void *) = params->start_routine;
 	void *routine_arg = params->arg;
 
-	/* Store cpuset in TLS for quick access */
-	memmove(&RTE_PER_LCORE(_cpuset), cpuset, sizeof(rte_cpuset_t));
+	__rte_thread_init(rte_lcore_id(), cpuset);
 
 	ret = pthread_barrier_wait(&params->configured);
 	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
@@ -173,8 +185,6 @@ static void *rte_thread_init(void *arg)
 		free(params);
 	}
 
-	__rte_trace_mem_per_thread_alloc();
-
 	return start_routine(routine_arg);
 }
 
@@ -198,7 +208,7 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
 
 	pthread_barrier_init(&params->configured, NULL, 2);
 
-	ret = pthread_create(thread, attr, rte_thread_init, (void *)params);
+	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
 	if (ret != 0) {
 		free(params);
 		return -ret;
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 46bcae9305..5d8b53882d 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -699,4 +699,14 @@ eal_get_internal_configuration(void);
 rte_usage_hook_t
 eal_get_application_usage_hook(void);
 
+/**
+ * Init per-lcore info in current thread.
+ *
+ * @param lcore_id
+ *   identifier of lcore.
+ * @param cpuset
+ *   CPU affinity for this thread.
+ */
+void __rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset);
+
 #endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
index b40ed249ed..dc1fc6eb99 100644
--- a/lib/librte_eal/common/eal_thread.h
+++ b/lib/librte_eal/common/eal_thread.h
@@ -15,14 +15,6 @@
  */
 __rte_noreturn void *eal_thread_loop(void *arg);
 
-/**
- * Init per-lcore info for master thread
- *
- * @param lcore_id
- *   identifier of master lcore
- */
-void eal_thread_init_master(unsigned lcore_id);
-
 /**
  * Get the NUMA socket id from cpu id.
  * This function is private to EAL.
diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
index 8c75cba79a..fd577daf44 100644
--- a/lib/librte_eal/freebsd/eal.c
+++ b/lib/librte_eal/freebsd/eal.c
@@ -845,7 +845,14 @@ rte_eal_init(int argc, char **argv)
 
 	eal_check_mem_on_local_socket();
 
-	eal_thread_init_master(config->master_lcore);
+	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+			&lcore_config[config->master_lcore].cpuset) != 0) {
+		rte_eal_init_alert("Cannot set affinity");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	__rte_thread_init(config->master_lcore,
+		&lcore_config[config->master_lcore].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
 
@@ -876,6 +883,11 @@ rte_eal_init(int argc, char **argv)
 		snprintf(thread_name, sizeof(thread_name),
 				"lcore-slave-%d", i);
 		rte_thread_setname(lcore_config[i].thread_id, thread_name);
+
+		ret = pthread_setaffinity_np(lcore_config[i].thread_id,
+			sizeof(rte_cpuset_t), &lcore_config[i].cpuset);
+		if (ret != 0)
+			rte_panic("Cannot set affinity\n");
 	}
 
 	/*
diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
index 40676d9ef5..0788a54fe6 100644
--- a/lib/librte_eal/freebsd/eal_thread.c
+++ b/lib/librte_eal/freebsd/eal_thread.c
@@ -66,29 +66,6 @@ rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
 	return rc;
 }
 
-/* set affinity for current thread */
-static int
-eal_thread_set_affinity(void)
-{
-	unsigned lcore_id = rte_lcore_id();
-
-	/* acquire system unique id  */
-	rte_gettid();
-
-	/* update EAL thread core affinity */
-	return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
-}
-
-void eal_thread_init_master(unsigned lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
-}
-
 /* main loop of threads */
 __rte_noreturn void *
 eal_thread_loop(__rte_unused void *arg)
@@ -113,19 +90,12 @@ eal_thread_loop(__rte_unused void *arg)
 	m2s = lcore_config[lcore_id].pipe_master2slave[0];
 	s2m = lcore_config[lcore_id].pipe_slave2master[1];
 
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
+	__rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
-
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
 		lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
 
-	__rte_trace_mem_per_thread_alloc();
 	rte_eal_trace_thread_lcore_ready(lcore_id, cpuset);
 
 	/* read on our pipe to get commands */
diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
index 3b56d14da1..bd089cdd44 100644
--- a/lib/librte_eal/linux/eal.c
+++ b/lib/librte_eal/linux/eal.c
@@ -1184,10 +1184,16 @@ rte_eal_init(int argc, char **argv)
 
 	eal_check_mem_on_local_socket();
 
-	eal_thread_init_master(config->master_lcore);
+	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+			&lcore_config[config->master_lcore].cpuset) != 0) {
+		rte_eal_init_alert("Cannot set affinity");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	__rte_thread_init(config->master_lcore,
+		&lcore_config[config->master_lcore].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
-
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		config->master_lcore, (uintptr_t)thread_id, cpuset,
 		ret == 0 ? "" : "...");
@@ -1219,6 +1225,11 @@ rte_eal_init(int argc, char **argv)
 		if (ret != 0)
 			RTE_LOG(DEBUG, EAL,
 				"Cannot set name for lcore thread\n");
+
+		ret = pthread_setaffinity_np(lcore_config[i].thread_id,
+			sizeof(rte_cpuset_t), &lcore_config[i].cpuset);
+		if (ret != 0)
+			rte_panic("Cannot set affinity\n");
 	}
 
 	/*
diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
index a52ebef3a4..e0440c0000 100644
--- a/lib/librte_eal/linux/eal_thread.c
+++ b/lib/librte_eal/linux/eal_thread.c
@@ -66,29 +66,6 @@ rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
 	return rc;
 }
 
-/* set affinity for current EAL thread */
-static int
-eal_thread_set_affinity(void)
-{
-	unsigned lcore_id = rte_lcore_id();
-
-	/* acquire system unique id  */
-	rte_gettid();
-
-	/* update EAL thread core affinity */
-	return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
-}
-
-void eal_thread_init_master(unsigned lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
-}
-
 /* main loop of threads */
 __rte_noreturn void *
 eal_thread_loop(__rte_unused void *arg)
@@ -113,19 +90,12 @@ eal_thread_loop(__rte_unused void *arg)
 	m2s = lcore_config[lcore_id].pipe_master2slave[0];
 	s2m = lcore_config[lcore_id].pipe_slave2master[1];
 
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
+	__rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
-
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		lcore_id, (uintptr_t)thread_id, cpuset, ret == 0 ? "" : "...");
 
-	__rte_trace_mem_per_thread_alloc();
 	rte_eal_trace_thread_lcore_ready(lcore_id, cpuset);
 
 	/* read on our pipe to get commands */
diff --git a/lib/librte_eal/windows/eal.c b/lib/librte_eal/windows/eal.c
index eb10b4ef96..9f5d019e64 100644
--- a/lib/librte_eal/windows/eal.c
+++ b/lib/librte_eal/windows/eal.c
@@ -333,7 +333,8 @@ rte_eal_init(int argc, char **argv)
 		return -1;
 	}
 
-	eal_thread_init_master(config->master_lcore);
+	__rte_thread_init(config->master_lcore,
+		&lcore_config[config->master_lcore].cpuset);
 
 	bscan = rte_bus_scan();
 	if (bscan < 0) {
diff --git a/lib/librte_eal/windows/eal_thread.c b/lib/librte_eal/windows/eal_thread.c
index f12a2ec6ad..20889b6196 100644
--- a/lib/librte_eal/windows/eal_thread.c
+++ b/lib/librte_eal/windows/eal_thread.c
@@ -53,13 +53,6 @@ rte_eal_remote_launch(lcore_function_t *f, void *arg, unsigned int slave_id)
 	return 0;
 }
 
-void
-eal_thread_init_master(unsigned int lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-}
-
 /* main loop of threads */
 void *
 eal_thread_loop(void *arg __rte_unused)
@@ -84,8 +77,7 @@ eal_thread_loop(void *arg __rte_unused)
 	m2s = lcore_config[lcore_id].pipe_master2slave[0];
 	s2m = lcore_config[lcore_id].pipe_slave2master[1];
 
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
+	__rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s])\n",
 		lcore_id, (uintptr_t)thread_id, cpuset);
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v5 04/10] eal: introduce thread uninit helper
  2020-07-06 14:15 ` [dpdk-dev] [PATCH v5 00/10] Register non-EAL threads as lcore David Marchand
                     ` (2 preceding siblings ...)
  2020-07-06 14:15   ` [dpdk-dev] [PATCH v5 03/10] eal: introduce thread init helper David Marchand
@ 2020-07-06 14:16   ` David Marchand
  2020-07-06 14:16   ` [dpdk-dev] [PATCH v5 05/10] eal: move lcore role code David Marchand
                     ` (5 subsequent siblings)
  9 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-07-06 14:16 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, olivier.matz, konstantin.ananyev,
	Jerin Jacob, Sunil Kumar Kori, Harini Ramakrishnan, Omar Cardona,
	Pallavi Kadam, Ranjit Menon

This is a preparation step for dynamically unregistering threads.

Since we explicitly allocate a per thread trace buffer in
__rte_thread_init, add an internal helper to free this buffer.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v4:
- renamed rte_thread_uninit and moved to eal_private.h,
- hid freeing helper,

Changes since v2:
- added missing stub for windows tracing support,
- moved free symbol to exported (experimental) ABI as a counterpart of
  the alloc symbol we already had,

Changes since v1:
- rebased on master, removed Windows workaround wrt traces support,

---
 lib/librte_eal/common/eal_common_thread.c |  9 +++++
 lib/librte_eal/common/eal_common_trace.c  | 49 +++++++++++++++++++----
 lib/librte_eal/common/eal_private.h       |  5 +++
 lib/librte_eal/common/eal_trace.h         |  1 +
 lib/librte_eal/windows/eal.c              |  5 +++
 5 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index fb06f8f802..6d1c87b1c2 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -20,6 +20,7 @@
 #include "eal_internal_cfg.h"
 #include "eal_private.h"
 #include "eal_thread.h"
+#include "eal_trace.h"
 
 RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
 RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
@@ -161,6 +162,14 @@ __rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
 	__rte_trace_mem_per_thread_alloc();
 }
 
+void
+__rte_thread_uninit(void)
+{
+	trace_mem_per_thread_free();
+
+	RTE_PER_LCORE(_lcore_id) = LCORE_ID_ANY;
+}
+
 struct rte_thread_ctrl_params {
 	void *(*start_routine)(void *);
 	void *arg;
diff --git a/lib/librte_eal/common/eal_common_trace.c b/lib/librte_eal/common/eal_common_trace.c
index 875553d7e5..b6da5537fe 100644
--- a/lib/librte_eal/common/eal_common_trace.c
+++ b/lib/librte_eal/common/eal_common_trace.c
@@ -101,7 +101,7 @@ eal_trace_fini(void)
 {
 	if (!rte_trace_is_enabled())
 		return;
-	trace_mem_per_thread_free();
+	trace_mem_free();
 	trace_metadata_destroy();
 	eal_trace_args_free();
 }
@@ -370,24 +370,59 @@ __rte_trace_mem_per_thread_alloc(void)
 	rte_spinlock_unlock(&trace->lock);
 }
 
+static void
+trace_mem_per_thread_free_unlocked(struct thread_mem_meta *meta)
+{
+	if (meta->area == TRACE_AREA_HUGEPAGE)
+		eal_free_no_trace(meta->mem);
+	else if (meta->area == TRACE_AREA_HEAP)
+		free(meta->mem);
+}
+
 void
 trace_mem_per_thread_free(void)
+{
+	struct trace *trace = trace_obj_get();
+	struct __rte_trace_header *header;
+	uint32_t count;
+
+	header = RTE_PER_LCORE(trace_mem);
+	if (header == NULL)
+		return;
+
+	rte_spinlock_lock(&trace->lock);
+	for (count = 0; count < trace->nb_trace_mem_list; count++) {
+		if (trace->lcore_meta[count].mem == header)
+			break;
+	}
+	if (count != trace->nb_trace_mem_list) {
+		struct thread_mem_meta *meta = &trace->lcore_meta[count];
+
+		trace_mem_per_thread_free_unlocked(meta);
+		if (count != trace->nb_trace_mem_list - 1) {
+			memmove(meta, meta + 1,
+				sizeof(*meta) *
+				 (trace->nb_trace_mem_list - count - 1));
+		}
+		trace->nb_trace_mem_list--;
+	}
+	rte_spinlock_unlock(&trace->lock);
+}
+
+void
+trace_mem_free(void)
 {
 	struct trace *trace = trace_obj_get();
 	uint32_t count;
-	void *mem;
 
 	if (!rte_trace_is_enabled())
 		return;
 
 	rte_spinlock_lock(&trace->lock);
 	for (count = 0; count < trace->nb_trace_mem_list; count++) {
-		mem = trace->lcore_meta[count].mem;
-		if (trace->lcore_meta[count].area == TRACE_AREA_HUGEPAGE)
-			eal_free_no_trace(mem);
-		else if (trace->lcore_meta[count].area == TRACE_AREA_HEAP)
-			free(mem);
+		trace_mem_per_thread_free_unlocked(&trace->lcore_meta[count]);
 	}
+	trace->nb_trace_mem_list = 0;
 	rte_spinlock_unlock(&trace->lock);
 }
 
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 5d8b53882d..a77ac7a963 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -709,4 +709,9 @@ eal_get_application_usage_hook(void);
  */
 void __rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset);
 
+/**
+ * Uninitialize per-lcore info for current thread.
+ */
+void __rte_thread_uninit(void);
+
 #endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/common/eal_trace.h b/lib/librte_eal/common/eal_trace.h
index 8f60616156..92c5951c3a 100644
--- a/lib/librte_eal/common/eal_trace.h
+++ b/lib/librte_eal/common/eal_trace.h
@@ -106,6 +106,7 @@ int trace_metadata_create(void);
 void trace_metadata_destroy(void);
 int trace_mkdir(void);
 int trace_epoch_time_save(void);
+void trace_mem_free(void);
 void trace_mem_per_thread_free(void);
 
 /* EAL interface */
diff --git a/lib/librte_eal/windows/eal.c b/lib/librte_eal/windows/eal.c
index 9f5d019e64..a11daee68b 100644
--- a/lib/librte_eal/windows/eal.c
+++ b/lib/librte_eal/windows/eal.c
@@ -215,6 +215,11 @@ __rte_trace_mem_per_thread_alloc(void)
 {
 }
 
+void
+trace_mem_per_thread_free(void)
+{
+}
+
 void
 __rte_trace_point_emit_field(size_t sz, const char *field,
 	const char *type)
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v5 05/10] eal: move lcore role code
  2020-07-06 14:15 ` [dpdk-dev] [PATCH v5 00/10] Register non-EAL threads as lcore David Marchand
                     ` (3 preceding siblings ...)
  2020-07-06 14:16   ` [dpdk-dev] [PATCH v5 04/10] eal: introduce thread uninit helper David Marchand
@ 2020-07-06 14:16   ` David Marchand
  2020-07-06 14:16   ` [dpdk-dev] [PATCH v5 06/10] eal: register non-EAL threads as lcores David Marchand
                     ` (4 subsequent siblings)
  9 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-07-06 14:16 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, olivier.matz, konstantin.ananyev

For consistency's sake, move all lcore role code into the dedicated
compilation unit / header.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Reviewed-by: Olivier Matz <olivier.matz@6wind.com>
---
 lib/librte_eal/common/eal_common_lcore.c  | 11 +++++++
 lib/librte_eal/common/eal_common_thread.c | 11 -------
 lib/librte_eal/include/rte_eal.h          |  9 ------
 lib/librte_eal/include/rte_lcore.h        | 37 ++++++++++++++---------
 4 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 5404922a87..86d32a3dd7 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -63,6 +63,17 @@ rte_eal_lcore_role(unsigned int lcore_id)
 	return cfg->lcore_role[lcore_id];
 }
 
+int
+rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+
+	if (lcore_id >= RTE_MAX_LCORE)
+		return -EINVAL;
+
+	return cfg->lcore_role[lcore_id] == role;
+}
+
 int rte_lcore_is_enabled(unsigned int lcore_id)
 {
 	struct rte_config *cfg = rte_eal_get_configuration();
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 6d1c87b1c2..b1b69ea58c 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -33,17 +33,6 @@ unsigned rte_socket_id(void)
 	return RTE_PER_LCORE(_socket_id);
 }
 
-int
-rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role)
-{
-	struct rte_config *cfg = rte_eal_get_configuration();
-
-	if (lcore_id >= RTE_MAX_LCORE)
-		return -EINVAL;
-
-	return cfg->lcore_role[lcore_id] == role;
-}
-
 static int
 eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
 {
diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
index 2edf8c6556..0913d1947c 100644
--- a/lib/librte_eal/include/rte_eal.h
+++ b/lib/librte_eal/include/rte_eal.h
@@ -31,15 +31,6 @@ extern "C" {
 /* Maximum thread_name length. */
 #define RTE_MAX_THREAD_NAME_LEN 16
 
-/**
- * The lcore role (used in RTE or not).
- */
-enum rte_lcore_role_t {
-	ROLE_RTE,
-	ROLE_OFF,
-	ROLE_SERVICE,
-};
-
 /**
  * The type of process in a linux, multi-process setup
  */
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 5c1d1926e9..3968c40693 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -24,6 +24,15 @@ extern "C" {
 
 RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
 
+/**
+ * The lcore role (used in RTE or not).
+ */
+enum rte_lcore_role_t {
+	ROLE_RTE,
+	ROLE_OFF,
+	ROLE_SERVICE,
+};
+
 /**
  * Get a lcore's role.
  *
@@ -34,6 +43,20 @@ RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
  */
 enum rte_lcore_role_t rte_eal_lcore_role(unsigned int lcore_id);
 
+/**
+ * Test if the core supplied has a specific role
+ *
+ * @param lcore_id
+ *   The identifier of the lcore, which MUST be between 0 and
+ *   RTE_MAX_LCORE-1.
+ * @param role
+ *   The role to be checked against.
+ * @return
+ *   Boolean value: positive if test is true; otherwise returns 0.
+ */
+int
+rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
+
 /**
  * Return the Application thread ID of the execution unit.
  *
@@ -283,20 +306,6 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
 		const pthread_attr_t *attr,
 		void *(*start_routine)(void *), void *arg);
 
-/**
- * Test if the core supplied has a specific role
- *
- * @param lcore_id
- *   The identifier of the lcore, which MUST be between 0 and
- *   RTE_MAX_LCORE-1.
- * @param role
- *   The role to be checked against.
- * @return
- *   Boolean value: positive if test is true; otherwise returns 0.
- */
-int
-rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
-
 #ifdef __cplusplus
 }
 #endif
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v5 06/10] eal: register non-EAL threads as lcores
  2020-07-06 14:15 ` [dpdk-dev] [PATCH v5 00/10] Register non-EAL threads as lcore David Marchand
                     ` (4 preceding siblings ...)
  2020-07-06 14:16   ` [dpdk-dev] [PATCH v5 05/10] eal: move lcore role code David Marchand
@ 2020-07-06 14:16   ` David Marchand
  2020-07-06 14:16   ` [dpdk-dev] [PATCH v5 07/10] eal: add lcore init callbacks David Marchand
                     ` (3 subsequent siblings)
  9 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-07-06 14:16 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, olivier.matz, konstantin.ananyev,
	John McNamara, Marko Kovacevic, Anatoly Burakov, Neil Horman

DPDK allows calling some parts of its API from a non-EAL thread but this
has some limitations.
OVS (and other applications) have their own thread management but still
want to avoid such limitations by hacking RTE_PER_LCORE(_lcore_id) and
faking EAL threads potentially unknown to some DPDK components.

Introduce a new API to register non-EAL threads and associate them with a
free lcore with a new NON_EAL role.
This role denotes lcores that do not run the DPDK mainloop and as such
prevents use of rte_eal_wait_lcore() and related functions.

Multiprocess is not supported as the need for cohabitation with this new
feature is unclear at the moment.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Andrew Rybchenko <arybchenko@solarflare.com>
Acked-by: Thomas Monjalon <thomas@monjalon.net>
---
Changes since v4:
- returned an error when rte_thread_register fails + set rte_errno,
- prefixed unit tests logs with Error: when applicable,

Changes since v2:
- refused multiprocess init once rte_thread_register got called, and
  vice versa,
- added warning on multiprocess in rte_thread_register doxygen,

Changes since v1:
- moved cleanup on lcore role code in patch 5,
- added unit test,
- updated documentation,
- changed naming from "external thread" to "registered non-EAL thread"

---
 MAINTAINERS                                   |   1 +
 app/test/Makefile                             |   1 +
 app/test/autotest_data.py                     |   6 +
 app/test/meson.build                          |   2 +
 app/test/test_lcores.c                        | 143 ++++++++++++++++++
 doc/guides/howto/debug_troubleshoot.rst       |   5 +-
 .../prog_guide/env_abstraction_layer.rst      |  22 +--
 doc/guides/prog_guide/mempool_lib.rst         |   2 +-
 lib/librte_eal/common/eal_common_lcore.c      |  45 +++++-
 lib/librte_eal/common/eal_common_proc.c       |  41 +++++
 lib/librte_eal/common/eal_common_thread.c     |  49 +++++-
 lib/librte_eal/common/eal_memcfg.h            |   2 +
 lib/librte_eal/common/eal_private.h           |  28 ++++
 lib/librte_eal/freebsd/eal.c                  |   4 +
 lib/librte_eal/include/rte_lcore.h            |  28 +++-
 lib/librte_eal/linux/eal.c                    |   4 +
 lib/librte_eal/rte_eal_version.map            |   4 +
 lib/librte_mempool/rte_mempool.h              |  11 +-
 18 files changed, 375 insertions(+), 23 deletions(-)
 create mode 100644 app/test/test_lcores.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 27a7be8e64..5a0e1b48b2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -182,6 +182,7 @@ F: app/test/test_cycles.c
 F: app/test/test_debug.c
 F: app/test/test_eal*
 F: app/test/test_errno.c
+F: app/test/test_lcores.c
 F: app/test/test_logs.c
 F: app/test/test_memcpy*
 F: app/test/test_per_lcore.c
diff --git a/app/test/Makefile b/app/test/Makefile
index e5440774b1..f4065271e4 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -98,6 +98,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY) += test_flow_classify.c
 endif
 
 SRCS-y += test_rwlock.c
+SRCS-y += test_lcores.c
 
 SRCS-$(CONFIG_RTE_LIBRTE_STACK) += test_stack.c
 SRCS-$(CONFIG_RTE_LIBRTE_STACK) += test_stack_perf.c
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 238ab631b4..4b7da45e09 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -62,6 +62,12 @@
         "Func":    rwlock_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Lcores autotest",
+        "Command": "lcores_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Logs autotest",
         "Command": "logs_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index 56591db4e0..86b4d4d490 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -67,6 +67,7 @@ test_sources = files('commands.c',
 	'test_ipsec_perf.c',
 	'test_kni.c',
 	'test_kvargs.c',
+	'test_lcores.c',
 	'test_logs.c',
 	'test_lpm.c',
 	'test_lpm6.c',
@@ -207,6 +208,7 @@ fast_tests = [
         ['hash_autotest', true],
         ['interrupt_autotest', true],
         ['ipfrag_autotest', false],
+        ['lcores_autotest', true],
         ['logs_autotest', true],
         ['lpm_autotest', true],
         ['lpm6_autotest', true],
diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
new file mode 100644
index 0000000000..afb9cdd444
--- /dev/null
+++ b/app/test/test_lcores.c
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2020 Red Hat, Inc.
+ */
+
+#include <pthread.h>
+#include <string.h>
+
+#include <rte_errno.h>
+#include <rte_lcore.h>
+
+#include "test.h"
+
+struct thread_context {
+	enum { INIT, ERROR, DONE } state;
+	bool lcore_id_any;
+	pthread_t id;
+	unsigned int *registered_count;
+};
+
+static void *thread_loop(void *arg)
+{
+	struct thread_context *t = arg;
+	unsigned int lcore_id;
+
+	lcore_id = rte_lcore_id();
+	if (lcore_id != LCORE_ID_ANY) {
+		printf("Error: incorrect lcore id for new thread %u\n", lcore_id);
+		t->state = ERROR;
+	}
+	if (rte_thread_register() < 0)
+		printf("Warning: could not register new thread (this might be expected during this test), reason %s\n",
+			rte_strerror(rte_errno));
+	lcore_id = rte_lcore_id();
+	if ((t->lcore_id_any && lcore_id != LCORE_ID_ANY) ||
+			(!t->lcore_id_any && lcore_id == LCORE_ID_ANY)) {
+		printf("Error: could not register new thread, got %u while %sexpecting %u\n",
+			lcore_id, t->lcore_id_any ? "" : "not ", LCORE_ID_ANY);
+		t->state = ERROR;
+	}
+	/* Report register happened to the control thread. */
+	__atomic_add_fetch(t->registered_count, 1, __ATOMIC_RELEASE);
+
+	/* Wait for release from the control thread. */
+	while (__atomic_load_n(t->registered_count, __ATOMIC_ACQUIRE) != 0)
+		;
+	rte_thread_unregister();
+	lcore_id = rte_lcore_id();
+	if (lcore_id != LCORE_ID_ANY) {
+		printf("Error: could not unregister new thread, %u still assigned\n",
+			lcore_id);
+		t->state = ERROR;
+	}
+
+	if (t->state != ERROR)
+		t->state = DONE;
+
+	return NULL;
+}
+
+static int
+test_non_eal_lcores(unsigned int eal_threads_count)
+{
+	struct thread_context thread_contexts[RTE_MAX_LCORE];
+	unsigned int non_eal_threads_count;
+	unsigned int registered_count;
+	struct thread_context *t;
+	unsigned int i;
+	int ret;
+
+	non_eal_threads_count = 0;
+	registered_count = 0;
+
+	/* Try to create as many threads as possible. */
+	for (i = 0; i < RTE_MAX_LCORE - eal_threads_count; i++) {
+		t = &thread_contexts[i];
+		t->state = INIT;
+		t->registered_count = &registered_count;
+		t->lcore_id_any = false;
+		if (pthread_create(&t->id, NULL, thread_loop, t) != 0)
+			break;
+		non_eal_threads_count++;
+	}
+	printf("non-EAL threads count: %u\n", non_eal_threads_count);
+	/* Wait all non-EAL threads to register. */
+	while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+			non_eal_threads_count)
+		;
+
+	/* We managed to create the max number of threads, let's try to create
+	 * one more. This will allow one more check.
+	 */
+	if (eal_threads_count + non_eal_threads_count < RTE_MAX_LCORE)
+		goto skip_lcore_any;
+	t = &thread_contexts[non_eal_threads_count];
+	t->state = INIT;
+	t->registered_count = &registered_count;
+	t->lcore_id_any = true;
+	if (pthread_create(&t->id, NULL, thread_loop, t) == 0) {
+		non_eal_threads_count++;
+		printf("non-EAL threads count: %u\n", non_eal_threads_count);
+		while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+				non_eal_threads_count)
+			;
+	}
+
+skip_lcore_any:
+	/* Release all threads, and check their states. */
+	__atomic_store_n(&registered_count, 0, __ATOMIC_RELEASE);
+	ret = 0;
+	for (i = 0; i < non_eal_threads_count; i++) {
+		t = &thread_contexts[i];
+		pthread_join(t->id, NULL);
+		if (t->state != DONE)
+			ret = -1;
+	}
+
+	return ret;
+}
+
+static int
+test_lcores(void)
+{
+	unsigned int eal_threads_count = 0;
+	unsigned int i;
+
+	for (i = 0; i < RTE_MAX_LCORE; i++) {
+		if (!rte_lcore_has_role(i, ROLE_OFF))
+			eal_threads_count++;
+	}
+	if (eal_threads_count == 0) {
+		printf("Error: something is broken, no EAL thread detected.\n");
+		return TEST_FAILED;
+	}
+	printf("EAL threads count: %u, RTE_MAX_LCORE=%u\n", eal_threads_count,
+		RTE_MAX_LCORE);
+
+	if (test_non_eal_lcores(eal_threads_count) < 0)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(lcores_autotest, test_lcores);
diff --git a/doc/guides/howto/debug_troubleshoot.rst b/doc/guides/howto/debug_troubleshoot.rst
index cef016b2fe..5a46f5fba3 100644
--- a/doc/guides/howto/debug_troubleshoot.rst
+++ b/doc/guides/howto/debug_troubleshoot.rst
@@ -307,8 +307,9 @@ Custom worker function :numref:`dtg_distributor_worker`.
 
 #. Configuration issue isolation
 
-   * Identify core role using ``rte_eal_lcore_role`` to identify RTE, OFF and
-     SERVICE. Check performance functions are mapped to run on the cores.
+   * Identify core role using ``rte_eal_lcore_role`` to identify RTE, OFF,
+     SERVICE and NON_EAL. Check performance functions are mapped to run on the
+     cores.
 
    * For high-performance execution logic ensure running it on correct NUMA
      and non-master core.
diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
index 48a2fec066..f64ae953d1 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -564,9 +564,13 @@ It's also compatible with the pattern of corelist('-l') option.
 non-EAL pthread support
 ~~~~~~~~~~~~~~~~~~~~~~~
 
-It is possible to use the DPDK execution context with any user pthread (aka. Non-EAL pthreads).
-In a non-EAL pthread, the *_lcore_id* is always LCORE_ID_ANY which identifies that it is not an EAL thread with a valid, unique, *_lcore_id*.
-Some libraries will use an alternative unique ID (e.g. TID), some will not be impacted at all, and some will work but with limitations (e.g. timer and mempool libraries).
+It is possible to use the DPDK execution context with any user pthread (aka. non-EAL pthreads).
+There are two kinds of non-EAL pthreads:
+
+- a registered non-EAL pthread with a valid *_lcore_id* that was successfully assigned by calling ``rte_thread_register()``,
+- an unregistered non-EAL pthread with a LCORE_ID_ANY *_lcore_id*.
+
+For unregistered non-EAL pthreads (with a LCORE_ID_ANY *_lcore_id*), some libraries will use an alternative unique ID (e.g. TID), some will not be impacted at all, and some will work but with limitations (e.g. timer and mempool libraries).
 
 All these impacts are mentioned in :ref:`known_issue_label` section.
 
@@ -613,9 +617,9 @@ Known Issues
 + rte_mempool
 
   The rte_mempool uses a per-lcore cache inside the mempool.
-  For non-EAL pthreads, ``rte_lcore_id()`` will not return a valid number.
-  So for now, when rte_mempool is used with non-EAL pthreads, the put/get operations will bypass the default mempool cache and there is a performance penalty because of this bypass.
-  Only user-owned external caches can be used in a non-EAL context in conjunction with ``rte_mempool_generic_put()`` and ``rte_mempool_generic_get()`` that accept an explicit cache parameter.
+  For unregistered non-EAL pthreads, ``rte_lcore_id()`` will not return a valid number.
+  So for now, when rte_mempool is used with unregistered non-EAL pthreads, the put/get operations will bypass the default mempool cache and there is a performance penalty because of this bypass.
+  Only user-owned external caches can be used in an unregistered non-EAL context in conjunction with ``rte_mempool_generic_put()`` and ``rte_mempool_generic_get()`` that accept an explicit cache parameter.
 
 + rte_ring
 
@@ -660,15 +664,15 @@ Known Issues
 
 + rte_timer
 
-  Running  ``rte_timer_manage()`` on a non-EAL pthread is not allowed. However, resetting/stopping the timer from a non-EAL pthread is allowed.
+  Running  ``rte_timer_manage()`` on an unregistered non-EAL pthread is not allowed. However, resetting/stopping the timer from a non-EAL pthread is allowed.
 
 + rte_log
 
-  In non-EAL pthreads, there is no per thread loglevel and logtype, global loglevels are used.
+  In unregistered non-EAL pthreads, there is no per thread loglevel and logtype, global loglevels are used.
 
 + misc
 
-  The debug statistics of rte_ring, rte_mempool and rte_timer are not supported in a non-EAL pthread.
+  The debug statistics of rte_ring, rte_mempool and rte_timer are not supported in an unregistered non-EAL pthread.
 
 cgroup control
 ~~~~~~~~~~~~~~
diff --git a/doc/guides/prog_guide/mempool_lib.rst b/doc/guides/prog_guide/mempool_lib.rst
index f8b430d656..e3e1f940be 100644
--- a/doc/guides/prog_guide/mempool_lib.rst
+++ b/doc/guides/prog_guide/mempool_lib.rst
@@ -103,7 +103,7 @@ The maximum size of the cache is static and is defined at compilation time (CONF
 Alternatively to the internal default per-lcore local cache, an application can create and manage external caches through the ``rte_mempool_cache_create()``, ``rte_mempool_cache_free()`` and ``rte_mempool_cache_flush()`` calls.
 These user-owned caches can be explicitly passed to ``rte_mempool_generic_put()`` and ``rte_mempool_generic_get()``.
 The ``rte_mempool_default_cache()`` call returns the default internal cache if any.
-In contrast to the default caches, user-owned caches can be used by non-EAL threads too.
+In contrast to the default caches, user-owned caches can be used by unregistered non-EAL threads too.
 
 Mempool Handlers
 ------------------------
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 86d32a3dd7..2b7d262372 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -6,13 +6,15 @@
 #include <limits.h>
 #include <string.h>
 
-#include <rte_errno.h>
-#include <rte_log.h>
-#include <rte_eal.h>
-#include <rte_lcore.h>
 #include <rte_common.h>
 #include <rte_debug.h>
+#include <rte_eal.h>
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_spinlock.h>
 
+#include "eal_memcfg.h"
 #include "eal_private.h"
 #include "eal_thread.h"
 
@@ -220,3 +222,38 @@ rte_socket_id_by_idx(unsigned int idx)
 	}
 	return config->numa_nodes[idx];
 }
+
+static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
+
+unsigned int
+eal_lcore_non_eal_allocate(void)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	unsigned int lcore_id;
+
+	rte_spinlock_lock(&lcore_lock);
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (cfg->lcore_role[lcore_id] != ROLE_OFF)
+			continue;
+		cfg->lcore_role[lcore_id] = ROLE_NON_EAL;
+		cfg->lcore_count++;
+		break;
+	}
+	if (lcore_id == RTE_MAX_LCORE)
+		RTE_LOG(DEBUG, EAL, "No lcore available.\n");
+	rte_spinlock_unlock(&lcore_lock);
+	return lcore_id;
+}
+
+void
+eal_lcore_non_eal_release(unsigned int lcore_id)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+
+	rte_spinlock_lock(&lcore_lock);
+	if (cfg->lcore_role[lcore_id] == ROLE_NON_EAL) {
+		cfg->lcore_role[lcore_id] = ROLE_OFF;
+		cfg->lcore_count--;
+	}
+	rte_spinlock_unlock(&lcore_lock);
+}
diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index c649789a52..f0cf2b65d2 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -29,6 +29,7 @@
 #include <rte_log.h>
 #include <rte_tailq.h>
 
+#include "eal_memcfg.h"
 #include "eal_private.h"
 #include "eal_filesystem.h"
 #include "eal_internal_cfg.h"
@@ -1232,3 +1233,43 @@ rte_mp_reply(struct rte_mp_msg *msg, const char *peer)
 
 	return mp_send(msg, peer, MP_REP);
 }
+
+/* Internally, the mp feature status is one of three states:
+ * - "unknown" as long as no secondary process attached to a primary process
+ *   and there was no call to eal_(dis|en)able_multiprocess() yet,
+ * - "enabled" as soon as a secondary process attaches to a primary process,
+ * - "disabled" once eal_disable_multiprocess() succeeded in the primary.
+ */
+enum mp_status {
+	MP_STATUS_UNKNOWN,
+	MP_STATUS_DISABLED,
+	MP_STATUS_ENABLED,
+};
+
+static bool
+set_mp_status(enum mp_status status)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	uint8_t expected;
+	uint8_t desired;
+
+	expected = MP_STATUS_UNKNOWN;
+	desired = status;
+	if (__atomic_compare_exchange_n(&mcfg->mp_status, &expected, desired,
+			false, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
+		return true;
+
+	return __atomic_load_n(&mcfg->mp_status, __ATOMIC_RELAXED) == desired;
+}
+
+bool
+eal_disable_multiprocess(void)
+{
+	return set_mp_status(MP_STATUS_DISABLED);
+}
+
+bool
+eal_enable_multiprocess(void)
+{
+	return set_mp_status(MP_STATUS_ENABLED);
+}
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index b1b69ea58c..cead8ea5cf 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -12,9 +12,10 @@
 #include <assert.h>
 #include <string.h>
 
+#include <rte_errno.h>
 #include <rte_lcore.h>
-#include <rte_memory.h>
 #include <rte_log.h>
+#include <rte_memory.h>
 #include <rte_trace_point.h>
 
 #include "eal_internal_cfg.h"
@@ -241,3 +242,49 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
 	pthread_join(*thread, NULL);
 	return -ret;
 }
+
+int
+rte_thread_register(void)
+{
+	unsigned int lcore_id;
+	rte_cpuset_t cpuset;
+
+	/* EAL init flushes all lcores, we can't register before. */
+	if (eal_get_internal_configuration()->init_complete != 1) {
+		RTE_LOG(DEBUG, EAL, "Called %s before EAL init.\n", __func__);
+		rte_errno = EINVAL;
+		return -1;
+	}
+	if (!eal_disable_multiprocess()) {
+		RTE_LOG(ERR, EAL, "Multiprocess in use, registering non-EAL threads is not supported.\n");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	if (pthread_getaffinity_np(pthread_self(), sizeof(cpuset),
+			&cpuset) != 0)
+		CPU_ZERO(&cpuset);
+	lcore_id = eal_lcore_non_eal_allocate();
+	if (lcore_id >= RTE_MAX_LCORE)
+		lcore_id = LCORE_ID_ANY;
+	__rte_thread_init(lcore_id, &cpuset);
+	if (lcore_id == LCORE_ID_ANY) {
+		rte_errno = ENOMEM;
+		return -1;
+	}
+	RTE_LOG(DEBUG, EAL, "Registered non-EAL thread as lcore %u.\n",
+		lcore_id);
+	return 0;
+}
+
+void
+rte_thread_unregister(void)
+{
+	unsigned int lcore_id = rte_lcore_id();
+
+	if (lcore_id != LCORE_ID_ANY)
+		eal_lcore_non_eal_release(lcore_id);
+	__rte_thread_uninit();
+	if (lcore_id != LCORE_ID_ANY)
+		RTE_LOG(DEBUG, EAL, "Unregistered non-EAL thread (was lcore %u).\n",
+			lcore_id);
+}
diff --git a/lib/librte_eal/common/eal_memcfg.h b/lib/librte_eal/common/eal_memcfg.h
index 583fcb5953..ea013a9daf 100644
--- a/lib/librte_eal/common/eal_memcfg.h
+++ b/lib/librte_eal/common/eal_memcfg.h
@@ -41,6 +41,8 @@ struct rte_mem_config {
 	rte_rwlock_t memory_hotplug_lock;
 	/**< Indicates whether memory hotplug request is in progress. */
 
+	uint8_t mp_status; /**< Multiprocess status. */
+
 	/* memory segments and zones */
 	struct rte_fbarray memzones; /**< Memzone descriptors. */
 
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index a77ac7a963..e82fb80aa0 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -396,6 +396,24 @@ uint64_t get_tsc_freq(void);
  */
 uint64_t get_tsc_freq_arch(void);
 
+/**
+ * Allocate a free lcore to associate to a non-EAL thread.
+ *
+ * @return
+ *   - the id of a lcore with role ROLE_NON_EAL on success.
+ *   - RTE_MAX_LCORE if none was available.
+ */
+unsigned int eal_lcore_non_eal_allocate(void);
+
+/**
+ * Release the lcore used by a non-EAL thread.
+ * Counterpart of eal_lcore_non_eal_allocate().
+ *
+ * @param lcore_id
+ *   The lcore with role ROLE_NON_EAL to release.
+ */
+void eal_lcore_non_eal_release(unsigned int lcore_id);
+
 /**
  * Prepare physical memory mapping
  * i.e. hugepages on Linux and
@@ -714,4 +732,14 @@ void __rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset);
  */
 void __rte_thread_uninit(void);
 
+/**
+ * Mark primary process as not supporting multi-process.
+ */
+bool eal_disable_multiprocess(void);
+
+/**
+ * Instruct primary process that a secondary process wants to attach.
+ */
+bool eal_enable_multiprocess(void);
+
 #endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
index fd577daf44..7e5001a864 100644
--- a/lib/librte_eal/freebsd/eal.c
+++ b/lib/librte_eal/freebsd/eal.c
@@ -400,6 +400,10 @@ rte_config_init(void)
 		}
 		if (rte_eal_config_reattach() < 0)
 			return -1;
+		if (!eal_enable_multiprocess()) {
+			RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
+			return -1;
+		}
 		eal_mcfg_update_internal();
 		break;
 	case RTE_PROC_AUTO:
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 3968c40693..2fd1a03275 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -31,6 +31,7 @@ enum rte_lcore_role_t {
 	ROLE_RTE,
 	ROLE_OFF,
 	ROLE_SERVICE,
+	ROLE_NON_EAL,
 };
 
 /**
@@ -67,7 +68,8 @@ rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
  *   to run threads with lcore IDs 0, 1, 2 and 3 on physical core 10..
  *
  * @return
- *  Logical core ID (in EAL thread) or LCORE_ID_ANY (in non-EAL thread)
+ *  Logical core ID (in EAL thread or registered non-EAL thread) or
+ *  LCORE_ID_ANY (in unregistered non-EAL thread)
  */
 static inline unsigned
 rte_lcore_id(void)
@@ -279,6 +281,30 @@ int rte_thread_setname(pthread_t id, const char *name);
 __rte_experimental
 int rte_thread_getname(pthread_t id, char *name, size_t len);
 
+/**
+ * Register current non-EAL thread as a lcore.
+ *
+ * @note This API is not compatible with the multi-process feature:
+ * - if a primary process registers a non-EAL thread, then no secondary process
+ *   will initialise.
+ * - if a secondary process initialises successfully, trying to register a
+ *   non-EAL thread from either primary or secondary processes will always end
+ *   up with the thread getting LCORE_ID_ANY as lcore.
+ *
+ * @return
+ *   On success, return 0; otherwise return -1 with rte_errno set.
+ */
+__rte_experimental
+int
+rte_thread_register(void);
+
+/**
+ * Unregister current thread and release lcore if one was associated.
+ */
+__rte_experimental
+void
+rte_thread_unregister(void);
+
 /**
  * Create a control thread.
  *
diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
index bd089cdd44..f4daf46c4e 100644
--- a/lib/librte_eal/linux/eal.c
+++ b/lib/librte_eal/linux/eal.c
@@ -492,6 +492,10 @@ rte_config_init(void)
 		}
 		if (rte_eal_config_reattach() < 0)
 			return -1;
+		if (!eal_enable_multiprocess()) {
+			RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
+			return -1;
+		}
 		eal_mcfg_update_internal();
 		break;
 	case RTE_PROC_AUTO:
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 0d42d44ce9..5503dd7620 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -393,6 +393,10 @@ EXPERIMENTAL {
 	rte_trace_point_lookup;
 	rte_trace_regexp;
 	rte_trace_save;
+
+	# added in 20.08
+	rte_thread_register;
+	rte_thread_unregister;
 };
 
 INTERNAL {
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 652d19f9f1..9e0ee052b3 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -28,9 +28,9 @@
  * rte_mempool_get() or rte_mempool_put() are designed to be called from an EAL
  * thread due to the internal per-lcore cache. Due to the lack of caching,
  * rte_mempool_get() or rte_mempool_put() performance will suffer when called
- * by non-EAL threads. Instead, non-EAL threads should call
- * rte_mempool_generic_get() or rte_mempool_generic_put() with a user cache
- * created with rte_mempool_cache_create().
+ * by unregistered non-EAL threads. Instead, unregistered non-EAL threads
+ * should call rte_mempool_generic_get() or rte_mempool_generic_put() with a
+ * user cache created with rte_mempool_cache_create().
  */
 
 #include <stdio.h>
@@ -1233,7 +1233,7 @@ void rte_mempool_dump(FILE *f, struct rte_mempool *mp);
 /**
  * Create a user-owned mempool cache.
  *
- * This can be used by non-EAL threads to enable caching when they
+ * This can be used by unregistered non-EAL threads to enable caching when they
  * interact with a mempool.
  *
  * @param size
@@ -1264,7 +1264,8 @@ rte_mempool_cache_free(struct rte_mempool_cache *cache);
  * @param lcore_id
  *   The logical core id.
  * @return
- *   A pointer to the mempool cache or NULL if disabled or non-EAL thread.
+ *   A pointer to the mempool cache or NULL if disabled or unregistered non-EAL
+ *   thread.
  */
 static __rte_always_inline struct rte_mempool_cache *
 rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v5 07/10] eal: add lcore init callbacks
  2020-07-06 14:15 ` [dpdk-dev] [PATCH v5 00/10] Register non-EAL threads as lcore David Marchand
                     ` (5 preceding siblings ...)
  2020-07-06 14:16   ` [dpdk-dev] [PATCH v5 06/10] eal: register non-EAL threads as lcores David Marchand
@ 2020-07-06 14:16   ` David Marchand
  2020-07-06 14:16   ` [dpdk-dev] [PATCH v5 08/10] eal: add lcore iterators David Marchand
                     ` (2 subsequent siblings)
  9 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-07-06 14:16 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, olivier.matz, konstantin.ananyev,
	Neil Horman

DPDK components and applications can have their say when a new lcore is
initialized. For this, they can register a callback for initializing and
releasing their private data.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v4:
- fixed leak on callback register failure,
- fixed nits from Konstantin and Olivier,
- prefixed unit tests logs with Error: when applicable,

Changes since v2:
- added missing test,
- fixed rollback on lcore register,

Changes since v1:
- added unit test (since missing some coverage, for v3),
- preferred callback and removed mention of notification,

---
 app/test/test_lcores.c                   | 227 +++++++++++++++++++++++
 lib/librte_eal/common/eal_common_lcore.c | 146 ++++++++++++++-
 lib/librte_eal/common/eal_private.h      |   3 +-
 lib/librte_eal/include/rte_lcore.h       |  70 +++++++
 lib/librte_eal/rte_eal_version.map       |   2 +
 5 files changed, 442 insertions(+), 6 deletions(-)

diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
index afb9cdd444..7df827b4e8 100644
--- a/app/test/test_lcores.c
+++ b/app/test/test_lcores.c
@@ -5,6 +5,7 @@
 #include <pthread.h>
 #include <string.h>
 
+#include <rte_common.h>
 #include <rte_errno.h>
 #include <rte_lcore.h>
 
@@ -117,6 +118,226 @@ test_non_eal_lcores(unsigned int eal_threads_count)
 	return ret;
 }
 
+struct limit_lcore_context {
+	unsigned int init;
+	unsigned int max;
+	unsigned int uninit;
+};
+
+static int
+limit_lcores_init(unsigned int lcore_id __rte_unused, void *arg)
+{
+	struct limit_lcore_context *l = arg;
+
+	l->init++;
+	if (l->init > l->max)
+		return -1;
+	return 0;
+}
+
+static void
+limit_lcores_uninit(unsigned int lcore_id __rte_unused, void *arg)
+{
+	struct limit_lcore_context *l = arg;
+
+	l->uninit++;
+}
+
+static int
+test_lcores_callback(unsigned int eal_threads_count)
+{
+	struct limit_lcore_context l;
+	void *handle;
+
+	/* Refuse last lcore => callback register error. */
+	memset(&l, 0, sizeof(l));
+	l.max = eal_threads_count - 1;
+	handle = rte_lcore_callback_register("limit", limit_lcores_init,
+		limit_lcores_uninit, &l);
+	if (handle != NULL) {
+		printf("Error: lcore callback register should have failed\n");
+		goto error;
+	}
+	/* Refusal happens at the n th call to the init callback.
+	 * Besides, n - 1 were accepted, so we expect as many uninit calls when
+	 * the rollback happens.
+	 */
+	if (l.init != eal_threads_count) {
+		printf("Error: lcore callback register failed but incorrect init calls, expected %u, got %u\n",
+			eal_threads_count, l.init);
+		goto error;
+	}
+	if (l.uninit != eal_threads_count - 1) {
+		printf("Error: lcore callback register failed but incorrect uninit calls, expected %u, got %u\n",
+			eal_threads_count - 1, l.uninit);
+		goto error;
+	}
+
+	/* Accept all lcore and unregister. */
+	memset(&l, 0, sizeof(l));
+	l.max = eal_threads_count;
+	handle = rte_lcore_callback_register("limit", limit_lcores_init,
+		limit_lcores_uninit, &l);
+	if (handle == NULL) {
+		printf("Error: lcore callback register failed\n");
+		goto error;
+	}
+	if (l.uninit != 0) {
+		printf("Error: lcore callback register succeeded but incorrect uninit calls, expected 0, got %u\n",
+			l.uninit);
+		goto error;
+	}
+	rte_lcore_callback_unregister(handle);
+	handle = NULL;
+	if (l.init != eal_threads_count) {
+		printf("Error: lcore callback unregister done but incorrect init calls, expected %u, got %u\n",
+			eal_threads_count, l.init);
+		goto error;
+	}
+	if (l.uninit != eal_threads_count) {
+		printf("Error: lcore callback unregister done but incorrect uninit calls, expected %u, got %u\n",
+			eal_threads_count, l.uninit);
+		goto error;
+	}
+
+	return 0;
+
+error:
+	if (handle != NULL)
+		rte_lcore_callback_unregister(handle);
+
+	return -1;
+}
+
+static int
+test_non_eal_lcores_callback(unsigned int eal_threads_count)
+{
+	struct thread_context thread_contexts[2];
+	unsigned int non_eal_threads_count = 0;
+	struct limit_lcore_context l[2] = {};
+	unsigned int registered_count = 0;
+	struct thread_context *t;
+	void *handle[2] = {};
+	unsigned int i;
+	int ret;
+
+	/* This test requires two empty slots to be sure lcore init refusal is
+	 * because of callback execution.
+	 */
+	if (eal_threads_count + 2 >= RTE_MAX_LCORE)
+		return 0;
+
+	/* Register two callbacks:
+	 * - first one accepts any lcore,
+	 * - second one accepts all EAL lcore + one more for the first non-EAL
+	 *   thread, then refuses the next lcore.
+	 */
+	l[0].max = UINT_MAX;
+	handle[0] = rte_lcore_callback_register("no_limit", limit_lcores_init,
+		limit_lcores_uninit, &l[0]);
+	if (handle[0] == NULL) {
+		printf("Error: lcore callback [0] register failed\n");
+		goto error;
+	}
+	l[1].max = eal_threads_count + 1;
+	handle[1] = rte_lcore_callback_register("limit", limit_lcores_init,
+		limit_lcores_uninit, &l[1]);
+	if (handle[1] == NULL) {
+		printf("Error: lcore callback [1] register failed\n");
+		goto error;
+	}
+	if (l[0].init != eal_threads_count || l[1].init != eal_threads_count) {
+		printf("Error: lcore callbacks register succeeded but incorrect init calls, expected %u, %u, got %u, %u\n",
+			eal_threads_count, eal_threads_count,
+			l[0].init, l[1].init);
+		goto error;
+	}
+	if (l[0].uninit != 0 || l[1].uninit != 0) {
+		printf("Error: lcore callbacks register succeeded but incorrect uninit calls, expected 0, 1, got %u, %u\n",
+			l[0].uninit, l[1].uninit);
+		goto error;
+	}
+	/* First thread that expects a valid lcore id. */
+	t = &thread_contexts[0];
+	t->state = INIT;
+	t->registered_count = &registered_count;
+	t->lcore_id_any = false;
+	if (pthread_create(&t->id, NULL, thread_loop, t) != 0)
+		goto cleanup_threads;
+	non_eal_threads_count++;
+	while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+			non_eal_threads_count)
+		;
+	if (l[0].init != eal_threads_count + 1 ||
+			l[1].init != eal_threads_count + 1) {
+		printf("Error: incorrect init calls, expected %u, %u, got %u, %u\n",
+			eal_threads_count + 1, eal_threads_count + 1,
+			l[0].init, l[1].init);
+		goto cleanup_threads;
+	}
+	if (l[0].uninit != 0 || l[1].uninit != 0) {
+		printf("Error: incorrect uninit calls, expected 0, 0, got %u, %u\n",
+			l[0].uninit, l[1].uninit);
+		goto cleanup_threads;
+	}
+	/* Second thread, that expects LCORE_ID_ANY because of init refusal. */
+	t = &thread_contexts[1];
+	t->state = INIT;
+	t->registered_count = &registered_count;
+	t->lcore_id_any = true;
+	if (pthread_create(&t->id, NULL, thread_loop, t) != 0)
+		goto cleanup_threads;
+	non_eal_threads_count++;
+	while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+			non_eal_threads_count)
+		;
+	if (l[0].init != eal_threads_count + 2 ||
+			l[1].init != eal_threads_count + 2) {
+		printf("Error: incorrect init calls, expected %u, %u, got %u, %u\n",
+			eal_threads_count + 2, eal_threads_count + 2,
+			l[0].init, l[1].init);
+		goto cleanup_threads;
+	}
+	if (l[0].uninit != 1 || l[1].uninit != 0) {
+		printf("Error: incorrect uninit calls, expected 1, 0, got %u, %u\n",
+			l[0].uninit, l[1].uninit);
+		goto cleanup_threads;
+	}
+	/* Release all threads, and check their states. */
+	__atomic_store_n(&registered_count, 0, __ATOMIC_RELEASE);
+	ret = 0;
+	for (i = 0; i < non_eal_threads_count; i++) {
+		t = &thread_contexts[i];
+		pthread_join(t->id, NULL);
+		if (t->state != DONE)
+			ret = -1;
+	}
+	if (ret < 0)
+		goto error;
+	if (l[0].uninit != 2 || l[1].uninit != 1) {
+		printf("Error: threads reported having successfully registered and unregistered, but incorrect uninit calls, expected 2, 1, got %u, %u\n",
+			l[0].uninit, l[1].uninit);
+		goto error;
+	}
+	rte_lcore_callback_unregister(handle[0]);
+	rte_lcore_callback_unregister(handle[1]);
+	return 0;
+
+cleanup_threads:
+	/* Release all threads */
+	__atomic_store_n(&registered_count, 0, __ATOMIC_RELEASE);
+	for (i = 0; i < non_eal_threads_count; i++) {
+		t = &thread_contexts[i];
+		pthread_join(t->id, NULL);
+	}
+error:
+	if (handle[1] != NULL)
+		rte_lcore_callback_unregister(handle[1]);
+	if (handle[0] != NULL)
+		rte_lcore_callback_unregister(handle[0]);
+	return -1;
+}
+
 static int
 test_lcores(void)
 {
@@ -137,6 +358,12 @@ test_lcores(void)
 	if (test_non_eal_lcores(eal_threads_count) < 0)
 		return TEST_FAILED;
 
+	if (test_lcores_callback(eal_threads_count) < 0)
+		return TEST_FAILED;
+
+	if (test_non_eal_lcores_callback(eal_threads_count) < 0)
+		return TEST_FAILED;
+
 	return TEST_SUCCESS;
 }
 
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 2b7d262372..90139c77ff 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -224,11 +224,122 @@ rte_socket_id_by_idx(unsigned int idx)
 }
 
 static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
+struct lcore_callback {
+	TAILQ_ENTRY(lcore_callback) next;
+	char *name;
+	rte_lcore_init_cb init;
+	rte_lcore_uninit_cb uninit;
+	void *arg;
+};
+static TAILQ_HEAD(lcore_callbacks_head, lcore_callback) lcore_callbacks =
+	TAILQ_HEAD_INITIALIZER(lcore_callbacks);
+
+static int
+callback_init(struct lcore_callback *callback, unsigned int lcore_id)
+{
+	if (callback->init == NULL)
+		return 0;
+	RTE_LOG(DEBUG, EAL, "Call init for lcore callback %s, lcore_id %u\n",
+		callback->name, lcore_id);
+	return callback->init(lcore_id, callback->arg);
+}
+
+static void
+callback_uninit(struct lcore_callback *callback, unsigned int lcore_id)
+{
+	if (callback->uninit == NULL)
+		return;
+	RTE_LOG(DEBUG, EAL, "Call uninit for lcore callback %s, lcore_id %u\n",
+		callback->name, lcore_id);
+	callback->uninit(lcore_id, callback->arg);
+}
+
+static void
+free_callback(struct lcore_callback *callback)
+{
+	free(callback->name);
+	free(callback);
+}
+
+void *
+rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
+	rte_lcore_uninit_cb uninit, void *arg)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	struct lcore_callback *callback;
+	unsigned int lcore_id;
+
+	if (name == NULL)
+		return NULL;
+	callback = calloc(1, sizeof(*callback));
+	if (callback == NULL)
+		return NULL;
+	if (asprintf(&callback->name, "%s-%p", name, arg) == -1) {
+		free(callback);
+		return NULL;
+	}
+	callback->init = init;
+	callback->uninit = uninit;
+	callback->arg = arg;
+	rte_spinlock_lock(&lcore_lock);
+	if (callback->init == NULL)
+		goto no_init;
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (cfg->lcore_role[lcore_id] == ROLE_OFF)
+			continue;
+		if (callback_init(callback, lcore_id) == 0)
+			continue;
+		/* Callback refused init for this lcore, uninitialize all
+		 * previous lcore.
+		 */
+		while (lcore_id-- != 0) {
+			if (cfg->lcore_role[lcore_id] == ROLE_OFF)
+				continue;
+			callback_uninit(callback, lcore_id);
+		}
+		free_callback(callback);
+		callback = NULL;
+		goto out;
+	}
+no_init:
+	TAILQ_INSERT_TAIL(&lcore_callbacks, callback, next);
+	RTE_LOG(DEBUG, EAL, "Registered new lcore callback %s (%sinit, %suninit).\n",
+		callback->name, callback->init == NULL ? "NO " : "",
+		callback->uninit == NULL ? "NO " : "");
+out:
+	rte_spinlock_unlock(&lcore_lock);
+	return callback;
+}
+
+void
+rte_lcore_callback_unregister(void *handle)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	struct lcore_callback *callback = handle;
+	unsigned int lcore_id;
+
+	rte_spinlock_lock(&lcore_lock);
+	if (callback->uninit == NULL)
+		goto no_uninit;
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (cfg->lcore_role[lcore_id] == ROLE_OFF)
+			continue;
+		callback_uninit(callback, lcore_id);
+	}
+no_uninit:
+	TAILQ_REMOVE(&lcore_callbacks, callback, next);
+	rte_spinlock_unlock(&lcore_lock);
+	RTE_LOG(DEBUG, EAL, "Unregistered lcore callback %s-%p.\n",
+		callback->name, callback->arg);
+	free_callback(callback);
+}
 
 unsigned int
 eal_lcore_non_eal_allocate(void)
 {
 	struct rte_config *cfg = rte_eal_get_configuration();
+	struct lcore_callback *callback;
+	struct lcore_callback *prev;
 	unsigned int lcore_id;
 
 	rte_spinlock_lock(&lcore_lock);
@@ -239,8 +350,29 @@ eal_lcore_non_eal_allocate(void)
 		cfg->lcore_count++;
 		break;
 	}
-	if (lcore_id == RTE_MAX_LCORE)
+	if (lcore_id == RTE_MAX_LCORE) {
 		RTE_LOG(DEBUG, EAL, "No lcore available.\n");
+		goto out;
+	}
+	TAILQ_FOREACH(callback, &lcore_callbacks, next) {
+		if (callback_init(callback, lcore_id) == 0)
+			continue;
+		/* Callback refused init for this lcore, call uninit for all
+		 * previous callbacks.
+		 */
+		prev = TAILQ_PREV(callback, lcore_callbacks_head, next);
+		while (prev != NULL) {
+			callback_uninit(prev, lcore_id);
+			prev = TAILQ_PREV(prev, lcore_callbacks_head, next);
+		}
+		RTE_LOG(DEBUG, EAL, "Initialization refused for lcore %u.\n",
+			lcore_id);
+		cfg->lcore_role[lcore_id] = ROLE_OFF;
+		cfg->lcore_count--;
+		lcore_id = RTE_MAX_LCORE;
+		goto out;
+	}
+out:
 	rte_spinlock_unlock(&lcore_lock);
 	return lcore_id;
 }
@@ -249,11 +381,15 @@ void
 eal_lcore_non_eal_release(unsigned int lcore_id)
 {
 	struct rte_config *cfg = rte_eal_get_configuration();
+	struct lcore_callback *callback;
 
 	rte_spinlock_lock(&lcore_lock);
-	if (cfg->lcore_role[lcore_id] == ROLE_NON_EAL) {
-		cfg->lcore_role[lcore_id] = ROLE_OFF;
-		cfg->lcore_count--;
-	}
+	if (cfg->lcore_role[lcore_id] != ROLE_NON_EAL)
+		goto out;
+	TAILQ_FOREACH(callback, &lcore_callbacks, next)
+		callback_uninit(callback, lcore_id);
+	cfg->lcore_role[lcore_id] = ROLE_OFF;
+	cfg->lcore_count--;
+out:
 	rte_spinlock_unlock(&lcore_lock);
 }
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index e82fb80aa0..535e008474 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -401,7 +401,8 @@ uint64_t get_tsc_freq_arch(void);
  *
  * @return
  *   - the id of a lcore with role ROLE_NON_EAL on success.
- *   - RTE_MAX_LCORE if none was available.
+ *   - RTE_MAX_LCORE if none was available or initializing was refused (see
+ *     rte_lcore_callback_register).
  */
 unsigned int eal_lcore_non_eal_allocate(void);
 
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 2fd1a03275..6e7206c79f 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -229,6 +229,76 @@ unsigned int rte_get_next_lcore(unsigned int i, int skip_master, int wrap);
 	     i<RTE_MAX_LCORE;						\
 	     i = rte_get_next_lcore(i, 1, 0))
 
+/**
+ * Callback prototype for initializing lcores.
+ *
+ * @param lcore_id
+ *   The lcore to consider.
+ * @param arg
+ *   An opaque pointer passed at callback registration.
+ * @return
+ *   - -1 when refusing this operation,
+ *   - 0 otherwise.
+ */
+typedef int (*rte_lcore_init_cb)(unsigned int lcore_id, void *arg);
+
+/**
+ * Callback prototype for uninitializing lcores.
+ *
+ * @param lcore_id
+ *   The lcore to consider.
+ * @param arg
+ *   An opaque pointer passed at callback registration.
+ */
+typedef void (*rte_lcore_uninit_cb)(unsigned int lcore_id, void *arg);
+
+/**
+ * Register callbacks invoked when initializing and uninitializing a lcore.
+ *
+ * This function calls the init callback with all initialized lcores.
+ * Any error reported by the init callback triggers a rollback calling the
+ * uninit callback for each lcore already initialized by this call.
+ * If this step succeeds, the callbacks are put in the lcore callbacks list
+ * that will get called for each lcore allocation/release.
+ *
+ * Note: callbacks execution is serialised under a lock protecting the lcores
+ * and callbacks list.
+ *
+ * @param name
+ *   A name serving as a small description for this callback.
+ * @param init
+ *   The callback invoked when a lcore_id is initialized.
+ *   init can be NULL.
+ * @param uninit
+ *   The callback invoked when a lcore_id is uninitialized.
+ *   uninit can be NULL.
+ * @param arg
+ *   An optional argument that gets passed to the callback when it gets
+ *   invoked.
+ * @return
+ *   On success, returns an opaque pointer for the registered object.
+ *   On failure (either memory allocation issue in the function itself or an
+ *   error is returned by the init callback itself), returns NULL.
+ */
+__rte_experimental
+void *
+rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
+	rte_lcore_uninit_cb uninit, void *arg);
+
+/**
+ * Unregister callbacks previously registered with rte_lcore_callback_register.
+ *
+ * This function calls the uninit callback with all initialized lcores.
+ * The callbacks are then removed from the lcore callbacks list.
+ *
+ * @param handle
+ *   The handle pointer returned by a former successful call to
+ *   rte_lcore_callback_register.
+ */
+__rte_experimental
+void
+rte_lcore_callback_unregister(void *handle);
+
 /**
  * Set core affinity of the current thread.
  * Support both EAL and non-EAL thread and update TLS.
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 5503dd7620..c3e762c1d9 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -395,6 +395,8 @@ EXPERIMENTAL {
 	rte_trace_save;
 
 	# added in 20.08
+	rte_lcore_callback_register;
+	rte_lcore_callback_unregister;
 	rte_thread_register;
 	rte_thread_unregister;
 };
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v5 08/10] eal: add lcore iterators
  2020-07-06 14:15 ` [dpdk-dev] [PATCH v5 00/10] Register non-EAL threads as lcore David Marchand
                     ` (6 preceding siblings ...)
  2020-07-06 14:16   ` [dpdk-dev] [PATCH v5 07/10] eal: add lcore init callbacks David Marchand
@ 2020-07-06 14:16   ` David Marchand
  2020-07-06 14:16   ` [dpdk-dev] [PATCH v5 09/10] mempool/bucket: handle non-EAL lcores David Marchand
  2020-07-06 14:16   ` [dpdk-dev] [PATCH v5 10/10] eal: add multiprocess disable API David Marchand
  9 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-07-06 14:16 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, olivier.matz, konstantin.ananyev,
	Neil Horman

Add a helper to iterate all lcores.
The iterator callback is read-only with respect to the lcores list.

Implement a dump function on top of this for debugging.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Reviewed-by: Olivier Matz <olivier.matz@6wind.com>
---
Changes since v2:
- added rte_lcore_dump calls in unit test, for basic check,

Changes since v1:
- introduced lcore iterators and implemented rte_lcore_dump,
  this iterator mechanism can then be used outside of EAL,

---
 app/test/test_lcores.c                    |  3 +
 lib/librte_eal/common/eal_common_lcore.c  | 77 ++++++++++++++++++++---
 lib/librte_eal/common/eal_common_thread.c | 16 +++--
 lib/librte_eal/common/eal_thread.h        | 13 +++-
 lib/librte_eal/freebsd/eal.c              |  2 +-
 lib/librte_eal/freebsd/eal_thread.c       |  2 +-
 lib/librte_eal/include/rte_lcore.h        | 47 +++++++++++++-
 lib/librte_eal/linux/eal.c                |  2 +-
 lib/librte_eal/linux/eal_thread.c         |  2 +-
 lib/librte_eal/rte_eal_version.map        |  2 +
 10 files changed, 143 insertions(+), 23 deletions(-)

diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
index 7df827b4e8..19a7ab9fce 100644
--- a/app/test/test_lcores.c
+++ b/app/test/test_lcores.c
@@ -303,6 +303,7 @@ test_non_eal_lcores_callback(unsigned int eal_threads_count)
 			l[0].uninit, l[1].uninit);
 		goto cleanup_threads;
 	}
+	rte_lcore_dump(stdout);
 	/* Release all threads, and check their states. */
 	__atomic_store_n(&registered_count, 0, __ATOMIC_RELEASE);
 	ret = 0;
@@ -314,6 +315,7 @@ test_non_eal_lcores_callback(unsigned int eal_threads_count)
 	}
 	if (ret < 0)
 		goto error;
+	rte_lcore_dump(stdout);
 	if (l[0].uninit != 2 || l[1].uninit != 1) {
 		printf("Error: threads reported having successfully registered and unregistered, but incorrect uninit calls, expected 2, 1, got %u, %u\n",
 			l[0].uninit, l[1].uninit);
@@ -354,6 +356,7 @@ test_lcores(void)
 	}
 	printf("EAL threads count: %u, RTE_MAX_LCORE=%u\n", eal_threads_count,
 		RTE_MAX_LCORE);
+	rte_lcore_dump(stdout);
 
 	if (test_non_eal_lcores(eal_threads_count) < 0)
 		return TEST_FAILED;
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 90139c77ff..f2bd30c70a 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -12,7 +12,7 @@
 #include <rte_errno.h>
 #include <rte_lcore.h>
 #include <rte_log.h>
-#include <rte_spinlock.h>
+#include <rte_rwlock.h>
 
 #include "eal_memcfg.h"
 #include "eal_private.h"
@@ -223,7 +223,7 @@ rte_socket_id_by_idx(unsigned int idx)
 	return config->numa_nodes[idx];
 }
 
-static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
+static rte_rwlock_t lcore_lock = RTE_RWLOCK_INITIALIZER;
 struct lcore_callback {
 	TAILQ_ENTRY(lcore_callback) next;
 	char *name;
@@ -281,7 +281,7 @@ rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
 	callback->init = init;
 	callback->uninit = uninit;
 	callback->arg = arg;
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	if (callback->init == NULL)
 		goto no_init;
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
@@ -307,7 +307,7 @@ rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
 		callback->name, callback->init == NULL ? "NO " : "",
 		callback->uninit == NULL ? "NO " : "");
 out:
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
 	return callback;
 }
 
@@ -318,7 +318,7 @@ rte_lcore_callback_unregister(void *handle)
 	struct lcore_callback *callback = handle;
 	unsigned int lcore_id;
 
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	if (callback->uninit == NULL)
 		goto no_uninit;
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
@@ -328,7 +328,7 @@ rte_lcore_callback_unregister(void *handle)
 	}
 no_uninit:
 	TAILQ_REMOVE(&lcore_callbacks, callback, next);
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
 	RTE_LOG(DEBUG, EAL, "Unregistered lcore callback %s-%p.\n",
 		callback->name, callback->arg);
 	free_callback(callback);
@@ -342,7 +342,7 @@ eal_lcore_non_eal_allocate(void)
 	struct lcore_callback *prev;
 	unsigned int lcore_id;
 
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		if (cfg->lcore_role[lcore_id] != ROLE_OFF)
 			continue;
@@ -373,7 +373,7 @@ eal_lcore_non_eal_allocate(void)
 		goto out;
 	}
 out:
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
 	return lcore_id;
 }
 
@@ -383,7 +383,7 @@ eal_lcore_non_eal_release(unsigned int lcore_id)
 	struct rte_config *cfg = rte_eal_get_configuration();
 	struct lcore_callback *callback;
 
-	rte_spinlock_lock(&lcore_lock);
+	rte_rwlock_write_lock(&lcore_lock);
 	if (cfg->lcore_role[lcore_id] != ROLE_NON_EAL)
 		goto out;
 	TAILQ_FOREACH(callback, &lcore_callbacks, next)
@@ -391,5 +391,62 @@ eal_lcore_non_eal_release(unsigned int lcore_id)
 	cfg->lcore_role[lcore_id] = ROLE_OFF;
 	cfg->lcore_count--;
 out:
-	rte_spinlock_unlock(&lcore_lock);
+	rte_rwlock_write_unlock(&lcore_lock);
+}
+
+int
+rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	unsigned int lcore_id;
+	int ret = 0;
+
+	rte_rwlock_read_lock(&lcore_lock);
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (cfg->lcore_role[lcore_id] == ROLE_OFF)
+			continue;
+		ret = cb(lcore_id, arg);
+		if (ret != 0)
+			break;
+	}
+	rte_rwlock_read_unlock(&lcore_lock);
+	return ret;
+}
+
+static int
+lcore_dump_cb(unsigned int lcore_id, void *arg)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+	const char *role;
+	FILE *f = arg;
+	int ret;
+
+	switch (cfg->lcore_role[lcore_id]) {
+	case ROLE_RTE:
+		role = "RTE";
+		break;
+	case ROLE_SERVICE:
+		role = "SERVICE";
+		break;
+	case ROLE_NON_EAL:
+		role = "NON_EAL";
+		break;
+	default:
+		role = "UNKNOWN";
+		break;
+	}
+
+	ret = eal_thread_dump_affinity(&lcore_config[lcore_id].cpuset, cpuset,
+		sizeof(cpuset));
+	fprintf(f, "lcore %u, socket %u, role %s, cpuset %s%s\n", lcore_id,
+		rte_lcore_to_socket_id(lcore_id), role, cpuset,
+		ret == 0 ? "" : "...");
+	return 0;
+}
+
+void
+rte_lcore_dump(FILE *f)
+{
+	rte_lcore_iterate(lcore_dump_cb, f);
 }
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index cead8ea5cf..ea09a4f3e0 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -105,17 +105,14 @@ rte_thread_get_affinity(rte_cpuset_t *cpusetp)
 }
 
 int
-eal_thread_dump_affinity(char *str, unsigned size)
+eal_thread_dump_affinity(rte_cpuset_t *cpuset, char *str, unsigned int size)
 {
-	rte_cpuset_t cpuset;
 	unsigned cpu;
 	int ret;
 	unsigned int out = 0;
 
-	rte_thread_get_affinity(&cpuset);
-
 	for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
-		if (!CPU_ISSET(cpu, &cpuset))
+		if (!CPU_ISSET(cpu, cpuset))
 			continue;
 
 		ret = snprintf(str + out,
@@ -138,6 +135,15 @@ eal_thread_dump_affinity(char *str, unsigned size)
 	return ret;
 }
 
+int
+eal_thread_dump_current_affinity(char *str, unsigned int size)
+{
+	rte_cpuset_t cpuset;
+
+	rte_thread_get_affinity(&cpuset);
+	return eal_thread_dump_affinity(&cpuset, str, size);
+}
+
 void
 __rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
 {
diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
index dc1fc6eb99..4a49117be8 100644
--- a/lib/librte_eal/common/eal_thread.h
+++ b/lib/librte_eal/common/eal_thread.h
@@ -32,13 +32,15 @@ unsigned eal_cpu_socket_id(unsigned cpu_id);
 #define RTE_CPU_AFFINITY_STR_LEN            256
 
 /**
- * Dump the current pthread cpuset.
+ * Dump the cpuset as a human readable string.
  * This function is private to EAL.
  *
  * Note:
  *   If the dump size is greater than the size of given buffer,
  *   the string will be truncated and with '\0' at the end.
  *
+ * @param cpuset
+ *   The CPU affinity object to dump.
  * @param str
  *   The string buffer the cpuset will dump to.
  * @param size
@@ -47,6 +49,13 @@ unsigned eal_cpu_socket_id(unsigned cpu_id);
  *   0 for success, -1 if truncation happens.
  */
 int
-eal_thread_dump_affinity(char *str, unsigned size);
+eal_thread_dump_affinity(rte_cpuset_t *cpuset, char *str, unsigned int size);
+
+/**
+ * Dump the current thread cpuset.
+ * This is a wrapper on eal_thread_dump_affinity().
+ */
+int
+eal_thread_dump_current_affinity(char *str, unsigned int size);
 
 #endif /* EAL_THREAD_H */
diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
index 7e5001a864..7d6f4aa09b 100644
--- a/lib/librte_eal/freebsd/eal.c
+++ b/lib/librte_eal/freebsd/eal.c
@@ -858,7 +858,7 @@ rte_eal_init(int argc, char **argv)
 	__rte_thread_init(config->master_lcore,
 		&lcore_config[config->master_lcore].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
 		config->master_lcore, thread_id, cpuset,
diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
index 0788a54fe6..99b5fefc4c 100644
--- a/lib/librte_eal/freebsd/eal_thread.c
+++ b/lib/librte_eal/freebsd/eal_thread.c
@@ -92,7 +92,7 @@ eal_thread_loop(__rte_unused void *arg)
 
 	__rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
 		lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
 
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 6e7206c79f..b8b64a6252 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -261,8 +261,8 @@ typedef void (*rte_lcore_uninit_cb)(unsigned int lcore_id, void *arg);
  * If this step succeeds, the callbacks are put in the lcore callbacks list
  * that will get called for each lcore allocation/release.
  *
- * Note: callbacks execution is serialised under a lock protecting the lcores
- * and callbacks list.
+ * Note: callbacks execution is serialised under a write lock protecting the
+ * lcores and callbacks list.
  *
  * @param name
  *   A name serving as a small description for this callback.
@@ -299,6 +299,49 @@ __rte_experimental
 void
 rte_lcore_callback_unregister(void *handle);
 
+/**
+ * Callback prototype for iterating over lcores.
+ *
+ * @param lcore_id
+ *   The lcore to consider.
+ * @param arg
+ *   An opaque pointer coming from the caller.
+ * @return
+ *   - 0 lets the iteration continue.
+ *   - !0 makes the iteration stop.
+ */
+typedef int (*rte_lcore_iterate_cb)(unsigned int lcore_id, void *arg);
+
+/**
+ * Iterate on all active lcores (ROLE_RTE, ROLE_SERVICE and ROLE_NON_EAL).
+ * No modification on the lcore states is allowed in the callback.
+ *
+ * Note: as opposed to init/uninit callbacks, iteration callbacks can be
+ * invoked in parallel as they are run under a read lock protecting the lcores
+ * and callbacks list.
+ *
+ * @param cb
+ *   The callback that gets passed each lcore.
+ * @param arg
+ *   An opaque pointer passed to cb.
+ * @return
+ *   Same return code as the last callback invocation (see rte_lcore_iterate_cb
+ *   description).
+ */
+__rte_experimental
+int
+rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg);
+
+/**
+ * List all lcores.
+ *
+ * @param f
+ *   The output stream where the dump should be sent.
+ */
+__rte_experimental
+void
+rte_lcore_dump(FILE *f);
+
 /**
  * Set core affinity of the current thread.
  * Support both EAL and non-EAL thread and update TLS.
diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
index f4daf46c4e..2cce19dd3d 100644
--- a/lib/librte_eal/linux/eal.c
+++ b/lib/librte_eal/linux/eal.c
@@ -1197,7 +1197,7 @@ rte_eal_init(int argc, char **argv)
 	__rte_thread_init(config->master_lcore,
 		&lcore_config[config->master_lcore].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		config->master_lcore, (uintptr_t)thread_id, cpuset,
 		ret == 0 ? "" : "...");
diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
index e0440c0000..48a2c1124b 100644
--- a/lib/librte_eal/linux/eal_thread.c
+++ b/lib/librte_eal/linux/eal_thread.c
@@ -92,7 +92,7 @@ eal_thread_loop(__rte_unused void *arg)
 
 	__rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
-	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+	ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		lcore_id, (uintptr_t)thread_id, cpuset, ret == 0 ? "" : "...");
 
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index c3e762c1d9..3aeb5b11ab 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -397,6 +397,8 @@ EXPERIMENTAL {
 	# added in 20.08
 	rte_lcore_callback_register;
 	rte_lcore_callback_unregister;
+	rte_lcore_dump;
+	rte_lcore_iterate;
 	rte_thread_register;
 	rte_thread_unregister;
 };
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v5 09/10] mempool/bucket: handle non-EAL lcores
  2020-07-06 14:15 ` [dpdk-dev] [PATCH v5 00/10] Register non-EAL threads as lcore David Marchand
                     ` (7 preceding siblings ...)
  2020-07-06 14:16   ` [dpdk-dev] [PATCH v5 08/10] eal: add lcore iterators David Marchand
@ 2020-07-06 14:16   ` David Marchand
  2020-07-06 14:16   ` [dpdk-dev] [PATCH v5 10/10] eal: add multiprocess disable API David Marchand
  9 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-07-06 14:16 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, olivier.matz, konstantin.ananyev,
	Artem V. Andreev

Convert to new lcore API to support non-EAL lcores.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Reviewed-by: Andrew Rybchenko <arybchenko@solarflare.com>
---
Changes since v3:
- addressed Andrew comments,

---
 drivers/mempool/bucket/rte_mempool_bucket.c | 130 ++++++++++++--------
 1 file changed, 81 insertions(+), 49 deletions(-)

diff --git a/drivers/mempool/bucket/rte_mempool_bucket.c b/drivers/mempool/bucket/rte_mempool_bucket.c
index 5ce1ef16fb..8b9daa9782 100644
--- a/drivers/mempool/bucket/rte_mempool_bucket.c
+++ b/drivers/mempool/bucket/rte_mempool_bucket.c
@@ -55,6 +55,7 @@ struct bucket_data {
 	struct rte_ring *shared_orphan_ring;
 	struct rte_mempool *pool;
 	unsigned int bucket_mem_size;
+	void *lcore_callback_handle;
 };
 
 static struct bucket_stack *
@@ -345,6 +346,23 @@ bucket_dequeue_contig_blocks(struct rte_mempool *mp, void **first_obj_table,
 	return 0;
 }
 
+struct bucket_count_per_lcore_ctx {
+	const struct bucket_data *bd;
+	unsigned int count;
+};
+
+static int
+bucket_count_per_lcore(unsigned int lcore_id, void *arg)
+{
+	struct bucket_count_per_lcore_ctx *bplc = arg;
+
+	bplc->count += bplc->bd->obj_per_bucket *
+		bplc->bd->buckets[lcore_id]->top;
+	bplc->count +=
+		rte_ring_count(bplc->bd->adoption_buffer_rings[lcore_id]);
+	return 0;
+}
+
 static void
 count_underfilled_buckets(struct rte_mempool *mp,
 			  void *opaque,
@@ -373,23 +391,64 @@ count_underfilled_buckets(struct rte_mempool *mp,
 static unsigned int
 bucket_get_count(const struct rte_mempool *mp)
 {
-	const struct bucket_data *bd = mp->pool_data;
-	unsigned int count =
-		bd->obj_per_bucket * rte_ring_count(bd->shared_bucket_ring) +
-		rte_ring_count(bd->shared_orphan_ring);
-	unsigned int i;
+	struct bucket_count_per_lcore_ctx bplc;
 
-	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		if (!rte_lcore_is_enabled(i))
-			continue;
-		count += bd->obj_per_bucket * bd->buckets[i]->top +
-			rte_ring_count(bd->adoption_buffer_rings[i]);
-	}
+	bplc.bd = mp->pool_data;
+	bplc.count = bplc.bd->obj_per_bucket *
+		rte_ring_count(bplc.bd->shared_bucket_ring);
+	bplc.count += rte_ring_count(bplc.bd->shared_orphan_ring);
 
+	rte_lcore_iterate(bucket_count_per_lcore, &bplc);
 	rte_mempool_mem_iter((struct rte_mempool *)(uintptr_t)mp,
-			     count_underfilled_buckets, &count);
+			     count_underfilled_buckets, &bplc.count);
+
+	return bplc.count;
+}
+
+static int
+bucket_init_per_lcore(unsigned int lcore_id, void *arg)
+{
+	char rg_name[RTE_RING_NAMESIZE];
+	struct bucket_data *bd = arg;
+	struct rte_mempool *mp;
+	int rg_flags;
+	int rc;
+
+	mp = bd->pool;
+	bd->buckets[lcore_id] = bucket_stack_create(mp,
+		mp->size / bd->obj_per_bucket);
+	if (bd->buckets[lcore_id] == NULL)
+		goto error;
+
+	rc = snprintf(rg_name, sizeof(rg_name), RTE_MEMPOOL_MZ_FORMAT ".a%u",
+		mp->name, lcore_id);
+	if (rc < 0 || rc >= (int)sizeof(rg_name))
+		goto error;
+
+	rg_flags = RING_F_SC_DEQ;
+	if (mp->flags & MEMPOOL_F_SP_PUT)
+		rg_flags |= RING_F_SP_ENQ;
+	bd->adoption_buffer_rings[lcore_id] = rte_ring_create(rg_name,
+		rte_align32pow2(mp->size + 1), mp->socket_id, rg_flags);
+	if (bd->adoption_buffer_rings[lcore_id] == NULL)
+		goto error;
 
-	return count;
+	return 0;
+error:
+	rte_free(bd->buckets[lcore_id]);
+	bd->buckets[lcore_id] = NULL;
+	return -1;
+}
+
+static void
+bucket_uninit_per_lcore(unsigned int lcore_id, void *arg)
+{
+	struct bucket_data *bd = arg;
+
+	rte_ring_free(bd->adoption_buffer_rings[lcore_id]);
+	bd->adoption_buffer_rings[lcore_id] = NULL;
+	rte_free(bd->buckets[lcore_id]);
+	bd->buckets[lcore_id] = NULL;
 }
 
 static int
@@ -399,7 +458,6 @@ bucket_alloc(struct rte_mempool *mp)
 	int rc = 0;
 	char rg_name[RTE_RING_NAMESIZE];
 	struct bucket_data *bd;
-	unsigned int i;
 	unsigned int bucket_header_size;
 	size_t pg_sz;
 
@@ -429,36 +487,17 @@ bucket_alloc(struct rte_mempool *mp)
 	/* eventually this should be a tunable parameter */
 	bd->bucket_stack_thresh = (mp->size / bd->obj_per_bucket) * 4 / 3;
 
+	bd->lcore_callback_handle = rte_lcore_callback_register("bucket",
+		bucket_init_per_lcore, bucket_uninit_per_lcore, bd);
+	if (bd->lcore_callback_handle == NULL) {
+		rc = -ENOMEM;
+		goto no_mem_for_stacks;
+	}
+
 	if (mp->flags & MEMPOOL_F_SP_PUT)
 		rg_flags |= RING_F_SP_ENQ;
 	if (mp->flags & MEMPOOL_F_SC_GET)
 		rg_flags |= RING_F_SC_DEQ;
-
-	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		if (!rte_lcore_is_enabled(i))
-			continue;
-		bd->buckets[i] =
-			bucket_stack_create(mp, mp->size / bd->obj_per_bucket);
-		if (bd->buckets[i] == NULL) {
-			rc = -ENOMEM;
-			goto no_mem_for_stacks;
-		}
-		rc = snprintf(rg_name, sizeof(rg_name),
-			      RTE_MEMPOOL_MZ_FORMAT ".a%u", mp->name, i);
-		if (rc < 0 || rc >= (int)sizeof(rg_name)) {
-			rc = -ENAMETOOLONG;
-			goto no_mem_for_stacks;
-		}
-		bd->adoption_buffer_rings[i] =
-			rte_ring_create(rg_name, rte_align32pow2(mp->size + 1),
-					mp->socket_id,
-					rg_flags | RING_F_SC_DEQ);
-		if (bd->adoption_buffer_rings[i] == NULL) {
-			rc = -rte_errno;
-			goto no_mem_for_stacks;
-		}
-	}
-
 	rc = snprintf(rg_name, sizeof(rg_name),
 		      RTE_MEMPOOL_MZ_FORMAT ".0", mp->name);
 	if (rc < 0 || rc >= (int)sizeof(rg_name)) {
@@ -498,11 +537,8 @@ bucket_alloc(struct rte_mempool *mp)
 	rte_ring_free(bd->shared_orphan_ring);
 cannot_create_shared_orphan_ring:
 invalid_shared_orphan_ring:
+	rte_lcore_callback_unregister(bd->lcore_callback_handle);
 no_mem_for_stacks:
-	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		rte_free(bd->buckets[i]);
-		rte_ring_free(bd->adoption_buffer_rings[i]);
-	}
 	rte_free(bd);
 no_mem_for_data:
 	rte_errno = -rc;
@@ -512,16 +548,12 @@ bucket_alloc(struct rte_mempool *mp)
 static void
 bucket_free(struct rte_mempool *mp)
 {
-	unsigned int i;
 	struct bucket_data *bd = mp->pool_data;
 
 	if (bd == NULL)
 		return;
 
-	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		rte_free(bd->buckets[i]);
-		rte_ring_free(bd->adoption_buffer_rings[i]);
-	}
+	rte_lcore_callback_unregister(bd->lcore_callback_handle);
 
 	rte_ring_free(bd->shared_orphan_ring);
 	rte_ring_free(bd->shared_bucket_ring);
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v5 10/10] eal: add multiprocess disable API
  2020-07-06 14:15 ` [dpdk-dev] [PATCH v5 00/10] Register non-EAL threads as lcore David Marchand
                     ` (8 preceding siblings ...)
  2020-07-06 14:16   ` [dpdk-dev] [PATCH v5 09/10] mempool/bucket: handle non-EAL lcores David Marchand
@ 2020-07-06 14:16   ` David Marchand
  9 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-07-06 14:16 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, olivier.matz, konstantin.ananyev,
	Anatoly Burakov, Neil Horman

The multiprocess feature has been implicitly enabled so far.
Applications might want to explicitly disable it, for example when using
the non-EAL thread registration API.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 app/test/test_mp_secondary.c              |  7 +++++++
 lib/librte_eal/common/eal_common_proc.c   |  4 ++--
 lib/librte_eal/common/eal_common_thread.c |  2 +-
 lib/librte_eal/common/eal_private.h       |  5 -----
 lib/librte_eal/include/rte_eal.h          | 14 ++++++++++++++
 lib/librte_eal/rte_eal_version.map        |  1 +
 6 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/app/test/test_mp_secondary.c b/app/test/test_mp_secondary.c
index ac15ddbf20..3a655c3400 100644
--- a/app/test/test_mp_secondary.c
+++ b/app/test/test_mp_secondary.c
@@ -97,6 +97,13 @@ run_secondary_instances(void)
 			(1 << rte_get_master_lcore()));
 
 	ret |= launch_proc(argv1);
+	printf("### Testing rte_mp_disable() reject:\n");
+	if (rte_mp_disable()) {
+		printf("Error: rte_mp_disable() has been accepted\n");
+		ret |= -1;
+	} else {
+		printf("# Checked rte_mp_disable() is refused\n");
+	}
 	ret |= launch_proc(argv2);
 
 	ret |= !(launch_proc(argv3));
diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index f0cf2b65d2..d35b9c0380 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -1236,7 +1236,7 @@ rte_mp_reply(struct rte_mp_msg *msg, const char *peer)
 
 /* Internally, the status of the mp feature is represented as a three-state:
  * - "unknown" as long as no secondary process attached to a primary process
- *   and there was no call to rte_mp_(dis|en)able yet,
+ *   and there was no call to rte_mp_disable yet,
  * - "enabled" as soon as a secondary process attaches to a primary process,
  * - "disabled" when a primary process successfully called rte_mp_disable,
  */
@@ -1263,7 +1263,7 @@ set_mp_status(enum mp_status status)
 }
 
 bool
-eal_disable_multiprocess(void)
+rte_mp_disable(void)
 {
 	return set_mp_status(MP_STATUS_DISABLED);
 }
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index ea09a4f3e0..73a055902a 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -261,7 +261,7 @@ rte_thread_register(void)
 		rte_errno = EINVAL;
 		return -1;
 	}
-	if (!eal_disable_multiprocess()) {
+	if (!rte_mp_disable()) {
 		RTE_LOG(ERR, EAL, "Multiprocess in use, registering non-EAL threads is not supported.\n");
 		rte_errno = EINVAL;
 		return -1;
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 535e008474..a917727e41 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -733,11 +733,6 @@ void __rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset);
  */
 void __rte_thread_uninit(void);
 
-/**
- * Mark primary process as not supporting multi-process.
- */
-bool eal_disable_multiprocess(void);
-
 /**
  * Instruct primary process that a secondary process wants to attach.
  */
diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
index 0913d1947c..fc7db2d427 100644
--- a/lib/librte_eal/include/rte_eal.h
+++ b/lib/librte_eal/include/rte_eal.h
@@ -151,6 +151,20 @@ int rte_eal_cleanup(void);
  */
 int rte_eal_primary_proc_alive(const char *config_file_path);
 
+/**
+ * Disable multiprocess.
+ *
+ * This function can be called to indicate that multiprocess won't be used for
+ * the rest of the application life.
+ *
+ * @return
+ *   - true if called from a primary process that had no secondary processes
+ *     attached,
+ *   - false, otherwise.
+ */
+__rte_experimental
+bool rte_mp_disable(void);
+
 #define RTE_MP_MAX_FD_NUM	8    /* The max amount of fds */
 #define RTE_MP_MAX_NAME_LEN	64   /* The max length of action name */
 #define RTE_MP_MAX_PARAM_LEN	256  /* The max length of param */
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 3aeb5b11ab..d17f13f8d3 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -399,6 +399,7 @@ EXPERIMENTAL {
 	rte_lcore_callback_unregister;
 	rte_lcore_dump;
 	rte_lcore_iterate;
+	rte_mp_disable;
 	rte_thread_register;
 	rte_thread_unregister;
 };
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v6 00/10] Register non-EAL threads as lcore
  2020-06-10 14:44 [dpdk-dev] [PATCH 0/7] Register external threads as lcore David Marchand
                   ` (11 preceding siblings ...)
  2020-07-06 14:15 ` [dpdk-dev] [PATCH v5 00/10] Register non-EAL threads as lcore David Marchand
@ 2020-07-06 20:52 ` David Marchand
  2020-07-06 20:52   ` [dpdk-dev] [PATCH v6 01/10] eal: relocate per thread symbols to common David Marchand
                     ` (11 more replies)
  12 siblings, 12 replies; 126+ messages in thread
From: David Marchand @ 2020-07-06 20:52 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, olivier.matz, konstantin.ananyev

OVS and some other applications have been hacking into DPDK internals to
fake EAL threads and avoid performance penalty of only having non-EAL
threads.

This series proposes to add a new type of lcores and maps those threads
to such lcores.
Non-EAL threads won't run the DPDK EAL main loop.
As a consequence, part of the EAL threads API cannot work.

Having new lcores appearing during the process lifetime is not expected
by some DPDK components. This is addressed by introducing init/uninit
callbacks invoked when such lcores are hotplugged.

There is still some work/discussion:
- refuse new lcore role in incompatible EAL threads API (or document it
  only as those API were already incompatible?),
- think about deprecation notices for existing RTE_FOREACH_LCORE macros
  and the like; it is probably worth discussing how to iterate over
  lcores,

For the interested parties, I have a patch [1] against dpdk-latest OVS
branch that makes use of this series (this patch probably won't work with
v5, it will be rebased once dpdk side is ready).

1: https://patchwork.ozlabs.org/project/openvswitch/patch/20200626123017.28555-1-david.marchand@redhat.com/

Changes since v5:
- fixed windows build,

Changes since v4:
- added separate API to control mp feature activation,
- addressed Konstantin and Olivier comments,

Changes since v3:
- added init failure when trying to use in conjunction with multiprocess,
- addressed Andrew comments,

Changes since v2:
- fixed windows build error due to missing trace stub,
- fixed bug when rolling back on lcore register,

Changes since v1:
- rebased on master (conflicts on merged Windows series),
- separated lcore role code cleanup in a patch,
- tried to use a single naming, so kept non-EAL threads as the main
  notion. non-EAL threads are then distinguished between registered and
  unregistered non-EAL threads,
- added unit tests (still missing some coverage, marked with a FIXME),
- reworked callbacks call under a common rwlock lock which protects
  lcores allocations and callbacks registration,
- introduced lcore iterators and converted the bucket mempool driver,

-- 
David Marchand

David Marchand (10):
  eal: relocate per thread symbols to common
  eal: fix multiple definition of per lcore thread id
  eal: introduce thread init helper
  eal: introduce thread uninit helper
  eal: move lcore role code
  eal: register non-EAL threads as lcores
  eal: add lcore init callbacks
  eal: add lcore iterators
  mempool/bucket: handle non-EAL lcores
  eal: add multiprocess disable API

 MAINTAINERS                                   |   1 +
 app/test/Makefile                             |   1 +
 app/test/autotest_data.py                     |   6 +
 app/test/meson.build                          |   2 +
 app/test/test_lcores.c                        | 373 ++++++++++++++++++
 app/test/test_mp_secondary.c                  |   7 +
 doc/guides/howto/debug_troubleshoot.rst       |   5 +-
 .../prog_guide/env_abstraction_layer.rst      |  22 +-
 doc/guides/prog_guide/mempool_lib.rst         |   2 +-
 doc/guides/rel_notes/release_20_08.rst        |   6 +
 drivers/mempool/bucket/rte_mempool_bucket.c   | 130 +++---
 lib/librte_eal/common/eal_common_lcore.c      | 249 +++++++++++-
 lib/librte_eal/common/eal_common_proc.c       |  41 ++
 lib/librte_eal/common/eal_common_thread.c     | 141 +++++--
 lib/librte_eal/common/eal_common_trace.c      |  49 ++-
 lib/librte_eal/common/eal_memcfg.h            |   2 +
 lib/librte_eal/common/eal_private.h           |  39 ++
 lib/librte_eal/common/eal_thread.h            |  21 +-
 lib/librte_eal/common/eal_trace.h             |   1 +
 lib/librte_eal/freebsd/eal.c                  |  20 +-
 lib/librte_eal/freebsd/eal_thread.c           |  38 +-
 lib/librte_eal/include/rte_eal.h              |  26 +-
 lib/librte_eal/include/rte_lcore.h            | 179 ++++++++-
 lib/librte_eal/linux/eal.c                    |  21 +-
 lib/librte_eal/linux/eal_thread.c             |  38 +-
 lib/librte_eal/rte_eal_version.map            |  16 +
 lib/librte_eal/windows/eal.c                  |  16 +-
 lib/librte_eal/windows/eal_thread.c           |  14 +-
 lib/librte_mempool/rte_mempool.h              |  11 +-
 29 files changed, 1234 insertions(+), 243 deletions(-)
 create mode 100644 app/test/test_lcores.c

-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v6 01/10] eal: relocate per thread symbols to common
  2020-07-06 20:52 ` [dpdk-dev] [PATCH v6 00/10] Register non-EAL threads as lcore David Marchand
@ 2020-07-06 20:52   ` David Marchand
  2020-07-06 20:52   ` [dpdk-dev] [PATCH v6 02/10] eal: fix multiple definition of per lcore thread id David Marchand
                     ` (10 subsequent siblings)
  11 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-07-06 20:52 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, olivier.matz, konstantin.ananyev,
	Harini Ramakrishnan, Omar Cardona, Pallavi Kadam, Ranjit Menon

We have per lcore thread symbols scattered in OS implementations but
common code relies on them.
Move all of them to common code.

RTE_PER_LCORE(_socket_id) and RTE_PER_LCORE(_cpuset) have public
accessors and are not exported through the library map, they can be
made static.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Reviewed-by: Olivier Matz <olivier.matz@6wind.com>
---
 lib/librte_eal/common/eal_common_thread.c | 5 ++++-
 lib/librte_eal/freebsd/eal_thread.c       | 4 ----
 lib/librte_eal/include/rte_lcore.h        | 1 -
 lib/librte_eal/linux/eal_thread.c         | 4 ----
 lib/librte_eal/windows/eal_thread.c       | 4 ----
 5 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index a341070926..7be80c292e 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -21,7 +21,10 @@
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DECLARE_PER_LCORE(unsigned , _socket_id);
+RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
+static RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) =
+	(unsigned int)SOCKET_ID_ANY;
+static RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
 
 unsigned rte_socket_id(void)
 {
diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
index b52019782a..40676d9ef5 100644
--- a/lib/librte_eal/freebsd/eal_thread.c
+++ b/lib/librte_eal/freebsd/eal_thread.c
@@ -25,10 +25,6 @@
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
 /*
  * Send a message to a slave lcore identified by slave_id to call a
  * function f with argument arg. Once the execution is done, the
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 339046bc86..5c1d1926e9 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -23,7 +23,6 @@ extern "C" {
 #define LCORE_ID_ANY     UINT32_MAX       /**< Any lcore. */
 
 RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
-RTE_DECLARE_PER_LCORE(rte_cpuset_t, _cpuset); /**< Per thread "cpuset". */
 
 /**
  * Get a lcore's role.
diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
index cd9d6e0ebf..a52ebef3a4 100644
--- a/lib/librte_eal/linux/eal_thread.c
+++ b/lib/librte_eal/linux/eal_thread.c
@@ -25,10 +25,6 @@
 #include "eal_private.h"
 #include "eal_thread.h"
 
-RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
 /*
  * Send a message to a slave lcore identified by slave_id to call a
  * function f with argument arg. Once the execution is done, the
diff --git a/lib/librte_eal/windows/eal_thread.c b/lib/librte_eal/windows/eal_thread.c
index 3dd56519c9..f12a2ec6ad 100644
--- a/lib/librte_eal/windows/eal_thread.c
+++ b/lib/librte_eal/windows/eal_thread.c
@@ -16,10 +16,6 @@
 #include "eal_private.h"
 #include "eal_windows.h"
 
-RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
-RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) = (unsigned int)SOCKET_ID_ANY;
-RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
-
 /*
  * Send a message to a slave lcore identified by slave_id to call a
  * function f with argument arg. Once the execution is done, the
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v6 02/10] eal: fix multiple definition of per lcore thread id
  2020-07-06 20:52 ` [dpdk-dev] [PATCH v6 00/10] Register non-EAL threads as lcore David Marchand
  2020-07-06 20:52   ` [dpdk-dev] [PATCH v6 01/10] eal: relocate per thread symbols to common David Marchand
@ 2020-07-06 20:52   ` David Marchand
  2020-07-06 20:52   ` [dpdk-dev] [PATCH v6 03/10] eal: introduce thread init helper David Marchand
                     ` (9 subsequent siblings)
  11 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-07-06 20:52 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, olivier.matz, konstantin.ananyev,
	Neil Horman, Cunming Liang

Because of the inline accessor + static declaration in rte_gettid(),
we end up with multiple symbols for RTE_PER_LCORE(_thread_id).
Each compilation unit will pay a cost when accessing this information
for the first time.

$ nm build/app/dpdk-testpmd | grep per_lcore__thread_id
0000000000000054 d per_lcore__thread_id.5037
0000000000000040 d per_lcore__thread_id.5103
0000000000000048 d per_lcore__thread_id.5259
000000000000004c d per_lcore__thread_id.5259
0000000000000044 d per_lcore__thread_id.5933
0000000000000058 d per_lcore__thread_id.6261
0000000000000050 d per_lcore__thread_id.7378
000000000000005c d per_lcore__thread_id.7496
000000000000000c d per_lcore__thread_id.8016
0000000000000010 d per_lcore__thread_id.8431

Make it global as part of the DPDK_21 stable ABI.

Fixes: ef76436c6834 ("eal: get unique thread id")

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Ray Kinsella <mdr@ashroe.eu>
Reviewed-by: Olivier Matz <olivier.matz@6wind.com>
---
 lib/librte_eal/common/eal_common_thread.c | 1 +
 lib/librte_eal/include/rte_eal.h          | 3 ++-
 lib/librte_eal/rte_eal_version.map        | 7 +++++++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 7be80c292e..fd13453fee 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -22,6 +22,7 @@
 #include "eal_thread.h"
 
 RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
+RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
 static RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) =
 	(unsigned int)SOCKET_ID_ANY;
 static RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
diff --git a/lib/librte_eal/include/rte_eal.h b/lib/librte_eal/include/rte_eal.h
index 2f9ed298de..2edf8c6556 100644
--- a/lib/librte_eal/include/rte_eal.h
+++ b/lib/librte_eal/include/rte_eal.h
@@ -447,6 +447,8 @@ enum rte_intr_mode rte_eal_vfio_intr_mode(void);
  */
 int rte_sys_gettid(void);
 
+RTE_DECLARE_PER_LCORE(int, _thread_id);
+
 /**
  * Get system unique thread id.
  *
@@ -456,7 +458,6 @@ int rte_sys_gettid(void);
  */
 static inline int rte_gettid(void)
 {
-	static RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
 	if (RTE_PER_LCORE(_thread_id) == -1)
 		RTE_PER_LCORE(_thread_id) = rte_sys_gettid();
 	return RTE_PER_LCORE(_thread_id);
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 196eef5afa..0d42d44ce9 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -221,6 +221,13 @@ DPDK_20.0 {
 	local: *;
 };
 
+DPDK_21 {
+	global:
+
+	per_lcore__thread_id;
+
+} DPDK_20.0;
+
 EXPERIMENTAL {
 	global:
 
-- 
2.23.0


^ permalink raw reply	[flat|nested] 126+ messages in thread

* [dpdk-dev] [PATCH v6 03/10] eal: introduce thread init helper
  2020-07-06 20:52 ` [dpdk-dev] [PATCH v6 00/10] Register non-EAL threads as lcore David Marchand
  2020-07-06 20:52   ` [dpdk-dev] [PATCH v6 01/10] eal: relocate per thread symbols to common David Marchand
  2020-07-06 20:52   ` [dpdk-dev] [PATCH v6 02/10] eal: fix multiple definition of per lcore thread id David Marchand
@ 2020-07-06 20:52   ` David Marchand
  2020-07-06 20:52   ` [dpdk-dev] [PATCH v6 04/10] eal: introduce thread uninit helper David Marchand
                     ` (8 subsequent siblings)
  11 siblings, 0 replies; 126+ messages in thread
From: David Marchand @ 2020-07-06 20:52 UTC (permalink / raw)
  To: dev
  Cc: jerinjacobk, bruce.richardson, mdr, thomas, arybchenko, ktraynor,
	ian.stokes, i.maximets, olivier.matz, konstantin.ananyev,
	Harini Ramakrishnan, Omar Cardona, Pallavi Kadam, Ranjit Menon

Introduce a helper responsible for initialising the per thread context.
We can then have a unified context for EAL and non-EAL threads and
remove copy/paste'd OS-specific helpers.

Per EAL thread CPU affinity setting is separated from the thread init.
This accommodates the Windows EAL, where CPU affinity is not set at
the moment.
Besides, having affinity set by the master lcore in FreeBSD and Linux
will make it possible to detect errors rather than panic in the child
thread. But the cleanup when such an event happens is left for later.

A side-effect of this patch is that control threads can now use
recursive locks (rte_gettid() was not called before).

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v4:
- renamed rte_thread_init as __rte_thread_init and moved to
  eal_private.h,

Changes since v1:
- rebased on master, removed Windows workarounds wrt gettid and traces
  support,

---
 lib/librte_eal/common/eal_common_thread.c | 50 ++++++++++++++---------
 lib/librte_eal/common/eal_private.h       | 10 +++++
 lib/librte_eal/common/eal_thread.h        |  8 ----
 lib/librte_eal/freebsd/eal.c              | 14 ++++++-
 lib/librte_eal/freebsd/eal_thread.c       | 32 +--------------
 lib/librte_eal/linux/eal.c                | 15 ++++++-
 lib/librte_eal/linux/eal_thread.c         | 32 +--------------
 lib/librte_eal/windows/eal.c              |  3 +-
 lib/librte_eal/windows/eal_thread.c       | 10 +----
 9 files changed, 71 insertions(+), 103 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index fd13453fee..fb06f8f802 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -71,20 +71,10 @@ eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
 	return socket_id;
 }
 
-int
-rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+static void
+thread_update_affinity(rte_cpuset_t *cpusetp)
 {
-	int s;
-	unsigned lcore_id;
-	pthread_t tid;
-
-	tid = pthread_self();
-
-	s = pthread_setaffinity_np(tid, sizeof(rte_cpuset_t), cpusetp);
-	if (s != 0) {
-		RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
-		return -1;
-	}
+	unsigned int lcore_id = rte_lcore_id();
 
 	/* store socket_id in TLS for quick access */
 	RTE_PER_LCORE(_socket_id) =
@@ -94,14 +84,24 @@ rte_thread_set_affinity(rte_cpuset_t *cpusetp)
 	memmove(&RTE_PER_LCORE(_cpuset), cpusetp,
 		sizeof(rte_cpuset_t));
 
-	lcore_id = rte_lcore_id();
 	if (lcore_id != (unsigned)LCORE_ID_ANY) {
 		/* EAL thread will update lcore_config */
 		lcore_config[lcore_id].socket_id = RTE_PER_LCORE(_socket_id);
 		memmove(&lcore_config[lcore_id].cpuset, cpusetp,
 			sizeof(rte_cpuset_t));
 	}
+}
+
+int
+rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+{
+	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+			cpusetp) != 0) {
+		RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
+		return -1;
+	}
 
+	thread_update_affinity(cpusetp);
 	return 0;
 }
 
@@ -147,6 +147,19 @@ eal_thread_dump_affinity(char *str, unsigned size)
 	return ret;
 }
 
+void
+__rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset)
+{
+	/* set the lcore ID in per-lcore memory area */
+	RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+	/* acquire system unique id */
+	rte_gettid();
+
+	thread_update_affinity(cpuset);
+
+	__rte_trace_mem_per_thread_alloc();
+}
 
 struct rte_thread_ctrl_params {
 	void *(*start_routine)(void *);
@@ -154,7 +167,7 @@ struct rte_thread_ctrl_params {
 	pthread_barrier_t configured;
 };
 
-static void *rte_thread_init(void *arg)
+static void *ctrl_thread_init(void *arg)
 {
 	int ret;
 	struct internal_config *internal_conf =
@@ -164,8 +177,7 @@ static void *rte_thread_init(void *arg)
 	void *(*start_routine)(void *) = params->start_routine;
 	void *routine_arg = params->arg;
 
-	/* Store cpuset in TLS for quick access */
-	memmove(&RTE_PER_LCORE(_cpuset), cpuset, sizeof(rte_cpuset_t));
+	__rte_thread_init(rte_lcore_id(), cpuset);
 
 	ret = pthread_barrier_wait(&params->configured);
 	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
@@ -173,8 +185,6 @@ static void *rte_thread_init(void *arg)
 		free(params);
 	}
 
-	__rte_trace_mem_per_thread_alloc();
-
 	return start_routine(routine_arg);
 }
 
@@ -198,7 +208,7 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
 
 	pthread_barrier_init(&params->configured, NULL, 2);
 
-	ret = pthread_create(thread, attr, rte_thread_init, (void *)params);
+	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
 	if (ret != 0) {
 		free(params);
 		return -ret;
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 46bcae9305..5d8b53882d 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -699,4 +699,14 @@ eal_get_internal_configuration(void);
 rte_usage_hook_t
 eal_get_application_usage_hook(void);
 
+/**
+ * Init per-lcore info in current thread.
+ *
+ * @param lcore_id
+ *   identifier of lcore.
+ * @param cpuset
+ *   CPU affinity for this thread.
+ */
+void __rte_thread_init(unsigned int lcore_id, rte_cpuset_t *cpuset);
+
 #endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
index b40ed249ed..dc1fc6eb99 100644
--- a/lib/librte_eal/common/eal_thread.h
+++ b/lib/librte_eal/common/eal_thread.h
@@ -15,14 +15,6 @@
  */
 __rte_noreturn void *eal_thread_loop(void *arg);
 
-/**
- * Init per-lcore info for master thread
- *
- * @param lcore_id
- *   identifier of master lcore
- */
-void eal_thread_init_master(unsigned lcore_id);
-
 /**
  * Get the NUMA socket id from cpu id.
  * This function is private to EAL.
diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
index 8c75cba79a..fd577daf44 100644
--- a/lib/librte_eal/freebsd/eal.c
+++ b/lib/librte_eal/freebsd/eal.c
@@ -845,7 +845,14 @@ rte_eal_init(int argc, char **argv)
 
 	eal_check_mem_on_local_socket();
 
-	eal_thread_init_master(config->master_lcore);
+	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+			&lcore_config[config->master_lcore].cpuset) != 0) {
+		rte_eal_init_alert("Cannot set affinity");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	__rte_thread_init(config->master_lcore,
+		&lcore_config[config->master_lcore].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
 
@@ -876,6 +883,11 @@ rte_eal_init(int argc, char **argv)
 		snprintf(thread_name, sizeof(thread_name),
 				"lcore-slave-%d", i);
 		rte_thread_setname(lcore_config[i].thread_id, thread_name);
+
+		ret = pthread_setaffinity_np(lcore_config[i].thread_id,
+			sizeof(rte_cpuset_t), &lcore_config[i].cpuset);
+		if (ret != 0)
+			rte_panic("Cannot set affinity\n");
 	}
 
 	/*
diff --git a/lib/librte_eal/freebsd/eal_thread.c b/lib/librte_eal/freebsd/eal_thread.c
index 40676d9ef5..0788a54fe6 100644
--- a/lib/librte_eal/freebsd/eal_thread.c
+++ b/lib/librte_eal/freebsd/eal_thread.c
@@ -66,29 +66,6 @@ rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
 	return rc;
 }
 
-/* set affinity for current thread */
-static int
-eal_thread_set_affinity(void)
-{
-	unsigned lcore_id = rte_lcore_id();
-
-	/* acquire system unique id  */
-	rte_gettid();
-
-	/* update EAL thread core affinity */
-	return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
-}
-
-void eal_thread_init_master(unsigned lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
-}
-
 /* main loop of threads */
 __rte_noreturn void *
 eal_thread_loop(__rte_unused void *arg)
@@ -113,19 +90,12 @@ eal_thread_loop(__rte_unused void *arg)
 	m2s = lcore_config[lcore_id].pipe_master2slave[0];
 	s2m = lcore_config[lcore_id].pipe_slave2master[1];
 
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
+	__rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
-
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
 		lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
 
-	__rte_trace_mem_per_thread_alloc();
 	rte_eal_trace_thread_lcore_ready(lcore_id, cpuset);
 
 	/* read on our pipe to get commands */
diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
index 3b56d14da1..bd089cdd44 100644
--- a/lib/librte_eal/linux/eal.c
+++ b/lib/librte_eal/linux/eal.c
@@ -1184,10 +1184,16 @@ rte_eal_init(int argc, char **argv)
 
 	eal_check_mem_on_local_socket();
 
-	eal_thread_init_master(config->master_lcore);
+	if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+			&lcore_config[config->master_lcore].cpuset) != 0) {
+		rte_eal_init_alert("Cannot set affinity");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	__rte_thread_init(config->master_lcore,
+		&lcore_config[config->master_lcore].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
-
 	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		config->master_lcore, (uintptr_t)thread_id, cpuset,
 		ret == 0 ? "" : "...");
@@ -1219,6 +1225,11 @@ rte_eal_init(int argc, char **argv)
 		if (ret != 0)
 			RTE_LOG(DEBUG, EAL,
 				"Cannot set name for lcore thread\n");
+
+		ret = pthread_setaffinity_np(lcore_config[i].thread_id,
+			sizeof(rte_cpuset_t), &lcore_config[i].cpuset);
+		if (ret != 0)
+			rte_panic("Cannot set affinity\n");
 	}
 
 	/*
diff --git a/lib/librte_eal/linux/eal_thread.c b/lib/librte_eal/linux/eal_thread.c
index a52ebef3a4..e0440c0000 100644
--- a/lib/librte_eal/linux/eal_thread.c
+++ b/lib/librte_eal/linux/eal_thread.c
@@ -66,29 +66,6 @@ rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
 	return rc;
 }
 
-/* set affinity for current EAL thread */
-static int
-eal_thread_set_affinity(void)
-{
-	unsigned lcore_id = rte_lcore_id();
-
-	/* acquire system unique id  */
-	rte_gettid();
-
-	/* update EAL thread core affinity */
-	return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
-}
-
-void eal_thread_init_master(unsigned lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
-}
-
 /* main loop of threads */
 __rte_noreturn void *
 eal_thread_loop(__rte_unused void *arg)
@@ -113,19 +90,12 @@ eal_thread_loop(__rte_unused void *arg)
 	m2s = lcore_config[lcore_id].pipe_master2slave[0];
 	s2m = lcore_config[lcore_id].pipe_slave2master[1];
 
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-
-	/* set CPU affinity */
-	if (eal_thread_set_affinity() < 0)
-		rte_panic("cannot set affinity\n");
+	__rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
 	ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
-
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
 		lcore_id, (uintptr_t)thread_id, cpuset, ret == 0 ? "" : "...");
 
-	__rte_trace_mem_per_thread_alloc();
 	rte_eal_trace_thread_lcore_ready(lcore_id, cpuset);
 
 	/* read on our pipe to get commands */
diff --git a/lib/librte_eal/windows/eal.c b/lib/librte_eal/windows/eal.c
index eb10b4ef96..9f5d019e64 100644
--- a/lib/librte_eal/windows/eal.c
+++ b/lib/librte_eal/windows/eal.c
@@ -333,7 +333,8 @@ rte_eal_init(int argc, char **argv)
 		return -1;
 	}
 
-	eal_thread_init_master(config->master_lcore);
+	__rte_thread_init(config->master_lcore,
+		&lcore_config[config->master_lcore].cpuset);
 
 	bscan = rte_bus_scan();
 	if (bscan < 0) {
diff --git a/lib/librte_eal/windows/eal_thread.c b/lib/librte_eal/windows/eal_thread.c
index f12a2ec6ad..20889b6196 100644
--- a/lib/librte_eal/windows/eal_thread.c
+++ b/lib/librte_eal/windows/eal_thread.c
@@ -53,13 +53,6 @@ rte_eal_remote_launch(lcore_function_t *f, void *arg, unsigned int slave_id)
 	return 0;
 }
 
-void
-eal_thread_init_master(unsigned int lcore_id)
-{
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
-}
-
 /* main loop of threads */
 void *
 eal_thread_loop(void *arg __rte_unused)
@@ -84,8 +77,7 @@ eal_thread_loop(void *arg __rte_unused)
 	m2s = lcore_config[lcore_id].pipe_master2slave[0];
 	s2m = lcore_config[lcore_id].pipe_slave2master[1];
 
-	/* set the lcore ID in per-lcore memory area */
-	RTE_PER_LCORE(_lcore_id) = lcore_id;
+	__rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset);
 
 	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s])\n",
 		lcore_id, (uintptr_t)thread_id, cpuset);
-- 
2.23.0


^ permalink raw