All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH v2 01/13] Extend health thread lifetime
       [not found] <20170918225206.17725-1-jonathan.rajotte-julien@efficios.com>
@ 2017-09-18 22:51 ` Jonathan Rajotte
  2017-09-18 22:51 ` [RFC PATCH v2 02/13] Reorder pthread_join for easier ordering later on Jonathan Rajotte
                   ` (24 subsequent siblings)
  25 siblings, 0 replies; 27+ messages in thread
From: Jonathan Rajotte @ 2017-09-18 22:51 UTC (permalink / raw)
  To: lttng-dev; +Cc: jgalar

Allow easier control over the thread by providing a dedicated quit pipe.

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
---
 src/bin/lttng-sessiond/main.c | 74 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 55 insertions(+), 19 deletions(-)

diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
index 03f695ec..5d7df744 100644
--- a/src/bin/lttng-sessiond/main.c
+++ b/src/bin/lttng-sessiond/main.c
@@ -204,6 +204,7 @@ static int kernel_poll_pipe[2] = { -1, -1 };
  * for all threads when receiving an event on the pipe.
  */
 static int thread_quit_pipe[2] = { -1, -1 };
+static int thread_health_teardown_trigger_pipe[2] = { -1, -1 };
 
 /*
  * This pipe is used to inform the thread managing application communication
@@ -477,6 +478,11 @@ static int init_thread_quit_pipe(void)
 	return __init_thread_quit_pipe(thread_quit_pipe);
 }
 
+static int init_thread_health_teardown_trigger_pipe(void)
+{
+	return __init_thread_quit_pipe(thread_health_teardown_trigger_pipe);
+}
+
 /*
  * Stop all threads by closing the thread quit pipe.
  */
@@ -4297,11 +4303,13 @@ static void *thread_manage_health(void *data)
 		goto error;
 	}
 
-	/*
-	 * Pass 2 as size here for the thread quit pipe and client_sock. Nothing
-	 * more will be added to this poll set.
-	 */
-	ret = sessiond_set_thread_pollset(&events, 2);
+	ret = lttng_poll_create(&events, 2, LTTNG_CLOEXEC);
+	if (ret < 0) {
+		goto error;
+	}
+
+	/* Add teardown trigger */
+	ret = lttng_poll_add(&events, thread_health_teardown_trigger_pipe[0], LPOLLIN | LPOLLERR);
 	if (ret < 0) {
 		goto error;
 	}
@@ -4343,10 +4351,18 @@ restart:
 			}
 
 			/* Thread quit pipe has been closed. Killing thread. */
-			ret = sessiond_check_thread_quit_pipe(pollfd, revents);
-			if (ret) {
-				err = 0;
-				goto exit;
+			if (pollfd == thread_health_teardown_trigger_pipe[0]) {
+				if (revents & LPOLLIN) {
+					/* Normal exit */
+					err = 0;
+					goto exit;
+				} else if (revents & LPOLLERR) {
+					ERR("Health teardown poll error");
+					goto error;
+				} else {
+					ERR("Unexpected poll events %u for teardown sock", revents);
+					goto error;
+				}
 			}
 
 			/* Event on the registration socket */
@@ -4412,8 +4428,13 @@ restart:
 		}
 	}
 
-exit:
 error:
+	/*
+	 * Perform stop_thread only on error path since in a normal teardown the
+	 * health thread is in the last thread to terminate.
+	 */
+	stop_threads();
+exit:
 	if (err) {
 		ERR("Health error occurred in %s", __func__);
 	}
@@ -4425,9 +4446,7 @@ error:
 			PERROR("close");
 		}
 	}
-
 	lttng_poll_clean(&events);
-	stop_threads();
 	rcu_unregister_thread();
 	return NULL;
 }
@@ -5631,6 +5650,7 @@ int main(int argc, char **argv)
 			*ust64_channel_monitor_pipe = NULL,
 			*kernel_channel_monitor_pipe = NULL;
 	bool notification_thread_running = false;
+	bool health_thread_running = false;
 
 	init_kernel_workarounds();
 
@@ -5717,6 +5737,11 @@ int main(int argc, char **argv)
 		goto exit_init_data;
 	}
 
+	if (init_thread_health_teardown_trigger_pipe()) {
+		retval = -1;
+		goto exit_init_data;
+	}
+
 	/* Check if daemon is UID = 0 */
 	is_root = !getuid();
 
@@ -6119,6 +6144,7 @@ int main(int argc, char **argv)
 		retval = -1;
 		goto exit_health;
 	}
+	health_thread_running = true;
 
 	/* notification_thread_data acquires the pipes' read side. */
 	notification_thread_handle = notification_thread_handle_create(
@@ -6311,13 +6337,6 @@ exit_dispatch:
 
 exit_client:
 exit_notification:
-	ret = pthread_join(health_thread, &status);
-	if (ret) {
-		errno = ret;
-		PERROR("pthread_join health thread");
-		retval = -1;
-	}
-
 exit_health:
 exit_init_data:
 	/*
@@ -6359,6 +6378,20 @@ exit_init_data:
 		notification_thread_handle_destroy(notification_thread_handle);
 	}
 
+	if (health_thread_running) {
+		ret = notify_thread_pipe(thread_health_teardown_trigger_pipe[1]);
+		if (ret < 0) {
+			ERR("write error on thread quit pipe");
+		}
+
+		ret = pthread_join(health_thread, &status);
+		if (ret) {
+			errno = ret;
+			PERROR("pthread_join health thread");
+			retval = -1;
+		}
+	}
+
 	rcu_thread_offline();
 	rcu_unregister_thread();
 
@@ -6366,6 +6399,9 @@ exit_init_data:
 	if (ret) {
 		retval = -1;
 	}
+
+	/* Health thread teardown pipe cleanup */
+	utils_close_pipe(thread_health_teardown_trigger_pipe);
 	lttng_pipe_destroy(ust32_channel_monitor_pipe);
 	lttng_pipe_destroy(ust64_channel_monitor_pipe);
 	lttng_pipe_destroy(kernel_channel_monitor_pipe);
-- 
2.11.0

_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [RFC PATCH v2 02/13] Reorder pthread_join for easier ordering later on
       [not found] <20170918225206.17725-1-jonathan.rajotte-julien@efficios.com>
  2017-09-18 22:51 ` [RFC PATCH v2 01/13] Extend health thread lifetime Jonathan Rajotte
@ 2017-09-18 22:51 ` Jonathan Rajotte
  2017-09-18 22:51 ` [RFC PATCH v2 03/13] Terminate dispatch thread after reg_apps_thread is terminated Jonathan Rajotte
                   ` (23 subsequent siblings)
  25 siblings, 0 replies; 27+ messages in thread
From: Jonathan Rajotte @ 2017-09-18 22:51 UTC (permalink / raw)
  To: lttng-dev; +Cc: jgalar

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
---
 src/bin/lttng-sessiond/main.c | 96 ++++++++++++++++++++-----------------------
 1 file changed, 45 insertions(+), 51 deletions(-)

diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
index 5d7df744..45c0270e 100644
--- a/src/bin/lttng-sessiond/main.c
+++ b/src/bin/lttng-sessiond/main.c
@@ -6170,15 +6170,26 @@ int main(int argc, char **argv)
 	}
 	notification_thread_running = true;
 
-	/* Create thread to manage the client socket */
-	ret = pthread_create(&client_thread, default_pthread_attr(),
-			thread_manage_clients, (void *) NULL);
+	/* Create thread to manage application notify socket */
+	ret = pthread_create(&apps_notify_thread, default_pthread_attr(),
+			ust_thread_manage_notify, (void *) NULL);
 	if (ret) {
 		errno = ret;
-		PERROR("pthread_create clients");
+		PERROR("pthread_create notify");
 		retval = -1;
 		stop_threads();
-		goto exit_client;
+		goto exit_apps_notify;
+	}
+
+	/* Create thread to manage application socket */
+	ret = pthread_create(&apps_thread, default_pthread_attr(),
+			thread_manage_apps, (void *) NULL);
+	if (ret) {
+		errno = ret;
+		PERROR("pthread_create apps");
+		retval = -1;
+		stop_threads();
+		goto exit_apps;
 	}
 
 	/* Create thread to dispatch registration */
@@ -6203,26 +6214,15 @@ int main(int argc, char **argv)
 		goto exit_reg_apps;
 	}
 
-	/* Create thread to manage application socket */
-	ret = pthread_create(&apps_thread, default_pthread_attr(),
-			thread_manage_apps, (void *) NULL);
+	/* Create thread to manage the client socket */
+	ret = pthread_create(&client_thread, default_pthread_attr(),
+			thread_manage_clients, (void *) NULL);
 	if (ret) {
 		errno = ret;
-		PERROR("pthread_create apps");
+		PERROR("pthread_create clients");
 		retval = -1;
 		stop_threads();
-		goto exit_apps;
-	}
-
-	/* Create thread to manage application notify socket */
-	ret = pthread_create(&apps_notify_thread, default_pthread_attr(),
-			ust_thread_manage_notify, (void *) NULL);
-	if (ret) {
-		errno = ret;
-		PERROR("pthread_create notify");
-		retval = -1;
-		stop_threads();
-		goto exit_apps_notify;
+		goto exit_client;
 	}
 
 	/* Create agent registration thread. */
@@ -6278,12 +6278,12 @@ exit_load_session:
 		ret = pthread_join(kernel_thread, &status);
 		if (ret) {
 			errno = ret;
-			PERROR("pthread_join");
+			PERROR("pthread_join kernel");
 			retval = -1;
 		}
 	}
-exit_kernel:
 
+exit_kernel:
 	ret = pthread_join(agent_reg_thread, &status);
 	if (ret) {
 		errno = ret;
@@ -6291,51 +6291,45 @@ exit_kernel:
 		retval = -1;
 	}
 exit_agent_reg:
-
-	ret = pthread_join(apps_notify_thread, &status);
+	ret = pthread_join(client_thread, &status);
 	if (ret) {
 		errno = ret;
-		PERROR("pthread_join apps notify");
+		PERROR("pthread_join client");
 		retval = -1;
 	}
-exit_apps_notify:
 
+exit_client:
+	ret = pthread_join(reg_apps_thread, &status);
+	if (ret) {
+		errno = ret;
+		PERROR("pthread_join registration app" );
+		retval = -1;
+	}
+exit_reg_apps:
+	ret = pthread_join(dispatch_thread, &status);
+	if (ret) {
+		errno = ret;
+		PERROR("pthread_join dispatch");
+		retval = -1;
+	}
+
+exit_dispatch:
 	ret = pthread_join(apps_thread, &status);
 	if (ret) {
 		errno = ret;
 		PERROR("pthread_join apps");
 		retval = -1;
 	}
+
 exit_apps:
-
-	ret = pthread_join(reg_apps_thread, &status);
+	ret = pthread_join(apps_notify_thread, &status);
 	if (ret) {
 		errno = ret;
-		PERROR("pthread_join");
-		retval = -1;
-	}
-exit_reg_apps:
-
-	/*
-	 * Join dispatch thread after joining reg_apps_thread to ensure
-	 * we don't leak applications in the queue.
-	 */
-	ret = pthread_join(dispatch_thread, &status);
-	if (ret) {
-		errno = ret;
-		PERROR("pthread_join");
-		retval = -1;
-	}
-exit_dispatch:
-
-	ret = pthread_join(client_thread, &status);
-	if (ret) {
-		errno = ret;
-		PERROR("pthread_join");
+		PERROR("pthread_join apps notify");
 		retval = -1;
 	}
 
-exit_client:
+exit_apps_notify:
 exit_notification:
 exit_health:
 exit_init_data:
-- 
2.11.0

_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [RFC PATCH v2 03/13] Terminate dispatch thread after reg_apps_thread is terminated
       [not found] <20170918225206.17725-1-jonathan.rajotte-julien@efficios.com>
  2017-09-18 22:51 ` [RFC PATCH v2 01/13] Extend health thread lifetime Jonathan Rajotte
  2017-09-18 22:51 ` [RFC PATCH v2 02/13] Reorder pthread_join for easier ordering later on Jonathan Rajotte
@ 2017-09-18 22:51 ` Jonathan Rajotte
  2017-09-18 22:51 ` [RFC PATCH v2 04/13] Order termination of thread_manage_apps after dispatch_thread Jonathan Rajotte
                   ` (22 subsequent siblings)
  25 siblings, 0 replies; 27+ messages in thread
From: Jonathan Rajotte @ 2017-09-18 22:51 UTC (permalink / raw)
  To: lttng-dev; +Cc: jgalar

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
---
 src/bin/lttng-sessiond/main.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
index 45c0270e..0ca72ec8 100644
--- a/src/bin/lttng-sessiond/main.c
+++ b/src/bin/lttng-sessiond/main.c
@@ -497,9 +497,6 @@ static void stop_threads(void)
 		ERR("write error on thread quit pipe");
 	}
 
-	/* Dispatch thread */
-	CMM_STORE_SHARED(dispatch_thread_exit, 1);
-	futex_nto1_wake(&ust_cmd_queue.futex);
 }
 
 /*
@@ -6306,6 +6303,10 @@ exit_client:
 		retval = -1;
 	}
 exit_reg_apps:
+	/* Instruct the dispatch thread to quit */
+	CMM_STORE_SHARED(dispatch_thread_exit, 1);
+	futex_nto1_wake(&ust_cmd_queue.futex);
+
 	ret = pthread_join(dispatch_thread, &status);
 	if (ret) {
 		errno = ret;
-- 
2.11.0

_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [RFC PATCH v2 04/13] Order termination of thread_manage_apps after dispatch_thread
       [not found] <20170918225206.17725-1-jonathan.rajotte-julien@efficios.com>
                   ` (2 preceding siblings ...)
  2017-09-18 22:51 ` [RFC PATCH v2 03/13] Terminate dispatch thread after reg_apps_thread is terminated Jonathan Rajotte
@ 2017-09-18 22:51 ` Jonathan Rajotte
  2017-09-18 22:51 ` [RFC PATCH v2 05/13] Control thread_apps_notify lifetime with specialized quit pipe Jonathan Rajotte
                   ` (21 subsequent siblings)
  25 siblings, 0 replies; 27+ messages in thread
From: Jonathan Rajotte @ 2017-09-18 22:51 UTC (permalink / raw)
  To: lttng-dev; +Cc: jgalar

This ensures that no registrations are ongoing when thread_manage_apps
performs its cleanup and termination.

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
---
 src/bin/lttng-sessiond/main.c | 46 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 40 insertions(+), 6 deletions(-)

diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
index 0ca72ec8..8d9b14b5 100644
--- a/src/bin/lttng-sessiond/main.c
+++ b/src/bin/lttng-sessiond/main.c
@@ -205,6 +205,7 @@ static int kernel_poll_pipe[2] = { -1, -1 };
  */
 static int thread_quit_pipe[2] = { -1, -1 };
 static int thread_health_teardown_trigger_pipe[2] = { -1, -1 };
+static int thread_apps_teardown_trigger_pipe[2] = { -1, -1 };
 
 /*
  * This pipe is used to inform the thread managing application communication
@@ -483,6 +484,11 @@ static int init_thread_health_teardown_trigger_pipe(void)
 	return __init_thread_quit_pipe(thread_health_teardown_trigger_pipe);
 }
 
+static int init_thread_apps_teardown_trigger_pipe(void)
+{
+	return __init_thread_quit_pipe(thread_apps_teardown_trigger_pipe);
+}
+
 /*
  * Stop all threads by closing the thread quit pipe.
  */
@@ -496,7 +502,6 @@ static void stop_threads(void)
 	if (ret < 0) {
 		ERR("write error on thread quit pipe");
 	}
-
 }
 
 /*
@@ -1634,11 +1639,17 @@ static void *thread_manage_apps(void *data)
 
 	health_code_update();
 
-	ret = sessiond_set_thread_pollset(&events, 2);
+	ret = lttng_poll_create(&events, 2, LTTNG_CLOEXEC);
 	if (ret < 0) {
 		goto error_poll_create;
 	}
 
+	/* Add quit pipe */
+	ret = lttng_poll_add(&events, thread_apps_teardown_trigger_pipe[0], LPOLLIN | LPOLLERR);
+	if (ret < 0) {
+		goto error;
+	}
+
 	ret = lttng_poll_add(&events, apps_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
 	if (ret < 0) {
 		goto error;
@@ -1685,10 +1696,18 @@ static void *thread_manage_apps(void *data)
 			}
 
 			/* Thread quit pipe has been closed. Killing thread. */
-			ret = sessiond_check_thread_quit_pipe(pollfd, revents);
-			if (ret) {
-				err = 0;
-				goto exit;
+			if (pollfd == thread_apps_teardown_trigger_pipe[0]) {
+				if (revents & LPOLLIN) {
+					/* Normal exit */
+					err = 0;
+					goto exit;
+				} else if (revents & LPOLLERR) {
+					ERR("Quit pipe error");
+					goto error;
+				} else {
+					ERR("Unknown poll events %u for quit pipe", revents);
+					goto error;
+				}
 			}
 
 			/* Inspect the apps cmd pipe */
@@ -5739,6 +5758,11 @@ int main(int argc, char **argv)
 		goto exit_init_data;
 	}
 
+	if (init_thread_apps_teardown_trigger_pipe()) {
+		retval = -1;
+		goto exit_init_data;
+	}
+
 	/* Check if daemon is UID = 0 */
 	is_root = !getuid();
 
@@ -6302,6 +6326,7 @@ exit_client:
 		PERROR("pthread_join registration app" );
 		retval = -1;
 	}
+
 exit_reg_apps:
 	/* Instruct the dispatch thread to quit */
 	CMM_STORE_SHARED(dispatch_thread_exit, 1);
@@ -6315,6 +6340,12 @@ exit_reg_apps:
 	}
 
 exit_dispatch:
+	/* Instruct the apps thread to quit */
+	ret = notify_thread_pipe(thread_apps_teardown_trigger_pipe[1]);
+	if (ret < 0) {
+		ERR("write error on thread quit pipe");
+	}
+
 	ret = pthread_join(apps_thread, &status);
 	if (ret) {
 		errno = ret;
@@ -6397,6 +6428,9 @@ exit_init_data:
 
 	/* Health thread teardown pipe cleanup */
 	utils_close_pipe(thread_health_teardown_trigger_pipe);
+	/* Apps thread teardown pipe cleanup */
+	utils_close_pipe(thread_apps_teardown_trigger_pipe);
+
 	lttng_pipe_destroy(ust32_channel_monitor_pipe);
 	lttng_pipe_destroy(ust64_channel_monitor_pipe);
 	lttng_pipe_destroy(kernel_channel_monitor_pipe);
-- 
2.11.0

_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [RFC PATCH v2 05/13] Control thread_apps_notify lifetime with specialized quit pipe
       [not found] <20170918225206.17725-1-jonathan.rajotte-julien@efficios.com>
                   ` (3 preceding siblings ...)
  2017-09-18 22:51 ` [RFC PATCH v2 04/13] Order termination of thread_manage_apps after dispatch_thread Jonathan Rajotte
@ 2017-09-18 22:51 ` Jonathan Rajotte
  2017-09-18 22:51 ` [RFC PATCH v2 06/13] Fix: unregister app notify socket on sessiond tear down Jonathan Rajotte
                   ` (20 subsequent siblings)
  25 siblings, 0 replies; 27+ messages in thread
From: Jonathan Rajotte @ 2017-09-18 22:51 UTC (permalink / raw)
  To: lttng-dev; +Cc: jgalar

Listen to applications longer and quit only when most other threads are
terminated. This simplifies data interaction with other threads.

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
---
 src/bin/lttng-sessiond/lttng-sessiond.h |  2 ++
 src/bin/lttng-sessiond/main.c           | 20 +++++++++++++++++++-
 src/bin/lttng-sessiond/ust-thread.c     | 27 ++++++++++++++++++++-------
 3 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/src/bin/lttng-sessiond/lttng-sessiond.h b/src/bin/lttng-sessiond/lttng-sessiond.h
index 74552db6..8fbcac60 100644
--- a/src/bin/lttng-sessiond/lttng-sessiond.h
+++ b/src/bin/lttng-sessiond/lttng-sessiond.h
@@ -94,6 +94,8 @@ struct ust_reg_wait_node {
  */
 extern int apps_cmd_notify_pipe[2];
 
+extern int thread_apps_notify_teardown_trigger_pipe[2];
+
 /*
  * Used to notify that a hash table needs to be destroyed by dedicated
  * thread. Required by design because we don't want to move destroy
diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
index 8d9b14b5..3596d7e3 100644
--- a/src/bin/lttng-sessiond/main.c
+++ b/src/bin/lttng-sessiond/main.c
@@ -206,6 +206,7 @@ static int kernel_poll_pipe[2] = { -1, -1 };
 static int thread_quit_pipe[2] = { -1, -1 };
 static int thread_health_teardown_trigger_pipe[2] = { -1, -1 };
 static int thread_apps_teardown_trigger_pipe[2] = { -1, -1 };
+int thread_apps_notify_teardown_trigger_pipe[2] = { -1, -1 };
 
 /*
  * This pipe is used to inform the thread managing application communication
@@ -489,6 +490,11 @@ static int init_thread_apps_teardown_trigger_pipe(void)
 	return __init_thread_quit_pipe(thread_apps_teardown_trigger_pipe);
 }
 
+static int init_thread_apps_notify_teardown_trigger_pipe(void)
+{
+	return __init_thread_quit_pipe(thread_apps_notify_teardown_trigger_pipe);
+}
+
 /*
  * Stop all threads by closing the thread quit pipe.
  */
@@ -5763,6 +5769,11 @@ int main(int argc, char **argv)
 		goto exit_init_data;
 	}
 
+	if (init_thread_apps_notify_teardown_trigger_pipe()) {
+		retval = -1;
+		goto exit_init_data;
+	}
+
 	/* Check if daemon is UID = 0 */
 	is_root = !getuid();
 
@@ -6354,6 +6365,12 @@ exit_dispatch:
 	}
 
 exit_apps:
+	/* Instruct the apps_notify thread to quit */
+	ret = notify_thread_pipe(thread_apps_notify_teardown_trigger_pipe[1]);
+	if (ret < 0) {
+		ERR("write error on thread quit pipe");
+	}
+
 	ret = pthread_join(apps_notify_thread, &status);
 	if (ret) {
 		errno = ret;
@@ -6430,7 +6447,8 @@ exit_init_data:
 	utils_close_pipe(thread_health_teardown_trigger_pipe);
 	/* Apps thread teardown pipe cleanup */
 	utils_close_pipe(thread_apps_teardown_trigger_pipe);
-
+	/* Apps notify thread teardown pipe cleanup */
+	utils_close_pipe(thread_apps_notify_teardown_trigger_pipe);
 	lttng_pipe_destroy(ust32_channel_monitor_pipe);
 	lttng_pipe_destroy(ust64_channel_monitor_pipe);
 	lttng_pipe_destroy(kernel_channel_monitor_pipe);
diff --git a/src/bin/lttng-sessiond/ust-thread.c b/src/bin/lttng-sessiond/ust-thread.c
index 7fb18a78..1e7a8229 100644
--- a/src/bin/lttng-sessiond/ust-thread.c
+++ b/src/bin/lttng-sessiond/ust-thread.c
@@ -51,9 +51,15 @@ void *ust_thread_manage_notify(void *data)
 
 	health_code_update();
 
-	ret = sessiond_set_thread_pollset(&events, 2);
+	ret = lttng_poll_create(&events, 2, LTTNG_CLOEXEC);
 	if (ret < 0) {
-		goto error_poll_create;
+		goto error;
+	}
+
+	/* Add quit pipe */
+	ret = lttng_poll_add(&events, thread_apps_notify_teardown_trigger_pipe[0], LPOLLIN | LPOLLERR);
+	if (ret < 0) {
+		goto error;
 	}
 
 	/* Add notify pipe to the pollset. */
@@ -99,11 +105,18 @@ restart:
 				continue;
 			}
 
-			/* Thread quit pipe has been closed. Killing thread. */
-			ret = sessiond_check_thread_quit_pipe(pollfd, revents);
-			if (ret) {
-				err = 0;
-				goto exit;
+			if (pollfd == thread_apps_notify_teardown_trigger_pipe[0]) {
+				if (revents & LPOLLIN) {
+					/* Normal exit */
+					err = 0;
+					goto exit;
+				} else if (revents & LPOLLERR) {
+					ERR("Apps notify quit error");
+					goto error;
+				} else {
+					ERR("Unexpected poll events %u for quit pipe", revents);
+					goto error;
+				}
 			}
 
 			/* Inspect the apps cmd pipe */
-- 
2.11.0

_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [RFC PATCH v2 06/13] Fix: unregister app notify socket on sessiond tear down
       [not found] <20170918225206.17725-1-jonathan.rajotte-julien@efficios.com>
                   ` (4 preceding siblings ...)
  2017-09-18 22:51 ` [RFC PATCH v2 05/13] Control thread_apps_notify lifetime with specialized quit pipe Jonathan Rajotte
@ 2017-09-18 22:51 ` Jonathan Rajotte
  2017-09-18 22:52 ` [RFC PATCH v2 07/13] Always reply to an inquiring app Jonathan Rajotte
                   ` (19 subsequent siblings)
  25 siblings, 0 replies; 27+ messages in thread
From: Jonathan Rajotte @ 2017-09-18 22:51 UTC (permalink / raw)
  To: lttng-dev; +Cc: jgalar

A race between the sessiond tear down and applications initialization
can lead to a deadlock.

Applications try to communicate via the notify sockets while sessiond
no longer listens on these sockets, since the thread responsible
for reception/response is terminated (ust_thread_manage_notify). These
sockets are never closed, hence an application could hang on
communication.

The sessiond hang happens during the call to cmd_destroy_session in
sessiond_cleanup. Sessiond is trying to communicate with the app while
the app is waiting for a response on the app notification socket.

To prevent this situation, a call to ust_app_notify_sock_unregister is
performed on every entry of the ust_app_ht_by_notify_sock hash table at
the time of termination. This ensures that any pending communication
initiated by the application will be terminated, since all sockets will
be closed at the end of the grace period via call_rcu inside
ust_app_notify_sock_unregister. The use of ust_app_ht_by_notify_sock
instead of ust_app_ht prevents a double call_rcu, since entries are
removed from ust_app_ht_by_notify_sock during ust_app_notify_sock_unregister.

This can be reproduced using the sessiond_teardown_active_session
scenario provided by [1].

[1] https://github.com/PSRCode/lttng-stress

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
---
 src/bin/lttng-sessiond/ust-thread.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/bin/lttng-sessiond/ust-thread.c b/src/bin/lttng-sessiond/ust-thread.c
index 1e7a8229..8f11133a 100644
--- a/src/bin/lttng-sessiond/ust-thread.c
+++ b/src/bin/lttng-sessiond/ust-thread.c
@@ -27,6 +27,19 @@
 #include "health-sessiond.h"
 #include "testpoint.h"
 
+
+static
+void notify_sock_unregister_all()
+{
+	struct lttng_ht_iter iter;
+	struct ust_app *app;
+	rcu_read_lock();
+	cds_lfht_for_each_entry(ust_app_ht_by_notify_sock->ht, &iter.iter, app, notify_sock_n.node) {
+		ust_app_notify_sock_unregister(app->notify_sock);
+	}
+	rcu_read_unlock();
+}
+
 /*
  * This thread manage application notify communication.
  */
@@ -53,7 +66,7 @@ void *ust_thread_manage_notify(void *data)
 
 	ret = lttng_poll_create(&events, 2, LTTNG_CLOEXEC);
 	if (ret < 0) {
-		goto error;
+		goto error_poll_create;
 	}
 
 	/* Add quit pipe */
@@ -197,6 +210,8 @@ error_poll_create:
 error_testpoint:
 	utils_close_pipe(apps_cmd_notify_pipe);
 	apps_cmd_notify_pipe[0] = apps_cmd_notify_pipe[1] = -1;
+	notify_sock_unregister_all();
+
 	DBG("Application notify communication apps thread cleanup complete");
 	if (err) {
 		health_error();
-- 
2.11.0

_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [RFC PATCH v2 07/13] Always reply to an inquiring app
       [not found] <20170918225206.17725-1-jonathan.rajotte-julien@efficios.com>
                   ` (5 preceding siblings ...)
  2017-09-18 22:51 ` [RFC PATCH v2 06/13] Fix: unregister app notify socket on sessiond tear down Jonathan Rajotte
@ 2017-09-18 22:52 ` Jonathan Rajotte
  2017-09-18 22:52 ` [RFC PATCH v2 08/13] Fix: perform lookup then test for condition Jonathan Rajotte
                   ` (18 subsequent siblings)
  25 siblings, 0 replies; 27+ messages in thread
From: Jonathan Rajotte @ 2017-09-18 22:52 UTC (permalink / raw)
  To: lttng-dev; +Cc: jgalar

Reply to the app on errors to prevent an app-side receive hang.

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
---
 src/bin/lttng-sessiond/ust-app.c | 64 ++++++++++++++++++++++------------------
 1 file changed, 36 insertions(+), 28 deletions(-)

diff --git a/src/bin/lttng-sessiond/ust-app.c b/src/bin/lttng-sessiond/ust-app.c
index e79455b0..20ac469b 100644
--- a/src/bin/lttng-sessiond/ust-app.c
+++ b/src/bin/lttng-sessiond/ust-app.c
@@ -5394,13 +5394,13 @@ static int reply_ust_register_channel(int sock, int sobjd, int cobjd,
 		size_t nr_fields, struct ustctl_field *fields)
 {
 	int ret, ret_code = 0;
-	uint32_t chan_id, reg_count;
-	uint64_t chan_reg_key;
-	enum ustctl_channel_header type;
+	uint32_t chan_id = 0, reg_count = 0;
+	uint64_t chan_reg_key = 0;
+	enum ustctl_channel_header type = USTCTL_CHANNEL_HEADER_UNKNOWN;
 	struct ust_app *app;
 	struct ust_app_channel *ua_chan;
 	struct ust_app_session *ua_sess;
-	struct ust_registry_session *registry;
+	struct ust_registry_session *registry = NULL;
 	struct ust_registry_channel *chan_reg;
 
 	rcu_read_lock();
@@ -5410,16 +5410,16 @@ static int reply_ust_register_channel(int sock, int sobjd, int cobjd,
 	if (!app) {
 		DBG("Application socket %d is being torn down. Abort event notify",
 				sock);
-		ret = 0;
-		goto error_rcu_unlock;
+		ret_code = -1;
+		goto reply;
 	}
 
 	/* Lookup channel by UST object descriptor. */
 	ua_chan = find_channel_by_objd(app, cobjd);
 	if (!ua_chan) {
 		DBG("Application channel is being torn down. Abort event notify");
-		ret = 0;
-		goto error_rcu_unlock;
+		ret_code = -1;
+		goto reply;
 	}
 
 	assert(ua_chan->session);
@@ -5429,8 +5429,8 @@ static int reply_ust_register_channel(int sock, int sobjd, int cobjd,
 	registry = get_session_registry(ua_sess);
 	if (!registry) {
 		DBG("Application session is being torn down. Abort event notify");
-		ret = 0;
-		goto error_rcu_unlock;
+		ret_code = -1;
+		goto reply;
 	};
 
 	/* Depending on the buffer type, a different channel key is used. */
@@ -5469,10 +5469,13 @@ static int reply_ust_register_channel(int sock, int sobjd, int cobjd,
 		ret_code = ust_metadata_channel_statedump(registry, chan_reg);
 		if (ret_code) {
 			ERR("Error appending channel metadata (errno = %d)", ret_code);
-			goto reply;
+			goto unlock_reply;
 		}
 	}
 
+unlock_reply:
+	pthread_mutex_unlock(&registry->lock);
+
 reply:
 	DBG3("UST app replying to register channel key %" PRIu64
 			" with id %u, type: %d, ret: %d", chan_reg_key, chan_id, type,
@@ -5488,12 +5491,13 @@ reply:
 		goto error;
 	}
 
-	/* This channel registry registration is completed. */
+	if (ret_code < 0) {
+		goto error;
+	}
+
 	chan_reg->register_done = 1;
 
 error:
-	pthread_mutex_unlock(&registry->lock);
-error_rcu_unlock:
 	rcu_read_unlock();
 	free(fields);
 	return ret;
@@ -5527,16 +5531,16 @@ static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name,
 	if (!app) {
 		DBG("Application socket %d is being torn down. Abort event notify",
 				sock);
-		ret = 0;
-		goto error_rcu_unlock;
+		ret_code = -1;
+		goto reply;
 	}
 
 	/* Lookup channel by UST object descriptor. */
 	ua_chan = find_channel_by_objd(app, cobjd);
 	if (!ua_chan) {
 		DBG("Application channel is being torn down. Abort event notify");
-		ret = 0;
-		goto error_rcu_unlock;
+		ret_code = -1;
+		goto reply;
 	}
 
 	assert(ua_chan->session);
@@ -5545,8 +5549,8 @@ static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name,
 	registry = get_session_registry(ua_sess);
 	if (!registry) {
 		DBG("Application session is being torn down. Abort event notify");
-		ret = 0;
-		goto error_rcu_unlock;
+		ret_code = -1;
+		goto reply;
 	}
 
 	if (ua_sess->buffer_type == LTTNG_BUFFER_PER_UID) {
@@ -5570,6 +5574,9 @@ static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name,
 	fields = NULL;
 	model_emf_uri = NULL;
 
+	pthread_mutex_unlock(&registry->lock);
+
+reply:
 	/*
 	 * The return value is returned to ustctl so in case of an error, the
 	 * application can be notified. In case of an error, it's important not to
@@ -5593,8 +5600,6 @@ static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name,
 			name, event_id);
 
 error:
-	pthread_mutex_unlock(&registry->lock);
-error_rcu_unlock:
 	rcu_read_unlock();
 	free(sig);
 	free(fields);
@@ -5627,8 +5632,9 @@ static int add_enum_ust_registry(int sock, int sobjd, char *name,
 		/* Return an error since this is not an error */
 		DBG("Application socket %d is being torn down. Aborting enum registration",
 				sock);
+		ret_code = -1;
 		free(entries);
-		goto error_rcu_unlock;
+		goto reply;
 	}
 
 	/* Lookup session by UST object descriptor. */
@@ -5637,14 +5643,16 @@ static int add_enum_ust_registry(int sock, int sobjd, char *name,
 		/* Return an error since this is not an error */
 		DBG("Application session is being torn down (session not found). Aborting enum registration.");
 		free(entries);
-		goto error_rcu_unlock;
+		ret_code = -1;
+		goto reply;
 	}
 
 	registry = get_session_registry(ua_sess);
 	if (!registry) {
 		DBG("Application session is being torn down (registry not found). Aborting enum registration.");
 		free(entries);
-		goto error_rcu_unlock;
+		ret_code = -1;
+		goto reply;
 	}
 
 	pthread_mutex_lock(&registry->lock);
@@ -5658,6 +5666,9 @@ static int add_enum_ust_registry(int sock, int sobjd, char *name,
 			entries, nr_entries, &enum_id);
 	entries = NULL;
 
+	pthread_mutex_unlock(&registry->lock);
+
+reply:
 	/*
 	 * The return value is returned to ustctl so in case of an error, the
 	 * application can be notified. In case of an error, it's important not to
@@ -5678,10 +5689,7 @@ static int add_enum_ust_registry(int sock, int sobjd, char *name,
 	}
 
 	DBG3("UST registry enum %s added successfully or already found", name);
-
 error:
-	pthread_mutex_unlock(&registry->lock);
-error_rcu_unlock:
 	rcu_read_unlock();
 	return ret;
 }
-- 
2.11.0

_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [RFC PATCH v2 08/13] Fix: perform lookup then test for condition
       [not found] <20170918225206.17725-1-jonathan.rajotte-julien@efficios.com>
                   ` (6 preceding siblings ...)
  2017-09-18 22:52 ` [RFC PATCH v2 07/13] Always reply to an inquiring app Jonathan Rajotte
@ 2017-09-18 22:52 ` Jonathan Rajotte
  2017-09-18 22:52 ` [RFC PATCH v2 09/13] Perform ust_app_unregister on thread_manage_apps teardown Jonathan Rajotte
                   ` (17 subsequent siblings)
  25 siblings, 0 replies; 27+ messages in thread
From: Jonathan Rajotte @ 2017-09-18 22:52 UTC (permalink / raw)
  To: lttng-dev; +Cc: jgalar

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
---
 src/bin/lttng-sessiond/main.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
index 3596d7e3..8cffa6f6 100644
--- a/src/bin/lttng-sessiond/main.c
+++ b/src/bin/lttng-sessiond/main.c
@@ -1899,8 +1899,11 @@ static void sanitize_wait_queue(struct ust_reg_wait_queue *wait_queue)
 
 		cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
 				&wait_queue->head, head) {
-			if (pollfd == wait_node->app->sock &&
-					(revents & (LPOLLHUP | LPOLLERR))) {
+			if (pollfd != wait_node->app->sock) {
+				continue;
+			}
+
+			if (revents & (LPOLLHUP | LPOLLERR)) {
 				cds_list_del(&wait_node->head);
 				wait_queue->count--;
 				ust_app_destroy(wait_node->app);
-- 
2.11.0

_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [RFC PATCH v2 09/13] Perform ust_app_unregister on thread_manage_apps teardown
       [not found] <20170918225206.17725-1-jonathan.rajotte-julien@efficios.com>
                   ` (7 preceding siblings ...)
  2017-09-18 22:52 ` [RFC PATCH v2 08/13] Fix: perform lookup then test for condition Jonathan Rajotte
@ 2017-09-18 22:52 ` Jonathan Rajotte
  2017-09-18 22:52 ` [RFC PATCH v2 10/13] Teardown apps_notify_thread before thread_manage_apps Jonathan Rajotte
                   ` (16 subsequent siblings)
  25 siblings, 0 replies; 27+ messages in thread
From: Jonathan Rajotte @ 2017-09-18 22:52 UTC (permalink / raw)
  To: lttng-dev; +Cc: jgalar

Previous work on thread termination ordering permits the dismissal of
the following comment:

   We don't clean the UST app hash table here since already registered
   applications can still be controlled so let them be until the session
   daemon dies or the applications stop.

At the moment of termination, the control threads are already terminated.

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
---
 src/bin/lttng-sessiond/main.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
index 8cffa6f6..4a2a661f 100644
--- a/src/bin/lttng-sessiond/main.c
+++ b/src/bin/lttng-sessiond/main.c
@@ -1780,11 +1780,15 @@ error_testpoint:
 	utils_close_pipe(apps_cmd_pipe);
 	apps_cmd_pipe[0] = apps_cmd_pipe[1] = -1;
 
-	/*
-	 * We don't clean the UST app hash table here since already registered
-	 * applications can still be controlled so let them be until the session
-	 * daemon dies or the applications stop.
-	 */
+	{
+		struct lttng_ht_iter iter;
+		struct ust_app *app;
+		rcu_read_lock();
+		cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) {
+			ust_app_unregister(app->sock);
+		}
+		rcu_read_unlock();
+	}
 
 	if (err) {
 		health_error();
-- 
2.11.0

_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [RFC PATCH v2 10/13] Teardown apps_notify_thread before thread_manage_apps
       [not found] <20170918225206.17725-1-jonathan.rajotte-julien@efficios.com>
                   ` (8 preceding siblings ...)
  2017-09-18 22:52 ` [RFC PATCH v2 09/13] Perform ust_app_unregister on thread_manage_apps teardown Jonathan Rajotte
@ 2017-09-18 22:52 ` Jonathan Rajotte
  2017-09-18 22:52 ` [RFC PATCH v2 11/13] Comments update Jonathan Rajotte
                   ` (15 subsequent siblings)
  25 siblings, 0 replies; 27+ messages in thread
From: Jonathan Rajotte @ 2017-09-18 22:52 UTC (permalink / raw)
  To: lttng-dev; +Cc: jgalar

RFC:
This is necessary since we use the ust_app_ht_by_notify_sock to
perform the cleanup operation. Both ust_app_unregister and
ust_app_notify_unregister remove the app from the
ust_app_ht_by_notify_sock hash table, but only ust_app_notify_unregister
actually ends up closing the socket (via call_rcu).

Is there another way to fix this problem?

Could we simply not remove it on a ust_app_unregister? And always defer
to the apps_notify_thread for cleanup?

Update the value in the hash table to -1 and emit a close and remove
from the hash table if the value is -1?

We could also keep a local list of fd in apps_notify_thread and use it for
cleanup instead of relying on ust_ust_app_by_notify_sock.

I'm not sure what is the best/elegant solution here. I am not a fan of
the current solution but it is working.

Obviously this commit will be reworded and modified accordingly.

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
---
 src/bin/lttng-sessiond/main.c | 55 ++++++++++++++++++++++++-------------------
 1 file changed, 31 insertions(+), 24 deletions(-)

diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
index 4a2a661f..216d0da6 100644
--- a/src/bin/lttng-sessiond/main.c
+++ b/src/bin/lttng-sessiond/main.c
@@ -6209,16 +6209,6 @@ int main(int argc, char **argv)
 	}
 	notification_thread_running = true;
 
-	/* Create thread to manage application notify socket */
-	ret = pthread_create(&apps_notify_thread, default_pthread_attr(),
-			ust_thread_manage_notify, (void *) NULL);
-	if (ret) {
-		errno = ret;
-		PERROR("pthread_create notify");
-		retval = -1;
-		stop_threads();
-		goto exit_apps_notify;
-	}
 
 	/* Create thread to manage application socket */
 	ret = pthread_create(&apps_thread, default_pthread_attr(),
@@ -6231,6 +6221,17 @@ int main(int argc, char **argv)
 		goto exit_apps;
 	}
 
+	/* Create thread to manage application notify socket */
+	ret = pthread_create(&apps_notify_thread, default_pthread_attr(),
+			ust_thread_manage_notify, (void *) NULL);
+	if (ret) {
+		errno = ret;
+		PERROR("pthread_create notify");
+		retval = -1;
+		stop_threads();
+		goto exit_apps_notify;
+	}
+
 	/* Create thread to dispatch registration */
 	ret = pthread_create(&dispatch_thread, default_pthread_attr(),
 			thread_dispatch_ust_registration, (void *) NULL);
@@ -6358,20 +6359,6 @@ exit_reg_apps:
 	}
 
 exit_dispatch:
-	/* Instruct the apps thread to quit */
-	ret = notify_thread_pipe(thread_apps_teardown_trigger_pipe[1]);
-	if (ret < 0) {
-		ERR("write error on thread quit pipe");
-	}
-
-	ret = pthread_join(apps_thread, &status);
-	if (ret) {
-		errno = ret;
-		PERROR("pthread_join apps");
-		retval = -1;
-	}
-
-exit_apps:
 	/* Instruct the apps_notify thread to quit */
 	ret = notify_thread_pipe(thread_apps_notify_teardown_trigger_pipe[1]);
 	if (ret < 0) {
@@ -6386,6 +6373,26 @@ exit_apps:
 	}
 
 exit_apps_notify:
+	/*
+	 * The barrier ensures that all previous resources, notify sockets in
+	 * particular, are freed/closed.
+	 */
+	rcu_barrier();
+
+	/* Instruct the apps thread to quit */
+	ret = notify_thread_pipe(thread_apps_teardown_trigger_pipe[1]);
+	if (ret < 0) {
+		ERR("write error on thread quit pipe");
+	}
+
+	ret = pthread_join(apps_thread, &status);
+	if (ret) {
+		errno = ret;
+		PERROR("pthread_join apps");
+		retval = -1;
+	}
+
+exit_apps:
 exit_notification:
 exit_health:
 exit_init_data:
-- 
2.11.0

_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [RFC PATCH v2 11/13] Comments update
       [not found] <20170918225206.17725-1-jonathan.rajotte-julien@efficios.com>
                   ` (9 preceding siblings ...)
  2017-09-18 22:52 ` [RFC PATCH v2 10/13] Teardown apps_notify_thread before thread_manage_apps Jonathan Rajotte
@ 2017-09-18 22:52 ` Jonathan Rajotte
  2017-09-18 22:52 ` [RFC PATCH v2 12/13] Fix: delay termination on consumerd to allow metadata flushing Jonathan Rajotte
                   ` (14 subsequent siblings)
  25 siblings, 0 replies; 27+ messages in thread
From: Jonathan Rajotte @ 2017-09-18 22:52 UTC (permalink / raw)
  To: lttng-dev; +Cc: jgalar

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
---
 src/bin/lttng-sessiond/main.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
index 216d0da6..a840e8de 100644
--- a/src/bin/lttng-sessiond/main.c
+++ b/src/bin/lttng-sessiond/main.c
@@ -496,14 +496,18 @@ static int init_thread_apps_notify_teardown_trigger_pipe(void)
 }
 
 /*
- * Stop all threads by closing the thread quit pipe.
+ * Stop first wave threads by closing the thread quit pipe.
+ *  - kernel thread
+ *  - client thread
+ *  - agent thread
+ *  - reg apps thread
  */
 static void stop_threads(void)
 {
 	int ret;
 
-	/* Stopping all threads */
-	DBG("Terminating all threads");
+	/* Stopping first wave threads */
+	DBG("Terminating first wave threads");
 	ret = notify_thread_pipe(thread_quit_pipe[1]);
 	if (ret < 0) {
 		ERR("write error on thread quit pipe");
-- 
2.11.0

_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [RFC PATCH v2 12/13] Fix: delay termination on consumerd to allow metadata flushing
       [not found] <20170918225206.17725-1-jonathan.rajotte-julien@efficios.com>
                   ` (10 preceding siblings ...)
  2017-09-18 22:52 ` [RFC PATCH v2 11/13] Comments update Jonathan Rajotte
@ 2017-09-18 22:52 ` Jonathan Rajotte
  2017-09-18 22:52 ` [RFC PATCH v2 13/13] Fix: quit early if instructed to Jonathan Rajotte
                   ` (13 subsequent siblings)
  25 siblings, 0 replies; 27+ messages in thread
From: Jonathan Rajotte @ 2017-09-18 22:52 UTC (permalink / raw)
  To: lttng-dev; +Cc: jgalar

Move consumerd ownership to thread_manage_consumer to scope the lifetime
on the consumerd to its manager thread.

"thread_manage_consumer" is responsible for signaling and waiting for
the termination of its consumerd.

All thread_manage_consumer threads now wait on a unique quit pipe
different from the global thread quit pipe. This allows control over
their lifetime.

The termination notification is sent during sessiond_cleanup after the
destroy session command to ensure that no sessions are still active at
the moment the consumerds are terminated.

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
---
 src/bin/lttng-sessiond/main.c | 174 +++++++++++++++++++++++++++---------------
 1 file changed, 112 insertions(+), 62 deletions(-)

diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
index a840e8de..fb58ab4b 100644
--- a/src/bin/lttng-sessiond/main.c
+++ b/src/bin/lttng-sessiond/main.c
@@ -206,6 +206,7 @@ static int kernel_poll_pipe[2] = { -1, -1 };
 static int thread_quit_pipe[2] = { -1, -1 };
 static int thread_health_teardown_trigger_pipe[2] = { -1, -1 };
 static int thread_apps_teardown_trigger_pipe[2] = { -1, -1 };
+static int thread_consumers_teardown_trigger_pipe[2] = { -1, -1 };
 int thread_apps_notify_teardown_trigger_pipe[2] = { -1, -1 };
 
 /*
@@ -495,6 +496,11 @@ static int init_thread_apps_notify_teardown_trigger_pipe(void)
 	return __init_thread_quit_pipe(thread_apps_notify_teardown_trigger_pipe);
 }
 
+static int init_thread_consumers_teardown_trigger_pipe(void)
+{
+	return __init_thread_quit_pipe(thread_consumers_teardown_trigger_pipe);
+}
+
 /*
  * Stop first wave threads by closing the thread quit pipe.
  *  - kernel thread
@@ -601,14 +607,15 @@ static int generate_lock_file_path(char *path, size_t len)
 /*
  * Wait on consumer process termination.
  *
- * Need to be called with the consumer data lock held or from a context
- * ensuring no concurrent access to data (e.g: cleanup).
+ * Need to be called with the consumer data lock held.
  */
 static void wait_consumer(struct consumer_data *consumer_data)
 {
 	pid_t ret;
 	int status;
 
+	assert(consumer_data);
+
 	if (consumer_data->pid <= 0) {
 		return;
 	}
@@ -626,6 +633,52 @@ static void wait_consumer(struct consumer_data *consumer_data)
 }
 
 /*
+ * Signal to the consumer process to terminate.
+ *
+ * Need to be called with the consumer data lock held.
+ */
+static void kill_consumer(struct consumer_data *consumer_data)
+{
+	int ret;
+
+	assert(consumer_data);
+
+	/* Consumer pid must be a real one. */
+	if (consumer_data->pid <= 0) {
+		goto end;
+	}
+
+	ret = kill(consumer_data->pid, SIGTERM);
+	if (ret) {
+		PERROR("Error killing consumer daemon");
+		goto end;
+	}
+end:
+	return;
+}
+
+static int join_thread_consumer(struct consumer_data *consumer_data)
+{
+	int ret;
+	void *status;
+
+	assert(consumer_data);
+
+	/* Consumer pid must be a real one. */
+	if (consumer_data->pid <= 0) {
+		ret = 0;
+		goto end;
+	}
+
+	ret = pthread_join(consumer_data->thread, &status);
+	if (ret) {
+		ERR("Joining consumer thread pid %d", consumer_data->pid);
+	}
+end:
+	return ret;
+}
+
+/*
  * Cleanup the session daemon's data structures.
  */
 static void sessiond_cleanup(void)
@@ -707,7 +760,6 @@ static void sessiond_cleanup(void)
 	(void) rmdir(path);
 
 	DBG("Cleaning up all sessions");
-
 	/* Destroy session list mutex */
 	if (session_list_ptr != NULL) {
 		pthread_mutex_destroy(&session_list_ptr->lock);
@@ -719,9 +771,35 @@ static void sessiond_cleanup(void)
 		}
 	}
 
-	wait_consumer(&kconsumer_data);
-	wait_consumer(&ustconsumer64_data);
-	wait_consumer(&ustconsumer32_data);
+	/*
+	 * Delay the termination of manage_consumer_thread threads to allow
+	 * proper metadata flushing, following the session destroy. Use a
+	 * barrier to ensure that all call_rcu are executed at this point.
+	 */
+	DBG("Teardown consurmer thread");
+	rcu_barrier();
+	ret = notify_thread_pipe(thread_consumers_teardown_trigger_pipe[1]);
+	if (ret < 0) {
+		ERR("write error on thread consumer quit pipe");
+	}
+
+	ret = join_thread_consumer(&kconsumer_data);
+	if (ret) {
+		errno = ret;
+		PERROR("join_consumer kernel");
+	}
+
+	ret = join_thread_consumer(&ustconsumer32_data);
+	if (ret) {
+		errno = ret;
+		PERROR("join_consumer ust32");
+	}
+
+	ret = join_thread_consumer(&ustconsumer64_data);
+	if (ret) {
+		errno = ret;
+		PERROR("join_consumer ust64");
+	}
 
 	DBG("Cleaning up all agent apps");
 	agent_app_ht_clean();
@@ -1289,14 +1367,20 @@ static void *thread_manage_consumer(void *data)
 	health_code_update();
 
 	/*
-	 * Pass 3 as size here for the thread quit pipe, consumerd_err_sock and the
+	 * Pass 3 as size here for the thread consumer quit pipe, consumerd_err_sock and the
 	 * metadata_sock. Nothing more will be added to this poll set.
 	 */
-	ret = sessiond_set_thread_pollset(&events, 3);
+	ret = lttng_poll_create(&events, 3, LTTNG_CLOEXEC);
 	if (ret < 0) {
 		goto error_poll;
 	}
 
+	/* Add quit pipe */
+	ret = lttng_poll_add(&events, thread_consumers_teardown_trigger_pipe[0], LPOLLIN | LPOLLERR);
+	if (ret < 0) {
+		goto error;
+	}
+
 	/*
 	 * The error socket here is already in a listening state which was done
 	 * just before spawning this thread to avoid a race between the consumer
@@ -1344,7 +1428,7 @@ restart:
 		}
 
 		/* Thread quit pipe has been closed. Killing thread. */
-		ret = sessiond_check_thread_quit_pipe(pollfd, revents);
+		ret = (pollfd == thread_consumers_teardown_trigger_pipe[0] && (revents & LPOLLIN)) ? 1 : 0;
 		if (ret) {
 			err = 0;
 			goto exit;
@@ -1509,7 +1593,7 @@ restart_poll:
 			 * but continue the current loop to handle potential data from
 			 * consumer.
 			 */
-			should_quit = sessiond_check_thread_quit_pipe(pollfd, revents);
+			should_quit = (pollfd == thread_consumers_teardown_trigger_pipe[0] && (revents & LPOLLIN)) ? 1 : 0;
 
 			if (pollfd == sock) {
 				/* Event on the consumerd socket */
@@ -1552,11 +1636,6 @@ restart_poll:
 
 exit:
 error:
-	/*
-	 * We lock here because we are about to close the sockets and some other
-	 * thread might be using them so get exclusive access which will abort all
-	 * other consumer command by other threads.
-	 */
 	pthread_mutex_lock(&consumer_data->lock);
 
 	/* Immediately set the consumerd state to stopped */
@@ -1570,6 +1649,13 @@ error:
 		assert(0);
 	}
 
+	/*
+	 * This thread is responsible for its consumerd. Make sure the
+	 * consumerd teardown is complete before proceeding.
+	 */
+	kill_consumer(consumer_data);
+	wait_consumer(consumer_data);
+
 	if (consumer_data->err_sock >= 0) {
 		ret = close(consumer_data->err_sock);
 		if (ret) {
@@ -1600,13 +1686,15 @@ error:
 
 	unlink(consumer_data->err_unix_sock_path);
 	unlink(consumer_data->cmd_unix_sock_path);
-	pthread_mutex_unlock(&consumer_data->lock);
 
 	/* Cleanup metadata socket mutex. */
 	if (consumer_data->metadata_sock.lock) {
 		pthread_mutex_destroy(consumer_data->metadata_sock.lock);
 		free(consumer_data->metadata_sock.lock);
 	}
+
+	pthread_mutex_unlock(&consumer_data->lock);
+
 	lttng_poll_clean(&events);
 
 	if (cmd_socket_wrapper) {
@@ -2560,27 +2648,6 @@ error:
 }
 
 /*
- * Join consumer thread
- */
-static int join_consumer_thread(struct consumer_data *consumer_data)
-{
-	void *status;
-
-	/* Consumer pid must be a real one. */
-	if (consumer_data->pid > 0) {
-		int ret;
-		ret = kill(consumer_data->pid, SIGTERM);
-		if (ret) {
-			PERROR("Error killing consumer daemon");
-			return ret;
-		}
-		return pthread_join(consumer_data->thread, &status);
-	} else {
-		return 0;
-	}
-}
-
-/*
  * Fork and exec a consumer daemon (consumerd).
  *
  * Return pid if successful else -1.
@@ -4741,27 +4808,6 @@ error_create_poll:
 
 	rcu_unregister_thread();
 
-	/*
-	 * Since we are creating the consumer threads, we own them, so we need
-	 * to join them before our thread exits.
-	 */
-	ret = join_consumer_thread(&kconsumer_data);
-	if (ret) {
-		errno = ret;
-		PERROR("join_consumer");
-	}
-
-	ret = join_consumer_thread(&ustconsumer32_data);
-	if (ret) {
-		errno = ret;
-		PERROR("join_consumer ust32");
-	}
-
-	ret = join_consumer_thread(&ustconsumer64_data);
-	if (ret) {
-		errno = ret;
-		PERROR("join_consumer ust64");
-	}
 	return NULL;
 }
 
@@ -5785,6 +5831,11 @@ int main(int argc, char **argv)
 		goto exit_init_data;
 	}
 
+	if (init_thread_consumers_teardown_trigger_pipe()) {
+		retval = -1;
+		goto exit_init_data;
+	}
+
 	/* Check if daemon is UID = 0 */
 	is_root = !getuid();
 
@@ -6406,11 +6457,8 @@ exit_init_data:
 	 * perform lookups in those structures.
 	 */
 	rcu_barrier();
-	/*
-	 * sessiond_cleanup() is called when no other thread is running, except
-	 * the ht_cleanup thread, which is needed to destroy the hash tables.
-	 */
 	rcu_thread_online();
+
 	sessiond_cleanup();
 
 	/*
@@ -6461,6 +6509,8 @@ exit_init_data:
 		retval = -1;
 	}
 
+	/* Consumers thread teardown pipe cleanup */
+	utils_close_pipe(thread_consumers_teardown_trigger_pipe);
 	/* Health thread teardown pipe cleanup */
 	utils_close_pipe(thread_health_teardown_trigger_pipe);
 	/* Apps thread teardown pipe cleanup */
-- 
2.11.0

_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [RFC PATCH v2 13/13] Fix: quit early if instructed to
       [not found] <20170918225206.17725-1-jonathan.rajotte-julien@efficios.com>
                   ` (11 preceding siblings ...)
  2017-09-18 22:52 ` [RFC PATCH v2 12/13] Fix: delay termination on consumerd to allow metadata flushing Jonathan Rajotte
@ 2017-09-18 22:52 ` Jonathan Rajotte
       [not found] ` <20170918225206.17725-2-jonathan.rajotte-julien@efficios.com>
                   ` (12 subsequent siblings)
  25 siblings, 0 replies; 27+ messages in thread
From: Jonathan Rajotte @ 2017-09-18 22:52 UTC (permalink / raw)
  To: lttng-dev; +Cc: jgalar

If the wait_queue size is considerable, not checking if the thread should
quit delays the termination (3+ seconds during stress testing).

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
---
 src/bin/lttng-sessiond/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
index fb58ab4b..0475c5a3 100644
--- a/src/bin/lttng-sessiond/main.c
+++ b/src/bin/lttng-sessiond/main.c
@@ -2244,7 +2244,7 @@ static void *thread_dispatch_ust_registration(void *data)
 				rcu_read_unlock();
 				session_unlock_list();
 			}
-		} while (node != NULL);
+		} while (node != NULL && !CMM_LOAD_SHARED(dispatch_thread_exit));
 
 		health_poll_entry();
 		/* Futex wait on queue. Blocking call on futex() */
-- 
2.11.0

_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH v2 10/13] Teardown apps_notify_thread before thread_manage_apps
       [not found] ` <20170918225206.17725-11-jonathan.rajotte-julien@efficios.com>
@ 2017-12-13 17:58   ` Jérémie Galarneau
  2017-12-14  1:57   ` Jérémie Galarneau
  1 sibling, 0 replies; 27+ messages in thread
From: Jérémie Galarneau @ 2017-12-13 17:58 UTC (permalink / raw)
  To: Jonathan Rajotte; +Cc: lttng-dev, Jeremie Galarneau

On 18 September 2017 at 18:52, Jonathan Rajotte
<jonathan.rajotte-julien@efficios.com> wrote:
> RFC:
> This is necessary since we use the ust_app_ht_by_notify_sock to
> perform the cleanup operation. Since both ust_app_unregister and
> ust_app_ust_app_notify_unregister perform a remove of the app on the
> ust_ust_app_by_notify_sock but only ust_app_notify_unregister actually

There are some extra ust_ and ust_app_ on the last two lines ;)

> end up performing a close(call_rcu) on the socket.

a close(call_rcu) ?

>

It took me a while to wrap my head around what the problem is given this
description. Please, correct me if I'm wrong.

Two threads are performing a clean-up step based on the content of the
ust_app_ht_by_notify_sock hash table.

The apps_notify_thread, on shutdown:
  - invokes notify_sock_unregister_all()
    - iterates through the ust_app_ht_by_notify_sock hash table
      - calls ust_app_notify_sock_unregister() on every application
        notification socket
        - removes the app from the ust_app_ht_by_notify_sock hash table
        - closes the notify socket through a deferred call_rcu()


The apps_thread, on shutdown:
  - iterates through the ust_app_ht hash table
    - calls ust_app_unregister() on every application notification
      socket
      - This function's header comment mentions "The socket is already
        closed at this point so no close to sock.", by which the author
        most likely meant that "there is no need to close the socket".

        This is no longer true with your patch (#09). This thread
        only reacts to errors on the application sockets and then
        tears down.

      - Flushes that app's metadata and buffers
      - Removes the app from the ust_app_ht_by_sock hash table
      - Removes the app from the ust_app_ht_by_notify_sock hash table
      - Removes the app from the ust_app_ht hash table


Given this, it already seems weird that both threads try to remove
the entry from the ust_app_ht_by_notify_sock hash table.

In the case where we are not shutting down,


> Other way to do fix this problem?
>
> Could we simply not remove it on a ust_app_unregister? And always defer
> to the apps_notify_thread for cleanup?
>
> Update the value in the hash table to -1 and emit a close and remove
> from the hash table if the value is -1?
>
> We could also keep a local list of fd in apps_notify_thread and use it for
> cleanup instead of relying on ust_ust_app_by_notify_sock.
>
> I'm not sure what is the best/elegant solution here. I am not a fan of
> the current solution but it working.
>
> Obviously this commit will be reworded and modified accordingly.
>
> Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
> ---
>  src/bin/lttng-sessiond/main.c | 55 ++++++++++++++++++++++++-------------------
>  1 file changed, 31 insertions(+), 24 deletions(-)
>
> diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
> index 4a2a661f..216d0da6 100644
> --- a/src/bin/lttng-sessiond/main.c
> +++ b/src/bin/lttng-sessiond/main.c
> @@ -6209,16 +6209,6 @@ int main(int argc, char **argv)
>         }
>         notification_thread_running = true;
>
> -       /* Create thread to manage application notify socket */
> -       ret = pthread_create(&apps_notify_thread, default_pthread_attr(),
> -                       ust_thread_manage_notify, (void *) NULL);
> -       if (ret) {
> -               errno = ret;
> -               PERROR("pthread_create notify");
> -               retval = -1;
> -               stop_threads();
> -               goto exit_apps_notify;
> -       }
>
>         /* Create thread to manage application socket */
>         ret = pthread_create(&apps_thread, default_pthread_attr(),
> @@ -6231,6 +6221,17 @@ int main(int argc, char **argv)
>                 goto exit_apps;
>         }
>
> +       /* Create thread to manage application notify socket */
> +       ret = pthread_create(&apps_notify_thread, default_pthread_attr(),
> +                       ust_thread_manage_notify, (void *) NULL);
> +       if (ret) {
> +               errno = ret;
> +               PERROR("pthread_create notify");
> +               retval = -1;
> +               stop_threads();
> +               goto exit_apps_notify;
> +       }
> +
>         /* Create thread to dispatch registration */
>         ret = pthread_create(&dispatch_thread, default_pthread_attr(),
>                         thread_dispatch_ust_registration, (void *) NULL);
> @@ -6358,20 +6359,6 @@ exit_reg_apps:
>         }
>
>  exit_dispatch:
> -       /* Instruct the apps thread to quit */
> -       ret = notify_thread_pipe(thread_apps_teardown_trigger_pipe[1]);
> -       if (ret < 0) {
> -               ERR("write error on thread quit pipe");
> -       }
> -
> -       ret = pthread_join(apps_thread, &status);
> -       if (ret) {
> -               errno = ret;
> -               PERROR("pthread_join apps");
> -               retval = -1;
> -       }
> -
> -exit_apps:
>         /* Instruct the apps_notify thread to quit */
>         ret = notify_thread_pipe(thread_apps_notify_teardown_trigger_pipe[1]);
>         if (ret < 0) {
> @@ -6386,6 +6373,26 @@ exit_apps:
>         }
>
>  exit_apps_notify:
> +       /*
> +        * The barrier ensure that all previous resources, notify sockets in
> +        * particular, are freed/closed.
> +        */
> +       rcu_barrier();
> +
> +       /* Instruct the apps thread to quit */
> +       ret = notify_thread_pipe(thread_apps_teardown_trigger_pipe[1]);
> +       if (ret < 0) {
> +               ERR("write error on thread quit pipe");
> +       }
> +
> +       ret = pthread_join(apps_thread, &status);
> +       if (ret) {
> +               errno = ret;
> +               PERROR("pthread_join apps");
> +               retval = -1;
> +       }
> +
> +exit_apps:
>  exit_notification:
>  exit_health:
>  exit_init_data:
> --
> 2.11.0
>



-- 
Jérémie Galarneau
EfficiOS Inc.
http://www.efficios.com
_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH v2 01/13] Extend health thread lifetime
       [not found] ` <20170918225206.17725-2-jonathan.rajotte-julien@efficios.com>
@ 2017-12-14  1:54   ` Jérémie Galarneau
  0 siblings, 0 replies; 27+ messages in thread
From: Jérémie Galarneau @ 2017-12-14  1:54 UTC (permalink / raw)
  To: Jonathan Rajotte; +Cc: lttng-dev, Jeremie Galarneau

Hi,

I rebased this patch set on master.

https://github.com/jgalar/lttng-tools/tree/teardown-rebased

Most patches still applied cleanly, but git really messed this first one up
when rebasing. The patch applied and the code compiled, but
I realized hunks from thread_manage_health() and thread_manage_clients()
ended up mixed-up... *Fun* times.

See comments below and on the other patches.

Thanks!
Jérémie

On 18 September 2017 at 18:51, Jonathan Rajotte
<jonathan.rajotte-julien@efficios.com> wrote:
> Allow easier control over the thread by providing a dedicated quit pipe.

over the health thread's lifetime

>
> Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
> ---
>  src/bin/lttng-sessiond/main.c | 74 ++++++++++++++++++++++++++++++++-----------
>  1 file changed, 55 insertions(+), 19 deletions(-)
>
> diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
> index 03f695ec..5d7df744 100644
> --- a/src/bin/lttng-sessiond/main.c
> +++ b/src/bin/lttng-sessiond/main.c
> @@ -204,6 +204,7 @@ static int kernel_poll_pipe[2] = { -1, -1 };
>   * for all threads when receiving an event on the pipe.
>   */
>  static int thread_quit_pipe[2] = { -1, -1 };
> +static int thread_health_teardown_trigger_pipe[2] = { -1, -1 };
>
>  /*
>   * This pipe is used to inform the thread managing application communication
> @@ -477,6 +478,11 @@ static int init_thread_quit_pipe(void)
>         return __init_thread_quit_pipe(thread_quit_pipe);
>  }
>
> +static int init_thread_health_teardown_trigger_pipe(void)
> +{
> +       return __init_thread_quit_pipe(thread_health_teardown_trigger_pipe);
> +}
> +
>  /*
>   * Stop all threads by closing the thread quit pipe.
>   */
> @@ -4297,11 +4303,13 @@ static void *thread_manage_health(void *data)
>                 goto error;
>         }
>
> -       /*
> -        * Pass 2 as size here for the thread quit pipe and client_sock. Nothing
> -        * more will be added to this poll set.
> -        */
> -       ret = sessiond_set_thread_pollset(&events, 2);
> +       ret = lttng_poll_create(&events, 2, LTTNG_CLOEXEC);

Please add a comment about the "2" being used here.

> +       if (ret < 0) {
> +               goto error;
> +       }
> +
> +       /* Add teardown trigger */
> +       ret = lttng_poll_add(&events, thread_health_teardown_trigger_pipe[0], LPOLLIN | LPOLLERR);
>         if (ret < 0) {
>                 goto error;
>         }
> @@ -4343,10 +4351,18 @@ restart:
>                         }
>
>                         /* Thread quit pipe has been closed. Killing thread. */
> -                       ret = sessiond_check_thread_quit_pipe(pollfd, revents);
> -                       if (ret) {
> -                               err = 0;
> -                               goto exit;
> +                       if (pollfd == thread_health_teardown_trigger_pipe[0]) {
> +                               if (revents & LPOLLIN) {
> +                                       /* Normal exit */
> +                                       err = 0;
> +                                       goto exit;
> +                               } else if (revents & LPOLLERR) {
> +                                       ERR("Health teardown poll error");
> +                                       goto error;
> +                               } else {
> +                                       ERR("Unexpected poll events %u for teardown sock", revents);

teardown socket -> pipe

> +                                       goto error;
> +                               }
>                         }
>
>                         /* Event on the registration socket */
> @@ -4412,8 +4428,13 @@ restart:
>                 }
>         }
>
> -exit:
>  error:
> +       /*
> +        * Perform stop_thread only on error path since in a normal teardown the
> +        * health thread is in the last thread to terminate.

is the last thread to terminate

> +        */
> +       stop_threads();
> +exit:
>         if (err) {
>                 ERR("Health error occurred in %s", __func__);
>         }
> @@ -4425,9 +4446,7 @@ error:
>                         PERROR("close");
>                 }
>         }
> -
>         lttng_poll_clean(&events);
> -       stop_threads();
>         rcu_unregister_thread();
>         return NULL;
>  }
> @@ -5631,6 +5650,7 @@ int main(int argc, char **argv)
>                         *ust64_channel_monitor_pipe = NULL,
>                         *kernel_channel_monitor_pipe = NULL;
>         bool notification_thread_running = false;
> +       bool health_thread_running = false;
>
>         init_kernel_workarounds();
>
> @@ -5717,6 +5737,11 @@ int main(int argc, char **argv)
>                 goto exit_init_data;
>         }
>
> +       if (init_thread_health_teardown_trigger_pipe()) {
> +               retval = -1;
> +               goto exit_init_data;
> +       }
> +
>         /* Check if daemon is UID = 0 */
>         is_root = !getuid();
>
> @@ -6119,6 +6144,7 @@ int main(int argc, char **argv)
>                 retval = -1;
>                 goto exit_health;
>         }
> +       health_thread_running = true;
>
>         /* notification_thread_data acquires the pipes' read side. */
>         notification_thread_handle = notification_thread_handle_create(
> @@ -6311,13 +6337,6 @@ exit_dispatch:
>
>  exit_client:
>  exit_notification:
> -       ret = pthread_join(health_thread, &status);
> -       if (ret) {
> -               errno = ret;
> -               PERROR("pthread_join health thread");
> -               retval = -1;
> -       }
> -
>  exit_health:
>  exit_init_data:
>         /*
> @@ -6359,6 +6378,20 @@ exit_init_data:
>                 notification_thread_handle_destroy(notification_thread_handle);
>         }
>
> +       if (health_thread_running) {
> +               ret = notify_thread_pipe(thread_health_teardown_trigger_pipe[1]);
> +               if (ret < 0) {
> +                       ERR("write error on thread quit pipe");
> +               }
> +
> +               ret = pthread_join(health_thread, &status);
> +               if (ret) {
> +                       errno = ret;
> +                       PERROR("pthread_join health thread");
> +                       retval = -1;
> +               }
> +       }
> +
>         rcu_thread_offline();
>         rcu_unregister_thread();
>
> @@ -6366,6 +6399,9 @@ exit_init_data:
>         if (ret) {
>                 retval = -1;
>         }
> +
> +       /* Health thread teardown pipe cleanup */
> +       utils_close_pipe(thread_health_teardown_trigger_pipe);
>         lttng_pipe_destroy(ust32_channel_monitor_pipe);
>         lttng_pipe_destroy(ust64_channel_monitor_pipe);
>         lttng_pipe_destroy(kernel_channel_monitor_pipe);
> --
> 2.11.0
>



-- 
Jérémie Galarneau
EfficiOS Inc.
http://www.efficios.com
_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH v2 02/13] Reorder pthread_join for easier ordering later on
       [not found] ` <20170918225206.17725-3-jonathan.rajotte-julien@efficios.com>
@ 2017-12-14  1:54   ` Jérémie Galarneau
  0 siblings, 0 replies; 27+ messages in thread
From: Jérémie Galarneau @ 2017-12-14  1:54 UTC (permalink / raw)
  To: Jonathan Rajotte; +Cc: lttng-dev, Jeremie Galarneau

This patch calls for a better title and description. I see that you are
shuffling the order in which the threads are created and joined.

However, it would help to contrast the current and intended order of
creation/join (and why it is done) in the commit message and comments.

Jérémie

On 18 September 2017 at 18:51, Jonathan Rajotte
<jonathan.rajotte-julien@efficios.com> wrote:
> Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
> ---
>  src/bin/lttng-sessiond/main.c | 96 ++++++++++++++++++++-----------------------
>  1 file changed, 45 insertions(+), 51 deletions(-)
>
> diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
> index 5d7df744..45c0270e 100644
> --- a/src/bin/lttng-sessiond/main.c
> +++ b/src/bin/lttng-sessiond/main.c
> @@ -6170,15 +6170,26 @@ int main(int argc, char **argv)
>         }
>         notification_thread_running = true;
>
> -       /* Create thread to manage the client socket */
> -       ret = pthread_create(&client_thread, default_pthread_attr(),
> -                       thread_manage_clients, (void *) NULL);
> +       /* Create thread to manage application notify socket */
> +       ret = pthread_create(&apps_notify_thread, default_pthread_attr(),
> +                       ust_thread_manage_notify, (void *) NULL);
>         if (ret) {
>                 errno = ret;
> -               PERROR("pthread_create clients");
> +               PERROR("pthread_create notify");
>                 retval = -1;
>                 stop_threads();
> -               goto exit_client;
> +               goto exit_apps_notify;
> +       }
> +
> +       /* Create thread to manage application socket */
> +       ret = pthread_create(&apps_thread, default_pthread_attr(),
> +                       thread_manage_apps, (void *) NULL);
> +       if (ret) {
> +               errno = ret;
> +               PERROR("pthread_create apps");
> +               retval = -1;
> +               stop_threads();
> +               goto exit_apps;
>         }
>
>         /* Create thread to dispatch registration */
> @@ -6203,26 +6214,15 @@ int main(int argc, char **argv)
>                 goto exit_reg_apps;
>         }
>
> -       /* Create thread to manage application socket */
> -       ret = pthread_create(&apps_thread, default_pthread_attr(),
> -                       thread_manage_apps, (void *) NULL);
> +       /* Create thread to manage the client socket */
> +       ret = pthread_create(&client_thread, default_pthread_attr(),
> +                       thread_manage_clients, (void *) NULL);
>         if (ret) {
>                 errno = ret;
> -               PERROR("pthread_create apps");
> +               PERROR("pthread_create clients");
>                 retval = -1;
>                 stop_threads();
> -               goto exit_apps;
> -       }
> -
> -       /* Create thread to manage application notify socket */
> -       ret = pthread_create(&apps_notify_thread, default_pthread_attr(),
> -                       ust_thread_manage_notify, (void *) NULL);
> -       if (ret) {
> -               errno = ret;
> -               PERROR("pthread_create notify");
> -               retval = -1;
> -               stop_threads();
> -               goto exit_apps_notify;
> +               goto exit_client;
>         }
>
>         /* Create agent registration thread. */
> @@ -6278,12 +6278,12 @@ exit_load_session:
>                 ret = pthread_join(kernel_thread, &status);
>                 if (ret) {
>                         errno = ret;
> -                       PERROR("pthread_join");
> +                       PERROR("pthread_join kernel");
>                         retval = -1;
>                 }
>         }
> -exit_kernel:
>
> +exit_kernel:
>         ret = pthread_join(agent_reg_thread, &status);
>         if (ret) {
>                 errno = ret;
> @@ -6291,51 +6291,45 @@ exit_kernel:
>                 retval = -1;
>         }
>  exit_agent_reg:
> -
> -       ret = pthread_join(apps_notify_thread, &status);
> +       ret = pthread_join(client_thread, &status);
>         if (ret) {
>                 errno = ret;
> -               PERROR("pthread_join apps notify");
> +               PERROR("pthread_join client");
>                 retval = -1;
>         }
> -exit_apps_notify:
>
> +exit_client:
> +       ret = pthread_join(reg_apps_thread, &status);
> +       if (ret) {
> +               errno = ret;
> +               PERROR("pthread_join registration app" );
> +               retval = -1;
> +       }
> +exit_reg_apps:
> +       ret = pthread_join(dispatch_thread, &status);
> +       if (ret) {
> +               errno = ret;
> +               PERROR("pthread_join dispatch");
> +               retval = -1;
> +       }
> +
> +exit_dispatch:
>         ret = pthread_join(apps_thread, &status);
>         if (ret) {
>                 errno = ret;
>                 PERROR("pthread_join apps");
>                 retval = -1;
>         }
> +
>  exit_apps:
> -
> -       ret = pthread_join(reg_apps_thread, &status);
> +       ret = pthread_join(apps_notify_thread, &status);
>         if (ret) {
>                 errno = ret;
> -               PERROR("pthread_join");
> -               retval = -1;
> -       }
> -exit_reg_apps:
> -
> -       /*
> -        * Join dispatch thread after joining reg_apps_thread to ensure
> -        * we don't leak applications in the queue.
> -        */
> -       ret = pthread_join(dispatch_thread, &status);
> -       if (ret) {
> -               errno = ret;
> -               PERROR("pthread_join");
> -               retval = -1;
> -       }
> -exit_dispatch:
> -
> -       ret = pthread_join(client_thread, &status);
> -       if (ret) {
> -               errno = ret;
> -               PERROR("pthread_join");
> +               PERROR("pthread_join apps notify");
>                 retval = -1;
>         }
>
> -exit_client:
> +exit_apps_notify:
>  exit_notification:
>  exit_health:
>  exit_init_data:
> --
> 2.11.0
>



-- 
Jérémie Galarneau
EfficiOS Inc.
http://www.efficios.com
_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH v2 03/13] Terminate dispatch thread after reg_apps_thread is terminated
       [not found] ` <20170918225206.17725-4-jonathan.rajotte-julien@efficios.com>
@ 2017-12-14  1:55   ` Jérémie Galarneau
  0 siblings, 0 replies; 27+ messages in thread
From: Jérémie Galarneau @ 2017-12-14  1:55 UTC (permalink / raw)
  To: Jonathan Rajotte; +Cc: lttng-dev, Jeremie Galarneau

No objection to the code, but please add a description and comment.

Jérémie

On 18 September 2017 at 18:51, Jonathan Rajotte
<jonathan.rajotte-julien@efficios.com> wrote:
> Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
> ---
>  src/bin/lttng-sessiond/main.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
> index 45c0270e..0ca72ec8 100644
> --- a/src/bin/lttng-sessiond/main.c
> +++ b/src/bin/lttng-sessiond/main.c
> @@ -497,9 +497,6 @@ static void stop_threads(void)
>                 ERR("write error on thread quit pipe");
>         }
>
> -       /* Dispatch thread */
> -       CMM_STORE_SHARED(dispatch_thread_exit, 1);
> -       futex_nto1_wake(&ust_cmd_queue.futex);
>  }
>
>  /*
> @@ -6306,6 +6303,10 @@ exit_client:
>                 retval = -1;
>         }
>  exit_reg_apps:
> +       /* Instruct the dispatch thread to quit */
> +       CMM_STORE_SHARED(dispatch_thread_exit, 1);
> +       futex_nto1_wake(&ust_cmd_queue.futex);
> +
>         ret = pthread_join(dispatch_thread, &status);
>         if (ret) {
>                 errno = ret;
> --
> 2.11.0
>



-- 
Jérémie Galarneau
EfficiOS Inc.
http://www.efficios.com
_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH v2 04/13] Order termination of thread_manage_apps after dispatch_thread
       [not found] ` <20170918225206.17725-5-jonathan.rajotte-julien@efficios.com>
@ 2017-12-14  1:55   ` Jérémie Galarneau
  0 siblings, 0 replies; 27+ messages in thread
From: Jérémie Galarneau @ 2017-12-14  1:55 UTC (permalink / raw)
  To: Jonathan Rajotte; +Cc: lttng-dev, Jeremie Galarneau

On 18 September 2017 at 18:51, Jonathan Rajotte
<jonathan.rajotte-julien@efficios.com> wrote:
> This ensure that no registration are ongoing when thread_manage_apps

registration -> registrations

> perform it's cleanup and termination.

perform -> performs

>
> Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
> ---
>  src/bin/lttng-sessiond/main.c | 46 +++++++++++++++++++++++++++++++++++++------
>  1 file changed, 40 insertions(+), 6 deletions(-)
>
> diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
> index 0ca72ec8..8d9b14b5 100644
> --- a/src/bin/lttng-sessiond/main.c
> +++ b/src/bin/lttng-sessiond/main.c
> @@ -205,6 +205,7 @@ static int kernel_poll_pipe[2] = { -1, -1 };
>   */
>  static int thread_quit_pipe[2] = { -1, -1 };
>  static int thread_health_teardown_trigger_pipe[2] = { -1, -1 };
> +static int thread_apps_teardown_trigger_pipe[2] = { -1, -1 };
>
>  /*
>   * This pipe is used to inform the thread managing application communication
> @@ -483,6 +484,11 @@ static int init_thread_health_teardown_trigger_pipe(void)
>         return __init_thread_quit_pipe(thread_health_teardown_trigger_pipe);
>  }
>
> +static int init_thread_apps_teardown_trigger_pipe(void)
> +{
> +       return __init_thread_quit_pipe(thread_apps_teardown_trigger_pipe);
> +}
> +
>  /*
>   * Stop all threads by closing the thread quit pipe.
>   */
> @@ -496,7 +502,6 @@ static void stop_threads(void)
>         if (ret < 0) {
>                 ERR("write error on thread quit pipe");
>         }
> -
>  }
>
>  /*
> @@ -1634,11 +1639,17 @@ static void *thread_manage_apps(void *data)
>
>         health_code_update();
>
> -       ret = sessiond_set_thread_pollset(&events, 2);
> +       ret = lttng_poll_create(&events, 2, LTTNG_CLOEXEC);
>         if (ret < 0) {
>                 goto error_poll_create;
>         }
>
> +       /* Add quit pipe */
> +       ret = lttng_poll_add(&events, thread_apps_teardown_trigger_pipe[0], LPOLLIN | LPOLLERR);
> +       if (ret < 0) {
> +               goto error;
> +       }
> +
>         ret = lttng_poll_add(&events, apps_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
>         if (ret < 0) {
>                 goto error;
> @@ -1685,10 +1696,18 @@ static void *thread_manage_apps(void *data)
>                         }
>
>                         /* Thread quit pipe has been closed. Killing thread. */
> -                       ret = sessiond_check_thread_quit_pipe(pollfd, revents);
> -                       if (ret) {
> -                               err = 0;
> -                               goto exit;
> +                       if (pollfd == thread_apps_teardown_trigger_pipe[0]) {
> +                               if (revents & LPOLLIN) {
> +                                       /* Normal exit */
> +                                       err = 0;
> +                                       goto exit;
> +                               } else if (revents & LPOLLERR) {
> +                                       ERR("Quit pipe error");
> +                                       goto error;
> +                               } else {
> +                                       ERR("Unknown poll events %u for quit pipe", revents);
> +                                       goto error;
> +                               }
>                         }
>
>                         /* Inspect the apps cmd pipe */
> @@ -5739,6 +5758,11 @@ int main(int argc, char **argv)
>                 goto exit_init_data;
>         }
>
> +       if (init_thread_apps_teardown_trigger_pipe()) {
> +               retval = -1;
> +               goto exit_init_data;
> +       }
> +
>         /* Check if daemon is UID = 0 */
>         is_root = !getuid();
>
> @@ -6302,6 +6326,7 @@ exit_client:
>                 PERROR("pthread_join registration app" );
>                 retval = -1;
>         }
> +
>  exit_reg_apps:
>         /* Instruct the dispatch thread to quit */
>         CMM_STORE_SHARED(dispatch_thread_exit, 1);
> @@ -6315,6 +6340,12 @@ exit_reg_apps:
>         }
>
>  exit_dispatch:
> +       /* Instruct the apps thread to quit */
> +       ret = notify_thread_pipe(thread_apps_teardown_trigger_pipe[1]);
> +       if (ret < 0) {
> +               ERR("write error on thread quit pipe");
> +       }
> +
>         ret = pthread_join(apps_thread, &status);
>         if (ret) {
>                 errno = ret;
> @@ -6397,6 +6428,9 @@ exit_init_data:
>
>         /* Health thread teardown pipe cleanup */
>         utils_close_pipe(thread_health_teardown_trigger_pipe);
> +       /* Apps thread teardown pipe cleanup */
> +       utils_close_pipe(thread_apps_teardown_trigger_pipe);
> +
>         lttng_pipe_destroy(ust32_channel_monitor_pipe);
>         lttng_pipe_destroy(ust64_channel_monitor_pipe);
>         lttng_pipe_destroy(kernel_channel_monitor_pipe);
> --
> 2.11.0
>



-- 
Jérémie Galarneau
EfficiOS Inc.
http://www.efficios.com
_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH v2 05/13] Control thread_apps_notify lifetime with specialized quit pipe
       [not found] ` <20170918225206.17725-6-jonathan.rajotte-julien@efficios.com>
@ 2017-12-14  1:55   ` Jérémie Galarneau
  0 siblings, 0 replies; 27+ messages in thread
From: Jérémie Galarneau @ 2017-12-14  1:55 UTC (permalink / raw)
  To: Jonathan Rajotte; +Cc: lttng-dev, Jeremie Galarneau

In the subject, replace "specialized" by "dedicated".

On 18 September 2017 at 18:51, Jonathan Rajotte
<jonathan.rajotte-julien@efficios.com> wrote:
> Listen to application longer and quit only when most other thread are

application -> applications

> terterminated. This simplifies data interaction with other threads.

terterminated -> terminated.

>
> Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
> ---
>  src/bin/lttng-sessiond/lttng-sessiond.h |  2 ++
>  src/bin/lttng-sessiond/main.c           | 20 +++++++++++++++++++-
>  src/bin/lttng-sessiond/ust-thread.c     | 27 ++++++++++++++++++++-------
>  3 files changed, 41 insertions(+), 8 deletions(-)
>
> diff --git a/src/bin/lttng-sessiond/lttng-sessiond.h b/src/bin/lttng-sessiond/lttng-sessiond.h
> index 74552db6..8fbcac60 100644
> --- a/src/bin/lttng-sessiond/lttng-sessiond.h
> +++ b/src/bin/lttng-sessiond/lttng-sessiond.h
> @@ -94,6 +94,8 @@ struct ust_reg_wait_node {
>   */
>  extern int apps_cmd_notify_pipe[2];
>
> +extern int thread_apps_notify_teardown_trigger_pipe[2];
> +
>  /*
>   * Used to notify that a hash table needs to be destroyed by dedicated
>   * thread. Required by design because we don't want to move destroy
> diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
> index 8d9b14b5..3596d7e3 100644
> --- a/src/bin/lttng-sessiond/main.c
> +++ b/src/bin/lttng-sessiond/main.c
> @@ -206,6 +206,7 @@ static int kernel_poll_pipe[2] = { -1, -1 };
>  static int thread_quit_pipe[2] = { -1, -1 };
>  static int thread_health_teardown_trigger_pipe[2] = { -1, -1 };
>  static int thread_apps_teardown_trigger_pipe[2] = { -1, -1 };
> +int thread_apps_notify_teardown_trigger_pipe[2] = { -1, -1 };
>
>  /*
>   * This pipe is used to inform the thread managing application communication
> @@ -489,6 +490,11 @@ static int init_thread_apps_teardown_trigger_pipe(void)
>         return __init_thread_quit_pipe(thread_apps_teardown_trigger_pipe);
>  }
>
> +static int init_thread_apps_notify_teardown_trigger_pipe(void)
> +{
> +       return __init_thread_quit_pipe(thread_apps_notify_teardown_trigger_pipe);
> +}
> +
>  /*
>   * Stop all threads by closing the thread quit pipe.
>   */
> @@ -5763,6 +5769,11 @@ int main(int argc, char **argv)
>                 goto exit_init_data;
>         }
>
> +       if (init_thread_apps_notify_teardown_trigger_pipe()) {
> +               retval = -1;
> +               goto exit_init_data;
> +       }
> +
>         /* Check if daemon is UID = 0 */
>         is_root = !getuid();
>
> @@ -6354,6 +6365,12 @@ exit_dispatch:
>         }
>
>  exit_apps:
> +       /* Instruct the apps_notify thread to quit */
> +       ret = notify_thread_pipe(thread_apps_notify_teardown_trigger_pipe[1]);
> +       if (ret < 0) {
> +               ERR("write error on thread quit pipe");
> +       }
> +
>         ret = pthread_join(apps_notify_thread, &status);
>         if (ret) {
>                 errno = ret;
> @@ -6430,7 +6447,8 @@ exit_init_data:
>         utils_close_pipe(thread_health_teardown_trigger_pipe);
>         /* Apps thread teardown pipe cleanup */
>         utils_close_pipe(thread_apps_teardown_trigger_pipe);
> -
> +       /* Apps notify thread teardown pipe cleanup */
> +       utils_close_pipe(thread_apps_notify_teardown_trigger_pipe);
>         lttng_pipe_destroy(ust32_channel_monitor_pipe);
>         lttng_pipe_destroy(ust64_channel_monitor_pipe);
>         lttng_pipe_destroy(kernel_channel_monitor_pipe);
> diff --git a/src/bin/lttng-sessiond/ust-thread.c b/src/bin/lttng-sessiond/ust-thread.c
> index 7fb18a78..1e7a8229 100644
> --- a/src/bin/lttng-sessiond/ust-thread.c
> +++ b/src/bin/lttng-sessiond/ust-thread.c
> @@ -51,9 +51,15 @@ void *ust_thread_manage_notify(void *data)
>
>         health_code_update();
>
> -       ret = sessiond_set_thread_pollset(&events, 2);
> +       ret = lttng_poll_create(&events, 2, LTTNG_CLOEXEC);
>         if (ret < 0) {
> -               goto error_poll_create;
> +               goto error;
> +       }
> +
> +       /* Add quit pipe */
> +       ret = lttng_poll_add(&events, thread_apps_notify_teardown_trigger_pipe[0], LPOLLIN | LPOLLERR);
> +       if (ret < 0) {
> +               goto error;
>         }
>
>         /* Add notify pipe to the pollset. */
> @@ -99,11 +105,18 @@ restart:
>                                 continue;
>                         }
>
> -                       /* Thread quit pipe has been closed. Killing thread. */
> -                       ret = sessiond_check_thread_quit_pipe(pollfd, revents);
> -                       if (ret) {
> -                               err = 0;
> -                               goto exit;
> +                       if (pollfd == thread_apps_notify_teardown_trigger_pipe[0]) {
> +                               if (revents & LPOLLIN) {
> +                                       /* Normal exit */
> +                                       err = 0;
> +                                       goto exit;
> +                               } else if (revents & LPOLLERR) {
> +                                       ERR("Apps notify quit error");
> +                                       goto error;
> +                               } else {
> +                                       ERR("Unexpected poll events %u for quit pipe", revents);
> +                                       goto error;
> +                               }
>                         }
>
>                         /* Inspect the apps cmd pipe */
> --
> 2.11.0
>



-- 
Jérémie Galarneau
EfficiOS Inc.
http://www.efficios.com
_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH v2 06/13] Fix: unregister app notify socket on sessiond tear down
       [not found] ` <20170918225206.17725-7-jonathan.rajotte-julien@efficios.com>
@ 2017-12-14  1:56   ` Jérémie Galarneau
  0 siblings, 0 replies; 27+ messages in thread
From: Jérémie Galarneau @ 2017-12-14  1:56 UTC (permalink / raw)
  To: Jonathan Rajotte; +Cc: lttng-dev, Jeremie Galarneau

On 18 September 2017 at 18:51, Jonathan Rajotte
<jonathan.rajotte-julien@efficios.com> wrote:
> A race between the sessiond tear down and applications initialization
> can lead to a deadlock.
>
> Applications try to communicate via the notify sockets while sessiond

the sessiond

> does not listen anymore on these sockets since the thread responsible
> for reception/response is terminated (ust_thread_manage_notify). These
> sockets are never closed hence an application could hang on
> communication.
>
> Sessiond hang happen during call to cmd_destroy_session during

The sessiond hang happens during the call to cmd_destroy_session initiated
by sessiond_cleanup().

> sessiond_cleanup. Sessiond is trying to communicate with the app while
> the app is waiting for a response on the app notification socket.
>
> To prevent this situation a call to ust_app_notify_sock_unregister is
> performed on all entry of the ust_app_ht_by_notify_sock hash table at

entry -> entries

> the time of termination. This ensure that any pending communication

ensure -> ensures

> initiated by the application will be terminated since all sockets will
> be closed at the end of the grace period via call_rcu inside
> ust_app_notify_sock_unregister. The use of ust_app_ht_by_notify_sock
> instead of the ust_app_ht prevent a double call_rcu since entries are

prevent -> prevents

Do I understand the "big picture" of what you are trying to solve here?
  - App communicates on its 'notify' socket and waits for a reply
    - Sessiond never sends that reply since apps_notify_thread (the
      thread that manages application 'notify' sockets) is dead.
  - Sessiond is sending a command through the application 'command'
    socket during its clean-up
    - Sessiond never receives a reply since the application is still
      waiting for a reply on the 'notify' socket.

The 'simple' fix I see would be that the apps_notify_thread should
close all the FDs it manages (and set them to -1) as it is the only
thread interacting with those FDs (to my knowledge). That would
unblock the applications and prevent further notifications from
being sent.

What I understand you are trying to do is to perform the clean-up
in the same way it would have occurred had the application simply
died. At first glance, that sounds like a clean way to handle the
problem.

Going back to your patch:

Why does using ust_app_ht_by_notify_sock instead of ust_app_ht
prevent a double call_rcu?

I see how this thread performs a deferred clean-up using call_rcu().
What is causing the second one?

> removed from ust_app_ht_by_notify_sock during ust_app_notify_sock_unregister.
>
> This can be reproduced using the sessiond_teardown_active_session
> scenario provided by [1].
>
> [1] https://github.com/PSRCode/lttng-stress
>
> Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
> ---
>  src/bin/lttng-sessiond/ust-thread.c | 17 ++++++++++++++++-
>  1 file changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/src/bin/lttng-sessiond/ust-thread.c b/src/bin/lttng-sessiond/ust-thread.c
> index 1e7a8229..8f11133a 100644
> --- a/src/bin/lttng-sessiond/ust-thread.c
> +++ b/src/bin/lttng-sessiond/ust-thread.c
> @@ -27,6 +27,19 @@
>  #include "health-sessiond.h"
>  #include "testpoint.h"
>
> +
> +static
> +void notify_sock_unregister_all()
> +{
> +       struct lttng_ht_iter iter;
> +       struct ust_app *app;

Missing empty line here.

> +       rcu_read_lock();
> +       cds_lfht_for_each_entry(ust_app_ht_by_notify_sock->ht, &iter.iter, app, notify_sock_n.node) {

This line exceeds 80 chars.

> +               ust_app_notify_sock_unregister(app->notify_sock);
> +       }
> +       rcu_read_unlock();
> +}
> +
>  /*
>   * This thread manage application notify communication.
>   */
> @@ -53,7 +66,7 @@ void *ust_thread_manage_notify(void *data)
>
>         ret = lttng_poll_create(&events, 2, LTTNG_CLOEXEC);
>         if (ret < 0) {
> -               goto error;
> +               goto error_poll_create;
>         }
>
>         /* Add quit pipe */
> @@ -197,6 +210,8 @@ error_poll_create:
>  error_testpoint:
>         utils_close_pipe(apps_cmd_notify_pipe);
>         apps_cmd_notify_pipe[0] = apps_cmd_notify_pipe[1] = -1;
> +       notify_sock_unregister_all();
> +
>         DBG("Application notify communication apps thread cleanup complete");
>         if (err) {
>                 health_error();
> --
> 2.11.0
>



-- 
Jérémie Galarneau
EfficiOS Inc.
http://www.efficios.com
_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH v2 07/13] Always reply to an inquiring app
       [not found] ` <20170918225206.17725-8-jonathan.rajotte-julien@efficios.com>
@ 2017-12-14  1:56   ` Jérémie Galarneau
  0 siblings, 0 replies; 27+ messages in thread
From: Jérémie Galarneau @ 2017-12-14  1:56 UTC (permalink / raw)
  To: Jonathan Rajotte; +Cc: lttng-dev, Jeremie Galarneau

Looks good to me.

Jérémie

On 18 September 2017 at 18:52, Jonathan Rajotte
<jonathan.rajotte-julien@efficios.com> wrote:
> Reply to the app on errors to prevent an app-side receive hang.
>
> Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
> ---
>  src/bin/lttng-sessiond/ust-app.c | 64 ++++++++++++++++++++++------------------
>  1 file changed, 36 insertions(+), 28 deletions(-)
>
> diff --git a/src/bin/lttng-sessiond/ust-app.c b/src/bin/lttng-sessiond/ust-app.c
> index e79455b0..20ac469b 100644
> --- a/src/bin/lttng-sessiond/ust-app.c
> +++ b/src/bin/lttng-sessiond/ust-app.c
> @@ -5394,13 +5394,13 @@ static int reply_ust_register_channel(int sock, int sobjd, int cobjd,
>                 size_t nr_fields, struct ustctl_field *fields)
>  {
>         int ret, ret_code = 0;
> -       uint32_t chan_id, reg_count;
> -       uint64_t chan_reg_key;
> -       enum ustctl_channel_header type;
> +       uint32_t chan_id = 0, reg_count = 0;
> +       uint64_t chan_reg_key = 0;
> +       enum ustctl_channel_header type = USTCTL_CHANNEL_HEADER_UNKNOWN;
>         struct ust_app *app;
>         struct ust_app_channel *ua_chan;
>         struct ust_app_session *ua_sess;
> -       struct ust_registry_session *registry;
> +       struct ust_registry_session *registry = NULL;
>         struct ust_registry_channel *chan_reg;
>
>         rcu_read_lock();
> @@ -5410,16 +5410,16 @@ static int reply_ust_register_channel(int sock, int sobjd, int cobjd,
>         if (!app) {
>                 DBG("Application socket %d is being torn down. Abort event notify",
>                                 sock);
> -               ret = 0;
> -               goto error_rcu_unlock;
> +               ret_code = -1;
> +               goto reply;
>         }
>
>         /* Lookup channel by UST object descriptor. */
>         ua_chan = find_channel_by_objd(app, cobjd);
>         if (!ua_chan) {
>                 DBG("Application channel is being torn down. Abort event notify");
> -               ret = 0;
> -               goto error_rcu_unlock;
> +               ret_code = -1;
> +               goto reply;
>         }
>
>         assert(ua_chan->session);
> @@ -5429,8 +5429,8 @@ static int reply_ust_register_channel(int sock, int sobjd, int cobjd,
>         registry = get_session_registry(ua_sess);
>         if (!registry) {
>                 DBG("Application session is being torn down. Abort event notify");
> -               ret = 0;
> -               goto error_rcu_unlock;
> +               ret_code = -1;
> +               goto reply;
>         };
>
>         /* Depending on the buffer type, a different channel key is used. */
> @@ -5469,10 +5469,13 @@ static int reply_ust_register_channel(int sock, int sobjd, int cobjd,
>                 ret_code = ust_metadata_channel_statedump(registry, chan_reg);
>                 if (ret_code) {
>                         ERR("Error appending channel metadata (errno = %d)", ret_code);
> -                       goto reply;
> +                       goto unlock_reply;
>                 }
>         }
>
> +unlock_reply:
> +       pthread_mutex_unlock(&registry->lock);
> +
>  reply:
>         DBG3("UST app replying to register channel key %" PRIu64
>                         " with id %u, type: %d, ret: %d", chan_reg_key, chan_id, type,
> @@ -5488,12 +5491,13 @@ reply:
>                 goto error;
>         }
>
> -       /* This channel registry registration is completed. */
> +       if (ret_code < 0) {
> +               goto error;
> +       }
> +
>         chan_reg->register_done = 1;
>
>  error:
> -       pthread_mutex_unlock(&registry->lock);
> -error_rcu_unlock:
>         rcu_read_unlock();
>         free(fields);
>         return ret;
> @@ -5527,16 +5531,16 @@ static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name,
>         if (!app) {
>                 DBG("Application socket %d is being torn down. Abort event notify",
>                                 sock);
> -               ret = 0;
> -               goto error_rcu_unlock;
> +               ret_code = -1;
> +               goto reply;
>         }
>
>         /* Lookup channel by UST object descriptor. */
>         ua_chan = find_channel_by_objd(app, cobjd);
>         if (!ua_chan) {
>                 DBG("Application channel is being torn down. Abort event notify");
> -               ret = 0;
> -               goto error_rcu_unlock;
> +               ret_code = -1;
> +               goto reply;
>         }
>
>         assert(ua_chan->session);
> @@ -5545,8 +5549,8 @@ static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name,
>         registry = get_session_registry(ua_sess);
>         if (!registry) {
>                 DBG("Application session is being torn down. Abort event notify");
> -               ret = 0;
> -               goto error_rcu_unlock;
> +               ret_code = -1;
> +               goto reply;
>         }
>
>         if (ua_sess->buffer_type == LTTNG_BUFFER_PER_UID) {
> @@ -5570,6 +5574,9 @@ static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name,
>         fields = NULL;
>         model_emf_uri = NULL;
>
> +       pthread_mutex_unlock(&registry->lock);
> +
> +reply:
>         /*
>          * The return value is returned to ustctl so in case of an error, the
>          * application can be notified. In case of an error, it's important not to
> @@ -5593,8 +5600,6 @@ static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name,
>                         name, event_id);
>
>  error:
> -       pthread_mutex_unlock(&registry->lock);
> -error_rcu_unlock:
>         rcu_read_unlock();
>         free(sig);
>         free(fields);
> @@ -5627,8 +5632,9 @@ static int add_enum_ust_registry(int sock, int sobjd, char *name,
>                 /* Return an error since this is not an error */
>                 DBG("Application socket %d is being torn down. Aborting enum registration",
>                                 sock);
> +               ret_code = -1;
>                 free(entries);
> -               goto error_rcu_unlock;
> +               goto reply;
>         }
>
>         /* Lookup session by UST object descriptor. */
> @@ -5637,14 +5643,16 @@ static int add_enum_ust_registry(int sock, int sobjd, char *name,
>                 /* Return an error since this is not an error */
>                 DBG("Application session is being torn down (session not found). Aborting enum registration.");
>                 free(entries);
> -               goto error_rcu_unlock;
> +               ret_code = -1;
> +               goto reply;
>         }
>
>         registry = get_session_registry(ua_sess);
>         if (!registry) {
>                 DBG("Application session is being torn down (registry not found). Aborting enum registration.");
>                 free(entries);
> -               goto error_rcu_unlock;
> +               ret_code = -1;
> +               goto reply;
>         }
>
>         pthread_mutex_lock(&registry->lock);
> @@ -5658,6 +5666,9 @@ static int add_enum_ust_registry(int sock, int sobjd, char *name,
>                         entries, nr_entries, &enum_id);
>         entries = NULL;
>
> +       pthread_mutex_unlock(&registry->lock);
> +
> +reply:
>         /*
>          * The return value is returned to ustctl so in case of an error, the
>          * application can be notified. In case of an error, it's important not to
> @@ -5678,10 +5689,7 @@ static int add_enum_ust_registry(int sock, int sobjd, char *name,
>         }
>
>         DBG3("UST registry enum %s added successfully or already found", name);
> -
>  error:
> -       pthread_mutex_unlock(&registry->lock);
> -error_rcu_unlock:
>         rcu_read_unlock();
>         return ret;
>  }
> --
> 2.11.0
>



-- 
Jérémie Galarneau
EfficiOS Inc.
http://www.efficios.com
_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH v2 08/13] Fix: perform lookup then test for condition
       [not found] ` <20170918225206.17725-9-jonathan.rajotte-julien@efficios.com>
@ 2017-12-14  1:56   ` Jérémie Galarneau
  0 siblings, 0 replies; 27+ messages in thread
From: Jérémie Galarneau @ 2017-12-14  1:56 UTC (permalink / raw)
  To: Jonathan Rajotte; +Cc: lttng-dev, Jeremie Galarneau

Please add a description of what this fixes, not just how it fixes it.

In this case, I see that the error path is triggered anytime
the pollfd does not match the first app socket in the wait
queue.

On 18 September 2017 at 18:52, Jonathan Rajotte
<jonathan.rajotte-julien@efficios.com> wrote:
> Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
> ---
>  src/bin/lttng-sessiond/main.c | 7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
> index 3596d7e3..8cffa6f6 100644
> --- a/src/bin/lttng-sessiond/main.c
> +++ b/src/bin/lttng-sessiond/main.c
> @@ -1899,8 +1899,11 @@ static void sanitize_wait_queue(struct ust_reg_wait_queue *wait_queue)
>
>                 cds_list_for_each_entry_safe(wait_node, tmp_wait_node,
>                                 &wait_queue->head, head) {
> -                       if (pollfd == wait_node->app->sock &&
> -                                       (revents & (LPOLLHUP | LPOLLERR))) {
> +                       if (pollfd != wait_node->app->sock) {
> +                               continue;
> +                       }
> +
> +                       if (revents & (LPOLLHUP | LPOLLERR)) {

The fix is correct, but this check can be performed before this loop.

Since (LPOLLHUP | LPOLLERR) are the only expected poll events
at this point, we can log the "Unexpected poll events" message
and goto error before entering the wait queue loop, which
will simplify its body.

Jérémie

>                                 cds_list_del(&wait_node->head);
>                                 wait_queue->count--;
>                                 ust_app_destroy(wait_node->app);
> --
> 2.11.0
>



-- 
Jérémie Galarneau
EfficiOS Inc.
http://www.efficios.com
_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH v2 09/13] Perform ust_app_unregister on thread_manage_apps teardown
       [not found] ` <20170918225206.17725-10-jonathan.rajotte-julien@efficios.com>
@ 2017-12-14  1:57   ` Jérémie Galarneau
  0 siblings, 0 replies; 27+ messages in thread
From: Jérémie Galarneau @ 2017-12-14  1:57 UTC (permalink / raw)
  To: Jonathan Rajotte; +Cc: lttng-dev, Jeremie Galarneau

On 18 September 2017 at 18:52, Jonathan Rajotte
<jonathan.rajotte-julien@efficios.com> wrote:
> Previous work on thread termination ordering permits the dismissal of
> the following comment:
>
>    We don't clean the UST app hash table here since already registered
>    applications can still be controlled so let them be until the session
>    daemon dies or the applications stop.
>
> At the moment of termination control thread are already terminated.

It's not clear why this patch is needed. I think what you are worried
about here is the application notification sockets never being closed
on shutdown?

That seems to be taken care of in sessiond_cleanup() [1] which cleans
the various global HTs. More specifically, if we look in
ust_app_clean_list [2], delete_ust_app_rcu() is invoked on all
ust_app_ht entries, which eventually results in delete_ust_app()
being called, which closes the application socket [3].

Looking at the rest of your patch set, I'm sure there is something
I am missing, but it's not clear.

Read on for other comments.

[1] https://github.com/lttng/lttng-tools/blob/master/src/bin/lttng-sessiond/main.c#L579
[2] https://github.com/lttng/lttng-tools/blob/master/src/bin/lttng-sessiond/ust-app.c#L3880
[3] https://github.com/lttng/lttng-tools/blob/master/src/bin/lttng-sessiond/ust-app.c#L922

>
> Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
> ---
>  src/bin/lttng-sessiond/main.c | 14 +++++++++-----
>  1 file changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
> index 8cffa6f6..4a2a661f 100644
> --- a/src/bin/lttng-sessiond/main.c
> +++ b/src/bin/lttng-sessiond/main.c
> @@ -1780,11 +1780,15 @@ error_testpoint:
>         utils_close_pipe(apps_cmd_pipe);
>         apps_cmd_pipe[0] = apps_cmd_pipe[1] = -1;
>
> -       /*
> -        * We don't clean the UST app hash table here since already registered
> -        * applications can still be controlled so let them be until the session
> -        * daemon dies or the applications stop.
> -        */
> +       {
> +               struct lttng_ht_iter iter;
> +               struct ust_app *app;

Missing blank line here.

> +               rcu_read_lock();
> +               cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) {
> +                       ust_app_unregister(app->sock);

This part deserves an explanation in your commit message.

Jérémie

> +               }
> +               rcu_read_unlock();
> +       }
>
>         if (err) {
>                 health_error();
> --
> 2.11.0
>



-- 
Jérémie Galarneau
EfficiOS Inc.
http://www.efficios.com
_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH v2 10/13] Teardown apps_notify_thread before thread_manage_apps
       [not found] ` <20170918225206.17725-11-jonathan.rajotte-julien@efficios.com>
  2017-12-13 17:58   ` [RFC PATCH v2 10/13] Teardown apps_notify_thread before thread_manage_apps Jérémie Galarneau
@ 2017-12-14  1:57   ` Jérémie Galarneau
  1 sibling, 0 replies; 27+ messages in thread
From: Jérémie Galarneau @ 2017-12-14  1:57 UTC (permalink / raw)
  To: Jonathan Rajotte; +Cc: lttng-dev, Jeremie Galarneau

Sorry, I fat-finger-sent an incomplete reply, disregard it. This e-mail
contains all the comments.

On 18 September 2017 at 18:52, Jonathan Rajotte
<jonathan.rajotte-julien@efficios.com> wrote:
> RFC:
> This is necessary since we use the ust_app_ht_by_notify_sock to
> perform the cleanup operation. Since both ust_app_unregister and
> ust_app_ust_app_notify_unregister perform a remove of the app on the
> ust_ust_app_by_notify_sock but only ust_app_notify_unregister actually

There are some extra ust_ and ust_app_ on the last two lines ;)

> end up performing a close(call_rcu) on the socket.

a close(call_rcu) ?

>
> Other way to do fix this problem?
>

Let me walk through the problem as I understand it and
we'll see if/where I am going off-course.

An ust_app has two sockets:
  - command (through which the sessiond sends commands to the app)
  - notify (through which the application notifies the sessiond of
    various conditions such as new events being available, etc.)

These two sockets are received from applications by the
reg_apps_thread [1] which posts them to the dispatch_thread [2].
Once the dispatch thread is aware of both sockets for a given app,
the two sockets are "dispatched" to their respective management
threads.

* The "command" socket is sent to the "apps_thread" [3]
* The "notify" socket is sent to the "apps_notify_thread" [4]

Let's look at what happens when an application dies.

The "apps_thread" (handles application command sockets):
- wakes up from its poll() and notices an error/hang-up has
  occurred on an application's command socket
  - calls ust_app_unregister() on with that socket's FD
    - retrieves the ust_app from the socket's FD through
      ust_app_ht_by_sock
      - flushes that application's buffers and metadata
      - removes the app from ust_app_ht_by_sock
      - removes the app from ust_app_ht_by_notify_sock
        (it's okay for this to fail)
      - removes the app from ust_app_ht (pid <-> ust_app)
      - enqueues a call_rcu() job to close the command socket

The "apps_notify_thread" (handles application notification sockets):
- wakes up from its poll() and notices an error/hang-up has
  occurred on an application's notification socket
  - calls ust_app_notify_sock_unregister()
    - removes the app from ust_app_ht_by_notify_sock
      (it's okay for this to fail since both threads are trying
       to clean this up)
    - call_rcu() to close the notify socket


Now, provided that I understand the problem correctly
(as stated in my reply to patch #06), you want to clean-up
the command and notify sockets in the same way that they
would be if their associated apps had died. That's fair.

However, as seen above, cleaning up the "apps_thread" will cause
it to empty all the ust_app data structures:
  * ust_app_ht_by_sock
  * ust_app_ht_by_notify_sock
  * ust_app_ht

However, the "apps_notify_thread" needs at least one of those
data structures to be present to iterate on all of the
applications and perform its clean-up.

Hence, you want to make sure that the "apps_notify_thread" can
complete its shutdown before the "apps_thread" starts its own
clean-up. Correct?

I would be okay with reordering the threads' teardown to
provide that guarantee. My only gripe is that it needs to
be documented _extensively_ as it is not obvious at all that
such a dependency exists between those threads.

On the other hand, it seems to me that it would be simpler
to _not_ perform the clean-up you added in the "apps_thread"
and leave that to the sessiond_cleanup(), once all threads
have been torn down.

Not performing the clean-up in the "apps_thread" allows the
clean-up (that you added) to occur in the "apps_notify_thread"
as the data structures (such as ust_app_ht) are still in place.

As a result of the "apps_notify_thread" clean-up, the notify
sockets would eventually be closed through the call_rcu()s
during the next grace period. This will then unblock the
applications that are stuck waiting on their notify socket.

Would that solve the problem or am I missing something?

Jérémie

[1] https://github.com/lttng/lttng-tools/blob/master/src/bin/lttng-sessiond/main.c#L2005
[2] https://github.com/lttng/lttng-tools/blob/master/src/bin/lttng-sessiond/main.c#L1749
[3] https://github.com/lttng/lttng-tools/blob/master/src/bin/lttng-sessiond/main.c#L1450
[4] https://github.com/lttng/lttng-tools/blob/master/src/bin/lttng-sessiond/ust-thread.c#L33


> Could we simply not remove it on a ust_app_unregister? And always defer
> to the apps_notify_thread for cleanup?
>
> Update the value in the hash table to -1 and emit a close and remove
> from the hash table if the value is -1?
>
> We could also keep a local list of fd in apps_notify_thread and use it for
> cleanup instead of relying on ust_ust_app_by_notify_sock.
>
> I'm not sure what is the best/elegant solution here. I am not a fan of
> the current solution but it working.
>
> Obviously this commit will be reworded and modified accordingly.
>
> Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
> ---
>  src/bin/lttng-sessiond/main.c | 55 ++++++++++++++++++++++++-------------------
>  1 file changed, 31 insertions(+), 24 deletions(-)
>
> diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
> index 4a2a661f..216d0da6 100644
> --- a/src/bin/lttng-sessiond/main.c
> +++ b/src/bin/lttng-sessiond/main.c
> @@ -6209,16 +6209,6 @@ int main(int argc, char **argv)
>         }
>         notification_thread_running = true;
>
> -       /* Create thread to manage application notify socket */
> -       ret = pthread_create(&apps_notify_thread, default_pthread_attr(),
> -                       ust_thread_manage_notify, (void *) NULL);
> -       if (ret) {
> -               errno = ret;
> -               PERROR("pthread_create notify");
> -               retval = -1;
> -               stop_threads();
> -               goto exit_apps_notify;
> -       }
>
>         /* Create thread to manage application socket */
>         ret = pthread_create(&apps_thread, default_pthread_attr(),
> @@ -6231,6 +6221,17 @@ int main(int argc, char **argv)
>                 goto exit_apps;
>         }
>
> +       /* Create thread to manage application notify socket */
> +       ret = pthread_create(&apps_notify_thread, default_pthread_attr(),
> +                       ust_thread_manage_notify, (void *) NULL);
> +       if (ret) {
> +               errno = ret;
> +               PERROR("pthread_create notify");
> +               retval = -1;
> +               stop_threads();
> +               goto exit_apps_notify;
> +       }
> +
>         /* Create thread to dispatch registration */
>         ret = pthread_create(&dispatch_thread, default_pthread_attr(),
>                         thread_dispatch_ust_registration, (void *) NULL);
> @@ -6358,20 +6359,6 @@ exit_reg_apps:
>         }
>
>  exit_dispatch:
> -       /* Instruct the apps thread to quit */
> -       ret = notify_thread_pipe(thread_apps_teardown_trigger_pipe[1]);
> -       if (ret < 0) {
> -               ERR("write error on thread quit pipe");
> -       }
> -
> -       ret = pthread_join(apps_thread, &status);
> -       if (ret) {
> -               errno = ret;
> -               PERROR("pthread_join apps");
> -               retval = -1;
> -       }
> -
> -exit_apps:
>         /* Instruct the apps_notify thread to quit */
>         ret = notify_thread_pipe(thread_apps_notify_teardown_trigger_pipe[1]);
>         if (ret < 0) {
> @@ -6386,6 +6373,26 @@ exit_apps:
>         }
>
>  exit_apps_notify:
> +       /*
> +        * The barrier ensure that all previous resources, notify sockets in
> +        * particular, are freed/closed.
> +        */
> +       rcu_barrier();
> +
> +       /* Instruct the apps thread to quit */
> +       ret = notify_thread_pipe(thread_apps_teardown_trigger_pipe[1]);
> +       if (ret < 0) {
> +               ERR("write error on thread quit pipe");
> +       }
> +
> +       ret = pthread_join(apps_thread, &status);
> +       if (ret) {
> +               errno = ret;
> +               PERROR("pthread_join apps");
> +               retval = -1;
> +       }
> +
> +exit_apps:
>  exit_notification:
>  exit_health:
>  exit_init_data:
> --
> 2.11.0
>



-- 
Jérémie Galarneau
EfficiOS Inc.
http://www.efficios.com
_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH v2 11/13] Comments update
       [not found] ` <20170918225206.17725-12-jonathan.rajotte-julien@efficios.com>
@ 2017-12-14  1:57   ` Jérémie Galarneau
  0 siblings, 0 replies; 27+ messages in thread
From: Jérémie Galarneau @ 2017-12-14  1:57 UTC (permalink / raw)
  To: Jonathan Rajotte; +Cc: lttng-dev, Jeremie Galarneau

LGTM.

Jérémie

On 18 September 2017 at 18:52, Jonathan Rajotte
<jonathan.rajotte-julien@efficios.com> wrote:
> Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
> ---
>  src/bin/lttng-sessiond/main.c | 10 +++++++---
>  1 file changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
> index 216d0da6..a840e8de 100644
> --- a/src/bin/lttng-sessiond/main.c
> +++ b/src/bin/lttng-sessiond/main.c
> @@ -496,14 +496,18 @@ static int init_thread_apps_notify_teardown_trigger_pipe(void)
>  }
>
>  /*
> - * Stop all threads by closing the thread quit pipe.
> + * Stop first wave threads by closing the thread quit pipe.
> + *  - kernel thread
> + *  - client thread
> + *  - agent thread
> + *  - reg apps thread
>   */
>  static void stop_threads(void)
>  {
>         int ret;
>
> -       /* Stopping all threads */
> -       DBG("Terminating all threads");
> +       /* Stopping first wave threads */
> +       DBG("Terminating first wave threads");
>         ret = notify_thread_pipe(thread_quit_pipe[1]);
>         if (ret < 0) {
>                 ERR("write error on thread quit pipe");
> --
> 2.11.0
>



-- 
Jérémie Galarneau
EfficiOS Inc.
http://www.efficios.com
_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH v2 12/13] Fix: delay termination on consumerd to allow metadata flushing
       [not found] ` <20170918225206.17725-13-jonathan.rajotte-julien@efficios.com>
@ 2017-12-14  1:57   ` Jérémie Galarneau
  0 siblings, 0 replies; 27+ messages in thread
From: Jérémie Galarneau @ 2017-12-14  1:57 UTC (permalink / raw)
  To: Jonathan Rajotte; +Cc: lttng-dev, Jeremie Galarneau

On 18 September 2017 at 18:52, Jonathan Rajotte
<jonathan.rajotte-julien@efficios.com> wrote:
> Move consumerd ownership to thread_manage_consumer to scope the lifetime
> on the consumerd to its manager thread.
>
> "thread_manage_consumer" is responsible for signaling and waiting the
> termination of its consumerd.
>
> All thread_manage_consumer threads now wait on a unique quit pipe
> different from the global thread quit pipe. This allow control over its
> lifetime.
>
> The termination notification is sent during sessiond_cleanup after the
> destroy session command to ensure that no session are still active at
> the moment the consumerds are terminated.
>
> Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
> ---
>  src/bin/lttng-sessiond/main.c | 174 +++++++++++++++++++++++++++---------------
>  1 file changed, 112 insertions(+), 62 deletions(-)
>
> diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
> index a840e8de..fb58ab4b 100644
> --- a/src/bin/lttng-sessiond/main.c
> +++ b/src/bin/lttng-sessiond/main.c
> @@ -206,6 +206,7 @@ static int kernel_poll_pipe[2] = { -1, -1 };
>  static int thread_quit_pipe[2] = { -1, -1 };
>  static int thread_health_teardown_trigger_pipe[2] = { -1, -1 };
>  static int thread_apps_teardown_trigger_pipe[2] = { -1, -1 };
> +static int thread_consumers_teardown_trigger_pipe[2] = { -1, -1 };
>  int thread_apps_notify_teardown_trigger_pipe[2] = { -1, -1 };
>
>  /*
> @@ -495,6 +496,11 @@ static int init_thread_apps_notify_teardown_trigger_pipe(void)
>         return __init_thread_quit_pipe(thread_apps_notify_teardown_trigger_pipe);
>  }
>
> +static int init_thread_consumers_teardown_trigger_pipe(void)
> +{
> +       return __init_thread_quit_pipe(thread_consumers_teardown_trigger_pipe);
> +}
> +
>  /*
>   * Stop first wave threads by closing the thread quit pipe.
>   *  - kernel thread
> @@ -601,14 +607,15 @@ static int generate_lock_file_path(char *path, size_t len)
>  /*
>   * Wait on consumer process termination.
>   *
> - * Need to be called with the consumer data lock held or from a context
> - * ensuring no concurrent access to data (e.g: cleanup).
> + * Need to be called with the consumer data lock held.
>   */
>  static void wait_consumer(struct consumer_data *consumer_data)
>  {
>         pid_t ret;
>         int status;
>
> +       assert(consumer_data);
> +
>         if (consumer_data->pid <= 0) {
>                 return;
>         }
> @@ -626,6 +633,52 @@ static void wait_consumer(struct consumer_data *consumer_data)
>  }
>
>  /*
> + * Signal to the consumer process to terminate.
> + *
> + * Need to be called with the consumer data lock held.
> + */
> +static void kill_consumer(struct consumer_data *consumer_data)
> +{
> +       int ret;
> +
> +       assert(consumer_data);
> +
> +       /* Consumer pid must be a real one. */
> +       if (consumer_data->pid <= 0) {
> +               goto end;
> +       }
> +
> +       ret = kill(consumer_data->pid, SIGTERM);
> +       if (ret) {
> +               PERROR("Error killing consumer daemon");
> +               goto end;
> +       }
> +end:
> +       return;
> +}
> +
> +static int join_thread_consumer(struct consumer_data *consumer_data)
> +{
> +       int ret;
> +       void *status;
> +
> +       assert(consumer_data);
> +
> +       /* Consumer pid must be a real one. */
> +       if (consumer_data->pid <= 0) {
> +               ret = 0;
> +               goto end;
> +       }
> +
> +       ret = pthread_join(consumer_data->thread, &status);
> +       if (ret) {
> +               ERR("Joining consumer thread pid %d", consumer_data->pid);
> +       }
> +end:
> +       return ret;
> +}
> +
> +/*
>   * Cleanup the session daemon's data structures.
>   */
>  static void sessiond_cleanup(void)
> @@ -707,7 +760,6 @@ static void sessiond_cleanup(void)
>         (void) rmdir(path);
>
>         DBG("Cleaning up all sessions");
> -
>         /* Destroy session list mutex */
>         if (session_list_ptr != NULL) {
>                 pthread_mutex_destroy(&session_list_ptr->lock);
> @@ -719,9 +771,35 @@ static void sessiond_cleanup(void)
>                 }
>         }
>
> -       wait_consumer(&kconsumer_data);
> -       wait_consumer(&ustconsumer64_data);
> -       wait_consumer(&ustconsumer32_data);
> +       /*
> +        * Delay the termination of manage_consumer_thread threads to allow
> +        * proper metadata flushing, following the session destroy. Use a
> +        * barrier to ensure that all call_rcu are executed at this point.
> +        */
> +       DBG("Teardown consurmer thread");
> +       rcu_barrier();
> +       ret = notify_thread_pipe(thread_consumers_teardown_trigger_pipe[1]);
> +       if (ret < 0) {
> +               ERR("write error on thread consumer quit pipe");
> +       }
> +
> +       ret = join_thread_consumer(&kconsumer_data);
> +       if (ret) {
> +               errno = ret;
> +               PERROR("join_consumer kernel");
> +       }
> +
> +       ret = join_thread_consumer(&ustconsumer32_data);
> +       if (ret) {
> +               errno = ret;
> +               PERROR("join_consumer ust32");
> +       }
> +
> +       ret = join_thread_consumer(&ustconsumer64_data);
> +       if (ret) {
> +               errno = ret;
> +               PERROR("join_consumer ust64");
> +       }
>
>         DBG("Cleaning up all agent apps");
>         agent_app_ht_clean();
> @@ -1289,14 +1367,20 @@ static void *thread_manage_consumer(void *data)
>         health_code_update();
>
>         /*
> -        * Pass 3 as size here for the thread quit pipe, consumerd_err_sock and the
> +        * Pass 3 as size here for the thread consumer quit pipe, consumerd_err_sock and the
>          * metadata_sock. Nothing more will be added to this poll set.
>          */
> -       ret = sessiond_set_thread_pollset(&events, 3);
> +       ret = lttng_poll_create(&events, 3, LTTNG_CLOEXEC);
>         if (ret < 0) {
>                 goto error_poll;
>         }
>
> +       /* Add quit pipe */
> +       ret = lttng_poll_add(&events, thread_consumers_teardown_trigger_pipe[0], LPOLLIN | LPOLLERR);
> +       if (ret < 0) {
> +               goto error;
> +       }
> +
>         /*
>          * The error socket here is already in a listening state which was done
>          * just before spawning this thread to avoid a race between the consumer
> @@ -1344,7 +1428,7 @@ restart:
>                 }
>
>                 /* Thread quit pipe has been closed. Killing thread. */
> -               ret = sessiond_check_thread_quit_pipe(pollfd, revents);
> +               ret = (pollfd == thread_consumers_teardown_trigger_pipe[0] && (revents & LPOLLIN)) ? 1 : 0;

I don't mind ternaries, but that's a really long expression.
I think this would be more readable using a good-old 'if'.

Also, is LPOLLERR checked somewhere?

>                 if (ret) {
>                         err = 0;
>                         goto exit;
> @@ -1509,7 +1593,7 @@ restart_poll:
>                          * but continue the current loop to handle potential data from
>                          * consumer.
>                          */
> -                       should_quit = sessiond_check_thread_quit_pipe(pollfd, revents);
> +                       should_quit = (pollfd == thread_consumers_teardown_trigger_pipe[0] && (revents & LPOLLIN)) ? 1 : 0;

Same comment applies here.

>
>                         if (pollfd == sock) {
>                                 /* Event on the consumerd socket */
> @@ -1552,11 +1636,6 @@ restart_poll:
>
>  exit:
>  error:
> -       /*
> -        * We lock here because we are about to close the sockets and some other
> -        * thread might be using them so get exclusive access which will abort all
> -        * other consumer command by other threads.
> -        */
>         pthread_mutex_lock(&consumer_data->lock);
>
>         /* Immediately set the consumerd state to stopped */
> @@ -1570,6 +1649,13 @@ error:
>                 assert(0);
>         }
>
> +       /*
> +        * This thread is responsible for its consumerd. Make sure the
> +        * consumerd teardown is complete before proceding.
> +        */
> +       kill_consumer(consumer_data);
> +       wait_consumer(consumer_data);
> +
>         if (consumer_data->err_sock >= 0) {
>                 ret = close(consumer_data->err_sock);
>                 if (ret) {
> @@ -1600,13 +1686,15 @@ error:
>
>         unlink(consumer_data->err_unix_sock_path);
>         unlink(consumer_data->cmd_unix_sock_path);
> -       pthread_mutex_unlock(&consumer_data->lock);
>
>         /* Cleanup metadata socket mutex. */
>         if (consumer_data->metadata_sock.lock) {
>                 pthread_mutex_destroy(consumer_data->metadata_sock.lock);
>                 free(consumer_data->metadata_sock.lock);
>         }
> +
> +       pthread_mutex_unlock(&consumer_data->lock);
> +
>         lttng_poll_clean(&events);
>
>         if (cmd_socket_wrapper) {
> @@ -2560,27 +2648,6 @@ error:
>  }
>
>  /*
> - * Join consumer thread
> - */
> -static int join_consumer_thread(struct consumer_data *consumer_data)
> -{
> -       void *status;
> -
> -       /* Consumer pid must be a real one. */
> -       if (consumer_data->pid > 0) {
> -               int ret;
> -               ret = kill(consumer_data->pid, SIGTERM);
> -               if (ret) {
> -                       PERROR("Error killing consumer daemon");
> -                       return ret;
> -               }
> -               return pthread_join(consumer_data->thread, &status);
> -       } else {
> -               return 0;
> -       }
> -}
> -
> -/*
>   * Fork and exec a consumer daemon (consumerd).
>   *
>   * Return pid if successful else -1.
> @@ -4741,27 +4808,6 @@ error_create_poll:
>
>         rcu_unregister_thread();
>
> -       /*
> -        * Since we are creating the consumer threads, we own them, so we need
> -        * to join them before our thread exits.
> -        */
> -       ret = join_consumer_thread(&kconsumer_data);
> -       if (ret) {
> -               errno = ret;
> -               PERROR("join_consumer");
> -       }
> -
> -       ret = join_consumer_thread(&ustconsumer32_data);
> -       if (ret) {
> -               errno = ret;
> -               PERROR("join_consumer ust32");
> -       }
> -
> -       ret = join_consumer_thread(&ustconsumer64_data);
> -       if (ret) {
> -               errno = ret;
> -               PERROR("join_consumer ust64");
> -       }
>         return NULL;
>  }
>
> @@ -5785,6 +5831,11 @@ int main(int argc, char **argv)
>                 goto exit_init_data;
>         }
>
> +       if (init_thread_consumers_teardown_trigger_pipe()) {
> +               retval = -1;
> +               goto exit_init_data;
> +       }
> +
>         /* Check if daemon is UID = 0 */
>         is_root = !getuid();
>
> @@ -6406,11 +6457,8 @@ exit_init_data:
>          * perform lookups in those structures.
>          */
>         rcu_barrier();
> -       /*
> -        * sessiond_cleanup() is called when no other thread is running, except
> -        * the ht_cleanup thread, which is needed to destroy the hash tables.
> -        */
>         rcu_thread_online();
> +
>         sessiond_cleanup();
>
>         /*
> @@ -6461,6 +6509,8 @@ exit_init_data:
>                 retval = -1;
>         }
>
> +       /* Consumers thread teardown pipe cleanup */
> +       utils_close_pipe(thread_consumers_teardown_trigger_pipe);
>         /* Health thread teardown pipe cleanup */
>         utils_close_pipe(thread_health_teardown_trigger_pipe);
>         /* Apps thread teardown pipe cleanup */
> --
> 2.11.0
>



-- 
Jérémie Galarneau
EfficiOS Inc.
http://www.efficios.com
_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH v2 13/13] Fix: quit early if instructed to
       [not found] ` <20170918225206.17725-14-jonathan.rajotte-julien@efficios.com>
@ 2017-12-14  1:58   ` Jérémie Galarneau
  0 siblings, 0 replies; 27+ messages in thread
From: Jérémie Galarneau @ 2017-12-14  1:58 UTC (permalink / raw)
  To: Jonathan Rajotte; +Cc: lttng-dev, Jeremie Galarneau

LGTM.

Jérémie

On 18 September 2017 at 18:52, Jonathan Rajotte
<jonathan.rajotte-julien@efficios.com> wrote:
> If the wait_queue size is considerable, not checking if the thread should
> quit delays the termination (3+ seconds during stress testing).
>
> Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
> ---
>  src/bin/lttng-sessiond/main.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c
> index fb58ab4b..0475c5a3 100644
> --- a/src/bin/lttng-sessiond/main.c
> +++ b/src/bin/lttng-sessiond/main.c
> @@ -2244,7 +2244,7 @@ static void *thread_dispatch_ust_registration(void *data)
>                                 rcu_read_unlock();
>                                 session_unlock_list();
>                         }
> -               } while (node != NULL);
> +               } while (node != NULL && !CMM_LOAD_SHARED(dispatch_thread_exit));
>
>                 health_poll_entry();
>                 /* Futex wait on queue. Blocking call on futex() */
> --
> 2.11.0
>



-- 
Jérémie Galarneau
EfficiOS Inc.
http://www.efficios.com
_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2017-12-14  1:58 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20170918225206.17725-1-jonathan.rajotte-julien@efficios.com>
2017-09-18 22:51 ` [RFC PATCH v2 01/13] Extend health thread lifetime Jonathan Rajotte
2017-09-18 22:51 ` [RFC PATCH v2 02/13] Reorder pthread_join for easier ordering later on Jonathan Rajotte
2017-09-18 22:51 ` [RFC PATCH v2 03/13] Terminate dispatch thread after reg_apps_thread is terminated Jonathan Rajotte
2017-09-18 22:51 ` [RFC PATCH v2 04/13] Order termination of thread_manage_apps after dispatch_thread Jonathan Rajotte
2017-09-18 22:51 ` [RFC PATCH v2 05/13] Control thread_apps_notify lifetime with specialized quit pipe Jonathan Rajotte
2017-09-18 22:51 ` [RFC PATCH v2 06/13] Fix: unregister app notify socket on sessiond tear down Jonathan Rajotte
2017-09-18 22:52 ` [RFC PATCH v2 07/13] Always reply to an inquiring app Jonathan Rajotte
2017-09-18 22:52 ` [RFC PATCH v2 08/13] Fix: perform lookup then test for condition Jonathan Rajotte
2017-09-18 22:52 ` [RFC PATCH v2 09/13] Perform ust_app_unregister on thread_manage_apps teardown Jonathan Rajotte
2017-09-18 22:52 ` [RFC PATCH v2 10/13] Teardown apps_notify_thread before thread_manage_apps Jonathan Rajotte
2017-09-18 22:52 ` [RFC PATCH v2 11/13] Comments update Jonathan Rajotte
2017-09-18 22:52 ` [RFC PATCH v2 12/13] Fix: delay termination on consumerd to allow metadata flushing Jonathan Rajotte
2017-09-18 22:52 ` [RFC PATCH v2 13/13] Fix: quit early if instructed to Jonathan Rajotte
     [not found] ` <20170918225206.17725-2-jonathan.rajotte-julien@efficios.com>
2017-12-14  1:54   ` [RFC PATCH v2 01/13] Extend health thread lifetime Jérémie Galarneau
     [not found] ` <20170918225206.17725-3-jonathan.rajotte-julien@efficios.com>
2017-12-14  1:54   ` [RFC PATCH v2 02/13] Reorder pthread_join for easier ordering later on Jérémie Galarneau
     [not found] ` <20170918225206.17725-4-jonathan.rajotte-julien@efficios.com>
2017-12-14  1:55   ` [RFC PATCH v2 03/13] Terminate dispatch thread after reg_apps_thread is terminated Jérémie Galarneau
     [not found] ` <20170918225206.17725-5-jonathan.rajotte-julien@efficios.com>
2017-12-14  1:55   ` [RFC PATCH v2 04/13] Order termination of thread_manage_apps after dispatch_thread Jérémie Galarneau
     [not found] ` <20170918225206.17725-6-jonathan.rajotte-julien@efficios.com>
2017-12-14  1:55   ` [RFC PATCH v2 05/13] Control thread_apps_notify lifetime with specialized quit pipe Jérémie Galarneau
     [not found] ` <20170918225206.17725-7-jonathan.rajotte-julien@efficios.com>
2017-12-14  1:56   ` [RFC PATCH v2 06/13] Fix: unregister app notify socket on sessiond tear down Jérémie Galarneau
     [not found] ` <20170918225206.17725-8-jonathan.rajotte-julien@efficios.com>
2017-12-14  1:56   ` [RFC PATCH v2 07/13] Always reply to an inquiring app Jérémie Galarneau
     [not found] ` <20170918225206.17725-9-jonathan.rajotte-julien@efficios.com>
2017-12-14  1:56   ` [RFC PATCH v2 08/13] Fix: perform lookup then test for condition Jérémie Galarneau
     [not found] ` <20170918225206.17725-10-jonathan.rajotte-julien@efficios.com>
2017-12-14  1:57   ` [RFC PATCH v2 09/13] Perform ust_app_unregister on thread_manage_apps teardown Jérémie Galarneau
     [not found] ` <20170918225206.17725-11-jonathan.rajotte-julien@efficios.com>
2017-12-13 17:58   ` [RFC PATCH v2 10/13] Teardown apps_notify_thread before thread_manage_apps Jérémie Galarneau
2017-12-14  1:57   ` Jérémie Galarneau
     [not found] ` <20170918225206.17725-12-jonathan.rajotte-julien@efficios.com>
2017-12-14  1:57   ` [RFC PATCH v2 11/13] Comments update Jérémie Galarneau
     [not found] ` <20170918225206.17725-13-jonathan.rajotte-julien@efficios.com>
2017-12-14  1:57   ` [RFC PATCH v2 12/13] Fix: delay termination on consumerd to allow metadata flushing Jérémie Galarneau
     [not found] ` <20170918225206.17725-14-jonathan.rajotte-julien@efficios.com>
2017-12-14  1:58   ` [RFC PATCH v2 13/13] Fix: quit early if instructed to Jérémie Galarneau

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.