All of lore.kernel.org
 help / color / mirror / Atom feed
* [lttng-tools RFC] Fix: registry can be null on lookup
@ 2017-02-06 20:28 Jonathan Rajotte
  0 siblings, 0 replies; 2+ messages in thread
From: Jonathan Rajotte @ 2017-02-06 20:28 UTC (permalink / raw)
  To: lttng-dev; +Cc: jgalar

A session teardown can be initiated by a dying application. Hence, a
session object can exist without a valid registry. As a result,
get_session_registry can return null. To prevent this, the UST
application session lock should be held, when possible, when looking up
the registry to ensure synchronization. Otherwise the presence of a
registry is not guaranteed. In such case, handling a null return value
from look-up registry function is necessary.

Core dumps, triggered by the "assert(registry)" statement found in
reply_ust_register_channel, were observed when killing instrumented
applications. In this occurrence, obtaining the UST application lock
result in a deadlock since the lock is already held during
ust_app_global_create. Handling the null value is simpler and
corresponds with the handling of previous look-up done during the
function.

Handling of null value is also applied to:
	add_event_ust_registry
	add_enum_ust_registry
	ust_app_snapshot_record

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
---
 src/bin/lttng-sessiond/ust-app.c | 53 ++++++++++++++++++++++++++++++++++------
 1 file changed, 45 insertions(+), 8 deletions(-)

diff --git a/src/bin/lttng-sessiond/ust-app.c b/src/bin/lttng-sessiond/ust-app.c
index 5a41c38..4395a59 100644
--- a/src/bin/lttng-sessiond/ust-app.c
+++ b/src/bin/lttng-sessiond/ust-app.c
@@ -810,6 +810,7 @@ void delete_ust_app_session(int sock, struct ust_app_session *ua_sess,
 	ua_sess->deleted = true;
 
 	registry = get_session_registry(ua_sess);
+	/* Registry can be null on error path during initialization */
 	if (registry) {
 		/* Push metadata for application before freeing the application. */
 		(void) push_metadata(registry, ua_sess->consumer);
@@ -837,6 +838,10 @@ void delete_ust_app_session(int sock, struct ust_app_session *ua_sess,
 	if (ua_sess->buffer_type == LTTNG_BUFFER_PER_PID) {
 		struct buffer_reg_pid *reg_pid = buffer_reg_pid_find(ua_sess->id);
 		if (reg_pid) {
+			/*
+			 * Registry can be null on error path during
+			 * initialization
+			 */
 			buffer_reg_pid_remove(reg_pid);
 			buffer_reg_pid_destroy(reg_pid);
 		}
@@ -2464,6 +2469,8 @@ error:
 /*
  * Ask the consumer to create a channel and get it if successful.
  *
+ * Called with UST app session lock held.
+ *
  * Return 0 on success or else a negative value.
  */
 static int do_consumer_create_channel(struct ltt_ust_session *usess,
@@ -2917,6 +2924,8 @@ error:
 /*
  * Create and send to the application the created buffers with per PID buffers.
  *
+ * Called with UST app session lock held.
+ *
  * Return 0 on success else a negative value.
  */
 static int create_channel_per_pid(struct ust_app *app,
@@ -2936,6 +2945,7 @@ static int create_channel_per_pid(struct ust_app *app,
 	rcu_read_lock();
 
 	registry = get_session_registry(ua_sess);
+	/* The UST app session lock is held, registry shall not be null */
 	assert(registry);
 
 	/* Create and add a new channel registry to session. */
@@ -2973,6 +2983,8 @@ error:
  * need and send it to the application. This MUST be called with a RCU read
  * side lock acquired.
  *
+ * Called with UST app session lock held.
+ *
  * Return 0 on success or else a negative value. Returns -ENOTCONN if
  * the application exited concurrently.
  */
@@ -3160,6 +3172,7 @@ static int create_ust_app_metadata(struct ust_app_session *ua_sess,
 	assert(consumer);
 
 	registry = get_session_registry(ua_sess);
+	/* The UST app session is held registry shall not be null */
 	assert(registry);
 
 	pthread_mutex_lock(&registry->lock);
@@ -4296,6 +4309,9 @@ int ust_app_create_event_glb(struct ltt_ust_session *usess,
 
 /*
  * Start tracing for a specific UST session and app.
+ *
+ * Called with UST app session lock held.
+ *
  */
 static
 int ust_app_start_trace(struct ltt_ust_session *usess, struct ust_app *app)
@@ -4479,6 +4495,8 @@ int ust_app_stop_trace(struct ltt_ust_session *usess, struct ust_app *app)
 	health_code_update();
 
 	registry = get_session_registry(ua_sess);
+
+	/* The UST app session is held registry shall not be null */
 	assert(registry);
 
 	/* Push metadata for application before freeing the application. */
@@ -5320,7 +5338,7 @@ static int reply_ust_register_channel(int sock, int sobjd, int cobjd,
 	/* Lookup application. If not found, there is a code flow error. */
 	app = find_app_by_notify_sock(sock);
 	if (!app) {
-		DBG("Application socket %d is being teardown. Abort event notify",
+		DBG("Application socket %d is being torn down. Abort event notify",
 				sock);
 		ret = 0;
 		free(fields);
@@ -5330,7 +5348,7 @@ static int reply_ust_register_channel(int sock, int sobjd, int cobjd,
 	/* Lookup channel by UST object descriptor. */
 	ua_chan = find_channel_by_objd(app, cobjd);
 	if (!ua_chan) {
-		DBG("Application channel is being teardown. Abort event notify");
+		DBG("Application channel is being torn down. Abort event notify");
 		ret = 0;
 		free(fields);
 		goto error_rcu_unlock;
@@ -5341,7 +5359,12 @@ static int reply_ust_register_channel(int sock, int sobjd, int cobjd,
 
 	/* Get right session registry depending on the session buffer type. */
 	registry = get_session_registry(ua_sess);
-	assert(registry);
+	if (!registry) {
+		DBG("Application session is being torn down. Abort event notify");
+		ret = 0;
+		free(fields);
+		goto error_rcu_unlock;
+	};
 
 	/* Depending on the buffer type, a different channel key is used. */
 	if (ua_sess->buffer_type == LTTNG_BUFFER_PER_UID) {
@@ -5439,7 +5462,7 @@ static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name,
 	/* Lookup application. If not found, there is a code flow error. */
 	app = find_app_by_notify_sock(sock);
 	if (!app) {
-		DBG("Application socket %d is being teardown. Abort event notify",
+		DBG("Application socket %d is being torn down. Abort event notify",
 				sock);
 		ret = 0;
 		free(sig);
@@ -5451,7 +5474,7 @@ static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name,
 	/* Lookup channel by UST object descriptor. */
 	ua_chan = find_channel_by_objd(app, cobjd);
 	if (!ua_chan) {
-		DBG("Application channel is being teardown. Abort event notify");
+		DBG("Application channel is being torn down. Abort event notify");
 		ret = 0;
 		free(sig);
 		free(fields);
@@ -5463,7 +5486,14 @@ static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name,
 	ua_sess = ua_chan->session;
 
 	registry = get_session_registry(ua_sess);
-	assert(registry);
+	if (!registry) {
+		DBG("Application session is being torn down. Abort event notify");
+		ret = 0;
+		free(sig);
+		free(fields);
+		free(model_emf_uri);
+		goto error_rcu_unlock;
+	}
 
 	if (ua_sess->buffer_type == LTTNG_BUFFER_PER_UID) {
 		chan_reg_key = ua_chan->tracing_channel_id;
@@ -5551,7 +5581,11 @@ static int add_enum_ust_registry(int sock, int sobjd, char *name,
 	}
 
 	registry = get_session_registry(ua_sess);
-	assert(registry);
+	if (!registry) {
+		DBG("Application session is being torn down. Aborting enum registration.");
+		free(entries);
+		goto error_rcu_unlock;
+	}
 
 	pthread_mutex_lock(&registry->lock);
 
@@ -5917,7 +5951,10 @@ int ust_app_snapshot_record(struct ltt_ust_session *usess,
 			}
 
 			registry = get_session_registry(ua_sess);
-			assert(registry);
+			if (!registry) {
+				DBG("Application session is being torn down. Abort snapshot record.");
+				goto error;
+			}
 			ret = consumer_snapshot_channel(socket, registry->metadata_key, output,
 					1, ua_sess->euid, ua_sess->egid, pathname, wait, 0);
 			if (ret < 0) {
-- 
2.7.4

_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [lttng-tools RFC] Fix: registry can be null on lookup
       [not found] <1486412932-12806-1-git-send-email-jonathan.rajotte-julien@efficios.com>
@ 2017-06-01 19:42 ` Jérémie Galarneau
  0 siblings, 0 replies; 2+ messages in thread
From: Jérémie Galarneau @ 2017-06-01 19:42 UTC (permalink / raw)
  To: Jonathan Rajotte; +Cc: lttng-dev, Jeremie Galarneau

Merged with some modifications in master, stable-2.10, stable-2.9, and
stable-2.8. Read on for changes.

On 6 February 2017 at 15:28, Jonathan Rajotte
<jonathan.rajotte-julien@efficios.com> wrote:
> A session teardown can be initiated by a dying application. Hence, a
> session object can exist without a valid registry. As a result,
> get_session_registry can return null. To prevent this, the UST
> application session lock should be held, when possible, when looking up
> the registry to ensure synchronization. Otherwise the presence of a
> registry is not guaranteed. In such case, handling a null return value
> from look-up registry function is necessary.
>
> Core dumps, triggered by the "assert(registry)" statement found in
> reply_ust_register_channel, were observed when killing instrumented
> applications. In this occurrence, obtaining the UST application lock
> result in a deadlock since the lock is already held during
> ust_app_global_create. Handling the null value is simpler and
> corresponds with the handling of previous look-up done during the
> function.
>
> Handling of null value is also applied to:
>         add_event_ust_registry
>         add_enum_ust_registry
>         ust_app_snapshot_record
>
> Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
> ---
>  src/bin/lttng-sessiond/ust-app.c | 53 ++++++++++++++++++++++++++++++++++------
>  1 file changed, 45 insertions(+), 8 deletions(-)
>
> diff --git a/src/bin/lttng-sessiond/ust-app.c b/src/bin/lttng-sessiond/ust-app.c
> index 5a41c38..4395a59 100644
> --- a/src/bin/lttng-sessiond/ust-app.c
> +++ b/src/bin/lttng-sessiond/ust-app.c
> @@ -810,6 +810,7 @@ void delete_ust_app_session(int sock, struct ust_app_session *ua_sess,
>         ua_sess->deleted = true;
>
>         registry = get_session_registry(ua_sess);
> +       /* Registry can be null on error path during initialization */
>         if (registry) {
>                 /* Push metadata for application before freeing the application. */
>                 (void) push_metadata(registry, ua_sess->consumer);
> @@ -837,6 +838,10 @@ void delete_ust_app_session(int sock, struct ust_app_session *ua_sess,
>         if (ua_sess->buffer_type == LTTNG_BUFFER_PER_PID) {
>                 struct buffer_reg_pid *reg_pid = buffer_reg_pid_find(ua_sess->id);
>                 if (reg_pid) {
> +                       /*
> +                        * Registry can be null on error path during
> +                        * initialization

Added dots at the end of comments.

> +                        */
>                         buffer_reg_pid_remove(reg_pid);
>                         buffer_reg_pid_destroy(reg_pid);
>                 }
> @@ -2464,6 +2469,8 @@ error:
>  /*
>   * Ask the consumer to create a channel and get it if successful.
>   *
> + * Called with UST app session lock held.
> + *
>   * Return 0 on success or else a negative value.
>   */
>  static int do_consumer_create_channel(struct ltt_ust_session *usess,
> @@ -2917,6 +2924,8 @@ error:
>  /*
>   * Create and send to the application the created buffers with per PID buffers.
>   *
> + * Called with UST app session lock held.
> + *
>   * Return 0 on success else a negative value.
>   */
>  static int create_channel_per_pid(struct ust_app *app,
> @@ -2936,6 +2945,7 @@ static int create_channel_per_pid(struct ust_app *app,
>         rcu_read_lock();
>
>         registry = get_session_registry(ua_sess);
> +       /* The UST app session lock is held, registry shall not be null */
>         assert(registry);
>
>         /* Create and add a new channel registry to session. */
> @@ -2973,6 +2983,8 @@ error:
>   * need and send it to the application. This MUST be called with a RCU read
>   * side lock acquired.
>   *
> + * Called with UST app session lock held.
> + *
>   * Return 0 on success or else a negative value. Returns -ENOTCONN if
>   * the application exited concurrently.
>   */
> @@ -3160,6 +3172,7 @@ static int create_ust_app_metadata(struct ust_app_session *ua_sess,
>         assert(consumer);
>
>         registry = get_session_registry(ua_sess);
> +       /* The UST app session is held registry shall not be null */
>         assert(registry);
>
>         pthread_mutex_lock(&registry->lock);
> @@ -4296,6 +4309,9 @@ int ust_app_create_event_glb(struct ltt_ust_session *usess,
>
>  /*
>   * Start tracing for a specific UST session and app.
> + *
> + * Called with UST app session lock held.
> + *
>   */
>  static
>  int ust_app_start_trace(struct ltt_ust_session *usess, struct ust_app *app)
> @@ -4479,6 +4495,8 @@ int ust_app_stop_trace(struct ltt_ust_session *usess, struct ust_app *app)
>         health_code_update();
>
>         registry = get_session_registry(ua_sess);
> +
> +       /* The UST app session is held registry shall not be null */
>         assert(registry);
>
>         /* Push metadata for application before freeing the application. */
> @@ -5320,7 +5338,7 @@ static int reply_ust_register_channel(int sock, int sobjd, int cobjd,
>         /* Lookup application. If not found, there is a code flow error. */
>         app = find_app_by_notify_sock(sock);
>         if (!app) {
> -               DBG("Application socket %d is being teardown. Abort event notify",
> +               DBG("Application socket %d is being torn down. Abort event notify",
>                                 sock);
>                 ret = 0;
>                 free(fields);
> @@ -5330,7 +5348,7 @@ static int reply_ust_register_channel(int sock, int sobjd, int cobjd,
>         /* Lookup channel by UST object descriptor. */
>         ua_chan = find_channel_by_objd(app, cobjd);
>         if (!ua_chan) {
> -               DBG("Application channel is being teardown. Abort event notify");
> +               DBG("Application channel is being torn down. Abort event notify");
>                 ret = 0;
>                 free(fields);
>                 goto error_rcu_unlock;
> @@ -5341,7 +5359,12 @@ static int reply_ust_register_channel(int sock, int sobjd, int cobjd,
>
>         /* Get right session registry depending on the session buffer type. */
>         registry = get_session_registry(ua_sess);
> -       assert(registry);
> +       if (!registry) {
> +               DBG("Application session is being torn down. Abort event notify");
> +               ret = 0;
> +               free(fields);

I reworked the various free(fields) to perform the free at the end of
the function, and set "fields" to NULL only once the ownership has
been transfered.

> +               goto error_rcu_unlock;
> +       };
>
>         /* Depending on the buffer type, a different channel key is used. */
>         if (ua_sess->buffer_type == LTTNG_BUFFER_PER_UID) {
> @@ -5439,7 +5462,7 @@ static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name,
>         /* Lookup application. If not found, there is a code flow error. */
>         app = find_app_by_notify_sock(sock);
>         if (!app) {
> -               DBG("Application socket %d is being teardown. Abort event notify",
> +               DBG("Application socket %d is being torn down. Abort event notify",
>                                 sock);
>                 ret = 0;
>                 free(sig);
> @@ -5451,7 +5474,7 @@ static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name,
>         /* Lookup channel by UST object descriptor. */
>         ua_chan = find_channel_by_objd(app, cobjd);
>         if (!ua_chan) {
> -               DBG("Application channel is being teardown. Abort event notify");
> +               DBG("Application channel is being torn down. Abort event notify");
>                 ret = 0;
>                 free(sig);
>                 free(fields);
> @@ -5463,7 +5486,14 @@ static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name,
>         ua_sess = ua_chan->session;
>
>         registry = get_session_registry(ua_sess);
> -       assert(registry);
> +       if (!registry) {
> +               DBG("Application session is being torn down. Abort event notify");
> +               ret = 0;
> +               free(sig);
> +               free(fields);
> +               free(model_emf_uri);

Same idea than the modifications done for the "fields" memory
ownership transfer.

> +               goto error_rcu_unlock;
> +       }
>
>         if (ua_sess->buffer_type == LTTNG_BUFFER_PER_UID) {
>                 chan_reg_key = ua_chan->tracing_channel_id;
> @@ -5551,7 +5581,11 @@ static int add_enum_ust_registry(int sock, int sobjd, char *name,
>         }
>
>         registry = get_session_registry(ua_sess);
> -       assert(registry);
> +       if (!registry) {
> +               DBG("Application session is being torn down. Aborting enum registration.");

Modified this logging message to indicate the cause (null registry vs
null session).

> +               free(entries);
> +               goto error_rcu_unlock;
> +       }
>
>         pthread_mutex_lock(&registry->lock);
>
> @@ -5917,7 +5951,10 @@ int ust_app_snapshot_record(struct ltt_ust_session *usess,
>                         }
>
>                         registry = get_session_registry(ua_sess);
> -                       assert(registry);
> +                       if (!registry) {
> +                               DBG("Application session is being torn down. Abort snapshot record.");
> +                               goto error;

Added ret = -1 here to indicate the error. Also, ret could be left to
an unexpected value if the ua_sess->channels->ht is empty (it would
take the return value of the snprintf() call before).


> +                       }
>                         ret = consumer_snapshot_channel(socket, registry->metadata_key, output,
>                                         1, ua_sess->euid, ua_sess->egid, pathname, wait, 0);
>                         if (ret < 0) {
> --
> 2.7.4
>



-- 
Jérémie Galarneau
EfficiOS Inc.
http://www.efficios.com
_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2017-06-01 19:43 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-06 20:28 [lttng-tools RFC] Fix: registry can be null on lookup Jonathan Rajotte
     [not found] <1486412932-12806-1-git-send-email-jonathan.rajotte-julien@efficios.com>
2017-06-01 19:42 ` Jérémie Galarneau

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.