All of lore.kernel.org
 help / color / mirror / Atom feed
From: Benjamin Marzinski <bmarzins@redhat.com>
To: mwilck@suse.com
Cc: dm-devel@redhat.com
Subject: Re: [dm-devel] [RFC PATCH] libmultipath: prevent DSO unloading with astray checker threads
Date: Thu, 5 Nov 2020 18:41:15 -0600	[thread overview]
Message-ID: <20201106004115.GH3384@octiron.msp.redhat.com> (raw)
In-Reply-To: <20201105114952.1059-1-mwilck@suse.com>

On Thu, Nov 05, 2020 at 12:49:52PM +0100, mwilck@suse.com wrote:
> From: Martin Wilck <mwilck@suse.com>
> 
> The multipathd tur checker thread is designed to be able to finish at
> any time, even after the tur checker itself has been freed. The
> multipathd shutdown code makes sure all the checkers have been freed
> before freeing the checker_class and calling dlclose() to unload the
> DSO, but this doesn't guarantee that the checker threads have finished.
> If one hasn't, the DSO will get unloaded while the thread still running
> code from it, causing a segfault.
> 
> This patch fixes the issue by further incrementing the DSO's refcount
> for every running thread. To avoid race conditions leading to segfaults,
> the thread's entrypoint must be in libmultipath, not in the DSO itself.
> Therefore we add a new optional checker method, libcheck_thread().
> Checkers defining this method may create a detached thread with
> entrypoint checker_thread_entry(), which will call the DSO's
> libcheck_thread and take care of the refcount handling.

I can't make this segfault. So that looks good, but it does need
libmultipath.version updated to include checker_thread_entry()

-Ben

> 
> Reported-by: Benjamin Marzinski <bmarzins@redhat.com>
> Signed-off-by: Martin Wilck <mwilck@suse.com>
> ---
>  libmultipath/checkers.c     | 55 ++++++++++++++++++++++++++++++++-----
>  libmultipath/checkers.h     | 17 ++++++++++++
>  libmultipath/checkers/tur.c |  7 ++---
>  3 files changed, 67 insertions(+), 12 deletions(-)
> 
> diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c
> index f7ddd53..7d10f2a 100644
> --- a/libmultipath/checkers.c
> +++ b/libmultipath/checkers.c
> @@ -3,6 +3,9 @@
>  #include <stddef.h>
>  #include <dlfcn.h>
>  #include <sys/stat.h>
> +#include <pthread.h>
> +#include <urcu.h>
> +#include <urcu/uatomic.h>
>  
>  #include "debug.h"
>  #include "checkers.h"
> @@ -19,6 +22,7 @@ struct checker_class {
>  	int (*mp_init)(struct checker *);    /* to allocate the mpcontext */
>  	void (*free)(struct checker *);      /* to free the context */
>  	void (*reset)(void);		     /* to reset the global variables */
> +	void *(*thread)(void *);	     /* async thread entry point */
>  	const char **msgtable;
>  	short msgtable_size;
>  };
> @@ -54,19 +58,32 @@ static struct checker_class *alloc_checker_class(void)
>  	c = MALLOC(sizeof(struct checker_class));
>  	if (c) {
>  		INIT_LIST_HEAD(&c->node);
> -		c->refcount = 1;
> +		uatomic_set(&c->refcount, 1);
>  	}
>  	return c;
>  }
>  
> +/* Use uatomic_{sub,add}_return() to ensure proper memory barriers */
> +static int checker_class_ref(struct checker_class *cls)
> +{
> +	return uatomic_add_return(&cls->refcount, 1);
> +}
> +
> +static int checker_class_unref(struct checker_class *cls)
> +{
> +	return uatomic_sub_return(&cls->refcount, 1);
> +}
> +
>  void free_checker_class(struct checker_class *c)
>  {
> +	int cnt;
> +
>  	if (!c)
>  		return;
> -	c->refcount--;
> -	if (c->refcount) {
> -		condlog(4, "%s checker refcount %d",
> -			c->name, c->refcount);
> +	cnt = checker_class_unref(c);
> +	if (cnt != 0) {
> +		condlog(cnt < 0 ? 1 : 4, "%s checker refcount %d",
> +			c->name, cnt);
>  		return;
>  	}
>  	condlog(3, "unloading %s checker", c->name);
> @@ -160,7 +177,8 @@ static struct checker_class *add_checker_class(const char *multipath_dir,
>  
>  	c->mp_init = (int (*)(struct checker *)) dlsym(c->handle, "libcheck_mp_init");
>  	c->reset = (void (*)(void)) dlsym(c->handle, "libcheck_reset");
> -	/* These 2 functions can be NULL. call dlerror() to clear out any
> +	c->thread = (void *(*)(void*)) dlsym(c->handle, "libcheck_thread");
> +	/* These 3 functions can be NULL. call dlerror() to clear out any
>  	 * error string */
>  	dlerror();
>  
> @@ -346,6 +364,29 @@ bad_id:
>  	return generic_msg[CHECKER_MSGID_NONE];
>  }
>  
> +static void checker_cleanup_thread(void *arg)
> +{
> +	struct checker_class *cls = arg;
> +
> +	(void)checker_class_unref(cls);
> +	rcu_unregister_thread();
> +}
> +
> +void *checker_thread_entry(void *arg)
> +{
> +	struct checker *chk = arg;
> +	void *rv;
> +
> +	assert(chk && chk->cls && chk->cls->thread);
> +
> +	rcu_register_thread();
> +	(void)checker_class_ref(chk->cls);
> +	pthread_cleanup_push(checker_cleanup_thread, chk->cls);
> +	rv = chk->cls->thread(chk->context);
> +	pthread_cleanup_pop(1);
> +	return rv;
> +}
> +
>  void checker_clear_message (struct checker *c)
>  {
>  	if (!c)
> @@ -370,7 +411,7 @@ void checker_get(const char *multipath_dir, struct checker *dst,
>  	if (!src)
>  		return;
>  
> -	src->refcount++;
> +	(void)checker_class_ref(dst->cls);
>  }
>  
>  int init_checkers(const char *multipath_dir)
> diff --git a/libmultipath/checkers.h b/libmultipath/checkers.h
> index 9d5f90b..01af02d 100644
> --- a/libmultipath/checkers.h
> +++ b/libmultipath/checkers.h
> @@ -148,6 +148,23 @@ void checker_set_async (struct checker *);
>  void checker_set_fd (struct checker *, int);
>  void checker_enable (struct checker *);
>  void checker_disable (struct checker *);
> +/*
> + * checker_thread_entry(): entry point for async path checker thread
> + *
> + * Path checkers that do I/O may hang forever. To avoid blocking, some
> + * checkers therefore use asyncronous, detached threads for checking
> + * the paths. These threads may continue hanging if multipathd is stopped.
> + * In this case, we can't unload the checker DSO at exit. In order to
> + * avoid race conditions and crashes, the entry point of the thread
> + * needs to be in libmultipath, not in the DSO itself.
> + * Checker threads must use this function as entry point. It will call
> + * the DSO's "libcheck_thread" function with the checker context as
> + * argument. When libcheck_thread() returns, it will clean up and
> + * decrement the DSO's refcount. See the tur checker for a usage example.
> + *
> + * @param arg: pointer to struct checker, must have non-NULL cls->thread
> + */
> +void *checker_thread_entry (void *);
>  int checker_check (struct checker *, int);
>  int checker_is_sync(const struct checker *);
>  const char *checker_name (const struct checker *);
> diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c
> index e886fcf..063c419 100644
> --- a/libmultipath/checkers/tur.c
> +++ b/libmultipath/checkers/tur.c
> @@ -15,7 +15,6 @@
>  #include <errno.h>
>  #include <sys/time.h>
>  #include <pthread.h>
> -#include <urcu.h>
>  #include <urcu/uatomic.h>
>  
>  #include "checkers.h"
> @@ -204,7 +203,6 @@ static void cleanup_func(void *data)
>  	holders = uatomic_sub_return(&ct->holders, 1);
>  	if (!holders)
>  		cleanup_context(ct);
> -	rcu_unregister_thread();
>  }
>  
>  /*
> @@ -251,7 +249,7 @@ static void tur_deep_sleep(const struct tur_checker_context *ct)
>  #define tur_deep_sleep(x) do {} while (0)
>  #endif /* TUR_TEST_MAJOR */
>  
> -static void *tur_thread(void *ctx)
> +void *libcheck_thread(void *ctx)
>  {
>  	struct tur_checker_context *ct = ctx;
>  	int state, running;
> @@ -259,7 +257,6 @@ static void *tur_thread(void *ctx)
>  
>  	/* This thread can be canceled, so setup clean up */
>  	tur_thread_cleanup_push(ct);
> -	rcu_register_thread();
>  
>  	condlog(4, "%d:%d : tur checker starting up", major(ct->devt),
>  		minor(ct->devt));
> @@ -394,7 +391,7 @@ int libcheck_check(struct checker * c)
>  		uatomic_set(&ct->running, 1);
>  		tur_set_async_timeout(c);
>  		setup_thread_attr(&attr, 32 * 1024, 1);
> -		r = pthread_create(&ct->thread, &attr, tur_thread, ct);
> +		r = pthread_create(&ct->thread, &attr, checker_thread_entry, c);
>  		pthread_attr_destroy(&attr);
>  		if (r) {
>  			uatomic_sub(&ct->holders, 1);
> -- 
> 2.29.0

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


  reply	other threads:[~2020-11-06  0:41 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-05 11:49 [dm-devel] [RFC PATCH] libmultipath: prevent DSO unloading with astray checker threads mwilck
2020-11-06  0:41 ` Benjamin Marzinski [this message]
2020-11-06 17:32   ` Martin Wilck
2020-11-08  0:06     ` Xose Vazquez Perez
2020-11-24 20:33     ` Benjamin Marzinski
2020-11-24 20:59       ` Martin Wilck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201106004115.GH3384@octiron.msp.redhat.com \
    --to=bmarzins@redhat.com \
    --cc=dm-devel@redhat.com \
    --cc=mwilck@suse.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.