From a4dd64808d49f5a0d2a94336e56401262ef99e55 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Tue, 2 Mar 2021 17:03:15 +0100 Subject: [PATCH 1/2] libmultipath: protect DSO unloading with RCU Some crashes possibly related to DSO unloading are still observed. Try protecting the unloading with RCU. Signed-off-by: Martin Wilck --- libmultipath/checkers.c | 79 ++++++++++++++++++++++++++++++----------- libmultipath/propsel.c | 4 +++ 2 files changed, 63 insertions(+), 20 deletions(-) diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c index 2dd9915..25f07ce 100644 --- a/libmultipath/checkers.c +++ b/libmultipath/checkers.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -25,6 +26,7 @@ struct checker_class { void *(*thread)(void *); /* async thread entry point */ const char **msgtable; short msgtable_size; + struct rcu_head rcu; }; static const char *checker_state_names[PATH_MAX_STATE] = { @@ -74,20 +76,16 @@ static int checker_class_unref(struct checker_class *cls) return uatomic_sub_return(&cls->refcount, 1); } -void free_checker_class(struct checker_class *c) +static void free_checker_class_rcu(struct rcu_head *head) { - int cnt; + struct checker_class *c = container_of(head, struct checker_class, rcu); - if (!c) - return; - cnt = checker_class_unref(c); - if (cnt != 0) { - condlog(cnt < 0 ? 1 : 4, "%s checker refcount %d", - c->name, cnt); + if (uatomic_read(&c-refcount) > 0) { + condlog(1, "%s: RACE: refcount = %d, not freeing checker", + __func__, refcount); return; } condlog(3, "unloading %s checker", c->name); - list_del(&c->node); if (c->reset) c->reset(); if (c->handle) { @@ -99,6 +97,22 @@ void free_checker_class(struct checker_class *c) FREE(c); } +static void free_checker_class(struct checker_class *c) +{ + int cnt; + + if (!c) + return; + cnt = checker_class_unref(c); + if (cnt != 0) { + condlog(cnt < 0 ? 1 : 4, "%s checker refcount %d", + c->name, cnt); + return; + } + list_del(&c->node); + call_rcu(&c->rcu, free_checker_class_rcu); +} + void cleanup_checkers (void) { struct checker_class *checker_loop; @@ -111,15 +125,32 @@ void cleanup_checkers (void) static struct checker_class *checker_class_lookup(const char *name) { - struct checker_class *c; + struct checker_class *c, *found = NULL; + int refcount = 0; if (!name || !strlen(name)) return NULL; + + rcu_read_lock(); list_for_each_entry(c, &checkers, node) { - if (!strncmp(name, c->name, CHECKER_NAME_LEN)) - return c; + if (!strncmp(name, c->name, CHECKER_NAME_LEN)) { + found = c; + break; + } } - return NULL; + if (found) { + refcount = checker_class_ref(found); + if (refcount == 1) + checker_class_unref(found); + } + rcu_read_unlock(); + + if (refcount <= 1) { + condlog(1, "%s: RACE: got refcount == %d", __func__, refcount); + found = NULL; + } + + return found; } void reset_checker_classes(void) @@ -387,11 +418,20 @@ static void *checker_thread_entry(void *arg) int start_checker_thread(pthread_t *thread, const pthread_attr_t *attr, struct checker_context *ctx) { - int rv; + int rv, refcount; assert(ctx && ctx->cls && ctx->cls->thread); + /* Take a ref here, lest the class be freed before the thread starts */ - (void)checker_class_ref(ctx->cls); + rcu_read_lock(); + refcount = checker_class_ref(ctx->cls); + if (refcount <= 1) + checker_class_unref(ctx->cls); + rcu_read_unlock(); + if (refcount <= 1) + condlog(1, "%s: RACE: got refcount == %d", __func_, refcount); + return EIO; + } rv = pthread_create(thread, attr, checker_thread_entry, ctx); if (rv != 0) { condlog(1, "failed to start checker thread for %s: %m", @@ -418,14 +458,13 @@ void checker_get(const char *multipath_dir, struct checker *dst, if (name && strlen(name)) { src = checker_class_lookup(name); - if (!src) + if (!src) { src = add_checker_class(multipath_dir, name); + if (src && checker_class_ref(src) == 1) + src = NULL; + } } dst->cls = src; - if (!src) - return; - - (void)checker_class_ref(dst->cls); } int init_checkers(const char *multipath_dir) diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c index f771a83..4add95a 100644 --- a/libmultipath/propsel.c +++ b/libmultipath/propsel.c @@ -536,6 +536,10 @@ int select_checker(struct config *conf, struct path *pp) do_default(ckr_name, DEFAULT_CHECKER); out: checker_get(conf->multipath_dir, c, ckr_name); + if (!checker_selected(c)) { + condlog(1, "%s: failed to grab checker", __func__); + return 1; + } condlog(3, "%s: path_checker = %s %s", pp->dev, checker_name(c), origin); if (conf->checker_timeout) { -- 2.29.2