dm-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
From: Benjamin Marzinski <bmarzins@redhat.com>
To: mwilck@suse.com
Cc: lixiaokeng@huawei.com, dm-devel@redhat.com,
	Chongyun Wu <wu.chongyun@h3c.com>
Subject: Re: [dm-devel] [PATCH 31/35] multipathd: uxlsnr: add idle notification
Date: Wed, 15 Sep 2021 23:14:52 -0500	[thread overview]
Message-ID: <20210916041452.GQ3087@octiron.msp.redhat.com> (raw)
In-Reply-To: <20210910114120.13665-32-mwilck@suse.com>

On Fri, Sep 10, 2021 at 01:41:16PM +0200, mwilck@suse.com wrote:
> From: Martin Wilck <mwilck@suse.com>
> 
> The previous patches added the state machine and the timeout handling,
> but there was no wakeup mechanism for the uxlsnr for cases where
> client connections were waiting for the vecs lock.
> 
> This patch uses the previously introduced wakeup mechanism of
> struct mutex_lock for this purpose. Processes which unlock the
> "global" vecs lock send an event in an eventfd which the uxlsnr
> loop is polling for.
> 
> As we are now woken up for servicing client handlers that don't
> wait for input but for the lock, we need to set up the pollfds
> differently, and iterate over all clients when handling events,
> not only over the ones that are receiving. The hangup handling
> is changed, too. We have to look at every client, even if one has
> hung up. Note that I don't take client_lock for the loop in
> uxsock_listen(), it's not necessary and will be removed elsewhere
> in a follow-up patch.
> 
> With this in place, the lock need not be taken in execute_handler()
> any more. The uxlsnr only ever calls trylock() on the vecs lock,
> avoiding any waiting for other threads to finish.
> 
> Signed-off-by: Martin Wilck <mwilck@suse.com>
> ---
>  multipathd/uxlsnr.c | 211 ++++++++++++++++++++++++++++++--------------
>  1 file changed, 143 insertions(+), 68 deletions(-)
> 
> diff --git a/multipathd/uxlsnr.c b/multipathd/uxlsnr.c
> index 553274b..4637954 100644
> --- a/multipathd/uxlsnr.c
> +++ b/multipathd/uxlsnr.c
> @@ -24,6 +24,7 @@
>  #include <signal.h>
>  #include <stdbool.h>
>  #include <sys/inotify.h>
> +#include <sys/eventfd.h>
>  #include "checkers.h"
>  #include "memory.h"
>  #include "debug.h"
> @@ -70,6 +71,7 @@ struct client {
>  enum {
>  	POLLFD_UX = 0,
>  	POLLFD_NOTIFY,
> +	POLLFD_IDLE,
>  	POLLFDS_BASE,
>  };
>  
> @@ -90,8 +92,23 @@ static LIST_HEAD(clients);
>  static pthread_mutex_t client_lock = PTHREAD_MUTEX_INITIALIZER;
>  static struct pollfd *polls;
>  static int notify_fd = -1;
> +static int idle_fd = -1;
>  static char *watch_config_dir;
>  
> +struct possible_lock {
> +	struct mutex_lock *lock;
> +	bool held;
> +};
> +
> +static void unlock_if_held(void *arg)
> +{
> +	struct possible_lock *pl = arg;
> +
> +	/* don't call unlock_wakeup() here, lest we wakeup ourselves */
> +	if (pl->held)
> +		__unlock(pl->lock);
> +}
> +
>  static bool _socket_client_is_root(int fd)
>  {
>  	socklen_t len = 0;
> @@ -187,6 +204,17 @@ void uxsock_cleanup(void *arg)
>  	free_polls();
>  }
>  
> +void wakeup_cleanup(void *arg)
> +{
> +	struct mutex_lock *lck = arg;
> +	int fd = idle_fd;
> +
> +	idle_fd = -1;
> +	set_wakeup_fn(lck, NULL);
> +	if (fd != -1)
> +		close(fd);
> +}
> +
>  struct watch_descriptors {
>  	int conf_wd;
>  	int dir_wd;
> @@ -293,6 +321,18 @@ static void handle_inotify(int fd, struct watch_descriptors *wds)
>  
>  static const struct timespec ts_zero = { .tv_sec = 0, };
>  
> +/* call with clients lock held */
> +static bool __need_lock(void)
> +{
> +	struct client *c;
> +
> +	list_for_each_entry(c, &clients, node) {
> +		if (c->state == CLT_WAIT_LOCK)
> +			return true;
> +	}
> +	return false;
> +}
> +
>  static int parse_cmd(struct client *c)
>  {
>  	int r;
> @@ -310,40 +350,31 @@ static int parse_cmd(struct client *c)
>  	return r;
>  }
>  
> -static int execute_handler(struct client *c, struct vectors *vecs, int timeout)
> +static int execute_handler(struct client *c, struct vectors *vecs)
>  {
> -	int r;
> -	struct timespec tmo;
>  
> -	if (!c->handler)
> +	if (!c->handler || !c->handler->fn)
>  		return -EINVAL;
>  
> -	if (clock_gettime(CLOCK_REALTIME, &tmo) == 0) {
> -		tmo.tv_sec += timeout;
> -	} else {
> -		tmo.tv_sec = 0;
> -	}
> +	return c->handler->fn(c->cmdvec, &c->reply, vecs);
> +}
>  
> -	if (c->handler->locked) {
> -		int locked = 0;
> +static void wakeup_listener(void)
> +{
> +	uint64_t one = 1;
>  
> -		pthread_cleanup_push(cleanup_lock, &vecs->lock);
> -		if (tmo.tv_sec) {
> -			r = timedlock(&vecs->lock, &tmo);
> -		} else {
> -			lock(&vecs->lock);
> -			r = 0;
> -		}
> -		if (r == 0) {
> -			locked = 1;
> -			pthread_testcancel();
> -			r = c->handler->fn(c->cmdvec, &c->reply, vecs);
> -		}
> -		pthread_cleanup_pop(locked);
> -	} else
> -		r = c->handler->fn(c->cmdvec, &c->reply, vecs);
> +	if (idle_fd != -1 &&
> +	    write(idle_fd, &one, sizeof(one)) != sizeof(one))
> +		condlog(1, "%s: failed", __func__);
> +}
>  
> -	return r;
> +static void drain_idle_fd(int fd)
> +{
> +	uint64_t val;
> +	int rc;
> +
> +	rc = read(fd, &val, sizeof(val));
> +	condlog(4, "%s: %d, %"PRIu64, __func__, rc, val);
>  }
>  
>  void default_reply(struct client *c, int r)
> @@ -391,15 +422,26 @@ static void set_client_state(struct client *c, int state)
>  	c->state = state;
>  }
>  
> -static void handle_client(struct client *c, struct vectors *vecs)
> +static void handle_client(struct client *c, struct vectors *vecs, short revents)
>  {
>  	ssize_t n;
> +	struct possible_lock pl = {
> +		.lock = &vecs->lock,
> +		.held = false,
> +	};
>  
> -	condlog(4, "%s: cli[%d] state=%d cmd=\"%s\" repl \"%s\"", __func__,
> -		c->fd, c->state, c->cmd, get_strbuf_str(&c->reply));
> +	if (revents & (POLLHUP|POLLERR)) {
> +		c->error = -ECONNRESET;
> +		return;
> +	}
> +
> +	condlog(4, "%s: cli[%d] poll=%x state=%d cmd=\"%s\" repl \"%s\"", __func__,
> +		c->fd, revents, c->state, c->cmd, get_strbuf_str(&c->reply));
>  
>  	switch (c->state) {
>  	case CLT_RECV:
> +		if (!(revents & POLLIN))
> +			return;
>  		if (c->cmd_len == 0) {
>  			/*
>  			 * We got POLLIN; assume that at least the length can
> @@ -463,15 +505,28 @@ static void handle_client(struct client *c, struct vectors *vecs)
>  	case CLT_WAIT_LOCK:
>  		condlog(4, "%s: cli[%d] state=%d cmd=\"%s\" repl \"%s\"", __func__,
>  			c->fd, c->state, c->cmd, get_strbuf_str(&c->reply));
> -		/* tbd */
> +
> +		if (c->error == 0 && c->handler && c->handler->locked) {
> +			pl.held = trylock(pl.lock) == 0;

I do worry that if there are, for instance, a lot of uevents coming in,
this could starve the uxlsnr thread, since other threads could be
grabbing and releasing the vecs lock, but if it's usually being held,
then the uxlsnr thread might never try to grab it when it's free, and it
will keep losing its place in line. Also, every time that the vecs lock
is dropped between ppoll() calls, a wakeup will get triggered, even if
the lock was grabbed by something else before the ppoll thread runs.

I suppose the only way to deal with that would be to move the locking
commands to a list handled by a separate thread, so that it could block
without stalling the non-locking commands.

> +			if (!pl.held) {
> +				condlog(4, "%s: cli[%d] waiting for lock",
> +					__func__, c->fd);
> +				return;
> +			} else
> +				condlog(4, "%s: cli[%d] grabbed lock",
> +					__func__, c->fd);
> +		}
>  		set_client_state(c, CLT_WORK);
>  		/* fallthrough */
>  

We should never return to uxsock_listen() while the lock is held. The
code doesn't, but the fact that CLT_WORK is a separate state makes it
look like this could be possible.  Since we must never be in CLT_WORK
without first being in CLT_WAIT_LOCK, I don't see any point for having a
separate CLT_WAIT_LOCK state.  CLT_WORK should do both the locking if
necessary, and calling the handler.

>  	case CLT_WORK:
>  		condlog(4, "%s: cli[%d] state=%d cmd=\"%s\" repl \"%s\"", __func__,
>  			c->fd, c->state, c->cmd, get_strbuf_str(&c->reply));
> -		if (c->error == 0 && c->handler)
> -			c->error = execute_handler(c, vecs, uxsock_timeout / 1000);
> +		if (c->error == 0 && c->handler) {
> +			pthread_cleanup_push(unlock_if_held, &pl);
> +			c->error = execute_handler(c, vecs);
> +			pthread_cleanup_pop(1);
> +		}
>  
>  		if (c->cmdvec) {
>  			free_keys(c->cmdvec);
> @@ -511,6 +566,8 @@ void *uxsock_listen(long ux_sock, void *trigger_data)
>  	/* conf->sequence_nr will be 1 when uxsock_listen is first called */
>  	unsigned int sequence_nr = 0;
>  	struct watch_descriptors wds = { .conf_wd = -1, .dir_wd = -1 };
> +	bool need_lock = false;
> +	struct vectors *vecs = trigger_data;
>  
>  	condlog(3, "uxsock: startup listener");
>  	polls = MALLOC(max_pfds * sizeof(*polls));
> @@ -521,6 +578,14 @@ void *uxsock_listen(long ux_sock, void *trigger_data)
>  	notify_fd = inotify_init1(IN_NONBLOCK);
>  	if (notify_fd == -1) /* it's fine if notifications fail */
>  		condlog(3, "failed to start up configuration notifications");
> +
> +	pthread_cleanup_push(wakeup_cleanup, &vecs->lock);
> +	idle_fd = eventfd(0, EFD_NONBLOCK|EFD_CLOEXEC);
> +	if (idle_fd == -1)
> +		condlog(1, "failed to create idle fd");
> +	else
> +		set_wakeup_fn(&vecs->lock, wakeup_listener);
> +
>  	sigfillset(&mask);
>  	sigdelset(&mask, SIGINT);
>  	sigdelset(&mask, SIGTERM);
> @@ -572,16 +637,30 @@ void *uxsock_listen(long ux_sock, void *trigger_data)
>  		else
>  			polls[POLLFD_NOTIFY].events = POLLIN;
>  
> +		need_lock = __need_lock();
> +		polls[POLLFD_IDLE].fd = idle_fd;
> +		if (need_lock)
> +			polls[POLLFD_IDLE].events = POLLIN;
> +		else
> +			polls[POLLFD_IDLE].events = 0;
> +
>  		/* setup the clients */
> -		i = POLLFDS_BASE;
> -		list_for_each_entry(c, &clients, node) {
> -			polls[i].fd = c->fd;
> -			polls[i].events = POLLIN;
> -			i++;
> -			if (i >= max_pfds)
> -				break;
> -		}
> -		n_pfds = i;
> +                i = POLLFDS_BASE;
> +                list_for_each_entry(c, &clients, node) {
> +                        switch(c->state) {
> +                        case CLT_RECV:
> +                                polls[i].events = POLLIN;
> +                                break;
> +                        default:
> +				/* don't poll for this client */
> +                                continue;
> +                        }
> +                        polls[i].fd = c->fd;
> +                        i++;
> +                        if (i >= max_pfds)
> +                                break;
> +                }
> +                n_pfds = i;
>  		pthread_cleanup_pop(1);
>  
>  		/* most of our life is spent in this call */
> @@ -604,33 +683,28 @@ void *uxsock_listen(long ux_sock, void *trigger_data)
>  			handle_signals(true);
>  			continue;
>  		}
> +		if (polls[POLLFD_IDLE].fd != -1 &&
> +		    polls[POLLFD_IDLE].revents & POLLIN)
> +			drain_idle_fd(idle_fd);
>  
> -		/* see if a client wants to speak to us */
> -		for (i = POLLFDS_BASE; i < n_pfds; i++) {
> -			if (polls[i].revents & (POLLIN|POLLHUP|POLLERR)) {
> -				c = NULL;
> -				pthread_mutex_lock(&client_lock);
> -				list_for_each_entry(tmp, &clients, node) {
> -					if (tmp->fd == polls[i].fd) {
> -						c = tmp;
> -						break;
> -					}
> -				}
> -				pthread_mutex_unlock(&client_lock);
> -				if (!c) {
> -					condlog(4, "cli%d: new fd %d",
> -						i, polls[i].fd);
> -					continue;
> -				}
> -				if (polls[i].revents & (POLLHUP|POLLERR)) {
> -					condlog(4, "cli[%d]: Disconnected",
> -						c->fd);
> -					dead_client(c);
> -					continue;
> -				}
> -				handle_client(c, trigger_data);
> -				if (c->error == -ECONNRESET)
> -					dead_client(c);
> +		/* see if a client needs handling */
> +		list_for_each_entry_safe(c, tmp, &clients, node) {
> +			short revents = 0;
> +
> +			for (i = POLLFDS_BASE; i < n_pfds; i++) {
> +                                if (polls[i].fd == c->fd) {
> +                                        revents = polls[i].revents;
> +                                        break;
> +                                }
> +                        }
> +
> +			handle_client(c, trigger_data, revents);
> +
> +			if (c->error == -ECONNRESET) {
> +				condlog(4, "cli[%d]: disconnected", c->fd);
> +				dead_client(c);
> +				if (i < n_pfds)
> +					polls[i].fd = -1;
>  			}
>  		}
>  		/* see if we got a non-fatal signal */
> @@ -646,5 +720,6 @@ void *uxsock_listen(long ux_sock, void *trigger_data)
>  			handle_inotify(notify_fd, &wds);
>  	}
>  
> +	pthread_cleanup_pop(1);
>  	return NULL;
>  }
> -- 
> 2.33.0

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel


  reply	other threads:[~2021-09-16  6:50 UTC|newest]

Thread overview: 89+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-10 11:40 [dm-devel] [PATCH 00/35] multipathd: uxlsnr overhaul mwilck
2021-09-10 11:40 ` [dm-devel] [PATCH 01/35] libmultipath: add timespeccmp() utility function mwilck
2021-09-15 22:07   ` Benjamin Marzinski
2021-09-10 11:40 ` [dm-devel] [PATCH 02/35] libmultipath: add trylock() helper mwilck
2021-09-15 22:07   ` Benjamin Marzinski
2021-09-10 11:40 ` [dm-devel] [PATCH 03/35] libmultipath: add optional wakeup functionality to lock.c mwilck
2021-09-15 22:13   ` Benjamin Marzinski
2021-09-10 11:40 ` [dm-devel] [PATCH 04/35] libmultipath: print: add __snprint_config() mwilck
2021-09-15 22:14   ` Benjamin Marzinski
2021-09-10 11:40 ` [dm-devel] [PATCH 05/35] libmultipath: improve cleanup of uevent queues on exit mwilck
2021-09-15 22:20   ` Benjamin Marzinski
2021-09-16  7:10     ` Martin Wilck
2021-09-16 14:26       ` Benjamin Marzinski
2021-09-10 11:40 ` [dm-devel] [PATCH 06/35] multipathd: fix systemd notification when stopping while reloading mwilck
2021-09-15 22:55   ` Benjamin Marzinski
2021-09-10 11:40 ` [dm-devel] [PATCH 07/35] multipathd: improve delayed reconfigure mwilck
2021-09-15 23:00   ` Benjamin Marzinski
2021-09-16  7:16     ` Martin Wilck
2021-09-10 11:40 ` [dm-devel] [PATCH 08/35] multipathd: cli.h: formatting improvements mwilck
2021-09-15 23:01   ` Benjamin Marzinski
2021-09-10 11:40 ` [dm-devel] [PATCH 09/35] multipathd: cli_del_map: fix reply for delayed action mwilck
2021-09-15 23:40   ` Benjamin Marzinski
2021-09-10 11:40 ` [dm-devel] [PATCH 10/35] multipathd: add prototype for cli_handler functions mwilck
2021-09-15 23:53   ` Benjamin Marzinski
2021-09-10 11:40 ` [dm-devel] [PATCH 11/35] multipathd: make all cli_handlers static mwilck
2021-09-15 23:53   ` Benjamin Marzinski
2021-09-10 11:40 ` [dm-devel] [PATCH 12/35] multipathd: add and set cli_handlers in a single step mwilck
2021-09-16  0:01   ` Benjamin Marzinski
2021-09-16  7:22     ` Martin Wilck
2021-11-12 21:45     ` Martin Wilck
2021-09-10 11:40 ` [dm-devel] [PATCH 13/35] multipathd: cli.c: use ESRCH for "command not found" mwilck
2021-09-16  0:02   ` Benjamin Marzinski
2021-09-10 11:40 ` [dm-devel] [PATCH 14/35] multipathd: add "force_reconfigure" option mwilck
2021-09-16  0:13   ` Benjamin Marzinski
2021-09-16  7:34     ` Martin Wilck
2021-09-16 14:32       ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 15/35] multipathd: uxlsnr: avoid stalled clients during reconfigure mwilck
2021-09-16  2:17   ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 16/35] multipathd: uxlsnr: handle client HUP mwilck
2021-09-16  2:17   ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 17/35] multipathd: uxlsnr: use symbolic values for pollfd indices mwilck
2021-09-16  2:18   ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 18/35] multipathd: uxlsnr: avoid using fd -1 in ppoll() mwilck
2021-09-16  2:18   ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 19/35] multipathd: uxlsnr: data structure for stateful client connection mwilck
2021-09-16  2:19   ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 20/35] multipathd: move uxsock_trigger() to uxlsnr.c mwilck
2021-09-16  2:19   ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 21/35] multipathd: move parse_cmd() " mwilck
2021-09-16  2:19   ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 22/35] multipathd: uxlsnr: remove check_timeout() mwilck
2021-09-16  2:21   ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 23/35] multipathd: uxlsnr: move client handling to separate function mwilck
2021-09-16  2:21   ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 24/35] multipathd: uxlsnr: use main poll loop for receiving mwilck
2021-09-16  2:22   ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 25/35] multipathd: use strbuf in cli_handler functions mwilck
2021-09-16  2:23   ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 26/35] multipathd: uxlsnr: check root on connection startup mwilck
2021-09-16  2:23   ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 27/35] multipathd: uxlsnr: pass struct client to uxsock_trigger() and parse_cmd() mwilck
2021-09-16  2:28   ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 28/35] multipathd: uxlsnr: move handler execution to separate function mwilck
2021-09-16  2:28   ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 29/35] multipathd: uxlsnr: use parser to determine non-root commands mwilck
2021-09-16  2:29   ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 30/35] multipathd: uxlsnr: merge uxsock_trigger() into state machine mwilck
2021-09-16  3:32   ` Benjamin Marzinski
2021-09-16  8:02     ` Martin Wilck
2021-11-12 22:07     ` Martin Wilck
2021-09-10 11:41 ` [dm-devel] [PATCH 31/35] multipathd: uxlsnr: add idle notification mwilck
2021-09-16  4:14   ` Benjamin Marzinski [this message]
2021-09-16  8:54     ` Martin Wilck
2021-09-16 15:06       ` Benjamin Marzinski
2021-09-16 15:54         ` Martin Wilck
2021-09-16 16:10           ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 32/35] multipathd: uxlsnr: add timeout handling mwilck
2021-09-16  4:17   ` Benjamin Marzinski
2021-09-16  8:58     ` Martin Wilck
2021-09-16 15:08       ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 33/35] multipathd: uxlsnr: use poll loop for sending, too mwilck
2021-09-16  4:22   ` Benjamin Marzinski
2021-09-16  9:33     ` Martin Wilck
2021-09-16 15:26       ` Benjamin Marzinski
2021-09-10 11:41 ` [dm-devel] [PATCH 34/35] multipathd: uxlsnr: drop client_lock mwilck
2021-09-16  4:24   ` Benjamin Marzinski
2021-09-16  9:34     ` Martin Wilck
2021-09-10 11:41 ` [dm-devel] [PATCH 35/35] multipathd: uxclt: allow client mode for non-root, too mwilck
2021-09-16  4:24   ` Benjamin Marzinski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210916041452.GQ3087@octiron.msp.redhat.com \
    --to=bmarzins@redhat.com \
    --cc=dm-devel@redhat.com \
    --cc=lixiaokeng@huawei.com \
    --cc=mwilck@suse.com \
    --cc=wu.chongyun@h3c.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).