All of lore.kernel.org
 help / color / mirror / Atom feed
From: Doug Oucharek <doucharek@cray.com>
To: lustre-devel@lists.lustre.org
Subject: [lustre-devel] [PATCH 18/34] lnet: add ni_state
Date: Wed, 12 Sep 2018 03:59:16 +0000	[thread overview]
Message-ID: <0DD49EFB-412D-4C87-8FC8-3F0C93D2E98A@cray.com> (raw)
In-Reply-To: <153628137195.8267.16400748098054215181.stgit@noble>

I believe this state machine is being introduced to help us understand how healthy an NI is, so that we can avoid it when it is not healthy and we have other paths which are still OK.

Reviewed-by: Doug Oucharek <dougso@me.com>

Doug

On 9/6/18, 5:54 PM, "NeilBrown" <neilb@suse.com> wrote:

    This is barely used.
    
    This is part of
        8cbb8cd3e771e7f7e0f99cafc19fad32770dc015
           LU-7734 lnet: Multi-Rail local NI split
    
    Signed-off-by: NeilBrown <neilb@suse.com>
    ---
     .../staging/lustre/include/linux/lnet/lib-lnet.h   |    1 +
     .../staging/lustre/include/linux/lnet/lib-types.h  |   16 ++++++++++++++++
     drivers/staging/lustre/lnet/lnet/api-ni.c          |   16 ++++++++++++++++
     drivers/staging/lustre/lnet/lnet/config.c          |    1 +
     4 files changed, 34 insertions(+)
    
    diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
    index faa3f19dd844..54a93235834c 100644
    --- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
    +++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
    @@ -400,6 +400,7 @@ int lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni);
     struct lnet_ni *lnet_nid2ni_locked(lnet_nid_t nid, int cpt);
     struct lnet_ni *lnet_net2ni_locked(__u32 net, int cpt);
     struct lnet_ni *lnet_net2ni(__u32 net);
    +bool lnet_is_ni_healthy_locked(struct lnet_ni *ni);
     
     extern int portal_rotor;
     
    diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h
    index 1d372672e2de..6c34ecf22021 100644
    --- a/drivers/staging/lustre/include/linux/lnet/lib-types.h
    +++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h
    @@ -256,6 +256,19 @@ struct lnet_tx_queue {
     	struct list_head	tq_delayed;	/* delayed TXs */
     };
     
    +enum lnet_ni_state {
    +	/* set when NI block is allocated */
    +	LNET_NI_STATE_INIT = 0,
    +	/* set when NI is started successfully */
    +	LNET_NI_STATE_ACTIVE,
    +	/* set when LND notifies NI failed */
    +	LNET_NI_STATE_FAILED,
    +	/* set when LND notifies NI degraded */
    +	LNET_NI_STATE_DEGRADED,
    +	/* set when shutting down NI */
    +	LNET_NI_STATE_DELETING
    +};
    +
     struct lnet_net {
     	/* chain on the ln_nets */
     	struct list_head	net_list;
    @@ -324,6 +337,9 @@ struct lnet_ni {
     	/* my health status */
     	struct lnet_ni_status	*ni_status;
     
    +	/* NI FSM */
    +	enum lnet_ni_state	ni_state;
    +
     	/* per NI LND tunables */
     	struct lnet_lnd_tunables ni_lnd_tunables;
     
    diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
    index 46c5ca71bc07..618fdf8141f0 100644
    --- a/drivers/staging/lustre/lnet/lnet/api-ni.c
    +++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
    @@ -780,6 +780,16 @@ lnet_islocalnet(__u32 net)
     	return !!ni;
     }
     
    +bool
    +lnet_is_ni_healthy_locked(struct lnet_ni *ni)
    +{
    +	if (ni->ni_state == LNET_NI_STATE_ACTIVE ||
    +	    ni->ni_state == LNET_NI_STATE_DEGRADED)
    +		return true;
    +
    +	return false;
    +}
    +
     struct lnet_ni  *
     lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
     {
    @@ -1117,6 +1127,9 @@ lnet_clear_zombies_nis_locked(struct lnet_net *net)
     		ni = list_entry(zombie_list->next,
     				struct lnet_ni, ni_netlist);
     		list_del_init(&ni->ni_netlist);
    +		/* the ni should be in deleting state. If it's not it's
    +		 * a bug */
    +		LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
     		cfs_percpt_for_each(ref, j, ni->ni_refs) {
     			if (!*ref)
     				continue;
    @@ -1163,6 +1176,7 @@ lnet_shutdown_lndni(struct lnet_ni *ni)
     	struct lnet_net *net = ni->ni_net;
     
     	lnet_net_lock(LNET_LOCK_EX);
    +	ni->ni_state = LNET_NI_STATE_DELETING;
     	lnet_ni_unlink_locked(ni);
     	lnet_net_unlock(LNET_LOCK_EX);
     
    @@ -1291,6 +1305,8 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
     
     	lnet_net_unlock(LNET_LOCK_EX);
     
    +	ni->ni_state = LNET_NI_STATE_ACTIVE;
    +
     	if (net->net_lnd->lnd_type == LOLND) {
     		lnet_ni_addref(ni);
     		LASSERT(!the_lnet.ln_loni);
    diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c
    index 2588d67fea1b..081812e19b13 100644
    --- a/drivers/staging/lustre/lnet/lnet/config.c
    +++ b/drivers/staging/lustre/lnet/lnet/config.c
    @@ -393,6 +393,7 @@ lnet_ni_alloc(struct lnet_net *net, struct cfs_expr_list *el, char *iface)
     		ni->ni_net_ns = NULL;
     
     	ni->ni_last_alive = ktime_get_real_seconds();
    +	ni->ni_state = LNET_NI_STATE_INIT;
     	rc = lnet_net_append_cpts(ni->ni_cpts, ni->ni_ncpts, net);
     	if (rc != 0)
     		goto failed;
    
    
    

  reply	other threads:[~2018-09-12  3:59 UTC|newest]

Thread overview: 98+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-07  0:49 [lustre-devel] [PATCH 00/34] Beginning of multi-rail support for drivers/staging/lustre NeilBrown
2018-09-07  0:49 ` [lustre-devel] [PATCH 11/34] lnet: pass tun to lnet_startup_lndni, instead of full conf NeilBrown
2018-09-11 18:31   ` Amir Shehata
2018-09-12  4:03     ` NeilBrown
2018-09-12  3:30   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 12/34] lnet: split lnet_startup_lndni NeilBrown
2018-09-12  3:39   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 16/34] lnet: lnet_shutdown_lndnets - remove some cleanup code NeilBrown
2018-09-07  0:49 ` [lustre-devel] [PATCH 18/34] lnet: add ni_state NeilBrown
2018-09-12  3:59   ` Doug Oucharek [this message]
2018-09-12  4:25     ` NeilBrown
2018-09-07  0:49 ` [lustre-devel] [PATCH 14/34] lnet: rename lnet_find_net_locked to lnet_find_rnet_locked NeilBrown
2018-09-12  3:40   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 09/34] lnet: add list of cpts to lnet_net NeilBrown
2018-09-10 23:28   ` Doug Oucharek
2018-09-12  2:16     ` NeilBrown
2018-09-11  1:02   ` James Simmons
2018-09-07  0:49 ` [lustre-devel] [PATCH 06/34] lnet: store separate xmit/recv net-interface in each message NeilBrown
2018-09-10 23:24   ` Doug Oucharek
2018-09-10 23:29   ` James Simmons
2018-09-10 23:36   ` James Simmons
2018-09-07  0:49 ` [lustre-devel] [PATCH 03/34] lnet: struct lnet_ni: move ni_lnd to lnet_net NeilBrown
2018-09-10 23:04   ` Doug Oucharek
2018-09-10 23:19     ` James Simmons
2018-09-10 23:19       ` Doug Oucharek
2018-09-10 23:19     ` James Simmons
2018-09-10 23:24   ` James Simmons
2018-09-10 23:25   ` James Simmons
2018-09-07  0:49 ` [lustre-devel] [PATCH 15/34] lnet: extend zombie handling to nets and nis NeilBrown
2018-09-12  3:53   ` Doug Oucharek
2018-09-12  4:10     ` NeilBrown
2018-09-07  0:49 ` [lustre-devel] [PATCH 02/34] lnet: Create struct lnet_net NeilBrown
2018-09-10 22:56   ` Doug Oucharek
2018-09-10 23:23   ` James Simmons
2018-09-07  0:49 ` [lustre-devel] [PATCH 04/34] lnet: embed lnd_tunables in lnet_ni NeilBrown
2018-09-10 23:08   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 05/34] lnet: begin separating "networks" from "network interfaces" NeilBrown
2018-09-10 23:18   ` Doug Oucharek
2018-09-12  2:48     ` NeilBrown
2018-09-10 23:27   ` James Simmons
2018-09-07  0:49 ` [lustre-devel] [PATCH 13/34] lnet: reverse order of lnet_startup_lnd{net, ni} NeilBrown
2018-09-12  3:39   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 10/34] lnet: add ni arg to lnet_cpt_of_nid() NeilBrown
2018-09-10 23:32   ` Doug Oucharek
2018-09-11  1:03   ` James Simmons
2018-09-07  0:49 ` [lustre-devel] [PATCH 01/34] struct lnet_ni - reformat comments NeilBrown
2018-09-10 22:49   ` Doug Oucharek
2018-09-10 23:17   ` James Simmons
2018-09-12  2:44     ` NeilBrown
2018-09-07  0:49 ` [lustre-devel] [PATCH 07/34] lnet: change lnet_peer to reference the net, rather than ni NeilBrown
2018-09-10 23:17   ` James Simmons
2018-09-12  2:56     ` NeilBrown
2018-09-07  0:49 ` [lustre-devel] [PATCH 08/34] lnet: add cpt to lnet_match_info NeilBrown
2018-09-10 23:25   ` Doug Oucharek
2018-09-11  1:01   ` James Simmons
2018-09-11  1:01   ` [lustre-devel] BRe: " James Simmons
2018-09-07  0:49 ` [lustre-devel] [PATCH 17/34] lnet: move lnet_shutdown_lndnets down to after first use NeilBrown
2018-09-12  3:55   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 20/34] lnet: discard ni_cpt_list NeilBrown
2018-09-12  4:07   ` Doug Oucharek
2018-09-12  5:48     ` NeilBrown
2018-09-13 19:33       ` Amir Shehata
2018-09-24  6:03         ` NeilBrown
2018-09-12 16:29   ` Amir Shehata
2018-09-07  0:49 ` [lustre-devel] [PATCH 34/34] lnet: introduce use_tcp_bonding mod param NeilBrown
2018-09-12  4:54   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 28/34] lnet: add checks to ensure network interface names are unique NeilBrown
2018-09-12  4:39   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 27/34] lnet: make it possible to add a new interface to a network NeilBrown
2018-09-12  4:38   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 30/34] lnet: fix typo NeilBrown
2018-09-12  4:47   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 33/34] Completely re-write lnet_parse_networks() NeilBrown
2018-09-12  4:54   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 32/34] lnet: lnet_dyn_del_ni: fix ping_info count NeilBrown
2018-09-12  4:49   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 22/34] lnet: don't take reference in lnet_XX2ni_locked() NeilBrown
2018-09-12  4:18   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 29/34] lnet: track tunables in lnet_startup_lndnet() NeilBrown
2018-09-12  4:47   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 23/34] lnet: don't need lock to test ln_shutdown NeilBrown
2018-09-12  4:27   ` Doug Oucharek
2018-09-12  5:54     ` NeilBrown
2018-09-07  0:49 ` [lustre-devel] [PATCH 26/34] lnet: only valid lnd_type when net_id is unique NeilBrown
2018-09-12  4:34   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 31/34] lnet: lnet_dyn_add_ni: fix ping_info count NeilBrown
2018-09-12  4:48   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 19/34] lnet: simplify lnet_islocalnet() NeilBrown
2018-09-12  4:02   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 25/34] lnet: swap 'then' and 'else' branches in lnet_startup_lndnet NeilBrown
2018-09-12  4:32   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 24/34] lnet: don't take lock over lnet_net_unique() NeilBrown
2018-09-12  4:29   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 21/34] lnet: add net_ni_added NeilBrown
2018-09-12  4:15   ` Doug Oucharek
2018-09-10 23:10 ` [lustre-devel] [PATCH 00/34] Beginning of multi-rail support for drivers/staging/lustre James Simmons
2018-09-24  6:58   ` NeilBrown
2018-09-29 22:35     ` James Simmons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0DD49EFB-412D-4C87-8FC8-3F0C93D2E98A@cray.com \
    --to=doucharek@cray.com \
    --cc=lustre-devel@lists.lustre.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.