All of lore.kernel.org
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.com>
To: lustre-devel@lists.lustre.org
Subject: [lustre-devel] [PATCH 15/34] lnet: extend zombie handling to nets and nis
Date: Fri, 07 Sep 2018 10:49:31 +1000	[thread overview]
Message-ID: <153628137183.8267.14166864803956204561.stgit@noble> (raw)
In-Reply-To: <153628058697.8267.6056114844033479774.stgit@noble>

A zombie lnet_ni is now attached to its own lnet_net (on net_ni_zombie)
rather than to the global the_lnet.  Zombie lnet_nets are, in turn,
attached to the_lnet (on ln_net_zombie).

For some reason, the lnd's refcount (lnd_refcount) is no longer
dropped before lnd_shutdown() is called on the NI.

This is part of
    8cbb8cd3e771e7f7e0f99cafc19fad32770dc015
       LU-7734 lnet: Multi-Rail local NI split

Signed-off-by: NeilBrown <neilb@suse.com>
---
 .../staging/lustre/include/linux/lnet/lib-types.h  |    9 ++-
 drivers/staging/lustre/lnet/lnet/api-ni.c          |   65 ++++++++++----------
 drivers/staging/lustre/lnet/lnet/config.c          |    3 +
 3 files changed, 42 insertions(+), 35 deletions(-)

diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h
index 22957d142cc0..1d372672e2de 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-types.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h
@@ -284,6 +284,9 @@ struct lnet_net {
 	struct lnet_lnd		*net_lnd;
 	/* list of NIs on this net */
 	struct list_head	net_ni_list;
+
+	/* dying LND instances */
+	struct list_head	net_ni_zombie;
 };
 
 struct lnet_ni {
@@ -653,11 +656,11 @@ struct lnet {
 	/* LND instances */
 	struct list_head		ln_nets;
 	/* NIs bond on specific CPT(s) */
-	struct list_head		  ln_nis_cpt;
-	/* dying LND instances */
-	struct list_head		  ln_nis_zombie;
+	struct list_head		ln_nis_cpt;
 	/* the loopback NI */
 	struct lnet_ni			*ln_loni;
+	/* network zombie list */
+	struct list_head		ln_net_zombie;
 
 	/* remote networks with routes to them */
 	struct list_head		 *ln_remote_nets_hash;
diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
index c3c568e63342..18d111cb826b 100644
--- a/drivers/staging/lustre/lnet/lnet/api-ni.c
+++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
@@ -539,7 +539,6 @@ lnet_prepare(lnet_pid_t requested_pid)
 	INIT_LIST_HEAD(&the_lnet.ln_test_peers);
 	INIT_LIST_HEAD(&the_lnet.ln_nets);
 	INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
-	INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
 	INIT_LIST_HEAD(&the_lnet.ln_routers);
 	INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
 	INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
@@ -618,7 +617,6 @@ lnet_unprepare(void)
 	LASSERT(list_empty(&the_lnet.ln_test_peers));
 	LASSERT(list_empty(&the_lnet.ln_nets));
 	LASSERT(list_empty(&the_lnet.ln_nis_cpt));
-	LASSERT(list_empty(&the_lnet.ln_nis_zombie));
 
 	lnet_portals_destroy();
 
@@ -1095,34 +1093,35 @@ lnet_ni_unlink_locked(struct lnet_ni *ni)
 
 	/* move it to zombie list and nobody can find it anymore */
 	LASSERT(!list_empty(&ni->ni_netlist));
-	list_move(&ni->ni_netlist, &the_lnet.ln_nis_zombie);
+	list_move(&ni->ni_netlist, &ni->ni_net->net_ni_zombie);
 	lnet_ni_decref_locked(ni, 0);
 }
 
 static void
-lnet_clear_zombies_nis_locked(void)
+lnet_clear_zombies_nis_locked(struct lnet_net *net)
 {
 	int i;
 	int islo;
 	struct lnet_ni *ni;
+	struct list_head *zombie_list = &net->net_ni_zombie;
 
 	/*
-	 * Now wait for the NI's I just nuked to show up on ln_zombie_nis
-	 * and shut them down in guaranteed thread context
+	 * Now wait for the NIs I just nuked to show up on the zombie
+	 * list and shut them down in guaranteed thread context
 	 */
 	i = 2;
-	while (!list_empty(&the_lnet.ln_nis_zombie)) {
+	while (!list_empty(zombie_list)) {
 		int *ref;
 		int j;
 
-		ni = list_entry(the_lnet.ln_nis_zombie.next,
+		ni = list_entry(zombie_list->next,
 				struct lnet_ni, ni_netlist);
 		list_del_init(&ni->ni_netlist);
 		cfs_percpt_for_each(ref, j, ni->ni_refs) {
 			if (!*ref)
 				continue;
 			/* still busy, add it back to zombie list */
-			list_add(&ni->ni_netlist, &the_lnet.ln_nis_zombie);
+			list_add(&ni->ni_netlist, zombie_list);
 			break;
 		}
 
@@ -1138,18 +1137,13 @@ lnet_clear_zombies_nis_locked(void)
 			continue;
 		}
 
-		ni->ni_net->net_lnd->lnd_refcount--;
 		lnet_net_unlock(LNET_LOCK_EX);
 
 		islo = ni->ni_net->net_lnd->lnd_type == LOLND;
 
 		LASSERT(!in_interrupt());
-		ni->ni_net->net_lnd->lnd_shutdown(ni);
+		net->net_lnd->lnd_shutdown(ni);
 
-		/*
-		 * can't deref lnd anymore now; it might have unregistered
-		 * itself...
-		 */
 		if (!islo)
 			CDEBUG(D_LNI, "Removed LNI %s\n",
 			       libcfs_nid2str(ni->ni_nid));
@@ -1162,9 +1156,11 @@ lnet_clear_zombies_nis_locked(void)
 }
 
 static void
-lnet_shutdown_lndnis(void)
+lnet_shutdown_lndnet(struct lnet_net *net);
+
+static void
+lnet_shutdown_lndnets(void)
 {
-	struct lnet_ni *ni;
 	int i;
 	struct lnet_net *net;
 
@@ -1173,30 +1169,35 @@ lnet_shutdown_lndnis(void)
 	/* All quiet on the API front */
 	LASSERT(!the_lnet.ln_shutdown);
 	LASSERT(!the_lnet.ln_refcount);
-	LASSERT(list_empty(&the_lnet.ln_nis_zombie));
 
 	lnet_net_lock(LNET_LOCK_EX);
 	the_lnet.ln_shutdown = 1;	/* flag shutdown */
 
-	/* Unlink NIs from the global table */
 	while (!list_empty(&the_lnet.ln_nets)) {
+		/*
+		 * move the nets to the zombie list to avoid them being
+		 * picked up for new work. LONET is also included in the
+		 * Nets that will be moved to the zombie list
+		 */
 		net = list_entry(the_lnet.ln_nets.next,
 				 struct lnet_net, net_list);
-		while (!list_empty(&net->net_ni_list)) {
-			ni = list_entry(net->net_ni_list.next,
-					struct lnet_ni, ni_netlist);
-			lnet_ni_unlink_locked(ni);
-		}
+		list_move(&net->net_list, &the_lnet.ln_net_zombie);
 	}
 
-	/* Drop the cached loopback NI. */
+	/* Drop the cached loopback Net. */
 	if (the_lnet.ln_loni) {
 		lnet_ni_decref_locked(the_lnet.ln_loni, 0);
 		the_lnet.ln_loni = NULL;
 	}
-
 	lnet_net_unlock(LNET_LOCK_EX);
 
+	/* iterate through the net zombie list and delete each net */
+	while (!list_empty(&the_lnet.ln_net_zombie)) {
+		net = list_entry(the_lnet.ln_net_zombie.next,
+				 struct lnet_net, net_list);
+		lnet_shutdown_lndnet(net);
+	}
+
 	/*
 	 * Clear lazy portals and drop delayed messages which hold refs
 	 * on their lnet_msg::msg_rxpeer
@@ -1211,8 +1212,6 @@ lnet_shutdown_lndnis(void)
 	lnet_peer_tables_cleanup(NULL);
 
 	lnet_net_lock(LNET_LOCK_EX);
-
-	lnet_clear_zombies_nis_locked();
 	the_lnet.ln_shutdown = 0;
 	lnet_net_unlock(LNET_LOCK_EX);
 }
@@ -1222,6 +1221,7 @@ static void
 lnet_shutdown_lndni(struct lnet_ni *ni)
 {
 	int i;
+	struct lnet_net *net = ni->ni_net;
 
 	lnet_net_lock(LNET_LOCK_EX);
 	lnet_ni_unlink_locked(ni);
@@ -1235,7 +1235,7 @@ lnet_shutdown_lndni(struct lnet_ni *ni)
 	lnet_peer_tables_cleanup(ni);
 
 	lnet_net_lock(LNET_LOCK_EX);
-	lnet_clear_zombies_nis_locked();
+	lnet_clear_zombies_nis_locked(net);
 	lnet_net_unlock(LNET_LOCK_EX);
 }
 
@@ -1445,7 +1445,7 @@ lnet_startup_lndnets(struct list_head *netlist)
 
 	return ni_count;
 failed:
-	lnet_shutdown_lndnis();
+	lnet_shutdown_lndnets();
 
 	return rc;
 }
@@ -1492,6 +1492,7 @@ int lnet_lib_init(void)
 	the_lnet.ln_refcount = 0;
 	LNetInvalidateEQHandle(&the_lnet.ln_rc_eqh);
 	INIT_LIST_HEAD(&the_lnet.ln_lnds);
+	INIT_LIST_HEAD(&the_lnet.ln_net_zombie);
 	INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
 	INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
 
@@ -1656,7 +1657,7 @@ LNetNIInit(lnet_pid_t requested_pid)
 	if (!the_lnet.ln_nis_from_mod_params)
 		lnet_destroy_routes();
 err_shutdown_lndnis:
-	lnet_shutdown_lndnis();
+	lnet_shutdown_lndnets();
 err_empty_list:
 	lnet_unprepare();
 	LASSERT(rc < 0);
@@ -1703,7 +1704,7 @@ LNetNIFini(void)
 
 		lnet_acceptor_stop();
 		lnet_destroy_routes();
-		lnet_shutdown_lndnis();
+		lnet_shutdown_lndnets();
 		lnet_unprepare();
 	}
 
diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c
index 380a3fb1caba..2588d67fea1b 100644
--- a/drivers/staging/lustre/lnet/lnet/config.c
+++ b/drivers/staging/lustre/lnet/lnet/config.c
@@ -279,6 +279,8 @@ lnet_net_free(struct lnet_net *net)
 	struct list_head *tmp, *tmp2;
 	struct lnet_ni *ni;
 
+	LASSERT(list_empty(&net->net_ni_zombie));
+
 	/* delete any nis which have been started. */
 	list_for_each_safe(tmp, tmp2, &net->net_ni_list) {
 		ni = list_entry(tmp, struct lnet_ni, ni_netlist);
@@ -312,6 +314,7 @@ lnet_net_alloc(__u32 net_id, struct list_head *net_list)
 
 	INIT_LIST_HEAD(&net->net_list);
 	INIT_LIST_HEAD(&net->net_ni_list);
+	INIT_LIST_HEAD(&net->net_ni_zombie);
 
 	net->net_id = net_id;
 

  parent reply	other threads:[~2018-09-07  0:49 UTC|newest]

Thread overview: 98+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-07  0:49 [lustre-devel] [PATCH 00/34] Beginning of multi-rail support for drivers/staging/lustre NeilBrown
2018-09-07  0:49 ` [lustre-devel] [PATCH 11/34] lnet: pass tun to lnet_startup_lndni, instead of full conf NeilBrown
2018-09-11 18:31   ` Amir Shehata
2018-09-12  4:03     ` NeilBrown
2018-09-12  3:30   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 12/34] lnet: split lnet_startup_lndni NeilBrown
2018-09-12  3:39   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 16/34] lnet: lnet_shutdown_lndnets - remove some cleanup code NeilBrown
2018-09-07  0:49 ` [lustre-devel] [PATCH 18/34] lnet: add ni_state NeilBrown
2018-09-12  3:59   ` Doug Oucharek
2018-09-12  4:25     ` NeilBrown
2018-09-07  0:49 ` [lustre-devel] [PATCH 14/34] lnet: rename lnet_find_net_locked to lnet_find_rnet_locked NeilBrown
2018-09-12  3:40   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 09/34] lnet: add list of cpts to lnet_net NeilBrown
2018-09-10 23:28   ` Doug Oucharek
2018-09-12  2:16     ` NeilBrown
2018-09-11  1:02   ` James Simmons
2018-09-07  0:49 ` [lustre-devel] [PATCH 06/34] lnet: store separate xmit/recv net-interface in each message NeilBrown
2018-09-10 23:24   ` Doug Oucharek
2018-09-10 23:29   ` James Simmons
2018-09-10 23:36   ` James Simmons
2018-09-07  0:49 ` [lustre-devel] [PATCH 03/34] lnet: struct lnet_ni: move ni_lnd to lnet_net NeilBrown
2018-09-10 23:04   ` Doug Oucharek
2018-09-10 23:19     ` James Simmons
2018-09-10 23:19       ` Doug Oucharek
2018-09-10 23:19     ` James Simmons
2018-09-10 23:24   ` James Simmons
2018-09-10 23:25   ` James Simmons
2018-09-07  0:49 ` NeilBrown [this message]
2018-09-12  3:53   ` [lustre-devel] [PATCH 15/34] lnet: extend zombie handling to nets and nis Doug Oucharek
2018-09-12  4:10     ` NeilBrown
2018-09-07  0:49 ` [lustre-devel] [PATCH 02/34] lnet: Create struct lnet_net NeilBrown
2018-09-10 22:56   ` Doug Oucharek
2018-09-10 23:23   ` James Simmons
2018-09-07  0:49 ` [lustre-devel] [PATCH 04/34] lnet: embed lnd_tunables in lnet_ni NeilBrown
2018-09-10 23:08   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 05/34] lnet: begin separating "networks" from "network interfaces" NeilBrown
2018-09-10 23:18   ` Doug Oucharek
2018-09-12  2:48     ` NeilBrown
2018-09-10 23:27   ` James Simmons
2018-09-07  0:49 ` [lustre-devel] [PATCH 13/34] lnet: reverse order of lnet_startup_lnd{net, ni} NeilBrown
2018-09-12  3:39   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 10/34] lnet: add ni arg to lnet_cpt_of_nid() NeilBrown
2018-09-10 23:32   ` Doug Oucharek
2018-09-11  1:03   ` James Simmons
2018-09-07  0:49 ` [lustre-devel] [PATCH 01/34] struct lnet_ni - reformat comments NeilBrown
2018-09-10 22:49   ` Doug Oucharek
2018-09-10 23:17   ` James Simmons
2018-09-12  2:44     ` NeilBrown
2018-09-07  0:49 ` [lustre-devel] [PATCH 07/34] lnet: change lnet_peer to reference the net, rather than ni NeilBrown
2018-09-10 23:17   ` James Simmons
2018-09-12  2:56     ` NeilBrown
2018-09-07  0:49 ` [lustre-devel] [PATCH 08/34] lnet: add cpt to lnet_match_info NeilBrown
2018-09-10 23:25   ` Doug Oucharek
2018-09-11  1:01   ` James Simmons
2018-09-11  1:01   ` [lustre-devel] BRe: " James Simmons
2018-09-07  0:49 ` [lustre-devel] [PATCH 17/34] lnet: move lnet_shutdown_lndnets down to after first use NeilBrown
2018-09-12  3:55   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 20/34] lnet: discard ni_cpt_list NeilBrown
2018-09-12  4:07   ` Doug Oucharek
2018-09-12  5:48     ` NeilBrown
2018-09-13 19:33       ` Amir Shehata
2018-09-24  6:03         ` NeilBrown
2018-09-12 16:29   ` Amir Shehata
2018-09-07  0:49 ` [lustre-devel] [PATCH 34/34] lnet: introduce use_tcp_bonding mod param NeilBrown
2018-09-12  4:54   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 28/34] lnet: add checks to ensure network interface names are unique NeilBrown
2018-09-12  4:39   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 27/34] lnet: make it possible to add a new interface to a network NeilBrown
2018-09-12  4:38   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 30/34] lnet: fix typo NeilBrown
2018-09-12  4:47   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 33/34] Completely re-write lnet_parse_networks() NeilBrown
2018-09-12  4:54   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 32/34] lnet: lnet_dyn_del_ni: fix ping_info count NeilBrown
2018-09-12  4:49   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 22/34] lnet: don't take reference in lnet_XX2ni_locked() NeilBrown
2018-09-12  4:18   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 29/34] lnet: track tunables in lnet_startup_lndnet() NeilBrown
2018-09-12  4:47   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 23/34] lnet: don't need lock to test ln_shutdown NeilBrown
2018-09-12  4:27   ` Doug Oucharek
2018-09-12  5:54     ` NeilBrown
2018-09-07  0:49 ` [lustre-devel] [PATCH 26/34] lnet: only valid lnd_type when net_id is unique NeilBrown
2018-09-12  4:34   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 31/34] lnet: lnet_dyn_add_ni: fix ping_info count NeilBrown
2018-09-12  4:48   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 19/34] lnet: simplify lnet_islocalnet() NeilBrown
2018-09-12  4:02   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 25/34] lnet: swap 'then' and 'else' branches in lnet_startup_lndnet NeilBrown
2018-09-12  4:32   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 24/34] lnet: don't take lock over lnet_net_unique() NeilBrown
2018-09-12  4:29   ` Doug Oucharek
2018-09-07  0:49 ` [lustre-devel] [PATCH 21/34] lnet: add net_ni_added NeilBrown
2018-09-12  4:15   ` Doug Oucharek
2018-09-10 23:10 ` [lustre-devel] [PATCH 00/34] Beginning of multi-rail support for drivers/staging/lustre James Simmons
2018-09-24  6:58   ` NeilBrown
2018-09-29 22:35     ` James Simmons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=153628137183.8267.14166864803956204561.stgit@noble \
    --to=neilb@suse.com \
    --cc=lustre-devel@lists.lustre.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.