lustre-devel-lustre.org archive mirror
 help / color / mirror / Atom feed
From: James Simmons <jsimmons@infradead.org>
To: Andreas Dilger <adilger@whamcloud.com>,
	Oleg Drokin <green@whamcloud.com>, NeilBrown <neilb@suse.de>
Cc: Chris Horn <chris.horn@hpe.com>,
	Lustre Development List <lustre-devel@lists.lustre.org>
Subject: [lustre-devel] [PATCH 06/49] lnet: Prevent discovery on deleted peer
Date: Thu, 15 Apr 2021 00:01:58 -0400	[thread overview]
Message-ID: <1618459361-17909-7-git-send-email-jsimmons@infradead.org> (raw)
In-Reply-To: <1618459361-17909-1-git-send-email-jsimmons@infradead.org>

From: Chris Horn <chris.horn@hpe.com>

We needn't perform any discovery activities on a peer that has had
lnet_peer_del() called on it.

HPE-bug-id: LUS-9192
WC-bug-id: https://jira.whamcloud.com/browse/LU-13895
Lustre-commit: fd32cd817cba336c ("LU-13895 lnet: Prevent discovery on deleted peer"):1
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Reviewed-on: https://review.whamcloud.com/39605
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
 include/linux/lnet/lib-lnet.h  |  2 ++
 include/linux/lnet/lib-types.h |  4 ++-
 net/lnet/lnet/peer.c           | 73 ++++++++++++++++++++++++------------------
 3 files changed, 46 insertions(+), 33 deletions(-)

diff --git a/include/linux/lnet/lib-lnet.h b/include/linux/lnet/lib-lnet.h
index 1efac9b..2741c6f 100644
--- a/include/linux/lnet/lib-lnet.h
+++ b/include/linux/lnet/lib-lnet.h
@@ -893,6 +893,8 @@ int lnet_get_peer_ni_info(u32 peer_index, u64 *nid,
 {
 	if (!(lp->lp_state & LNET_PEER_MULTI_RAIL))
 		return false;
+	if (lp->lp_state & LNET_PEER_MARK_DELETED)
+		return false;
 	if (lp->lp_state & LNET_PEER_FORCE_PUSH)
 		return true;
 	if (lp->lp_state & LNET_PEER_NO_DISCOVERY)
diff --git a/include/linux/lnet/lib-types.h b/include/linux/lnet/lib-types.h
index f1f4eac5..2424993 100644
--- a/include/linux/lnet/lib-types.h
+++ b/include/linux/lnet/lib-types.h
@@ -750,7 +750,9 @@ struct lnet_peer {
 #define LNET_PEER_RTR_DISCOVERED BIT(17)
 
 /* peer is marked for deletion */
-#define LNET_PEER_MARK_DELETION BIT(18)
+#define LNET_PEER_MARK_DELETION		BIT(18)
+/* lnet_peer_del()/lnet_peer_del_locked() has been called on the peer */
+#define LNET_PEER_MARK_DELETED		BIT(19)
 
 struct lnet_peer_net {
 	/* chain on lp_peer_nets */
diff --git a/net/lnet/lnet/peer.c b/net/lnet/lnet/peer.c
index 48f78ef..34153a8 100644
--- a/net/lnet/lnet/peer.c
+++ b/net/lnet/lnet/peer.c
@@ -450,6 +450,10 @@ void lnet_peer_uninit(void)
 
 	CDEBUG(D_NET, "peer %s\n", libcfs_nid2str(peer->lp_primary_nid));
 
+	spin_lock(&peer->lp_lock);
+	peer->lp_state |= LNET_PEER_MARK_DELETED;
+	spin_unlock(&peer->lp_lock);
+
 	lpni = lnet_get_next_peer_ni_locked(peer, NULL, lpni);
 	while (lpni) {
 		lpni2 = lnet_get_next_peer_ni_locked(peer, NULL, lpni);
@@ -462,9 +466,40 @@ void lnet_peer_uninit(void)
 	return rc2;
 }
 
+/* Discovering this peer is taking too long. Cancel any Ping or Push
+ * that discovery is waiting on by unlinking the relevant MDs. The
+ * lnet_discovery_event_handler() will proceed from here and complete
+ * the cleanup.
+ */
+static void lnet_peer_cancel_discovery(struct lnet_peer *lp)
+{
+	struct lnet_handle_md ping_mdh;
+	struct lnet_handle_md push_mdh;
+
+	LNetInvalidateMDHandle(&ping_mdh);
+	LNetInvalidateMDHandle(&push_mdh);
+
+	spin_lock(&lp->lp_lock);
+	if (lp->lp_state & LNET_PEER_PING_SENT) {
+		ping_mdh = lp->lp_ping_mdh;
+		LNetInvalidateMDHandle(&lp->lp_ping_mdh);
+	}
+	if (lp->lp_state & LNET_PEER_PUSH_SENT) {
+		push_mdh = lp->lp_push_mdh;
+		LNetInvalidateMDHandle(&lp->lp_push_mdh);
+	}
+	spin_unlock(&lp->lp_lock);
+
+	if (!LNetMDHandleIsInvalid(ping_mdh))
+		LNetMDUnlink(ping_mdh);
+	if (!LNetMDHandleIsInvalid(push_mdh))
+		LNetMDUnlink(push_mdh);
+}
+
 static int
 lnet_peer_del(struct lnet_peer *peer)
 {
+	lnet_peer_cancel_discovery(peer);
 	lnet_net_lock(LNET_LOCK_EX);
 	lnet_peer_del_locked(peer);
 	lnet_net_unlock(LNET_LOCK_EX);
@@ -2955,6 +2990,10 @@ static int lnet_peer_deletion(struct lnet_peer *lp)
 	CDEBUG(D_NET, "peer %s(%p) state %#x\n",
 	       libcfs_nid2str(lp->lp_primary_nid), lp, lp->lp_state);
 
+	/* no-op if lnet_peer_del() has already been called on this peer */
+	if (lp->lp_state & LNET_PEER_MARK_DELETED)
+		return 0;
+
 	if (the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING)
 		return -ESHUTDOWN;
 
@@ -3382,37 +3421,6 @@ static void lnet_peer_discovery_error(struct lnet_peer *lp, int error)
 }
 
 /*
- * Discovering this peer is taking too long. Cancel any Ping or Push
- * that discovery is waiting on by unlinking the relevant MDs. The
- * lnet_discovery_event_handler() will proceed from here and complete
- * the cleanup.
- */
-static void lnet_peer_cancel_discovery(struct lnet_peer *lp)
-{
-	struct lnet_handle_md ping_mdh;
-	struct lnet_handle_md push_mdh;
-
-	LNetInvalidateMDHandle(&ping_mdh);
-	LNetInvalidateMDHandle(&push_mdh);
-
-	spin_lock(&lp->lp_lock);
-	if (lp->lp_state & LNET_PEER_PING_SENT) {
-		ping_mdh = lp->lp_ping_mdh;
-		LNetInvalidateMDHandle(&lp->lp_ping_mdh);
-	}
-	if (lp->lp_state & LNET_PEER_PUSH_SENT) {
-		push_mdh = lp->lp_push_mdh;
-		LNetInvalidateMDHandle(&lp->lp_push_mdh);
-	}
-	spin_unlock(&lp->lp_lock);
-
-	if (!LNetMDHandleIsInvalid(ping_mdh))
-		LNetMDUnlink(ping_mdh);
-	if (!LNetMDHandleIsInvalid(push_mdh))
-		LNetMDUnlink(push_mdh);
-}
-
-/*
  * Wait for work to be queued or some other change that must be
  * attended to. Returns non-zero if the discovery thread should shut
  * down.
@@ -3566,7 +3574,8 @@ static int lnet_peer_discovery(void *arg)
 			CDEBUG(D_NET, "peer %s(%p) state %#x\n",
 			       libcfs_nid2str(lp->lp_primary_nid), lp,
 			       lp->lp_state);
-			if (lp->lp_state & LNET_PEER_MARK_DELETION)
+			if (lp->lp_state & (LNET_PEER_MARK_DELETION |
+					    LNET_PEER_MARK_DELETED))
 				rc = lnet_peer_deletion(lp);
 			else if (lp->lp_state & LNET_PEER_DATA_PRESENT)
 				rc = lnet_peer_data_present(lp);
-- 
1.8.3.1

_______________________________________________
lustre-devel mailing list
lustre-devel@lists.lustre.org
http://lists.lustre.org/listinfo.cgi/lustre-devel-lustre.org

  parent reply	other threads:[~2021-04-15  4:03 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-15  4:01 [lustre-devel] [PATCH 00/49] lustre: sync to OpenSFS as of March 30 2021 James Simmons
2021-04-15  4:01 ` [lustre-devel] [PATCH 01/49] lnet: libcfs: Fix for unconfigured arch_stackwalk James Simmons
2021-04-15  4:01 ` [lustre-devel] [PATCH 02/49] lustre: lmv: iput() can safely be passed NULL James Simmons
2021-04-15  4:01 ` [lustre-devel] [PATCH 03/49] lustre: llite: mark extended attr and inode flags James Simmons
2021-04-15  4:01 ` [lustre-devel] [PATCH 04/49] lnet: lnet_notify sets route aliveness incorrectly James Simmons
2021-04-15  4:01 ` [lustre-devel] [PATCH 05/49] lnet: Prevent discovery on peer marked deletion James Simmons
2021-04-15  4:01 ` James Simmons [this message]
2021-04-15  4:01 ` [lustre-devel] [PATCH 07/49] lnet: Transfer disc src NID when merging peers James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 08/49] lnet: Lookup lpni after discovery James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 09/49] lustre: llite: update and fix module loading bug in mounting code James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 10/49] lnet: socklnd: change various ints to bool James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 11/49] lnet: Correct asymmetric route detection James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 12/49] lustre: fixup ldlm_pool and lu_object shrinker failure cases James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 13/49] lustre: log: Add ending newline for some messages James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 14/49] lustre: use with_imp_locked() more broadly James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 15/49] lnet: o2iblnd: change some ints to bool James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 16/49] lustre: lmv: striped directory as subdirectory mount James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 17/49] lustre: llite: create file_operations registration function James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 18/49] lustre: osc: fix performance regression in osc_extent_merge() James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 19/49] lustre: mds: add enums for MDS_ATTR flags James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 20/49] lustre: uapi: remove OBD_IOC_LOV_GET_CONFIG James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 21/49] lustre: sec: fix migrate for encrypted dir James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 22/49] lnet: libcfs: restore LNET_DUMP_ON_PANIC functionality James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 23/49] lustre: ptlrpc: fix ASSERTION on scp_rqbd_posted James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 24/49] lustre: ldlm: not freed req on enqueue James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 25/49] lnet: uapi: move userland only nidstr.h handling James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 26/49] lnet: libcfs: don't depend on sysctl support for debugfs James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 27/49] lustre: ptlrpc: Add a binary heap implementation James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 28/49] lustre: ptlrpc: Implement NRS Delay Policy James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 29/49] lustre: ptlrpc: rename cfs_binheap to simply binheap James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 30/49] lustre: ptlrpc: mark some functions as static James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 31/49] lustre: use tgt_pool for lov layer James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 32/49] lustre: quota: make used for pool correct James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 33/49] lustre: quota: call rhashtable_lookup near params decl James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 34/49] lustre: lov: cancel layout lock on replay deadlock James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 35/49] lustre: obdclass: Protect cl_env_percpu[] James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 36/49] lnet: libcfs: discard cfs_trace_console_buffers[] James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 37/49] lnet: libcfs: discard cfs_trace_copyin_string() James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 38/49] lustre: lmv: don't use lqr_alloc spinlock in lmv James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 39/49] lustre: lov: fault page update cp_lov_index James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 40/49] lustre: update version to 2.14.51 James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 41/49] lustre: llite: mirror extend/copy keeps sparseness James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 42/49] lustre: ptlrpc: don't use list_for_each_entry_safe unnecessarily James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 43/49] lnet: Age peer NI out of recovery James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 44/49] lnet: Only recover known good peer NIs James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 45/49] lnet: Recover peer NI w/exponential backoff interval James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 46/49] lustre: lov: return valid stripe_count/size for PFL files James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 47/49] lnet: convert lpni_refcount to a kref James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 48/49] lustre: lmv: handle default stripe_count=-1 properly James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 49/49] lnet: libcfs: discard cfs_array_alloc() James Simmons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1618459361-17909-7-git-send-email-jsimmons@infradead.org \
    --to=jsimmons@infradead.org \
    --cc=adilger@whamcloud.com \
    --cc=chris.horn@hpe.com \
    --cc=green@whamcloud.com \
    --cc=lustre-devel@lists.lustre.org \
    --cc=neilb@suse.de \
    --subject='Re: [lustre-devel] [PATCH 06/49] lnet: Prevent discovery on deleted peer' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).