All of lore.kernel.org
 help / color / mirror / Atom feed
From: James Simmons <jsimmons@infradead.org>
To: Andreas Dilger <adilger@whamcloud.com>,
	Oleg Drokin <green@whamcloud.com>, NeilBrown <neilb@suse.de>
Cc: Chris Horn <chris.horn@hpe.com>,
	Lustre Development List <lustre-devel@lists.lustre.org>
Subject: [lustre-devel] [PATCH 06/49] lnet: Prevent discovery on deleted peer
Date: Thu, 15 Apr 2021 00:01:58 -0400	[thread overview]
Message-ID: <1618459361-17909-7-git-send-email-jsimmons@infradead.org> (raw)
In-Reply-To: <1618459361-17909-1-git-send-email-jsimmons@infradead.org>

From: Chris Horn <chris.horn@hpe.com>

We needn't perform any discovery activities on a peer that has had
lnet_peer_del() called on it.

HPE-bug-id: LUS-9192
WC-bug-id: https://jira.whamcloud.com/browse/LU-13895
Lustre-commit: fd32cd817cba336c ("LU-13895 lnet: Prevent discovery on deleted peer"):1
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Reviewed-on: https://review.whamcloud.com/39605
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
 include/linux/lnet/lib-lnet.h  |  2 ++
 include/linux/lnet/lib-types.h |  4 ++-
 net/lnet/lnet/peer.c           | 73 ++++++++++++++++++++++++------------------
 3 files changed, 46 insertions(+), 33 deletions(-)

diff --git a/include/linux/lnet/lib-lnet.h b/include/linux/lnet/lib-lnet.h
index 1efac9b..2741c6f 100644
--- a/include/linux/lnet/lib-lnet.h
+++ b/include/linux/lnet/lib-lnet.h
@@ -893,6 +893,8 @@ int lnet_get_peer_ni_info(u32 peer_index, u64 *nid,
 {
 	if (!(lp->lp_state & LNET_PEER_MULTI_RAIL))
 		return false;
+	if (lp->lp_state & LNET_PEER_MARK_DELETED)
+		return false;
 	if (lp->lp_state & LNET_PEER_FORCE_PUSH)
 		return true;
 	if (lp->lp_state & LNET_PEER_NO_DISCOVERY)
diff --git a/include/linux/lnet/lib-types.h b/include/linux/lnet/lib-types.h
index f1f4eac5..2424993 100644
--- a/include/linux/lnet/lib-types.h
+++ b/include/linux/lnet/lib-types.h
@@ -750,7 +750,9 @@ struct lnet_peer {
 #define LNET_PEER_RTR_DISCOVERED BIT(17)
 
 /* peer is marked for deletion */
-#define LNET_PEER_MARK_DELETION BIT(18)
+#define LNET_PEER_MARK_DELETION		BIT(18)
+/* lnet_peer_del()/lnet_peer_del_locked() has been called on the peer */
+#define LNET_PEER_MARK_DELETED		BIT(19)
 
 struct lnet_peer_net {
 	/* chain on lp_peer_nets */
diff --git a/net/lnet/lnet/peer.c b/net/lnet/lnet/peer.c
index 48f78ef..34153a8 100644
--- a/net/lnet/lnet/peer.c
+++ b/net/lnet/lnet/peer.c
@@ -450,6 +450,10 @@ void lnet_peer_uninit(void)
 
 	CDEBUG(D_NET, "peer %s\n", libcfs_nid2str(peer->lp_primary_nid));
 
+	spin_lock(&peer->lp_lock);
+	peer->lp_state |= LNET_PEER_MARK_DELETED;
+	spin_unlock(&peer->lp_lock);
+
 	lpni = lnet_get_next_peer_ni_locked(peer, NULL, lpni);
 	while (lpni) {
 		lpni2 = lnet_get_next_peer_ni_locked(peer, NULL, lpni);
@@ -462,9 +466,40 @@ void lnet_peer_uninit(void)
 	return rc2;
 }
 
+/* Discovering this peer is taking too long. Cancel any Ping or Push
+ * that discovery is waiting on by unlinking the relevant MDs. The
+ * lnet_discovery_event_handler() will proceed from here and complete
+ * the cleanup.
+ */
+static void lnet_peer_cancel_discovery(struct lnet_peer *lp)
+{
+	struct lnet_handle_md ping_mdh;
+	struct lnet_handle_md push_mdh;
+
+	LNetInvalidateMDHandle(&ping_mdh);
+	LNetInvalidateMDHandle(&push_mdh);
+
+	spin_lock(&lp->lp_lock);
+	if (lp->lp_state & LNET_PEER_PING_SENT) {
+		ping_mdh = lp->lp_ping_mdh;
+		LNetInvalidateMDHandle(&lp->lp_ping_mdh);
+	}
+	if (lp->lp_state & LNET_PEER_PUSH_SENT) {
+		push_mdh = lp->lp_push_mdh;
+		LNetInvalidateMDHandle(&lp->lp_push_mdh);
+	}
+	spin_unlock(&lp->lp_lock);
+
+	if (!LNetMDHandleIsInvalid(ping_mdh))
+		LNetMDUnlink(ping_mdh);
+	if (!LNetMDHandleIsInvalid(push_mdh))
+		LNetMDUnlink(push_mdh);
+}
+
 static int
 lnet_peer_del(struct lnet_peer *peer)
 {
+	lnet_peer_cancel_discovery(peer);
 	lnet_net_lock(LNET_LOCK_EX);
 	lnet_peer_del_locked(peer);
 	lnet_net_unlock(LNET_LOCK_EX);
@@ -2955,6 +2990,10 @@ static int lnet_peer_deletion(struct lnet_peer *lp)
 	CDEBUG(D_NET, "peer %s(%p) state %#x\n",
 	       libcfs_nid2str(lp->lp_primary_nid), lp, lp->lp_state);
 
+	/* no-op if lnet_peer_del() has already been called on this peer */
+	if (lp->lp_state & LNET_PEER_MARK_DELETED)
+		return 0;
+
 	if (the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING)
 		return -ESHUTDOWN;
 
@@ -3382,37 +3421,6 @@ static void lnet_peer_discovery_error(struct lnet_peer *lp, int error)
 }
 
 /*
- * Discovering this peer is taking too long. Cancel any Ping or Push
- * that discovery is waiting on by unlinking the relevant MDs. The
- * lnet_discovery_event_handler() will proceed from here and complete
- * the cleanup.
- */
-static void lnet_peer_cancel_discovery(struct lnet_peer *lp)
-{
-	struct lnet_handle_md ping_mdh;
-	struct lnet_handle_md push_mdh;
-
-	LNetInvalidateMDHandle(&ping_mdh);
-	LNetInvalidateMDHandle(&push_mdh);
-
-	spin_lock(&lp->lp_lock);
-	if (lp->lp_state & LNET_PEER_PING_SENT) {
-		ping_mdh = lp->lp_ping_mdh;
-		LNetInvalidateMDHandle(&lp->lp_ping_mdh);
-	}
-	if (lp->lp_state & LNET_PEER_PUSH_SENT) {
-		push_mdh = lp->lp_push_mdh;
-		LNetInvalidateMDHandle(&lp->lp_push_mdh);
-	}
-	spin_unlock(&lp->lp_lock);
-
-	if (!LNetMDHandleIsInvalid(ping_mdh))
-		LNetMDUnlink(ping_mdh);
-	if (!LNetMDHandleIsInvalid(push_mdh))
-		LNetMDUnlink(push_mdh);
-}
-
-/*
  * Wait for work to be queued or some other change that must be
  * attended to. Returns non-zero if the discovery thread should shut
  * down.
@@ -3566,7 +3574,8 @@ static int lnet_peer_discovery(void *arg)
 			CDEBUG(D_NET, "peer %s(%p) state %#x\n",
 			       libcfs_nid2str(lp->lp_primary_nid), lp,
 			       lp->lp_state);
-			if (lp->lp_state & LNET_PEER_MARK_DELETION)
+			if (lp->lp_state & (LNET_PEER_MARK_DELETION |
+					    LNET_PEER_MARK_DELETED))
 				rc = lnet_peer_deletion(lp);
 			else if (lp->lp_state & LNET_PEER_DATA_PRESENT)
 				rc = lnet_peer_data_present(lp);
-- 
1.8.3.1

_______________________________________________
lustre-devel mailing list
lustre-devel@lists.lustre.org
http://lists.lustre.org/listinfo.cgi/lustre-devel-lustre.org

  parent reply	other threads:[~2021-04-15  4:03 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-15  4:01 [lustre-devel] [PATCH 00/49] lustre: sync to OpenSFS as of March 30 2021 James Simmons
2021-04-15  4:01 ` [lustre-devel] [PATCH 01/49] lnet: libcfs: Fix for unconfigured arch_stackwalk James Simmons
2021-04-15  4:01 ` [lustre-devel] [PATCH 02/49] lustre: lmv: iput() can safely be passed NULL James Simmons
2021-04-15  4:01 ` [lustre-devel] [PATCH 03/49] lustre: llite: mark extended attr and inode flags James Simmons
2021-04-15  4:01 ` [lustre-devel] [PATCH 04/49] lnet: lnet_notify sets route aliveness incorrectly James Simmons
2021-04-15  4:01 ` [lustre-devel] [PATCH 05/49] lnet: Prevent discovery on peer marked deletion James Simmons
2021-04-15  4:01 ` James Simmons [this message]
2021-04-15  4:01 ` [lustre-devel] [PATCH 07/49] lnet: Transfer disc src NID when merging peers James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 08/49] lnet: Lookup lpni after discovery James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 09/49] lustre: llite: update and fix module loading bug in mounting code James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 10/49] lnet: socklnd: change various ints to bool James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 11/49] lnet: Correct asymmetric route detection James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 12/49] lustre: fixup ldlm_pool and lu_object shrinker failure cases James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 13/49] lustre: log: Add ending newline for some messages James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 14/49] lustre: use with_imp_locked() more broadly James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 15/49] lnet: o2iblnd: change some ints to bool James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 16/49] lustre: lmv: striped directory as subdirectory mount James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 17/49] lustre: llite: create file_operations registration function James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 18/49] lustre: osc: fix performance regression in osc_extent_merge() James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 19/49] lustre: mds: add enums for MDS_ATTR flags James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 20/49] lustre: uapi: remove OBD_IOC_LOV_GET_CONFIG James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 21/49] lustre: sec: fix migrate for encrypted dir James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 22/49] lnet: libcfs: restore LNET_DUMP_ON_PANIC functionality James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 23/49] lustre: ptlrpc: fix ASSERTION on scp_rqbd_posted James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 24/49] lustre: ldlm: not freed req on enqueue James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 25/49] lnet: uapi: move userland only nidstr.h handling James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 26/49] lnet: libcfs: don't depend on sysctl support for debugfs James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 27/49] lustre: ptlrpc: Add a binary heap implementation James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 28/49] lustre: ptlrpc: Implement NRS Delay Policy James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 29/49] lustre: ptlrpc: rename cfs_binheap to simply binheap James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 30/49] lustre: ptlrpc: mark some functions as static James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 31/49] lustre: use tgt_pool for lov layer James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 32/49] lustre: quota: make used for pool correct James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 33/49] lustre: quota: call rhashtable_lookup near params decl James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 34/49] lustre: lov: cancel layout lock on replay deadlock James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 35/49] lustre: obdclass: Protect cl_env_percpu[] James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 36/49] lnet: libcfs: discard cfs_trace_console_buffers[] James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 37/49] lnet: libcfs: discard cfs_trace_copyin_string() James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 38/49] lustre: lmv: don't use lqr_alloc spinlock in lmv James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 39/49] lustre: lov: fault page update cp_lov_index James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 40/49] lustre: update version to 2.14.51 James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 41/49] lustre: llite: mirror extend/copy keeps sparseness James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 42/49] lustre: ptlrpc: don't use list_for_each_entry_safe unnecessarily James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 43/49] lnet: Age peer NI out of recovery James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 44/49] lnet: Only recover known good peer NIs James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 45/49] lnet: Recover peer NI w/exponential backoff interval James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 46/49] lustre: lov: return valid stripe_count/size for PFL files James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 47/49] lnet: convert lpni_refcount to a kref James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 48/49] lustre: lmv: handle default stripe_count=-1 properly James Simmons
2021-04-15  4:02 ` [lustre-devel] [PATCH 49/49] lnet: libcfs: discard cfs_array_alloc() James Simmons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1618459361-17909-7-git-send-email-jsimmons@infradead.org \
    --to=jsimmons@infradead.org \
    --cc=adilger@whamcloud.com \
    --cc=chris.horn@hpe.com \
    --cc=green@whamcloud.com \
    --cc=lustre-devel@lists.lustre.org \
    --cc=neilb@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.