All of lore.kernel.org
 help / color / mirror / Atom feed
From: James Simmons <jsimmons@infradead.org>
To: Andreas Dilger <adilger@whamcloud.com>,
	Oleg Drokin <green@whamcloud.com>, NeilBrown <neilb@suse.de>
Cc: Serguei Smirnov <ssmirnov@whamcloud.com>,
	Amir Shehata <ashehata@whamcloud.com>,
	Lustre Development List <lustre-devel@lists.lustre.org>
Subject: [lustre-devel] [PATCH 10/41] lnet: Select NI/peer NI with highest prio
Date: Sun,  4 Apr 2021 20:50:39 -0400	[thread overview]
Message-ID: <1617583870-32029-11-git-send-email-jsimmons@infradead.org> (raw)
In-Reply-To: <1617583870-32029-1-git-send-email-jsimmons@infradead.org>

From: Amir Shehata <ashehata@whamcloud.com>

Modify the selection algorithm to select the highest priority
local and peer NI. Health always trumps all other selection
criteria

WC-bug-id: https://jira.whamcloud.com/browse/LU-9121
Lustre-commit: 374fcb2caea3ca0 ("LU-9121 lnet: Select NI/peer NI with highest prio")
Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/34351
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
 net/lnet/lnet/lib-move.c | 148 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 95 insertions(+), 53 deletions(-)

diff --git a/net/lnet/lnet/lib-move.c b/net/lnet/lnet/lib-move.c
index 8763c3f..166ebcc 100644
--- a/net/lnet/lnet/lib-move.c
+++ b/net/lnet/lnet/lib-move.c
@@ -1112,65 +1112,91 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 	 */
 	struct lnet_peer_ni *lpni = NULL;
 	int best_lpni_credits =  (best_lpni) ? best_lpni->lpni_txcredits :
-					       INT_MIN;
+				 INT_MIN;
 	int best_lpni_healthv = (best_lpni) ?
 				atomic_read(&best_lpni->lpni_healthv) : 0;
-	bool preferred = false;
-	bool ni_is_pref;
+	bool best_lpni_is_preferred = false;
+	bool lpni_is_preferred;
 	int lpni_healthv;
+	u32 lpni_sel_prio;
+	u32 best_sel_prio = LNET_MAX_SELECTION_PRIORITY;
 
 	while ((lpni = lnet_get_next_peer_ni_locked(peer, peer_net, lpni))) {
 		/* if the best_ni we've chosen aleady has this lpni
 		 * preferred, then let's use it
 		 */
 		if (best_ni) {
-			ni_is_pref = lnet_peer_is_pref_nid_locked(lpni,
-								  best_ni->ni_nid);
-			CDEBUG(D_NET, "%s ni_is_pref = %d\n",
-			       libcfs_nid2str(best_ni->ni_nid), ni_is_pref);
+			lpni_is_preferred = lnet_peer_is_pref_nid_locked(lpni,
+									 best_ni->ni_nid);
+			CDEBUG(D_NET, "%s lpni_is_preferred = %d\n",
+			       libcfs_nid2str(best_ni->ni_nid),
+			       lpni_is_preferred);
 		} else {
-			ni_is_pref = false;
+			lpni_is_preferred = false;
 		}
 
 		lpni_healthv = atomic_read(&lpni->lpni_healthv);
+		lpni_sel_prio = lpni->lpni_sel_priority;
 
 		if (best_lpni)
-			CDEBUG(D_NET, "%s c:[%d, %d], s:[%d, %d]\n",
+			CDEBUG(D_NET,
+			       "n:[%s, %s] h:[%d, %d] p:[%d, %d] c:[%d, %d] s:[%d, %d]\n",
 			       libcfs_nid2str(lpni->lpni_nid),
+			       libcfs_nid2str(best_lpni->lpni_nid),
+			       lpni_healthv, best_lpni_healthv,
+			       lpni_sel_prio, best_sel_prio,
 			       lpni->lpni_txcredits, best_lpni_credits,
 			       lpni->lpni_seq, best_lpni->lpni_seq);
+		else
+			goto select_lpni;
 
 		/* pick the healthiest peer ni */
 		if (lpni_healthv < best_lpni_healthv) {
 			continue;
 		} else if (lpni_healthv > best_lpni_healthv) {
-			best_lpni_healthv = lpni_healthv;
+			if (best_lpni_is_preferred)
+				best_lpni_is_preferred = false;
+			goto select_lpni;
+		}
+
+		if (lpni_sel_prio > best_sel_prio) {
+			continue;
+		} else if (lpni_sel_prio < best_sel_prio) {
+			if (best_lpni_is_preferred)
+				best_lpni_is_preferred = false;
+			goto select_lpni;
+		}
+
 		/* if this is a preferred peer use it */
-		} else if (!preferred && ni_is_pref) {
-			preferred = true;
-		} else if (preferred && !ni_is_pref) {
+		if (!best_lpni_is_preferred && lpni_is_preferred) {
+			best_lpni_is_preferred = true;
+			goto select_lpni;
+		} else if (best_lpni_is_preferred && !lpni_is_preferred) {
 			/* this is not the preferred peer so let's ignore
 			 * it.
 			 */
 			continue;
-		} else if (lpni->lpni_txcredits < best_lpni_credits) {
+		}
+
+		if (lpni->lpni_txcredits < best_lpni_credits)
 			/* We already have a peer that has more credits
 			 * available than this one. No need to consider
 			 * this peer further.
 			 */
 			continue;
-		} else if (lpni->lpni_txcredits == best_lpni_credits) {
-			/* The best peer found so far and the current peer
-			 * have the same number of available credits let's
-			 * make sure to select between them using Round
-			 * Robin
-			 */
-			if (best_lpni) {
-				if (best_lpni->lpni_seq <= lpni->lpni_seq)
-					continue;
-			}
-		}
+		else if (lpni->lpni_txcredits > best_lpni_credits)
+			goto select_lpni;
 
+		/* The best peer found so far and the current peer
+		 * have the same number of available credits let's
+		 * make sure to select between them using Round Robin
+		 */
+		if (best_lpni && best_lpni->lpni_seq <= lpni->lpni_seq)
+			continue;
+select_lpni:
+		best_lpni_is_preferred = lpni_is_preferred;
+		best_lpni_healthv = lpni_healthv;
+		best_sel_prio = lpni_sel_prio;
 		best_lpni = lpni;
 		best_lpni_credits = lpni->lpni_txcredits;
 	}
@@ -1178,7 +1204,7 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 	/* if we still can't find a peer ni then we can't reach it */
 	if (!best_lpni) {
 		u32 net_id = (peer_net) ? peer_net->lpn_net_id :
-			LNET_NIDNET(dst_nid);
+			     LNET_NIDNET(dst_nid);
 		CDEBUG(D_NET, "no peer_ni found on peer net %s\n",
 		       libcfs_net2str(net_id));
 		return NULL;
@@ -1396,6 +1422,7 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 	unsigned int shortest_distance;
 	int best_credits;
 	int best_healthv;
+	u32 best_sel_prio;
 
 	/* If there is no peer_ni that we can send to on this network,
 	 * then there is no point in looking for a new best_ni here.
@@ -1404,6 +1431,7 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 		return best_ni;
 
 	if (!best_ni) {
+		best_sel_prio = LNET_MAX_SELECTION_PRIORITY;
 		shortest_distance = UINT_MAX;
 		best_credits = INT_MIN;
 		best_healthv = 0;
@@ -1412,6 +1440,7 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 						     best_ni->ni_dev_cpt);
 		best_credits = atomic_read(&best_ni->ni_tx_credits);
 		best_healthv = atomic_read(&best_ni->ni_healthv);
+		best_sel_prio = best_ni->ni_sel_priority;
 	}
 
 	while ((ni = lnet_get_next_ni_locked(local_net, ni))) {
@@ -1419,10 +1448,12 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 		int ni_credits;
 		int ni_healthv;
 		int ni_fatal;
+		u32 ni_sel_prio;
 
 		ni_credits = atomic_read(&ni->ni_tx_credits);
 		ni_healthv = atomic_read(&ni->ni_healthv);
 		ni_fatal = atomic_read(&ni->ni_fatal_error_on);
+		ni_sel_prio = ni->ni_sel_priority;
 
 		/*
 		 * calculate the distance from the CPT on which
@@ -1433,13 +1464,6 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 					    md_cpt,
 					    ni->ni_dev_cpt);
 
-		CDEBUG(D_NET,
-		       "compare ni %s [c:%d, d:%d, s:%d] with best_ni %s [c:%d, d:%d, s:%d]\n",
-		       libcfs_nid2str(ni->ni_nid), ni_credits, distance,
-		       ni->ni_seq, (best_ni) ? libcfs_nid2str(best_ni->ni_nid)
-			: "not seleced", best_credits, shortest_distance,
-			(best_ni) ? best_ni->ni_seq : 0);
-
 		/*
 		 * All distances smaller than the NUMA range
 		 * are treated equally.
@@ -1451,30 +1475,48 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 		 * Select on health, shorter distance, available
 		 * credits, then round-robin.
 		 */
-		if (ni_fatal) {
+		if (ni_fatal)
 			continue;
-		} else if (ni_healthv < best_healthv) {
+
+		if (best_ni)
+			CDEBUG(D_NET,
+			       "compare ni %s [c:%d, d:%d, s:%d, p:%u] with best_ni %s [c:%d, d:%d, s:%d, p:%u]\n",
+			       libcfs_nid2str(ni->ni_nid), ni_credits, distance,
+			       ni->ni_seq, ni_sel_prio,
+			       (best_ni) ? libcfs_nid2str(best_ni->ni_nid)
+			       : "not selected", best_credits, shortest_distance,
+			       (best_ni) ? best_ni->ni_seq : 0,
+			       best_sel_prio);
+		else
+			goto select_ni;
+
+		if (ni_healthv < best_healthv)
 			continue;
-		} else if (ni_healthv > best_healthv) {
-			best_healthv = ni_healthv;
-			/* If we're going to prefer this ni because it's
-			 * the healthiest, then we should set the
-			 * shortest_distance in the algorithm in case
-			 * there are multiple NIs with the same health but
-			 * different distances.
-			 */
-			if (distance < shortest_distance)
-				shortest_distance = distance;
-		} else if (distance > shortest_distance) {
+		else if (ni_healthv > best_healthv)
+			goto select_ni;
+
+		if (ni_sel_prio > best_sel_prio)
 			continue;
-		} else if (distance < shortest_distance) {
-			shortest_distance = distance;
-		} else if (ni_credits < best_credits) {
+		else if (ni_sel_prio < best_sel_prio)
+			goto select_ni;
+
+		if (distance > shortest_distance)
 			continue;
-		} else if (ni_credits == best_credits) {
-			if (best_ni && best_ni->ni_seq <= ni->ni_seq)
-				continue;
-		}
+		else if (distance < shortest_distance)
+			goto select_ni;
+
+		if (ni_credits < best_credits)
+			continue;
+		else if (ni_credits > best_credits)
+			goto select_ni;
+
+		if (best_ni && best_ni->ni_seq <= ni->ni_seq)
+			continue;
+
+select_ni:
+		best_sel_prio = ni_sel_prio;
+		shortest_distance = distance;
+		best_healthv = ni_healthv;
 		best_ni = ni;
 		best_credits = ni_credits;
 	}
-- 
1.8.3.1

_______________________________________________
lustre-devel mailing list
lustre-devel@lists.lustre.org
http://lists.lustre.org/listinfo.cgi/lustre-devel-lustre.org

  parent reply	other threads:[~2021-04-05  0:51 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-05  0:50 [lustre-devel] [PATCH 00/41] lustre: sync to OpenSFS branch as of March 1 James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 01/41] lustre: llite: data corruption due to RPC reordering James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 02/41] lustre: llite: make readahead aware of hints James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 03/41] lustre: lov: avoid NULL dereference in cleanup James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 04/41] lustre: llite: quiet spurious ioctl warning James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 05/41] lustre: ptlrpc: do not output error when imp_sec is freed James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 06/41] lustre: update version to 2.14.0 James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 07/41] lnet: UDSP storage and marshalled structs James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 08/41] lnet: foundation patch for selection mod James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 09/41] lnet: Preferred gateway selection James Simmons
2021-04-05  0:50 ` James Simmons [this message]
2021-04-05  0:50 ` [lustre-devel] [PATCH 11/41] lnet: select best peer and local net James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 12/41] lnet: UDSP handling James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 13/41] lnet: Apply UDSP on local and remote NIs James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 14/41] lnet: Add the kernel level Marshalling API James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 15/41] lnet: Add the kernel level De-Marshalling API James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 16/41] lnet: Add the ioctl handler for "add policy" James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 17/41] lnet: ioctl handler for "delete policy" James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 18/41] lnet: ioctl handler for get policy info James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 19/41] lustre: update version to 2.14.50 James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 20/41] lustre: gss: handle empty reqmsg in sptlrpc_req_ctx_switch James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 21/41] lustre: sec: file ioctls to handle encryption policies James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 22/41] lustre: obdclass: try to skip corrupted llog records James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 23/41] lustre: lov: fix layout generation inc for mirror split James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 24/41] lnet: modify assertion in lnet_post_send_locked James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 25/41] lustre: lov: fixes bitfield in lod qos code James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 26/41] lustre: lov: grant deadlock if same OSC in two components James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 27/41] lustre: change EWOULDBLOCK to EAGAIN James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 28/41] lsutre: ldlm: return error from ldlm_namespace_new() James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 29/41] lustre: llite: remove unused ll_teardown_mmaps() James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 30/41] lustre: lov: style cleanups in lov_set_osc_active() James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 31/41] lustre: change various operations structs to const James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 32/41] lustre: mark strings in char arrays as const James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 33/41] lustre: convert snprintf to scnprintf as appropriate James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 34/41] lustre: remove non-static 'inline' markings James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 35/41] lustre: llite: use is_root_inode() James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 36/41] lnet: libcfs: discard cfs_firststr James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 37/41] lnet: place wire protocol data int own headers James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 38/41] lnet: libcfs: use wait_event_timeout() in tracefiled() James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 39/41] lnet: use init_wait() rather than init_waitqueue_entry() James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 40/41] lnet: discard LNET_MD_PHYS James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 41/41] lnet: o2iblnd: convert peers hash table to hashtable.h James Simmons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1617583870-32029-11-git-send-email-jsimmons@infradead.org \
    --to=jsimmons@infradead.org \
    --cc=adilger@whamcloud.com \
    --cc=ashehata@whamcloud.com \
    --cc=green@whamcloud.com \
    --cc=lustre-devel@lists.lustre.org \
    --cc=neilb@suse.de \
    --cc=ssmirnov@whamcloud.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.