lustre-devel-lustre.org archive mirror
 help / color / mirror / Atom feed
From: James Simmons <jsimmons@infradead.org>
To: Andreas Dilger <adilger@whamcloud.com>,
	Oleg Drokin <green@whamcloud.com>, NeilBrown <neilb@suse.de>
Cc: Amir Shehata <ashehata@whamcloud.com>,
	Lustre Development List <lustre-devel@lists.lustre.org>
Subject: [lustre-devel] [PATCH 09/41] lnet: Preferred gateway selection
Date: Sun,  4 Apr 2021 20:50:38 -0400	[thread overview]
Message-ID: <1617583870-32029-10-git-send-email-jsimmons@infradead.org> (raw)
In-Reply-To: <1617583870-32029-1-git-send-email-jsimmons@infradead.org>

From: Amir Shehata <ashehata@whamcloud.com>

Add mechanism for managing preferred gateway lists.
When selecting a route through a gateway, if there exists
a preferred gateway list for the destination peer, then choose
the preferred gateway. If there are multiple preferred
gateways, select among them using, in order of decreasing
importance: route priority, number of hops, number of available
tx credits on the associated lpni, and route sequence counters.
If there are no preferred routes, select the best route
available using the same criteria.

WC-bug-id: https://jira.whamcloud.com/browse/LU-9121
Lustre-commit: 66acff74d0da31e ("LU-9121 lnet: Preferred gateway selection")
Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/34353
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
 include/linux/lnet/lib-lnet.h |   5 ++
 net/lnet/lnet/lib-move.c      | 119 ++++++++++++++++++++++++++++++------------
 net/lnet/lnet/peer.c          | 111 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 201 insertions(+), 34 deletions(-)

diff --git a/include/linux/lnet/lib-lnet.h b/include/linux/lnet/lib-lnet.h
index 927ca44..90f18a0 100644
--- a/include/linux/lnet/lib-lnet.h
+++ b/include/linux/lnet/lib-lnet.h
@@ -806,6 +806,11 @@ struct lnet_peer_ni *lnet_peer_get_ni_locked(struct lnet_peer *lp,
 struct lnet_peer_net *lnet_peer_get_net_locked(struct lnet_peer *peer,
 					       u32 net_id);
 bool lnet_peer_is_pref_nid_locked(struct lnet_peer_ni *lpni, lnet_nid_t nid);
+int lnet_peer_add_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid);
+void lnet_peer_clr_pref_nids(struct lnet_peer_ni *lpni);
+bool lnet_peer_is_pref_rtr_locked(struct lnet_peer_ni *lpni, lnet_nid_t gw_nid);
+void lnet_peer_clr_pref_rtrs(struct lnet_peer_ni *lpni);
+int lnet_peer_add_pref_rtr(struct lnet_peer_ni *lpni, lnet_nid_t nid);
 int lnet_peer_ni_set_non_mr_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid);
 int lnet_add_peer_ni(lnet_nid_t key_nid, lnet_nid_t nid, bool mr);
 int lnet_del_peer_ni(lnet_nid_t key_nid, lnet_nid_t nid);
diff --git a/net/lnet/lnet/lib-move.c b/net/lnet/lnet/lib-move.c
index 4687acd..8763c3f 100644
--- a/net/lnet/lnet/lib-move.c
+++ b/net/lnet/lnet/lib-move.c
@@ -1097,24 +1097,6 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 	}
 }
 
-static int
-lnet_compare_gw_lpnis(struct lnet_peer_ni *p1, struct lnet_peer_ni *p2)
-{
-	if (p1->lpni_txqnob < p2->lpni_txqnob)
-		return 1;
-
-	if (p1->lpni_txqnob > p2->lpni_txqnob)
-		return -1;
-
-	if (p1->lpni_txcredits > p2->lpni_txcredits)
-		return 1;
-
-	if (p1->lpni_txcredits < p2->lpni_txcredits)
-		return -1;
-
-	return 0;
-}
-
 static struct lnet_peer_ni *
 lnet_select_peer_ni(struct lnet_ni *best_ni, lnet_nid_t dst_nid,
 		    struct lnet_peer *peer,
@@ -1246,6 +1228,24 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 	return NULL;
 }
 
+static int
+lnet_compare_gw_lpnis(struct lnet_peer_ni *lpni1, struct lnet_peer_ni *lpni2)
+{
+	if (lpni1->lpni_txqnob < lpni2->lpni_txqnob)
+		return 1;
+
+	if (lpni1->lpni_txqnob > lpni2->lpni_txqnob)
+		return -1;
+
+	if (lpni1->lpni_txcredits > lpni2->lpni_txcredits)
+		return 1;
+
+	if (lpni1->lpni_txcredits < lpni2->lpni_txcredits)
+		return -1;
+
+	return 0;
+}
+
 /* Compare route priorities and hop counts */
 static int
 lnet_compare_routes(struct lnet_route *r1, struct lnet_route *r2)
@@ -1270,6 +1270,7 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 
 static struct lnet_route *
 lnet_find_route_locked(struct lnet_remotenet *rnet, u32 src_net,
+		       struct lnet_peer_ni *remote_lpni,
 		       struct lnet_route **prev_route,
 		       struct lnet_peer_ni **gwni)
 {
@@ -1278,6 +1279,8 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 	struct lnet_route *last_route;
 	struct lnet_route *route;
 	int rc;
+	bool best_rte_is_preferred = false;
+	lnet_nid_t gw_pnid;
 
 	CDEBUG(D_NET, "Looking up a route to %s, from %s\n",
 	       libcfs_net2str(rnet->lrn_net), libcfs_net2str(src_net));
@@ -1287,44 +1290,76 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 	list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
 		if (!lnet_is_route_alive(route))
 			continue;
+		gw_pnid = route->lr_gateway->lp_primary_nid;
+
+		/* no protection on below fields, but it's harmless */
+		if (last_route && (last_route->lr_seq - route->lr_seq < 0))
+			last_route = route;
 
-		/* Restrict the selection of the router NI on the src_net
-		 * provided. If the src_net is LNET_NID_ANY, then select
-		 * the best interface available.
+		/* if the best route found is in the preferred list then
+		 * tag it as preferred and use it later on. But if we
+		 * didn't find any routes which are on the preferred list
+		 * then just use the best route possible.
 		 */
-		if (!best_route) {
+		rc = lnet_peer_is_pref_rtr_locked(remote_lpni, gw_pnid);
+
+		if (!best_route || (rc && !best_rte_is_preferred)) {
+			/* Restrict the selection of the router NI on the
+			 * src_net provided. If the src_net is LNET_NID_ANY,
+			 * then select the best interface available.
+			 */
 			lpni = lnet_find_best_lpni(NULL, LNET_NID_ANY,
 						   route->lr_gateway,
 						   src_net);
-			if (lpni) {
-				best_route = route;
-				last_route = route;
-				best_gw_ni = lpni;
-			} else {
+			if (!lpni) {
 				CDEBUG(D_NET,
 				       "Gateway %s does not have a peer NI on net %s\n",
-				       libcfs_nid2str(route->lr_gateway->lp_primary_nid),
+				       libcfs_nid2str(gw_pnid),
 				       libcfs_net2str(src_net));
+				continue;
 			}
-			continue;
 		}
 
-		/* no protection on below fields, but it's harmless */
-		if (last_route->lr_seq - route->lr_seq < 0)
+		if (rc && !best_rte_is_preferred) {
+			/* This is the first preferred route we found,
+			 * so it beats any route found previously
+			 */
+			best_route = route;
+			if (!last_route)
+				last_route = route;
+			best_gw_ni = lpni;
+			best_rte_is_preferred = true;
+			CDEBUG(D_NET, "preferred gw = %s\n",
+			       libcfs_nid2str(gw_pnid));
+			continue;
+		} else if ((!rc) && best_rte_is_preferred)
+			/* The best route we found so far is in the preferred
+			 * list, so it beats any non-preferred route
+			 */
+			continue;
+
+		if (!best_route) {
+			best_route = route;
 			last_route = route;
+			best_gw_ni = lpni;
+			continue;
+		}
 
 		rc = lnet_compare_routes(route, best_route);
 		if (rc == -1)
 			continue;
 
+		/* Restrict the selection of the router NI on the
+		 * src_net provided. If the src_net is LNET_NID_ANY,
+		 * then select the best interface available.
+		 */
 		lpni = lnet_find_best_lpni(NULL, LNET_NID_ANY,
 					   route->lr_gateway,
 					   src_net);
-		/* restrict the lpni on the src_net if specified */
 		if (!lpni) {
 			CDEBUG(D_NET,
 			       "Gateway %s does not have a peer NI on net %s\n",
-			       libcfs_nid2str(route->lr_gateway->lp_primary_nid),
+			       libcfs_nid2str(gw_pnid),
 			       libcfs_net2str(src_net));
 			continue;
 		}
@@ -1805,6 +1840,8 @@ struct lnet_ni *
 	lnet_nid_t src_nid = (sd->sd_src_nid != LNET_NID_ANY) ? sd->sd_src_nid :
 			      sd->sd_best_ni ? sd->sd_best_ni->ni_nid :
 			      LNET_NID_ANY;
+	int best_lpn_healthv = 0;
+	u32 best_lpn_sel_prio = LNET_MAX_SELECTION_PRIORITY;
 
 	CDEBUG(D_NET, "using src nid %s for route restriction\n",
 	       libcfs_nid2str(src_nid));
@@ -1861,9 +1898,22 @@ struct lnet_ni *
 					best_rnet = rnet;
 				}
 
-				if (best_lpn->lpn_seq <= lpn->lpn_seq)
+				/* select the preferred peer net */
+				if (best_lpn_healthv > lpn->lpn_healthv)
 					continue;
+				else if (best_lpn_healthv < lpn->lpn_healthv)
+					goto use_lpn;
 
+				if (best_lpn_sel_prio < lpn->lpn_sel_priority)
+					continue;
+				else if (best_lpn_sel_prio > lpn->lpn_sel_priority)
+					goto use_lpn;
+
+				if (best_lpn->lpn_seq <= lpn->lpn_seq)
+					continue;
+use_lpn:
+				best_lpn_healthv = lpn->lpn_healthv;
+				best_lpn_sel_prio = lpn->lpn_sel_priority;
 				best_lpn = lpn;
 				best_rnet = rnet;
 			}
@@ -1905,6 +1955,7 @@ struct lnet_ni *
 		 */
 		best_route = lnet_find_route_locked(best_rnet,
 						    LNET_NIDNET(src_nid),
+						    sd->sd_best_lpni,
 						    &last_route, &gwni);
 		if (!best_route) {
 			CERROR("no route to %s from %s\n",
diff --git a/net/lnet/lnet/peer.c b/net/lnet/lnet/peer.c
index 60e6b51..bbd43c8 100644
--- a/net/lnet/lnet/peer.c
+++ b/net/lnet/lnet/peer.c
@@ -894,6 +894,94 @@ struct lnet_peer_ni *
 	wake_up(&the_lnet.ln_dc_waitq);
 }
 
+/* find the NID in the preferred gateways for the remote peer
+ * return:
+ *	false: list is not empty and NID is not preferred
+ *	false: list is empty
+ *	true: nid is found in the list
+ */
+bool
+lnet_peer_is_pref_rtr_locked(struct lnet_peer_ni *lpni,
+			     lnet_nid_t gw_nid)
+{
+	struct lnet_nid_list *ne;
+
+	CDEBUG(D_NET, "%s: rtr pref emtpy: %d\n",
+	       libcfs_nid2str(lpni->lpni_nid),
+	       list_empty(&lpni->lpni_rtr_pref_nids));
+
+	if (list_empty(&lpni->lpni_rtr_pref_nids))
+		return false;
+
+	/* iterate through all the preferred NIDs and see if any of them
+	 * matches the provided gw_nid
+	 */
+	list_for_each_entry(ne, &lpni->lpni_rtr_pref_nids, nl_list) {
+		CDEBUG(D_NET, "Comparing pref %s with gw %s\n",
+		       libcfs_nid2str(ne->nl_nid),
+		       libcfs_nid2str(gw_nid));
+		if (ne->nl_nid == gw_nid)
+			return true;
+	}
+
+	return false;
+}
+
+void
+lnet_peer_clr_pref_rtrs(struct lnet_peer_ni *lpni)
+{
+	struct list_head zombies;
+	struct lnet_nid_list *ne;
+	struct lnet_nid_list *tmp;
+	int cpt = lpni->lpni_cpt;
+
+	INIT_LIST_HEAD(&zombies);
+
+	lnet_net_lock(cpt);
+	list_splice_init(&lpni->lpni_rtr_pref_nids, &zombies);
+	lnet_net_unlock(cpt);
+
+	list_for_each_entry_safe(ne, tmp, &zombies, nl_list) {
+		list_del(&ne->nl_list);
+		kfree(ne);
+	}
+}
+
+int
+lnet_peer_add_pref_rtr(struct lnet_peer_ni *lpni,
+		       lnet_nid_t gw_nid)
+{
+	int cpt = lpni->lpni_cpt;
+	struct lnet_nid_list *ne = NULL;
+
+	/* This function is called with api_mutex held. When the api_mutex
+	 * is held the list can not be modified, as it is only modified as
+	 * a result of applying a UDSP and that happens under api_mutex
+	 * lock.
+	 */
+	__must_hold(&the_lnet.ln_api_mutex);
+
+	list_for_each_entry(ne, &lpni->lpni_rtr_pref_nids, nl_list) {
+		if (ne->nl_nid == gw_nid)
+			return -EEXIST;
+	}
+
+	ne = kzalloc_cpt(sizeof(*ne), GFP_KERNEL, cpt);
+	if (!ne)
+		return -ENOMEM;
+
+	ne->nl_nid = gw_nid;
+
+	/* Lock the cpt to protect against addition and checks in the
+	 * selection algorithm
+	 */
+	lnet_net_lock(cpt);
+	list_add(&ne->nl_list, &lpni->lpni_rtr_pref_nids);
+	lnet_net_unlock(cpt);
+
+	return 0;
+}
+
 /*
  * Test whether a ni is a preferred ni for this peer_ni, e.g, whether
  * this is a preferred point-to-point path. Call with lnet_net_lock in
@@ -1123,6 +1211,29 @@ struct lnet_peer_ni *
 	return rc;
 }
 
+void
+lnet_peer_clr_pref_nids(struct lnet_peer_ni *lpni)
+{
+	struct list_head zombies;
+	struct lnet_nid_list *ne;
+	struct lnet_nid_list *tmp;
+
+	INIT_LIST_HEAD(&zombies);
+
+	lnet_net_lock(LNET_LOCK_EX);
+	if (lpni->lpni_pref_nnids == 1)
+		lpni->lpni_pref.nid = LNET_NID_ANY;
+	else if (lpni->lpni_pref_nnids > 1)
+		list_splice_init(&lpni->lpni_pref.nids, &zombies);
+	lpni->lpni_pref_nnids = 0;
+	lnet_net_unlock(LNET_LOCK_EX);
+
+	list_for_each_entry_safe(ne, tmp, &zombies, nl_list) {
+		list_del_init(&ne->nl_list);
+		kfree(ne);
+	}
+}
+
 lnet_nid_t
 lnet_peer_primary_nid_locked(lnet_nid_t nid)
 {
-- 
1.8.3.1

_______________________________________________
lustre-devel mailing list
lustre-devel@lists.lustre.org
http://lists.lustre.org/listinfo.cgi/lustre-devel-lustre.org

  parent reply	other threads:[~2021-04-05  0:51 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-05  0:50 [lustre-devel] [PATCH 00/41] lustre: sync to OpenSFS branch as of March 1 James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 01/41] lustre: llite: data corruption due to RPC reordering James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 02/41] lustre: llite: make readahead aware of hints James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 03/41] lustre: lov: avoid NULL dereference in cleanup James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 04/41] lustre: llite: quiet spurious ioctl warning James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 05/41] lustre: ptlrpc: do not output error when imp_sec is freed James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 06/41] lustre: update version to 2.14.0 James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 07/41] lnet: UDSP storage and marshalled structs James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 08/41] lnet: foundation patch for selection mod James Simmons
2021-04-05  0:50 ` James Simmons [this message]
2021-04-05  0:50 ` [lustre-devel] [PATCH 10/41] lnet: Select NI/peer NI with highest prio James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 11/41] lnet: select best peer and local net James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 12/41] lnet: UDSP handling James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 13/41] lnet: Apply UDSP on local and remote NIs James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 14/41] lnet: Add the kernel level Marshalling API James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 15/41] lnet: Add the kernel level De-Marshalling API James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 16/41] lnet: Add the ioctl handler for "add policy" James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 17/41] lnet: ioctl handler for "delete policy" James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 18/41] lnet: ioctl handler for get policy info James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 19/41] lustre: update version to 2.14.50 James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 20/41] lustre: gss: handle empty reqmsg in sptlrpc_req_ctx_switch James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 21/41] lustre: sec: file ioctls to handle encryption policies James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 22/41] lustre: obdclass: try to skip corrupted llog records James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 23/41] lustre: lov: fix layout generation inc for mirror split James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 24/41] lnet: modify assertion in lnet_post_send_locked James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 25/41] lustre: lov: fixes bitfield in lod qos code James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 26/41] lustre: lov: grant deadlock if same OSC in two components James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 27/41] lustre: change EWOULDBLOCK to EAGAIN James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 28/41] lsutre: ldlm: return error from ldlm_namespace_new() James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 29/41] lustre: llite: remove unused ll_teardown_mmaps() James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 30/41] lustre: lov: style cleanups in lov_set_osc_active() James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 31/41] lustre: change various operations structs to const James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 32/41] lustre: mark strings in char arrays as const James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 33/41] lustre: convert snprintf to scnprintf as appropriate James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 34/41] lustre: remove non-static 'inline' markings James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 35/41] lustre: llite: use is_root_inode() James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 36/41] lnet: libcfs: discard cfs_firststr James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 37/41] lnet: place wire protocol data int own headers James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 38/41] lnet: libcfs: use wait_event_timeout() in tracefiled() James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 39/41] lnet: use init_wait() rather than init_waitqueue_entry() James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 40/41] lnet: discard LNET_MD_PHYS James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 41/41] lnet: o2iblnd: convert peers hash table to hashtable.h James Simmons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1617583870-32029-10-git-send-email-jsimmons@infradead.org \
    --to=jsimmons@infradead.org \
    --cc=adilger@whamcloud.com \
    --cc=ashehata@whamcloud.com \
    --cc=green@whamcloud.com \
    --cc=lustre-devel@lists.lustre.org \
    --cc=neilb@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).