From: James Simmons <jsimmons@infradead.org>
To: Andreas Dilger <adilger@whamcloud.com>,
Oleg Drokin <green@whamcloud.com>, NeilBrown <neilb@suse.de>
Cc: Amir Shehata <ashehata@whamcloud.com>,
Lustre Development List <lustre-devel@lists.lustre.org>
Subject: [lustre-devel] [PATCH 11/41] lnet: select best peer and local net
Date: Sun, 4 Apr 2021 20:50:40 -0400 [thread overview]
Message-ID: <1617583870-32029-12-git-send-email-jsimmons@infradead.org> (raw)
In-Reply-To: <1617583870-32029-1-git-send-email-jsimmons@infradead.org>
From: Amir Shehata <ashehata@whamcloud.com>
Select the healthiest and highest priority peer and local net when
sending a message.
WC-bug-id: https://jira.whamcloud.com/browse/LU-9121
Lustre-commit: 7d309d57fd843f1 ("LU-9121 lnet: select best peer and local net")
Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/34352
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
include/linux/lnet/lib-lnet.h | 2 +
include/linux/lnet/lib-types.h | 3 +
net/lnet/lnet/api-ni.c | 15 +++++
net/lnet/lnet/lib-move.c | 125 +++++++++++++++++++++++++++++++----------
4 files changed, 116 insertions(+), 29 deletions(-)
diff --git a/include/linux/lnet/lib-lnet.h b/include/linux/lnet/lib-lnet.h
index 90f18a0..5152c0a70 100644
--- a/include/linux/lnet/lib-lnet.h
+++ b/include/linux/lnet/lib-lnet.h
@@ -507,6 +507,8 @@ int lnet_get_route(int idx, u32 *net, u32 *hops,
struct lnet_ni *lnet_get_next_ni_locked(struct lnet_net *mynet,
struct lnet_ni *prev);
struct lnet_ni *lnet_get_ni_idx_locked(int idx);
+int lnet_get_net_healthv_locked(struct lnet_net *net);
+
int lnet_get_peer_list(u32 *countp, u32 *sizep,
struct lnet_process_id __user *ids);
extern void lnet_peer_ni_set_healthv(lnet_nid_t nid, int value, bool all);
diff --git a/include/linux/lnet/lib-types.h b/include/linux/lnet/lib-types.h
index 187e1f3..f1f4eac5 100644
--- a/include/linux/lnet/lib-types.h
+++ b/include/linux/lnet/lib-types.h
@@ -359,6 +359,9 @@ struct lnet_net {
* lnet/include/lnet/nidstr.h */
u32 net_id;
+ /* round robin selection */
+ u32 net_seq;
+
/* total number of CPTs in the array */
u32 net_ncpts;
diff --git a/net/lnet/lnet/api-ni.c b/net/lnet/lnet/api-ni.c
index 3acc86e..2c31b06 100644
--- a/net/lnet/lnet/api-ni.c
+++ b/net/lnet/lnet/api-ni.c
@@ -2931,6 +2931,21 @@ struct lnet_ni *
return NULL;
}
+int lnet_get_net_healthv_locked(struct lnet_net *net)
+{
+ struct lnet_ni *ni;
+ int best_healthv = 0;
+ int healthv;
+
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+ healthv = atomic_read(&ni->ni_healthv);
+ if (healthv > best_healthv)
+ best_healthv = healthv;
+ }
+
+ return best_healthv;
+}
+
struct lnet_ni *
lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
{
diff --git a/net/lnet/lnet/lib-move.c b/net/lnet/lnet/lib-move.c
index 166ebcc..4dcc68a 100644
--- a/net/lnet/lnet/lib-move.c
+++ b/net/lnet/lnet/lib-move.c
@@ -1602,10 +1602,25 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
u32 routing = send_case & REMOTE_DST;
struct lnet_rsp_tracker *rspt;
- /* Increment sequence number of the selected peer so that we
- * pick the next one in Round Robin.
+ /* Increment sequence number of the selected peer, peer net,
+ * local ni and local net so that we pick the next ones
+ * in Round Robin.
*/
best_lpni->lpni_seq++;
+ best_lpni->lpni_peer_net->lpn_seq++;
+ best_ni->ni_seq++;
+ best_ni->ni_net->net_seq++;
+
+ CDEBUG(D_NET,
+ "%s NI seq info: [%d:%d:%d:%u] %s LPNI seq info [%d:%d:%d:%u]\n",
+ libcfs_nid2str(best_ni->ni_nid),
+ best_ni->ni_seq, best_ni->ni_net->net_seq,
+ atomic_read(&best_ni->ni_tx_credits),
+ best_ni->ni_sel_priority,
+ libcfs_nid2str(best_lpni->lpni_nid),
+ best_lpni->lpni_seq, best_lpni->lpni_peer_net->lpn_seq,
+ best_lpni->lpni_txcredits,
+ best_lpni->lpni_sel_priority);
/* grab a reference on the peer_ni so it sticks around even if
* we need to drop and relock the lnet_net_lock below.
@@ -1787,8 +1802,7 @@ struct lnet_ni *
lnet_find_best_ni_on_spec_net(struct lnet_ni *cur_best_ni,
struct lnet_peer *peer,
struct lnet_peer_net *peer_net,
- int cpt,
- bool incr_seq)
+ int cpt)
{
struct lnet_net *local_net;
struct lnet_ni *best_ni;
@@ -1807,9 +1821,6 @@ struct lnet_ni *
best_ni = lnet_get_best_ni(local_net, cur_best_ni,
peer, peer_net, cpt);
- if (incr_seq && best_ni)
- best_ni->ni_seq++;
-
return best_ni;
}
@@ -2032,8 +2043,7 @@ struct lnet_ni *
lpeer = lnet_peer_get_net_locked(gw, local_lnet);
sd->sd_best_ni = lnet_find_best_ni_on_spec_net(NULL, gw, lpeer,
- sd->sd_md_cpt,
- true);
+ sd->sd_md_cpt);
}
if (!sd->sd_best_ni) {
@@ -2115,9 +2125,19 @@ struct lnet_ni *
lnet_find_best_ni_on_local_net(struct lnet_peer *peer, int md_cpt,
bool discovery)
{
- struct lnet_peer_net *peer_net = NULL;
+ struct lnet_peer_net *lpn = NULL;
+ struct lnet_peer_net *best_lpn = NULL;
+ struct lnet_net *net = NULL;
+ struct lnet_net *best_net = NULL;
struct lnet_ni *best_ni = NULL;
- int lpn_healthv = 0;
+ int best_lpn_healthv = 0;
+ int best_net_healthv = 0;
+ int net_healthv;
+ u32 best_lpn_sel_prio = LNET_MAX_SELECTION_PRIORITY;
+ u32 lpn_sel_prio;
+ u32 best_net_sel_prio = LNET_MAX_SELECTION_PRIORITY;
+ u32 net_sel_prio;
+ bool exit = false;
/* The peer can have multiple interfaces, some of them can be on
* the local network and others on a routed network. We should
@@ -2126,32 +2146,80 @@ struct lnet_ni *
*/
/* go through all the peer nets and find the best_ni */
- list_for_each_entry(peer_net, &peer->lp_peer_nets, lpn_peer_nets) {
+ list_for_each_entry(lpn, &peer->lp_peer_nets, lpn_peer_nets) {
/* The peer's list of nets can contain non-local nets. We
* want to only examine the local ones.
*/
- if (!lnet_get_net_locked(peer_net->lpn_net_id))
+ net = lnet_get_net_locked(lpn->lpn_net_id);
+ if (!net)
continue;
- /* always select the lpn with the best health */
- if (lpn_healthv <= peer_net->lpn_healthv)
- lpn_healthv = peer_net->lpn_healthv;
- else
- continue;
+ lpn_sel_prio = lpn->lpn_sel_priority;
+ net_healthv = lnet_get_net_healthv_locked(net);
+ net_sel_prio = net->net_sel_priority;
- best_ni = lnet_find_best_ni_on_spec_net(best_ni, peer, peer_net,
- md_cpt, false);
/* if this is a discovery message and lp_disc_net_id is
* specified then use that net to send the discovery on.
*/
- if (peer->lp_disc_net_id == peer_net->lpn_net_id &&
- discovery)
+ if (peer->lp_disc_net_id == lpn->lpn_net_id &&
+ discovery) {
+ exit = true;
+ goto select_lpn;
+ }
+
+ if (!best_lpn)
+ goto select_lpn;
+
+ /* always select the lpn with the best health */
+ if (best_lpn_healthv > lpn->lpn_healthv)
+ continue;
+ else if (best_lpn_healthv < lpn->lpn_healthv)
+ goto select_lpn;
+
+ /* select the preferred peer and local nets */
+ if (best_lpn_sel_prio < lpn_sel_prio)
+ continue;
+ else if (best_lpn_sel_prio > lpn_sel_prio)
+ goto select_lpn;
+
+ if (best_net_healthv > net_healthv)
+ continue;
+ else if (best_net_healthv < net_healthv)
+ goto select_lpn;
+
+ if (best_net_sel_prio < net_sel_prio)
+ continue;
+ else if (best_net_sel_prio > net_sel_prio)
+ goto select_lpn;
+
+ if (best_lpn->lpn_seq < lpn->lpn_seq)
+ continue;
+ else if (best_lpn->lpn_seq > lpn->lpn_seq)
+ goto select_lpn;
+
+ /* round robin over the local networks */
+ if (best_net->net_seq <= net->net_seq)
+ continue;
+
+select_lpn:
+ best_net_healthv = net_healthv;
+ best_net_sel_prio = net_sel_prio;
+ best_lpn_healthv = lpn->lpn_healthv;
+ best_lpn_sel_prio = lpn_sel_prio;
+ best_lpn = lpn;
+ best_net = net;
+
+ if (exit)
break;
}
- if (best_ni)
- /* increment sequence number so we can round robin */
- best_ni->ni_seq++;
+ if (best_lpn) {
+ /* Select the best NI on the same net as best_lpn chosen
+ * above
+ */
+ best_ni = lnet_find_best_ni_on_spec_net(NULL, peer,
+ best_lpn, md_cpt);
+ }
return best_ni;
}
@@ -2210,7 +2278,7 @@ struct lnet_ni *
best_ni =
lnet_find_best_ni_on_spec_net(NULL, sd->sd_peer,
sd->sd_best_lpni->lpni_peer_net,
- sd->sd_md_cpt, true);
+ sd->sd_md_cpt);
/* If there is no best_ni we don't have a route */
if (!best_ni) {
CERROR("no path to %s from net %s\n",
@@ -2262,8 +2330,7 @@ struct lnet_ni *
sd->sd_best_ni = lnet_find_best_ni_on_spec_net(NULL,
sd->sd_peer,
sd->sd_best_lpni->lpni_peer_net,
- sd->sd_md_cpt,
- true);
+ sd->sd_md_cpt);
if (!sd->sd_best_ni) {
CERROR("Unable to forward message to %s. No local NI available\n",
libcfs_nid2str(sd->sd_dst_nid));
@@ -2295,7 +2362,7 @@ struct lnet_ni *
sd->sd_best_ni =
lnet_find_best_ni_on_spec_net(NULL, sd->sd_peer,
sd->sd_best_lpni->lpni_peer_net,
- sd->sd_md_cpt, true);
+ sd->sd_md_cpt);
if (!sd->sd_best_ni) {
/* We're not going to deal with not able to send
--
1.8.3.1
_______________________________________________
lustre-devel mailing list
lustre-devel@lists.lustre.org
http://lists.lustre.org/listinfo.cgi/lustre-devel-lustre.org
next prev parent reply other threads:[~2021-04-05 0:52 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-04-05 0:50 [lustre-devel] [PATCH 00/41] lustre: sync to OpenSFS branch as of March 1 James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 01/41] lustre: llite: data corruption due to RPC reordering James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 02/41] lustre: llite: make readahead aware of hints James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 03/41] lustre: lov: avoid NULL dereference in cleanup James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 04/41] lustre: llite: quiet spurious ioctl warning James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 05/41] lustre: ptlrpc: do not output error when imp_sec is freed James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 06/41] lustre: update version to 2.14.0 James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 07/41] lnet: UDSP storage and marshalled structs James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 08/41] lnet: foundation patch for selection mod James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 09/41] lnet: Preferred gateway selection James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 10/41] lnet: Select NI/peer NI with highest prio James Simmons
2021-04-05 0:50 ` James Simmons [this message]
2021-04-05 0:50 ` [lustre-devel] [PATCH 12/41] lnet: UDSP handling James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 13/41] lnet: Apply UDSP on local and remote NIs James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 14/41] lnet: Add the kernel level Marshalling API James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 15/41] lnet: Add the kernel level De-Marshalling API James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 16/41] lnet: Add the ioctl handler for "add policy" James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 17/41] lnet: ioctl handler for "delete policy" James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 18/41] lnet: ioctl handler for get policy info James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 19/41] lustre: update version to 2.14.50 James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 20/41] lustre: gss: handle empty reqmsg in sptlrpc_req_ctx_switch James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 21/41] lustre: sec: file ioctls to handle encryption policies James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 22/41] lustre: obdclass: try to skip corrupted llog records James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 23/41] lustre: lov: fix layout generation inc for mirror split James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 24/41] lnet: modify assertion in lnet_post_send_locked James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 25/41] lustre: lov: fixes bitfield in lod qos code James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 26/41] lustre: lov: grant deadlock if same OSC in two components James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 27/41] lustre: change EWOULDBLOCK to EAGAIN James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 28/41] lustre: ldlm: return error from ldlm_namespace_new() James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 29/41] lustre: llite: remove unused ll_teardown_mmaps() James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 30/41] lustre: lov: style cleanups in lov_set_osc_active() James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 31/41] lustre: change various operations structs to const James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 32/41] lustre: mark strings in char arrays as const James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 33/41] lustre: convert snprintf to scnprintf as appropriate James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 34/41] lustre: remove non-static 'inline' markings James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 35/41] lustre: llite: use is_root_inode() James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 36/41] lnet: libcfs: discard cfs_firststr James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 37/41] lnet: place wire protocol data int own headers James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 38/41] lnet: libcfs: use wait_event_timeout() in tracefiled() James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 39/41] lnet: use init_wait() rather than init_waitqueue_entry() James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 40/41] lnet: discard LNET_MD_PHYS James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 41/41] lnet: o2iblnd: convert peers hash table to hashtable.h James Simmons
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1617583870-32029-12-git-send-email-jsimmons@infradead.org \
--to=jsimmons@infradead.org \
--cc=adilger@whamcloud.com \
--cc=ashehata@whamcloud.com \
--cc=green@whamcloud.com \
--cc=lustre-devel@lists.lustre.org \
--cc=neilb@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).