All of lore.kernel.org
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.com>
To: lustre-devel@lists.lustre.org
Subject: [lustre-devel] [PATCH 14/34] LU-7734 lnet: handle non-MR peers
Date: Tue, 25 Sep 2018 11:07:15 +1000	[thread overview]
Message-ID: <153783763540.32103.13948722910331939075.stgit@noble> (raw)
In-Reply-To: <153783752960.32103.8394391715843917125.stgit@noble>

From: Amir Shehata <amir.shehata@intel.com>

Add the ability to declare a peer to be non-MR from the DLC
interface. By default if a peer is configured from DLC it is
assumed to be MR capable, except when the non-mr flag is set.

For a non-MR peer, always use the same NI to communicate with it.
If multiple NIs are used to communicate with a non-MR peer, the
peer will assume it is talking to different peers, which could
confuse the upper layers.

Signed-off-by: Amir Shehata <amir.shehata@intel.com>
Change-Id: Ie3ec45f5f44fa7d72e3e0335b1383f9c3cc92627
Reviewed-on: http://review.whamcloud.com/19305
Tested-by: Jenkins
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Olaf Weber <olaf@sgi.com>
Signed-off-by: NeilBrown <neilb@suse.com>
---
 .../staging/lustre/include/linux/lnet/lib-lnet.h   |   17 ++++++++++++++++-
 .../lustre/include/uapi/linux/lnet/lnet-dlc.h      |    1 +
 drivers/staging/lustre/lnet/lnet/api-ni.c          |    3 ++-
 drivers/staging/lustre/lnet/lnet/lib-move.c        |   13 +++++++++++++
 drivers/staging/lustre/lnet/lnet/peer.c            |    7 ++++---
 5 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
index 0259cd2251ed..08fc4abad332 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
@@ -661,7 +661,7 @@ struct lnet_peer_net *lnet_peer_get_net_locked(struct lnet_peer *peer,
 					       u32 net_id);
 bool lnet_peer_is_ni_pref_locked(struct lnet_peer_ni *lpni,
 				 struct lnet_ni *ni);
-int lnet_add_peer_ni_to_peer(lnet_nid_t key_nid, lnet_nid_t nid);
+int lnet_add_peer_ni_to_peer(lnet_nid_t key_nid, lnet_nid_t nid, bool mr);
 int lnet_del_peer_ni_from_peer(lnet_nid_t key_nid, lnet_nid_t nid);
 int lnet_get_peer_info(__u32 idx, lnet_nid_t *primary_nid, lnet_nid_t *nid,
 		       struct lnet_peer_ni_credit_info *peer_ni_info);
@@ -672,6 +672,21 @@ int lnet_get_peer_ni_info(__u32 peer_index, __u64 *nid,
 			  __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credtis,
 			  __u32 *peer_tx_qnob);
 
+static inline __u32
+lnet_get_num_peer_nis(struct lnet_peer *peer)
+{
+	struct lnet_peer_net *lpn;
+	struct lnet_peer_ni *lpni;
+	__u32 count = 0;
+
+	list_for_each_entry(lpn, &peer->lp_peer_nets, lpn_on_peer_list)
+		list_for_each_entry(lpni, &lpn->lpn_peer_nis,
+				    lpni_on_peer_net_list)
+			count++;
+
+	return count;
+}
+
 static inline bool
 lnet_is_peer_ni_healthy_locked(struct lnet_peer_ni *lpni)
 {
diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h b/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h
index 5eaaf0eae470..8be322dd4bd2 100644
--- a/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h
+++ b/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h
@@ -211,6 +211,7 @@ struct lnet_ioctl_peer_cfg {
 	lnet_nid_t prcfg_key_nid;
 	lnet_nid_t prcfg_cfg_nid;
 	__u32 prcfg_idx;
+	bool prcfg_mr;
 	char prcfg_bulk[0];
 };
 
diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
index 67a3301258d4..2d5d657de058 100644
--- a/drivers/staging/lustre/lnet/lnet/api-ni.c
+++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
@@ -2689,7 +2689,8 @@ LNetCtl(unsigned int cmd, void *arg)
 			return -EINVAL;
 
 		return lnet_add_peer_ni_to_peer(cfg->prcfg_key_nid,
-						cfg->prcfg_cfg_nid);
+						cfg->prcfg_cfg_nid,
+						cfg->prcfg_mr);
 	}
 
 	case IOC_LIBCFS_DEL_PEER_NI: {
diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
index 5153de984ede..6c5bb953a6d3 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-move.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-move.c
@@ -1164,6 +1164,12 @@ lnet_select_pathway(lnet_nid_t src_nid, lnet_nid_t dst_nid,
 		return -EHOSTUNREACH;
 	}
 
+	if (!peer->lp_multi_rail && lnet_get_num_peer_nis(peer) > 1) {
+		CERROR("peer %s is declared to be non MR capable, yet configured with more than one NID\n",
+		       libcfs_nid2str(dst_nid));
+		return -EINVAL;
+	}
+
 	/*
 	 * STEP 1: first jab at determineing best_ni
 	 * if src_nid is explicitly specified, then best_ni is already
@@ -1361,6 +1367,13 @@ lnet_select_pathway(lnet_nid_t src_nid, lnet_nid_t dst_nid,
 	 */
 	best_ni->ni_seq++;
 
+	/*
+	 * if the peer is not MR capable, then we should always send to it
+	 * using the first NI in the NET we determined.
+	 */
+	if (!peer->lp_multi_rail && local_net)
+		best_ni = lnet_net2ni_locked(local_net->net_id, cpt);
+
 	if (!best_ni) {
 		lnet_net_unlock(cpt);
 		LCONSOLE_WARN("No local ni found to send from to %s\n",
diff --git a/drivers/staging/lustre/lnet/lnet/peer.c b/drivers/staging/lustre/lnet/lnet/peer.c
index bde7b6214668..ecbd276703f1 100644
--- a/drivers/staging/lustre/lnet/lnet/peer.c
+++ b/drivers/staging/lustre/lnet/lnet/peer.c
@@ -477,6 +477,7 @@ lnet_build_peer_hierarchy(struct lnet_peer_ni *lpni)
 	peer_net->lpn_peer = peer;
 	lpni->lpni_peer_net = peer_net;
 	peer->lp_primary_nid = lpni->lpni_nid;
+	peer->lp_multi_rail = false;
 	list_add_tail(&peer_net->lpn_on_peer_list, &peer->lp_peer_nets);
 	list_add_tail(&lpni->lpni_on_peer_net_list, &peer_net->lpn_peer_nis);
 	list_add_tail(&peer->lp_on_lnet_peer_list, &the_lnet.ln_peers);
@@ -502,7 +503,7 @@ lnet_peer_get_net_locked(struct lnet_peer *peer, u32 net_id)
  * is unique
  */
 int
-lnet_add_peer_ni_to_peer(lnet_nid_t key_nid, lnet_nid_t nid)
+lnet_add_peer_ni_to_peer(lnet_nid_t key_nid, lnet_nid_t nid, bool mr)
 {
 	struct lnet_peer_ni *lpni, *lpni2;
 	struct lnet_peer *peer;
@@ -535,14 +536,14 @@ lnet_add_peer_ni_to_peer(lnet_nid_t key_nid, lnet_nid_t nid)
 			return -EINVAL;
 		}
 		peer = lpni->lpni_peer_net->lpn_peer;
-		peer->lp_multi_rail = true;
+		peer->lp_multi_rail = mr;
 		lnet_peer_ni_decref_locked(lpni);
 		lnet_net_unlock(cpt2);
 	} else {
 		lnet_net_lock(LNET_LOCK_EX);
 		rc = lnet_nid2peerni_locked(&lpni, nid, LNET_LOCK_EX);
 		if (rc == 0) {
-			lpni->lpni_peer_net->lpn_peer->lp_multi_rail = true;
+			lpni->lpni_peer_net->lpn_peer->lp_multi_rail = mr;
 			lnet_peer_ni_decref_locked(lpni);
 		}
 		lnet_net_unlock(LNET_LOCK_EX);

  parent reply	other threads:[~2018-09-25  1:07 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-25  1:07 [lustre-devel] [PATCH 00/34] lustre: remainder of multi-rail series NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 02/34] lnet: change struct lnet_peer to struct lnet_peer_ni NeilBrown
2018-09-29 22:47   ` James Simmons
2018-09-25  1:07 ` [lustre-devel] [PATCH 01/34] lnet: replace all lp_ fields with lpni_ NeilBrown
2018-09-29 22:45   ` James Simmons
2018-09-25  1:07 ` [lustre-devel] [PATCH 03/34] lnet: Change lpni_refcount to atomic_t NeilBrown
2018-09-29 22:47   ` James Simmons
2018-09-25  1:07 ` [lustre-devel] [PATCH 26/34] LU-7734 lnet: Routing fixes part 2 NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 28/34] LU-7734 lnet: Fix crash in router_proc.c NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 13/34] LU-7734 lnet: Primary NID and traffic distribution NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 06/34] lnet: introduce lnet_find_peer_ni_locked() NeilBrown
2018-09-29 22:48   ` James Simmons
2018-09-25  1:07 ` [lustre-devel] [PATCH 12/34] LU-7734 lnet: NUMA support NeilBrown
2018-09-30  1:49   ` James Simmons
2018-09-25  1:07 ` [lustre-devel] [PATCH 08/34] LU-7734 lnet: Multi-Rail peer split NeilBrown
2018-09-29 23:01   ` James Simmons
2018-10-02  3:10     ` NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 04/34] lnet: change some function names - add 'ni' NeilBrown
2018-09-29 22:47   ` James Simmons
2018-09-25  1:07 ` [lustre-devel] [PATCH 09/34] LU-7734 lnet: Multi-Rail local_ni/peer_ni selection NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 15/34] LU-7734 lnet: handle N NIs to 1 LND peer NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 19/34] LU-7734 lnet: proper cpt locking NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 17/34] LU-7734 lnet: Add peer_ni and NI stats for DLC NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 24/34] LU-7734 lnet: fix lnet_select_pathway() NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 22/34] LU-7734 lnet: fix lnet_peer_table_cleanup_locked() NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 07/34] lnet: lnet_peer_tables_cleanup: use an exclusive lock NeilBrown
2018-09-29 22:53   ` James Simmons
2018-10-02  2:25     ` NeilBrown
2018-09-25  1:07 ` NeilBrown [this message]
2018-09-25  1:07 ` [lustre-devel] [PATCH 21/34] LU-7734 lnet: simplify and fix lnet_select_pathway() NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 27/34] LU-7734 lnet: fix routing selection NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 23/34] LU-7734 lnet: configuration fixes NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 18/34] LU-7734 lnet: peer/peer_ni handling adjustments NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 25/34] LU-7734 lnet: Routing fixes part 1 NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 10/34] LU-7734 lnet: configure peers from DLC NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 16/34] LU-7734 lnet: rename LND peer to peer_ni NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 20/34] LU-7734 lnet: protect peer_ni credits NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 11/34] LU-7734 lnet: configure local NI from DLC NeilBrown
2018-09-29 21:05   ` James Simmons
2018-10-02  3:19     ` NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 05/34] lnet: make lnet_nid_cpt_hash non-static NeilBrown
2018-09-29 22:48   ` James Simmons
2018-09-25  1:07 ` [lustre-devel] [PATCH 30/34] LU-7734 lnet: set primary NID in ptlrpc_connection_get() NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 29/34] LU-7734 lnet: double free in lnet_add_net_common() NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 34/34] LU-7734 lnet: cpt locking NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 32/34] LU-7734 lnet: rename peer key_nid to prim_nid NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 33/34] lnet: use BIT() macro for LNET_MD_* flags NeilBrown
2018-09-28 16:25   ` James Simmons
2018-10-02  3:31     ` NeilBrown
2018-09-25  1:07 ` [lustre-devel] [PATCH 31/34] LU-7734 lnet: fix NULL access in lnet_peer_aliveness_enabled NeilBrown
2018-09-30  2:17 ` [lustre-devel] [PATCH 00/34] lustre: remainder of multi-rail series James Simmons
2018-10-02  3:41   ` NeilBrown
2018-10-01  2:06 ` James Simmons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=153783763540.32103.13948722910331939075.stgit@noble \
    --to=neilb@suse.com \
    --cc=lustre-devel@lists.lustre.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions on how to clone and mirror
all data and code used by this external index.