* [PATCH 0/7] All remaining outstanding bug fixes for ko2iblnd
@ 2016-03-02 23:53 James Simmons
  2016-03-02 23:53 ` [PATCH 1/7] staging: lustre: Support different ko2iblnd configs between systems James Simmons
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: James Simmons @ 2016-03-02 23:53 UTC (permalink / raw)
  To: Greg Kroah-Hartman, devel, Andreas Dilger, Oleg Drokin
  Cc: Linux Kernel Mailing List, Lustre Development List, James Simmons

The final missing bug fixes to bring the InfiniBand LND driver up
to date with the latest code used in production systems.

Amir Shehata (1):
  staging: lustre: make ko2iblnd connect parameters persistent

Doug Oucharek (1):
  staging: lustre: Change connect peer failed cleanup order

Jeremy Filizetti (1):
  staging: lustre: Support different ko2iblnd configs between systems

Liang Zhen (4):
  staging: lustre: take extra refcount in kiblnd_connreq_done
  staging: lustre: check wr_id returned by ib_poll_cq
  staging: lustre: avoid intensive reconnecting for ko2iblnd
  staging: lustre: do less intense allocating retry for ko2iblnd

 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c    |  105 +++---
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h    |  101 ++++--
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c |  433 ++++++++++++++------
 3 files changed, 443 insertions(+), 196 deletions(-)


* [PATCH 1/7] staging: lustre: Support different ko2iblnd configs between systems
  2016-03-02 23:53 [PATCH 0/7] All remaining outstanding bug fixes for ko2iblnd James Simmons
@ 2016-03-02 23:53 ` James Simmons
  2016-03-02 23:53 ` [PATCH 2/7] staging: lustre: make ko2iblnd connect parameters persistent James Simmons
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: James Simmons @ 2016-03-02 23:53 UTC (permalink / raw)
  To: Greg Kroah-Hartman, devel, Andreas Dilger, Oleg Drokin
  Cc: Linux Kernel Mailing List, Lustre Development List, Jeremy Filizetti

From: Jeremy Filizetti <jeremy.filizetti@gmail.com>

This patch adds support for ko2iblnd to use different values for
peer_credits and map_on_demand between systems.
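
In isolation the new rule is small; the following standalone sketch
(struct and function names invented for illustration, not taken from
the driver) models how the passive side now treats the peer's
connection parameters:

#include <stdio.h>

struct connparams {
	int queue_depth;
	int max_frags;
};

/* Accept any peer whose limits do not exceed ours and echo the peer's
 * values back, instead of requiring an exact match as before. */
static int accept_connreq(const struct connparams *mine,
			  const struct connparams *peer,
			  struct connparams *ack)
{
	if (peer->queue_depth > mine->queue_depth ||
	    peer->max_frags > mine->max_frags)
		return -1;	/* reject: peer asks for more than we have */

	*ack = *peer;		/* both sides use the smaller values */
	return 0;
}

int main(void)
{
	struct connparams mine = { .queue_depth = 8, .max_frags = 256 };
	struct connparams peer = { .queue_depth = 4, .max_frags = 32 };
	struct connparams ack;

	if (!accept_connreq(&mine, &peer, &ack))
		printf("accepted: queue_depth=%d max_frags=%d\n",
		       ack.queue_depth, ack.max_frags);
	return 0;
}

The handshake in the patch below follows this shape: the CONNREQ
carries the active side's limits, the compatibility checks become ">"
rather than "!=", and the CONNACK echoes the negotiated values.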

Signed-off-by: Jeremy Filizetti <jeremy.filizetti@gmail.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3322
Reviewed-on: http://review.whamcloud.com/11794
Reviewed-by: Amir Shehata <amir.shehata@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
---
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c    |   51 ++++---
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h    |   36 +++--
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c |  156 ++++++++++++--------
 3 files changed, 146 insertions(+), 97 deletions(-)

diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
index 1dc18d7..0b1ffbe 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
@@ -631,7 +631,7 @@ static int kiblnd_get_completion_vector(kib_conn_t *conn, int cpt)
 }
 
 kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
-			       int state, int version)
+			       int state, int version, kib_connparams_t *cp)
 {
 	/*
 	 * CAVEAT EMPTOR:
@@ -686,6 +686,14 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
 	cmid->context = conn;		   /* for future CM callbacks */
 	conn->ibc_cmid = cmid;
 
+	if (!cp) {
+		conn->ibc_max_frags = IBLND_CFG_RDMA_FRAGS;
+		conn->ibc_queue_depth = *kiblnd_tunables.kib_peertxcredits;
+	} else {
+		conn->ibc_max_frags = cp->ibcp_max_frags;
+		conn->ibc_queue_depth = cp->ibcp_queue_depth;
+	}
+
 	INIT_LIST_HEAD(&conn->ibc_early_rxs);
 	INIT_LIST_HEAD(&conn->ibc_tx_noops);
 	INIT_LIST_HEAD(&conn->ibc_tx_queue);
@@ -730,27 +738,27 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
 	write_unlock_irqrestore(glock, flags);
 
 	LIBCFS_CPT_ALLOC(conn->ibc_rxs, lnet_cpt_table(), cpt,
-			 IBLND_RX_MSGS(version) * sizeof(kib_rx_t));
+			 IBLND_RX_MSGS(conn) * sizeof(kib_rx_t));
 	if (!conn->ibc_rxs) {
 		CERROR("Cannot allocate RX buffers\n");
 		goto failed_2;
 	}
 
 	rc = kiblnd_alloc_pages(&conn->ibc_rx_pages, cpt,
-				IBLND_RX_MSG_PAGES(version));
+				IBLND_RX_MSG_PAGES(conn));
 	if (rc)
 		goto failed_2;
 
 	kiblnd_map_rx_descs(conn);
 
-	cq_attr.cqe = IBLND_CQ_ENTRIES(version);
+	cq_attr.cqe = IBLND_CQ_ENTRIES(conn);
 	cq_attr.comp_vector = kiblnd_get_completion_vector(conn, cpt);
 	cq = ib_create_cq(cmid->device,
 			  kiblnd_cq_completion, kiblnd_cq_event, conn,
 			  &cq_attr);
 	if (IS_ERR(cq)) {
-		CERROR("Can't create CQ: %ld, cqe: %d\n",
-		       PTR_ERR(cq), IBLND_CQ_ENTRIES(version));
+		CERROR("Failed to create CQ with %d CQEs: %ld\n",
+		       IBLND_CQ_ENTRIES(conn), PTR_ERR(cq));
 		goto failed_2;
 	}
 
@@ -764,8 +772,8 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
 
 	init_qp_attr->event_handler = kiblnd_qp_event;
 	init_qp_attr->qp_context = conn;
-	init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS(version);
-	init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(version);
+	init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS(conn);
+	init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(conn);
 	init_qp_attr->cap.max_send_sge = 1;
 	init_qp_attr->cap.max_recv_sge = 1;
 	init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -786,11 +794,11 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
 	LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr));
 
 	/* 1 ref for caller and each rxmsg */
-	atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS(version));
-	conn->ibc_nrx = IBLND_RX_MSGS(version);
+	atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS(conn));
+	conn->ibc_nrx = IBLND_RX_MSGS(conn);
 
 	/* post receives */
-	for (i = 0; i < IBLND_RX_MSGS(version); i++) {
+	for (i = 0; i < IBLND_RX_MSGS(conn); i++) {
 		rc = kiblnd_post_rx(&conn->ibc_rxs[i],
 				    IBLND_POSTRX_NO_CREDIT);
 		if (rc) {
@@ -804,7 +812,7 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
 			 * NB locking needed now I'm racing with completion
 			 */
 			spin_lock_irqsave(&sched->ibs_lock, flags);
-			conn->ibc_nrx -= IBLND_RX_MSGS(version) - i;
+			conn->ibc_nrx -= IBLND_RX_MSGS(conn) - i;
 			spin_unlock_irqrestore(&sched->ibs_lock, flags);
 
 			/*
@@ -816,7 +824,7 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
 			conn->ibc_cmid = NULL;
 
 			/* Drop my own and unused rxbuffer refcounts */
-			while (i++ <= IBLND_RX_MSGS(version))
+			while (i++ <= IBLND_RX_MSGS(conn))
 				kiblnd_conn_decref(conn);
 
 			return NULL;
@@ -886,8 +894,7 @@ void kiblnd_destroy_conn(kib_conn_t *conn)
 
 	if (conn->ibc_rxs) {
 		LIBCFS_FREE(conn->ibc_rxs,
-			    IBLND_RX_MSGS(conn->ibc_version)
-			      * sizeof(kib_rx_t));
+			    IBLND_RX_MSGS(conn) * sizeof(kib_rx_t));
 	}
 
 	if (conn->ibc_connvars)
@@ -1143,7 +1150,7 @@ void kiblnd_unmap_rx_descs(kib_conn_t *conn)
 	LASSERT(conn->ibc_rxs);
 	LASSERT(conn->ibc_hdev);
 
-	for (i = 0; i < IBLND_RX_MSGS(conn->ibc_version); i++) {
+	for (i = 0; i < IBLND_RX_MSGS(conn); i++) {
 		rx = &conn->ibc_rxs[i];
 
 		LASSERT(rx->rx_nob >= 0); /* not posted */
@@ -1167,7 +1174,7 @@ void kiblnd_map_rx_descs(kib_conn_t *conn)
 	int ipg;
 	int i;
 
-	for (pg_off = ipg = i = 0; i < IBLND_RX_MSGS(conn->ibc_version); i++) {
+	for (pg_off = ipg = i = 0; i < IBLND_RX_MSGS(conn); i++) {
 		pg = conn->ibc_rx_pages->ibp_pages[ipg];
 		rx = &conn->ibc_rxs[i];
 
@@ -1192,7 +1199,7 @@ void kiblnd_map_rx_descs(kib_conn_t *conn)
 		if (pg_off == PAGE_SIZE) {
 			pg_off = 0;
 			ipg++;
-			LASSERT(ipg <= IBLND_RX_MSG_PAGES(conn->ibc_version));
+			LASSERT(ipg <= IBLND_RX_MSG_PAGES(conn));
 		}
 	}
 }
@@ -1296,12 +1303,16 @@ static void kiblnd_map_tx_pool(kib_tx_pool_t *tpo)
 	}
 }
 
-struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, kib_rdma_desc_t *rd)
+struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, kib_rdma_desc_t *rd,
+				    int negotiated_nfrags)
 {
+	__u16 nfrags = (negotiated_nfrags != -1) ?
+			negotiated_nfrags : *kiblnd_tunables.kib_map_on_demand;
+
 	LASSERT(hdev->ibh_mrs);
 
 	if (*kiblnd_tunables.kib_map_on_demand > 0 &&
-	    *kiblnd_tunables.kib_map_on_demand <= rd->rd_nfrags)
+	    nfrags <= rd->rd_nfrags)
 		return NULL;
 
 	return hdev->ibh_mrs;
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
index 0c88e8b..59a26c4 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
@@ -162,18 +162,17 @@ kiblnd_concurrent_sends_v1(void)
 #define IBLND_FMR_POOL			256
 #define IBLND_FMR_POOL_FLUSH		192
 
-/* TX messages (shared by all connections) */
-#define IBLND_TX_MSGS()	    (*kiblnd_tunables.kib_ntx)
-
-/* RX messages (per connection) */
-#define IBLND_RX_MSGS(v)	    (IBLND_MSG_QUEUE_SIZE(v) * 2 + IBLND_OOB_MSGS(v))
-#define IBLND_RX_MSG_BYTES(v)       (IBLND_RX_MSGS(v) * IBLND_MSG_SIZE)
-#define IBLND_RX_MSG_PAGES(v)      ((IBLND_RX_MSG_BYTES(v) + PAGE_SIZE - 1) / PAGE_SIZE)
+#define IBLND_RX_MSGS(c)	\
+	((c->ibc_queue_depth) * 2 + IBLND_OOB_MSGS(c->ibc_version))
+#define IBLND_RX_MSG_BYTES(c)	(IBLND_RX_MSGS(c) * IBLND_MSG_SIZE)
+#define IBLND_RX_MSG_PAGES(c)	\
+	((IBLND_RX_MSG_BYTES(c) + PAGE_SIZE - 1) / PAGE_SIZE)
 
 /* WRs and CQEs (per connection) */
-#define IBLND_RECV_WRS(v)	    IBLND_RX_MSGS(v)
-#define IBLND_SEND_WRS(v)	  ((IBLND_RDMA_FRAGS(v) + 1) * IBLND_CONCURRENT_SENDS(v))
-#define IBLND_CQ_ENTRIES(v)	 (IBLND_RECV_WRS(v) + IBLND_SEND_WRS(v))
+#define IBLND_RECV_WRS(c)	IBLND_RX_MSGS(c)
+#define IBLND_SEND_WRS(c)	\
+	((c->ibc_max_frags + 1) * IBLND_CONCURRENT_SENDS(c->ibc_version))
+#define IBLND_CQ_ENTRIES(c)	(IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c))
 
 struct kib_hca_dev;
 
@@ -464,10 +463,10 @@ typedef struct {
 #define IBLND_REJECT_FATAL          3 /* Anything else */
 #define IBLND_REJECT_CONN_UNCOMPAT  4 /* incompatible version peer */
 #define IBLND_REJECT_CONN_STALE     5 /* stale peer */
-#define IBLND_REJECT_RDMA_FRAGS     6 /* Fatal: peer's rdma frags can't match */
-				      /* mine */
-#define IBLND_REJECT_MSG_QUEUE_SIZE 7 /* Fatal: peer's msg queue size can't */
-				      /* match mine */
+/* peer's rdma frags doesn't match mine */
+#define IBLND_REJECT_RDMA_FRAGS	    6
+/* peer's msg queue size doesn't match mine */
+#define IBLND_REJECT_MSG_QUEUE_SIZE 7
 
 /***********************************************************************/
 
@@ -535,6 +534,10 @@ typedef struct kib_conn {
 	int                   ibc_outstanding_credits; /* # credits to return */
 	int                   ibc_reserved_credits; /* # ACK/DONE msg credits */
 	int                   ibc_comms_error; /* set on comms error */
+	/* connections queue depth */
+	__u16		      ibc_queue_depth;
+	/* connections max frags */
+	__u16		      ibc_max_frags;
 	unsigned int          ibc_nrx:16;      /* receive buffers owned */
 	unsigned int          ibc_scheduled:1; /* scheduled for attention */
 	unsigned int          ibc_ready:1;     /* CQ callback fired */
@@ -907,7 +910,8 @@ static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
 #define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len)
 
 struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev,
-				    kib_rdma_desc_t *rd);
+				    kib_rdma_desc_t *rd,
+				    int negotiated_nfrags);
 void kiblnd_map_rx_descs(kib_conn_t *conn);
 void kiblnd_unmap_rx_descs(kib_conn_t *conn);
 void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node);
@@ -942,7 +946,7 @@ int  kiblnd_close_stale_conns_locked(kib_peer_t *peer,
 int  kiblnd_close_peer_conns_locked(kib_peer_t *peer, int why);
 
 kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
-			       int state, int version);
+			       int state, int version, kib_connparams_t *cp);
 void kiblnd_destroy_conn(kib_conn_t *conn);
 void kiblnd_close_conn(kib_conn_t *conn, int error);
 void kiblnd_close_conn_locked(kib_conn_t *conn, int error);
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 0bd6120..3937735 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -328,14 +328,13 @@ kiblnd_handle_rx(kib_rx_t *rx)
 		spin_lock(&conn->ibc_lock);
 
 		if (conn->ibc_credits + credits >
-		    IBLND_MSG_QUEUE_SIZE(conn->ibc_version)) {
+		    conn->ibc_queue_depth) {
 			rc2 = conn->ibc_credits;
 			spin_unlock(&conn->ibc_lock);
 
 			CERROR("Bad credits from %s: %d + %d > %d\n",
 			       libcfs_nid2str(conn->ibc_peer->ibp_nid),
-			       rc2, credits,
-			       IBLND_MSG_QUEUE_SIZE(conn->ibc_version));
+			       rc2, credits, conn->ibc_queue_depth);
 
 			kiblnd_close_conn(conn, -EPROTO);
 			kiblnd_post_rx(rx, IBLND_POSTRX_NO_CREDIT);
@@ -653,8 +652,8 @@ static int kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd,
 		nob += rd->rd_frags[i].rf_nob;
 	}
 
-	/* looking for pre-mapping MR */
-	mr = kiblnd_find_rd_dma_mr(hdev, rd);
+	mr = kiblnd_find_rd_dma_mr(hdev, rd, tx->tx_conn ?
+				   tx->tx_conn->ibc_max_frags : -1);
 	if (mr) {
 		/* found pre-mapping MR */
 		rd->rd_key = (rd != tx->tx_rd) ? mr->rkey : mr->lkey;
@@ -774,13 +773,13 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit)
 	LASSERT(tx->tx_queued);
 	/* We rely on this for QP sizing */
 	LASSERT(tx->tx_nwrq > 0);
-	LASSERT(tx->tx_nwrq <= 1 + IBLND_RDMA_FRAGS(ver));
+	LASSERT(tx->tx_nwrq <= 1 + conn->ibc_max_frags);
 
 	LASSERT(!credit || credit == 1);
 	LASSERT(conn->ibc_outstanding_credits >= 0);
-	LASSERT(conn->ibc_outstanding_credits <= IBLND_MSG_QUEUE_SIZE(ver));
+	LASSERT(conn->ibc_outstanding_credits <= conn->ibc_queue_depth);
 	LASSERT(conn->ibc_credits >= 0);
-	LASSERT(conn->ibc_credits <= IBLND_MSG_QUEUE_SIZE(ver));
+	LASSERT(conn->ibc_credits <= conn->ibc_queue_depth);
 
 	if (conn->ibc_nsends_posted == IBLND_CONCURRENT_SENDS(ver)) {
 		/* tx completions outstanding... */
@@ -1089,10 +1088,10 @@ kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type,
 			break;
 		}
 
-		if (tx->tx_nwrq == IBLND_RDMA_FRAGS(conn->ibc_version)) {
-			CERROR("RDMA too fragmented for %s (%d): %d/%d src %d/%d dst frags\n",
+		if (tx->tx_nwrq >= conn->ibc_max_frags) {
+			CERROR("RDMA has too many fragments for peer %s (%d), src idx/frags: %d/%d dst idx/frags: %d/%d\n",
 			       libcfs_nid2str(conn->ibc_peer->ibp_nid),
-			       IBLND_RDMA_FRAGS(conn->ibc_version),
+			       conn->ibc_max_frags,
 			       srcidx, srcrd->rd_nfrags,
 			       dstidx, dstrd->rd_nfrags);
 			rc = -EMSGSIZE;
@@ -2243,7 +2242,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 	if (!ni ||			 /* no matching net */
 	    ni->ni_nid != reqmsg->ibm_dstnid ||   /* right NET, wrong NID! */
 	    net->ibn_dev != ibdev) {	      /* wrong device */
-		CERROR("Can't accept %s on %s (%s:%d:%pI4h): bad dst nid %s\n",
+		CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): bad dst nid %s\n",
 		       libcfs_nid2str(nid),
 		       !ni ? "NA" : libcfs_nid2str(ni->ni_nid),
 		       ibdev->ibd_ifname, ibdev->ibd_nnets,
@@ -2270,10 +2269,11 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 		goto failed;
 	}
 
-	if (reqmsg->ibm_u.connparams.ibcp_queue_depth !=
+	if (reqmsg->ibm_u.connparams.ibcp_queue_depth >
 	    IBLND_MSG_QUEUE_SIZE(version)) {
-		CERROR("Can't accept %s: incompatible queue depth %d (%d wanted)\n",
-		       libcfs_nid2str(nid), reqmsg->ibm_u.connparams.ibcp_queue_depth,
+		CERROR("Can't accept conn from %s, queue depth too large: %d (<=%d wanted)\n",
+		       libcfs_nid2str(nid),
+		       reqmsg->ibm_u.connparams.ibcp_queue_depth,
 		       IBLND_MSG_QUEUE_SIZE(version));
 
 		if (version == IBLND_MSG_VERSION)
@@ -2282,14 +2282,25 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 		goto failed;
 	}
 
-	if (reqmsg->ibm_u.connparams.ibcp_max_frags !=
+	if (reqmsg->ibm_u.connparams.ibcp_max_frags >
 	    IBLND_RDMA_FRAGS(version)) {
-		CERROR("Can't accept %s(version %x): incompatible max_frags %d (%d wanted)\n",
-		       libcfs_nid2str(nid), version,
-		       reqmsg->ibm_u.connparams.ibcp_max_frags,
-		       IBLND_RDMA_FRAGS(version));
+		CWARN("Can't accept conn from %s (version %x): max_frags %d too large (%d wanted)\n",
+		      libcfs_nid2str(nid), version,
+		      reqmsg->ibm_u.connparams.ibcp_max_frags,
+		      IBLND_RDMA_FRAGS(version));
 
-		if (version == IBLND_MSG_VERSION)
+		if (version >= IBLND_MSG_VERSION)
+			rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
+
+		goto failed;
+	} else if (reqmsg->ibm_u.connparams.ibcp_max_frags <
+		   IBLND_RDMA_FRAGS(version) && !net->ibn_fmr_ps) {
+		CWARN("Can't accept conn from %s (version %x): max_frags %d incompatible without FMR pool (%d wanted)\n",
+		      libcfs_nid2str(nid), version,
+		      reqmsg->ibm_u.connparams.ibcp_max_frags,
+		      IBLND_RDMA_FRAGS(version));
+
+		if (version >= IBLND_MSG_VERSION)
 			rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
 
 		goto failed;
@@ -2371,7 +2382,8 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 		write_unlock_irqrestore(g_lock, flags);
 	}
 
-	conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT, version);
+	conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT, version,
+				  &reqmsg->ibm_u.connparams);
 	if (!conn) {
 		kiblnd_peer_connect_failed(peer, 0, -ENOMEM);
 		kiblnd_peer_decref(peer);
@@ -2384,19 +2396,21 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 	 * CM callback doesn't destroy cmid.
 	 */
 	conn->ibc_incarnation      = reqmsg->ibm_srcstamp;
-	conn->ibc_credits          = IBLND_MSG_QUEUE_SIZE(version);
-	conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE(version);
-	LASSERT(conn->ibc_credits + conn->ibc_reserved_credits + IBLND_OOB_MSGS(version)
-		 <= IBLND_RX_MSGS(version));
+	conn->ibc_credits          = reqmsg->ibm_u.connparams.ibcp_queue_depth;
+	conn->ibc_reserved_credits = reqmsg->ibm_u.connparams.ibcp_queue_depth;
+	LASSERT(conn->ibc_credits + conn->ibc_reserved_credits +
+		IBLND_OOB_MSGS(version) <= IBLND_RX_MSGS(conn));
 
 	ackmsg = &conn->ibc_connvars->cv_msg;
 	memset(ackmsg, 0, sizeof(*ackmsg));
 
 	kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK,
 			sizeof(ackmsg->ibm_u.connparams));
-	ackmsg->ibm_u.connparams.ibcp_queue_depth  = IBLND_MSG_QUEUE_SIZE(version);
+	ackmsg->ibm_u.connparams.ibcp_queue_depth =
+		reqmsg->ibm_u.connparams.ibcp_queue_depth;
+	ackmsg->ibm_u.connparams.ibcp_max_frags =
+		reqmsg->ibm_u.connparams.ibcp_max_frags;
 	ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
-	ackmsg->ibm_u.connparams.ibcp_max_frags    = IBLND_RDMA_FRAGS(version);
 
 	kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp);
 
@@ -2479,6 +2493,31 @@ kiblnd_reconnect(kib_conn_t *conn, int version,
 		reason = "Unknown";
 		break;
 
+	case IBLND_REJECT_RDMA_FRAGS:
+		if (conn->ibc_max_frags <= cp->ibcp_max_frags) {
+			CNETERR("Unsupported max frags, peer supports %d\n",
+				cp->ibcp_max_frags);
+			goto failed;
+		} else if (!*kiblnd_tunables.kib_map_on_demand) {
+			CNETERR("map_on_demand must be enabled to support map_on_demand peers\n");
+			goto failed;
+		}
+
+		conn->ibc_max_frags = cp->ibcp_max_frags;
+		reason = "rdma fragments";
+		break;
+
+	case IBLND_REJECT_MSG_QUEUE_SIZE:
+		if (conn->ibc_queue_depth <= cp->ibcp_queue_depth) {
+			CNETERR("Unsupported queue depth, peer supports %d\n",
+				cp->ibcp_queue_depth);
+			goto failed;
+		}
+
+		conn->ibc_queue_depth = cp->ibcp_queue_depth;
+		reason = "queue depth";
+		break;
+
 	case IBLND_REJECT_CONN_STALE:
 		reason = "stale";
 		break;
@@ -2495,11 +2534,17 @@ kiblnd_reconnect(kib_conn_t *conn, int version,
 	CNETERR("%s: retrying (%s), %x, %x, queue_dep: %d, max_frag: %d, msg_size: %d\n",
 		libcfs_nid2str(peer->ibp_nid),
 		reason, IBLND_MSG_VERSION, version,
-		cp ? cp->ibcp_queue_depth  : IBLND_MSG_QUEUE_SIZE(version),
-		cp ? cp->ibcp_max_frags    : IBLND_RDMA_FRAGS(version),
+		conn->ibc_queue_depth, conn->ibc_max_frags,
 		cp ? cp->ibcp_max_msg_size : IBLND_MSG_SIZE);
 
 	kiblnd_connect_peer(peer);
+	return;
+failed:
+	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+	peer->ibp_connecting--;
+	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+	return;
 }
 
 static void
@@ -2595,24 +2640,10 @@ kiblnd_rejected(kib_conn_t *conn, int reason, void *priv, int priv_nob)
 			case IBLND_REJECT_CONN_RACE:
 			case IBLND_REJECT_CONN_STALE:
 			case IBLND_REJECT_CONN_UNCOMPAT:
-				kiblnd_reconnect(conn, rej->ibr_version,
-						 incarnation, rej->ibr_why, cp);
-				break;
-
 			case IBLND_REJECT_MSG_QUEUE_SIZE:
-				CERROR("%s rejected: incompatible message queue depth %d, %d\n",
-				       libcfs_nid2str(peer->ibp_nid),
-				       cp ? cp->ibcp_queue_depth :
-				       IBLND_MSG_QUEUE_SIZE(rej->ibr_version),
-				       IBLND_MSG_QUEUE_SIZE(conn->ibc_version));
-				break;
-
 			case IBLND_REJECT_RDMA_FRAGS:
-				CERROR("%s rejected: incompatible # of RDMA fragments %d, %d\n",
-				       libcfs_nid2str(peer->ibp_nid),
-				       cp ? cp->ibcp_max_frags :
-				       IBLND_RDMA_FRAGS(rej->ibr_version),
-				       IBLND_RDMA_FRAGS(conn->ibc_version));
+				kiblnd_reconnect(conn, rej->ibr_version,
+						 incarnation, rej->ibr_why, cp);
 				break;
 
 			case IBLND_REJECT_NO_RESOURCES:
@@ -2676,22 +2707,22 @@ kiblnd_check_connreply(kib_conn_t *conn, void *priv, int priv_nob)
 		goto failed;
 	}
 
-	if (msg->ibm_u.connparams.ibcp_queue_depth !=
-	    IBLND_MSG_QUEUE_SIZE(ver)) {
-		CERROR("%s has incompatible queue depth %d(%d wanted)\n",
+	if (msg->ibm_u.connparams.ibcp_queue_depth >
+	    conn->ibc_queue_depth) {
+		CERROR("%s has incompatible queue depth %d (<=%d wanted)\n",
 		       libcfs_nid2str(peer->ibp_nid),
 		       msg->ibm_u.connparams.ibcp_queue_depth,
-		       IBLND_MSG_QUEUE_SIZE(ver));
+		       conn->ibc_queue_depth);
 		rc = -EPROTO;
 		goto failed;
 	}
 
-	if (msg->ibm_u.connparams.ibcp_max_frags !=
-	    IBLND_RDMA_FRAGS(ver)) {
-		CERROR("%s has incompatible max_frags %d (%d wanted)\n",
+	if (msg->ibm_u.connparams.ibcp_max_frags >
+	    conn->ibc_max_frags) {
+		CERROR("%s has incompatible max_frags %d (<=%d wanted)\n",
 		       libcfs_nid2str(peer->ibp_nid),
 		       msg->ibm_u.connparams.ibcp_max_frags,
-		       IBLND_RDMA_FRAGS(ver));
+		       conn->ibc_max_frags);
 		rc = -EPROTO;
 		goto failed;
 	}
@@ -2721,10 +2752,12 @@ kiblnd_check_connreply(kib_conn_t *conn, void *priv, int priv_nob)
 	}
 
 	conn->ibc_incarnation = msg->ibm_srcstamp;
-	conn->ibc_credits =
-	conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE(ver);
-	LASSERT(conn->ibc_credits + conn->ibc_reserved_credits + IBLND_OOB_MSGS(ver)
-		 <= IBLND_RX_MSGS(ver));
+	conn->ibc_credits = msg->ibm_u.connparams.ibcp_queue_depth;
+	conn->ibc_reserved_credits = msg->ibm_u.connparams.ibcp_queue_depth;
+	conn->ibc_queue_depth = msg->ibm_u.connparams.ibcp_queue_depth;
+	conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags;
+	LASSERT(conn->ibc_credits + conn->ibc_reserved_credits +
+		IBLND_OOB_MSGS(ver) <= IBLND_RX_MSGS(conn));
 
 	kiblnd_connreq_done(conn, 0);
 	return;
@@ -2761,7 +2794,8 @@ kiblnd_active_connect(struct rdma_cm_id *cmid)
 
 	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
 
-	conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT, version);
+	conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT,
+				  version, NULL);
 	if (!conn) {
 		kiblnd_peer_connect_failed(peer, 1, -ENOMEM);
 		kiblnd_peer_decref(peer); /* lose cmid's ref */
@@ -2777,8 +2811,8 @@ kiblnd_active_connect(struct rdma_cm_id *cmid)
 
 	memset(msg, 0, sizeof(*msg));
 	kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams));
-	msg->ibm_u.connparams.ibcp_queue_depth  = IBLND_MSG_QUEUE_SIZE(version);
-	msg->ibm_u.connparams.ibcp_max_frags    = IBLND_RDMA_FRAGS(version);
+	msg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
+	msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags;
 	msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
 
 	kiblnd_pack_msg(peer->ibp_ni, msg, version,
-- 
1.7.1


* [PATCH 2/7] staging: lustre: make ko2iblnd connect parameters persistent
  2016-03-02 23:53 [PATCH 0/7] All remaining outstanding bug fixes for ko2iblnd James Simmons
  2016-03-02 23:53 ` [PATCH 1/7] staging: lustre: Support different ko2iblnd configs between systems James Simmons
@ 2016-03-02 23:53 ` James Simmons
  2016-03-02 23:53 ` [PATCH 3/7] staging: lustre: take extra refcount in kiblnd_connreq_done James Simmons
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: James Simmons @ 2016-03-02 23:53 UTC (permalink / raw)
  To: Greg Kroah-Hartman, devel, Andreas Dilger, Oleg Drokin
  Cc: Linux Kernel Mailing List, Lustre Development List, Amir Shehata

From: Amir Shehata <amir.shehata@intel.com>

Store the map-on-demand and peer tx credit values in the peer, since
the peer is persistent. Also make sure that when the parameters
received on a connection are assigned to a newly created peer, any
peer that was added before we grab the lock receives those validated
parameters as well.
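
A minimal user-space sketch of the second point (a pthread rwlock
stands in for the kernel's global lock; the field names mirror the
patch but everything else here is invented):

#include <pthread.h>

struct peer_model {
	unsigned short ibp_max_frags;
	unsigned short ibp_queue_depth;
};

static pthread_rwlock_t glock = PTHREAD_RWLOCK_INITIALIZER;

/* If a racing active connect created the peer first, copy the
 * validated parameters onto that winner before the temporary peer is
 * dropped, so later connections use the negotiated limits. */
static void adopt_validated_params(struct peer_model *winner,
				   const struct peer_model *validated)
{
	pthread_rwlock_wrlock(&glock);
	winner->ibp_max_frags = validated->ibp_max_frags;
	winner->ibp_queue_depth = validated->ibp_queue_depth;
	pthread_rwlock_unlock(&glock);
}

int main(void)
{
	struct peer_model winner = { 0, 0 };
	struct peer_model validated = { 32, 8 };

	adopt_validated_params(&winner, &validated);
	return winner.ibp_queue_depth == 8 ? 0 : 1;
}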

Signed-off-by: Amir Shehata <amir.shehata@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3322
Reviewed-on: http://review.whamcloud.com/17074
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
---
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c    |   14 +++-----
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h    |    6 +++-
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c |   38 ++++++++++++++------
 3 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
index 0b1ffbe..56c221b 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
@@ -335,6 +335,8 @@ int kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid)
 	peer->ibp_nid = nid;
 	peer->ibp_error = 0;
 	peer->ibp_last_alive = 0;
+	peer->ibp_max_frags = IBLND_CFG_RDMA_FRAGS;
+	peer->ibp_queue_depth = *kiblnd_tunables.kib_peertxcredits;
 	atomic_set(&peer->ibp_refcount, 1);  /* 1 ref for caller */
 
 	INIT_LIST_HEAD(&peer->ibp_list);     /* not in the peer table yet */
@@ -631,7 +633,7 @@ static int kiblnd_get_completion_vector(kib_conn_t *conn, int cpt)
 }
 
 kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
-			       int state, int version, kib_connparams_t *cp)
+			       int state, int version)
 {
 	/*
 	 * CAVEAT EMPTOR:
@@ -685,14 +687,8 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
 	conn->ibc_peer = peer;		  /* I take the caller's ref */
 	cmid->context = conn;		   /* for future CM callbacks */
 	conn->ibc_cmid = cmid;
-
-	if (!cp) {
-		conn->ibc_max_frags = IBLND_CFG_RDMA_FRAGS;
-		conn->ibc_queue_depth = *kiblnd_tunables.kib_peertxcredits;
-	} else {
-		conn->ibc_max_frags = cp->ibcp_max_frags;
-		conn->ibc_queue_depth = cp->ibcp_queue_depth;
-	}
+	conn->ibc_max_frags = peer->ibp_max_frags;
+	conn->ibc_queue_depth = peer->ibp_queue_depth;
 
 	INIT_LIST_HEAD(&conn->ibc_early_rxs);
 	INIT_LIST_HEAD(&conn->ibc_tx_noops);
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
index 59a26c4..3db1413 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
@@ -586,6 +586,10 @@ typedef struct kib_peer {
 	int              ibp_error;       /* errno on closing this peer */
 	unsigned long    ibp_last_alive;  /* when (in jiffies) I was last alive
 					   */
+	/* max map_on_demand */
+	__u16		 ibp_max_frags;
+	/* max_peer_credits */
+	__u16		 ibp_queue_depth;
 } kib_peer_t;
 
 extern kib_data_t kiblnd_data;
@@ -946,7 +950,7 @@ int  kiblnd_close_stale_conns_locked(kib_peer_t *peer,
 int  kiblnd_close_peer_conns_locked(kib_peer_t *peer, int why);
 
 kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
-			       int state, int version, kib_connparams_t *cp);
+			       int state, int version);
 void kiblnd_destroy_conn(kib_conn_t *conn);
 void kiblnd_close_conn(kib_conn_t *conn, int error);
 void kiblnd_close_conn_locked(kib_conn_t *conn, int error);
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 3937735..6c8f09e 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -2322,6 +2322,10 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 		goto failed;
 	}
 
+	/* We have validated the peer's parameters so use those */
+	peer->ibp_max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags;
+	peer->ibp_queue_depth = reqmsg->ibm_u.connparams.ibcp_queue_depth;
+
 	write_lock_irqsave(g_lock, flags);
 
 	peer2 = kiblnd_find_peer_locked(nid);
@@ -2360,6 +2364,14 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 		peer2->ibp_accepting++;
 		kiblnd_peer_addref(peer2);
 
+		/**
+		 * Race with kiblnd_launch_tx (active connect) to create peer
+		 * so copy validated parameters since we now know what the
+		 * peer's limits are
+		 */
+		peer2->ibp_max_frags = peer->ibp_max_frags;
+		peer2->ibp_queue_depth = peer->ibp_queue_depth;
+
 		write_unlock_irqrestore(g_lock, flags);
 		kiblnd_peer_decref(peer);
 		peer = peer2;
@@ -2382,8 +2394,8 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 		write_unlock_irqrestore(g_lock, flags);
 	}
 
-	conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT, version,
-				  &reqmsg->ibm_u.connparams);
+	conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT,
+				  version);
 	if (!conn) {
 		kiblnd_peer_connect_failed(peer, 0, -ENOMEM);
 		kiblnd_peer_decref(peer);
@@ -2396,8 +2408,8 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 	 * CM callback doesn't destroy cmid.
 	 */
 	conn->ibc_incarnation      = reqmsg->ibm_srcstamp;
-	conn->ibc_credits          = reqmsg->ibm_u.connparams.ibcp_queue_depth;
-	conn->ibc_reserved_credits = reqmsg->ibm_u.connparams.ibcp_queue_depth;
+	conn->ibc_credits          = conn->ibc_queue_depth;
+	conn->ibc_reserved_credits = conn->ibc_queue_depth;
 	LASSERT(conn->ibc_credits + conn->ibc_reserved_credits +
 		IBLND_OOB_MSGS(version) <= IBLND_RX_MSGS(conn));
 
@@ -2406,10 +2418,8 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 
 	kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK,
 			sizeof(ackmsg->ibm_u.connparams));
-	ackmsg->ibm_u.connparams.ibcp_queue_depth =
-		reqmsg->ibm_u.connparams.ibcp_queue_depth;
-	ackmsg->ibm_u.connparams.ibcp_max_frags =
-		reqmsg->ibm_u.connparams.ibcp_max_frags;
+	ackmsg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
+	ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags;
 	ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
 
 	kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp);
@@ -2494,6 +2504,9 @@ kiblnd_reconnect(kib_conn_t *conn, int version,
 		break;
 
 	case IBLND_REJECT_RDMA_FRAGS:
+		if (!cp)
+			goto failed;
+
 		if (conn->ibc_max_frags <= cp->ibcp_max_frags) {
 			CNETERR("Unsupported max frags, peer supports %d\n",
 				cp->ibcp_max_frags);
@@ -2503,18 +2516,21 @@ kiblnd_reconnect(kib_conn_t *conn, int version,
 			goto failed;
 		}
 
-		conn->ibc_max_frags = cp->ibcp_max_frags;
+		peer->ibp_max_frags = cp->ibcp_max_frags;
 		reason = "rdma fragments";
 		break;
 
 	case IBLND_REJECT_MSG_QUEUE_SIZE:
+		if (!cp)
+			goto failed;
+
 		if (conn->ibc_queue_depth <= cp->ibcp_queue_depth) {
 			CNETERR("Unsupported queue depth, peer supports %d\n",
 				cp->ibcp_queue_depth);
 			goto failed;
 		}
 
-		conn->ibc_queue_depth = cp->ibcp_queue_depth;
+		peer->ibp_queue_depth = cp->ibcp_queue_depth;
 		reason = "queue depth";
 		break;
 
@@ -2795,7 +2811,7 @@ kiblnd_active_connect(struct rdma_cm_id *cmid)
 	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
 
 	conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT,
-				  version, NULL);
+				  version);
 	if (!conn) {
 		kiblnd_peer_connect_failed(peer, 1, -ENOMEM);
 		kiblnd_peer_decref(peer); /* lose cmid's ref */
-- 
1.7.1


* [PATCH 3/7] staging: lustre: take extra refcount in kiblnd_connreq_done
  2016-03-02 23:53 [PATCH 0/7] All remaining outstanding bug fixes for ko2iblnd James Simmons
  2016-03-02 23:53 ` [PATCH 1/7] staging: lustre: Support different ko2iblnd configs between systems James Simmons
  2016-03-02 23:53 ` [PATCH 2/7] staging: lustre: make ko2iblnd connect parameters persistent James Simmons
@ 2016-03-02 23:53 ` James Simmons
  2016-03-02 23:53 ` [PATCH 4/7] staging: lustre: Change connect peer failed cleanup order James Simmons
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: James Simmons @ 2016-03-02 23:53 UTC (permalink / raw)
  To: Greg Kroah-Hartman, devel, Andreas Dilger, Oleg Drokin
  Cc: Linux Kernel Mailing List, Lustre Development List, Liang Zhen

From: Liang Zhen <liang.zhen@intel.com>

The refcount taken by the cmid is not reliable after
kiblnd_connreq_done() releases the glock, because the connection is
then visible to other threads: another thread can find and close it
right after the glock is released, and if kiblnd_cm_callback() for
RDMA_CM_EVENT_DISCONNECTED runs it can release the connection
refcount taken by the cmid. This means the connection could be
destroyed before kiblnd_connreq_done() finishes its operations on it.
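
The fix can be modelled in user space with C11 atomics standing in
for the kernel's atomic_t (all names here are invented for the
example):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct conn_model {
	atomic_int refcount;
};

static void conn_decref(struct conn_model *c)
{
	/* free on the last reference, like kiblnd_conn_decref() */
	if (atomic_fetch_sub(&c->refcount, 1) == 1) {
		printf("conn freed\n");
		free(c);
	}
}

static void connreq_done(struct conn_model *c)
{
	atomic_fetch_add(&c->refcount, 1); /* pin before dropping glock */
	/* ...glock released: other threads can now find and close the
	 * conn, and a DISCONNECTED callback may drop the cmid's ref... */
	/* ...schedule blocked txs, handle early rxs... */
	conn_decref(c);                    /* drop the pin only when done */
}

int main(void)
{
	struct conn_model *c = malloc(sizeof(*c));

	atomic_init(&c->refcount, 1);      /* caller's reference */
	connreq_done(c);                   /* survives a concurrent close */
	conn_decref(c);                    /* caller's drop frees it */
	return 0;
}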

Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7210
Reviewed-on: http://review.whamcloud.com/17527
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Tested-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
---
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c |   16 ++++++++++++----
 1 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 6c8f09e..f76c570 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -939,8 +939,6 @@ kiblnd_check_sends(kib_conn_t *conn)
 			kiblnd_queue_tx_locked(tx, conn);
 	}
 
-	kiblnd_conn_addref(conn); /* 1 ref for me.... (see b21911) */
-
 	for (;;) {
 		int credit;
 
@@ -966,8 +964,6 @@ kiblnd_check_sends(kib_conn_t *conn)
 	}
 
 	spin_unlock(&conn->ibc_lock);
-
-	kiblnd_conn_decref(conn); /* ...until here */
 }
 
 static void
@@ -2131,6 +2127,16 @@ kiblnd_connreq_done(kib_conn_t *conn, int status)
 		return;
 	}
 
+	/**
+	 * refcount taken by cmid is not reliable after I released the glock
+	 * because this connection is visible to other threads now, another
+	 * thread can find and close this connection right after I released
+	 * the glock, if kiblnd_cm_callback for RDMA_CM_EVENT_DISCONNECTED is
+	 * called, it can release the connection refcount taken by cmid.
+	 * It means the connection could be destroyed before I finish my
+	 * operations on it.
+	 */
+	kiblnd_conn_addref(conn);
 	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
 
 	/* Schedule blocked txs */
@@ -2146,6 +2152,8 @@ kiblnd_connreq_done(kib_conn_t *conn, int status)
 
 	/* schedule blocked rxs */
 	kiblnd_handle_early_rxs(conn);
+
+	kiblnd_conn_decref(conn);
 }
 
 static void
-- 
1.7.1


* [PATCH 4/7] staging: lustre: Change connect peer failed cleanup order
  2016-03-02 23:53 [PATCH 0/7] All remaining outstanding bug fixes for ko2iblnd James Simmons
                   ` (2 preceding siblings ...)
  2016-03-02 23:53 ` [PATCH 3/7] staging: lustre: take extra refcount in kiblnd_connreq_done James Simmons
@ 2016-03-02 23:53 ` James Simmons
  2016-03-02 23:53 ` [PATCH 5/7] staging: lustre: check wr_id returned by ib_poll_cq James Simmons
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: James Simmons @ 2016-03-02 23:53 UTC (permalink / raw)
  To: Greg Kroah-Hartman, devel, Andreas Dilger, Oleg Drokin
  Cc: Linux Kernel Mailing List, Lustre Development List, Doug Oucharek

From: Doug Oucharek <doug.s.oucharek@intel.com>

A race condition has been found where connd is cleaning up a failed
connection, the peer ref counter has gone to zero, but we still have
a connecting counter > 0.

One possible race is when we retry a connection by calling
kiblnd_connect_peer(), which itself fails, decrements the peer ref
counter, and gets swapped out before it can decrement the connecting
counter.  connd swaps in and cleans up the connection, where it sees
a peer ref counter of 1 and a connecting counter of 1.  This triggers
the assert seen in LU-7210 when it decrements the peer counter.

The solution: be sure to decrement the connecting counter
before decrementing the peer counter in the peer connect
failure path.
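
In miniature (invented names, no real locking), the ordering
constraint looks like this:

#include <assert.h>

struct peer_model {
	int refcount;
	int connecting;
};

static void peer_decref(struct peer_model *p)
{
	/* the assertion that fired in LU-7210, in miniature: a peer
	 * must not reach zero references with an attempt in flight */
	if (--p->refcount == 0)
		assert(p->connecting == 0);
}

static void connect_attempt_failed(struct peer_model *p)
{
	p->connecting--;	/* first: the attempt is over */
	peer_decref(p);		/* then: this may be the last reference */
}

int main(void)
{
	struct peer_model p = { .refcount = 1, .connecting = 1 };

	connect_attempt_failed(&p);	/* reversing the two lines in
					 * connect_attempt_failed() would
					 * trip the assertion */
	return 0;
}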

Signed-off-by: Doug Oucharek <doug.s.oucharek@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7210
Reviewed-on: http://review.whamcloud.com/17004
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Amir Shehata <amir.shehata@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
---
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index f76c570..7602d71 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -1298,8 +1298,10 @@ kiblnd_connect_peer(kib_peer_t *peer)
 	return;
 
  failed2:
+	kiblnd_peer_connect_failed(peer, 1, rc);
 	kiblnd_peer_decref(peer);	       /* cmid's ref */
 	rdma_destroy_id(cmid);
+	return;
  failed:
 	kiblnd_peer_connect_failed(peer, 1, rc);
 }
-- 
1.7.1


* [PATCH 5/7] staging: lustre: check wr_id returned by ib_poll_cq
  2016-03-02 23:53 [PATCH 0/7] All remaining outstanding bug fixes for ko2iblnd James Simmons
                   ` (3 preceding siblings ...)
  2016-03-02 23:53 ` [PATCH 4/7] staging: lustre: Change connect peer failed cleanup order James Simmons
@ 2016-03-02 23:53 ` James Simmons
  2016-03-02 23:53 ` [PATCH 6/7] staging: lustre: avoid intensive reconnecting for ko2iblnd James Simmons
  2016-03-02 23:53 ` [PATCH 7/7] staging: lustre: do less intense allocating retry " James Simmons
  6 siblings, 0 replies; 8+ messages in thread
From: James Simmons @ 2016-03-02 23:53 UTC (permalink / raw)
  To: Greg Kroah-Hartman, devel, Andreas Dilger, Oleg Drokin
  Cc: Linux Kernel Mailing List, Lustre Development List, Liang Zhen

From: Liang Zhen <liang.zhen@intel.com>

If ib_poll_cq() returns a positive value without initialising
ib_wc::wr_id (a bug in the driver), o2iblnd will run into an
unpredictable situation because ib_wc::wr_id may refer to a stale
tx/rx pointer on the stack.

If this happens it indicates a bug in the HCA driver, so ko2iblnd
should output a console error and then close the current connection.

This patch could also be helpful for LU-5271.
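
The detection scheme reduces to a sentinel check; the sketch below
mocks a buggy poll routine (a stand-in for ib_poll_cq, invented for
the example) to show it.  Note the patch also renumbers the
IBLND_WID_* constants so that zero is never a valid work-item type:

#include <stdio.h>

#define WID_INVAL 0ULL	/* never used for a real tx/rx/rdma work item */

struct wc_model {
	unsigned long long wr_id;
};

/* Stand-in for a buggy HCA driver: reports one completion but never
 * fills in wr_id. */
static int buggy_poll_cq(struct wc_model *wc)
{
	(void)wc;
	return 1;
}

int main(void)
{
	struct wc_model wc;
	int rc;

	wc.wr_id = WID_INVAL;	/* sentinel set before every poll */
	rc = buggy_poll_cq(&wc);
	if (rc > 0 && wc.wr_id == WID_INVAL) {
		fprintf(stderr, "poll returned %d with uninitialised wr_id: HCA driver bug\n",
			rc);
		return 1;	/* the LND would close the connection here */
	}
	return 0;
}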

Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-519
Reviewed-on: http://review.whamcloud.com/12747
Reviewed-by: Isaac Huang <he.huang@intel.com>
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
---
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h    |    9 ++++---
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c |   21 ++++++++++++++++++-
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
index 3db1413..6a4c4ac 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
@@ -762,10 +762,11 @@ kiblnd_queue2str(kib_conn_t *conn, struct list_head *q)
 /* CAVEAT EMPTOR: We rely on descriptor alignment to allow us to use the */
 /* lowest bits of the work request id to stash the work item type. */
 
-#define IBLND_WID_TX    0
-#define IBLND_WID_RDMA  1
-#define IBLND_WID_RX    2
-#define IBLND_WID_MASK  3UL
+#define IBLND_WID_INVAL	0
+#define IBLND_WID_TX	1
+#define IBLND_WID_RX	2
+#define IBLND_WID_RDMA	3
+#define IBLND_WID_MASK	3UL
 
 static inline __u64
 kiblnd_ptr2wreqid(void *ptr, int type)
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 7602d71..199c105 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -768,7 +768,6 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit)
 	int ver = conn->ibc_version;
 	int rc;
 	int done;
-	struct ib_send_wr *bad_wrq;
 
 	LASSERT(tx->tx_queued);
 	/* We rely on this for QP sizing */
@@ -852,7 +851,14 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit)
 		/* close_conn will launch failover */
 		rc = -ENETDOWN;
 	} else {
-		rc = ib_post_send(conn->ibc_cmid->qp, &tx->tx_wrq->wr, &bad_wrq);
+		struct ib_send_wr *wrq = &tx->tx_wrq[tx->tx_nwrq - 1].wr;
+
+		LASSERTF(wrq->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX),
+			 "bad wr_id %llx, opc %d, flags %d, peer: %s\n",
+			 wrq->wr_id, wrq->opcode, wrq->send_flags,
+			 libcfs_nid2str(conn->ibc_peer->ibp_nid));
+		wrq = NULL;
+		rc = ib_post_send(conn->ibc_cmid->qp, &tx->tx_wrq->wr, &wrq);
 	}
 
 	conn->ibc_last_send = jiffies;
@@ -3420,6 +3426,8 @@ kiblnd_scheduler(void *arg)
 
 			spin_unlock_irqrestore(&sched->ibs_lock, flags);
 
+			wc.wr_id = IBLND_WID_INVAL;
+
 			rc = ib_poll_cq(conn->ibc_cq, 1, &wc);
 			if (!rc) {
 				rc = ib_req_notify_cq(conn->ibc_cq,
@@ -3437,6 +3445,15 @@ kiblnd_scheduler(void *arg)
 				rc = ib_poll_cq(conn->ibc_cq, 1, &wc);
 			}
 
+			if (unlikely(rc > 0 && wc.wr_id == IBLND_WID_INVAL)) {
+				LCONSOLE_ERROR("ib_poll_cq (rc: %d) returned invalid wr_id, opcode %d, status: %d, vendor_err: %d, conn: %s status: %d\nplease upgrade firmware and OFED or contact vendor.\n",
+					       rc, wc.opcode, wc.status,
+					       wc.vendor_err,
+					       libcfs_nid2str(conn->ibc_peer->ibp_nid),
+					       conn->ibc_state);
+				rc = -EINVAL;
+			}
+
 			if (rc < 0) {
 				CWARN("%s: ib_poll_cq failed: %d, closing connection\n",
 				      libcfs_nid2str(conn->ibc_peer->ibp_nid),
-- 
1.7.1


* [PATCH 6/7] staging: lustre: avoid intensive reconnecting for ko2iblnd
  2016-03-02 23:53 [PATCH 0/7] All remaining outstanding bug fixes for ko2iblnd James Simmons
                   ` (4 preceding siblings ...)
  2016-03-02 23:53 ` [PATCH 5/7] staging: lustre: check wr_id returned by ib_poll_cq James Simmons
@ 2016-03-02 23:53 ` James Simmons
  2016-03-02 23:53 ` [PATCH 7/7] staging: lustre: do less intense allocating retry " James Simmons
  6 siblings, 0 replies; 8+ messages in thread
From: James Simmons @ 2016-03-02 23:53 UTC (permalink / raw)
  To: Greg Kroah-Hartman, devel, Andreas Dilger, Oleg Drokin
  Cc: Linux Kernel Mailing List, Lustre Development List, Liang Zhen

From: Liang Zhen <liang.zhen@intel.com>

When there is a connection race between two nodes and one side
of the connection is rejected by the other side, o2iblnd will
reconnect immediately.  This generates a lot of thrashing if:

 - the race winner is slow and can't send out its connecting request
   in a short time.
 - the remote side leaves a cmid in TIMEWAIT state, which will reject
   future connection requests.

To resolve this problem, this patch changes the reconnection
behaviour: a reconnection is submitted by connd only when a zombie
connection is being destroyed and there is a pending reconnection
request for the corresponding peer.

Also, after a few rejections, successive reconnection attempts are
separated by a time interval.
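
The throttling rule can be sketched independently of the connd
machinery (the fields and the backoff formula below are illustrative
only; the patch itself parks peers on kib_reconn_wait and pulls them
off as kib_reconn_sec advances):

#include <time.h>
#include <stdbool.h>

struct peer_model {
	unsigned int reconnected;	/* consecutive reconnect attempts */
	time_t next_attempt;		/* earliest time for the next try */
};

/* connd only hands a parked peer back for reconnection once its
 * next-attempt time has passed. */
static bool may_reconnect_now(const struct peer_model *p, time_t now)
{
	return now >= p->next_attempt;
}

/* Grow the delay with the number of consecutive failures, capped so
 * a flapping peer is still retried occasionally. */
static void schedule_reconnect(struct peer_model *p, time_t now)
{
	unsigned int delay = p->reconnected < 5 ? p->reconnected : 5;

	p->reconnected++;
	p->next_attempt = now + delay;
}

int main(void)
{
	struct peer_model p = { .reconnected = 0, .next_attempt = 0 };
	time_t now = time(NULL);

	schedule_reconnect(&p, now);	/* first retry goes out at once */
	return may_reconnect_now(&p, now) ? 0 : 1;
}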

Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7569
Reviewed-on: http://review.whamcloud.com/17892
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Tested-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
---
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c    |   40 +--
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h    |   54 +++-
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c |  280 ++++++++++++++------
 3 files changed, 258 insertions(+), 116 deletions(-)

diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
index 56c221b..135ccf1 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
@@ -364,9 +364,7 @@ void kiblnd_destroy_peer(kib_peer_t *peer)
 	LASSERT(net);
 	LASSERT(!atomic_read(&peer->ibp_refcount));
 	LASSERT(!kiblnd_peer_active(peer));
-	LASSERT(!peer->ibp_connecting);
-	LASSERT(!peer->ibp_accepting);
-	LASSERT(list_empty(&peer->ibp_conns));
+	LASSERT(kiblnd_peer_idle(peer));
 	LASSERT(list_empty(&peer->ibp_tx_queue));
 
 	LIBCFS_FREE(peer, sizeof(*peer));
@@ -392,10 +390,7 @@ kib_peer_t *kiblnd_find_peer_locked(lnet_nid_t nid)
 
 	list_for_each(tmp, peer_list) {
 		peer = list_entry(tmp, kib_peer_t, ibp_list);
-
-		LASSERT(peer->ibp_connecting > 0 || /* creating conns */
-			 peer->ibp_accepting > 0 ||
-			 !list_empty(&peer->ibp_conns));  /* active conn */
+		LASSERT(!kiblnd_peer_idle(peer));
 
 		if (peer->ibp_nid != nid)
 			continue;
@@ -432,9 +427,7 @@ static int kiblnd_get_peer_info(lnet_ni_t *ni, int index,
 	for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
 		list_for_each(ptmp, &kiblnd_data.kib_peers[i]) {
 			peer = list_entry(ptmp, kib_peer_t, ibp_list);
-			LASSERT(peer->ibp_connecting > 0 ||
-				peer->ibp_accepting > 0 ||
-				!list_empty(&peer->ibp_conns));
+			LASSERT(!kiblnd_peer_idle(peer));
 
 			if (peer->ibp_ni != ni)
 				continue;
@@ -502,9 +495,7 @@ static int kiblnd_del_peer(lnet_ni_t *ni, lnet_nid_t nid)
 	for (i = lo; i <= hi; i++) {
 		list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
 			peer = list_entry(ptmp, kib_peer_t, ibp_list);
-			LASSERT(peer->ibp_connecting > 0 ||
-				peer->ibp_accepting > 0 ||
-				!list_empty(&peer->ibp_conns));
+			LASSERT(!kiblnd_peer_idle(peer));
 
 			if (peer->ibp_ni != ni)
 				continue;
@@ -545,9 +536,7 @@ static kib_conn_t *kiblnd_get_conn_by_idx(lnet_ni_t *ni, int index)
 	for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
 		list_for_each(ptmp, &kiblnd_data.kib_peers[i]) {
 			peer = list_entry(ptmp, kib_peer_t, ibp_list);
-			LASSERT(peer->ibp_connecting > 0 ||
-				peer->ibp_accepting > 0 ||
-				!list_empty(&peer->ibp_conns));
+			LASSERT(!kiblnd_peer_idle(peer));
 
 			if (peer->ibp_ni != ni)
 				continue;
@@ -837,14 +826,14 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
 	return conn;
 
  failed_2:
-	kiblnd_destroy_conn(conn);
+	kiblnd_destroy_conn(conn, true);
  failed_1:
 	LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr));
  failed_0:
 	return NULL;
 }
 
-void kiblnd_destroy_conn(kib_conn_t *conn)
+void kiblnd_destroy_conn(kib_conn_t *conn, bool free_conn)
 {
 	struct rdma_cm_id *cmid = conn->ibc_cmid;
 	kib_peer_t *peer = conn->ibc_peer;
@@ -984,9 +973,7 @@ static int kiblnd_close_matching_conns(lnet_ni_t *ni, lnet_nid_t nid)
 	for (i = lo; i <= hi; i++) {
 		list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
 			peer = list_entry(ptmp, kib_peer_t, ibp_list);
-			LASSERT(peer->ibp_connecting > 0 ||
-				peer->ibp_accepting > 0 ||
-				!list_empty(&peer->ibp_conns));
+			LASSERT(!kiblnd_peer_idle(peer));
 
 			if (peer->ibp_ni != ni)
 				continue;
@@ -1071,12 +1058,8 @@ static void kiblnd_query(lnet_ni_t *ni, lnet_nid_t nid, unsigned long *when)
 	read_lock_irqsave(glock, flags);
 
 	peer = kiblnd_find_peer_locked(nid);
-	if (peer) {
-		LASSERT(peer->ibp_connecting > 0 || /* creating conns */
-			 peer->ibp_accepting > 0 ||
-			 !list_empty(&peer->ibp_conns));  /* active conn */
+	if (peer)
 		last_alive = peer->ibp_last_alive;
-	}
 
 	read_unlock_irqrestore(glock, flags);
 
@@ -2368,6 +2351,8 @@ static void kiblnd_base_shutdown(void)
 			LASSERT(list_empty(&kiblnd_data.kib_peers[i]));
 		LASSERT(list_empty(&kiblnd_data.kib_connd_zombies));
 		LASSERT(list_empty(&kiblnd_data.kib_connd_conns));
+		LASSERT(list_empty(&kiblnd_data.kib_reconn_list));
+		LASSERT(list_empty(&kiblnd_data.kib_reconn_wait));
 
 		/* flag threads to terminate; wake and wait for them to die */
 		kiblnd_data.kib_shutdown = 1;
@@ -2506,6 +2491,9 @@ static int kiblnd_base_startup(void)
 	spin_lock_init(&kiblnd_data.kib_connd_lock);
 	INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns);
 	INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies);
+	INIT_LIST_HEAD(&kiblnd_data.kib_reconn_list);
+	INIT_LIST_HEAD(&kiblnd_data.kib_reconn_wait);
+
 	init_waitqueue_head(&kiblnd_data.kib_connd_waitq);
 	init_waitqueue_head(&kiblnd_data.kib_failover_waitq);
 
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
index 6a4c4ac..bfcbdd1 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
@@ -348,6 +348,16 @@ typedef struct {
 	void *kib_connd; /* the connd task (serialisation assertions) */
 	struct list_head kib_connd_conns;   /* connections to setup/teardown */
 	struct list_head kib_connd_zombies; /* connections with zero refcount */
+	/* connections to reconnect */
+	struct list_head	kib_reconn_list;
+	/* peers wait for reconnection */
+	struct list_head	kib_reconn_wait;
+	/**
+	 * The second that peers are pulled out from \a kib_reconn_wait
+	 * for reconnection.
+	 */
+	time64_t		kib_reconn_sec;
+
 	wait_queue_head_t kib_connd_waitq;  /* connection daemon sleeps here */
 	spinlock_t kib_connd_lock;          /* serialise */
 	struct ib_qp_attr kib_error_qpa;    /* QP->ERROR */
@@ -525,6 +535,8 @@ typedef struct kib_conn {
 	struct list_head ibc_list;             /* stash on peer's conn list */
 	struct list_head      ibc_sched_list;  /* schedule for attention */
 	__u16                 ibc_version;     /* version of connection */
+	/* reconnect later */
+	__u16			ibc_reconnect:1;
 	__u64                 ibc_incarnation; /* which instance of the peer */
 	atomic_t              ibc_refcount;    /* # users */
 	int                   ibc_state;       /* what's happening */
@@ -574,18 +586,25 @@ typedef struct kib_peer {
 	struct list_head ibp_list;        /* stash on global peer list */
 	lnet_nid_t       ibp_nid;         /* who's on the other end(s) */
 	lnet_ni_t        *ibp_ni;         /* LNet interface */
-	atomic_t         ibp_refcount;    /* # users */
 	struct list_head ibp_conns;       /* all active connections */
 	struct list_head ibp_tx_queue;    /* msgs waiting for a conn */
-	__u16            ibp_version;     /* version of peer */
 	__u64            ibp_incarnation; /* incarnation of peer */
-	int              ibp_connecting;  /* current active connection attempts
-					   */
-	int              ibp_accepting;   /* current passive connection attempts
-					   */
-	int              ibp_error;       /* errno on closing this peer */
-	unsigned long    ibp_last_alive;  /* when (in jiffies) I was last alive
-					   */
+	/* when (in jiffies) I was last alive */
+	unsigned long		ibp_last_alive;
+	/* # users */
+	atomic_t		ibp_refcount;
+	/* version of peer */
+	__u16			ibp_version;
+	/* current passive connection attempts */
+	unsigned short		ibp_accepting;
+	/* current active connection attempts */
+	unsigned short		ibp_connecting;
+	/* reconnect this peer later */
+	unsigned short		ibp_reconnecting:1;
+	/* # consecutive reconnection attempts to this peer */
+	unsigned int		ibp_reconnected;
+	/* errno on closing this peer */
+	int              ibp_error;
 	/* max map_on_demand */
 	__u16		 ibp_max_frags;
 	/* max_peer_credits */
@@ -667,6 +686,20 @@ do {							    \
 		kiblnd_destroy_peer(peer);		      \
 } while (0)
 
+static inline bool
+kiblnd_peer_connecting(kib_peer_t *peer)
+{
+	return peer->ibp_connecting ||
+	       peer->ibp_reconnecting ||
+	       peer->ibp_accepting;
+}
+
+static inline bool
+kiblnd_peer_idle(kib_peer_t *peer)
+{
+	return !kiblnd_peer_connecting(peer) && list_empty(&peer->ibp_conns);
+}
+
 static inline struct list_head *
 kiblnd_nid2peerlist(lnet_nid_t nid)
 {
@@ -943,6 +976,7 @@ int  kiblnd_translate_mtu(int value);
 int  kiblnd_dev_failover(kib_dev_t *dev);
 int  kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid);
 void kiblnd_destroy_peer(kib_peer_t *peer);
+bool kiblnd_reconnect_peer(kib_peer_t *peer);
 void kiblnd_destroy_dev(kib_dev_t *dev);
 void kiblnd_unlink_peer_locked(kib_peer_t *peer);
 kib_peer_t *kiblnd_find_peer_locked(lnet_nid_t nid);
@@ -952,7 +986,7 @@ int  kiblnd_close_peer_conns_locked(kib_peer_t *peer, int why);
 
 kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
 			       int state, int version);
-void kiblnd_destroy_conn(kib_conn_t *conn);
+void kiblnd_destroy_conn(kib_conn_t *conn, bool free_conn);
 void kiblnd_close_conn(kib_conn_t *conn, int error);
 void kiblnd_close_conn_locked(kib_conn_t *conn, int error);
 
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 199c105..79051c0 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -1257,6 +1257,7 @@ kiblnd_connect_peer(kib_peer_t *peer)
 
 	LASSERT(net);
 	LASSERT(peer->ibp_connecting > 0);
+	LASSERT(!peer->ibp_reconnecting);
 
 	cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP,
 				     IB_QPT_RC);
@@ -1312,6 +1313,56 @@ kiblnd_connect_peer(kib_peer_t *peer)
 	kiblnd_peer_connect_failed(peer, 1, rc);
 }
 
+bool
+kiblnd_reconnect_peer(kib_peer_t *peer)
+{
+	rwlock_t *glock = &kiblnd_data.kib_global_lock;
+	char *reason = NULL;
+	struct list_head txs;
+	unsigned long flags;
+
+	INIT_LIST_HEAD(&txs);
+
+	write_lock_irqsave(glock, flags);
+	if (!peer->ibp_reconnecting) {
+		if (peer->ibp_accepting)
+			reason = "accepting";
+		else if (peer->ibp_connecting)
+			reason = "connecting";
+		else if (!list_empty(&peer->ibp_conns))
+			reason = "connected";
+		else /* connected then closed */
+			reason = "closed";
+
+		goto no_reconnect;
+	}
+
+	LASSERT(!peer->ibp_accepting && !peer->ibp_connecting &&
+		list_empty(&peer->ibp_conns));
+	peer->ibp_reconnecting = 0;
+
+	if (!kiblnd_peer_active(peer)) {
+		list_splice_init(&peer->ibp_tx_queue, &txs);
+		reason = "unlinked";
+		goto no_reconnect;
+	}
+
+	peer->ibp_connecting++;
+	peer->ibp_reconnected++;
+	write_unlock_irqrestore(glock, flags);
+
+	kiblnd_connect_peer(peer);
+	return true;
+
+no_reconnect:
+	write_unlock_irqrestore(glock, flags);
+
+	CWARN("Abort reconnection of %s: %s\n",
+	      libcfs_nid2str(peer->ibp_nid), reason);
+	kiblnd_txlist_done(peer->ibp_ni, &txs, -ECONNABORTED);
+	return false;
+}
+
 void
 kiblnd_launch_tx(lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid)
 {
@@ -1357,8 +1408,7 @@ kiblnd_launch_tx(lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid)
 	if (peer) {
 		if (list_empty(&peer->ibp_conns)) {
 			/* found a peer, but it's still connecting... */
-			LASSERT(peer->ibp_connecting ||
-				peer->ibp_accepting);
+			LASSERT(kiblnd_peer_connecting(peer));
 			if (tx)
 				list_add_tail(&tx->tx_list,
 					      &peer->ibp_tx_queue);
@@ -1396,8 +1446,7 @@ kiblnd_launch_tx(lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid)
 	if (peer2) {
 		if (list_empty(&peer2->ibp_conns)) {
 			/* found a peer, but it's still connecting... */
-			LASSERT(peer2->ibp_connecting ||
-				peer2->ibp_accepting);
+			LASSERT(kiblnd_peer_connecting(peer2));
 			if (tx)
 				list_add_tail(&tx->tx_list,
 					      &peer2->ibp_tx_queue);
@@ -1817,10 +1866,7 @@ kiblnd_peer_notify(kib_peer_t *peer)
 
 	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
 
-	if (list_empty(&peer->ibp_conns) &&
-	    !peer->ibp_accepting &&
-	    !peer->ibp_connecting &&
-	    peer->ibp_error) {
+	if (kiblnd_peer_idle(peer) && peer->ibp_error) {
 		error = peer->ibp_error;
 		peer->ibp_error = 0;
 
@@ -2020,14 +2066,14 @@ kiblnd_peer_connect_failed(kib_peer_t *peer, int active, int error)
 		peer->ibp_accepting--;
 	}
 
-	if (peer->ibp_connecting ||
-	    peer->ibp_accepting) {
+	if (kiblnd_peer_connecting(peer)) {
 		/* another connection attempt under way... */
 		write_unlock_irqrestore(&kiblnd_data.kib_global_lock,
 					flags);
 		return;
 	}
 
+	peer->ibp_reconnected = 0;
 	if (list_empty(&peer->ibp_conns)) {
 		/* Take peer's blocked transmits to complete with error */
 		list_add(&zombies, &peer->ibp_tx_queue);
@@ -2100,6 +2146,7 @@ kiblnd_connreq_done(kib_conn_t *conn, int status)
 	 */
 	kiblnd_conn_addref(conn);	       /* +1 ref for ibc_list */
 	list_add(&conn->ibc_list, &peer->ibp_conns);
+	peer->ibp_reconnected = 0;
 	if (active)
 		peer->ibp_connecting--;
 	else
@@ -2355,10 +2402,16 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 		if (peer2->ibp_incarnation != reqmsg->ibm_srcstamp ||
 		    peer2->ibp_version     != version) {
 			kiblnd_close_peer_conns_locked(peer2, -ESTALE);
+
+			if (kiblnd_peer_active(peer2)) {
+				peer2->ibp_incarnation = reqmsg->ibm_srcstamp;
+				peer2->ibp_version = version;
+			}
 			write_unlock_irqrestore(g_lock, flags);
 
-			CWARN("Conn stale %s [old ver: %x, new ver: %x]\n",
-			      libcfs_nid2str(nid), peer2->ibp_version, version);
+			CWARN("Conn stale %s version %x/%x incarnation %llu/%llu\n",
+			      libcfs_nid2str(nid), peer2->ibp_version, version,
+			      peer2->ibp_incarnation, reqmsg->ibm_srcstamp);
 
 			kiblnd_peer_decref(peer);
 			rej.ibr_why = IBLND_REJECT_CONN_STALE;
@@ -2377,6 +2430,11 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 			goto failed;
 		}
 
+		/*
+		 * A passive connection is allowed even when this peer is
+		 * waiting for reconnection.
+		 */
+		peer2->ibp_reconnecting = 0;
 		peer2->ibp_accepting++;
 		kiblnd_peer_addref(peer2);
 
@@ -2478,75 +2536,79 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 }
 
 static void
-kiblnd_reconnect(kib_conn_t *conn, int version,
-		 __u64 incarnation, int why, kib_connparams_t *cp)
+kiblnd_check_reconnect(kib_conn_t *conn, int version,
+		       __u64 incarnation, int why, kib_connparams_t *cp)
 {
+	rwlock_t *glock = &kiblnd_data.kib_global_lock;
 	kib_peer_t *peer = conn->ibc_peer;
 	char *reason;
-	int retry = 0;
+	int msg_size = IBLND_MSG_SIZE;
+	int frag_num = -1;
+	int queue_dep = -1;
+	bool reconnect;
 	unsigned long flags;
 
 	LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
 	LASSERT(peer->ibp_connecting > 0);     /* 'conn' at least */
+	LASSERT(!peer->ibp_reconnecting);
 
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+	if (cp) {
+		msg_size = cp->ibcp_max_msg_size;
+		frag_num = cp->ibcp_max_frags;
+		queue_dep = cp->ibcp_queue_depth;
+	}
 
-	/*
+	write_lock_irqsave(glock, flags);
+	/*
 	 * retry connection if it's still needed and no other connection
 	 * attempts (active or passive) are in progress
 	 * NB: reconnect is still needed even when ibp_tx_queue is
 	 * empty if ibp_version != version because reconnect may be
 	 * initiated by kiblnd_query()
 	 */
-	if ((!list_empty(&peer->ibp_tx_queue) ||
-	     peer->ibp_version != version) &&
-	    peer->ibp_connecting == 1 &&
-	    !peer->ibp_accepting) {
-		retry = 1;
-		peer->ibp_connecting++;
-
-		peer->ibp_version     = version;
-		peer->ibp_incarnation = incarnation;
+	reconnect = (!list_empty(&peer->ibp_tx_queue) ||
+		     peer->ibp_version != version) &&
+		    peer->ibp_connecting == 1 &&
+		    !peer->ibp_accepting;
+	if (!reconnect) {
+		reason = "no need";
+		goto out;
 	}
 
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
-	if (!retry)
-		return;
-
 	switch (why) {
 	default:
 		reason = "Unknown";
 		break;
 
 	case IBLND_REJECT_RDMA_FRAGS:
-		if (!cp)
-			goto failed;
-
-		if (conn->ibc_max_frags <= cp->ibcp_max_frags) {
-			CNETERR("Unsupported max frags, peer supports %d\n",
-				cp->ibcp_max_frags);
-			goto failed;
-		} else if (!*kiblnd_tunables.kib_map_on_demand) {
-			CNETERR("map_on_demand must be enabled to support map_on_demand peers\n");
-			goto failed;
+		if (!cp) {
+			reason = "can't negotiate max frags";
+			goto out;
+		}
+		if (!*kiblnd_tunables.kib_map_on_demand) {
+			reason = "map_on_demand must be enabled";
+			goto out;
+		}
+		if (conn->ibc_max_frags <= frag_num) {
+			reason = "unsupported max frags";
+			goto out;
 		}
 
-		peer->ibp_max_frags = cp->ibcp_max_frags;
+		peer->ibp_max_frags = frag_num;
 		reason = "rdma fragments";
 		break;
 
 	case IBLND_REJECT_MSG_QUEUE_SIZE:
-		if (!cp)
-			goto failed;
-
-		if (conn->ibc_queue_depth <= cp->ibcp_queue_depth) {
-			CNETERR("Unsupported queue depth, peer supports %d\n",
-				cp->ibcp_queue_depth);
-			goto failed;
+		if (!cp) {
+			reason = "can't negotiate queue depth";
+			goto out;
+		}
+		if (conn->ibc_queue_depth <= queue_dep) {
+			reason = "unsupported queue depth";
+			goto out;
 		}
 
-		peer->ibp_queue_depth = cp->ibcp_queue_depth;
+		peer->ibp_queue_depth = queue_dep;
 		reason = "queue depth";
 		break;
 
@@ -2563,20 +2625,24 @@ kiblnd_reconnect(kib_conn_t *conn, int version,
 		break;
 	}
 
-	CNETERR("%s: retrying (%s), %x, %x, queue_dep: %d, max_frag: %d, msg_size: %d\n",
-		libcfs_nid2str(peer->ibp_nid),
-		reason, IBLND_MSG_VERSION, version,
-		conn->ibc_queue_depth, conn->ibc_max_frags,
-		cp ? cp->ibcp_max_msg_size : IBLND_MSG_SIZE);
-
-	kiblnd_connect_peer(peer);
-	return;
-failed:
-	write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-	peer->ibp_connecting--;
-	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+	conn->ibc_reconnect = 1;
+	peer->ibp_reconnecting = 1;
+	peer->ibp_version = version;
+	if (incarnation)
+		peer->ibp_incarnation = incarnation;
+out:
+	write_unlock_irqrestore(glock, flags);
 
-	return;
+	CNETERR("%s: %s (%s), %x, %x, msg_size: %d, queue_depth: %d/%d, max_frags: %d/%d\n",
+		libcfs_nid2str(peer->ibp_nid),
+		reconnect ? "reconnect" : "don't reconnect",
+		reason, IBLND_MSG_VERSION, version, msg_size,
+		conn->ibc_queue_depth, queue_dep,
+		conn->ibc_max_frags, frag_num);
+	/*
+	 * If conn::ibc_reconnect is TRUE, connd will reconnect to the peer
+	 * while destroying the zombie.
+	 */
 }
 
 static void
@@ -2589,8 +2655,8 @@ kiblnd_rejected(kib_conn_t *conn, int reason, void *priv, int priv_nob)
 
 	switch (reason) {
 	case IB_CM_REJ_STALE_CONN:
-		kiblnd_reconnect(conn, IBLND_MSG_VERSION, 0,
-				 IBLND_REJECT_CONN_STALE, NULL);
+		kiblnd_check_reconnect(conn, IBLND_MSG_VERSION, 0,
+				       IBLND_REJECT_CONN_STALE, NULL);
 		break;
 
 	case IB_CM_REJ_INVALID_SERVICE_ID:
@@ -2674,8 +2740,9 @@ kiblnd_rejected(kib_conn_t *conn, int reason, void *priv, int priv_nob)
 			case IBLND_REJECT_CONN_UNCOMPAT:
 			case IBLND_REJECT_MSG_QUEUE_SIZE:
 			case IBLND_REJECT_RDMA_FRAGS:
-				kiblnd_reconnect(conn, rej->ibr_version,
-						 incarnation, rej->ibr_why, cp);
+				kiblnd_check_reconnect(conn, rej->ibr_version,
+						       incarnation,
+						       rej->ibr_why, cp);
 				break;
 
 			case IBLND_REJECT_NO_RESOURCES:
@@ -3179,9 +3246,21 @@ kiblnd_disconnect_conn(kib_conn_t *conn)
 	kiblnd_peer_notify(conn->ibc_peer);
 }
 
+/*
+ * High-water mark for reconnections to the same peer; reconnection
+ * attempts should be delayed after more than KIB_RECONN_HIGH_RACE tries.
+ */
+#define KIB_RECONN_HIGH_RACE	10
+/*
+ * Allow connd to take a break and handle other work after this many
+ * consecutive reconnection attempts.
+ */
+#define KIB_RECONN_BREAK	100
+
 int
 kiblnd_connd(void *arg)
 {
+	spinlock_t *lock = &kiblnd_data.kib_connd_lock;
 	wait_queue_t wait;
 	unsigned long flags;
 	kib_conn_t *conn;
@@ -3196,23 +3275,40 @@ kiblnd_connd(void *arg)
 	init_waitqueue_entry(&wait, current);
 	kiblnd_data.kib_connd = current;
 
-	spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+	spin_lock_irqsave(lock, flags);
 
 	while (!kiblnd_data.kib_shutdown) {
+		int reconn = 0;
+
 		dropped_lock = 0;
 
 		if (!list_empty(&kiblnd_data.kib_connd_zombies)) {
+			kib_peer_t *peer = NULL;
+
 			conn = list_entry(kiblnd_data.kib_connd_zombies.next,
 					  kib_conn_t, ibc_list);
 			list_del(&conn->ibc_list);
+			if (conn->ibc_reconnect) {
+				peer = conn->ibc_peer;
+				kiblnd_peer_addref(peer);
+			}
 
-			spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock,
-					       flags);
+			spin_unlock_irqrestore(lock, flags);
 			dropped_lock = 1;
 
-			kiblnd_destroy_conn(conn);
+			kiblnd_destroy_conn(conn, !peer);
 
-			spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+			spin_lock_irqsave(lock, flags);
+			if (!peer)
+				continue;
+
+			conn->ibc_peer = peer;
+			if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE)
+				list_add_tail(&conn->ibc_list,
+					      &kiblnd_data.kib_reconn_list);
+			else
+				list_add_tail(&conn->ibc_list,
+					      &kiblnd_data.kib_reconn_wait);
 		}
 
 		if (!list_empty(&kiblnd_data.kib_connd_conns)) {
@@ -3220,14 +3316,38 @@ kiblnd_connd(void *arg)
 					  kib_conn_t, ibc_list);
 			list_del(&conn->ibc_list);
 
-			spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock,
-					       flags);
+			spin_unlock_irqrestore(lock, flags);
 			dropped_lock = 1;
 
 			kiblnd_disconnect_conn(conn);
 			kiblnd_conn_decref(conn);
 
-			spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+			spin_lock_irqsave(lock, flags);
+		}
+
+		while (reconn < KIB_RECONN_BREAK) {
+			if (kiblnd_data.kib_reconn_sec !=
+			    ktime_get_real_seconds()) {
+				kiblnd_data.kib_reconn_sec = ktime_get_real_seconds();
+				list_splice_init(&kiblnd_data.kib_reconn_wait,
+						 &kiblnd_data.kib_reconn_list);
+			}
+
+			if (list_empty(&kiblnd_data.kib_reconn_list))
+				break;
+
+			conn = list_entry(kiblnd_data.kib_reconn_list.next,
+					  kib_conn_t, ibc_list);
+			list_del(&conn->ibc_list);
+
+			spin_unlock_irqrestore(lock, flags);
+			dropped_lock = 1;
+
+			reconn += kiblnd_reconnect_peer(conn->ibc_peer);
+			kiblnd_peer_decref(conn->ibc_peer);
+			LIBCFS_FREE(conn, sizeof(*conn));
+
+			spin_lock_irqsave(lock, flags);
 		}
 
 		/* careful with the jiffy wrap... */
@@ -3237,7 +3357,7 @@ kiblnd_connd(void *arg)
 			const int p = 1;
 			int chunk = kiblnd_data.kib_peer_hash_size;
 
-			spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);
+			spin_unlock_irqrestore(lock, flags);
 			dropped_lock = 1;
 
 			/*
@@ -3262,7 +3382,7 @@ kiblnd_connd(void *arg)
 			}
 
 			deadline += msecs_to_jiffies(p * MSEC_PER_SEC);
-			spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+			spin_lock_irqsave(lock, flags);
 		}
 
 		if (dropped_lock)
@@ -3271,15 +3391,15 @@ kiblnd_connd(void *arg)
 		/* Nothing to do for 'timeout'  */
 		set_current_state(TASK_INTERRUPTIBLE);
 		add_wait_queue(&kiblnd_data.kib_connd_waitq, &wait);
-		spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);
+		spin_unlock_irqrestore(lock, flags);
 
 		schedule_timeout(timeout);
 
 		remove_wait_queue(&kiblnd_data.kib_connd_waitq, &wait);
-		spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+		spin_lock_irqsave(lock, flags);
 	}
 
-	spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);
+	spin_unlock_irqrestore(lock, flags);
 
 	kiblnd_thread_fini();
 	return 0;
-- 
1.7.1
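
A minimal userspace sketch of the reconnect throttling this patch adds to
kiblnd_connd(); it is not Lustre code: the two constants and the
ibp_reconnected test mirror the diff above, while struct peer_sim,
defer_reconnect() and main() are invented for illustration (the real code
parks deferred zombies on kib_reconn_wait until the next wall-clock second
rather than breaking out of a loop):

#include <stdio.h>

#define KIB_RECONN_HIGH_RACE	10	/* defer peers that raced this often */
#define KIB_RECONN_BREAK	100	/* max reconnects per connd pass */

struct peer_sim {
	int reconnected;	/* stands in for peer::ibp_reconnected */
};

/*
 * Mirrors the zombie check in kiblnd_connd(): past the high-water mark
 * the reconnect is deferred (to kib_reconn_wait in the real code).
 */
static int defer_reconnect(const struct peer_sim *p)
{
	return p->reconnected >= KIB_RECONN_HIGH_RACE;
}

int main(void)
{
	struct peer_sim peer = { 0 };
	int reconn = 0;

	/* connd stops after KIB_RECONN_BREAK reconnects to service other work */
	while (reconn < KIB_RECONN_BREAK) {
		if (defer_reconnect(&peer))
			break;
		peer.reconnected++;
		reconn++;
	}
	printf("%d immediate reconnects before deferring\n", reconn);
	return 0;
}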

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 7/7] staging: lustre: do less intense allocating retry for ko2iblnd
  2016-03-02 23:53 [PATCH 0/7] All remaining outstanding bug fixes for ko2iblnd James Simmons
                   ` (5 preceding siblings ...)
  2016-03-02 23:53 ` [PATCH 6/7] staging: lustre: avoid intensive reconnecting for ko2iblnd James Simmons
@ 2016-03-02 23:53 ` James Simmons
  6 siblings, 0 replies; 8+ messages in thread
From: James Simmons @ 2016-03-02 23:53 UTC (permalink / raw)
  To: Greg Kroah-Hartman, devel, Andreas Dilger, Oleg Drokin
  Cc: Linux Kernel Mailing List, Lustre Development List, Liang Zhen

From: Liang Zhen <liang.zhen@intel.com>

ko2iblnd may retry too frequently when a pool is being grown: every
scheduler spins if another thread is in the middle of allocating a new
pool and cannot finish right away because of high system load. (A
userspace sketch of the backoff this patch adds follows the diff.)

Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7054
Reviewed-on: http://review.whamcloud.com/16470
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
---
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c    |   20 ++++++++++++++++----
 1 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
index 135ccf1..0d32e65 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
@@ -1218,6 +1218,7 @@ static kib_hca_dev_t *kiblnd_current_hdev(kib_dev_t *dev)
 		if (!(i++ % 50))
 			CDEBUG(D_NET, "%s: Wait for failover\n",
 			       dev->ibd_ifname);
+		set_current_state(TASK_INTERRUPTIBLE);
 		schedule_timeout(cfs_time_seconds(1) / 100);
 
 		read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
@@ -1684,6 +1685,9 @@ struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps)
 {
 	struct list_head *node;
 	kib_pool_t *pool;
+	unsigned int interval = 1;
+	unsigned long time_before;
+	unsigned int trips = 0;
 	int rc;
 
  again:
@@ -1709,9 +1713,15 @@ struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps)
 	if (ps->ps_increasing) {
 		/* another thread is allocating a new pool */
 		spin_unlock(&ps->ps_lock);
-		CDEBUG(D_NET, "Another thread is allocating new %s pool, waiting for her to complete\n",
-		       ps->ps_name);
-		schedule();
+		trips++;
+		CDEBUG(D_NET, "Another thread is allocating new %s pool, waiting %d HZs for her to complete. trips = %d\n",
+		       ps->ps_name, interval, trips);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(interval);
+		if (interval < cfs_time_seconds(1))
+			interval *= 2;
+
 		goto again;
 	}
 
@@ -1725,8 +1735,10 @@ struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps)
 	spin_unlock(&ps->ps_lock);
 
 	CDEBUG(D_NET, "%s pool exhausted, allocate new pool\n", ps->ps_name);
-
+	time_before = cfs_time_current();
 	rc = ps->ps_pool_create(ps, ps->ps_pool_size, &pool);
+	CDEBUG(D_NET, "ps_pool_create took %lu HZ to complete",
+	       cfs_time_current() - time_before);
 
 	spin_lock(&ps->ps_lock);
 	ps->ps_increasing = 0;
-- 
1.7.1
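
The retry loop above boils down to replacing an unconditional schedule()
with a sleep that doubles up to cfs_time_seconds(1). A standalone
userspace sketch of that backoff, assuming HZ=100 so one second is 100
ticks; the loop bound and the tick-to-microsecond conversion are invented
for illustration:

#include <stdio.h>
#include <unistd.h>

#define HZ			100	/* assumed tick rate */
#define cfs_time_seconds(s)	((s) * HZ)

int main(void)
{
	unsigned int interval = 1;	/* sleep one tick on the first trip */
	unsigned int trips = 0;

	while (trips < 7) {		/* pretend ps_increasing stays set */
		trips++;
		printf("trip %u: sleeping %u ticks\n", trips, interval);
		usleep(interval * (1000000 / HZ));
		if (interval < cfs_time_seconds(1))
			interval *= 2;	/* double, but cap at one second */
	}
	return 0;
}

Each waiter sleeps progressively longer instead of rescheduling
immediately, so the schedulers stop spinning while another thread grows
the pool.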

^ permalink raw reply related	[flat|nested] 8+ messages in thread
