* [PATCH for-next] RDMA/siw: fix SQ/RQ drain logic to support ib_drain_qp
@ 2019-09-27 22:15 Krishnamraju Eraparaju
From: Krishnamraju Eraparaju @ 2019-09-27 22:15 UTC (permalink / raw)
  To: jgg, bmt; +Cc: linux-rdma, bharat, nirranjan, Krishnamraju Eraparaju

Storage ULPs (iSER & NVMe-oF) use ib_drain_qp() to drain the
QP/CQ properly. But siw currently uses its own routines to drain
the SQ and RQ, which do not let ULPs determine the last CQE.
Failing to wait for the last CQE causes use-after-free issues:

[  +0.001831] general protection fault: 0000 [#1] SMP PTI
[  +0.000002] Call Trace:
[  +0.000026]  ? __ib_process_cq+0x7a/0xc0 [ib_core]
[  +0.000008]  ? ib_poll_handler+0x2c/0x80 [ib_core]
[  +0.000005]  ? irq_poll_softirq+0xae/0x110
[  +0.000005]  ? __do_softirq+0xda/0x2a8
[  +0.000004]  ? run_ksoftirqd+0x26/0x40
[  +0.000005]  ? smpboot_thread_fn+0x10e/0x160
[  +0.000004]  ? kthread+0xf8/0x130
[  +0.000003]  ? sort_range+0x20/0x20
[  +0.000003]  ? kthread_bind+0x10/0x10
[  +0.000003]  ? ret_from_fork+0x35/0x40
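
The teardown pattern these ULPs rely on is roughly the following
(a hypothetical sketch; the function name is invented for
illustration):

	/* illustrative ULP queue teardown, not taken from any real ULP */
	static void ulp_free_queue(struct ib_qp *qp, struct ib_cq *cq)
	{
		ib_drain_qp(qp);	/* flush SQ/RQ, wait for the final CQE */
		ib_destroy_qp(qp);
		ib_free_cq(cq);		/* safe: no late CQE can touch freed memory */
	}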

Hence, change the SQ and RQ drain logic to support the current
IB/core drain semantics, even though this drain method does not
naturally align with the iWARP spec (which forbids post_send/recv
calls in QP ERROR state). More on this is described in the commit
below:
commit 4fe7c2962e11 ("iw_cxgb4: refactor sq/rq drain logic")
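
For context, the core drain flow that siw must now support looks
roughly like this (a simplified sketch of __ib_drain_sq() from
drivers/infiniband/core/verbs.c; locking and error handling
omitted):

	struct ib_drain_cqe {
		struct ib_cqe cqe;
		struct completion done;
	};

	static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
	{
		struct ib_drain_cqe *cqe = container_of(wc->wr_cqe,
						struct ib_drain_cqe, cqe);

		complete(&cqe->done);
	}

	static void __ib_drain_sq(struct ib_qp *qp)
	{
		struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
		struct ib_drain_cqe sdrain;
		struct ib_rdma_wr swr = {
			.wr = {
				.next = NULL,
				{ .wr_cqe = &sdrain.cqe },
				.opcode = IB_WR_RDMA_WRITE,
			},
		};

		ib_modify_qp(qp, &attr, IB_QP_STATE);
		sdrain.cqe.done = ib_drain_qp_done;
		init_completion(&sdrain.done);
		/* posting to an ERROR-state QP is the iWARP spec conflict */
		ib_post_send(qp, &swr.wr, NULL);
		wait_for_completion(&sdrain.done);
	}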

Signed-off-by: Krishnamraju Eraparaju <krishna2@chelsio.com>
---
 drivers/infiniband/sw/siw/siw.h       |  3 +-
 drivers/infiniband/sw/siw/siw_cm.c    |  4 +-
 drivers/infiniband/sw/siw/siw_main.c  | 20 ---------
 drivers/infiniband/sw/siw/siw_verbs.c | 61 +++++++++++++++++++++++++++
 4 files changed, 65 insertions(+), 23 deletions(-)

diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index dba4535494ab..ad4f078e4587 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -240,7 +240,8 @@ enum siw_qp_flags {
 	SIW_RDMA_READ_ENABLED = (1 << 2),
 	SIW_SIGNAL_ALL_WR = (1 << 3),
 	SIW_MPA_CRC = (1 << 4),
-	SIW_QP_IN_DESTROY = (1 << 5)
+	SIW_QP_IN_DESTROY = (1 << 5),
+	SIW_QP_DRAINED_FINAL = (1 << 6)
 };
 
 enum siw_qp_attr_mask {
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 8c1931a57f4a..fb830622d32e 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -857,7 +857,7 @@ static int siw_proc_mpareply(struct siw_cep *cep)
 	memset(&qp_attrs, 0, sizeof(qp_attrs));
 
 	if (rep->params.bits & MPA_RR_FLAG_CRC)
-		qp_attrs.flags = SIW_MPA_CRC;
+		qp_attrs.flags |= SIW_MPA_CRC;
 
 	qp_attrs.irq_size = cep->ird;
 	qp_attrs.orq_size = cep->ord;
@@ -1675,7 +1675,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
 	qp_attrs.irq_size = cep->ird;
 	qp_attrs.sk = cep->sock;
 	if (cep->mpa.hdr.params.bits & MPA_RR_FLAG_CRC)
-		qp_attrs.flags = SIW_MPA_CRC;
+		qp_attrs.flags |= SIW_MPA_CRC;
 	qp_attrs.state = SIW_QP_STATE_RTS;
 
 	siw_dbg_cep(cep, "[QP%u]: moving to rts\n", qp_id(qp));
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index 05a92f997f60..fb01407a310f 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -248,24 +248,6 @@ static struct ib_qp *siw_get_base_qp(struct ib_device *base_dev, int id)
 	return NULL;
 }
 
-static void siw_verbs_sq_flush(struct ib_qp *base_qp)
-{
-	struct siw_qp *qp = to_siw_qp(base_qp);
-
-	down_write(&qp->state_lock);
-	siw_sq_flush(qp);
-	up_write(&qp->state_lock);
-}
-
-static void siw_verbs_rq_flush(struct ib_qp *base_qp)
-{
-	struct siw_qp *qp = to_siw_qp(base_qp);
-
-	down_write(&qp->state_lock);
-	siw_rq_flush(qp);
-	up_write(&qp->state_lock);
-}
-
 static const struct ib_device_ops siw_device_ops = {
 	.owner = THIS_MODULE,
 	.uverbs_abi_ver = SIW_ABI_VERSION,
@@ -284,8 +266,6 @@ static const struct ib_device_ops siw_device_ops = {
 	.destroy_cq = siw_destroy_cq,
 	.destroy_qp = siw_destroy_qp,
 	.destroy_srq = siw_destroy_srq,
-	.drain_rq = siw_verbs_rq_flush,
-	.drain_sq = siw_verbs_sq_flush,
 	.get_dma_mr = siw_get_dma_mr,
 	.get_port_immutable = siw_get_port_immutable,
 	.iw_accept = siw_accept,
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 869e02b69a01..5dd62946a649 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -596,6 +596,13 @@ int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr *attr,
 
 	rv = siw_qp_modify(qp, &new_attrs, siw_attr_mask);
 
+	/* QP state ERROR here ensures that all SQ and RQ entries have been
+	 * drained completely. From here on, no new entries reach the CQ,
+	 * except for the special drain CQEs generated via ib_drain_qp().
+	 */
+	if (qp->attrs.state == SIW_QP_STATE_ERROR)
+		qp->attrs.flags |= SIW_QP_DRAINED_FINAL;
+
 	up_write(&qp->state_lock);
 out:
 	return rv;
@@ -687,6 +694,45 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr,
 	return bytes;
 }
 
+/* SQ final completion routine to support ib_drain_sq(). */
+int siw_sq_final_comp(struct siw_qp *qp, const struct ib_send_wr *wr,
+		      const struct ib_send_wr **bad_wr)
+{
+	struct siw_sqe sqe = {};
+	int rv = 0;
+
+	while (wr) {
+		sqe.id = wr->wr_id;
+		sqe.opcode = wr->opcode;
+		rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR);
+		if (rv) {
+			*bad_wr = wr;
+			break;
+		}
+		wr = wr->next;
+	}
+	return rv;
+}
+
+/* RQ final completion routine to support ib_drain_rq(). */
+int siw_rq_final_comp(struct siw_qp *qp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr)
+{
+	struct siw_rqe rqe = {};
+	int rv = 0;
+
+	while (wr) {
+		rqe.id = wr->wr_id;
+		rv = siw_rqe_complete(qp, &rqe, 0, 0, SIW_WC_WR_FLUSH_ERR);
+		if (rv) {
+			*bad_wr = wr;
+			break;
+		}
+		wr = wr->next;
+	}
+	return rv;
+}
+
 /*
  * siw_post_send()
  *
@@ -705,6 +751,15 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr,
 	unsigned long flags;
 	int rv = 0;
 
+	/* There is currently no way to distinguish the special drain WRs
+	 * posted by ib_drain_sq() from normal WRs, so complete every WR
+	 * arriving in the SIW_QP_DRAINED_FINAL state with FLUSH_ERR,
+	 * assuming only drain WRs are posted in this state.
+	 */
+	if (qp->attrs.flags & SIW_QP_DRAINED_FINAL) {
+		rv = siw_sq_final_comp(qp, wr, bad_wr);
+		return rv;
+	}
 	/*
 	 * Try to acquire QP state lock. Must be non-blocking
 	 * to accommodate kernel clients needs.
@@ -919,6 +974,12 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr,
 		*bad_wr = wr;
 		return -EOPNOTSUPP; /* what else from errno.h? */
 	}
+
+	if (qp->attrs.flags & SIW_QP_DRAINED_FINAL) {
+		rv = siw_rq_final_comp(qp, wr, bad_wr);
+		return rv;
+	}
+
 	/*
 	 * Try to acquire QP state lock. Must be non-blocking
 	 * to accommodate kernel clients needs.
-- 
2.23.0.rc0

