* [[PATCH v3 for-next]] RDMA/siw: Fix SQ/RQ drain logic
@ 2019-10-04 12:53 Bernard Metzler
2019-10-18 13:22 ` Bernard Metzler
2019-10-22 17:10 ` Jason Gunthorpe
0 siblings, 2 replies; 3+ messages in thread
From: Bernard Metzler @ 2019-10-04 12:53 UTC (permalink / raw)
To: linux-rdma
Cc: bharat, jgg, nirranjan, krishna2, bvanassche, leon, Bernard Metzler
Storage ULPs (e.g. iSER & NVMeOF) use ib_drain_qp() to
drain QP/CQ. Current SIW's own drain routines do not properly
wait until all SQ/RQ elements are completed and reaped
from the CQ. This may cause touch after free issues.
New logic relies on generic __ib_drain_sq()/__ib_drain_rq()
posting a final work request, which SIW immediately flushes
to CQ.
Fixes: 303ae1cdfdf7 ("rdma/siw: application interface")
Signed-off-by: Krishnamraju Eraparaju <krishna2@chelsio.com>
Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
---
v2 -> v3:
- Handle ib_drain_sq()/ib_drain_rq() calls when QP's
state is currently locked.
v1 -> v2:
- Accept SQ and RQ work requests, if QP is in ERROR
state. In that case, immediately flush WR's to CQ.
This already provides needed functionality to
support ib_drain_sq()/ib_drain_rq() without extra
state checking in the fast path.
drivers/infiniband/sw/siw/siw_main.c | 20 ----
drivers/infiniband/sw/siw/siw_verbs.c | 144 ++++++++++++++++++++++----
2 files changed, 122 insertions(+), 42 deletions(-)
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index 05a92f997f60..fb01407a310f 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -248,24 +248,6 @@ static struct ib_qp *siw_get_base_qp(struct ib_device *base_dev, int id)
return NULL;
}
-static void siw_verbs_sq_flush(struct ib_qp *base_qp)
-{
- struct siw_qp *qp = to_siw_qp(base_qp);
-
- down_write(&qp->state_lock);
- siw_sq_flush(qp);
- up_write(&qp->state_lock);
-}
-
-static void siw_verbs_rq_flush(struct ib_qp *base_qp)
-{
- struct siw_qp *qp = to_siw_qp(base_qp);
-
- down_write(&qp->state_lock);
- siw_rq_flush(qp);
- up_write(&qp->state_lock);
-}
-
static const struct ib_device_ops siw_device_ops = {
.owner = THIS_MODULE,
.uverbs_abi_ver = SIW_ABI_VERSION,
@@ -284,8 +266,6 @@ static const struct ib_device_ops siw_device_ops = {
.destroy_cq = siw_destroy_cq,
.destroy_qp = siw_destroy_qp,
.destroy_srq = siw_destroy_srq,
- .drain_rq = siw_verbs_rq_flush,
- .drain_sq = siw_verbs_sq_flush,
.get_dma_mr = siw_get_dma_mr,
.get_port_immutable = siw_get_port_immutable,
.iw_accept = siw_accept,
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 869e02b69a01..c0574ddc98fa 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -687,6 +687,47 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr,
return bytes;
}
+/* Complete SQ WR's without processing */
+static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ struct siw_sqe sqe = {};
+ int rv = 0;
+
+ while (wr) {
+ sqe.id = wr->wr_id;
+ sqe.opcode = wr->opcode;
+ rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR);
+ if (rv) {
+ if (bad_wr)
+ *bad_wr = wr;
+ break;
+ }
+ wr = wr->next;
+ }
+ return rv;
+}
+
+/* Complete RQ WR's without processing */
+static int siw_rq_flush_wr(struct siw_qp *qp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct siw_rqe rqe = {};
+ int rv = 0;
+
+ while (wr) {
+ rqe.id = wr->wr_id;
+ rv = siw_rqe_complete(qp, &rqe, 0, 0, SIW_WC_WR_FLUSH_ERR);
+ if (rv) {
+ if (bad_wr)
+ *bad_wr = wr;
+ break;
+ }
+ wr = wr->next;
+ }
+ return rv;
+}
+
/*
* siw_post_send()
*
@@ -705,26 +746,54 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr,
unsigned long flags;
int rv = 0;
+ if (wr && !qp->kernel_verbs) {
+ siw_dbg_qp(qp, "wr must be empty for user mapped sq\n");
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+
/*
* Try to acquire QP state lock. Must be non-blocking
* to accommodate kernel clients needs.
*/
if (!down_read_trylock(&qp->state_lock)) {
- *bad_wr = wr;
- siw_dbg_qp(qp, "QP locked, state %d\n", qp->attrs.state);
- return -ENOTCONN;
+ if (qp->attrs.state == SIW_QP_STATE_ERROR) {
+ /*
+ * ERROR state is final, so we can be sure
+ * this state will not change as long as the QP
+ * exists.
+ *
+ * This handles an ib_drain_sq() call with
+ * a concurrent request to set the QP state
+ * to ERROR.
+ */
+ rv = siw_sq_flush_wr(qp, wr, bad_wr);
+ } else {
+ siw_dbg_qp(qp, "QP locked, state %d\n",
+ qp->attrs.state);
+ *bad_wr = wr;
+ rv = -ENOTCONN;
+ }
+ return rv;
}
if (unlikely(qp->attrs.state != SIW_QP_STATE_RTS)) {
+ if (qp->attrs.state == SIW_QP_STATE_ERROR) {
+ /*
+ * Immediately flush this WR to CQ, if QP
+ * is in ERROR state. SQ is guaranteed to
+ * be empty, so WR completes in-order.
+ *
+ * Typically triggered by ib_drain_sq().
+ */
+ rv = siw_sq_flush_wr(qp, wr, bad_wr);
+ } else {
+ siw_dbg_qp(qp, "QP out of state %d\n",
+ qp->attrs.state);
+ *bad_wr = wr;
+ rv = -ENOTCONN;
+ }
up_read(&qp->state_lock);
- *bad_wr = wr;
- siw_dbg_qp(qp, "QP out of state %d\n", qp->attrs.state);
- return -ENOTCONN;
- }
- if (wr && !qp->kernel_verbs) {
- siw_dbg_qp(qp, "wr must be empty for user mapped sq\n");
- up_read(&qp->state_lock);
- *bad_wr = wr;
- return -EINVAL;
+ return rv;
}
spin_lock_irqsave(&qp->sq_lock, flags);
@@ -919,24 +988,55 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr,
*bad_wr = wr;
return -EOPNOTSUPP; /* what else from errno.h? */
}
- /*
- * Try to acquire QP state lock. Must be non-blocking
- * to accommodate kernel clients needs.
- */
- if (!down_read_trylock(&qp->state_lock)) {
- *bad_wr = wr;
- return -ENOTCONN;
- }
if (!qp->kernel_verbs) {
siw_dbg_qp(qp, "no kernel post_recv for user mapped sq\n");
up_read(&qp->state_lock);
*bad_wr = wr;
return -EINVAL;
}
+
+ /*
+ * Try to acquire QP state lock. Must be non-blocking
+ * to accommodate kernel clients needs.
+ */
+ if (!down_read_trylock(&qp->state_lock)) {
+ if (qp->attrs.state == SIW_QP_STATE_ERROR) {
+ /*
+ * ERROR state is final, so we can be sure
+ * this state will not change as long as the QP
+ * exists.
+ *
+ * This handles an ib_drain_rq() call with
+ * a concurrent request to set the QP state
+ * to ERROR.
+ */
+ rv = siw_rq_flush_wr(qp, wr, bad_wr);
+ } else {
+ siw_dbg_qp(qp, "QP locked, state %d\n",
+ qp->attrs.state);
+ *bad_wr = wr;
+ rv = -ENOTCONN;
+ }
+ return rv;
+ }
if (qp->attrs.state > SIW_QP_STATE_RTS) {
+ if (qp->attrs.state == SIW_QP_STATE_ERROR) {
+ /*
+ * Immediately flush this WR to CQ, if QP
+ * is in ERROR state. RQ is guaranteed to
+ * be empty, so WR completes in-order.
+ *
+ * Typically triggered by ib_drain_rq().
+ */
+ rv = siw_rq_flush_wr(qp, wr, bad_wr);
+ } else {
+ siw_dbg_qp(qp, "QP out of state %d\n",
+ qp->attrs.state);
+ *bad_wr = wr;
+ rv = -ENOTCONN;
+ }
up_read(&qp->state_lock);
- *bad_wr = wr;
- return -EINVAL;
+ return rv;
}
/*
* Serialize potentially multiple producers.
--
2.17.2
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [[PATCH v3 for-next]] RDMA/siw: Fix SQ/RQ drain logic
2019-10-04 12:53 [[PATCH v3 for-next]] RDMA/siw: Fix SQ/RQ drain logic Bernard Metzler
@ 2019-10-18 13:22 ` Bernard Metzler
2019-10-22 17:10 ` Jason Gunthorpe
1 sibling, 0 replies; 3+ messages in thread
From: Bernard Metzler @ 2019-10-18 13:22 UTC (permalink / raw)
To: linux-rdma
Cc: bharat, jgg, nirranjan, krishna2, bvanassche, leon, Bernard Metzler
-----"Bernard Metzler" <bmt@zurich.ibm.com> wrote: -----
>To: linux-rdma@vger.kernel.org
>From: "Bernard Metzler" <bmt@zurich.ibm.com>
>Date: 10/04/2019 02:54PM
>Cc: bharat@chelsio.com, jgg@ziepe.ca, nirranjan@chelsio.com,
>krishna2@chelsio.com, bvanassche@acm.org, leon@kernel.org, "Bernard
>Metzler" <bmt@zurich.ibm.com>
>Subject: [[PATCH v3 for-next]] RDMA/siw: Fix SQ/RQ drain logic
>
>Storage ULPs (e.g. iSER & NVMeOF) use ib_drain_qp() to
>drain QP/CQ. Current SIW's own drain routines do not properly
>wait until all SQ/RQ elements are completed and reaped
>from the CQ. This may cause touch after free issues.
>New logic relies on generic __ib_drain_sq()/__ib_drain_rq()
>posting a final work request, which SIW immediately flushes
>to CQ.
>
>Fixes: 303ae1cdfdf7 ("rdma/siw: application interface")
>Signed-off-by: Krishnamraju Eraparaju <krishna2@chelsio.com>
>Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
>---
>v2 -> v3:
>- Handle ib_drain_sq()/ib_drain_rq() calls when QP's
> state is currently locked.
>
>v1 -> v2:
>- Accept SQ and RQ work requests, if QP is in ERROR
> state. In that case, immediately flush WR's to CQ.
> This already provides needed functionality to
> support ib_drain_sq()/ib_drain_rq() without extra
> state checking in the fast path.
>
> drivers/infiniband/sw/siw/siw_main.c | 20 ----
> drivers/infiniband/sw/siw/siw_verbs.c | 144
>++++++++++++++++++++++----
> 2 files changed, 122 insertions(+), 42 deletions(-)
>
Are there any more comments on this one? I think it has been
sufficiently discussed and it is well understood, and it fixes
the issue at hand.
Thanks very much,
Bernard.
>diff --git a/drivers/infiniband/sw/siw/siw_main.c
>b/drivers/infiniband/sw/siw/siw_main.c
>index 05a92f997f60..fb01407a310f 100644
>--- a/drivers/infiniband/sw/siw/siw_main.c
>+++ b/drivers/infiniband/sw/siw/siw_main.c
>@@ -248,24 +248,6 @@ static struct ib_qp *siw_get_base_qp(struct
>ib_device *base_dev, int id)
> return NULL;
> }
>
>-static void siw_verbs_sq_flush(struct ib_qp *base_qp)
>-{
>- struct siw_qp *qp = to_siw_qp(base_qp);
>-
>- down_write(&qp->state_lock);
>- siw_sq_flush(qp);
>- up_write(&qp->state_lock);
>-}
>-
>-static void siw_verbs_rq_flush(struct ib_qp *base_qp)
>-{
>- struct siw_qp *qp = to_siw_qp(base_qp);
>-
>- down_write(&qp->state_lock);
>- siw_rq_flush(qp);
>- up_write(&qp->state_lock);
>-}
>-
> static const struct ib_device_ops siw_device_ops = {
> .owner = THIS_MODULE,
> .uverbs_abi_ver = SIW_ABI_VERSION,
>@@ -284,8 +266,6 @@ static const struct ib_device_ops siw_device_ops
>= {
> .destroy_cq = siw_destroy_cq,
> .destroy_qp = siw_destroy_qp,
> .destroy_srq = siw_destroy_srq,
>- .drain_rq = siw_verbs_rq_flush,
>- .drain_sq = siw_verbs_sq_flush,
> .get_dma_mr = siw_get_dma_mr,
> .get_port_immutable = siw_get_port_immutable,
> .iw_accept = siw_accept,
>diff --git a/drivers/infiniband/sw/siw/siw_verbs.c
>b/drivers/infiniband/sw/siw/siw_verbs.c
>index 869e02b69a01..c0574ddc98fa 100644
>--- a/drivers/infiniband/sw/siw/siw_verbs.c
>+++ b/drivers/infiniband/sw/siw/siw_verbs.c
>@@ -687,6 +687,47 @@ static int siw_copy_inline_sgl(const struct
>ib_send_wr *core_wr,
> return bytes;
> }
>
>+/* Complete SQ WR's without processing */
>+static int siw_sq_flush_wr(struct siw_qp *qp, const struct
>ib_send_wr *wr,
>+ const struct ib_send_wr **bad_wr)
>+{
>+ struct siw_sqe sqe = {};
>+ int rv = 0;
>+
>+ while (wr) {
>+ sqe.id = wr->wr_id;
>+ sqe.opcode = wr->opcode;
>+ rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR);
>+ if (rv) {
>+ if (bad_wr)
>+ *bad_wr = wr;
>+ break;
>+ }
>+ wr = wr->next;
>+ }
>+ return rv;
>+}
>+
>+/* Complete RQ WR's without processing */
>+static int siw_rq_flush_wr(struct siw_qp *qp, const struct
>ib_recv_wr *wr,
>+ const struct ib_recv_wr **bad_wr)
>+{
>+ struct siw_rqe rqe = {};
>+ int rv = 0;
>+
>+ while (wr) {
>+ rqe.id = wr->wr_id;
>+ rv = siw_rqe_complete(qp, &rqe, 0, 0, SIW_WC_WR_FLUSH_ERR);
>+ if (rv) {
>+ if (bad_wr)
>+ *bad_wr = wr;
>+ break;
>+ }
>+ wr = wr->next;
>+ }
>+ return rv;
>+}
>+
> /*
> * siw_post_send()
> *
>@@ -705,26 +746,54 @@ int siw_post_send(struct ib_qp *base_qp, const
>struct ib_send_wr *wr,
> unsigned long flags;
> int rv = 0;
>
>+ if (wr && !qp->kernel_verbs) {
>+ siw_dbg_qp(qp, "wr must be empty for user mapped sq\n");
>+ *bad_wr = wr;
>+ return -EINVAL;
>+ }
>+
> /*
> * Try to acquire QP state lock. Must be non-blocking
> * to accommodate kernel clients needs.
> */
> if (!down_read_trylock(&qp->state_lock)) {
>- *bad_wr = wr;
>- siw_dbg_qp(qp, "QP locked, state %d\n", qp->attrs.state);
>- return -ENOTCONN;
>+ if (qp->attrs.state == SIW_QP_STATE_ERROR) {
>+ /*
>+ * ERROR state is final, so we can be sure
>+ * this state will not change as long as the QP
>+ * exists.
>+ *
>+ * This handles an ib_drain_sq() call with
>+ * a concurrent request to set the QP state
>+ * to ERROR.
>+ */
>+ rv = siw_sq_flush_wr(qp, wr, bad_wr);
>+ } else {
>+ siw_dbg_qp(qp, "QP locked, state %d\n",
>+ qp->attrs.state);
>+ *bad_wr = wr;
>+ rv = -ENOTCONN;
>+ }
>+ return rv;
> }
> if (unlikely(qp->attrs.state != SIW_QP_STATE_RTS)) {
>+ if (qp->attrs.state == SIW_QP_STATE_ERROR) {
>+ /*
>+ * Immediately flush this WR to CQ, if QP
>+ * is in ERROR state. SQ is guaranteed to
>+ * be empty, so WR completes in-order.
>+ *
>+ * Typically triggered by ib_drain_sq().
>+ */
>+ rv = siw_sq_flush_wr(qp, wr, bad_wr);
>+ } else {
>+ siw_dbg_qp(qp, "QP out of state %d\n",
>+ qp->attrs.state);
>+ *bad_wr = wr;
>+ rv = -ENOTCONN;
>+ }
> up_read(&qp->state_lock);
>- *bad_wr = wr;
>- siw_dbg_qp(qp, "QP out of state %d\n", qp->attrs.state);
>- return -ENOTCONN;
>- }
>- if (wr && !qp->kernel_verbs) {
>- siw_dbg_qp(qp, "wr must be empty for user mapped sq\n");
>- up_read(&qp->state_lock);
>- *bad_wr = wr;
>- return -EINVAL;
>+ return rv;
> }
> spin_lock_irqsave(&qp->sq_lock, flags);
>
>@@ -919,24 +988,55 @@ int siw_post_receive(struct ib_qp *base_qp,
>const struct ib_recv_wr *wr,
> *bad_wr = wr;
> return -EOPNOTSUPP; /* what else from errno.h? */
> }
>- /*
>- * Try to acquire QP state lock. Must be non-blocking
>- * to accommodate kernel clients needs.
>- */
>- if (!down_read_trylock(&qp->state_lock)) {
>- *bad_wr = wr;
>- return -ENOTCONN;
>- }
> if (!qp->kernel_verbs) {
> siw_dbg_qp(qp, "no kernel post_recv for user mapped sq\n");
> up_read(&qp->state_lock);
> *bad_wr = wr;
> return -EINVAL;
> }
>+
>+ /*
>+ * Try to acquire QP state lock. Must be non-blocking
>+ * to accommodate kernel clients needs.
>+ */
>+ if (!down_read_trylock(&qp->state_lock)) {
>+ if (qp->attrs.state == SIW_QP_STATE_ERROR) {
>+ /*
>+ * ERROR state is final, so we can be sure
>+ * this state will not change as long as the QP
>+ * exists.
>+ *
>+ * This handles an ib_drain_rq() call with
>+ * a concurrent request to set the QP state
>+ * to ERROR.
>+ */
>+ rv = siw_rq_flush_wr(qp, wr, bad_wr);
>+ } else {
>+ siw_dbg_qp(qp, "QP locked, state %d\n",
>+ qp->attrs.state);
>+ *bad_wr = wr;
>+ rv = -ENOTCONN;
>+ }
>+ return rv;
>+ }
> if (qp->attrs.state > SIW_QP_STATE_RTS) {
>+ if (qp->attrs.state == SIW_QP_STATE_ERROR) {
>+ /*
>+ * Immediately flush this WR to CQ, if QP
>+ * is in ERROR state. RQ is guaranteed to
>+ * be empty, so WR completes in-order.
>+ *
>+ * Typically triggered by ib_drain_rq().
>+ */
>+ rv = siw_rq_flush_wr(qp, wr, bad_wr);
>+ } else {
>+ siw_dbg_qp(qp, "QP out of state %d\n",
>+ qp->attrs.state);
>+ *bad_wr = wr;
>+ rv = -ENOTCONN;
>+ }
> up_read(&qp->state_lock);
>- *bad_wr = wr;
>- return -EINVAL;
>+ return rv;
> }
> /*
> * Serialize potentially multiple producers.
>--
>2.17.2
>
>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [[PATCH v3 for-next]] RDMA/siw: Fix SQ/RQ drain logic
2019-10-04 12:53 [[PATCH v3 for-next]] RDMA/siw: Fix SQ/RQ drain logic Bernard Metzler
2019-10-18 13:22 ` Bernard Metzler
@ 2019-10-22 17:10 ` Jason Gunthorpe
1 sibling, 0 replies; 3+ messages in thread
From: Jason Gunthorpe @ 2019-10-22 17:10 UTC (permalink / raw)
To: Bernard Metzler; +Cc: linux-rdma, bharat, nirranjan, krishna2, bvanassche, leon
On Fri, Oct 04, 2019 at 02:53:56PM +0200, Bernard Metzler wrote:
> Storage ULPs (e.g. iSER & NVMeOF) use ib_drain_qp() to
> drain QP/CQ. Current SIW's own drain routines do not properly
> wait until all SQ/RQ elements are completed and reaped
> from the CQ. This may cause touch after free issues.
> New logic relies on generic __ib_drain_sq()/__ib_drain_rq()
> posting a final work request, which SIW immediately flushes
> to CQ.
>
> Fixes: 303ae1cdfdf7 ("rdma/siw: application interface")
> Signed-off-by: Krishnamraju Eraparaju <krishna2@chelsio.com>
> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
> ---
> v2 -> v3:
> - Handle ib_drain_sq()/ib_drain_rq() calls when QP's
> state is currently locked.
>
> v1 -> v2:
> - Accept SQ and RQ work requests, if QP is in ERROR
> state. In that case, immediately flush WR's to CQ.
> This already provides needed functionality to
> support ib_drain_sq()/ib_drain_rq() without extra
> state checking in the fast path.
>
> drivers/infiniband/sw/siw/siw_main.c | 20 ----
> drivers/infiniband/sw/siw/siw_verbs.c | 144 ++++++++++++++++++++++----
> 2 files changed, 122 insertions(+), 42 deletions(-)
Applied to for-next, thanks
Jason
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2019-10-22 17:10 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-04 12:53 [[PATCH v3 for-next]] RDMA/siw: Fix SQ/RQ drain logic Bernard Metzler
2019-10-18 13:22 ` Bernard Metzler
2019-10-22 17:10 ` Jason Gunthorpe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).