All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] IB/hfi1: Eliminate races in the SDMA send error path
@ 2018-10-30 16:17 Michael J. Ruhl
  2018-11-28 10:59 ` Greg KH
  0 siblings, 1 reply; 6+ messages in thread
From: Michael J. Ruhl @ 2018-10-30 16:17 UTC (permalink / raw)
  To: stable

From: Michael J. Ruhl <michael.j.ruhl@intel.com>

commit a0e0cb82804a6a21d9067022c2dfdf80d11da429 upstream

pq_update() can only be called in two places: from the completion
function when the complete (npkts) sequence of packets has been
submitted and processed, or from setup function if a subset of the
packets were submitted (i.e. the error path).

Currently both paths can call pq_update() if an error occurrs.  This
race will cause the n_req value to go negative, hanging file_close(),
or cause a crash by freeing the txlist more than once.

Several variables are used to determine SDMA send state.  Most of
these are unnecessary, and have code inspectible races between the
setup function and the completion function, in both the send path and
the error path.

The request 'status' value can be set by the setup or by the
completion function.  This is code inspectibly racy.  Since the status
is not needed in the completion code or by the caller it has been
removed.

The request 'done' value races between usage by the setup and the
completion function.  The completion function does not need this.
When the number of processed packets matches npkts, it is done.

The 'has_error' value races between usage of the setup and the
completion function.  This can cause incorrect error handling and leave
the n_req in an incorrect value (i.e. negative).

Simplify the code by removing all of the unneeded state checks and
variables.

Clean up iovs node when it is freed.

Eliminate race conditions in the error path:

If all packets are submitted, the completion handler will set the
completion status correctly (ok or aborted).

If all packets are not submitted, the caller must wait until the
submitted packets have completed, and then set the completion status.

These two change eliminate the race condition in the error path.

Cc: <stable@vger.kernel.org> # 4.18.x+
Reviewed-by: Mitko Haralanov <mitko.haralanov@intel.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/hfi1/user_sdma.c |   87 ++++++++++++++------------------
 drivers/infiniband/hw/hfi1/user_sdma.h |    3 -
 2 files changed, 39 insertions(+), 51 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 5c88706..39134dd 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -328,7 +328,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 	u8 opcode, sc, vl;
 	u16 pkey;
 	u32 slid;
-	int req_queued = 0;
 	u16 dlid;
 	u32 selector;
 
@@ -392,7 +391,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 	req->data_len  = 0;
 	req->pq = pq;
 	req->cq = cq;
-	req->status = -1;
 	req->ahg_idx = -1;
 	req->iov_idx = 0;
 	req->sent = 0;
@@ -400,12 +398,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 	req->seqcomp = 0;
 	req->seqsubmitted = 0;
 	req->tids = NULL;
-	req->done = 0;
 	req->has_error = 0;
 	INIT_LIST_HEAD(&req->txps);
 
 	memcpy(&req->info, &info, sizeof(info));
 
+	/* The request is initialized, count it */
+	atomic_inc(&pq->n_reqs);
+
 	if (req_opcode(info.ctrl) == EXPECTED) {
 		/* expected must have a TID info and at least one data vector */
 		if (req->data_iovs < 2) {
@@ -500,7 +500,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 		ret = pin_vector_pages(req, &req->iovs[i]);
 		if (ret) {
 			req->data_iovs = i;
-			req->status = ret;
 			goto free_req;
 		}
 		req->data_len += req->iovs[i].iov.iov_len;
@@ -561,14 +560,10 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 		req->ahg_idx = sdma_ahg_alloc(req->sde);
 
 	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
-	atomic_inc(&pq->n_reqs);
-	req_queued = 1;
 	/* Send the first N packets in the request to buy us some time */
 	ret = user_sdma_send_pkts(req, pcount);
-	if (unlikely(ret < 0 && ret != -EBUSY)) {
-		req->status = ret;
+	if (unlikely(ret < 0 && ret != -EBUSY))
 		goto free_req;
-	}
 
 	/*
 	 * It is possible that the SDMA engine would have processed all the
@@ -588,14 +583,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 	while (req->seqsubmitted != req->info.npkts) {
 		ret = user_sdma_send_pkts(req, pcount);
 		if (ret < 0) {
-			if (ret != -EBUSY) {
-				req->status = ret;
-				WRITE_ONCE(req->has_error, 1);
-				if (READ_ONCE(req->seqcomp) ==
-				    req->seqsubmitted - 1)
-					goto free_req;
-				return ret;
-			}
+			if (ret != -EBUSY)
+				goto free_req;
 			wait_event_interruptible_timeout(
 				pq->busy.wait_dma,
 				(pq->state == SDMA_PKT_Q_ACTIVE),
@@ -606,10 +595,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 	*count += idx;
 	return 0;
 free_req:
-	user_sdma_free_request(req, true);
-	if (req_queued)
+	/*
+	 * If the submitted seqsubmitted == npkts, the completion routine
+	 * controls the final state.  If sequbmitted < npkts, wait for any
+	 * outstanding packets to finish before cleaning up.
+	 */
+	if (req->seqsubmitted < req->info.npkts) {
+		if (req->seqsubmitted)
+			wait_event(pq->busy.wait_dma,
+				   (req->seqcomp == req->seqsubmitted - 1));
+		user_sdma_free_request(req, true);
 		pq_update(pq);
-	set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
+		set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
+	}
 	return ret;
 }
 
@@ -917,7 +915,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 	ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
 	req->seqsubmitted += count;
 	if (req->seqsubmitted == req->info.npkts) {
-		WRITE_ONCE(req->done, 1);
 		/*
 		 * The txreq has already been submitted to the HW queue
 		 * so we can free the AHG entry now. Corruption will not
@@ -1365,11 +1362,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
 	return idx;
 }
 
-/*
- * SDMA tx request completion callback. Called when the SDMA progress
- * state machine gets notification that the SDMA descriptors for this
- * tx request have been processed by the DMA engine. Called in
- * interrupt context.
+/**
+ * user_sdma_txreq_cb() - SDMA tx request completion callback.
+ * @txreq: valid sdma tx request
+ * @status: success/failure of request
+ *
+ * Called when the SDMA progress state machine gets notification that
+ * the SDMA descriptors for this tx request have been processed by the
+ * DMA engine. Called in interrupt context.
+ * Only do work on completed sequences.
  */
 static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
 {
@@ -1378,7 +1379,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
 	struct user_sdma_request *req;
 	struct hfi1_user_sdma_pkt_q *pq;
 	struct hfi1_user_sdma_comp_q *cq;
-	u16 idx;
+	enum hfi1_sdma_comp_state state = COMPLETE;
 
 	if (!tx->req)
 		return;
@@ -1391,31 +1392,19 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
 		SDMA_DBG(req, "SDMA completion with error %d",
 			 status);
 		WRITE_ONCE(req->has_error, 1);
+		state = ERROR;
 	}
 
 	req->seqcomp = tx->seqnum;
 	kmem_cache_free(pq->txreq_cache, tx);
-	tx = NULL;
-
-	idx = req->info.comp_idx;
-	if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
-		if (req->seqcomp == req->info.npkts - 1) {
-			req->status = 0;
-			user_sdma_free_request(req, false);
-			pq_update(pq);
-			set_comp_state(pq, cq, idx, COMPLETE, 0);
-		}
-	} else {
-		if (status != SDMA_TXREQ_S_OK)
-			req->status = status;
-		if (req->seqcomp == (READ_ONCE(req->seqsubmitted) - 1) &&
-		    (READ_ONCE(req->done) ||
-		     READ_ONCE(req->has_error))) {
-			user_sdma_free_request(req, false);
-			pq_update(pq);
-			set_comp_state(pq, cq, idx, ERROR, req->status);
-		}
-	}
+
+	/* sequence isn't complete?  We are done */
+	if (req->seqcomp != req->info.npkts - 1)
+		return;
+
+	user_sdma_free_request(req, false);
+	set_comp_state(pq, cq, req->info.comp_idx, state, status);
+	pq_update(pq);
 }
 
 static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
@@ -1448,6 +1437,8 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
 		if (!node)
 			continue;
 
+		req->iovs[i].node = NULL;
+
 		if (unpin)
 			hfi1_mmu_rb_remove(req->pq->handler,
 					   &node->rb);
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index d2bc77f..0ae0645 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -205,8 +205,6 @@ struct user_sdma_request {
 	/* Writeable fields shared with interrupt */
 	u64 seqcomp ____cacheline_aligned_in_smp;
 	u64 seqsubmitted;
-	/* status of the last txreq completed */
-	int status;
 
 	/* Send side fields */
 	struct list_head txps ____cacheline_aligned_in_smp;
@@ -228,7 +226,6 @@ struct user_sdma_request {
 	u16 tididx;
 	/* progress index moving along the iovs array */
 	u8 iov_idx;
-	u8 done;
 	u8 has_error;
 
 	struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] IB/hfi1: Eliminate races in the SDMA send error path
  2018-10-30 16:17 [PATCH] IB/hfi1: Eliminate races in the SDMA send error path Michael J. Ruhl
@ 2018-11-28 10:59 ` Greg KH
  0 siblings, 0 replies; 6+ messages in thread
From: Greg KH @ 2018-11-28 10:59 UTC (permalink / raw)
  To: Michael J. Ruhl; +Cc: stable

On Tue, Oct 30, 2018 at 12:17:32PM -0400, Michael J. Ruhl wrote:
> From: Michael J. Ruhl <michael.j.ruhl@intel.com>
> 
> commit a0e0cb82804a6a21d9067022c2dfdf80d11da429 upstream
> 
> pq_update() can only be called in two places: from the completion
> function when the complete (npkts) sequence of packets has been
> submitted and processed, or from setup function if a subset of the
> packets were submitted (i.e. the error path).
> 
> Currently both paths can call pq_update() if an error occurrs.  This
> race will cause the n_req value to go negative, hanging file_close(),
> or cause a crash by freeing the txlist more than once.
> 
> Several variables are used to determine SDMA send state.  Most of
> these are unnecessary, and have code inspectible races between the
> setup function and the completion function, in both the send path and
> the error path.
> 
> The request 'status' value can be set by the setup or by the
> completion function.  This is code inspectibly racy.  Since the status
> is not needed in the completion code or by the caller it has been
> removed.
> 
> The request 'done' value races between usage by the setup and the
> completion function.  The completion function does not need this.
> When the number of processed packets matches npkts, it is done.
> 
> The 'has_error' value races between usage of the setup and the
> completion function.  This can cause incorrect error handling and leave
> the n_req in an incorrect value (i.e. negative).
> 
> Simplify the code by removing all of the unneeded state checks and
> variables.
> 
> Clean up iovs node when it is freed.
> 
> Eliminate race conditions in the error path:
> 
> If all packets are submitted, the completion handler will set the
> completion status correctly (ok or aborted).
> 
> If all packets are not submitted, the caller must wait until the
> submitted packets have completed, and then set the completion status.
> 
> These two change eliminate the race condition in the error path.
> 
> Cc: <stable@vger.kernel.org> # 4.18.x+

Now applied, thanks.

greg k-h

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] IB/hfi1: Eliminate races in the SDMA send error path
  2018-10-31 14:48 Michael J. Ruhl
@ 2018-11-28 11:00 ` Greg KH
  0 siblings, 0 replies; 6+ messages in thread
From: Greg KH @ 2018-11-28 11:00 UTC (permalink / raw)
  To: Michael J. Ruhl; +Cc: stable

On Wed, Oct 31, 2018 at 10:48:50AM -0400, Michael J. Ruhl wrote:
> From: Michael J. Ruhl <michael.j.ruhl@intel.com>
> 
> commit a0e0cb82804a6a21d9067022c2dfdf80d11da429 upstream
> 
> pq_update() can only be called in two places: from the completion
> function when the complete (npkts) sequence of packets has been
> submitted and processed, or from setup function if a subset of the
> packets were submitted (i.e. the error path).
> 
> Currently both paths can call pq_update() if an error occurrs.  This
> race will cause the n_req value to go negative, hanging file_close(),
> or cause a crash by freeing the txlist more than once.
> 
> Several variables are used to determine SDMA send state.  Most of
> these are unnecessary, and have code inspectible races between the
> setup function and the completion function, in both the send path and
> the error path.
> 
> The request 'status' value can be set by the setup or by the
> completion function.  This is code inspectibly racy.  Since the status
> is not needed in the completion code or by the caller it has been
> removed.
> 
> The request 'done' value races between usage by the setup and the
> completion function.  The completion function does not need this.
> When the number of processed packets matches npkts, it is done.
> 
> The 'has_error' value races between usage of the setup and the
> completion function.  This can cause incorrect error handling and leave
> the n_req in an incorrect value (i.e. negative).
> 
> Simplify the code by removing all of the unneeded state checks and
> variables.
> 
> Clean up iovs node when it is freed.
> 
> Eliminate race conditions in the error path:
> 
> If all packets are submitted, the completion handler will set the
> completion status correctly (ok or aborted).
> 
> If all packets are not submitted, the caller must wait until the
> submitted packets have completed, and then set the completion status.
> 
> These two change eliminate the race condition in the error path.
> 
> Cc: <stable@vger.kernel.org> # 4.14.0

Now applied, thanks.

greg k-h

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] IB/hfi1: Eliminate races in the SDMA send error path
  2018-10-30 16:31 Michael J. Ruhl
@ 2018-11-28 11:00 ` Greg KH
  0 siblings, 0 replies; 6+ messages in thread
From: Greg KH @ 2018-11-28 11:00 UTC (permalink / raw)
  To: Michael J. Ruhl; +Cc: stable

On Tue, Oct 30, 2018 at 12:31:13PM -0400, Michael J. Ruhl wrote:
> From: Michael J. Ruhl <michael.j.ruhl@intel.com>
> 
> commit a0e0cb82804a6a21d9067022c2dfdf80d11da429 upstream
> 
> pq_update() can only be called in two places: from the completion
> function when the complete (npkts) sequence of packets has been
> submitted and processed, or from setup function if a subset of the
> packets were submitted (i.e. the error path).
> 
> Currently both paths can call pq_update() if an error occurrs.  This
> race will cause the n_req value to go negative, hanging file_close(),
> or cause a crash by freeing the txlist more than once.
> 
> Several variables are used to determine SDMA send state.  Most of
> these are unnecessary, and have code inspectible races between the
> setup function and the completion function, in both the send path and
> the error path.
> 
> The request 'status' value can be set by the setup or by the
> completion function.  This is code inspectibly racy.  Since the status
> is not needed in the completion code or by the caller it has been
> removed.
> 
> The request 'done' value races between usage by the setup and the
> completion function.  The completion function does not need this.
> When the number of processed packets matches npkts, it is done.
> 
> The 'has_error' value races between usage of the setup and the
> completion function.  This can cause incorrect error handling and leave
> the n_req in an incorrect value (i.e. negative).
> 
> Simplify the code by removing all of the unneeded state checks and
> variables.
> 
> Clean up iovs node when it is freed.
> 
> Eliminate race conditions in the error path:
> 
> If all packets are submitted, the completion handler will set the
> completion status correctly (ok or aborted).
> 
> If all packets are not submitted, the caller must wait until the
> submitted packets have completed, and then set the completion status.
> 
> These two change eliminate the race condition in the error path.
> 
> Cc: <stable@vger.kernel.org> # 4.9.0

Now applied, thanks.

greg k-h

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] IB/hfi1: Eliminate races in the SDMA send error path
@ 2018-10-31 14:48 Michael J. Ruhl
  2018-11-28 11:00 ` Greg KH
  0 siblings, 1 reply; 6+ messages in thread
From: Michael J. Ruhl @ 2018-10-31 14:48 UTC (permalink / raw)
  To: stable

From: Michael J. Ruhl <michael.j.ruhl@intel.com>

commit a0e0cb82804a6a21d9067022c2dfdf80d11da429 upstream

pq_update() can only be called in two places: from the completion
function when the complete (npkts) sequence of packets has been
submitted and processed, or from setup function if a subset of the
packets were submitted (i.e. the error path).

Currently both paths can call pq_update() if an error occurrs.  This
race will cause the n_req value to go negative, hanging file_close(),
or cause a crash by freeing the txlist more than once.

Several variables are used to determine SDMA send state.  Most of
these are unnecessary, and have code inspectible races between the
setup function and the completion function, in both the send path and
the error path.

The request 'status' value can be set by the setup or by the
completion function.  This is code inspectibly racy.  Since the status
is not needed in the completion code or by the caller it has been
removed.

The request 'done' value races between usage by the setup and the
completion function.  The completion function does not need this.
When the number of processed packets matches npkts, it is done.

The 'has_error' value races between usage of the setup and the
completion function.  This can cause incorrect error handling and leave
the n_req in an incorrect value (i.e. negative).

Simplify the code by removing all of the unneeded state checks and
variables.

Clean up iovs node when it is freed.

Eliminate race conditions in the error path:

If all packets are submitted, the completion handler will set the
completion status correctly (ok or aborted).

If all packets are not submitted, the caller must wait until the
submitted packets have completed, and then set the completion status.

These two change eliminate the race condition in the error path.

Cc: <stable@vger.kernel.org> # 4.14.0
Reviewed-by: Mitko Haralanov <mitko.haralanov@intel.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
---
 drivers/infiniband/hw/hfi1/user_sdma.c |   87 ++++++++++++++------------------
 drivers/infiniband/hw/hfi1/user_sdma.h |    3 -
 2 files changed, 39 insertions(+), 51 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 8c954a0..c14ec04 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -328,7 +328,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 	u8 opcode, sc, vl;
 	u16 pkey;
 	u32 slid;
-	int req_queued = 0;
 	u16 dlid;
 	u32 selector;
 
@@ -392,7 +391,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 	req->data_len  = 0;
 	req->pq = pq;
 	req->cq = cq;
-	req->status = -1;
 	req->ahg_idx = -1;
 	req->iov_idx = 0;
 	req->sent = 0;
@@ -400,12 +398,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 	req->seqcomp = 0;
 	req->seqsubmitted = 0;
 	req->tids = NULL;
-	req->done = 0;
 	req->has_error = 0;
 	INIT_LIST_HEAD(&req->txps);
 
 	memcpy(&req->info, &info, sizeof(info));
 
+	/* The request is initialized, count it */
+	atomic_inc(&pq->n_reqs);
+
 	if (req_opcode(info.ctrl) == EXPECTED) {
 		/* expected must have a TID info and at least one data vector */
 		if (req->data_iovs < 2) {
@@ -500,7 +500,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 		ret = pin_vector_pages(req, &req->iovs[i]);
 		if (ret) {
 			req->data_iovs = i;
-			req->status = ret;
 			goto free_req;
 		}
 		req->data_len += req->iovs[i].iov.iov_len;
@@ -561,14 +560,10 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 		req->ahg_idx = sdma_ahg_alloc(req->sde);
 
 	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
-	atomic_inc(&pq->n_reqs);
-	req_queued = 1;
 	/* Send the first N packets in the request to buy us some time */
 	ret = user_sdma_send_pkts(req, pcount);
-	if (unlikely(ret < 0 && ret != -EBUSY)) {
-		req->status = ret;
+	if (unlikely(ret < 0 && ret != -EBUSY))
 		goto free_req;
-	}
 
 	/*
 	 * It is possible that the SDMA engine would have processed all the
@@ -588,14 +583,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 	while (req->seqsubmitted != req->info.npkts) {
 		ret = user_sdma_send_pkts(req, pcount);
 		if (ret < 0) {
-			if (ret != -EBUSY) {
-				req->status = ret;
-				WRITE_ONCE(req->has_error, 1);
-				if (ACCESS_ONCE(req->seqcomp) ==
-				    req->seqsubmitted - 1)
-					goto free_req;
-				return ret;
-			}
+			if (ret != -EBUSY)
+				goto free_req;
 			wait_event_interruptible_timeout(
 				pq->busy.wait_dma,
 				(pq->state == SDMA_PKT_Q_ACTIVE),
@@ -606,10 +595,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 	*count += idx;
 	return 0;
 free_req:
-	user_sdma_free_request(req, true);
-	if (req_queued)
+	/*
+	 * If the submitted seqsubmitted == npkts, the completion routine
+	 * controls the final state.  If sequbmitted < npkts, wait for any
+	 * outstanding packets to finish before cleaning up.
+	 */
+	if (req->seqsubmitted < req->info.npkts) {
+		if (req->seqsubmitted)
+			wait_event(pq->busy.wait_dma,
+				   (req->seqcomp == req->seqsubmitted - 1));
+		user_sdma_free_request(req, true);
 		pq_update(pq);
-	set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
+		set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
+	}
 	return ret;
 }
 
@@ -917,7 +915,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 	ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
 	req->seqsubmitted += count;
 	if (req->seqsubmitted == req->info.npkts) {
-		WRITE_ONCE(req->done, 1);
 		/*
 		 * The txreq has already been submitted to the HW queue
 		 * so we can free the AHG entry now. Corruption will not
@@ -1347,11 +1344,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
 	return diff;
 }
 
-/*
- * SDMA tx request completion callback. Called when the SDMA progress
- * state machine gets notification that the SDMA descriptors for this
- * tx request have been processed by the DMA engine. Called in
- * interrupt context.
+/**
+ * user_sdma_txreq_cb() - SDMA tx request completion callback.
+ * @txreq: valid sdma tx request
+ * @status: success/failure of request
+ *
+ * Called when the SDMA progress state machine gets notification that
+ * the SDMA descriptors for this tx request have been processed by the
+ * DMA engine. Called in interrupt context.
+ * Only do work on completed sequences.
  */
 static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
 {
@@ -1360,7 +1361,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
 	struct user_sdma_request *req;
 	struct hfi1_user_sdma_pkt_q *pq;
 	struct hfi1_user_sdma_comp_q *cq;
-	u16 idx;
+	enum hfi1_sdma_comp_state state = COMPLETE;
 
 	if (!tx->req)
 		return;
@@ -1373,31 +1374,19 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
 		SDMA_DBG(req, "SDMA completion with error %d",
 			 status);
 		WRITE_ONCE(req->has_error, 1);
+		state = ERROR;
 	}
 
 	req->seqcomp = tx->seqnum;
 	kmem_cache_free(pq->txreq_cache, tx);
-	tx = NULL;
-
-	idx = req->info.comp_idx;
-	if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
-		if (req->seqcomp == req->info.npkts - 1) {
-			req->status = 0;
-			user_sdma_free_request(req, false);
-			pq_update(pq);
-			set_comp_state(pq, cq, idx, COMPLETE, 0);
-		}
-	} else {
-		if (status != SDMA_TXREQ_S_OK)
-			req->status = status;
-		if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) &&
-		    (READ_ONCE(req->done) ||
-		     READ_ONCE(req->has_error))) {
-			user_sdma_free_request(req, false);
-			pq_update(pq);
-			set_comp_state(pq, cq, idx, ERROR, req->status);
-		}
-	}
+
+	/* sequence isn't complete?  We are done */
+	if (req->seqcomp != req->info.npkts - 1)
+		return;
+
+	user_sdma_free_request(req, false);
+	set_comp_state(pq, cq, req->info.comp_idx, state, status);
+	pq_update(pq);
 }
 
 static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
@@ -1430,6 +1419,8 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
 			if (!node)
 				continue;
 
+			req->iovs[i].node = NULL;
+
 			if (unpin)
 				hfi1_mmu_rb_remove(req->pq->handler,
 						   &node->rb);
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index 9b8bb56..5af5233 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -196,8 +196,6 @@ struct user_sdma_request {
 	/* Writeable fields shared with interrupt */
 	u64 seqcomp ____cacheline_aligned_in_smp;
 	u64 seqsubmitted;
-	/* status of the last txreq completed */
-	int status;
 
 	/* Send side fields */
 	struct list_head txps ____cacheline_aligned_in_smp;
@@ -219,7 +217,6 @@ struct user_sdma_request {
 	u16 tididx;
 	/* progress index moving along the iovs array */
 	u8 iov_idx;
-	u8 done;
 	u8 has_error;
 
 	struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH] IB/hfi1: Eliminate races in the SDMA send error path
@ 2018-10-30 16:31 Michael J. Ruhl
  2018-11-28 11:00 ` Greg KH
  0 siblings, 1 reply; 6+ messages in thread
From: Michael J. Ruhl @ 2018-10-30 16:31 UTC (permalink / raw)
  To: stable

From: Michael J. Ruhl <michael.j.ruhl@intel.com>

commit a0e0cb82804a6a21d9067022c2dfdf80d11da429 upstream

pq_update() can only be called in two places: from the completion
function when the complete (npkts) sequence of packets has been
submitted and processed, or from setup function if a subset of the
packets were submitted (i.e. the error path).

Currently both paths can call pq_update() if an error occurrs.  This
race will cause the n_req value to go negative, hanging file_close(),
or cause a crash by freeing the txlist more than once.

Several variables are used to determine SDMA send state.  Most of
these are unnecessary, and have code inspectible races between the
setup function and the completion function, in both the send path and
the error path.

The request 'status' value can be set by the setup or by the
completion function.  This is code inspectibly racy.  Since the status
is not needed in the completion code or by the caller it has been
removed.

The request 'done' value races between usage by the setup and the
completion function.  The completion function does not need this.
When the number of processed packets matches npkts, it is done.

The 'has_error' value races between usage of the setup and the
completion function.  This can cause incorrect error handling and leave
the n_req in an incorrect value (i.e. negative).

Simplify the code by removing all of the unneeded state checks and
variables.

Clean up iovs node when it is freed.

Eliminate race conditions in the error path:

If all packets are submitted, the completion handler will set the
completion status correctly (ok or aborted).

If all packets are not submitted, the caller must wait until the
submitted packets have completed, and then set the completion status.

These two change eliminate the race condition in the error path.

Cc: <stable@vger.kernel.org> # 4.9.0
Reviewed-by: Mitko Haralanov <mitko.haralanov@intel.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
---
 drivers/infiniband/hw/hfi1/user_sdma.c |  106 ++++++++++++++------------------
 1 file changed, 45 insertions(+), 61 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 018a415..619475c 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -148,11 +148,8 @@
 #define TXREQ_FLAGS_REQ_LAST_PKT BIT(0)
 
 /* SDMA request flag bits */
-#define SDMA_REQ_FOR_THREAD 1
-#define SDMA_REQ_SEND_DONE  2
-#define SDMA_REQ_HAVE_AHG   3
-#define SDMA_REQ_HAS_ERROR  4
-#define SDMA_REQ_DONE_ERROR 5
+#define SDMA_REQ_HAVE_AHG   1
+#define SDMA_REQ_HAS_ERROR  2
 
 #define SDMA_PKT_Q_INACTIVE BIT(0)
 #define SDMA_PKT_Q_ACTIVE   BIT(1)
@@ -252,8 +249,6 @@ struct user_sdma_request {
 	u64 seqsubmitted;
 	struct list_head txps;
 	unsigned long flags;
-	/* status of the last txreq completed */
-	int status;
 };
 
 /*
@@ -546,7 +541,6 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 	struct sdma_req_info info;
 	struct user_sdma_request *req;
 	u8 opcode, sc, vl;
-	int req_queued = 0;
 	u16 dlid;
 	u32 selector;
 
@@ -611,11 +605,13 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 	req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
 	req->pq = pq;
 	req->cq = cq;
-	req->status = -1;
 	INIT_LIST_HEAD(&req->txps);
 
 	memcpy(&req->info, &info, sizeof(info));
 
+	/* The request is initialized, count it */
+	atomic_inc(&pq->n_reqs);
+
 	if (req_opcode(info.ctrl) == EXPECTED) {
 		/* expected must have a TID info and at least one data vector */
 		if (req->data_iovs < 2) {
@@ -704,7 +700,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 		memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec));
 		ret = pin_vector_pages(req, &req->iovs[i]);
 		if (ret) {
-			req->status = ret;
+			req->data_iovs = i;
 			goto free_req;
 		}
 		req->data_len += req->iovs[i].iov.iov_len;
@@ -772,14 +768,10 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 	}
 
 	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
-	atomic_inc(&pq->n_reqs);
-	req_queued = 1;
 	/* Send the first N packets in the request to buy us some time */
 	ret = user_sdma_send_pkts(req, pcount);
-	if (unlikely(ret < 0 && ret != -EBUSY)) {
-		req->status = ret;
+	if (unlikely(ret < 0 && ret != -EBUSY))
 		goto free_req;
-	}
 
 	/*
 	 * It is possible that the SDMA engine would have processed all the
@@ -796,17 +788,11 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 	 * request have been submitted to the SDMA engine. However, it
 	 * will not wait for send completions.
 	 */
-	while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
+	while (req->seqsubmitted != req->info.npkts) {
 		ret = user_sdma_send_pkts(req, pcount);
 		if (ret < 0) {
-			if (ret != -EBUSY) {
-				req->status = ret;
-				set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
-				if (ACCESS_ONCE(req->seqcomp) ==
-				    req->seqsubmitted - 1)
-					goto free_req;
-				return ret;
-			}
+			if (ret != -EBUSY)
+				goto free_req;
 			wait_event_interruptible_timeout(
 				pq->busy.wait_dma,
 				(pq->state == SDMA_PKT_Q_ACTIVE),
@@ -817,10 +803,19 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 	*count += idx;
 	return 0;
 free_req:
-	user_sdma_free_request(req, true);
-	if (req_queued)
+	/*
+	 * If the submitted seqsubmitted == npkts, the completion routine
+	 * controls the final state.  If sequbmitted < npkts, wait for any
+	 * outstanding packets to finish before cleaning up.
+	 */
+	if (req->seqsubmitted < req->info.npkts) {
+		if (req->seqsubmitted)
+			wait_event(pq->busy.wait_dma,
+				   (req->seqcomp == req->seqsubmitted - 1));
+		user_sdma_free_request(req, true);
 		pq_update(pq);
-	set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
+		set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
+	}
 	return ret;
 }
 
@@ -903,10 +898,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 	pq = req->pq;
 
 	/* If tx completion has reported an error, we are done. */
-	if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
-		set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+	if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags))
 		return -EFAULT;
-	}
 
 	/*
 	 * Check if we might have sent the entire request already
@@ -929,10 +922,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 		 * with errors. If so, we are not going to process any
 		 * more packets from this request.
 		 */
-		if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
-			set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+		if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags))
 			return -EFAULT;
-		}
 
 		tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
 		if (!tx)
@@ -1090,7 +1081,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 	ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
 	req->seqsubmitted += count;
 	if (req->seqsubmitted == req->info.npkts) {
-		set_bit(SDMA_REQ_SEND_DONE, &req->flags);
 		/*
 		 * The txreq has already been submitted to the HW queue
 		 * so we can free the AHG entry now. Corruption will not
@@ -1489,11 +1479,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
 	return diff;
 }
 
-/*
- * SDMA tx request completion callback. Called when the SDMA progress
- * state machine gets notification that the SDMA descriptors for this
- * tx request have been processed by the DMA engine. Called in
- * interrupt context.
+/**
+ * user_sdma_txreq_cb() - SDMA tx request completion callback.
+ * @txreq: valid sdma tx request
+ * @status: success/failure of request
+ *
+ * Called when the SDMA progress state machine gets notification that
+ * the SDMA descriptors for this tx request have been processed by the
+ * DMA engine. Called in interrupt context.
+ * Only do work on completed sequences.
  */
 static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
 {
@@ -1502,7 +1496,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
 	struct user_sdma_request *req;
 	struct hfi1_user_sdma_pkt_q *pq;
 	struct hfi1_user_sdma_comp_q *cq;
-	u16 idx;
+	enum hfi1_sdma_comp_state state = COMPLETE;
 
 	if (!tx->req)
 		return;
@@ -1515,31 +1509,19 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
 		SDMA_DBG(req, "SDMA completion with error %d",
 			 status);
 		set_bit(SDMA_REQ_HAS_ERROR, &req->flags);
+		state = ERROR;
 	}
 
 	req->seqcomp = tx->seqnum;
 	kmem_cache_free(pq->txreq_cache, tx);
-	tx = NULL;
-
-	idx = req->info.comp_idx;
-	if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
-		if (req->seqcomp == req->info.npkts - 1) {
-			req->status = 0;
-			user_sdma_free_request(req, false);
-			pq_update(pq);
-			set_comp_state(pq, cq, idx, COMPLETE, 0);
-		}
-	} else {
-		if (status != SDMA_TXREQ_S_OK)
-			req->status = status;
-		if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) &&
-		    (test_bit(SDMA_REQ_SEND_DONE, &req->flags) ||
-		     test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) {
-			user_sdma_free_request(req, false);
-			pq_update(pq);
-			set_comp_state(pq, cq, idx, ERROR, req->status);
-		}
-	}
+
+	/* sequence isn't complete?  We are done */
+	if (req->seqcomp != req->info.npkts - 1)
+		return;
+
+	user_sdma_free_request(req, false);
+	set_comp_state(pq, cq, req->info.comp_idx, state, status);
+	pq_update(pq);
 }
 
 static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
@@ -1572,6 +1554,8 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
 			if (!node)
 				continue;
 
+			req->iovs[i].node = NULL;
+
 			if (unpin)
 				hfi1_mmu_rb_remove(req->pq->handler,
 						   &node->rb);

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-11-28 22:01 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-30 16:17 [PATCH] IB/hfi1: Eliminate races in the SDMA send error path Michael J. Ruhl
2018-11-28 10:59 ` Greg KH
2018-10-30 16:31 Michael J. Ruhl
2018-11-28 11:00 ` Greg KH
2018-10-31 14:48 Michael J. Ruhl
2018-11-28 11:00 ` Greg KH

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.