From: Haggai Eran <haggaie-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
To: Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Cc: Haggai Eran <haggaie-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>,
	Leon Romanovsky <leonro-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>,
	Matan Barak <matanb-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>,
	Hal Rosenstock
	<hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>,
	Jason Gunthorpe
	<jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: [PATCH for-next 07/10] IB/mlx5: Reorder GSI completions
Date: Mon, 29 Feb 2016 15:45:09 +0200
Message-ID: <1456753512-17688-8-git-send-email-haggaie@mellanox.com>
In-Reply-To: <1456753512-17688-1-git-send-email-haggaie-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

The emulated GSI QP's send completions are generated by multiple hardware
QPs, so they can arrive out of order with respect to the order in which
the work requests were submitted.

Reorder the completions by keeping a list of the posted work requests and
their completions. A newly received hardware completion updates the list
and marks its work request as completed, but completions are reported to
the client strictly in list order.

To support this, create a new private CQ to handle the hardware
completions.
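
For illustration only, outside the patch itself: below is a minimal
userspace C sketch of the same reordering scheme. A fixed-size ring holds
entries in submission order; completions may arrive out of order and merely
mark their entry as done, and results are reported strictly from the
consumer index, stopping at the first hole. The names (struct entry,
post(), complete(), report()), the lookup by id rather than by an embedded
ib_cqe, and the keep-one-slot-free full check are assumptions of the
sketch, not the driver code.

/* Fixed ring of outstanding work requests, kept in submission order. */
#include <stdbool.h>
#include <stdio.h>

#define RING_SIZE 8

struct entry {
	unsigned int id;	/* stands in for the wr_id */
	bool completed;
};

static struct entry ring[RING_SIZE];
static unsigned int pi, ci;	/* producer and consumer indices */

/* Add a work request at the producer index; fails when the ring is full. */
static int post(unsigned int id)
{
	if ((pi + 1) % RING_SIZE == ci)
		return -1;
	ring[pi].id = id;
	ring[pi].completed = false;
	pi = (pi + 1) % RING_SIZE;
	return 0;
}

/* A hardware completion only marks its entry; it reports nothing itself. */
static void complete(unsigned int id)
{
	unsigned int i;

	for (i = ci; i != pi; i = (i + 1) % RING_SIZE)
		if (ring[i].id == id)
			ring[i].completed = true;
}

/* Report completions in submission order, stopping at the first hole. */
static void report(void)
{
	while (ci != pi && ring[ci].completed) {
		printf("completion for wr %u\n", ring[ci].id);
		ring[ci].completed = false;
		ci = (ci + 1) % RING_SIZE;
	}
}

int main(void)
{
	post(1); post(2); post(3);
	complete(2);	/* out of order: nothing can be reported yet */
	report();
	complete(1);	/* fills the hole: 1 and 2 are reported, in order */
	report();
	complete(3);
	report();
	return 0;
}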

Signed-off-by: Haggai Eran <haggaie-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
 drivers/infiniband/hw/mlx5/gsi.c | 156 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 150 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index 1648f539c836..8d040626abb2 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -32,6 +32,13 @@
 
 #include "mlx5_ib.h"
 
+struct mlx5_ib_gsi_wr {
+	struct ib_cqe cqe;
+	struct ib_wc wc;
+	int send_flags;
+	bool completed:1;
+};
+
 struct mlx5_ib_gsi_qp {
 	struct ib_qp ibqp;
 	struct ib_qp *rx_qp;
@@ -40,9 +47,13 @@ struct mlx5_ib_gsi_qp {
 	enum ib_sig_type sq_sig_type;
 	/* Serialize qp state modifications */
 	struct mutex mutex;
+	struct ib_cq *cq;
+	struct mlx5_ib_gsi_wr *outstanding_wrs;
+	u32 outstanding_pi, outstanding_ci;
 	int num_qps;
 	/* Protects access to the tx_qps. Post send operations synchronize
-	 * with tx_qp creation in setup_qp().
+	 * with tx_qp creation in setup_qp(). Also protects the
+	 * outstanding_wrs array and indices.
 	 */
 	spinlock_t lock;
 	struct ib_qp **tx_qps;
@@ -58,6 +69,57 @@ static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
 	return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
 }
 
+static u32 next_outstanding(struct mlx5_ib_gsi_qp *gsi, u32 index)
+{
+	return ++index % gsi->cap.max_send_wr;
+}
+
+#define for_each_outstanding_wr(gsi, index) \
+	for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; \
+	     index = next_outstanding(gsi, index))
+
+/* Call with gsi->lock locked */
+static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
+{
+	struct ib_cq *gsi_cq = gsi->ibqp.send_cq;
+	struct mlx5_ib_gsi_wr *wr;
+	u32 index;
+
+	for_each_outstanding_wr(gsi, index) {
+		wr = &gsi->outstanding_wrs[index];
+
+		if (!wr->completed)
+			break;
+
+		if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR ||
+		    wr->send_flags & IB_SEND_SIGNALED)
+			WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));
+
+		wr->completed = false;
+	}
+
+	gsi->outstanding_ci = index;
+}
+
+static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct mlx5_ib_gsi_qp *gsi = cq->cq_context;
+	struct mlx5_ib_gsi_wr *wr =
+		container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe);
+	u64 wr_id;
+	unsigned long flags;
+
+	spin_lock_irqsave(&gsi->lock, flags);
+	wr->completed = true;
+	wr_id = wr->wc.wr_id;
+	wr->wc = *wc;
+	wr->wc.wr_id = wr_id;
+	wr->wc.qp = &gsi->ibqp;
+
+	generate_completions(gsi);
+	spin_unlock_irqrestore(&gsi->lock, flags);
+}
+
 struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
 				    struct ib_qp_init_attr *init_attr)
 {
@@ -88,6 +150,14 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
 		goto err_free;
 	}
 
+	gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr,
+				       sizeof(*gsi->outstanding_wrs),
+				       GFP_KERNEL);
+	if (!gsi->outstanding_wrs) {
+		ret = -ENOMEM;
+		goto err_free_tx;
+	}
+
 	mutex_init(&gsi->mutex);
 
 	mutex_lock(&dev->devr.mutex);
@@ -96,7 +166,7 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
 		mlx5_ib_warn(dev, "GSI QP already exists on port %d\n",
 			     port_num);
 		ret = -EBUSY;
-		goto err_free_tx;
+		goto err_free_wrs;
 	}
 	gsi->num_qps = num_qps;
 	spin_lock_init(&gsi->lock);
@@ -106,13 +176,23 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
 	gsi->ibqp.qp_num = 1;
 	gsi->port_num = port_num;
 
+	gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0,
+			      IB_POLL_SOFTIRQ);
+	if (IS_ERR(gsi->cq)) {
+		mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
+			     PTR_ERR(gsi->cq));
+		ret = PTR_ERR(gsi->cq);
+		goto err_free_wrs;
+	}
+
 	hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI;
+	hw_init_attr.send_cq = gsi->cq;
 	gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
 	if (IS_ERR(gsi->rx_qp)) {
 		mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
 			     PTR_ERR(gsi->rx_qp));
 		ret = PTR_ERR(gsi->rx_qp);
-		goto err_free_tx;
+		goto err_destroy_cq;
 	}
 
 	dev->devr.ports[init_attr->port_num - 1].gsi = gsi;
@@ -121,8 +201,12 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
 
 	return &gsi->ibqp;
 
-err_free_tx:
+err_destroy_cq:
+	ib_free_cq(gsi->cq);
+err_free_wrs:
 	mutex_unlock(&dev->devr.mutex);
+	kfree(gsi->outstanding_wrs);
+err_free_tx:
 	kfree(gsi->tx_qps);
 err_free:
 	kfree(gsi);
@@ -158,6 +242,9 @@ int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
 		gsi->tx_qps[qp_index] = NULL;
 	}
 
+	ib_free_cq(gsi->cq);
+
+	kfree(gsi->outstanding_wrs);
 	kfree(gsi->tx_qps);
 	kfree(gsi);
 
@@ -170,7 +257,7 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
 	struct ib_qp_init_attr init_attr = {
 		.event_handler = gsi->rx_qp->event_handler,
 		.qp_context = gsi->rx_qp->qp_context,
-		.send_cq = gsi->rx_qp->send_cq,
+		.send_cq = gsi->cq,
 		.recv_cq = gsi->rx_qp->recv_cq,
 		.cap = {
 			.max_send_wr = gsi->cap.max_send_wr,
@@ -326,12 +413,69 @@ int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
 	return ret;
 }
 
+/* Call with gsi->lock locked */
+static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
+				      struct ib_ud_wr *wr, struct ib_wc *wc)
+{
+	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
+	struct mlx5_ib_gsi_wr *gsi_wr;
+
+	if (gsi->outstanding_pi == gsi->outstanding_ci + gsi->cap.max_send_wr) {
+		mlx5_ib_warn(dev, "no available GSI work request.\n");
+		return -ENOMEM;
+	}
+
+	gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi];
+	gsi->outstanding_pi = next_outstanding(gsi, gsi->outstanding_pi);
+
+	if (!wc) {
+		memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc));
+		gsi_wr->wc.pkey_index = wr->pkey_index;
+		gsi_wr->wc.wr_id = wr->wr.wr_id;
+	} else {
+		gsi_wr->wc = *wc;
+		gsi_wr->completed = true;
+	}
+
+	gsi_wr->cqe.done = &handle_single_completion;
+	wr->wr.wr_cqe = &gsi_wr->cqe;
+
+	return 0;
+}
+
 int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr,
 			  struct ib_send_wr **bad_wr)
 {
 	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+	unsigned long flags;
+	int ret;
+
+	for (; wr; wr = wr->next) {
+		struct ib_ud_wr cur_wr = *ud_wr(wr);
+
+		cur_wr.wr.next = NULL;
 
-	return ib_post_send(gsi->rx_qp, wr, bad_wr);
+		spin_lock_irqsave(&gsi->lock, flags);
+		ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL);
+		if (ret)
+			goto err;
+
+		ret = ib_post_send(gsi->rx_qp, &cur_wr.wr, bad_wr);
+		if (ret) {
+			/* Undo the effect of adding the outstanding wr */
+			gsi->outstanding_pi = (gsi->outstanding_pi - 1) %
+					      gsi->cap.max_send_wr;
+			goto err;
+		}
+		spin_unlock_irqrestore(&gsi->lock, flags);
+	}
+
+	return 0;
+
+err:
+	spin_unlock_irqrestore(&gsi->lock, flags);
+	*bad_wr = wr;
+	return ret;
 }
 
 int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr,
-- 
1.7.11.2
