* [PATCH v3 for-next 0/5] RDMA/hns: Refactor wqe related codes
From: Weihang Li @ 2020-03-10 11:17 UTC
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm

The wqe process in hns is hard to understand and maintain, so this series
refactors the wqe related code, especially in hns_roce_v2_post_send().

Previous discussion could be found at:
- v2: https://patchwork.kernel.org/cover/11422963/
- v1: https://patchwork.kernel.org/cover/11415395/

Changes since v2:
- Remove a redundant assignment of "NULL" in patch 4/5.

Changes since v1:
- Address Leon's comments about the implementation of to_hr_opcode() in
  patch 3/5.
- Rebase patch 4/5, which could not be applied cleanly.

Xi Wang (5):
  RDMA/hns: Rename wqe buffer related functions
  RDMA/hns: Optimize wqe buffer filling process for post send
  RDMA/hns: Optimize the wr opcode conversion from ib to hns
  RDMA/hns: Optimize base address table config flow for qp buffer
  RDMA/hns: Optimize wqe buffer set flow for post send

 drivers/infiniband/hw/hns/hns_roce_device.h |  10 +-
 drivers/infiniband/hw/hns/hns_roce_hem.c    |  16 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c  |   9 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 588 ++++++++++++++--------------
 drivers/infiniband/hw/hns/hns_roce_qp.c     |  48 +--
 5 files changed, 319 insertions(+), 352 deletions(-)

-- 
2.8.1



* [PATCH v3 for-next 1/5] RDMA/hns: Rename wqe buffer related functions
From: Weihang Li @ 2020-03-10 11:18 UTC
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm

From: Xi Wang <wangxi11@huawei.com>

There are several global functions related to the wqe buffer in the hns
driver that are called from different files, but their names do not
indicate the namespace they belong to. So add the 'hns_roce_' prefix to
three wqe buffer related global functions: get_recv_wqe(),
get_send_wqe(), and get_send_extend_sge().

Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |  6 +++---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c  |  9 +++++----
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 10 +++++-----
 drivers/infiniband/hw/hns/hns_roce_qp.c     |  6 +++---
 4 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index d7dcf6e..b6ae12d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -1238,9 +1238,9 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd,
 int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		       int attr_mask, struct ib_udata *udata);
 void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
-void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n);
-void *get_send_wqe(struct hns_roce_qp *hr_qp, int n);
-void *get_send_extend_sge(struct hns_roce_qp *hr_qp, int n);
+void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n);
+void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n);
+void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n);
 bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
 			  struct ib_cq *ib_cq);
 enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index c05a905..2e53045 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -106,7 +106,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
 			goto out;
 		}
 
-		wqe = get_send_wqe(qp, wqe_idx);
+		wqe = hns_roce_get_send_wqe(qp, wqe_idx);
 		qp->sq.wrid[wqe_idx] = wr->wr_id;
 
 		/* Corresponding to the RC and RD type wqe process separately */
@@ -378,7 +378,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
 			goto out;
 		}
 
-		ctrl = get_recv_wqe(hr_qp, wqe_idx);
+		ctrl = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
 
 		roce_set_field(ctrl->rwqe_byte_12,
 			       RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M,
@@ -2284,9 +2284,10 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq,
 
 	if (is_send) {
 		/* SQ conrespond to CQE */
-		sq_wqe = get_send_wqe(*cur_qp, roce_get_field(cqe->cqe_byte_4,
+		sq_wqe = hns_roce_get_send_wqe(*cur_qp,
+						roce_get_field(cqe->cqe_byte_4,
 						CQE_BYTE_4_WQE_INDEX_M,
-						CQE_BYTE_4_WQE_INDEX_S)&
+						CQE_BYTE_4_WQE_INDEX_S) &
 						((*cur_qp)->sq.wqe_cnt-1));
 		switch (le32_to_cpu(sq_wqe->flag) & HNS_ROCE_WQE_OPCODE_MASK) {
 		case HNS_ROCE_WQE_OPCODE_SEND:
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 82021fa..88d671a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -127,7 +127,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
 	 * should calculate how many sges in the first page and the second
 	 * page.
 	 */
-	dseg = get_send_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1));
+	dseg = hns_roce_get_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1));
 	fi_sge_num = (round_up((uintptr_t)dseg, 1 << shift) -
 		      (uintptr_t)dseg) /
 		      sizeof(struct hns_roce_v2_wqe_data_seg);
@@ -137,7 +137,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
 			set_data_seg_v2(dseg++, sg + i);
 			(*sge_ind)++;
 		}
-		dseg = get_send_extend_sge(qp,
+		dseg = hns_roce_get_extend_sge(qp,
 					   (*sge_ind) & (qp->sge.sge_cnt - 1));
 		for (i = 0; i < se_sge_num; i++) {
 			set_data_seg_v2(dseg++, sg + fi_sge_num + i);
@@ -329,7 +329,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 			goto out;
 		}
 
-		wqe = get_send_wqe(qp, wqe_idx);
+		wqe = hns_roce_get_send_wqe(qp, wqe_idx);
 		qp->sq.wrid[wqe_idx] = wr->wr_id;
 		owner_bit =
 		       ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1);
@@ -676,7 +676,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
 			goto out;
 		}
 
-		wqe = get_recv_wqe(hr_qp, wqe_idx);
+		wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
 		dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
 		for (i = 0; i < wr->num_sge; i++) {
 			if (!wr->sg_list[i].length)
@@ -2935,7 +2935,7 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
 
 	sge_list = (*cur_qp)->rq_inl_buf.wqe_list[wr_cnt].sg_list;
 	sge_num = (*cur_qp)->rq_inl_buf.wqe_list[wr_cnt].sge_cnt;
-	wqe_buf = get_recv_wqe(*cur_qp, wr_cnt);
+	wqe_buf = hns_roce_get_recv_wqe(*cur_qp, wr_cnt);
 	data_len = wc->byte_len;
 
 	for (sge_cnt = 0; (sge_cnt < sge_num) && (data_len); sge_cnt++) {
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 5a28d62..c2ea489 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -1470,17 +1470,17 @@ static void *get_wqe(struct hns_roce_qp *hr_qp, int offset)
 	return hns_roce_buf_offset(&hr_qp->hr_buf, offset);
 }
 
-void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n)
+void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n)
 {
 	return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift));
 }
 
-void *get_send_wqe(struct hns_roce_qp *hr_qp, int n)
+void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n)
 {
 	return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift));
 }
 
-void *get_send_extend_sge(struct hns_roce_qp *hr_qp, int n)
+void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n)
 {
 	return hns_roce_buf_offset(&hr_qp->hr_buf, hr_qp->sge.offset +
 					(n << hr_qp->sge.sge_shift));
-- 
2.8.1



* [PATCH v3 for-next 2/5] RDMA/hns: Optimize wqe buffer filling process for post send
From: Weihang Li @ 2020-03-10 11:18 UTC
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm

From: Xi Wang <wangxi11@huawei.com>

Encapsulate the wqe buffer filling details for the datagram segment,
fast MR segment and atomic segment.
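
The key layout detail is that, for atomic wrs, the single data segment and
the atomic segment are written back to back in the same wqe buffer, so one
helper can fill both and then set the sge count. Below is a minimal
standalone sketch of that layout; the stub structures and fill_atomic_wqe()
are illustrative stand-ins, not the real hns_roce_v2 descriptors or driver
code:

#include <stdint.h>

/* Stub descriptors standing in for the real hns_roce_v2 structures. */
struct data_seg { uint64_t addr; uint32_t key; uint32_t len; };
struct atomic_seg { uint64_t fetchadd_swap_data; uint64_t cmp_data; };
struct sge { uint64_t addr; uint32_t key; uint32_t length; };

/* Fill the data seg and the atomic seg that follows it in one wqe buffer. */
static void fill_atomic_wqe(void *wqe, const struct sge *sg, int is_cmp_swp,
                            uint64_t swap, uint64_t compare_add)
{
        struct data_seg *dseg = wqe;
        struct atomic_seg *aseg = (void *)(dseg + 1);

        dseg->addr = sg->addr;
        dseg->key = sg->key;
        dseg->len = sg->length;

        if (is_cmp_swp) {
                aseg->fetchadd_swap_data = swap;
                aseg->cmp_data = compare_add;
        } else {
                aseg->fetchadd_swap_data = compare_add;
                aseg->cmp_data = 0;
        }
}

int main(void)
{
        uint64_t wqe[8] = {0};
        struct sge sg = { .addr = 0x1000, .key = 0x10, .length = 8 };

        fill_atomic_wqe(wqe, &sg, 1, 1, 2);     /* cmp & swap: swap=1, cmp=2 */
        return 0;
}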

Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 63 +++++++++++++++---------------
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 88d671a..c8c345f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -57,10 +57,10 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
 }
 
 static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
-			 struct hns_roce_wqe_frmr_seg *fseg,
-			 const struct ib_reg_wr *wr)
+			 void *wqe, const struct ib_reg_wr *wr)
 {
 	struct hns_roce_mr *mr = to_hr_mr(wr->mr);
+	struct hns_roce_wqe_frmr_seg *fseg = wqe;
 
 	/* use ib_access_flags */
 	roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
@@ -92,16 +92,26 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
 		     V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
 }
 
-static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
-			   const struct ib_atomic_wr *wr)
+static void set_atomic_seg(const struct ib_send_wr *wr, void *wqe,
+			   struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+			   int valid_num_sge)
 {
-	if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
-		aseg->fetchadd_swap_data = cpu_to_le64(wr->swap);
-		aseg->cmp_data  = cpu_to_le64(wr->compare_add);
+	struct hns_roce_wqe_atomic_seg *aseg;
+
+	set_data_seg_v2(wqe, wr->sg_list);
+	aseg = wqe + sizeof(struct hns_roce_v2_wqe_data_seg);
+
+	if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+		aseg->fetchadd_swap_data = cpu_to_le64(atomic_wr(wr)->swap);
+		aseg->cmp_data = cpu_to_le64(atomic_wr(wr)->compare_add);
 	} else {
-		aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add);
+		aseg->fetchadd_swap_data =
+			cpu_to_le64(atomic_wr(wr)->compare_add);
 		aseg->cmp_data  = 0;
 	}
+
+	roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
+		       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
 }
 
 static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
@@ -154,11 +164,11 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
 static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 			     struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
 			     void *wqe, unsigned int *sge_ind,
-			     int valid_num_sge,
-			     const struct ib_send_wr **bad_wr)
+			     int valid_num_sge)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 	struct hns_roce_v2_wqe_data_seg *dseg = wqe;
+	struct ib_device *ibdev = &hr_dev->ib_dev;
 	struct hns_roce_qp *qp = to_hr_qp(ibqp);
 	int j = 0;
 	int i;
@@ -166,15 +176,14 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 	if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) {
 		if (le32_to_cpu(rc_sq_wqe->msg_len) >
 		    hr_dev->caps.max_sq_inline) {
-			*bad_wr = wr;
-			dev_err(hr_dev->dev, "inline len(1-%d)=%d, illegal",
-				rc_sq_wqe->msg_len, hr_dev->caps.max_sq_inline);
+			ibdev_err(ibdev, "inline len(1-%d)=%d, illegal",
+				  rc_sq_wqe->msg_len,
+				  hr_dev->caps.max_sq_inline);
 			return -EINVAL;
 		}
 
 		if (wr->opcode == IB_WR_RDMA_READ) {
-			*bad_wr =  wr;
-			dev_err(hr_dev->dev, "Not support inline data!\n");
+			ibdev_err(ibdev, "Not support inline data!\n");
 			return -EINVAL;
 		}
 
@@ -285,7 +294,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 	struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
 	struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
 	struct hns_roce_qp *qp = to_hr_qp(ibqp);
-	struct hns_roce_wqe_frmr_seg *fseg;
 	struct device *dev = hr_dev->dev;
 	unsigned int owner_bit;
 	unsigned int sge_idx;
@@ -547,8 +555,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 				break;
 			case IB_WR_REG_MR:
 				hr_op = HNS_ROCE_V2_WQE_OP_FAST_REG_PMR;
-				fseg = wqe;
-				set_frmr_seg(rc_sq_wqe, fseg, reg_wr(wr));
+				set_frmr_seg(rc_sq_wqe, wqe, reg_wr(wr));
 				break;
 			case IB_WR_ATOMIC_CMP_AND_SWP:
 				hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP;
@@ -582,23 +589,17 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 				       V2_RC_SEND_WQE_BYTE_4_OPCODE_S, hr_op);
 
 			if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-			    wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
-				struct hns_roce_v2_wqe_data_seg *dseg;
-
-				dseg = wqe;
-				set_data_seg_v2(dseg, wr->sg_list);
-				wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
-				set_atomic_seg(wqe, atomic_wr(wr));
-				roce_set_field(rc_sq_wqe->byte_16,
-					       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
-					       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
+			    wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+				set_atomic_seg(wr, wqe, rc_sq_wqe,
 					       valid_num_sge);
-			} else if (wr->opcode != IB_WR_REG_MR) {
+			else if (wr->opcode != IB_WR_REG_MR) {
 				ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe,
 							wqe, &sge_idx,
-							valid_num_sge, bad_wr);
-				if (ret)
+							valid_num_sge);
+				if (ret) {
+					*bad_wr = wr;
 					goto out;
+				}
 			}
 		} else {
 			dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
-- 
2.8.1



* [PATCH v3 for-next 3/5] RDMA/hns: Optimize the wr opcode conversion from ib to hns
From: Weihang Li @ 2020-03-10 11:18 UTC
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm

From: Xi Wang <wangxi11@huawei.com>

Simplify the wr opcode conversion from ib to hns by using a map table
instead of the switch-case statement.
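
The table stores each entry as 1 + the real hns opcode so that an unset
array slot (which reads as 0) can be told apart from a real opcode whose
value is 0. Here is a small standalone sketch of the same pattern; the enum
values and names are made up for illustration and are not the real IB_WR_*
or HNS_ROCE_V2_WQE_OP_* constants:

#include <stdio.h>

/* Hypothetical opcode values, for illustration only. */
enum ib_op { IB_OP_WRITE, IB_OP_SEND, IB_OP_READ, IB_OP_MAX };
enum hr_op { HR_OP_WRITE, HR_OP_SEND, HR_OP_READ, HR_OP_MASK = 0x1f };

/* Store 1 + real value so that an unset slot (0) means "no mapping". */
#define OP_MAP(ib, hr) [IB_OP_ ## ib] = 1 + HR_OP_ ## hr

static const unsigned int op_map[IB_OP_MAX] = {
        OP_MAP(WRITE, WRITE),
        OP_MAP(SEND, SEND),
        /* IB_OP_READ intentionally left unmapped. */
};

static unsigned int to_hr_op(unsigned int ib_op)
{
        if (ib_op >= IB_OP_MAX || !op_map[ib_op])
                return HR_OP_MASK;      /* unknown or unmapped opcode */

        return op_map[ib_op] - 1;       /* undo the +1 offset */
}

int main(void)
{
        /* Prints "0 31 31": mapped, unmapped, out of range. */
        printf("%u %u %u\n", to_hr_op(IB_OP_WRITE), to_hr_op(IB_OP_READ),
               to_hr_op(42));
        return 0;
}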

Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 63 +++++++++++++++++-------------
 1 file changed, 36 insertions(+), 27 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index c8c345f..c813c74 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -56,6 +56,40 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
 	dseg->len  = cpu_to_le32(sg->length);
 }
 
+/*
+ * mapped-value = 1 + real-value
+ * The hns wr opcode real values start from 0. In order to distinguish
+ * between initialized and uninitialized map values, we add 1 to the actual
+ * value when defining the mapping, so that a valid entry can be identified
+ * by checking whether the mapped value is greater than 0.
+ */
+#define HR_OPC_MAP(ib_key, hr_key) \
+		[IB_WR_ ## ib_key] = 1 + HNS_ROCE_V2_WQE_OP_ ## hr_key
+
+static const u32 hns_roce_op_code[] = {
+	HR_OPC_MAP(RDMA_WRITE,			RDMA_WRITE),
+	HR_OPC_MAP(RDMA_WRITE_WITH_IMM,		RDMA_WRITE_WITH_IMM),
+	HR_OPC_MAP(SEND,			SEND),
+	HR_OPC_MAP(SEND_WITH_IMM,		SEND_WITH_IMM),
+	HR_OPC_MAP(RDMA_READ,			RDMA_READ),
+	HR_OPC_MAP(ATOMIC_CMP_AND_SWP,		ATOM_CMP_AND_SWAP),
+	HR_OPC_MAP(ATOMIC_FETCH_AND_ADD,	ATOM_FETCH_AND_ADD),
+	HR_OPC_MAP(SEND_WITH_INV,		SEND_WITH_INV),
+	HR_OPC_MAP(LOCAL_INV,			LOCAL_INV),
+	HR_OPC_MAP(MASKED_ATOMIC_CMP_AND_SWP,	ATOM_MSK_CMP_AND_SWAP),
+	HR_OPC_MAP(MASKED_ATOMIC_FETCH_AND_ADD,	ATOM_MSK_FETCH_AND_ADD),
+	HR_OPC_MAP(REG_MR,			FAST_REG_PMR),
+};
+
+static u32 to_hr_opcode(u32 ib_opcode)
+{
+	if (ib_opcode >= ARRAY_SIZE(hns_roce_op_code))
+		return HNS_ROCE_V2_WQE_OP_MASK;
+
+	return hns_roce_op_code[ib_opcode] ? hns_roce_op_code[ib_opcode] - 1 :
+					     HNS_ROCE_V2_WQE_OP_MASK;
+}
+
 static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
 			 void *wqe, const struct ib_reg_wr *wr)
 {
@@ -303,7 +337,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 	void *wqe = NULL;
 	bool loopback;
 	u32 tmp_len;
-	u32 hr_op;
 	u8 *smac;
 	int nreq;
 	int ret;
@@ -517,76 +550,52 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 			wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
 			switch (wr->opcode) {
 			case IB_WR_RDMA_READ:
-				hr_op = HNS_ROCE_V2_WQE_OP_RDMA_READ;
 				rc_sq_wqe->rkey =
 					cpu_to_le32(rdma_wr(wr)->rkey);
 				rc_sq_wqe->va =
 					cpu_to_le64(rdma_wr(wr)->remote_addr);
 				break;
 			case IB_WR_RDMA_WRITE:
-				hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE;
 				rc_sq_wqe->rkey =
 					cpu_to_le32(rdma_wr(wr)->rkey);
 				rc_sq_wqe->va =
 					cpu_to_le64(rdma_wr(wr)->remote_addr);
 				break;
 			case IB_WR_RDMA_WRITE_WITH_IMM:
-				hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM;
 				rc_sq_wqe->rkey =
 					cpu_to_le32(rdma_wr(wr)->rkey);
 				rc_sq_wqe->va =
 					cpu_to_le64(rdma_wr(wr)->remote_addr);
 				break;
-			case IB_WR_SEND:
-				hr_op = HNS_ROCE_V2_WQE_OP_SEND;
-				break;
-			case IB_WR_SEND_WITH_INV:
-				hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_INV;
-				break;
-			case IB_WR_SEND_WITH_IMM:
-				hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM;
-				break;
 			case IB_WR_LOCAL_INV:
-				hr_op = HNS_ROCE_V2_WQE_OP_LOCAL_INV;
 				roce_set_bit(rc_sq_wqe->byte_4,
 					       V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
 				rc_sq_wqe->inv_key =
 					    cpu_to_le32(wr->ex.invalidate_rkey);
 				break;
 			case IB_WR_REG_MR:
-				hr_op = HNS_ROCE_V2_WQE_OP_FAST_REG_PMR;
 				set_frmr_seg(rc_sq_wqe, wqe, reg_wr(wr));
 				break;
 			case IB_WR_ATOMIC_CMP_AND_SWP:
-				hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP;
 				rc_sq_wqe->rkey =
 					cpu_to_le32(atomic_wr(wr)->rkey);
 				rc_sq_wqe->va =
 					cpu_to_le64(atomic_wr(wr)->remote_addr);
 				break;
 			case IB_WR_ATOMIC_FETCH_AND_ADD:
-				hr_op = HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD;
 				rc_sq_wqe->rkey =
 					cpu_to_le32(atomic_wr(wr)->rkey);
 				rc_sq_wqe->va =
 					cpu_to_le64(atomic_wr(wr)->remote_addr);
 				break;
-			case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
-				hr_op =
-				       HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP;
-				break;
-			case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
-				hr_op =
-				      HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD;
-				break;
 			default:
-				hr_op = HNS_ROCE_V2_WQE_OP_MASK;
 				break;
 			}
 
 			roce_set_field(rc_sq_wqe->byte_4,
 				       V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
-				       V2_RC_SEND_WQE_BYTE_4_OPCODE_S, hr_op);
+				       V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
+				       to_hr_opcode(wr->opcode));
 
 			if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
 			    wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-- 
2.8.1



* [PATCH v3 for-next 4/5] RDMA/hns: Optimize base address table config flow for qp buffer
From: Weihang Li @ 2020-03-10 11:18 UTC
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm

From: Xi Wang <wangxi11@huawei.com>

Currently, before the qp is created, a page size has to be calculated for
the base address table so that it can hold all of the base addresses in
the mtr. This makes the parameter configuration of the mtr complex, so
integrate the calculation of the base address table page size into the
hem related interface to simplify the use of the mtr.
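
For context, the removed calc_wqe_bt_page_shift() was essentially a search
for the smallest BT page size whose entry count can hold every root base
address. The standalone sketch below reproduces that search with a fixed
stub in place of hns_roce_hem_list_calc_root_ba(); the numbers are made up
for illustration:

#include <stdio.h>

#define PAGE_SHIFT      12
#define BA_BYTE_LEN     8       /* one base address entry takes 8 bytes */

/* Stub for hns_roce_hem_list_calc_root_ba(): pretend 700 root BAs needed. */
static int calc_root_ba_stub(void)
{
        return 700;
}

/* Grow the BT page shift until all root base addresses fit in one BT page. */
static int calc_bt_page_shift(int ba_pg_sz)
{
        int bt_pg_shift = PAGE_SHIFT + ba_pg_sz;
        int ba_num;

        do {
                ba_num = (1 << bt_pg_shift) / BA_BYTE_LEN;
                if (calc_root_ba_stub() <= ba_num)
                        break;
                bt_pg_shift++;
        } while (1);

        return bt_pg_shift - PAGE_SHIFT;
}

int main(void)
{
        /* 4 KB / 8 = 512 entries < 700, so one extra shift: prints 1. */
        printf("%d\n", calc_bt_page_shift(0));
        return 0;
}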

Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |  4 ---
 drivers/infiniband/hw/hns/hns_roce_hem.c    | 16 +++++++----
 drivers/infiniband/hw/hns/hns_roce_qp.c     | 42 +++++++----------------------
 3 files changed, 21 insertions(+), 41 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index b6ae12d..f6b3cf6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -669,10 +669,6 @@ struct hns_roce_qp {
 	struct ib_umem		*umem;
 	struct hns_roce_mtt	mtt;
 	struct hns_roce_mtr	mtr;
-
-	/* this define must less than HNS_ROCE_MAX_BT_REGION */
-#define HNS_ROCE_WQE_REGION_MAX	 3
-	struct hns_roce_buf_region regions[HNS_ROCE_WQE_REGION_MAX];
 	int                     wqe_bt_pg_shift;
 
 	u32			buff_size;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index e822157..8380d71 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -1383,6 +1383,7 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
 	void *cpu_base;
 	u64 phy_base;
 	int ret = 0;
+	int ba_num;
 	int offset;
 	int total;
 	int step;
@@ -1393,12 +1394,16 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
 	if (root_hem)
 		return 0;
 
+	ba_num = hns_roce_hem_list_calc_root_ba(regions, region_cnt, unit);
+	if (ba_num < 1)
+		return -ENOMEM;
+
 	INIT_LIST_HEAD(&temp_root);
-	total = r->offset;
+	offset = r->offset;
 	/* indicate to last region */
 	r = &regions[region_cnt - 1];
-	root_hem = hem_list_alloc_item(hr_dev, total, r->offset + r->count - 1,
-				       unit, true, 0);
+	root_hem = hem_list_alloc_item(hr_dev, offset, r->offset + r->count - 1,
+				       ba_num, true, 0);
 	if (!root_hem)
 		return -ENOMEM;
 	list_add(&root_hem->list, &temp_root);
@@ -1410,7 +1415,7 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
 		INIT_LIST_HEAD(&temp_list[i]);
 
 	total = 0;
-	for (i = 0; i < region_cnt && total < unit; i++) {
+	for (i = 0; i < region_cnt && total < ba_num; i++) {
 		r = &regions[i];
 		if (!r->count)
 			continue;
@@ -1443,7 +1448,8 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
 			/* if exist mid bt, link L1 to L0 */
 			list_for_each_entry_safe(hem, temp_hem,
 					  &hem_list->mid_bt[i][1], list) {
-				offset = hem->start / step * BA_BYTE_LEN;
+				offset = (hem->start - r->offset) / step *
+					  BA_BYTE_LEN;
 				hem_list_link_bt(hr_dev, cpu_base + offset,
 						 hem->dma_addr);
 				total++;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index c2ea489..7ba3db5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -579,30 +579,6 @@ static int split_wqe_buf_region(struct hns_roce_dev *hr_dev,
 	return region_cnt;
 }
 
-static int calc_wqe_bt_page_shift(struct hns_roce_dev *hr_dev,
-				  struct hns_roce_buf_region *regions,
-				  int region_cnt)
-{
-	int bt_pg_shift;
-	int ba_num;
-	int ret;
-
-	bt_pg_shift = PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz;
-
-	/* all root ba entries must in one bt page */
-	do {
-		ba_num = (1 << bt_pg_shift) / BA_BYTE_LEN;
-		ret = hns_roce_hem_list_calc_root_ba(regions, region_cnt,
-						     ba_num);
-		if (ret <= ba_num)
-			break;
-
-		bt_pg_shift++;
-	} while (ret > ba_num);
-
-	return bt_pg_shift - PAGE_SHIFT;
-}
-
 static int set_extend_sge_param(struct hns_roce_dev *hr_dev,
 				struct hns_roce_qp *hr_qp)
 {
@@ -768,7 +744,10 @@ static void free_rq_inline_buf(struct hns_roce_qp *hr_qp)
 static int map_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 		       u32 page_shift, bool is_user)
 {
-	dma_addr_t *buf_list[ARRAY_SIZE(hr_qp->regions)] = { NULL };
+/* WQE buffer include 3 parts: SQ, extend SGE and RQ. */
+#define HNS_ROCE_WQE_REGION_MAX	 3
+	struct hns_roce_buf_region regions[HNS_ROCE_WQE_REGION_MAX] = {};
+	dma_addr_t *buf_list[HNS_ROCE_WQE_REGION_MAX] = {};
 	struct ib_device *ibdev = &hr_dev->ib_dev;
 	struct hns_roce_buf_region *r;
 	int region_count;
@@ -776,18 +755,18 @@ static int map_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 	int ret;
 	int i;
 
-	region_count = split_wqe_buf_region(hr_dev, hr_qp, hr_qp->regions,
-					ARRAY_SIZE(hr_qp->regions), page_shift);
+	region_count = split_wqe_buf_region(hr_dev, hr_qp, regions,
+					    ARRAY_SIZE(regions), page_shift);
 
 	/* alloc a tmp list to store WQE buffers address */
-	ret = hns_roce_alloc_buf_list(hr_qp->regions, buf_list, region_count);
+	ret = hns_roce_alloc_buf_list(regions, buf_list, region_count);
 	if (ret) {
 		ibdev_err(ibdev, "Failed to alloc WQE buffer list\n");
 		return ret;
 	}
 
 	for (i = 0; i < region_count; i++) {
-		r = &hr_qp->regions[i];
+		r = &regions[i];
 		if (is_user)
 			buf_count = hns_roce_get_umem_bufs(hr_dev, buf_list[i],
 					r->count, r->offset, hr_qp->umem,
@@ -805,11 +784,10 @@ static int map_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 		}
 	}
 
-	hr_qp->wqe_bt_pg_shift = calc_wqe_bt_page_shift(hr_dev, hr_qp->regions,
-							region_count);
+	hr_qp->wqe_bt_pg_shift = hr_dev->caps.mtt_ba_pg_sz;
 	hns_roce_mtr_init(&hr_qp->mtr, PAGE_SHIFT + hr_qp->wqe_bt_pg_shift,
 			  page_shift);
-	ret = hns_roce_mtr_attach(hr_dev, &hr_qp->mtr, buf_list, hr_qp->regions,
+	ret = hns_roce_mtr_attach(hr_dev, &hr_qp->mtr, buf_list, regions,
 				  region_count);
 	if (ret)
 		ibdev_err(ibdev, "Failed to attach WQE's mtr\n");
-- 
2.8.1



* [PATCH v3 for-next 5/5] RDMA/hns: Optimize wqe buffer set flow for post send
From: Weihang Li @ 2020-03-10 11:18 UTC
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm

From: Xi Wang <wangxi11@huawei.com>

Split hns_roce_v2_post_send() into three sub-functions, set_rc_wqe(),
set_ud_wqe() and update_sq_db(), to simplify the code.
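
After the split, the body of hns_roce_v2_post_send() reduces to a per-wr
dispatch on the qp type, with the doorbell update done once at the end. The
standalone sketch below shows only that dispatch shape; the stub builders
are placeholders for the real set_ud_wqe() and set_rc_wqe(), and locking,
wqe indexing and the doorbell are omitted:

#include <errno.h>
#include <stdio.h>

enum qp_type { QPT_GSI, QPT_RC, QPT_UNSUPPORTED };

/* Stubs for the type-specific wqe builders introduced by this patch. */
static int set_ud_wqe_stub(void *wqe) { (void)wqe; return 0; }
static int set_rc_wqe_stub(void *wqe) { (void)wqe; return 0; }

/* Build one wqe; the caller loops over the wr list and rings the doorbell. */
static int post_one_wr(enum qp_type type, void *wqe)
{
        switch (type) {
        case QPT_GSI:
                return set_ud_wqe_stub(wqe);
        case QPT_RC:
                return set_rc_wqe_stub(wqe);
        default:
                return -EOPNOTSUPP;
        }
}

int main(void)
{
        char wqe[64];

        /* Prints "0 -95" on Linux: RC is handled, other types rejected. */
        printf("%d %d\n", post_one_wr(QPT_RC, wqe),
               post_one_wr(QPT_UNSUPPORTED, wqe));
        return 0;
}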

Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 472 ++++++++++++++---------------
 1 file changed, 224 insertions(+), 248 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index c813c74..9bd8fbf 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -287,6 +287,214 @@ static int check_send_valid(struct hns_roce_dev *hr_dev,
 	return 0;
 }
 
+static inline int calc_wr_sge_num(const struct ib_send_wr *wr, u32 *sge_len)
+{
+	int valid_num = 0;
+	u32 len = 0;
+	int i;
+
+	for (i = 0; i < wr->num_sge; i++) {
+		if (likely(wr->sg_list[i].length)) {
+			len += wr->sg_list[i].length;
+			valid_num++;
+		}
+	}
+
+	*sge_len = len;
+	return valid_num;
+}
+
+static inline int set_ud_wqe(struct hns_roce_qp *qp,
+			     const struct ib_send_wr *wr,
+			     void *wqe, unsigned int *sge_idx,
+			     unsigned int owner_bit)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
+	struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
+	struct hns_roce_v2_ud_send_wqe *ud_sq_wqe = wqe;
+	unsigned int curr_idx = *sge_idx;
+	int valid_num_sge;
+	u32 msg_len = 0;
+	bool loopback;
+	u8 *smac;
+
+	valid_num_sge = calc_wr_sge_num(wr, &msg_len);
+	memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe));
+
+	roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M,
+		       V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]);
+	roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M,
+		       V2_UD_SEND_WQE_DMAC_1_S, ah->av.mac[1]);
+	roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_2_M,
+		       V2_UD_SEND_WQE_DMAC_2_S, ah->av.mac[2]);
+	roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_3_M,
+		       V2_UD_SEND_WQE_DMAC_3_S, ah->av.mac[3]);
+	roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_4_M,
+		       V2_UD_SEND_WQE_BYTE_48_DMAC_4_S, ah->av.mac[4]);
+	roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_5_M,
+		       V2_UD_SEND_WQE_BYTE_48_DMAC_5_S, ah->av.mac[5]);
+
+	/* MAC loopback */
+	smac = (u8 *)hr_dev->dev_addr[qp->port];
+	loopback = ether_addr_equal_unaligned(ah->av.mac, smac) ? 1 : 0;
+
+	roce_set_bit(ud_sq_wqe->byte_40,
+		     V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback);
+
+	roce_set_field(ud_sq_wqe->byte_4,
+		       V2_UD_SEND_WQE_BYTE_4_OPCODE_M,
+		       V2_UD_SEND_WQE_BYTE_4_OPCODE_S,
+		       HNS_ROCE_V2_WQE_OP_SEND);
+
+	ud_sq_wqe->msg_len = cpu_to_le32(msg_len);
+
+	switch (wr->opcode) {
+	case IB_WR_SEND_WITH_IMM:
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		ud_sq_wqe->immtdata = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
+		break;
+	default:
+		ud_sq_wqe->immtdata = 0;
+		break;
+	}
+
+	/* Set sig attr */
+	roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S,
+		     (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
+
+	/* Set se attr */
+	roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S,
+		     (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
+
+	roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S,
+		     owner_bit);
+
+	roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M,
+		       V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn);
+
+	roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M,
+		       V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
+
+	roce_set_field(ud_sq_wqe->byte_20,
+		       V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
+		       V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
+		       curr_idx & (qp->sge.sge_cnt - 1));
+
+	roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
+		       V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, 0);
+	ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ?
+			  qp->qkey : ud_wr(wr)->remote_qkey);
+	roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M,
+		       V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn);
+
+	roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M,
+		       V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id);
+	roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M,
+		       V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit);
+	roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
+		       V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass);
+	roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M,
+		       V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel);
+	roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M,
+		       V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl);
+	roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_PORTN_M,
+		       V2_UD_SEND_WQE_BYTE_40_PORTN_S, qp->port);
+
+	roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S,
+		     ah->av.vlan_en ? 1 : 0);
+	roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M,
+		       V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S, ah->av.gid_index);
+
+	memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN_V2);
+
+	set_extend_sge(qp, wr, &curr_idx, valid_num_sge);
+
+	*sge_idx = curr_idx;
+
+	return 0;
+}
+
+static inline int set_rc_wqe(struct hns_roce_qp *qp,
+			     const struct ib_send_wr *wr,
+			     void *wqe, unsigned int *sge_idx,
+			     unsigned int owner_bit)
+{
+	struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
+	unsigned int curr_idx = *sge_idx;
+	int valid_num_sge;
+	u32 msg_len = 0;
+	int ret = 0;
+
+	valid_num_sge = calc_wr_sge_num(wr, &msg_len);
+	memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe));
+
+	rc_sq_wqe->msg_len = cpu_to_le32(msg_len);
+
+	switch (wr->opcode) {
+	case IB_WR_SEND_WITH_IMM:
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		rc_sq_wqe->immtdata = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
+		break;
+	case IB_WR_SEND_WITH_INV:
+		rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
+		break;
+	default:
+		rc_sq_wqe->immtdata = 0;
+		break;
+	}
+
+	roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S,
+		     (wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
+
+	roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SE_S,
+		     (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
+
+	roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S,
+		     (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
+
+	roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S,
+		     owner_bit);
+
+	wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
+	switch (wr->opcode) {
+	case IB_WR_RDMA_READ:
+	case IB_WR_RDMA_WRITE:
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey);
+		rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr);
+		break;
+	case IB_WR_LOCAL_INV:
+		roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
+		rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
+		break;
+	case IB_WR_REG_MR:
+		set_frmr_seg(rc_sq_wqe, wqe, reg_wr(wr));
+		break;
+	case IB_WR_ATOMIC_CMP_AND_SWP:
+	case IB_WR_ATOMIC_FETCH_AND_ADD:
+		rc_sq_wqe->rkey = cpu_to_le32(atomic_wr(wr)->rkey);
+		rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr);
+		break;
+	default:
+		break;
+	}
+
+	roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
+		       V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
+		       to_hr_opcode(wr->opcode));
+
+	if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+	    wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+		set_atomic_seg(wr, wqe, rc_sq_wqe, valid_num_sge);
+	else if (wr->opcode != IB_WR_REG_MR)
+		ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe,
+					wqe, &curr_idx, valid_num_sge);
+
+	*sge_idx = curr_idx;
+
+	return ret;
+}
+
 static inline void update_sq_db(struct hns_roce_dev *hr_dev,
 				struct hns_roce_qp *qp)
 {
@@ -324,23 +532,15 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 				 const struct ib_send_wr **bad_wr)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
-	struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
-	struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
-	struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
+	struct ib_device *ibdev = &hr_dev->ib_dev;
 	struct hns_roce_qp *qp = to_hr_qp(ibqp);
-	struct device *dev = hr_dev->dev;
+	unsigned long flags = 0;
 	unsigned int owner_bit;
 	unsigned int sge_idx;
 	unsigned int wqe_idx;
-	unsigned long flags;
-	int valid_num_sge;
 	void *wqe = NULL;
-	bool loopback;
-	u32 tmp_len;
-	u8 *smac;
 	int nreq;
 	int ret;
-	int i;
 
 	spin_lock_irqsave(&qp->sq.lock, flags);
 
@@ -363,8 +563,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 		wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1);
 
 		if (unlikely(wr->num_sge > qp->sq.max_gs)) {
-			dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n",
-				wr->num_sge, qp->sq.max_gs);
+			ibdev_err(ibdev, "num_sge=%d > qp->sq.max_gs=%d\n",
+				  wr->num_sge, qp->sq.max_gs);
 			ret = -EINVAL;
 			*bad_wr = wr;
 			goto out;
@@ -374,248 +574,24 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 		qp->sq.wrid[wqe_idx] = wr->wr_id;
 		owner_bit =
 		       ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1);
-		valid_num_sge = 0;
-		tmp_len = 0;
-
-		for (i = 0; i < wr->num_sge; i++) {
-			if (likely(wr->sg_list[i].length)) {
-				tmp_len += wr->sg_list[i].length;
-				valid_num_sge++;
-			}
-		}
 
 		/* Corresponding to the QP type, wqe process separately */
-		if (ibqp->qp_type == IB_QPT_GSI) {
-			ud_sq_wqe = wqe;
-			memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe));
-
-			roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M,
-				       V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]);
-			roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M,
-				       V2_UD_SEND_WQE_DMAC_1_S, ah->av.mac[1]);
-			roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_2_M,
-				       V2_UD_SEND_WQE_DMAC_2_S, ah->av.mac[2]);
-			roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_3_M,
-				       V2_UD_SEND_WQE_DMAC_3_S, ah->av.mac[3]);
-			roce_set_field(ud_sq_wqe->byte_48,
-				       V2_UD_SEND_WQE_BYTE_48_DMAC_4_M,
-				       V2_UD_SEND_WQE_BYTE_48_DMAC_4_S,
-				       ah->av.mac[4]);
-			roce_set_field(ud_sq_wqe->byte_48,
-				       V2_UD_SEND_WQE_BYTE_48_DMAC_5_M,
-				       V2_UD_SEND_WQE_BYTE_48_DMAC_5_S,
-				       ah->av.mac[5]);
-
-			/* MAC loopback */
-			smac = (u8 *)hr_dev->dev_addr[qp->port];
-			loopback = ether_addr_equal_unaligned(ah->av.mac,
-							      smac) ? 1 : 0;
-
-			roce_set_bit(ud_sq_wqe->byte_40,
-				     V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback);
-
-			roce_set_field(ud_sq_wqe->byte_4,
-				       V2_UD_SEND_WQE_BYTE_4_OPCODE_M,
-				       V2_UD_SEND_WQE_BYTE_4_OPCODE_S,
-				       HNS_ROCE_V2_WQE_OP_SEND);
-
-			ud_sq_wqe->msg_len =
-			 cpu_to_le32(le32_to_cpu(ud_sq_wqe->msg_len) + tmp_len);
-
-			switch (wr->opcode) {
-			case IB_WR_SEND_WITH_IMM:
-			case IB_WR_RDMA_WRITE_WITH_IMM:
-				ud_sq_wqe->immtdata =
-				      cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
-				break;
-			default:
-				ud_sq_wqe->immtdata = 0;
-				break;
-			}
-
-			/* Set sig attr */
-			roce_set_bit(ud_sq_wqe->byte_4,
-				   V2_UD_SEND_WQE_BYTE_4_CQE_S,
-				   (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
-
-			/* Set se attr */
-			roce_set_bit(ud_sq_wqe->byte_4,
-				  V2_UD_SEND_WQE_BYTE_4_SE_S,
-				  (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
-
-			roce_set_bit(ud_sq_wqe->byte_4,
-				     V2_UD_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
-
-			roce_set_field(ud_sq_wqe->byte_16,
-				       V2_UD_SEND_WQE_BYTE_16_PD_M,
-				       V2_UD_SEND_WQE_BYTE_16_PD_S,
-				       to_hr_pd(ibqp->pd)->pdn);
-
-			roce_set_field(ud_sq_wqe->byte_16,
-				       V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M,
-				       V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S,
-				       valid_num_sge);
-
-			roce_set_field(ud_sq_wqe->byte_20,
-				     V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
-				     V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
-				     sge_idx & (qp->sge.sge_cnt - 1));
-
-			roce_set_field(ud_sq_wqe->byte_24,
-				       V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
-				       V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, 0);
-			ud_sq_wqe->qkey =
-			     cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ?
-			     qp->qkey : ud_wr(wr)->remote_qkey);
-			roce_set_field(ud_sq_wqe->byte_32,
-				       V2_UD_SEND_WQE_BYTE_32_DQPN_M,
-				       V2_UD_SEND_WQE_BYTE_32_DQPN_S,
-				       ud_wr(wr)->remote_qpn);
-
-			roce_set_field(ud_sq_wqe->byte_36,
-				       V2_UD_SEND_WQE_BYTE_36_VLAN_M,
-				       V2_UD_SEND_WQE_BYTE_36_VLAN_S,
-				       ah->av.vlan_id);
-			roce_set_field(ud_sq_wqe->byte_36,
-				       V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M,
-				       V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S,
-				       ah->av.hop_limit);
-			roce_set_field(ud_sq_wqe->byte_36,
-				       V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
-				       V2_UD_SEND_WQE_BYTE_36_TCLASS_S,
-				       ah->av.tclass);
-			roce_set_field(ud_sq_wqe->byte_40,
-				       V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M,
-				       V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S,
-				       ah->av.flowlabel);
-			roce_set_field(ud_sq_wqe->byte_40,
-				       V2_UD_SEND_WQE_BYTE_40_SL_M,
-				       V2_UD_SEND_WQE_BYTE_40_SL_S,
-				       ah->av.sl);
-			roce_set_field(ud_sq_wqe->byte_40,
-				       V2_UD_SEND_WQE_BYTE_40_PORTN_M,
-				       V2_UD_SEND_WQE_BYTE_40_PORTN_S,
-				       qp->port);
-
-			roce_set_bit(ud_sq_wqe->byte_40,
-				     V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S,
-				     ah->av.vlan_en ? 1 : 0);
-			roce_set_field(ud_sq_wqe->byte_48,
-				       V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M,
-				       V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S,
-				       hns_get_gid_index(hr_dev, qp->phy_port,
-							 ah->av.gid_index));
-
-			memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0],
-			       GID_LEN_V2);
-
-			set_extend_sge(qp, wr, &sge_idx, valid_num_sge);
-		} else if (ibqp->qp_type == IB_QPT_RC) {
-			rc_sq_wqe = wqe;
-			memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe));
-
-			rc_sq_wqe->msg_len =
-			 cpu_to_le32(le32_to_cpu(rc_sq_wqe->msg_len) + tmp_len);
-
-			switch (wr->opcode) {
-			case IB_WR_SEND_WITH_IMM:
-			case IB_WR_RDMA_WRITE_WITH_IMM:
-				rc_sq_wqe->immtdata =
-				      cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
-				break;
-			case IB_WR_SEND_WITH_INV:
-				rc_sq_wqe->inv_key =
-					cpu_to_le32(wr->ex.invalidate_rkey);
-				break;
-			default:
-				rc_sq_wqe->immtdata = 0;
-				break;
-			}
-
-			roce_set_bit(rc_sq_wqe->byte_4,
-				     V2_RC_SEND_WQE_BYTE_4_FENCE_S,
-				     (wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
-
-			roce_set_bit(rc_sq_wqe->byte_4,
-				  V2_RC_SEND_WQE_BYTE_4_SE_S,
-				  (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
-
-			roce_set_bit(rc_sq_wqe->byte_4,
-				   V2_RC_SEND_WQE_BYTE_4_CQE_S,
-				   (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
-
-			roce_set_bit(rc_sq_wqe->byte_4,
-				     V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
-
-			wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
-			switch (wr->opcode) {
-			case IB_WR_RDMA_READ:
-				rc_sq_wqe->rkey =
-					cpu_to_le32(rdma_wr(wr)->rkey);
-				rc_sq_wqe->va =
-					cpu_to_le64(rdma_wr(wr)->remote_addr);
-				break;
-			case IB_WR_RDMA_WRITE:
-				rc_sq_wqe->rkey =
-					cpu_to_le32(rdma_wr(wr)->rkey);
-				rc_sq_wqe->va =
-					cpu_to_le64(rdma_wr(wr)->remote_addr);
-				break;
-			case IB_WR_RDMA_WRITE_WITH_IMM:
-				rc_sq_wqe->rkey =
-					cpu_to_le32(rdma_wr(wr)->rkey);
-				rc_sq_wqe->va =
-					cpu_to_le64(rdma_wr(wr)->remote_addr);
-				break;
-			case IB_WR_LOCAL_INV:
-				roce_set_bit(rc_sq_wqe->byte_4,
-					       V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
-				rc_sq_wqe->inv_key =
-					    cpu_to_le32(wr->ex.invalidate_rkey);
-				break;
-			case IB_WR_REG_MR:
-				set_frmr_seg(rc_sq_wqe, wqe, reg_wr(wr));
-				break;
-			case IB_WR_ATOMIC_CMP_AND_SWP:
-				rc_sq_wqe->rkey =
-					cpu_to_le32(atomic_wr(wr)->rkey);
-				rc_sq_wqe->va =
-					cpu_to_le64(atomic_wr(wr)->remote_addr);
-				break;
-			case IB_WR_ATOMIC_FETCH_AND_ADD:
-				rc_sq_wqe->rkey =
-					cpu_to_le32(atomic_wr(wr)->rkey);
-				rc_sq_wqe->va =
-					cpu_to_le64(atomic_wr(wr)->remote_addr);
-				break;
-			default:
-				break;
-			}
-
-			roce_set_field(rc_sq_wqe->byte_4,
-				       V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
-				       V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
-				       to_hr_opcode(wr->opcode));
-
-			if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-			    wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-				set_atomic_seg(wr, wqe, rc_sq_wqe,
-					       valid_num_sge);
-			else if (wr->opcode != IB_WR_REG_MR) {
-				ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe,
-							wqe, &sge_idx,
-							valid_num_sge);
-				if (ret) {
-					*bad_wr = wr;
-					goto out;
-				}
-			}
-		} else {
-			dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
+		if (ibqp->qp_type == IB_QPT_GSI)
+			ret = set_ud_wqe(qp, wr, wqe, &sge_idx, owner_bit);
+		else if (ibqp->qp_type == IB_QPT_RC)
+			ret = set_rc_wqe(qp, wr, wqe, &sge_idx, owner_bit);
+		else {
+			ibdev_err(ibdev, "Illegal qp_type(0x%x)\n",
+				  ibqp->qp_type);
 			spin_unlock_irqrestore(&qp->sq.lock, flags);
 			*bad_wr = wr;
 			return -EOPNOTSUPP;
 		}
+
+		if (ret) {
+			*bad_wr = wr;
+			goto out;
+		}
 	}
 
 out:
-- 
2.8.1



* Re: [PATCH v3 for-next 0/5] RDMA/hns: Refactor wqe related codes
From: Weihang Li @ 2020-03-10 12:53 UTC
  To: dledford, jgg; +Cc: leon, linux-rdma, Linuxarm

Hi Jason,

Sorry, I didn't notice that the first three patches of this series had
been applied to your for-next branch, so I sent this new version. Do I
need to resend the last two patches in a new series?

Thanks
Weihang

On 2020/3/10 19:22, Weihang Li wrote:
> The wqe process in hns is hard to understand and maintain, so this series
> refactors the wqe related code, especially in hns_roce_v2_post_send().
> 
> Previous discussion could be found at:
> - v2: https://patchwork.kernel.org/cover/11422963/
> - v1: https://patchwork.kernel.org/cover/11415395/
> 
> Changes since v2:
> - Remove a redundant assignment of "NULL" in patch 4/5.
> 
> Changes since v1:
> - Address Leon's comments about the implementation of to_hr_opcode() in
>   patch 3/5.
> - Rebase patch 4/5, which could not be applied cleanly.
> 
> Xi Wang (5):
>   RDMA/hns: Rename wqe buffer related functions
>   RDMA/hns: Optimize wqe buffer filling process for post send
>   RDMA/hns: Optimize the wr opcode conversion from ib to hns
>   RDMA/hns: Optimize base address table config flow for qp buffer
>   RDMA/hns: Optimize wqe buffer set flow for post send
> 
>  drivers/infiniband/hw/hns/hns_roce_device.h |  10 +-
>  drivers/infiniband/hw/hns/hns_roce_hem.c    |  16 +-
>  drivers/infiniband/hw/hns/hns_roce_hw_v1.c  |   9 +-
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 588 ++++++++++++++--------------
>  drivers/infiniband/hw/hns/hns_roce_qp.c     |  48 +--
>  5 files changed, 319 insertions(+), 352 deletions(-)
> 



* Re: [PATCH v3 for-next 0/5] RDMA/hns: Refactor wqe related codes
From: Jason Gunthorpe @ 2020-03-10 14:42 UTC
  To: Weihang Li; +Cc: dledford, leon, linux-rdma, Linuxarm

On Tue, Mar 10, 2020 at 08:53:22PM +0800, Weihang Li wrote:
> Hi Jason,
> 
> Sorry, I didn't notice that the first three patches of this series had
> been applied to your for-next branch, so I sent this new version. Do I
> need to resend the last two patches in a new series?

Oh, something got out of sorts. I did not intend to push those four
patches, so I will drop them; this v3 will supersede them, and the
'minimum depth of qp to 0' patch should be resent with the comments
addressed.

Jason


* Re: [PATCH v3 for-next 0/5] RDMA/hns: Refactor wqe related codes
From: liweihang @ 2020-03-11  1:14 UTC
  To: Jason Gunthorpe; +Cc: dledford, leon, linux-rdma, Linuxarm

On 2020/3/10 22:42, Jason Gunthorpe wrote:
> On Tue, Mar 10, 2020 at 08:53:22PM +0800, Weihang Li wrote:
>> Hi Jason,
>>
>> Sorry, I didn't notice that the first three patches of this series had
>> been applied to your for-next branch, so I sent this new version. Do I
>> need to resend the last two patches in a new series?
> 
> Oh, something got out of sorts. I did not intend to push those four
> patches, so I will drop them; this v3 will supersede them, and the
> 'minimum depth of qp to 0' patch should be resent with the comments
> addressed.
> 
> Jason
> 

OK, thank you :)

Weihang


* Re: [PATCH v3 for-next 0/5] RDMA/hns: Refactor wqe related codes
From: Jason Gunthorpe @ 2020-03-18 13:27 UTC
  To: Weihang Li; +Cc: dledford, leon, linux-rdma, linuxarm

On Tue, Mar 10, 2020 at 07:17:59PM +0800, Weihang Li wrote:
> The wqe process in hns is hard to understand and maintain, so this series
> refactors the wqe related code, especially in hns_roce_v2_post_send().
> 
> Previous discussion could be found at:
> - v2: https://patchwork.kernel.org/cover/11422963/
> - v1: https://patchwork.kernel.org/cover/11415395/
> 
> Changes since v2:
> - Remove a redundant assignment of "NULL" in patch 4/5.
> 
> Changes since v1:
> - Address Leon's comments about the implementation of to_hr_opcode() in
>   patch 3/5.
> - Rebase patch 4/5, which could not be applied cleanly.
> 
> Xi Wang (5):
>   RDMA/hns: Rename wqe buffer related functions
>   RDMA/hns: Optimize wqe buffer filling process for post send
>   RDMA/hns: Optimize the wr opcode conversion from ib to hns
>   RDMA/hns: Optimize base address table config flow for qp buffer
>   RDMA/hns: Optimize wqe buffer set flow for post send

Applied to for-next, thanks

Jason
