* [PATCH v5 rdma-core 0/4] Support UD on hip08
@ 2019-10-16 12:27 Weihang Li
  2019-10-16 12:27 ` [PATCH v5 rdma-core 1/4] libhns: Add support of handling AH for hip08 Weihang Li
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Weihang Li @ 2019-10-16 12:27 UTC (permalink / raw)
  To: dledford, jgg; +Cc: linux-rdma, linuxarm

This series enables hip08 to support Unreliable Datagram (UD).

PR: https://github.com/linux-rdma/rdma-core/pull/587

Changelog:
v4->v5: Address comments from Leon Romanovsky, including some code
	optimizations and removal of printf calls from the library.
v3->v4: Remove code that relies on kernel-headers changes for vlan and mac.
v2->v3: Remove the patch "libhns: Support configuring loopback mode by user"
	from this patchset; it needs to be considered further.
v1->v2: Address comments from Leon Romanovsky and make some fixes in patch 3/4.

Lijun Ou (2):
  libhns: Add support of handling AH for hip08
  libhns: Add UD support for hip08 in user mode

Yixian Liu (2):
  libhns: Simplify the calculation and usage of wqe idx for post verbs
  libhns: Refactor for post send

 providers/hns/hns_roce_u.c       |   2 +
 providers/hns/hns_roce_u.h       |  39 +++
 providers/hns/hns_roce_u_hw_v1.c |  27 +-
 providers/hns/hns_roce_u_hw_v2.c | 568 +++++++++++++++++++++------------------
 providers/hns/hns_roce_u_hw_v2.h |  91 +++++++
 providers/hns/hns_roce_u_verbs.c |  47 ++++
 6 files changed, 489 insertions(+), 285 deletions(-)

-- 
2.8.1


* [PATCH v5 rdma-core 1/4] libhns: Add support of handling AH for hip08
  2019-10-16 12:27 [PATCH v5 rdma-core 0/4] Support UD on hip08 Weihang Li
@ 2019-10-16 12:27 ` Weihang Li
  2019-10-16 12:27 ` [PATCH v5 rdma-core 2/4] libhns: Simplify the calculation and usage of wqe idx for post verbs Weihang Li
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Weihang Li @ 2019-10-16 12:27 UTC (permalink / raw)
  To: dledford, jgg; +Cc: linux-rdma, linuxarm

From: Lijun Ou <oulijun@huawei.com>

This patch implements two verbs, create_ah and destroy_ah, to support
allocation and destruction of Address Handles.
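
For reference, a minimal sketch of how an application would exercise these
verbs through libibverbs once they are wired up; the port number, sgid index
and hop limit below are placeholder assumptions, not values taken from this
series:

#include <string.h>
#include <infiniband/verbs.h>

/* Illustrative only: build an ibv_ah_attr with a GRH (so the provider's
 * create_ah copies dgid, sgid_index, hop_limit, tclass and flow_label
 * into its hns_roce_av) and allocate the AH. */
static struct ibv_ah *make_ud_ah(struct ibv_pd *pd, const union ibv_gid *dgid)
{
	struct ibv_ah_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.is_global = 1;		/* RoCE addressing uses the GRH fields */
	attr.grh.dgid = *dgid;
	attr.grh.sgid_index = 0;	/* placeholder GID table index */
	attr.grh.hop_limit = 1;
	attr.port_num = 1;
	attr.sl = 0;

	return ibv_create_ah(pd, &attr);
}

/* ... and later ibv_destroy_ah(ah) releases it through the provider's
 * destroy_ah, which frees the hns_roce_ah wrapper. */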

Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Weihang Li <liweihang@hisilicon.com>
---
 providers/hns/hns_roce_u.c       |  2 ++
 providers/hns/hns_roce_u.h       | 29 ++++++++++++++++++++++++++++
 providers/hns/hns_roce_u_verbs.c | 41 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 72 insertions(+)

diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 5872599..8ba41de 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -81,6 +81,8 @@ static const struct verbs_context_ops hns_common_ops = {
 	.modify_srq = hns_roce_u_modify_srq,
 	.query_srq = hns_roce_u_query_srq,
 	.destroy_srq = hns_roce_u_destroy_srq,
+	.create_ah = hns_roce_u_create_ah,
+	.destroy_ah = hns_roce_u_destroy_ah,
 };
 
 static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 23e0f13..31664c1 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -42,6 +42,7 @@
 #include <infiniband/verbs.h>
 #include <ccan/bitmap.h>
 #include <ccan/container_of.h>
+#include <linux/if_ether.h>
 
 #define HNS_ROCE_HW_VER1		('h' << 24 | 'i' << 16 | '0' << 8 | '6')
 
@@ -244,6 +245,25 @@ struct hns_roce_qp {
 	unsigned long			flags;
 };
 
+struct hns_roce_av {
+	uint8_t				port;
+	uint8_t				gid_index;
+	uint8_t				static_rate;
+	uint8_t				hop_limit;
+	uint32_t			flowlabel;
+	uint8_t				sl;
+	uint8_t				tclass;
+	uint8_t				dgid[HNS_ROCE_GID_SIZE];
+	uint8_t				mac[ETH_ALEN];
+	uint16_t			vlan_id;
+	uint8_t				vlan_en;
+};
+
+struct hns_roce_ah {
+	struct ibv_ah			ibv_ah;
+	struct hns_roce_av		av;
+};
+
 struct hns_roce_u_hw {
 	uint32_t hw_version;
 	struct verbs_context_ops hw_ops;
@@ -280,6 +300,11 @@ static inline struct  hns_roce_qp *to_hr_qp(struct ibv_qp *ibv_qp)
 	return container_of(ibv_qp, struct hns_roce_qp, ibv_qp);
 }
 
+static inline struct hns_roce_ah *to_hr_ah(struct ibv_ah *ibv_ah)
+{
+	return container_of(ibv_ah, struct hns_roce_ah, ibv_ah);
+}
+
 int hns_roce_u_query_device(struct ibv_context *context,
 			    struct ibv_device_attr *attr);
 int hns_roce_u_query_port(struct ibv_context *context, uint8_t port,
@@ -319,6 +344,10 @@ struct ibv_qp *hns_roce_u_create_qp(struct ibv_pd *pd,
 int hns_roce_u_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
 			int attr_mask, struct ibv_qp_init_attr *init_attr);
 
+struct ibv_ah *hns_roce_u_create_ah(struct ibv_pd *pd,
+				    struct ibv_ah_attr *attr);
+int hns_roce_u_destroy_ah(struct ibv_ah *ah);
+
 int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size,
 		       int page_size);
 void hns_roce_free_buf(struct hns_roce_buf *buf);
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 9d222c0..ef4b9e0 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -38,6 +38,7 @@
 #include <sys/mman.h>
 #include <ccan/ilog.h>
 #include <ccan/minmax.h>
+#include <ccan/array_size.h>
 #include <util/util.h>
 #include "hns_roce_u.h"
 #include "hns_roce_u_abi.h"
@@ -952,3 +953,43 @@ int hns_roce_u_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
 
 	return ret;
 }
+
+struct ibv_ah *hns_roce_u_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
+{
+	struct hns_roce_ah *ah;
+
+	ah = calloc(1, sizeof(*ah));
+	if (!ah)
+		return NULL;
+
+	ah->av.port = attr->port_num;
+	ah->av.sl = attr->sl;
+
+	if (attr->static_rate)
+		ah->av.static_rate = IBV_RATE_10_GBPS;
+
+	if (attr->is_global) {
+		ah->av.gid_index = attr->grh.sgid_index;
+		ah->av.hop_limit = attr->grh.hop_limit;
+		ah->av.tclass = attr->grh.traffic_class;
+		ah->av.flowlabel = attr->grh.flow_label;
+
+		memcpy(ah->av.dgid, attr->grh.dgid.raw,
+		       ARRAY_SIZE(ah->av.dgid));
+	}
+
+	return &ah->ibv_ah;
+}
+
+int hns_roce_u_destroy_ah(struct ibv_ah *ah)
+{
+	int ret;
+
+	ret = ibv_cmd_destroy_ah(ah);
+	if (ret)
+		return ret;
+
+	free(to_hr_ah(ah));
+
+	return 0;
+}
-- 
2.8.1


* [PATCH v5 rdma-core 2/4] libhns: Simplify the calculation and usage of wqe idx for post verbs
  2019-10-16 12:27 [PATCH v5 rdma-core 0/4] Support UD on hip08 Weihang Li
  2019-10-16 12:27 ` [PATCH v5 rdma-core 1/4] libhns: Add support of handling AH for hip08 Weihang Li
@ 2019-10-16 12:27 ` Weihang Li
  2019-10-16 12:27 ` [PATCH v5 rdma-core 3/4] libhns: Refactor for post send Weihang Li
  2019-10-16 12:27 ` [PATCH v5 rdma-core 4/4] libhns: Add UD support for hip08 in user mode Weihang Li
  3 siblings, 0 replies; 5+ messages in thread
From: Weihang Li @ 2019-10-16 12:27 UTC (permalink / raw)
  To: dledford, jgg; +Cc: linux-rdma, linuxarm

From: Yixian Liu <liuyixian@huawei.com>

Currently, the wqe idx is calculated repeatedly everywhere it is used.
This patch defines wqe_idx, calculates it only once, and then uses it
wherever it is needed.
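
As a side note, the new per-request computation gives the same slot as the
old running-index-and-mask pattern because wqe_cnt is a power of two; a
tiny standalone sketch (the ring size and head value below are made up):

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int wqe_cnt = 8;	/* queue depth, always a power of two */
	unsigned int head = 13;		/* ring head may exceed wqe_cnt */
	unsigned int ind = head;	/* old style: running index */
	unsigned int nreq;

	for (nreq = 0; nreq < 5; nreq++, ind++) {
		unsigned int old_idx = ind & (wqe_cnt - 1);
		unsigned int new_idx = (head + nreq) % wqe_cnt;

		assert(old_idx == new_idx);
		printf("nreq=%u -> wqe_idx=%u\n", nreq, new_idx);
	}

	return 0;
}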

Signed-off-by: Yixian Liu <liuyixian@huawei.com>
Signed-off-by: Weihang Li <liweihang@hisilicon.com>
---
 providers/hns/hns_roce_u_hw_v1.c | 27 ++++++++-----------
 providers/hns/hns_roce_u_hw_v2.c | 56 +++++++++++++++++++---------------------
 2 files changed, 37 insertions(+), 46 deletions(-)

diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c
index fceb57a..c7689d6 100644
--- a/providers/hns/hns_roce_u_hw_v1.c
+++ b/providers/hns/hns_roce_u_hw_v1.c
@@ -462,7 +462,6 @@ static int hns_roce_u_v1_arm_cq(struct ibv_cq *ibvcq, int solicited)
 static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 				   struct ibv_send_wr **bad_wr)
 {
-	unsigned int ind;
 	void *wqe;
 	int nreq;
 	int ps_opcode, i;
@@ -471,12 +470,10 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 	struct hns_roce_wqe_data_seg *dseg = NULL;
 	struct hns_roce_qp *qp = to_hr_qp(ibvqp);
 	struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context);
+	unsigned int wqe_idx;
 
 	pthread_spin_lock(&qp->sq.lock);
 
-	/* check that state is OK to post send */
-	ind = qp->sq.head;
-
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
 		if (hns_roce_wq_overflow(&qp->sq, nreq,
 					 to_hr_cq(qp->ibv_qp.send_cq))) {
@@ -484,6 +481,9 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 			*bad_wr = wr;
 			goto out;
 		}
+
+		wqe_idx = (qp->sq.head + nreq) % qp->sq.wqe_cnt;
+
 		if (wr->num_sge > qp->sq.max_gs) {
 			ret = -1;
 			*bad_wr = wr;
@@ -492,10 +492,10 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 			goto out;
 		}
 
-		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
+		ctrl = wqe = get_send_wqe(qp, wqe_idx);
 		memset(ctrl, 0, sizeof(struct hns_roce_wqe_ctrl_seg));
 
-		qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
+		qp->sq.wrid[wqe_idx] = wr->wr_id;
 		for (i = 0; i < wr->num_sge; i++)
 			ctrl->msg_length = htole32(le32toh(ctrl->msg_length) +
 						   wr->sg_list[i].length);
@@ -578,8 +578,6 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 			ctrl->flag |=
 			       htole32(wr->num_sge << HNS_ROCE_WQE_SGE_NUM_BIT);
 		}
-
-		ind++;
 	}
 
 out:
@@ -745,17 +743,14 @@ static int hns_roce_u_v1_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
 {
 	int ret = 0;
 	int nreq;
-	int ind;
 	struct ibv_sge *sg;
 	struct hns_roce_rc_rq_wqe *rq_wqe;
 	struct hns_roce_qp *qp = to_hr_qp(ibvqp);
 	struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context);
+	unsigned int wqe_idx;
 
 	pthread_spin_lock(&qp->rq.lock);
 
-	/* check that state is OK to post receive */
-	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
-
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
 		if (hns_roce_wq_overflow(&qp->rq, nreq,
 					 to_hr_cq(qp->ibv_qp.recv_cq))) {
@@ -764,13 +759,15 @@ static int hns_roce_u_v1_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
 			goto out;
 		}
 
+		wqe_idx = (qp->rq.head + nreq) % qp->rq.wqe_cnt;
+
 		if (wr->num_sge > qp->rq.max_gs) {
 			ret = -1;
 			*bad_wr = wr;
 			goto out;
 		}
 
-		rq_wqe = get_recv_wqe(qp, ind);
+		rq_wqe = get_recv_wqe(qp, wqe_idx);
 		if (wr->num_sge > HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM) {
 			ret = -1;
 			*bad_wr = wr;
@@ -811,9 +808,7 @@ static int hns_roce_u_v1_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
 				       HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM - 2);
 		}
 
-		qp->rq.wrid[ind] = wr->wr_id;
-
-		ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
+		qp->rq.wrid[wqe_idx] = wr->wr_id;
 	}
 
 out:
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 931f59d..4dbbc7e 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -612,27 +612,26 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
 int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 			    struct ibv_send_wr **bad_wr)
 {
-	unsigned int ind_sge;
-	unsigned int ind;
-	int nreq;
-	void *wqe;
-	int ret = 0;
-	struct hns_roce_qp *qp = to_hr_qp(ibvqp);
 	struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context);
-	struct hns_roce_rc_sq_wqe *rc_sq_wqe;
+	struct hns_roce_qp *qp = to_hr_qp(ibvqp);
 	struct hns_roce_v2_wqe_data_seg *dseg;
+	struct hns_roce_rc_sq_wqe *rc_sq_wqe;
 	struct ibv_qp_attr attr;
+	unsigned int wqe_idx;
+	unsigned int sge_idx;
 	int valid_num_sge;
 	int attr_mask;
+	int ret = 0;
+	void *wqe;
+	int nreq;
 	int j;
 	int i;
 
 	pthread_spin_lock(&qp->sq.lock);
 
-	/* check that state is OK to post send */
-	ind = qp->sq.head;
-	ind_sge = qp->next_sge;
+	sge_idx = qp->next_sge;
 
+	/* check that state is OK to post send */
 	if (ibvqp->state == IBV_QPS_RESET || ibvqp->state == IBV_QPS_INIT ||
 	    ibvqp->state == IBV_QPS_RTR) {
 		pthread_spin_unlock(&qp->sq.lock);
@@ -648,18 +647,19 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 			goto out;
 		}
 
+		wqe_idx = (qp->sq.head + nreq) % qp->sq.wqe_cnt;
+
 		if (wr->num_sge > qp->sq.max_gs) {
 			ret = EINVAL;
 			*bad_wr = wr;
 			goto out;
 		}
 
-		wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
+		wqe = get_send_wqe(qp, wqe_idx);
 		rc_sq_wqe = wqe;
 
 		memset(rc_sq_wqe, 0, sizeof(struct hns_roce_rc_sq_wqe));
-
-		qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
+		qp->sq.wrid[wqe_idx] = wr->wr_id;
 
 		valid_num_sge = wr->num_sge;
 		j = 0;
@@ -880,7 +880,7 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 				roce_set_field(rc_sq_wqe->byte_20,
 					RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M,
 					RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S,
-					ind_sge & (qp->sge.sge_cnt - 1));
+					sge_idx & (qp->sge.sge_cnt - 1));
 
 				for (i = 0; i < wr->num_sge && j < 2; i++)
 					if (likely(wr->sg_list[i].length)) {
@@ -893,17 +893,15 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 				for (; i < wr->num_sge; i++) {
 					if (likely(wr->sg_list[i].length)) {
 						dseg = get_send_sge_ex(qp,
-							ind_sge &
+							sge_idx &
 							(qp->sge.sge_cnt - 1));
 						set_data_seg_v2(dseg,
 							   wr->sg_list + i);
-						ind_sge++;
+						sge_idx++;
 					}
 				}
 			}
 		}
-
-		ind++;
 	}
 
 out:
@@ -916,7 +914,7 @@ out:
 		if (qp->flags & HNS_ROCE_SUPPORT_SQ_RECORD_DB)
 			*(qp->sdb) = qp->sq.head & 0xffff;
 
-		qp->next_sge = ind_sge;
+		qp->next_sge = sge_idx;
 	}
 
 	pthread_spin_unlock(&qp->sq.lock);
@@ -936,23 +934,21 @@ out:
 static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
 				   struct ibv_recv_wr **bad_wr)
 {
-	int ret = 0;
-	int nreq;
-	int ind;
 	struct hns_roce_qp *qp = to_hr_qp(ibvqp);
 	struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context);
 	struct hns_roce_v2_wqe_data_seg *dseg;
 	struct hns_roce_rinl_sge *sge_list;
 	struct ibv_qp_attr attr;
 	int attr_mask;
+	int ret = 0;
+	int wqe_idx;
 	void *wqe;
+	int nreq;
 	int i;
 
 	pthread_spin_lock(&qp->rq.lock);
 
 	/* check that state is OK to post receive */
-	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
-
 	if (ibvqp->state == IBV_QPS_RESET) {
 		pthread_spin_unlock(&qp->rq.lock);
 		*bad_wr = wr;
@@ -967,13 +963,15 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
 			goto out;
 		}
 
+		wqe_idx = (qp->rq.head + nreq) % qp->rq.wqe_cnt;
+
 		if (wr->num_sge > qp->rq.max_gs) {
 			ret = -EINVAL;
 			*bad_wr = wr;
 			goto out;
 		}
 
-		wqe = get_recv_wqe_v2(qp, ind);
+		wqe = get_recv_wqe_v2(qp, wqe_idx);
 		if (!wqe) {
 			ret = -EINVAL;
 			*bad_wr = wr;
@@ -995,8 +993,8 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
 		}
 
 		/* QP support receive inline wqe */
-		sge_list = qp->rq_rinl_buf.wqe_list[ind].sg_list;
-		qp->rq_rinl_buf.wqe_list[ind].sge_cnt =
+		sge_list = qp->rq_rinl_buf.wqe_list[wqe_idx].sg_list;
+		qp->rq_rinl_buf.wqe_list[wqe_idx].sge_cnt =
 						(unsigned int)wr->num_sge;
 
 		for (i = 0; i < wr->num_sge; i++) {
@@ -1005,9 +1003,7 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
 			sge_list[i].len = wr->sg_list[i].length;
 		}
 
-		qp->rq.wrid[ind] = wr->wr_id;
-
-		ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
+		qp->rq.wrid[wqe_idx] = wr->wr_id;
 	}
 
 out:
-- 
2.8.1


* [PATCH v5 rdma-core 3/4] libhns: Refactor for post send
  2019-10-16 12:27 [PATCH v5 rdma-core 0/4] Support UD on hip08 Weihang Li
  2019-10-16 12:27 ` [PATCH v5 rdma-core 1/4] libhns: Add support of handling AH for hip08 Weihang Li
  2019-10-16 12:27 ` [PATCH v5 rdma-core 2/4] libhns: Simplify the calculation and usage of wqe idx for post verbs Weihang Li
@ 2019-10-16 12:27 ` Weihang Li
  2019-10-16 12:27 ` [PATCH v5 rdma-core 4/4] libhns: Add UD support for hip08 in user mode Weihang Li
  3 siblings, 0 replies; 5+ messages in thread
From: Weihang Li @ 2019-10-16 12:27 UTC (permalink / raw)
  To: dledford, jgg; +Cc: linux-rdma, linuxarm

From: Yixian Liu <liuyixian@huawei.com>

This patch refactors hns_roce_u_v2_post_send(), which has become very
complicated. The complexity is reduced with the following points:
1. Separate the RC service handling into its own function.
2. Simplify and separate the processing of sges.
3. Keep the logic and consistency of all operations.
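
The refactor also introduces HNS_ROCE_ADDRESS_MASK/HNS_ROCE_ADDRESS_SHIFT to
replace the open-coded 0xffffffff and 32 in the bind_mw path of set_rc_wqe();
a trivial standalone sketch of what that split expresses (the length value
below is made up):

#include <assert.h>
#include <stdint.h>

#define HNS_ROCE_ADDRESS_MASK	0xFFFFFFFF
#define HNS_ROCE_ADDRESS_SHIFT	32

int main(void)
{
	uint64_t length = 0x0123456789abcdefULL;	/* bind_info.length */
	uint32_t low  = length & HNS_ROCE_ADDRESS_MASK;		/* -> byte_16 */
	uint32_t high = length >> HNS_ROCE_ADDRESS_SHIFT;	/* -> byte_20 */

	/* the two 32-bit WQE fields reassemble to the original length */
	assert((((uint64_t)high << HNS_ROCE_ADDRESS_SHIFT) | low) == length);

	return 0;
}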

Signed-off-by: Yixian Liu <liuyixian@huawei.com>
Signed-off-by: Weihang Li <liweihang@hisilicon.com>
---
 providers/hns/hns_roce_u.h       |  10 +
 providers/hns/hns_roce_u_hw_v2.c | 427 ++++++++++++++++-----------------------
 2 files changed, 189 insertions(+), 248 deletions(-)

diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 31664c1..45e8099 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -69,6 +69,9 @@
 #define HNS_ROCE_TPTR_OFFSET		0x1000
 #define HNS_ROCE_STATIC_RATE		3 /* Gbps */
 
+#define HNS_ROCE_ADDRESS_MASK		0xFFFFFFFF
+#define HNS_ROCE_ADDRESS_SHIFT		32
+
 #define roce_get_field(origin, mask, shift) \
 	(((le32toh(origin)) & (mask)) >> (shift))
 
@@ -205,6 +208,13 @@ struct hns_roce_wq {
 	int				offset;
 };
 
+/* record the result of sge process */
+struct hns_roce_sge_info {
+	unsigned int		valid_num; /* sge length is not 0 */
+	unsigned int		start_idx; /* start position of extend sge */
+	unsigned int		total_len; /* total length of valid sges */
+};
+
 struct hns_roce_sge_ex {
 	int				offset;
 	unsigned int			sge_cnt;
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 4dbbc7e..4eb493c 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -609,28 +609,188 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
 	return 0;
 }
 
+static void set_sge(struct hns_roce_v2_wqe_data_seg *dseg,
+		    struct hns_roce_qp *qp, struct ibv_send_wr *wr,
+		    struct hns_roce_sge_info *sge_info)
+{
+	int i;
+
+	sge_info->valid_num = 0;
+	sge_info->total_len = 0;
+
+	for (i = 0; i < wr->num_sge; i++) {
+		if (unlikely(!wr->sg_list[i].length))
+			continue;
+
+		sge_info->total_len += wr->sg_list[i].length;
+		sge_info->valid_num++;
+
+		/* No inner sge in UD wqe */
+		if (sge_info->valid_num <= HNS_ROCE_SGE_IN_WQE &&
+		    qp->ibv_qp.qp_type != IBV_QPT_UD) {
+			set_data_seg_v2(dseg, wr->sg_list + i);
+			dseg++;
+		} else {
+			dseg = get_send_sge_ex(qp, sge_info->start_idx &
+					       (qp->sge.sge_cnt - 1));
+			set_data_seg_v2(dseg, wr->sg_list + i);
+			sge_info->start_idx++;
+		}
+	}
+}
+
+static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
+		      int nreq, struct hns_roce_sge_info *sge_info)
+{
+	struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe;
+	struct hns_roce_v2_wqe_data_seg *dseg;
+	int hr_op;
+	int i;
+
+	memset(rc_sq_wqe, 0, sizeof(struct hns_roce_rc_sq_wqe));
+
+	switch (wr->opcode) {
+	case IBV_WR_RDMA_READ:
+		hr_op = HNS_ROCE_WQE_OP_RDMA_READ;
+		rc_sq_wqe->va = htole64(wr->wr.rdma.remote_addr);
+		rc_sq_wqe->rkey = htole32(wr->wr.rdma.rkey);
+		break;
+	case IBV_WR_RDMA_WRITE:
+		hr_op = HNS_ROCE_WQE_OP_RDMA_WRITE;
+		rc_sq_wqe->va = htole64(wr->wr.rdma.remote_addr);
+		rc_sq_wqe->rkey = htole32(wr->wr.rdma.rkey);
+		break;
+	case IBV_WR_RDMA_WRITE_WITH_IMM:
+		hr_op = HNS_ROCE_WQE_OP_RDMA_WRITE_WITH_IMM;
+		rc_sq_wqe->va = htole64(wr->wr.rdma.remote_addr);
+		rc_sq_wqe->rkey = htole32(wr->wr.rdma.rkey);
+		rc_sq_wqe->immtdata = htole32(be32toh(wr->imm_data));
+		break;
+	case IBV_WR_SEND:
+		hr_op = HNS_ROCE_WQE_OP_SEND;
+		break;
+	case IBV_WR_SEND_WITH_INV:
+		hr_op = HNS_ROCE_WQE_OP_SEND_WITH_INV;
+		rc_sq_wqe->inv_key = htole32(wr->invalidate_rkey);
+		break;
+	case IBV_WR_SEND_WITH_IMM:
+		hr_op = HNS_ROCE_WQE_OP_SEND_WITH_IMM;
+		rc_sq_wqe->immtdata = htole32(be32toh(wr->imm_data));
+		break;
+	case IBV_WR_LOCAL_INV:
+		hr_op = HNS_ROCE_WQE_OP_LOCAL_INV;
+		roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SO_S, 1);
+		rc_sq_wqe->inv_key = htole32(wr->invalidate_rkey);
+		break;
+	case IBV_WR_BIND_MW:
+		hr_op = HNS_ROCE_WQE_OP_BIND_MW_TYPE;
+		roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_MW_TYPE_S,
+			     wr->bind_mw.mw->type - 1);
+		roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_ATOMIC_S,
+			     (wr->bind_mw.bind_info.mw_access_flags &
+			     IBV_ACCESS_REMOTE_ATOMIC) ? 1 : 0);
+		roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_RDMA_READ_S,
+			     (wr->bind_mw.bind_info.mw_access_flags &
+			     IBV_ACCESS_REMOTE_READ) ? 1 : 0);
+		roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_RDMA_WRITE_S,
+			     (wr->bind_mw.bind_info.mw_access_flags &
+			     IBV_ACCESS_REMOTE_WRITE) ? 1 : 0);
+		rc_sq_wqe->new_rkey = htole32(wr->bind_mw.rkey);
+		rc_sq_wqe->byte_16 = htole32(wr->bind_mw.bind_info.length &
+					     HNS_ROCE_ADDRESS_MASK);
+		rc_sq_wqe->byte_20 = htole32(wr->bind_mw.bind_info.length >>
+					     HNS_ROCE_ADDRESS_SHIFT);
+		rc_sq_wqe->rkey = htole32(wr->bind_mw.bind_info.mr->rkey);
+		rc_sq_wqe->va = htole64(wr->bind_mw.bind_info.addr);
+		break;
+	case IBV_WR_ATOMIC_CMP_AND_SWP:
+		hr_op = HNS_ROCE_WQE_OP_ATOMIC_COM_AND_SWAP;
+		rc_sq_wqe->rkey = htole32(wr->wr.atomic.rkey);
+		rc_sq_wqe->va = htole64(wr->wr.atomic.remote_addr);
+		break;
+	case IBV_WR_ATOMIC_FETCH_AND_ADD:
+		hr_op = HNS_ROCE_WQE_OP_ATOMIC_FETCH_AND_ADD;
+		rc_sq_wqe->rkey = htole32(wr->wr.atomic.rkey);
+		rc_sq_wqe->va = htole64(wr->wr.atomic.remote_addr);
+		break;
+	default:
+		hr_op = HNS_ROCE_WQE_OP_MASK;
+		return -EINVAL;
+	}
+
+	roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_OPCODE_M,
+		       RC_SQ_WQE_BYTE_4_OPCODE_S, hr_op);
+
+	roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_CQE_S,
+		     (wr->send_flags & IBV_SEND_SIGNALED) ? 1 : 0);
+
+	roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FENCE_S,
+		     (wr->send_flags & IBV_SEND_FENCE) ? 1 : 0);
+
+	roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SE_S,
+		     (wr->send_flags & IBV_SEND_SOLICITED) ? 1 : 0);
+
+	roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_OWNER_S,
+		     ~(((qp->sq.head + nreq) >> qp->sq.shift) & 0x1));
+
+	roce_set_field(rc_sq_wqe->byte_20,
+		       RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M,
+		       RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S,
+		       sge_info->start_idx & (qp->sge.sge_cnt - 1));
+
+	if (wr->opcode == IBV_WR_BIND_MW)
+		return 0;
+
+	wqe += sizeof(struct hns_roce_rc_sq_wqe);
+	dseg = wqe;
+
+	set_sge(dseg, qp, wr, sge_info);
+
+	rc_sq_wqe->msg_len = htole32(sge_info->total_len);
+
+	roce_set_field(rc_sq_wqe->byte_16, RC_SQ_WQE_BYTE_16_SGE_NUM_M,
+		       RC_SQ_WQE_BYTE_16_SGE_NUM_S, sge_info->valid_num);
+
+	if (wr->opcode == IBV_WR_ATOMIC_FETCH_AND_ADD ||
+	    wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) {
+		dseg++;
+		set_atomic_seg((struct hns_roce_wqe_atomic_seg *)dseg, wr);
+		return 0;
+	}
+
+	if (wr->send_flags & IBV_SEND_INLINE) {
+		if (wr->opcode == IBV_WR_RDMA_READ)
+			return -EINVAL;
+
+		if (sge_info->total_len > qp->max_inline_data)
+			return -EINVAL;
+
+		for (i = 0; i < wr->num_sge; i++) {
+			memcpy(dseg, (void *)(uintptr_t)(wr->sg_list[i].addr),
+			       wr->sg_list[i].length);
+			dseg += wr->sg_list[i].length;
+		}
+		roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_INLINE_S, 1);
+	}
+
+	return 0;
+}
+
 int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 			    struct ibv_send_wr **bad_wr)
 {
 	struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context);
 	struct hns_roce_qp *qp = to_hr_qp(ibvqp);
-	struct hns_roce_v2_wqe_data_seg *dseg;
-	struct hns_roce_rc_sq_wqe *rc_sq_wqe;
+	struct hns_roce_sge_info sge_info = {};
 	struct ibv_qp_attr attr;
 	unsigned int wqe_idx;
-	unsigned int sge_idx;
-	int valid_num_sge;
 	int attr_mask;
 	int ret = 0;
 	void *wqe;
 	int nreq;
-	int j;
-	int i;
 
 	pthread_spin_lock(&qp->sq.lock);
 
-	sge_idx = qp->next_sge;
-
 	/* check that state is OK to post send */
 	if (ibvqp->state == IBV_QPS_RESET || ibvqp->state == IBV_QPS_INIT ||
 	    ibvqp->state == IBV_QPS_RTR) {
@@ -639,6 +799,8 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 		return EINVAL;
 	}
 
+	sge_info.start_idx = qp->next_sge; /* start index of extend sge */
+
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
 		if (hns_roce_v2_wq_overflow(&qp->sq, nreq,
 					    to_hr_cq(qp->ibv_qp.send_cq))) {
@@ -647,274 +809,43 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 			goto out;
 		}
 
-		wqe_idx = (qp->sq.head + nreq) % qp->sq.wqe_cnt;
-
 		if (wr->num_sge > qp->sq.max_gs) {
 			ret = EINVAL;
 			*bad_wr = wr;
 			goto out;
 		}
 
+		wqe_idx = (qp->sq.head + nreq) % qp->sq.wqe_cnt;
 		wqe = get_send_wqe(qp, wqe_idx);
-		rc_sq_wqe = wqe;
-
-		memset(rc_sq_wqe, 0, sizeof(struct hns_roce_rc_sq_wqe));
 		qp->sq.wrid[wqe_idx] = wr->wr_id;
 
-		valid_num_sge = wr->num_sge;
-		j = 0;
-
-		for (i = 0; i < wr->num_sge; i++) {
-			if (unlikely(!wr->sg_list[i].length))
-				valid_num_sge--;
-
-			rc_sq_wqe->msg_len =
-					htole32(le32toh(rc_sq_wqe->msg_len) +
-							wr->sg_list[i].length);
-		}
-
-		if (wr->opcode == IBV_WR_SEND_WITH_IMM ||
-		    wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
-			rc_sq_wqe->immtdata = htole32(be32toh(wr->imm_data));
-
-		roce_set_field(rc_sq_wqe->byte_16, RC_SQ_WQE_BYTE_16_SGE_NUM_M,
-			       RC_SQ_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
-
-		roce_set_field(rc_sq_wqe->byte_20,
-			       RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S,
-			       RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S,
-			       0);
-
-		roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_CQE_S,
-			     (wr->send_flags & IBV_SEND_SIGNALED) ? 1 : 0);
-
-		/* Set fence attr */
-		roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FENCE_S,
-			     (wr->send_flags & IBV_SEND_FENCE) ? 1 : 0);
-
-		/* Set solicited attr */
-		roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SE_S,
-			     (wr->send_flags & IBV_SEND_SOLICITED) ? 1 : 0);
-
-		roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_OWNER_S,
-			     ~(((qp->sq.head + nreq) >> qp->sq.shift) & 0x1));
-
-		wqe += sizeof(struct hns_roce_rc_sq_wqe);
-		/* set remote addr segment */
 		switch (ibvqp->qp_type) {
 		case IBV_QPT_RC:
-			switch (wr->opcode) {
-			case IBV_WR_RDMA_READ:
-				roce_set_field(rc_sq_wqe->byte_4,
-					       RC_SQ_WQE_BYTE_4_OPCODE_M,
-					       RC_SQ_WQE_BYTE_4_OPCODE_S,
-					       HNS_ROCE_WQE_OP_RDMA_READ);
-				rc_sq_wqe->va =
-					htole64(wr->wr.rdma.remote_addr);
-				rc_sq_wqe->rkey = htole32(wr->wr.rdma.rkey);
-				break;
-
-			case IBV_WR_RDMA_WRITE:
-				roce_set_field(rc_sq_wqe->byte_4,
-					       RC_SQ_WQE_BYTE_4_OPCODE_M,
-					       RC_SQ_WQE_BYTE_4_OPCODE_S,
-					       HNS_ROCE_WQE_OP_RDMA_WRITE);
-				rc_sq_wqe->va =
-					htole64(wr->wr.rdma.remote_addr);
-				rc_sq_wqe->rkey = htole32(wr->wr.rdma.rkey);
-				break;
-
-			case IBV_WR_RDMA_WRITE_WITH_IMM:
-				roce_set_field(rc_sq_wqe->byte_4,
-				       RC_SQ_WQE_BYTE_4_OPCODE_M,
-				       RC_SQ_WQE_BYTE_4_OPCODE_S,
-				       HNS_ROCE_WQE_OP_RDMA_WRITE_WITH_IMM);
-				rc_sq_wqe->va =
-					htole64(wr->wr.rdma.remote_addr);
-				rc_sq_wqe->rkey = htole32(wr->wr.rdma.rkey);
-				break;
-
-			case IBV_WR_SEND:
-				roce_set_field(rc_sq_wqe->byte_4,
-					       RC_SQ_WQE_BYTE_4_OPCODE_M,
-					       RC_SQ_WQE_BYTE_4_OPCODE_S,
-					       HNS_ROCE_WQE_OP_SEND);
-				break;
-			case IBV_WR_SEND_WITH_INV:
-				roce_set_field(rc_sq_wqe->byte_4,
-					     RC_SQ_WQE_BYTE_4_OPCODE_M,
-					     RC_SQ_WQE_BYTE_4_OPCODE_S,
-					     HNS_ROCE_WQE_OP_SEND_WITH_INV);
-				rc_sq_wqe->inv_key =
-						htole32(wr->invalidate_rkey);
-				break;
-			case IBV_WR_SEND_WITH_IMM:
-				roce_set_field(rc_sq_wqe->byte_4,
-					RC_SQ_WQE_BYTE_4_OPCODE_M,
-					RC_SQ_WQE_BYTE_4_OPCODE_S,
-					HNS_ROCE_WQE_OP_SEND_WITH_IMM);
-				break;
-
-			case IBV_WR_LOCAL_INV:
-				roce_set_field(rc_sq_wqe->byte_4,
-					       RC_SQ_WQE_BYTE_4_OPCODE_M,
-					       RC_SQ_WQE_BYTE_4_OPCODE_S,
-					       HNS_ROCE_WQE_OP_LOCAL_INV);
-				roce_set_bit(rc_sq_wqe->byte_4,
-					     RC_SQ_WQE_BYTE_4_SO_S, 1);
-				rc_sq_wqe->inv_key =
-						htole32(wr->invalidate_rkey);
-				break;
-			case IBV_WR_ATOMIC_CMP_AND_SWP:
-				roce_set_field(rc_sq_wqe->byte_4,
-					RC_SQ_WQE_BYTE_4_OPCODE_M,
-					RC_SQ_WQE_BYTE_4_OPCODE_S,
-					HNS_ROCE_WQE_OP_ATOMIC_COM_AND_SWAP);
-				rc_sq_wqe->rkey = htole32(wr->wr.atomic.rkey);
-				rc_sq_wqe->va =
-					htole64(wr->wr.atomic.remote_addr);
-				break;
-
-			case IBV_WR_ATOMIC_FETCH_AND_ADD:
-				roce_set_field(rc_sq_wqe->byte_4,
-					RC_SQ_WQE_BYTE_4_OPCODE_M,
-					RC_SQ_WQE_BYTE_4_OPCODE_S,
-					HNS_ROCE_WQE_OP_ATOMIC_FETCH_AND_ADD);
-				rc_sq_wqe->rkey = htole32(wr->wr.atomic.rkey);
-				rc_sq_wqe->va =
-					htole64(wr->wr.atomic.remote_addr);
-				break;
-
-			case IBV_WR_BIND_MW:
-				roce_set_field(rc_sq_wqe->byte_4,
-					RC_SQ_WQE_BYTE_4_OPCODE_M,
-					RC_SQ_WQE_BYTE_4_OPCODE_S,
-					HNS_ROCE_WQE_OP_BIND_MW_TYPE);
-				roce_set_bit(rc_sq_wqe->byte_4,
-					RC_SQ_WQE_BYTE_4_MW_TYPE_S,
-					wr->bind_mw.mw->type - 1);
-				roce_set_bit(rc_sq_wqe->byte_4,
-					RC_SQ_WQE_BYTE_4_ATOMIC_S,
-					wr->bind_mw.bind_info.mw_access_flags &
-					IBV_ACCESS_REMOTE_ATOMIC ? 1 : 0);
-				roce_set_bit(rc_sq_wqe->byte_4,
-					RC_SQ_WQE_BYTE_4_RDMA_READ_S,
-					wr->bind_mw.bind_info.mw_access_flags &
-					IBV_ACCESS_REMOTE_READ ? 1 : 0);
-				roce_set_bit(rc_sq_wqe->byte_4,
-					RC_SQ_WQE_BYTE_4_RDMA_WRITE_S,
-					wr->bind_mw.bind_info.mw_access_flags &
-					IBV_ACCESS_REMOTE_WRITE ? 1 : 0);
-
-				rc_sq_wqe->new_rkey = htole32(wr->bind_mw.rkey);
-				rc_sq_wqe->byte_16 =
-					  htole32(wr->bind_mw.bind_info.length &
-						  0xffffffff);
-				rc_sq_wqe->byte_20 =
-					 htole32(wr->bind_mw.bind_info.length >>
-						 32);
-				rc_sq_wqe->rkey =
-					htole32(wr->bind_mw.bind_info.mr->rkey);
-				rc_sq_wqe->va =
-					    htole64(wr->bind_mw.bind_info.addr);
-				break;
-
-			default:
-				roce_set_field(rc_sq_wqe->byte_4,
-					       RC_SQ_WQE_BYTE_4_OPCODE_M,
-					       RC_SQ_WQE_BYTE_4_OPCODE_S,
-					       HNS_ROCE_WQE_OP_MASK);
-				printf("Not supported transport opcode %d\n",
-				       wr->opcode);
-				break;
+			ret = set_rc_wqe(wqe, qp, wr, nreq, &sge_info);
+			if (ret) {
+				*bad_wr = wr;
+				goto out;
 			}
-
 			break;
 		case IBV_QPT_UC:
 		case IBV_QPT_UD:
 		default:
-			break;
-		}
-
-		dseg = wqe;
-		if (wr->opcode == IBV_WR_ATOMIC_FETCH_AND_ADD ||
-		    wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) {
-			set_data_seg_v2(dseg, wr->sg_list);
-			wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
-			set_atomic_seg(wqe, wr);
-		} else if (wr->send_flags & IBV_SEND_INLINE && valid_num_sge) {
-			if (le32toh(rc_sq_wqe->msg_len) > qp->max_inline_data) {
-				ret = EINVAL;
-				*bad_wr = wr;
-				printf("data len=%d, send_flags = 0x%x!\r\n",
-					rc_sq_wqe->msg_len, wr->send_flags);
-				goto out;
-			}
-
-			if (wr->opcode == IBV_WR_RDMA_READ) {
-				ret = EINVAL;
-				*bad_wr = wr;
-				printf("Not supported inline data!\n");
-				goto out;
-			}
-
-			for (i = 0; i < wr->num_sge; i++) {
-				memcpy(wqe,
-				     ((void *) (uintptr_t) wr->sg_list[i].addr),
-				     wr->sg_list[i].length);
-				wqe = wqe + wr->sg_list[i].length;
-			}
-
-			roce_set_bit(rc_sq_wqe->byte_4,
-				     RC_SQ_WQE_BYTE_4_INLINE_S, 1);
-		} else {
-			/* set sge */
-			if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) {
-				for (i = 0; i < wr->num_sge; i++)
-					if (likely(wr->sg_list[i].length)) {
-						set_data_seg_v2(dseg,
-							       wr->sg_list + i);
-						dseg++;
-					}
-			} else {
-				roce_set_field(rc_sq_wqe->byte_20,
-					RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M,
-					RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S,
-					sge_idx & (qp->sge.sge_cnt - 1));
-
-				for (i = 0; i < wr->num_sge && j < 2; i++)
-					if (likely(wr->sg_list[i].length)) {
-						set_data_seg_v2(dseg,
-							       wr->sg_list + i);
-						dseg++;
-						j++;
-					}
-
-				for (; i < wr->num_sge; i++) {
-					if (likely(wr->sg_list[i].length)) {
-						dseg = get_send_sge_ex(qp,
-							sge_idx &
-							(qp->sge.sge_cnt - 1));
-						set_data_seg_v2(dseg,
-							   wr->sg_list + i);
-						sge_idx++;
-					}
-				}
-			}
+			ret = -EINVAL;
+			*bad_wr = wr;
+			goto out;
 		}
 	}
 
 out:
 	if (likely(nreq)) {
 		qp->sq.head += nreq;
+		qp->next_sge = sge_info.start_idx;
 
 		hns_roce_update_sq_db(ctx, qp->ibv_qp.qp_num, qp->sl,
 				     qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1));
 
 		if (qp->flags & HNS_ROCE_SUPPORT_SQ_RECORD_DB)
 			*(qp->sdb) = qp->sq.head & 0xffff;
-
-		qp->next_sge = sge_idx;
 	}
 
 	pthread_spin_unlock(&qp->sq.lock);
-- 
2.8.1


* [PATCH v5 rdma-core 4/4] libhns: Add UD support for hip08 in user mode
  2019-10-16 12:27 [PATCH v5 rdma-core 0/4] Support UD on hip08 Weihang Li
                   ` (2 preceding siblings ...)
  2019-10-16 12:27 ` [PATCH v5 rdma-core 3/4] libhns: Refactor for post send Weihang Li
@ 2019-10-16 12:27 ` Weihang Li
  3 siblings, 0 replies; 5+ messages in thread
From: Weihang Li @ 2019-10-16 12:27 UTC (permalink / raw)
  To: dledford, jgg; +Cc: linux-rdma, linuxarm

From: Lijun Ou <oulijun@huawei.com>

User applications can now use Unreliable Datagram on hip08.
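
For reference, a minimal sketch of the application-side posting path that
this patch enables; qp, ah, mr, buf, len, remote_qpn and the qkey are
placeholders supplied by the application, not values from this series:

#include <stdint.h>
#include <infiniband/verbs.h>

/* Illustrative only: post one signaled UD send on a QP that is already
 * in RTS, addressing the peer through a previously created AH. */
static int post_ud_send(struct ibv_qp *qp, struct ibv_ah *ah,
			struct ibv_mr *mr, void *buf, uint32_t len,
			uint32_t remote_qpn)
{
	struct ibv_sge sge = {
		.addr	= (uintptr_t)buf,
		.length	= len,
		.lkey	= mr->lkey,
	};
	struct ibv_send_wr wr = {
		.wr_id		= 1,
		.sg_list	= &sge,
		.num_sge	= 1,
		.opcode		= IBV_WR_SEND,
		.send_flags	= IBV_SEND_SIGNALED,
	};
	struct ibv_send_wr *bad_wr;

	wr.wr.ud.ah = ah;
	wr.wr.ud.remote_qpn = remote_qpn;
	wr.wr.ud.remote_qkey = 0x11111111;	/* example qkey */

	return ibv_post_send(qp, &wr, &bad_wr);
}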

Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Weihang Li <liweihang@hisilicon.com>
---
 providers/hns/hns_roce_u_hw_v2.c | 105 ++++++++++++++++++++++++++++++++++++++-
 providers/hns/hns_roce_u_hw_v2.h |  91 +++++++++++++++++++++++++++++++++
 providers/hns/hns_roce_u_verbs.c |   6 +++
 3 files changed, 201 insertions(+), 1 deletion(-)

diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 4eb493c..208b2f1 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -639,6 +639,108 @@ static void set_sge(struct hns_roce_v2_wqe_data_seg *dseg,
 	}
 }
 
+static void set_ud_wqe(void *wqe, struct hns_roce_qp *qp,
+		       struct ibv_send_wr *wr, int nreq,
+		       struct hns_roce_sge_info *sge_info)
+{
+	struct hns_roce_ah *ah = to_hr_ah(wr->wr.ud.ah);
+	struct hns_roce_ud_sq_wqe *ud_sq_wqe = wqe;
+	struct hns_roce_v2_wqe_data_seg *dseg;
+	unsigned int hr_op;
+
+	memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe));
+
+	roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_CQE_S,
+		     (wr->send_flags & IBV_SEND_SIGNALED) ? 1 : 0);
+	roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_SE_S,
+		     (wr->send_flags & IBV_SEND_SOLICITED) ? 1 : 0);
+	roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_OWNER_S,
+		     ~(((qp->sq.head + nreq) >> qp->sq.shift) & 0x1));
+
+	switch (wr->opcode) {
+	case IBV_WR_SEND:
+		hr_op = HNS_ROCE_WQE_OP_SEND;
+		break;
+	case IBV_WR_SEND_WITH_IMM:
+		hr_op = HNS_ROCE_WQE_OP_SEND_WITH_IMM;
+		ud_sq_wqe->immtdata =
+				htole32(be32toh(wr->imm_data));
+		break;
+	default:
+		hr_op = HNS_ROCE_WQE_OP_MASK;
+		break;
+	}
+
+	roce_set_field(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_OPCODE_M,
+		       UD_SQ_WQE_OPCODE_S, hr_op);
+
+	roce_set_field(ud_sq_wqe->sge_num_pd, UD_SQ_WQE_PD_M,
+		       UD_SQ_WQE_PD_S, to_hr_pd(qp->ibv_qp.pd)->pdn);
+
+	roce_set_field(ud_sq_wqe->rsv_msg_start_sge_idx,
+		       UD_SQ_WQE_MSG_START_SGE_IDX_M,
+		       UD_SQ_WQE_MSG_START_SGE_IDX_S,
+		       sge_info->start_idx & (qp->sge.sge_cnt - 1));
+
+	roce_set_field(ud_sq_wqe->udpspn_rsv, UD_SQ_WQE_UDP_SPN_M,
+		       UD_SQ_WQE_UDP_SPN_S, 0);
+
+	ud_sq_wqe->qkey = htole32(wr->wr.ud.remote_qkey);
+
+	roce_set_field(ud_sq_wqe->rsv_dqpn, UD_SQ_WQE_DQPN_M,
+		       UD_SQ_WQE_DQPN_S, wr->wr.ud.remote_qpn);
+
+	roce_set_field(ud_sq_wqe->tclass_vlan, UD_SQ_WQE_VLAN_M,
+		       UD_SQ_WQE_VLAN_S, ah->av.vlan_id);
+
+	roce_set_field(ud_sq_wqe->tclass_vlan, UD_SQ_WQE_TCLASS_M,
+		       UD_SQ_WQE_TCLASS_S, ah->av.tclass);
+
+	roce_set_field(ud_sq_wqe->tclass_vlan, UD_SQ_WQE_HOPLIMIT_M,
+		       UD_SQ_WQE_HOPLIMIT_S, ah->av.hop_limit);
+
+	roce_set_field(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_FLOW_LABEL_M,
+		       UD_SQ_WQE_FLOW_LABEL_S, ah->av.flowlabel);
+
+	roce_set_bit(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_VLAN_EN_S,
+		     ah->av.vlan_en ? 1 : 0);
+
+	roce_set_bit(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_LBI_S, 0);
+
+	roce_set_field(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_SL_M,
+		       UD_SQ_WQE_SL_S, ah->av.sl);
+
+	roce_set_field(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_PORTN_M,
+		       UD_SQ_WQE_PORTN_S, qp->ibv_qp.qp_num);
+
+	roce_set_field(ud_sq_wqe->dmac, UD_SQ_WQE_DMAC_0_M,
+		       UD_SQ_WQE_DMAC_0_S, ah->av.mac[0]);
+	roce_set_field(ud_sq_wqe->dmac, UD_SQ_WQE_DMAC_1_M,
+		       UD_SQ_WQE_DMAC_1_S, ah->av.mac[1]);
+	roce_set_field(ud_sq_wqe->dmac, UD_SQ_WQE_DMAC_2_M,
+		       UD_SQ_WQE_DMAC_2_S, ah->av.mac[2]);
+	roce_set_field(ud_sq_wqe->dmac, UD_SQ_WQE_DMAC_3_M,
+		       UD_SQ_WQE_DMAC_3_S, ah->av.mac[3]);
+	roce_set_field(ud_sq_wqe->smac_index_dmac,
+		       UD_SQ_WQE_DMAC_4_M, UD_SQ_WQE_DMAC_4_S,
+		       to_hr_ah(wr->wr.ud.ah)->av.mac[4]);
+	roce_set_field(ud_sq_wqe->smac_index_dmac,
+		       UD_SQ_WQE_DMAC_5_M, UD_SQ_WQE_DMAC_5_S,
+		       to_hr_ah(wr->wr.ud.ah)->av.mac[5]);
+	roce_set_field(ud_sq_wqe->smac_index_dmac, UD_SQ_WQE_SGID_IDX_M,
+		       UD_SQ_WQE_SGID_IDX_S, ah->av.gid_index);
+
+	memcpy(ud_sq_wqe->dgid, ah->av.dgid, HNS_ROCE_GID_SIZE);
+
+	dseg = wqe;
+	set_sge(dseg, qp, wr, sge_info);
+
+	ud_sq_wqe->msg_len = htole32(sge_info->total_len);
+
+	roce_set_field(ud_sq_wqe->sge_num_pd, UD_SQ_WQE_SGE_NUM_M,
+		       UD_SQ_WQE_SGE_NUM_S, sge_info->valid_num);
+}
+
 static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
 		      int nreq, struct hns_roce_sge_info *sge_info)
 {
@@ -827,8 +929,9 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 				goto out;
 			}
 			break;
-		case IBV_QPT_UC:
 		case IBV_QPT_UD:
+			set_ud_wqe(wqe, qp, wr, nreq, &sge_info);
+			break;
 		default:
 			ret = -EINVAL;
 			*bad_wr = wr;
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index 3eca1fd..84cf6c4 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -291,4 +291,95 @@ struct hns_roce_wqe_atomic_seg {
 int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 			    struct ibv_send_wr **bad_wr);
 
+struct hns_roce_ud_sq_wqe {
+	__le32		rsv_opcode;
+	__le32		msg_len;
+	__le32		immtdata;
+	__le32		sge_num_pd;
+	__le32		rsv_msg_start_sge_idx;
+	__le32		udpspn_rsv;
+	__le32		qkey;
+	__le32		rsv_dqpn;
+	__le32		tclass_vlan;
+	__le32		lbi_flow_label;
+	__le32		dmac;
+	__le32		smac_index_dmac;
+	uint8_t		dgid[HNS_ROCE_GID_SIZE];
+};
+
+#define UD_SQ_WQE_OPCODE_S 0
+#define UD_SQ_WQE_OPCODE_M  (((1UL << 5) - 1) << UD_SQ_WQE_OPCODE_S)
+
+#define UD_SQ_WQE_OWNER_S 7
+
+#define UD_SQ_WQE_CQE_S 8
+
+#define UD_SQ_WQE_SE_S 11
+
+#define UD_SQ_WQE_PD_S 0
+#define UD_SQ_WQE_PD_M  (((1UL << 24) - 1) << UD_SQ_WQE_PD_S)
+
+#define UD_SQ_WQE_SGE_NUM_S 24
+#define UD_SQ_WQE_SGE_NUM_M  (((1UL << 8) - 1) << UD_SQ_WQE_SGE_NUM_S)
+
+#define UD_SQ_WQE_MSG_START_SGE_IDX_S 0
+#define UD_SQ_WQE_MSG_START_SGE_IDX_M \
+	(((1UL << 24) - 1) << UD_SQ_WQE_MSG_START_SGE_IDX_S)
+
+#define UD_SQ_WQE_UDP_SPN_S 16
+#define UD_SQ_WQE_UDP_SPN_M \
+	(((1UL << 16) - 1) << UD_SQ_WQE_UDP_SPN_S)
+
+#define UD_SQ_WQE_DQPN_S 0
+#define UD_SQ_WQE_DQPN_M (((1UL << 24) - 1) << UD_SQ_WQE_DQPN_S)
+
+#define UD_SQ_WQE_VLAN_S 0
+#define UD_SQ_WQE_VLAN_M (((1UL << 16) - 1) << UD_SQ_WQE_VLAN_S)
+
+#define UD_SQ_WQE_HOPLIMIT_S 16
+#define UD_SQ_WQE_HOPLIMIT_M (((1UL << 8) - 1) << UD_SQ_WQE_HOPLIMIT_S)
+
+#define UD_SQ_WQE_TCLASS_S 24
+#define UD_SQ_WQE_TCLASS_M (((1UL << 8) - 1) << UD_SQ_WQE_TCLASS_S)
+
+#define UD_SQ_WQE_FLOW_LABEL_S 0
+#define UD_SQ_WQE_FLOW_LABEL_M (((1UL << 20) - 1) << UD_SQ_WQE_FLOW_LABEL_S)
+
+#define UD_SQ_WQE_SL_S 20
+#define UD_SQ_WQE_SL_M (((1UL << 4) - 1) << UD_SQ_WQE_SL_S)
+
+#define UD_SQ_WQE_PORTN_S 24
+#define UD_SQ_WQE_PORTN_M (((1UL << 3) - 1) << UD_SQ_WQE_PORTN_S)
+
+#define UD_SQ_WQE_VLAN_EN_S 30
+
+#define UD_SQ_WQE_LBI_S 31
+
+#define UD_SQ_WQE_PORTN_S 24
+#define UD_SQ_WQE_PORTN_M (((1UL << 3) - 1) << UD_SQ_WQE_PORTN_S)
+
+#define UD_SQ_WQE_DMAC_0_S 0
+#define UD_SQ_WQE_DMAC_0_M (((1UL << 8) - 1) << UD_SQ_WQE_DMAC_0_S)
+
+#define UD_SQ_WQE_DMAC_1_S 8
+#define UD_SQ_WQE_DMAC_1_M (((1UL << 8) - 1) << UD_SQ_WQE_DMAC_1_S)
+
+#define UD_SQ_WQE_DMAC_2_S 16
+#define UD_SQ_WQE_DMAC_2_M (((1UL << 8) - 1) << UD_SQ_WQE_DMAC_2_S)
+
+#define UD_SQ_WQE_DMAC_3_S 24
+#define UD_SQ_WQE_DMAC_3_M (((1UL << 8) - 1) << UD_SQ_WQE_DMAC_3_S)
+
+#define UD_SQ_WQE_DMAC_4_S 0
+#define UD_SQ_WQE_DMAC_4_M (((1UL << 8) - 1) << UD_SQ_WQE_DMAC_4_S)
+
+#define UD_SQ_WQE_DMAC_5_S 8
+#define UD_SQ_WQE_DMAC_5_M (((1UL << 8) - 1) << UD_SQ_WQE_DMAC_5_S)
+
+#define UD_SQ_WQE_SGID_IDX_S 16
+#define UD_SQ_WQE_SGID_IDX_M (((1UL << 8) - 1) << UD_SQ_WQE_SGID_IDX_S)
+
+#define UD_SQ_WQE_SMAC_IDX_S 24
+#define UD_SQ_WQE_SMAC_IDX_M (((1UL << 8) - 1) << UD_SQ_WQE_SMAC_IDX_S)
+
 #endif /* _HNS_ROCE_U_HW_V2_H */
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index ef4b9e0..e72df38 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -767,6 +767,12 @@ static void hns_roce_set_qp_params(struct ibv_pd *pd,
 		}
 	}
 
+	if (attr->qp_type == IBV_QPT_UD)
+		qp->sge.sge_cnt = roundup_pow_of_two(qp->sq.wqe_cnt *
+						     qp->sq.max_gs);
+
+	qp->ibv_qp.qp_type = attr->qp_type;
+
 	/* limit by the context queried during alloc context */
 	qp->rq.max_post = min(ctx->max_qp_wr, qp->rq.wqe_cnt);
 	qp->sq.max_post = min(ctx->max_qp_wr, qp->sq.wqe_cnt);
-- 
2.8.1

