This patchset enables hip08 to support Unreliable Datagram. PR: https://github.com/linux-rdma/rdma-core/pull/575 Lijun Ou (2): libhns: Add support of handling AH for hip08 libhns: Add UD support for hip08 in user mode Weihang Li (1): libhns: Support configuring loopback mode by user Yixian Liu (2): libhns: Bugfix for wqe idx calc of post verbs libhns: Refactor for post send kernel-headers/rdma/hns-abi.h | 7 + providers/hns/hns_roce_u.c | 17 ++ providers/hns/hns_roce_u.h | 39 +++ providers/hns/hns_roce_u_abi.h | 3 +- providers/hns/hns_roce_u_hw_v1.c | 27 +- providers/hns/hns_roce_u_hw_v2.c | 554 ++++++++++++++++++++------------------- providers/hns/hns_roce_u_hw_v2.h | 91 +++++++ providers/hns/hns_roce_u_verbs.c | 58 ++++ 8 files changed, 510 insertions(+), 286 deletions(-) -- 2.8.1
From: Lijun Ou <oulijun@huawei.com> This patch achieves two verbs create_ah and destroy_ah to support allocation and destruction of Address Handle. Signed-off-by: Lijun Ou <oulijun@huawei.com> Signed-off-by: Weihang Li <liweihang@hisilicon.com> --- kernel-headers/rdma/hns-abi.h | 7 ++++++ providers/hns/hns_roce_u.c | 2 ++ providers/hns/hns_roce_u.h | 30 +++++++++++++++++++++++ providers/hns/hns_roce_u_abi.h | 3 ++- providers/hns/hns_roce_u_verbs.c | 53 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 94 insertions(+), 1 deletion(-) diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h index eb76b38..5f9b178 100644 --- a/kernel-headers/rdma/hns-abi.h +++ b/kernel-headers/rdma/hns-abi.h @@ -80,4 +80,11 @@ struct hns_roce_ib_alloc_pd_resp { __u32 pdn; }; +struct hns_roce_ib_create_ah_resp { + __u8 dmac[6]; + __u16 vlan_id; + __u8 vlan_en; + __u8 reserved[7]; +}; + #endif /* HNS_ABI_USER_H */ diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c index 5872599..8ba41de 100644 --- a/providers/hns/hns_roce_u.c +++ b/providers/hns/hns_roce_u.c @@ -81,6 +81,8 @@ static const struct verbs_context_ops hns_common_ops = { .modify_srq = hns_roce_u_modify_srq, .query_srq = hns_roce_u_query_srq, .destroy_srq = hns_roce_u_destroy_srq, + .create_ah = hns_roce_u_create_ah, + .destroy_ah = hns_roce_u_destroy_ah, }; static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h index 23e0f13..45472fe 100644 --- a/providers/hns/hns_roce_u.h +++ b/providers/hns/hns_roce_u.h @@ -68,6 +68,8 @@ #define HNS_ROCE_TPTR_OFFSET 0x1000 #define HNS_ROCE_STATIC_RATE 3 /* Gbps */ +#define ETH_ALEN 6 + #define roce_get_field(origin, mask, shift) \ (((le32toh(origin)) & (mask)) >> (shift)) @@ -244,6 +246,25 @@ struct hns_roce_qp { unsigned long flags; }; +struct hns_roce_av { + uint8_t port; + uint8_t gid_index; + uint8_t static_rate; + uint8_t hop_limit; + uint32_t 
flowlabel; + uint8_t sl; + uint8_t tclass; + uint8_t dgid[HNS_ROCE_GID_SIZE]; + uint8_t mac[ETH_ALEN]; + uint16_t vlan_id; + uint8_t vlan_en; +}; + +struct hns_roce_ah { + struct ibv_ah ibv_ah; + struct hns_roce_av av; +}; + struct hns_roce_u_hw { uint32_t hw_version; struct verbs_context_ops hw_ops; @@ -280,6 +301,11 @@ static inline struct hns_roce_qp *to_hr_qp(struct ibv_qp *ibv_qp) return container_of(ibv_qp, struct hns_roce_qp, ibv_qp); } +static inline struct hns_roce_ah *to_hr_ah(struct ibv_ah *ibv_ah) +{ + return container_of(ibv_ah, struct hns_roce_ah, ibv_ah); +} + int hns_roce_u_query_device(struct ibv_context *context, struct ibv_device_attr *attr); int hns_roce_u_query_port(struct ibv_context *context, uint8_t port, @@ -319,6 +345,10 @@ struct ibv_qp *hns_roce_u_create_qp(struct ibv_pd *pd, int hns_roce_u_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, int attr_mask, struct ibv_qp_init_attr *init_attr); +struct ibv_ah *hns_roce_u_create_ah(struct ibv_pd *pd, + struct ibv_ah_attr *attr); +int hns_roce_u_destroy_ah(struct ibv_ah *ah); + int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size, int page_size); void hns_roce_free_buf(struct hns_roce_buf *buf); diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h index 79fd7dd..029535e 100644 --- a/providers/hns/hns_roce_u_abi.h +++ b/providers/hns/hns_roce_u_abi.h @@ -48,5 +48,6 @@ DECLARE_DRV_CMD(hns_roce_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT, DECLARE_DRV_CMD(hns_roce_create_srq, IB_USER_VERBS_CMD_CREATE_SRQ, hns_roce_ib_create_srq, hns_roce_ib_create_srq_resp); - +DECLARE_DRV_CMD(hns_roce_create_ah, IB_USER_VERBS_CMD_CREATE_AH, empty, + hns_roce_ib_create_ah_resp); #endif /* _HNS_ROCE_U_ABI_H */ diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c index 9d222c0..0d10b1c 100644 --- a/providers/hns/hns_roce_u_verbs.c +++ b/providers/hns/hns_roce_u_verbs.c @@ -38,6 +38,7 @@ #include <sys/mman.h> #include <ccan/ilog.h> #include 
<ccan/minmax.h> +#include <ccan/array_size.h> #include <util/util.h> #include "hns_roce_u.h" #include "hns_roce_u_abi.h" @@ -952,3 +953,55 @@ int hns_roce_u_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, return ret; } + +struct ibv_ah *hns_roce_u_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) +{ + struct hns_roce_create_ah_resp resp = {}; + struct hns_roce_ah *ah; + int status; + + ah = calloc(1, sizeof(*ah)); + if (!ah) + return NULL; + + ah->av.port = attr->port_num; + ah->av.sl = attr->sl; + + if (attr->static_rate) + ah->av.static_rate = IBV_RATE_10_GBPS; + + if (attr->is_global) { + ah->av.gid_index = attr->grh.sgid_index; + ah->av.hop_limit = attr->grh.hop_limit; + ah->av.tclass = attr->grh.traffic_class; + ah->av.flowlabel = attr->grh.flow_label; + + memcpy(ah->av.dgid, attr->grh.dgid.raw, + ARRAY_SIZE(ah->av.dgid)); + } + + status = ibv_cmd_create_ah(pd, &ah->ibv_ah, attr, &resp.ibv_resp, + sizeof(resp)); + if (status) { + free(ah); + return NULL; + } + + memcpy(ah->av.mac, resp.dmac, ETH_ALEN); + ah->av.vlan_id = resp.vlan_id; + ah->av.vlan_en = resp.vlan_en; + return &ah->ibv_ah; +} + +int hns_roce_u_destroy_ah(struct ibv_ah *ah) +{ + int ret; + + ret = ibv_cmd_destroy_ah(ah); + if (ret) + return ret; + + free(to_hr_ah(ah)); + + return 0; +} -- 2.8.1
From: Yixian Liu <liuyixian@huawei.com> Current wqe idx is calculated before checking wq overflow, this patch fixes it and simplifies the usage of it. Signed-off-by: Yixian Liu <liuyixian@huawei.com> Signed-off-by: Weihang Li <liweihang@hisilicon.com> --- providers/hns/hns_roce_u_hw_v1.c | 27 ++++++++----------- providers/hns/hns_roce_u_hw_v2.c | 56 +++++++++++++++++++--------------------- 2 files changed, 37 insertions(+), 46 deletions(-) diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c index fceb57a..6d3aba5 100644 --- a/providers/hns/hns_roce_u_hw_v1.c +++ b/providers/hns/hns_roce_u_hw_v1.c @@ -462,7 +462,6 @@ static int hns_roce_u_v1_arm_cq(struct ibv_cq *ibvcq, int solicited) static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, struct ibv_send_wr **bad_wr) { - unsigned int ind; void *wqe; int nreq; int ps_opcode, i; @@ -471,12 +470,10 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, struct hns_roce_wqe_data_seg *dseg = NULL; struct hns_roce_qp *qp = to_hr_qp(ibvqp); struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context); + unsigned int wqe_idx; pthread_spin_lock(&qp->sq.lock); - /* check that state is OK to post send */ - ind = qp->sq.head; - for (nreq = 0; wr; ++nreq, wr = wr->next) { if (hns_roce_wq_overflow(&qp->sq, nreq, to_hr_cq(qp->ibv_qp.send_cq))) { @@ -484,6 +481,9 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, *bad_wr = wr; goto out; } + + wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); + if (wr->num_sge > qp->sq.max_gs) { ret = -1; *bad_wr = wr; @@ -492,10 +492,10 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, goto out; } - ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); + ctrl = wqe = get_send_wqe(qp, wqe_idx); memset(ctrl, 0, sizeof(struct hns_roce_wqe_ctrl_seg)); - qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id; + qp->sq.wrid[wqe_idx] = wr->wr_id; for (i = 
0; i < wr->num_sge; i++) ctrl->msg_length = htole32(le32toh(ctrl->msg_length) + wr->sg_list[i].length); @@ -578,8 +578,6 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, ctrl->flag |= htole32(wr->num_sge << HNS_ROCE_WQE_SGE_NUM_BIT); } - - ind++; } out: @@ -745,17 +743,14 @@ static int hns_roce_u_v1_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, { int ret = 0; int nreq; - int ind; struct ibv_sge *sg; struct hns_roce_rc_rq_wqe *rq_wqe; struct hns_roce_qp *qp = to_hr_qp(ibvqp); struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context); + unsigned int wqe_idx; pthread_spin_lock(&qp->rq.lock); - /* check that state is OK to post receive */ - ind = qp->rq.head & (qp->rq.wqe_cnt - 1); - for (nreq = 0; wr; ++nreq, wr = wr->next) { if (hns_roce_wq_overflow(&qp->rq, nreq, to_hr_cq(qp->ibv_qp.recv_cq))) { @@ -764,13 +759,15 @@ static int hns_roce_u_v1_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, goto out; } + wqe_idx = (qp->rq.head + nreq) & (qp->rq.wqe_cnt - 1); + if (wr->num_sge > qp->rq.max_gs) { ret = -1; *bad_wr = wr; goto out; } - rq_wqe = get_recv_wqe(qp, ind); + rq_wqe = get_recv_wqe(qp, wqe_idx); if (wr->num_sge > HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM) { ret = -1; *bad_wr = wr; @@ -811,9 +808,7 @@ static int hns_roce_u_v1_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, HNS_ROCE_RC_RQ_WQE_MAX_SGE_NUM - 2); } - qp->rq.wrid[ind] = wr->wr_id; - - ind = (ind + 1) & (qp->rq.wqe_cnt - 1); + qp->rq.wrid[wqe_idx] = wr->wr_id; } out: diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c index 931f59d..439e547 100644 --- a/providers/hns/hns_roce_u_hw_v2.c +++ b/providers/hns/hns_roce_u_hw_v2.c @@ -612,27 +612,26 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited) int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, struct ibv_send_wr **bad_wr) { - unsigned int ind_sge; - unsigned int ind; - int nreq; - void *wqe; - int ret = 0; - struct hns_roce_qp *qp = 
to_hr_qp(ibvqp); struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context); - struct hns_roce_rc_sq_wqe *rc_sq_wqe; + struct hns_roce_qp *qp = to_hr_qp(ibvqp); struct hns_roce_v2_wqe_data_seg *dseg; + struct hns_roce_rc_sq_wqe *rc_sq_wqe; struct ibv_qp_attr attr; + unsigned int wqe_idx; + unsigned int sge_idx; int valid_num_sge; int attr_mask; + int ret = 0; + void *wqe; + int nreq; int j; int i; pthread_spin_lock(&qp->sq.lock); - /* check that state is OK to post send */ - ind = qp->sq.head; - ind_sge = qp->next_sge; + sge_idx = qp->next_sge; + /* check that state is OK to post send */ if (ibvqp->state == IBV_QPS_RESET || ibvqp->state == IBV_QPS_INIT || ibvqp->state == IBV_QPS_RTR) { pthread_spin_unlock(&qp->sq.lock); @@ -648,18 +647,19 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, goto out; } + wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); + if (wr->num_sge > qp->sq.max_gs) { ret = EINVAL; *bad_wr = wr; goto out; } - wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); + wqe = get_send_wqe(qp, wqe_idx); rc_sq_wqe = wqe; memset(rc_sq_wqe, 0, sizeof(struct hns_roce_rc_sq_wqe)); - - qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id; + qp->sq.wrid[wqe_idx] = wr->wr_id; valid_num_sge = wr->num_sge; j = 0; @@ -880,7 +880,7 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, roce_set_field(rc_sq_wqe->byte_20, RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M, RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S, - ind_sge & (qp->sge.sge_cnt - 1)); + sge_idx & (qp->sge.sge_cnt - 1)); for (i = 0; i < wr->num_sge && j < 2; i++) if (likely(wr->sg_list[i].length)) { @@ -893,17 +893,15 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, for (; i < wr->num_sge; i++) { if (likely(wr->sg_list[i].length)) { dseg = get_send_sge_ex(qp, - ind_sge & + sge_idx & (qp->sge.sge_cnt - 1)); set_data_seg_v2(dseg, wr->sg_list + i); - ind_sge++; + sge_idx++; } } } } - - ind++; } out: @@ -916,7 +914,7 @@ out: if (qp->flags & 
HNS_ROCE_SUPPORT_SQ_RECORD_DB) *(qp->sdb) = qp->sq.head & 0xffff; - qp->next_sge = ind_sge; + qp->next_sge = sge_idx; } pthread_spin_unlock(&qp->sq.lock); @@ -936,23 +934,21 @@ out: static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr) { - int ret = 0; - int nreq; - int ind; struct hns_roce_qp *qp = to_hr_qp(ibvqp); struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context); struct hns_roce_v2_wqe_data_seg *dseg; struct hns_roce_rinl_sge *sge_list; struct ibv_qp_attr attr; int attr_mask; + int ret = 0; + int wqe_idx; void *wqe; + int nreq; int i; pthread_spin_lock(&qp->rq.lock); /* check that state is OK to post receive */ - ind = qp->rq.head & (qp->rq.wqe_cnt - 1); - if (ibvqp->state == IBV_QPS_RESET) { pthread_spin_unlock(&qp->rq.lock); *bad_wr = wr; @@ -967,13 +963,15 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, goto out; } + wqe_idx = (qp->rq.head + nreq) & (qp->rq.wqe_cnt - 1); + if (wr->num_sge > qp->rq.max_gs) { ret = -EINVAL; *bad_wr = wr; goto out; } - wqe = get_recv_wqe_v2(qp, ind); + wqe = get_recv_wqe_v2(qp, wqe_idx); if (!wqe) { ret = -EINVAL; *bad_wr = wr; @@ -995,8 +993,8 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, } /* QP support receive inline wqe */ - sge_list = qp->rq_rinl_buf.wqe_list[ind].sg_list; - qp->rq_rinl_buf.wqe_list[ind].sge_cnt = + sge_list = qp->rq_rinl_buf.wqe_list[wqe_idx].sg_list; + qp->rq_rinl_buf.wqe_list[wqe_idx].sge_cnt = (unsigned int)wr->num_sge; for (i = 0; i < wr->num_sge; i++) { @@ -1005,9 +1003,7 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, sge_list[i].len = wr->sg_list[i].length; } - qp->rq.wrid[ind] = wr->wr_id; - - ind = (ind + 1) & (qp->rq.wqe_cnt - 1); + qp->rq.wrid[wqe_idx] = wr->wr_id; } out: -- 2.8.1
From: Yixian Liu <liuyixian@huawei.com> This patch refactors the interface of hns_roce_u_v2_post_send, which is now very complicated. We reduce the complexity with following points: 1. Separate RC server into a function. 2. Simplify and separate the process of sge. 3. Keep the logic and consistence of all operations. Signed-off-by: Yixian Liu <liuyixian@huawei.com> Signed-off-by: Weihang Li <liweihang@hisilicon.com> --- providers/hns/hns_roce_u.h | 7 + providers/hns/hns_roce_u_hw_v2.c | 427 ++++++++++++++++----------------------- 2 files changed, 186 insertions(+), 248 deletions(-) diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h index 45472fe..9d8f72e 100644 --- a/providers/hns/hns_roce_u.h +++ b/providers/hns/hns_roce_u.h @@ -206,6 +206,13 @@ struct hns_roce_wq { int offset; }; +/* record the result of sge process */ +struct hns_roce_sge_info { + unsigned int valid_num; /* sge length is not 0 */ + unsigned int start_idx; /* start position of extend sge */ + unsigned int total_len; /* total length of valid sges */ +}; + struct hns_roce_sge_ex { int offset; unsigned int sge_cnt; diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c index 439e547..21a6ee0 100644 --- a/providers/hns/hns_roce_u_hw_v2.c +++ b/providers/hns/hns_roce_u_hw_v2.c @@ -609,28 +609,188 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited) return 0; } +static void set_sge(struct hns_roce_v2_wqe_data_seg *dseg, + struct hns_roce_qp *qp, struct ibv_send_wr *wr, + struct hns_roce_sge_info *sge_info) +{ + int i; + + for (i = 0; i < wr->num_sge; i++) + if (likely(wr->sg_list[i].length)) { + sge_info->total_len += wr->sg_list[i].length; + sge_info->valid_num++; + + /* No inner sge in UD wqe */ + if (sge_info->valid_num <= HNS_ROCE_SGE_IN_WQE && + qp->ibv_qp.qp_type != IBV_QPT_UD) { + set_data_seg_v2(dseg, wr->sg_list + i); + dseg++; + } else { + dseg = get_send_sge_ex(qp, sge_info->start_idx & + (qp->sge.sge_cnt - 1)); + 
set_data_seg_v2(dseg, wr->sg_list + i); + sge_info->start_idx++; + } + } +} + +static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, + int nreq, struct hns_roce_sge_info *sge_info) +{ + struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe; + struct hns_roce_v2_wqe_data_seg *dseg; + int hr_op; + int i; + + memset(rc_sq_wqe, 0, sizeof(struct hns_roce_rc_sq_wqe)); + + switch (wr->opcode) { + case IBV_WR_RDMA_READ: + hr_op = HNS_ROCE_WQE_OP_RDMA_READ; + rc_sq_wqe->va = htole64(wr->wr.rdma.remote_addr); + rc_sq_wqe->rkey = htole32(wr->wr.rdma.rkey); + break; + case IBV_WR_RDMA_WRITE: + hr_op = HNS_ROCE_WQE_OP_RDMA_WRITE; + rc_sq_wqe->va = htole64(wr->wr.rdma.remote_addr); + rc_sq_wqe->rkey = htole32(wr->wr.rdma.rkey); + break; + case IBV_WR_RDMA_WRITE_WITH_IMM: + hr_op = HNS_ROCE_WQE_OP_RDMA_WRITE_WITH_IMM; + rc_sq_wqe->va = htole64(wr->wr.rdma.remote_addr); + rc_sq_wqe->rkey = htole32(wr->wr.rdma.rkey); + rc_sq_wqe->immtdata = htole32(be32toh(wr->imm_data)); + break; + case IBV_WR_SEND: + hr_op = HNS_ROCE_WQE_OP_SEND; + break; + case IBV_WR_SEND_WITH_INV: + hr_op = HNS_ROCE_WQE_OP_SEND_WITH_INV; + rc_sq_wqe->inv_key = htole32(wr->invalidate_rkey); + break; + case IBV_WR_SEND_WITH_IMM: + hr_op = HNS_ROCE_WQE_OP_SEND_WITH_IMM; + rc_sq_wqe->immtdata = htole32(be32toh(wr->imm_data)); + break; + case IBV_WR_LOCAL_INV: + hr_op = HNS_ROCE_WQE_OP_LOCAL_INV; + roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SO_S, 1); + rc_sq_wqe->inv_key = htole32(wr->invalidate_rkey); + break; + case IBV_WR_BIND_MW: + hr_op = HNS_ROCE_WQE_OP_BIND_MW_TYPE; + roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_MW_TYPE_S, + wr->bind_mw.mw->type - 1); + roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_ATOMIC_S, + (wr->bind_mw.bind_info.mw_access_flags & + IBV_ACCESS_REMOTE_ATOMIC) ? 1 : 0); + roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_RDMA_READ_S, + (wr->bind_mw.bind_info.mw_access_flags & + IBV_ACCESS_REMOTE_READ) ? 
1 : 0); + roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_RDMA_WRITE_S, + (wr->bind_mw.bind_info.mw_access_flags & + IBV_ACCESS_REMOTE_WRITE) ? 1 : 0); + rc_sq_wqe->new_rkey = htole32(wr->bind_mw.rkey); + rc_sq_wqe->byte_16 = + htole32(wr->bind_mw.bind_info.length & 0xffffffff); + rc_sq_wqe->byte_20 = + htole32(wr->bind_mw.bind_info.length >> 32); + rc_sq_wqe->rkey = htole32(wr->bind_mw.bind_info.mr->rkey); + rc_sq_wqe->va = htole64(wr->bind_mw.bind_info.addr); + break; + case IBV_WR_ATOMIC_CMP_AND_SWP: + hr_op = HNS_ROCE_WQE_OP_ATOMIC_COM_AND_SWAP; + rc_sq_wqe->rkey = htole32(wr->wr.atomic.rkey); + rc_sq_wqe->va = htole64(wr->wr.atomic.remote_addr); + break; + case IBV_WR_ATOMIC_FETCH_AND_ADD: + hr_op = HNS_ROCE_WQE_OP_ATOMIC_FETCH_AND_ADD; + rc_sq_wqe->rkey = htole32(wr->wr.atomic.rkey); + rc_sq_wqe->va = htole64(wr->wr.atomic.remote_addr); + break; + default: + hr_op = HNS_ROCE_WQE_OP_MASK; + printf("Not supported transport opcode %d\n", wr->opcode); + return -EINVAL; + } + + roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_OPCODE_M, + RC_SQ_WQE_BYTE_4_OPCODE_S, hr_op); + + roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_CQE_S, + (wr->send_flags & IBV_SEND_SIGNALED) ? 1 : 0); + + roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FENCE_S, + (wr->send_flags & IBV_SEND_FENCE) ? 1 : 0); + + roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SE_S, + (wr->send_flags & IBV_SEND_SOLICITED) ? 
1 : 0); + + roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_OWNER_S, + ~(((qp->sq.head + nreq) >> qp->sq.shift) & 0x1)); + + roce_set_field(rc_sq_wqe->byte_20, + RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M, + RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S, + sge_info->start_idx & (qp->sge.sge_cnt - 1)); + + if (wr->opcode == IBV_WR_BIND_MW) + return 0; + + wqe += sizeof(struct hns_roce_rc_sq_wqe); + dseg = wqe; + + set_sge(dseg, qp, wr, sge_info); + + rc_sq_wqe->msg_len = htole32(sge_info->total_len); + + roce_set_field(rc_sq_wqe->byte_16, RC_SQ_WQE_BYTE_16_SGE_NUM_M, + RC_SQ_WQE_BYTE_16_SGE_NUM_S, sge_info->valid_num); + + if (wr->opcode == IBV_WR_ATOMIC_FETCH_AND_ADD || + wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) { + set_atomic_seg((struct hns_roce_wqe_atomic_seg *)dseg, wr); + return 0; + } + + if (wr->send_flags & IBV_SEND_INLINE) { + if (wr->opcode == IBV_WR_RDMA_READ) { + printf("Not supported inline data!\n"); + return -EINVAL; + } + + if (sge_info->total_len > qp->max_inline_data) { + printf("Failed to inline, data len=%d, max inline len=%d!\n", + sge_info->total_len, qp->max_inline_data); + return -EINVAL; + } + + for (i = 0; i < wr->num_sge; i++) { + memcpy(dseg, (void *)(uintptr_t)(wr->sg_list[i].addr), + wr->sg_list[i].length); + dseg = dseg + wr->sg_list[i].length; + } + roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_INLINE_S, 1); + } + + return 0; +} + int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, struct ibv_send_wr **bad_wr) { struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context); struct hns_roce_qp *qp = to_hr_qp(ibvqp); - struct hns_roce_v2_wqe_data_seg *dseg; - struct hns_roce_rc_sq_wqe *rc_sq_wqe; + struct hns_roce_sge_info sge_info = {}; struct ibv_qp_attr attr; unsigned int wqe_idx; - unsigned int sge_idx; - int valid_num_sge; int attr_mask; int ret = 0; void *wqe; int nreq; - int j; - int i; pthread_spin_lock(&qp->sq.lock); - sge_idx = qp->next_sge; - /* check that state is OK to post send */ if (ibvqp->state == IBV_QPS_RESET 
|| ibvqp->state == IBV_QPS_INIT || ibvqp->state == IBV_QPS_RTR) { @@ -639,6 +799,8 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, return EINVAL; } + sge_info.start_idx = qp->next_sge; /* start index of extend sge */ + for (nreq = 0; wr; ++nreq, wr = wr->next) { if (hns_roce_v2_wq_overflow(&qp->sq, nreq, to_hr_cq(qp->ibv_qp.send_cq))) { @@ -647,274 +809,43 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, goto out; } - wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); - if (wr->num_sge > qp->sq.max_gs) { ret = EINVAL; *bad_wr = wr; goto out; } + wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); wqe = get_send_wqe(qp, wqe_idx); - rc_sq_wqe = wqe; - - memset(rc_sq_wqe, 0, sizeof(struct hns_roce_rc_sq_wqe)); qp->sq.wrid[wqe_idx] = wr->wr_id; - valid_num_sge = wr->num_sge; - j = 0; - - for (i = 0; i < wr->num_sge; i++) { - if (unlikely(!wr->sg_list[i].length)) - valid_num_sge--; - - rc_sq_wqe->msg_len = - htole32(le32toh(rc_sq_wqe->msg_len) + - wr->sg_list[i].length); - } - - if (wr->opcode == IBV_WR_SEND_WITH_IMM || - wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM) - rc_sq_wqe->immtdata = htole32(be32toh(wr->imm_data)); - - roce_set_field(rc_sq_wqe->byte_16, RC_SQ_WQE_BYTE_16_SGE_NUM_M, - RC_SQ_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); - - roce_set_field(rc_sq_wqe->byte_20, - RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S, - RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S, - 0); - - roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_CQE_S, - (wr->send_flags & IBV_SEND_SIGNALED) ? 1 : 0); - - /* Set fence attr */ - roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FENCE_S, - (wr->send_flags & IBV_SEND_FENCE) ? 1 : 0); - - /* Set solicited attr */ - roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_SE_S, - (wr->send_flags & IBV_SEND_SOLICITED) ? 
1 : 0); - - roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_OWNER_S, - ~(((qp->sq.head + nreq) >> qp->sq.shift) & 0x1)); - - wqe += sizeof(struct hns_roce_rc_sq_wqe); - /* set remote addr segment */ switch (ibvqp->qp_type) { case IBV_QPT_RC: - switch (wr->opcode) { - case IBV_WR_RDMA_READ: - roce_set_field(rc_sq_wqe->byte_4, - RC_SQ_WQE_BYTE_4_OPCODE_M, - RC_SQ_WQE_BYTE_4_OPCODE_S, - HNS_ROCE_WQE_OP_RDMA_READ); - rc_sq_wqe->va = - htole64(wr->wr.rdma.remote_addr); - rc_sq_wqe->rkey = htole32(wr->wr.rdma.rkey); - break; - - case IBV_WR_RDMA_WRITE: - roce_set_field(rc_sq_wqe->byte_4, - RC_SQ_WQE_BYTE_4_OPCODE_M, - RC_SQ_WQE_BYTE_4_OPCODE_S, - HNS_ROCE_WQE_OP_RDMA_WRITE); - rc_sq_wqe->va = - htole64(wr->wr.rdma.remote_addr); - rc_sq_wqe->rkey = htole32(wr->wr.rdma.rkey); - break; - - case IBV_WR_RDMA_WRITE_WITH_IMM: - roce_set_field(rc_sq_wqe->byte_4, - RC_SQ_WQE_BYTE_4_OPCODE_M, - RC_SQ_WQE_BYTE_4_OPCODE_S, - HNS_ROCE_WQE_OP_RDMA_WRITE_WITH_IMM); - rc_sq_wqe->va = - htole64(wr->wr.rdma.remote_addr); - rc_sq_wqe->rkey = htole32(wr->wr.rdma.rkey); - break; - - case IBV_WR_SEND: - roce_set_field(rc_sq_wqe->byte_4, - RC_SQ_WQE_BYTE_4_OPCODE_M, - RC_SQ_WQE_BYTE_4_OPCODE_S, - HNS_ROCE_WQE_OP_SEND); - break; - case IBV_WR_SEND_WITH_INV: - roce_set_field(rc_sq_wqe->byte_4, - RC_SQ_WQE_BYTE_4_OPCODE_M, - RC_SQ_WQE_BYTE_4_OPCODE_S, - HNS_ROCE_WQE_OP_SEND_WITH_INV); - rc_sq_wqe->inv_key = - htole32(wr->invalidate_rkey); - break; - case IBV_WR_SEND_WITH_IMM: - roce_set_field(rc_sq_wqe->byte_4, - RC_SQ_WQE_BYTE_4_OPCODE_M, - RC_SQ_WQE_BYTE_4_OPCODE_S, - HNS_ROCE_WQE_OP_SEND_WITH_IMM); - break; - - case IBV_WR_LOCAL_INV: - roce_set_field(rc_sq_wqe->byte_4, - RC_SQ_WQE_BYTE_4_OPCODE_M, - RC_SQ_WQE_BYTE_4_OPCODE_S, - HNS_ROCE_WQE_OP_LOCAL_INV); - roce_set_bit(rc_sq_wqe->byte_4, - RC_SQ_WQE_BYTE_4_SO_S, 1); - rc_sq_wqe->inv_key = - htole32(wr->invalidate_rkey); - break; - case IBV_WR_ATOMIC_CMP_AND_SWP: - roce_set_field(rc_sq_wqe->byte_4, - RC_SQ_WQE_BYTE_4_OPCODE_M, - 
RC_SQ_WQE_BYTE_4_OPCODE_S, - HNS_ROCE_WQE_OP_ATOMIC_COM_AND_SWAP); - rc_sq_wqe->rkey = htole32(wr->wr.atomic.rkey); - rc_sq_wqe->va = - htole64(wr->wr.atomic.remote_addr); - break; - - case IBV_WR_ATOMIC_FETCH_AND_ADD: - roce_set_field(rc_sq_wqe->byte_4, - RC_SQ_WQE_BYTE_4_OPCODE_M, - RC_SQ_WQE_BYTE_4_OPCODE_S, - HNS_ROCE_WQE_OP_ATOMIC_FETCH_AND_ADD); - rc_sq_wqe->rkey = htole32(wr->wr.atomic.rkey); - rc_sq_wqe->va = - htole64(wr->wr.atomic.remote_addr); - break; - - case IBV_WR_BIND_MW: - roce_set_field(rc_sq_wqe->byte_4, - RC_SQ_WQE_BYTE_4_OPCODE_M, - RC_SQ_WQE_BYTE_4_OPCODE_S, - HNS_ROCE_WQE_OP_BIND_MW_TYPE); - roce_set_bit(rc_sq_wqe->byte_4, - RC_SQ_WQE_BYTE_4_MW_TYPE_S, - wr->bind_mw.mw->type - 1); - roce_set_bit(rc_sq_wqe->byte_4, - RC_SQ_WQE_BYTE_4_ATOMIC_S, - wr->bind_mw.bind_info.mw_access_flags & - IBV_ACCESS_REMOTE_ATOMIC ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, - RC_SQ_WQE_BYTE_4_RDMA_READ_S, - wr->bind_mw.bind_info.mw_access_flags & - IBV_ACCESS_REMOTE_READ ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, - RC_SQ_WQE_BYTE_4_RDMA_WRITE_S, - wr->bind_mw.bind_info.mw_access_flags & - IBV_ACCESS_REMOTE_WRITE ? 
1 : 0); - - rc_sq_wqe->new_rkey = htole32(wr->bind_mw.rkey); - rc_sq_wqe->byte_16 = - htole32(wr->bind_mw.bind_info.length & - 0xffffffff); - rc_sq_wqe->byte_20 = - htole32(wr->bind_mw.bind_info.length >> - 32); - rc_sq_wqe->rkey = - htole32(wr->bind_mw.bind_info.mr->rkey); - rc_sq_wqe->va = - htole64(wr->bind_mw.bind_info.addr); - break; - - default: - roce_set_field(rc_sq_wqe->byte_4, - RC_SQ_WQE_BYTE_4_OPCODE_M, - RC_SQ_WQE_BYTE_4_OPCODE_S, - HNS_ROCE_WQE_OP_MASK); - printf("Not supported transport opcode %d\n", - wr->opcode); - break; + ret = set_rc_wqe(wqe, qp, wr, nreq, &sge_info); + if (ret) { + *bad_wr = wr; + goto out; } - break; case IBV_QPT_UC: case IBV_QPT_UD: default: - break; - } - - dseg = wqe; - if (wr->opcode == IBV_WR_ATOMIC_FETCH_AND_ADD || - wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) { - set_data_seg_v2(dseg, wr->sg_list); - wqe += sizeof(struct hns_roce_v2_wqe_data_seg); - set_atomic_seg(wqe, wr); - } else if (wr->send_flags & IBV_SEND_INLINE && valid_num_sge) { - if (le32toh(rc_sq_wqe->msg_len) > qp->max_inline_data) { - ret = EINVAL; - *bad_wr = wr; - printf("data len=%d, send_flags = 0x%x!\r\n", - rc_sq_wqe->msg_len, wr->send_flags); - goto out; - } - - if (wr->opcode == IBV_WR_RDMA_READ) { - ret = EINVAL; - *bad_wr = wr; - printf("Not supported inline data!\n"); - goto out; - } - - for (i = 0; i < wr->num_sge; i++) { - memcpy(wqe, - ((void *) (uintptr_t) wr->sg_list[i].addr), - wr->sg_list[i].length); - wqe = wqe + wr->sg_list[i].length; - } - - roce_set_bit(rc_sq_wqe->byte_4, - RC_SQ_WQE_BYTE_4_INLINE_S, 1); - } else { - /* set sge */ - if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) { - for (i = 0; i < wr->num_sge; i++) - if (likely(wr->sg_list[i].length)) { - set_data_seg_v2(dseg, - wr->sg_list + i); - dseg++; - } - } else { - roce_set_field(rc_sq_wqe->byte_20, - RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M, - RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S, - sge_idx & (qp->sge.sge_cnt - 1)); - - for (i = 0; i < wr->num_sge && j < 2; i++) - if 
(likely(wr->sg_list[i].length)) { - set_data_seg_v2(dseg, - wr->sg_list + i); - dseg++; - j++; - } - - for (; i < wr->num_sge; i++) { - if (likely(wr->sg_list[i].length)) { - dseg = get_send_sge_ex(qp, - sge_idx & - (qp->sge.sge_cnt - 1)); - set_data_seg_v2(dseg, - wr->sg_list + i); - sge_idx++; - } - } - } + ret = -EINVAL; + *bad_wr = wr; + goto out; } } out: if (likely(nreq)) { qp->sq.head += nreq; + qp->next_sge = sge_info.start_idx; hns_roce_update_sq_db(ctx, qp->ibv_qp.qp_num, qp->sl, qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1)); if (qp->flags & HNS_ROCE_SUPPORT_SQ_RECORD_DB) *(qp->sdb) = qp->sq.head & 0xffff; - - qp->next_sge = sge_idx; } pthread_spin_unlock(&qp->sq.lock); -- 2.8.1
From: Lijun Ou <oulijun@huawei.com> User Application can use Unreliable Datagram on hip08 now. Signed-off-by: Lijun Ou <oulijun@huawei.com> Signed-off-by: Weihang Li <liweihang@hisilicon.com> --- providers/hns/hns_roce_u_hw_v2.c | 90 ++++++++++++++++++++++++++++++++++++++- providers/hns/hns_roce_u_hw_v2.h | 91 ++++++++++++++++++++++++++++++++++++++++ providers/hns/hns_roce_u_verbs.c | 4 ++ 3 files changed, 184 insertions(+), 1 deletion(-) diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c index 21a6ee0..dd7545f 100644 --- a/providers/hns/hns_roce_u_hw_v2.c +++ b/providers/hns/hns_roce_u_hw_v2.c @@ -634,6 +634,92 @@ static void set_sge(struct hns_roce_v2_wqe_data_seg *dseg, } } +static void set_ud_wqe(void *wqe, struct hns_roce_qp *qp, + struct ibv_send_wr *wr, int nreq, + struct hns_roce_sge_info *sge_info) +{ + struct hns_roce_ah *ah = to_hr_ah(wr->wr.ud.ah); + struct hns_roce_ud_sq_wqe *ud_sq_wqe = wqe; + + memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe)); + + roce_set_field(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_OPCODE_M, + UD_SQ_WQE_OPCODE_S, HNS_ROCE_WQE_OP_SEND); + roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_CQE_S, + (wr->send_flags & IBV_SEND_SIGNALED) ? 1 : 0); + roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_SE_S, + (wr->send_flags & IBV_SEND_SOLICITED) ? 
1 : 0); + roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_OWNER_S, + ~(((qp->sq.head + nreq) >> qp->sq.shift) & 0x1)); + + ud_sq_wqe->immtdata = htole32(be32toh(wr->imm_data)); + + roce_set_field(ud_sq_wqe->sge_num_pd, UD_SQ_WQE_PD_M, + UD_SQ_WQE_PD_S, to_hr_pd(qp->ibv_qp.pd)->pdn); + + roce_set_field(ud_sq_wqe->rsv_msg_start_sge_idx, + UD_SQ_WQE_MSG_START_SGE_IDX_M, + UD_SQ_WQE_MSG_START_SGE_IDX_S, + sge_info->start_idx & (qp->sge.sge_cnt - 1)); + + roce_set_field(ud_sq_wqe->udpspn_rsv, UD_SQ_WQE_UDP_SPN_M, + UD_SQ_WQE_UDP_SPN_S, 0); + + ud_sq_wqe->qkey = htole32(wr->wr.ud.remote_qkey); + + roce_set_field(ud_sq_wqe->rsv_dqpn, UD_SQ_WQE_DQPN_M, + UD_SQ_WQE_DQPN_S, wr->wr.ud.remote_qpn); + + roce_set_field(ud_sq_wqe->tclass_vlan, UD_SQ_WQE_VLAN_M, + UD_SQ_WQE_VLAN_S, ah->av.vlan_id); + + roce_set_field(ud_sq_wqe->tclass_vlan, UD_SQ_WQE_TCLASS_M, + UD_SQ_WQE_TCLASS_S, ah->av.tclass); + + roce_set_field(ud_sq_wqe->tclass_vlan, UD_SQ_WQE_HOPLIMIT_M, + UD_SQ_WQE_HOPLIMIT_S, ah->av.hop_limit); + + roce_set_field(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_FLOW_LABEL_M, + UD_SQ_WQE_FLOW_LABEL_S, ah->av.flowlabel); + + roce_set_bit(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_VLAN_EN_S, + ah->av.vlan_en ? 
1 : 0); + + roce_set_bit(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_LBI_S, 0); + + roce_set_field(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_SL_M, + UD_SQ_WQE_SL_S, ah->av.sl); + + roce_set_field(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_PORTN_M, + UD_SQ_WQE_PORTN_S, qp->ibv_qp.qp_num); + + roce_set_field(ud_sq_wqe->dmac, UD_SQ_WQE_DMAC_0_M, + UD_SQ_WQE_DMAC_0_S, ah->av.mac[0]); + roce_set_field(ud_sq_wqe->dmac, UD_SQ_WQE_DMAC_1_M, + UD_SQ_WQE_DMAC_1_S, ah->av.mac[1]); + roce_set_field(ud_sq_wqe->dmac, UD_SQ_WQE_DMAC_2_M, + UD_SQ_WQE_DMAC_2_S, ah->av.mac[2]); + roce_set_field(ud_sq_wqe->dmac, UD_SQ_WQE_DMAC_3_M, + UD_SQ_WQE_DMAC_3_S, ah->av.mac[3]); + roce_set_field(ud_sq_wqe->smac_index_dmac, + UD_SQ_WQE_DMAC_4_M, UD_SQ_WQE_DMAC_4_S, + to_hr_ah(wr->wr.ud.ah)->av.mac[4]); + roce_set_field(ud_sq_wqe->smac_index_dmac, + UD_SQ_WQE_DMAC_5_M, UD_SQ_WQE_DMAC_5_S, + to_hr_ah(wr->wr.ud.ah)->av.mac[5]); + roce_set_field(ud_sq_wqe->smac_index_dmac, UD_SQ_WQE_SGID_IDX_M, + UD_SQ_WQE_SGID_IDX_S, ah->av.gid_index); + + memcpy(ud_sq_wqe->dgid, ah->av.dgid, HNS_ROCE_GID_SIZE); + + set_sge(NULL, qp, wr, sge_info); + + ud_sq_wqe->msg_len = htole32(sge_info->total_len); + + roce_set_field(ud_sq_wqe->sge_num_pd, UD_SQ_WQE_SGE_NUM_M, + UD_SQ_WQE_SGE_NUM_S, sge_info->valid_num); +} + static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, int nreq, struct hns_roce_sge_info *sge_info) { @@ -827,8 +913,10 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, goto out; } break; - case IBV_QPT_UC: case IBV_QPT_UD: + set_ud_wqe(wqe, qp, wr, nreq, &sge_info); + break; + case IBV_QPT_UC: default: ret = -EINVAL; *bad_wr = wr; diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h index 3eca1fd..84cf6c4 100644 --- a/providers/hns/hns_roce_u_hw_v2.h +++ b/providers/hns/hns_roce_u_hw_v2.h @@ -291,4 +291,95 @@ struct hns_roce_wqe_atomic_seg { int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, struct ibv_send_wr 
**bad_wr); +struct hns_roce_ud_sq_wqe { + __le32 rsv_opcode; + __le32 msg_len; + __le32 immtdata; + __le32 sge_num_pd; + __le32 rsv_msg_start_sge_idx; + __le32 udpspn_rsv; + __le32 qkey; + __le32 rsv_dqpn; + __le32 tclass_vlan; + __le32 lbi_flow_label; + __le32 dmac; + __le32 smac_index_dmac; + uint8_t dgid[HNS_ROCE_GID_SIZE]; +}; + +#define UD_SQ_WQE_OPCODE_S 0 +#define UD_SQ_WQE_OPCODE_M (((1UL << 5) - 1) << UD_SQ_WQE_OPCODE_S) + +#define UD_SQ_WQE_OWNER_S 7 + +#define UD_SQ_WQE_CQE_S 8 + +#define UD_SQ_WQE_SE_S 11 + +#define UD_SQ_WQE_PD_S 0 +#define UD_SQ_WQE_PD_M (((1UL << 24) - 1) << UD_SQ_WQE_PD_S) + +#define UD_SQ_WQE_SGE_NUM_S 24 +#define UD_SQ_WQE_SGE_NUM_M (((1UL << 8) - 1) << UD_SQ_WQE_SGE_NUM_S) + +#define UD_SQ_WQE_MSG_START_SGE_IDX_S 0 +#define UD_SQ_WQE_MSG_START_SGE_IDX_M \ + (((1UL << 24) - 1) << UD_SQ_WQE_MSG_START_SGE_IDX_S) + +#define UD_SQ_WQE_UDP_SPN_S 16 +#define UD_SQ_WQE_UDP_SPN_M \ + (((1UL << 16) - 1) << UD_SQ_WQE_UDP_SPN_S) + +#define UD_SQ_WQE_DQPN_S 0 +#define UD_SQ_WQE_DQPN_M (((1UL << 24) - 1) << UD_SQ_WQE_DQPN_S) + +#define UD_SQ_WQE_VLAN_S 0 +#define UD_SQ_WQE_VLAN_M (((1UL << 16) - 1) << UD_SQ_WQE_VLAN_S) + +#define UD_SQ_WQE_HOPLIMIT_S 16 +#define UD_SQ_WQE_HOPLIMIT_M (((1UL << 8) - 1) << UD_SQ_WQE_HOPLIMIT_S) + +#define UD_SQ_WQE_TCLASS_S 24 +#define UD_SQ_WQE_TCLASS_M (((1UL << 8) - 1) << UD_SQ_WQE_TCLASS_S) + +#define UD_SQ_WQE_FLOW_LABEL_S 0 +#define UD_SQ_WQE_FLOW_LABEL_M (((1UL << 20) - 1) << UD_SQ_WQE_FLOW_LABEL_S) + +#define UD_SQ_WQE_SL_S 20 +#define UD_SQ_WQE_SL_M (((1UL << 4) - 1) << UD_SQ_WQE_SL_S) + +#define UD_SQ_WQE_PORTN_S 24 +#define UD_SQ_WQE_PORTN_M (((1UL << 3) - 1) << UD_SQ_WQE_PORTN_S) + +#define UD_SQ_WQE_VLAN_EN_S 30 + +#define UD_SQ_WQE_LBI_S 31 + +#define UD_SQ_WQE_PORTN_S 24 +#define UD_SQ_WQE_PORTN_M (((1UL << 3) - 1) << UD_SQ_WQE_PORTN_S) + +#define UD_SQ_WQE_DMAC_0_S 0 +#define UD_SQ_WQE_DMAC_0_M (((1UL << 8) - 1) << UD_SQ_WQE_DMAC_0_S) + +#define UD_SQ_WQE_DMAC_1_S 8 +#define UD_SQ_WQE_DMAC_1_M 
(((1UL << 8) - 1) << UD_SQ_WQE_DMAC_1_S) + +#define UD_SQ_WQE_DMAC_2_S 16 +#define UD_SQ_WQE_DMAC_2_M (((1UL << 8) - 1) << UD_SQ_WQE_DMAC_2_S) + +#define UD_SQ_WQE_DMAC_3_S 24 +#define UD_SQ_WQE_DMAC_3_M (((1UL << 8) - 1) << UD_SQ_WQE_DMAC_3_S) + +#define UD_SQ_WQE_DMAC_4_S 0 +#define UD_SQ_WQE_DMAC_4_M (((1UL << 8) - 1) << UD_SQ_WQE_DMAC_4_S) + +#define UD_SQ_WQE_DMAC_5_S 8 +#define UD_SQ_WQE_DMAC_5_M (((1UL << 8) - 1) << UD_SQ_WQE_DMAC_5_S) + +#define UD_SQ_WQE_SGID_IDX_S 16 +#define UD_SQ_WQE_SGID_IDX_M (((1UL << 8) - 1) << UD_SQ_WQE_SGID_IDX_S) + +#define UD_SQ_WQE_SMAC_IDX_S 24 +#define UD_SQ_WQE_SMAC_IDX_M (((1UL << 8) - 1) << UD_SQ_WQE_SMAC_IDX_S) + #endif /* _HNS_ROCE_U_HW_V2_H */ diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c index 0d10b1c..5255fff 100644 --- a/providers/hns/hns_roce_u_verbs.c +++ b/providers/hns/hns_roce_u_verbs.c @@ -767,6 +767,10 @@ static void hns_roce_set_qp_params(struct ibv_pd *pd, } } + if (attr->qp_type == IBV_QPT_UD) + qp->sge.sge_cnt = roundup_pow_of_two(qp->sq.wqe_cnt * + qp->sq.max_gs); + /* limit by the context queried during alloc context */ qp->rq.max_post = min(ctx->max_qp_wr, qp->rq.wqe_cnt); qp->sq.max_post = min(ctx->max_qp_wr, qp->sq.wqe_cnt); -- 2.8.1
User can configure whether hardware working on loopback mode or not by export an environment variable "HNS_ROCE_LOOPBACK". Signed-off-by: Weihang Li <liweihang@hisilicon.com> --- providers/hns/hns_roce_u.c | 15 +++++++++++++++ providers/hns/hns_roce_u.h | 2 ++ providers/hns/hns_roce_u_hw_v2.c | 3 ++- providers/hns/hns_roce_u_verbs.c | 1 + 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c index 8ba41de..f68c84b 100644 --- a/providers/hns/hns_roce_u.c +++ b/providers/hns/hns_roce_u.c @@ -40,6 +40,8 @@ #include "hns_roce_u.h" #include "hns_roce_u_abi.h" +bool loopback; + #define HID_LEN 15 #define DEV_MATCH_LEN 128 @@ -85,6 +87,17 @@ static const struct verbs_context_ops hns_common_ops = { .destroy_ah = hns_roce_u_destroy_ah, }; +static bool get_param_config(const char *var) +{ + char *env; + + env = getenv(var); + if (env) + return strcmp(env, "true") ? true : false; + + return false; +} + static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, int cmd_fd, void *private_data) @@ -105,6 +118,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, &resp.ibv_resp, sizeof(resp))) goto err_free; + loopback = get_param_config("HNS_ROCE_LOOPBACK"); + context->num_qps = resp.qp_tab_size; context->qp_table_shift = ffs(context->num_qps) - 1 - HNS_ROCE_QP_TABLE_BITS; diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h index 9d8f72e..624bdaa 100644 --- a/providers/hns/hns_roce_u.h +++ b/providers/hns/hns_roce_u.h @@ -43,6 +43,7 @@ #include <ccan/bitmap.h> #include <ccan/container_of.h> +extern bool loopback; #define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6') #define HNS_ROCE_HW_VER2 ('h' << 24 | 'i' << 16 | '0' << 8 | '8') @@ -265,6 +266,7 @@ struct hns_roce_av { uint8_t mac[ETH_ALEN]; uint16_t vlan_id; uint8_t vlan_en; + uint8_t loopback; }; struct hns_roce_ah { diff --git a/providers/hns/hns_roce_u_hw_v2.c 
b/providers/hns/hns_roce_u_hw_v2.c index dd7545f..62e8561 100644 --- a/providers/hns/hns_roce_u_hw_v2.c +++ b/providers/hns/hns_roce_u_hw_v2.c @@ -685,7 +685,8 @@ static void set_ud_wqe(void *wqe, struct hns_roce_qp *qp, roce_set_bit(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_VLAN_EN_S, ah->av.vlan_en ? 1 : 0); - roce_set_bit(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_LBI_S, 0); + roce_set_bit(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_LBI_S, + ah->av.loopback); roce_set_field(ud_sq_wqe->lbi_flow_label, UD_SQ_WQE_SL_M, UD_SQ_WQE_SL_S, ah->av.sl); diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c index 5255fff..ff24ae3 100644 --- a/providers/hns/hns_roce_u_verbs.c +++ b/providers/hns/hns_roce_u_verbs.c @@ -970,6 +970,7 @@ struct ibv_ah *hns_roce_u_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) ah->av.port = attr->port_num; ah->av.sl = attr->sl; + ah->av.loopback = loopback ? 1 : 0; if (attr->static_rate) ah->av.static_rate = IBV_RATE_10_GBPS; -- 2.8.1
On Tue, Sep 10, 2019 at 08:20:50PM +0800, Weihang Li wrote:
> From: Yixian Liu <liuyixian@huawei.com>
>
> This patch refactors the interface of hns_roce_u_v2_post_send, which
> is now very complicated. We reduce the complexity with following points:
> 1. Separate RC server into a function.
> 2. Simplify and separate the process of sge.
> 3. Keep the logic and consistency of all operations.
>
> Signed-off-by: Yixian Liu <liuyixian@huawei.com>
> Signed-off-by: Weihang Li <liweihang@hisilicon.com>
> ---
> providers/hns/hns_roce_u.h | 7 +
> providers/hns/hns_roce_u_hw_v2.c | 427 ++++++++++++++++-----------------------
> 2 files changed, 186 insertions(+), 248 deletions(-)
>
No printf() calls in the providers code, please.
Thanks
On Tue, Sep 10, 2019 at 08:20:52PM +0800, Weihang Li wrote:
> User can configure whether the hardware is working in loopback mode or not
> by exporting an environment variable "HNS_ROCE_LOOPBACK".
It is definitely wrong interface to configure behaviour of application.
Environment variables make sense if you need to change library
behaviour.
Thanks
> -----Original Message-----
> From: Leon Romanovsky [mailto:leon@kernel.org]
> Sent: Wednesday, September 11, 2019 3:36 PM
> To: liweihang <liweihang@hisilicon.com>
> Cc: dledford@redhat.com; jgg@ziepe.ca; linux-rdma@vger.kernel.org;
> Linuxarm <linuxarm@huawei.com>
> Subject: Re: [PATCH rdma-core 3/5] libhns: Refactor for post send
>
> On Tue, Sep 10, 2019 at 08:20:50PM +0800, Weihang Li wrote:
> > From: Yixian Liu <liuyixian@huawei.com>
> >
> > This patch refactors the interface of hns_roce_u_v2_post_send, which
> > is now very complicated. We reduce the complexity with following points:
> > 1. Separate RC server into a function.
> > 2. Simplify and separate the process of sge.
> > 3. Keep the logic and consistency of all operations.
> >
> > Signed-off-by: Yixian Liu <liuyixian@huawei.com>
> > Signed-off-by: Weihang Li <liweihang@hisilicon.com>
> > ---
> > providers/hns/hns_roce_u.h | 7 +
> > providers/hns/hns_roce_u_hw_v2.c | 427
> > ++++++++++++++++-----------------------
> > 2 files changed, 186 insertions(+), 248 deletions(-)
> >
>
> No printf() calls in the providers code, please.
>
> Thanks
OK, will modify them, thank you.
> -----Original Message-----
> From: Leon Romanovsky [mailto:leon@kernel.org]
> Sent: Wednesday, September 11, 2019 3:42 PM
> To: liweihang <liweihang@hisilicon.com>
> Cc: dledford@redhat.com; jgg@ziepe.ca; linux-rdma@vger.kernel.org;
> Linuxarm <linuxarm@huawei.com>
> Subject: Re: [PATCH rdma-core 5/5] libhns: Support configuring loopback
> mode by user
>
> On Tue, Sep 10, 2019 at 08:20:52PM +0800, Weihang Li wrote:
> > User can configure whether the hardware is working in loopback mode or not by
> > exporting an environment variable "HNS_ROCE_LOOPBACK".
>
> It is definitely wrong interface to configure behaviour of application.
> Environment variables make sense if you need to change library behaviour.
>
> Thanks
Hi Leon,
Could you please give some advice on how to get configurations from users?
Thanks,
Weihang