[PATCH for-next] RDMA/hns: Add support for extended atomic
From: Weihang Li @ 2019-11-15  1:39 UTC
  To: dledford, jgg; +Cc: linux-rdma, linuxarm

From: Jiaran Zhang <zhangjiaran@huawei.com>

Add support for extended atomic operations on hip08, i.e. compare & swap
and fetch & add with operand sizes of 8, 16, 32 and 64 bytes. Operands
larger than 8 bytes do not fit in the atomic segment of the WQE, so they
are carried in the extended SGE space; in that case the swap and
compare_add fields of the work request hold the kernel addresses of the
operand buffers rather than immediate values.

Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Weihang Li <liweihang@hisilicon.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 113 ++++++++++++++++++++++++-----
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h |   9 ++++
 2 files changed, 107 insertions(+), 15 deletions(-)
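
For illustration, a hypothetical caller-side sketch of a 32-byte extended
compare & swap under this convention (qp, mr, dma_addr, remote_va and
rkey are placeholders, not part of this patch; for operands larger than
8 bytes, swap and compare_add carry the addresses of the operand buffers
rather than immediate values):

	u64 swap_buf[4], cmp_buf[4];	/* 32-byte operand buffers */
	struct ib_sge sge = {
		.addr	= dma_addr,	/* registered 32-byte response buffer */
		.length	= 32,		/* selects the extended atomic path */
		.lkey	= mr->lkey,
	};
	struct ib_atomic_wr awr = {
		.wr = {
			.opcode		= IB_WR_ATOMIC_CMP_AND_SWP,
			.num_sge	= 1,
			.sg_list	= &sge,
			.send_flags	= IB_SEND_SIGNALED,
		},
		.remote_addr	= remote_va,
		.rkey		= rkey,
		.swap		= (u64)(uintptr_t)swap_buf,
		.compare_add	= (u64)(uintptr_t)cmp_buf,
	};
	const struct ib_send_wr *bad_wr;
	int ret = ib_post_send(qp, &awr.wr, &bad_wr);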

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 907c951..74ccb08 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -97,18 +97,80 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
 		     V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
 }
 
-static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
-			   const struct ib_atomic_wr *wr)
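+/*
+ * Copy an extended atomic operand of ex_sge_num 8-byte words into the
+ * extended SGE space, two words per extended SGE slot, advancing
+ * *sge_idx once per slot; a NULL data_addr zero-fills the words.
+ */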
+static void set_extend_atomic_seg(struct hns_roce_qp *qp,
+				  u32 ex_sge_num, unsigned int *sge_idx,
+				  u64 *data_addr)
 {
-	if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
-		aseg->fetchadd_swap_data = cpu_to_le64(wr->swap);
-		aseg->cmp_data  = cpu_to_le64(wr->compare_add);
-	} else {
-		aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add);
-		aseg->cmp_data  = 0;
+	__le64 *ext_seg;
+	u32 i;
+
+	for (i = 0; i < ex_sge_num; i += EXT_SGE_BYTE_8_NUM, (*sge_idx)++) {
+		ext_seg = get_send_extend_sge(qp, ((*sge_idx) &
+					      (qp->sge.sge_cnt - 1)));
+		/* In the extended atomic case, data_addr carries the
+		 * address where the extended atomic data is stored.
+		 */
+		*ext_seg = data_addr ? cpu_to_le64(*(data_addr + i)) : 0;
+		*(ext_seg + 1) = data_addr ?
+				 cpu_to_le64(*(data_addr + (i + 1))) : 0;
 	}
 }
 
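+/*
+ * Build the atomic segment following the single data segment of an RC
+ * WQE: an 8-byte operand is written into the segment itself, while
+ * 16/32/64-byte operands are copied from the buffers addressed by
+ * wr->swap and wr->compare_add into the extended SGE space.
+ */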
+static int set_atomic_seg(struct hns_roce_qp *qp,
+			  const struct ib_send_wr *wr, unsigned int msg_len,
+			  void *dseg, unsigned int *sge_idx)
+{
+	struct hns_roce_wqe_atomic_seg *aseg;
+	u32 ex_sge_num;
+
+	dseg += sizeof(struct hns_roce_v2_wqe_data_seg);
+	aseg = dseg;
+
+	if (msg_len == STANDARD_ATOMIC_BYTE_8) {
+		if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+			aseg->fetchadd_swap_data =
+				cpu_to_le64(atomic_wr(wr)->swap);
+			aseg->cmp_data =
+				cpu_to_le64(atomic_wr(wr)->compare_add);
+		} else {
+			aseg->fetchadd_swap_data =
+				cpu_to_le64(atomic_wr(wr)->compare_add);
+			aseg->cmp_data = 0;
+		}
+	} else if (msg_len == EXTEND_ATOMIC_BYTE_16 ||
+		   msg_len == EXTEND_ATOMIC_BYTE_32 ||
+		   msg_len == EXTEND_ATOMIC_BYTE_64) {
+		ex_sge_num = msg_len >> 3;
+		aseg->fetchadd_swap_data = 0;
+		aseg->cmp_data = 0;
+		if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+			set_extend_atomic_seg(qp, ex_sge_num, sge_idx,
+					(u64 *)atomic_wr(wr)->swap);
+			set_extend_atomic_seg(qp, ex_sge_num, sge_idx,
+					(u64 *)atomic_wr(wr)->compare_add);
+		} else {
+			set_extend_atomic_seg(qp, ex_sge_num, sge_idx,
+					(u64 *)atomic_wr(wr)->compare_add);
+			set_extend_atomic_seg(qp, ex_sge_num, sge_idx, NULL);
+		}
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
 			   unsigned int *sge_ind)
 {
@@ -545,8 +595,13 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 
 				dseg = wqe;
 				set_data_seg_v2(dseg, wr->sg_list);
-				wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
-				set_atomic_seg(wqe, atomic_wr(wr));
+				ret = set_atomic_seg(qp, wr,
+						     le32_to_cpu(rc_sq_wqe->msg_len),
+						     dseg, &sge_idx);
+				if (ret) {
+					*bad_wr = wr;
+					goto out;
+				}
 				roce_set_field(rc_sq_wqe->byte_16,
 					       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
 					       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
@@ -1668,7 +1722,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 	caps->max_srq_desc_sz	= HNS_ROCE_V2_MAX_SRQ_DESC_SZ;
 	caps->qpc_entry_sz	= HNS_ROCE_V2_QPC_ENTRY_SZ;
 	caps->irrl_entry_sz	= HNS_ROCE_V2_IRRL_ENTRY_SZ;
-	caps->trrl_entry_sz	= HNS_ROCE_V2_TRRL_ENTRY_SZ;
+	caps->trrl_entry_sz	= HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ;
 	caps->cqc_entry_sz	= HNS_ROCE_V2_CQC_ENTRY_SZ;
 	caps->srqc_entry_sz	= HNS_ROCE_V2_SRQC_ENTRY_SZ;
 	caps->mtpt_entry_sz	= HNS_ROCE_V2_MTPT_ENTRY_SZ;
@@ -2860,19 +2914,19 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
 			break;
 		case HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP:
 			wc->opcode = IB_WC_COMP_SWAP;
-			wc->byte_len  = 8;
+			wc->byte_len  = le32_to_cpu(cqe->byte_cnt);
 			break;
 		case HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD:
 			wc->opcode = IB_WC_FETCH_ADD;
-			wc->byte_len  = 8;
+			wc->byte_len  = le32_to_cpu(cqe->byte_cnt);
 			break;
 		case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP:
 			wc->opcode = IB_WC_MASKED_COMP_SWAP;
-			wc->byte_len  = 8;
+			wc->byte_len  = le32_to_cpu(cqe->byte_cnt);
 			break;
 		case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD:
 			wc->opcode = IB_WC_MASKED_FETCH_ADD;
-			wc->byte_len  = 8;
+			wc->byte_len  = le32_to_cpu(cqe->byte_cnt);
 			break;
 		case HNS_ROCE_SQ_OPCODE_FAST_REG_WR:
 			wc->opcode = IB_WC_REG_MR;
@@ -3211,6 +3265,9 @@ static void set_access_flags(struct hns_roce_qp *hr_qp,
 	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
 		     !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
 	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
+	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S,
+		     !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
+	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, 0);
 }
 
 static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
@@ -3578,6 +3635,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
 			     IB_ACCESS_REMOTE_ATOMIC));
 		roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
 			     0);
+		roce_set_bit(context->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S,
+			     !!(attr->qp_access_flags &
+				IB_ACCESS_REMOTE_ATOMIC));
+		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S, 0);
 	} else {
 		roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
 			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ));
@@ -3593,6 +3656,13 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
 			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
 		roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
 			     0);
+
+		roce_set_bit(context->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S,
+			     !!(hr_qp->access_flags &
+				IB_ACCESS_REMOTE_ATOMIC));
+		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S, 0);
 	}
 
 	roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 76a14db..0a9d1e5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -81,6 +81,7 @@
 #define HNS_ROCE_V2_QPC_ENTRY_SZ		256
 #define HNS_ROCE_V2_IRRL_ENTRY_SZ		64
 #define HNS_ROCE_V2_TRRL_ENTRY_SZ		48
+#define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ	100
 #define HNS_ROCE_V2_CQC_ENTRY_SZ		64
 #define HNS_ROCE_V2_SRQC_ENTRY_SZ		64
 #define HNS_ROCE_V2_MTPT_ENTRY_SZ		64
@@ -158,6 +159,13 @@
 
 #define HNS_ROCE_V2_CQE_QPN_MASK		0x3ffff
 
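+/* extended atomic sizes; one extended SGE slot holds two 8-byte words */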
+#define EXT_SGE_BYTE_8_NUM	2
+#define STANDARD_ATOMIC_BYTE_8	0x8
+#define EXTEND_ATOMIC_BYTE_16	0x10
+#define EXTEND_ATOMIC_BYTE_32	0x20
+#define EXTEND_ATOMIC_BYTE_64	0x40
+
 enum {
 	HNS_ROCE_V2_WQE_OP_SEND				= 0x0,
 	HNS_ROCE_V2_WQE_OP_SEND_WITH_INV		= 0x1,
@@ -644,6 +651,7 @@ struct hns_roce_v2_qp_context {
 
 #define	V2_QPC_BYTE_76_RQIE_S 28
 
+#define	V2_QPC_BYTE_76_EXT_ATE_S 29
 #define	V2_QPC_BYTE_76_RQ_VLAN_EN_S 30
 #define	V2_QPC_BYTE_80_RX_CQN_S 0
 #define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0)
-- 
2.8.1

