Linux-RDMA Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH for-next] RDMA/hns: Add support for extended atomic
@ 2019-11-15  1:39 Weihang Li
  2019-12-10 13:10 ` Weihang Li
  2020-01-02 21:03 ` Jason Gunthorpe
  0 siblings, 2 replies; 9+ messages in thread
From: Weihang Li @ 2019-11-15  1:39 UTC (permalink / raw)
  To: dledford, jgg; +Cc: linux-rdma, linuxarm

From: Jiaran Zhang <zhangjiaran@huawei.com>

Support extended atomic operations including cmp & swap and fetch & add
of 8 bytes, 16 bytes, 32 bytes, 64 bytes on hip08.

Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Weihang Li <liweihang@hisilicon.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 100 ++++++++++++++++++++++++-----
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h |   8 +++
 2 files changed, 93 insertions(+), 15 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 907c951..74ccb08 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -97,18 +97,68 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
 		     V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
 }
 
-static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
-			   const struct ib_atomic_wr *wr)
+static void set_extend_atomic_seg(struct hns_roce_qp *qp,
+				  u32 ex_sge_num, unsigned int *sge_idx,
+				  u64 *data_addr)
 {
-	if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
-		aseg->fetchadd_swap_data = cpu_to_le64(wr->swap);
-		aseg->cmp_data  = cpu_to_le64(wr->compare_add);
-	} else {
-		aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add);
-		aseg->cmp_data  = 0;
+	__le64 *ext_seg;
+	int i;
+
+	for (i = 0; i < ex_sge_num; i += EXT_SGE_BYTE_8_NUM, (*sge_idx)++) {
+		ext_seg = get_send_extend_sge(qp, ((*sge_idx) &
+					      (qp->sge.sge_cnt - 1)));
+		/* In the extended atomic scenario, the data_add parameter
+		 * passes the address where the extended atomic data is stored.
+		 */
+		*ext_seg = data_addr ? cpu_to_le64(*(data_addr + i)) : 0;
+		*(ext_seg + 1) = data_addr ?
+				 cpu_to_le64(*(data_addr + (i + 1))) : 0;
 	}
 }
 
+static int set_atomic_seg(struct hns_roce_qp *qp,
+			  const struct ib_send_wr *wr, unsigned int msg_len,
+			  void *dseg, unsigned int *sge_idx)
+{
+	struct hns_roce_wqe_atomic_seg *aseg;
+	u32 ex_sge_num;
+
+	dseg += sizeof(struct hns_roce_v2_wqe_data_seg);
+	aseg = dseg;
+
+	if (msg_len == STANDARD_ATOMIC_BYTE_8) {
+		if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+			aseg->fetchadd_swap_data =
+				cpu_to_le64(atomic_wr(wr)->swap);
+			aseg->cmp_data =
+				cpu_to_le64(atomic_wr(wr)->compare_add);
+		} else {
+			aseg->fetchadd_swap_data =
+				cpu_to_le64(atomic_wr(wr)->compare_add);
+			aseg->cmp_data = 0;
+		}
+	} else if (msg_len == EXTEND_ATOMIC_BYTE_16 ||
+		   msg_len == EXTEND_ATOMIC_BYTE_32 ||
+		   msg_len == EXTEND_ATOMIC_BYTE_64) {
+		ex_sge_num = msg_len >> 3;
+		aseg->fetchadd_swap_data = 0;
+		aseg->cmp_data = 0;
+		if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+			set_extend_atomic_seg(qp, ex_sge_num, sge_idx,
+					(u64 *)atomic_wr(wr)->swap);
+			set_extend_atomic_seg(qp, ex_sge_num, sge_idx,
+					(u64 *)atomic_wr(wr)->compare_add);
+		} else {
+			set_extend_atomic_seg(qp, ex_sge_num, sge_idx,
+					(u64 *)atomic_wr(wr)->compare_add);
+			set_extend_atomic_seg(qp, ex_sge_num, sge_idx, 0);
+		}
+	} else
+		return -EINVAL;
+
+	return 0;
+}
+
 static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
 			   unsigned int *sge_ind)
 {
@@ -545,8 +595,12 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 
 				dseg = wqe;
 				set_data_seg_v2(dseg, wr->sg_list);
-				wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
-				set_atomic_seg(wqe, atomic_wr(wr));
+				ret = set_atomic_seg(qp, wr, rc_sq_wqe->msg_len,
+						     dseg, &sge_idx);
+				if (ret) {
+					*bad_wr = wr;
+					goto out;
+				}
 				roce_set_field(rc_sq_wqe->byte_16,
 					       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
 					       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
@@ -1668,7 +1722,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 	caps->max_srq_desc_sz	= HNS_ROCE_V2_MAX_SRQ_DESC_SZ;
 	caps->qpc_entry_sz	= HNS_ROCE_V2_QPC_ENTRY_SZ;
 	caps->irrl_entry_sz	= HNS_ROCE_V2_IRRL_ENTRY_SZ;
-	caps->trrl_entry_sz	= HNS_ROCE_V2_TRRL_ENTRY_SZ;
+	caps->trrl_entry_sz	= HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ;
 	caps->cqc_entry_sz	= HNS_ROCE_V2_CQC_ENTRY_SZ;
 	caps->srqc_entry_sz	= HNS_ROCE_V2_SRQC_ENTRY_SZ;
 	caps->mtpt_entry_sz	= HNS_ROCE_V2_MTPT_ENTRY_SZ;
@@ -2860,19 +2914,19 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
 			break;
 		case HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP:
 			wc->opcode = IB_WC_COMP_SWAP;
-			wc->byte_len  = 8;
+			wc->byte_len  = le32_to_cpu(cqe->byte_cnt);
 			break;
 		case HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD:
 			wc->opcode = IB_WC_FETCH_ADD;
-			wc->byte_len  = 8;
+			wc->byte_len  = le32_to_cpu(cqe->byte_cnt);
 			break;
 		case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP:
 			wc->opcode = IB_WC_MASKED_COMP_SWAP;
-			wc->byte_len  = 8;
+			wc->byte_len  = le32_to_cpu(cqe->byte_cnt);
 			break;
 		case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD:
 			wc->opcode = IB_WC_MASKED_FETCH_ADD;
-			wc->byte_len  = 8;
+			wc->byte_len  = le32_to_cpu(cqe->byte_cnt);
 			break;
 		case HNS_ROCE_SQ_OPCODE_FAST_REG_WR:
 			wc->opcode = IB_WC_REG_MR;
@@ -3211,6 +3265,9 @@ static void set_access_flags(struct hns_roce_qp *hr_qp,
 	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
 		     !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
 	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
+	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S,
+		     !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
+	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, 0);
 }
 
 static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
@@ -3578,6 +3635,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
 			     IB_ACCESS_REMOTE_ATOMIC));
 		roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
 			     0);
+		roce_set_bit(context->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S,
+			     !!(attr->qp_access_flags &
+				IB_ACCESS_REMOTE_ATOMIC));
+		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S, 0);
 	} else {
 		roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
 			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ));
@@ -3593,6 +3656,13 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
 			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
 		roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
 			     0);
+
+		roce_set_bit(context->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S,
+			     !!(hr_qp->access_flags &
+				IB_ACCESS_REMOTE_ATOMIC));
+		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S, 0);
 	}
 
 	roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 76a14db..0a9d1e5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -81,6 +81,7 @@
 #define HNS_ROCE_V2_QPC_ENTRY_SZ		256
 #define HNS_ROCE_V2_IRRL_ENTRY_SZ		64
 #define HNS_ROCE_V2_TRRL_ENTRY_SZ		48
+#define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ	100
 #define HNS_ROCE_V2_CQC_ENTRY_SZ		64
 #define HNS_ROCE_V2_SRQC_ENTRY_SZ		64
 #define HNS_ROCE_V2_MTPT_ENTRY_SZ		64
@@ -158,6 +159,12 @@ enum {
 
 #define HNS_ROCE_V2_CQE_QPN_MASK		0x3ffff
 
+#define EXT_SGE_BYTE_8_NUM	2
+#define STANDARD_ATOMIC_BYTE_8	0x8
+#define EXTEND_ATOMIC_BYTE_16	0x10
+#define EXTEND_ATOMIC_BYTE_32	0x20
+#define EXTEND_ATOMIC_BYTE_64	0x40
+
 enum {
 	HNS_ROCE_V2_WQE_OP_SEND				= 0x0,
 	HNS_ROCE_V2_WQE_OP_SEND_WITH_INV		= 0x1,
@@ -644,6 +651,7 @@ struct hns_roce_v2_qp_context {
 
 #define	V2_QPC_BYTE_76_RQIE_S 28
 
+#define	V2_QPC_BYTE_76_EXT_ATE_S 29
 #define	V2_QPC_BYTE_76_RQ_VLAN_EN_S 30
 #define	V2_QPC_BYTE_80_RX_CQN_S 0
 #define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0)
-- 
2.8.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH for-next] RDMA/hns: Add support for extended atomic
  2019-11-15  1:39 [PATCH for-next] RDMA/hns: Add support for extended atomic Weihang Li
@ 2019-12-10 13:10 ` Weihang Li
  2019-12-19 19:07   ` Jason Gunthorpe
  2020-01-02 21:03 ` Jason Gunthorpe
  1 sibling, 1 reply; 9+ messages in thread
From: Weihang Li @ 2019-12-10 13:10 UTC (permalink / raw)
  To: dledford, jgg; +Cc: linux-rdma, linuxarm

Hi Jason and Doug,

Do you have some comments on this patch?

Thanks,
Weihang

On 2019/11/15 9:39, Weihang Li wrote:
> From: Jiaran Zhang <zhangjiaran@huawei.com>
> 
> Support extended atomic operations including cmp & swap and fetch & add
> of 8 bytes, 16 bytes, 32 bytes, 64 bytes on hip08.
> 
> Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
> Signed-off-by: Weihang Li <liweihang@hisilicon.com>
> ---
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 100 ++++++++++++++++++++++++-----
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h |   8 +++
>  2 files changed, 93 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> index 907c951..74ccb08 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> @@ -97,18 +97,68 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
>  		     V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
>  }
>  
> -static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
> -			   const struct ib_atomic_wr *wr)
> +static void set_extend_atomic_seg(struct hns_roce_qp *qp,
> +				  u32 ex_sge_num, unsigned int *sge_idx,
> +				  u64 *data_addr)
>  {
> -	if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
> -		aseg->fetchadd_swap_data = cpu_to_le64(wr->swap);
> -		aseg->cmp_data  = cpu_to_le64(wr->compare_add);
> -	} else {
> -		aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add);
> -		aseg->cmp_data  = 0;
> +	__le64 *ext_seg;
> +	int i;
> +
> +	for (i = 0; i < ex_sge_num; i += EXT_SGE_BYTE_8_NUM, (*sge_idx)++) {
> +		ext_seg = get_send_extend_sge(qp, ((*sge_idx) &
> +					      (qp->sge.sge_cnt - 1)));
> +		/* In the extended atomic scenario, the data_add parameter
> +		 * passes the address where the extended atomic data is stored.
> +		 */
> +		*ext_seg = data_addr ? cpu_to_le64(*(data_addr + i)) : 0;
> +		*(ext_seg + 1) = data_addr ?
> +				 cpu_to_le64(*(data_addr + (i + 1))) : 0;
>  	}
>  }
>  
> +static int set_atomic_seg(struct hns_roce_qp *qp,
> +			  const struct ib_send_wr *wr, unsigned int msg_len,
> +			  void *dseg, unsigned int *sge_idx)
> +{
> +	struct hns_roce_wqe_atomic_seg *aseg;
> +	u32 ex_sge_num;
> +
> +	dseg += sizeof(struct hns_roce_v2_wqe_data_seg);
> +	aseg = dseg;
> +
> +	if (msg_len == STANDARD_ATOMIC_BYTE_8) {
> +		if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
> +			aseg->fetchadd_swap_data =
> +				cpu_to_le64(atomic_wr(wr)->swap);
> +			aseg->cmp_data =
> +				cpu_to_le64(atomic_wr(wr)->compare_add);
> +		} else {
> +			aseg->fetchadd_swap_data =
> +				cpu_to_le64(atomic_wr(wr)->compare_add);
> +			aseg->cmp_data = 0;
> +		}
> +	} else if (msg_len == EXTEND_ATOMIC_BYTE_16 ||
> +		   msg_len == EXTEND_ATOMIC_BYTE_32 ||
> +		   msg_len == EXTEND_ATOMIC_BYTE_64) {
> +		ex_sge_num = msg_len >> 3;
> +		aseg->fetchadd_swap_data = 0;
> +		aseg->cmp_data = 0;
> +		if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
> +			set_extend_atomic_seg(qp, ex_sge_num, sge_idx,
> +					(u64 *)atomic_wr(wr)->swap);
> +			set_extend_atomic_seg(qp, ex_sge_num, sge_idx,
> +					(u64 *)atomic_wr(wr)->compare_add);
> +		} else {
> +			set_extend_atomic_seg(qp, ex_sge_num, sge_idx,
> +					(u64 *)atomic_wr(wr)->compare_add);
> +			set_extend_atomic_seg(qp, ex_sge_num, sge_idx, 0);
> +		}
> +	} else
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
>  static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
>  			   unsigned int *sge_ind)
>  {
> @@ -545,8 +595,12 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
>  
>  				dseg = wqe;
>  				set_data_seg_v2(dseg, wr->sg_list);
> -				wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
> -				set_atomic_seg(wqe, atomic_wr(wr));
> +				ret = set_atomic_seg(qp, wr, rc_sq_wqe->msg_len,
> +						     dseg, &sge_idx);
> +				if (ret) {
> +					*bad_wr = wr;
> +					goto out;
> +				}
>  				roce_set_field(rc_sq_wqe->byte_16,
>  					       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
>  					       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
> @@ -1668,7 +1722,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
>  	caps->max_srq_desc_sz	= HNS_ROCE_V2_MAX_SRQ_DESC_SZ;
>  	caps->qpc_entry_sz	= HNS_ROCE_V2_QPC_ENTRY_SZ;
>  	caps->irrl_entry_sz	= HNS_ROCE_V2_IRRL_ENTRY_SZ;
> -	caps->trrl_entry_sz	= HNS_ROCE_V2_TRRL_ENTRY_SZ;
> +	caps->trrl_entry_sz	= HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ;
>  	caps->cqc_entry_sz	= HNS_ROCE_V2_CQC_ENTRY_SZ;
>  	caps->srqc_entry_sz	= HNS_ROCE_V2_SRQC_ENTRY_SZ;
>  	caps->mtpt_entry_sz	= HNS_ROCE_V2_MTPT_ENTRY_SZ;
> @@ -2860,19 +2914,19 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
>  			break;
>  		case HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP:
>  			wc->opcode = IB_WC_COMP_SWAP;
> -			wc->byte_len  = 8;
> +			wc->byte_len  = le32_to_cpu(cqe->byte_cnt);
>  			break;
>  		case HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD:
>  			wc->opcode = IB_WC_FETCH_ADD;
> -			wc->byte_len  = 8;
> +			wc->byte_len  = le32_to_cpu(cqe->byte_cnt);
>  			break;
>  		case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP:
>  			wc->opcode = IB_WC_MASKED_COMP_SWAP;
> -			wc->byte_len  = 8;
> +			wc->byte_len  = le32_to_cpu(cqe->byte_cnt);
>  			break;
>  		case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD:
>  			wc->opcode = IB_WC_MASKED_FETCH_ADD;
> -			wc->byte_len  = 8;
> +			wc->byte_len  = le32_to_cpu(cqe->byte_cnt);
>  			break;
>  		case HNS_ROCE_SQ_OPCODE_FAST_REG_WR:
>  			wc->opcode = IB_WC_REG_MR;
> @@ -3211,6 +3265,9 @@ static void set_access_flags(struct hns_roce_qp *hr_qp,
>  	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
>  		     !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
>  	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
> +	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S,
> +		     !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
> +	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, 0);
>  }
>  
>  static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
> @@ -3578,6 +3635,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
>  			     IB_ACCESS_REMOTE_ATOMIC));
>  		roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
>  			     0);
> +		roce_set_bit(context->byte_76_srqn_op_en,
> +			     V2_QPC_BYTE_76_EXT_ATE_S,
> +			     !!(attr->qp_access_flags &
> +				IB_ACCESS_REMOTE_ATOMIC));
> +		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
> +			     V2_QPC_BYTE_76_EXT_ATE_S, 0);
>  	} else {
>  		roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
>  			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ));
> @@ -3593,6 +3656,13 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
>  			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
>  		roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
>  			     0);
> +
> +		roce_set_bit(context->byte_76_srqn_op_en,
> +			     V2_QPC_BYTE_76_EXT_ATE_S,
> +			     !!(hr_qp->access_flags &
> +				IB_ACCESS_REMOTE_ATOMIC));
> +		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
> +			     V2_QPC_BYTE_76_EXT_ATE_S, 0);
>  	}
>  
>  	roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> index 76a14db..0a9d1e5 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> @@ -81,6 +81,7 @@
>  #define HNS_ROCE_V2_QPC_ENTRY_SZ		256
>  #define HNS_ROCE_V2_IRRL_ENTRY_SZ		64
>  #define HNS_ROCE_V2_TRRL_ENTRY_SZ		48
> +#define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ	100
>  #define HNS_ROCE_V2_CQC_ENTRY_SZ		64
>  #define HNS_ROCE_V2_SRQC_ENTRY_SZ		64
>  #define HNS_ROCE_V2_MTPT_ENTRY_SZ		64
> @@ -158,6 +159,12 @@ enum {
>  
>  #define HNS_ROCE_V2_CQE_QPN_MASK		0x3ffff
>  
> +#define EXT_SGE_BYTE_8_NUM	2
> +#define STANDARD_ATOMIC_BYTE_8	0x8
> +#define EXTEND_ATOMIC_BYTE_16	0x10
> +#define EXTEND_ATOMIC_BYTE_32	0x20
> +#define EXTEND_ATOMIC_BYTE_64	0x40
> +
>  enum {
>  	HNS_ROCE_V2_WQE_OP_SEND				= 0x0,
>  	HNS_ROCE_V2_WQE_OP_SEND_WITH_INV		= 0x1,
> @@ -644,6 +651,7 @@ struct hns_roce_v2_qp_context {
>  
>  #define	V2_QPC_BYTE_76_RQIE_S 28
>  
> +#define	V2_QPC_BYTE_76_EXT_ATE_S 29
>  #define	V2_QPC_BYTE_76_RQ_VLAN_EN_S 30
>  #define	V2_QPC_BYTE_80_RX_CQN_S 0
>  #define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0)
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH for-next] RDMA/hns: Add support for extended atomic
  2019-12-10 13:10 ` Weihang Li
@ 2019-12-19 19:07   ` Jason Gunthorpe
  0 siblings, 0 replies; 9+ messages in thread
From: Jason Gunthorpe @ 2019-12-19 19:07 UTC (permalink / raw)
  To: Weihang Li; +Cc: dledford, linux-rdma, linuxarm

On Tue, Dec 10, 2019 at 09:10:24PM +0800, Weihang Li wrote:
> Hi Jason and Doug,
> 
> Do you have some comments on this patch?

It seemed OK to me

Jason

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH for-next] RDMA/hns: Add support for extended atomic
  2019-11-15  1:39 [PATCH for-next] RDMA/hns: Add support for extended atomic Weihang Li
  2019-12-10 13:10 ` Weihang Li
@ 2020-01-02 21:03 ` Jason Gunthorpe
  2020-01-03  5:57   ` Weihang Li
  1 sibling, 1 reply; 9+ messages in thread
From: Jason Gunthorpe @ 2020-01-02 21:03 UTC (permalink / raw)
  To: Weihang Li; +Cc: dledford, linux-rdma, linuxarm

On Fri, Nov 15, 2019 at 09:39:26AM +0800, Weihang Li wrote:
> From: Jiaran Zhang <zhangjiaran@huawei.com>
> 
> Support extended atomic operations including cmp & swap and fetch & add
> of 8 bytes, 16 bytes, 32 bytes, 64 bytes on hip08.
> 
> Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
> Signed-off-by: Weihang Li <liweihang@hisilicon.com>
> ---
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 100 ++++++++++++++++++++++++-----
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h |   8 +++
>  2 files changed, 93 insertions(+), 15 deletions(-)

How is this related to the userspace patch:

https://github.com/linux-rdma/rdma-core/pull/640

?

Under what conditions would the kernel part be needed?

Confused because we have no kernel users of extended atomic.

Jason

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH for-next] RDMA/hns: Add support for extended atomic
  2020-01-02 21:03 ` Jason Gunthorpe
@ 2020-01-03  5:57   ` Weihang Li
  2020-01-03 13:39     ` Jason Gunthorpe
  0 siblings, 1 reply; 9+ messages in thread
From: Weihang Li @ 2020-01-03  5:57 UTC (permalink / raw)
  To: Jason Gunthorpe, Weihang Li; +Cc: dledford, linux-rdma, linuxarm



On 2020/1/3 5:03, Jason Gunthorpe wrote:
> On Fri, Nov 15, 2019 at 09:39:26AM +0800, Weihang Li wrote:
>> From: Jiaran Zhang <zhangjiaran@huawei.com>
>>
>> Support extended atomic operations including cmp & swap and fetch & add
>> of 8 bytes, 16 bytes, 32 bytes, 64 bytes on hip08.
>>
>> Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
>> Signed-off-by: Weihang Li <liweihang@hisilicon.com>
>> ---
>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 100 ++++++++++++++++++++++++-----
>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h |   8 +++
>>  2 files changed, 93 insertions(+), 15 deletions(-)
> 
> How is this related to the userspace patch:
> 
> https://github.com/linux-rdma/rdma-core/pull/640
> 
> ?
> 
> Under what conditions would the kernel part be needed?
> 
> Confused because we have no kernel users of extended atomic.
> 
> Jason
> 
> 

Hi Jason,

This patch has no relationship with the userspace one you pointed out.
But I have pushed a userspace patch that support extended atomic on hip08,
maybe you were asking about the following one:

https://github.com/linux-rdma/rdma-core/pull/638

The kernel part is not needed by the userspace part, they are independent
of each other.

We made this patch because we noticed that some other providers have also
supported this feature in the kernel, and maybe there will be some kernel
users in the future. I would be grateful if you could give me more suggestions.

Thank you
Weihang



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH for-next] RDMA/hns: Add support for extended atomic
  2020-01-03  5:57   ` Weihang Li
@ 2020-01-03 13:39     ` Jason Gunthorpe
  2020-01-14  1:53       ` Weihang Li
  0 siblings, 1 reply; 9+ messages in thread
From: Jason Gunthorpe @ 2020-01-03 13:39 UTC (permalink / raw)
  To: Weihang Li; +Cc: Weihang Li, dledford, linux-rdma, linuxarm

On Fri, Jan 03, 2020 at 01:57:22PM +0800, Weihang Li wrote:

 
> This patch has no relationship with the userspace one you pointed out.
> But I have pushed a userspace patch that support extended atomic on hip08,
> maybe you were asking about the following one:
> 
> https://github.com/linux-rdma/rdma-core/pull/638

Right, sorry
 
> The kernel part is not needed by the userspace part, they are independent
> of each other.
> 
> We made this patch because we noticed that some other providers have also
> supported this feature in the kernel, and maybe there will be some kernel
> users in the future. I would be grateful if you could give me more suggestions.

I think we have no kernel users of extended atomics; it is probably a
mistake that other providers implemented this in the kernel.

I would advise against making the hns send path more complicated with
dead code.

Jason

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH for-next] RDMA/hns: Add support for extended atomic
  2020-01-03 13:39     ` Jason Gunthorpe
@ 2020-01-14  1:53       ` Weihang Li
  2020-01-14 13:29         ` Jason Gunthorpe
  0 siblings, 1 reply; 9+ messages in thread
From: Weihang Li @ 2020-01-14  1:53 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Weihang Li, dledford, linux-rdma, linuxarm



On 2020/1/3 21:39, Jason Gunthorpe wrote:
> On Fri, Jan 03, 2020 at 01:57:22PM +0800, Weihang Li wrote:
> 
>  
>> This patch has no relationship with the userspace one you pointed out.
>> But I have pushed a userspace patch that support extended atomic on hip08,
>> maybe you were asking about the following one:
>>
>> https://github.com/linux-rdma/rdma-core/pull/638
> 
> Right, sorry
>  
>> The kernel part is not needed by the userspace part, they are independent
>> of each other.
>>
>> We made this patch because we noticed that some other providers have also
>> supported this feature in the kernel, and maybe there will be some kernel
>> users in the future. I would be grateful if you could give me more suggestions.
> 
> I think we have no kernel users of extended atomics; it is probably a
> mistake that other providers implemented this in the kernel.
> 
> I would advise against making the hns send path more complicated with
> dead code.
> 
> Jason
> 
> 

Hi Jason,

Thanks for your reminder about extended atomic in kernel and sorry for the
delayed response.

We will cancel this patch as you suggested, and will send another one
which the user-space extended atomic support depends on.

Weihang


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH for-next] RDMA/hns: Add support for extended atomic
  2020-01-14  1:53       ` Weihang Li
@ 2020-01-14 13:29         ` Jason Gunthorpe
  2020-01-15  1:30           ` Weihang Li
  0 siblings, 1 reply; 9+ messages in thread
From: Jason Gunthorpe @ 2020-01-14 13:29 UTC (permalink / raw)
  To: Weihang Li; +Cc: Weihang Li, dledford, linux-rdma, linuxarm

On Tue, Jan 14, 2020 at 09:53:07AM +0800, Weihang Li wrote:
> Thanks for your reminder about extended atomic in kernel and sorry for the
> delayed response.
> 
> We will cancel this patch as you suggested, and will send another one
> which the user-space extended atomic support depends on.

You should not have allowed your userspace to be merged until any
required kernel pieces were merged.

Jason

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH for-next] RDMA/hns: Add support for extended atomic
  2020-01-14 13:29         ` Jason Gunthorpe
@ 2020-01-15  1:30           ` Weihang Li
  0 siblings, 0 replies; 9+ messages in thread
From: Weihang Li @ 2020-01-15  1:30 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Weihang Li, dledford, linux-rdma, linuxarm



On 2020/1/14 21:29, Jason Gunthorpe wrote:
> On Tue, Jan 14, 2020 at 09:53:07AM +0800, Weihang Li wrote:
>> Thanks for your reminder about extended atomic in kernel and sorry for the
>> delayed response.
>>
>> We will cancel this patch as you suggested, and will send another one
>> which the user-space extended atomic support depends on.
> 
> You should not have allowed your userspace to be merged until any
> required kernel pieces were merged.
> 
> Jason
> 
> 

Hi Jason,

Sorry for that — I missed two related changes that configure the QP
context to enable the extended atomic capability.
I will send them as soon as possible.

Weihang


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, back to index

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-15  1:39 [PATCH for-next] RDMA/hns: Add support for extended atomic Weihang Li
2019-12-10 13:10 ` Weihang Li
2019-12-19 19:07   ` Jason Gunthorpe
2020-01-02 21:03 ` Jason Gunthorpe
2020-01-03  5:57   ` Weihang Li
2020-01-03 13:39     ` Jason Gunthorpe
2020-01-14  1:53       ` Weihang Li
2020-01-14 13:29         ` Jason Gunthorpe
2020-01-15  1:30           ` Weihang Li

Linux-RDMA Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-rdma/0 linux-rdma/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-rdma linux-rdma/ https://lore.kernel.org/linux-rdma \
		linux-rdma@vger.kernel.org
	public-inbox-index linux-rdma

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-rdma


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git