All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 for-next 0/2] RDMA/hns: Add supports for stash
@ 2020-11-16 11:58 Weihang Li
  2020-11-16 11:58 ` [PATCH v2 for-next 1/2] RDMA/hns: Add support for CQ stash Weihang Li
  2020-11-16 11:58 ` [PATCH v2 for-next 2/2] RDMA/hns: Add support for QP stash Weihang Li
  0 siblings, 2 replies; 11+ messages in thread
From: Weihang Li @ 2020-11-16 11:58 UTC (permalink / raw)
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm

Stash is a mechanism that uses the core information carried by the ARM AXI
bus to access the L3 cache. The CPU and I/O subsystems can access the L3
cache consistently by enabling stash, so the performance can be improved.

Changes since v1:
- Fix comments from Jason about the unused macros.
- Rewrite hr_reg_set() to make it easier for driver to define and set a
  field.
link: https://patchwork.kernel.org/project/linux-rdma/cover/1601458452-55263-1-git-send-email-liweihang@huawei.com/

Lang Cheng (2):
  RDMA/hns: Add support for CQ stash
  RDMA/hns: Add support for QP stash

 drivers/infiniband/hw/hns/hns_roce_common.h | 12 +++++++++
 drivers/infiniband/hw/hns/hns_roce_device.h |  1 +
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  9 +++++++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  | 41 ++++++++++++++++++-----------
 4 files changed, 47 insertions(+), 16 deletions(-)

-- 
2.8.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2 for-next 1/2] RDMA/hns: Add support for CQ stash
  2020-11-16 11:58 [PATCH v2 for-next 0/2] RDMA/hns: Add supports for stash Weihang Li
@ 2020-11-16 11:58 ` Weihang Li
  2020-11-16 13:46   ` Leon Romanovsky
  2020-11-16 11:58 ` [PATCH v2 for-next 2/2] RDMA/hns: Add support for QP stash Weihang Li
  1 sibling, 1 reply; 11+ messages in thread
From: Weihang Li @ 2020-11-16 11:58 UTC (permalink / raw)
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm

From: Lang Cheng <chenglang@huawei.com>

Stash is a mechanism that uses the core information carried by the ARM AXI
bus to access the L3 cache. It can be used to improve the performance by
increasing the hit ratio of L3 cache. CQs need to enable stash by default.

Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_common.h | 12 +++++++++
 drivers/infiniband/hw/hns/hns_roce_device.h |  1 +
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  3 +++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  | 39 +++++++++++++++++------------
 4 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index f5669ff..8d96c4e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -53,6 +53,18 @@
 #define roce_set_bit(origin, shift, val) \
 	roce_set_field((origin), (1ul << (shift)), (shift), (val))
 
+#define FIELD_LOC(field_h, field_l) field_h, field_l
+
+#define _hr_reg_set(arr, field_h, field_l)                                     \
+	do {                                                                   \
+		BUILD_BUG_ON(((field_h) / 32) != ((field_l) / 32));            \
+		BUILD_BUG_ON((field_h) / 32 >= ARRAY_SIZE(arr));               \
+		(arr)[(field_h) / 32] |=                                       \
+			cpu_to_le32(GENMASK((field_h) % 32, (field_l) % 32));  \
+	} while (0)
+
+#define hr_reg_set(arr, field) _hr_reg_set(arr, field)
+
 #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3
 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 1d99022..ab7df8e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -223,6 +223,7 @@ enum {
 	HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL		= BIT(9),
 	HNS_ROCE_CAP_FLAG_ATOMIC		= BIT(10),
 	HNS_ROCE_CAP_FLAG_SDI_MODE		= BIT(14),
+	HNS_ROCE_CAP_FLAG_STASH			= BIT(17),
 };
 
 #define HNS_ROCE_DB_TYPE_COUNT			2
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 4d697e4..5fd0458 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -3177,6 +3177,9 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
 		       V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size ==
 		       HNS_ROCE_V3_CQE_SIZE ? 1 : 0);
 
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH)
+		hr_reg_set(cq_context->raw, CQC_STASH);
+
 	cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0]));
 
 	roce_set_field(cq_context->byte_16_hop_addr,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 1409d05..50a5187 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -267,22 +267,27 @@ enum hns_roce_sgid_type {
 };
 
 struct hns_roce_v2_cq_context {
-	__le32	byte_4_pg_ceqn;
-	__le32	byte_8_cqn;
-	__le32	cqe_cur_blk_addr;
-	__le32	byte_16_hop_addr;
-	__le32	cqe_nxt_blk_addr;
-	__le32	byte_24_pgsz_addr;
-	__le32	byte_28_cq_pi;
-	__le32	byte_32_cq_ci;
-	__le32	cqe_ba;
-	__le32	byte_40_cqe_ba;
-	__le32	byte_44_db_record;
-	__le32	db_record_addr;
-	__le32	byte_52_cqe_cnt;
-	__le32	byte_56_cqe_period_maxcnt;
-	__le32	cqe_report_timer;
-	__le32	byte_64_se_cqe_idx;
+	union {
+		struct {
+			__le32 byte_4_pg_ceqn;
+			__le32 byte_8_cqn;
+			__le32 cqe_cur_blk_addr;
+			__le32 byte_16_hop_addr;
+			__le32 cqe_nxt_blk_addr;
+			__le32 byte_24_pgsz_addr;
+			__le32 byte_28_cq_pi;
+			__le32 byte_32_cq_ci;
+			__le32 cqe_ba;
+			__le32 byte_40_cqe_ba;
+			__le32 byte_44_db_record;
+			__le32 db_record_addr;
+			__le32 byte_52_cqe_cnt;
+			__le32 byte_56_cqe_period_maxcnt;
+			__le32 cqe_report_timer;
+			__le32 byte_64_se_cqe_idx;
+		};
+		__le32 raw[16];
+	};
 };
 #define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0
 #define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL	0x0
@@ -360,6 +365,8 @@ struct hns_roce_v2_cq_context {
 #define	V2_CQC_BYTE_64_SE_CQE_IDX_S 0
 #define	V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0)
 
+#define CQC_STASH FIELD_LOC(63, 63)
+
 struct hns_roce_srq_context {
 	__le32	byte_4_srqn_srqst;
 	__le32	byte_8_limit_wl;
-- 
2.8.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v2 for-next 2/2] RDMA/hns: Add support for QP stash
  2020-11-16 11:58 [PATCH v2 for-next 0/2] RDMA/hns: Add supports for stash Weihang Li
  2020-11-16 11:58 ` [PATCH v2 for-next 1/2] RDMA/hns: Add support for CQ stash Weihang Li
@ 2020-11-16 11:58 ` Weihang Li
  1 sibling, 0 replies; 11+ messages in thread
From: Weihang Li @ 2020-11-16 11:58 UTC (permalink / raw)
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm

From: Lang Cheng <chenglang@huawei.com>

Stash is a mechanism that uses the core information carried by the ARM AXI
bus to access the L3 cache. It can be used to improve the performance by
increasing the hit ratio of L3 cache. QPs need to enable stash by default.

Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 ++++++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 5fd0458..eb2838a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -3986,6 +3986,12 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 	hr_qp->access_flags = attr->qp_access_flags;
 	roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
 		       V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn);
+
+	if (hr_dev->caps.qpc_sz < HNS_ROCE_V3_QPC_SZ)
+		return;
+
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH)
+		hr_reg_set(context->ext, QPCEX_STASH);
 }
 
 static void modify_qp_init_to_init(struct ib_qp *ibqp,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 50a5187..4679afb 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -898,6 +898,8 @@ struct hns_roce_v2_qp_context {
 #define	V2_QPC_BYTE_256_SQ_FLUSH_IDX_S 16
 #define V2_QPC_BYTE_256_SQ_FLUSH_IDX_M GENMASK(31, 16)
 
+#define QPCEX_STASH FIELD_LOC(82, 82)
+
 #define	V2_QP_RWE_S 1 /* rdma write enable */
 #define	V2_QP_RRE_S 2 /* rdma read enable */
 #define	V2_QP_ATE_S 3 /* rdma atomic enable */
-- 
2.8.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 for-next 1/2] RDMA/hns: Add support for CQ stash
  2020-11-16 11:58 ` [PATCH v2 for-next 1/2] RDMA/hns: Add support for CQ stash Weihang Li
@ 2020-11-16 13:46   ` Leon Romanovsky
  2020-11-17  6:37     ` liweihang
  0 siblings, 1 reply; 11+ messages in thread
From: Leon Romanovsky @ 2020-11-16 13:46 UTC (permalink / raw)
  To: Weihang Li; +Cc: dledford, jgg, linux-rdma, linuxarm

On Mon, Nov 16, 2020 at 07:58:38PM +0800, Weihang Li wrote:
> From: Lang Cheng <chenglang@huawei.com>
>
> Stash is a mechanism that uses the core information carried by the ARM AXI
> bus to access the L3 cache. It can be used to improve the performance by
> increasing the hit ratio of L3 cache. CQs need to enable stash by default.
>
> Signed-off-by: Lang Cheng <chenglang@huawei.com>
> Signed-off-by: Weihang Li <liweihang@huawei.com>
> ---
>  drivers/infiniband/hw/hns/hns_roce_common.h | 12 +++++++++
>  drivers/infiniband/hw/hns/hns_roce_device.h |  1 +
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  3 +++
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  | 39 +++++++++++++++++------------
>  4 files changed, 39 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
> index f5669ff..8d96c4e 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_common.h
> +++ b/drivers/infiniband/hw/hns/hns_roce_common.h
> @@ -53,6 +53,18 @@
>  #define roce_set_bit(origin, shift, val) \
>  	roce_set_field((origin), (1ul << (shift)), (shift), (val))
>
> +#define FIELD_LOC(field_h, field_l) field_h, field_l
> +
> +#define _hr_reg_set(arr, field_h, field_l)                                     \
> +	do {                                                                   \
> +		BUILD_BUG_ON(((field_h) / 32) != ((field_l) / 32));            \
> +		BUILD_BUG_ON((field_h) / 32 >= ARRAY_SIZE(arr));               \
> +		(arr)[(field_h) / 32] |=                                       \
> +			cpu_to_le32(GENMASK((field_h) % 32, (field_l) % 32));  \
> +	} while (0)
> +
> +#define hr_reg_set(arr, field) _hr_reg_set(arr, field)

I afraid that it is too much.
1. FIELD_LOC() macro to hide two fields.
2. hr_reg_set and  _hr_reg_set are the same.
3. In both patches field_h and field_l are the same.
4. "do {} while (0)" without need.

Thanks

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 for-next 1/2] RDMA/hns: Add support for CQ stash
  2020-11-16 13:46   ` Leon Romanovsky
@ 2020-11-17  6:37     ` liweihang
  2020-11-17  7:20       ` Leon Romanovsky
  0 siblings, 1 reply; 11+ messages in thread
From: liweihang @ 2020-11-17  6:37 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: dledford, jgg, linux-rdma, Linuxarm

On 2020/11/16 21:47, Leon Romanovsky wrote:
> On Mon, Nov 16, 2020 at 07:58:38PM +0800, Weihang Li wrote:
>> From: Lang Cheng <chenglang@huawei.com>
>>
>> Stash is a mechanism that uses the core information carried by the ARM AXI
>> bus to access the L3 cache. It can be used to improve the performance by
>> increasing the hit ratio of L3 cache. CQs need to enable stash by default.
>>
>> Signed-off-by: Lang Cheng <chenglang@huawei.com>
>> Signed-off-by: Weihang Li <liweihang@huawei.com>
>> ---
>>  drivers/infiniband/hw/hns/hns_roce_common.h | 12 +++++++++
>>  drivers/infiniband/hw/hns/hns_roce_device.h |  1 +
>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  3 +++
>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  | 39 +++++++++++++++++------------
>>  4 files changed, 39 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
>> index f5669ff..8d96c4e 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_common.h
>> +++ b/drivers/infiniband/hw/hns/hns_roce_common.h
>> @@ -53,6 +53,18 @@
>>  #define roce_set_bit(origin, shift, val) \
>>  	roce_set_field((origin), (1ul << (shift)), (shift), (val))
>>
>> +#define FIELD_LOC(field_h, field_l) field_h, field_l
>> +
>> +#define _hr_reg_set(arr, field_h, field_l)                                     \
>> +	do {                                                                   \
>> +		BUILD_BUG_ON(((field_h) / 32) != ((field_l) / 32));            \
>> +		BUILD_BUG_ON((field_h) / 32 >= ARRAY_SIZE(arr));               \
>> +		(arr)[(field_h) / 32] |=                                       \
>> +			cpu_to_le32(GENMASK((field_h) % 32, (field_l) % 32));  \
>> +	} while (0)
>> +
>> +#define hr_reg_set(arr, field) _hr_reg_set(arr, field)
> 
> I afraid that it is too much.

Hi Leon,

Thanks for the comments.

> 1. FIELD_LOC() macro to hide two fields.

Jason has suggested us to simplify the function of setting/getting bit/field in
hns driver like IBA_SET and IBA_GET.

https://patchwork.kernel.org/project/linux-rdma/patch/1589982799-28728-3-git-send-email-liweihang@huawei.com/

So we try to make it easier and clearer to define a bitfield for developers.

For example:

	#define QPCEX_SRC_ID FIELD_LOC(94, 84)

	hr_reg_set(context->ext, QPCEX_SRC_ID);

This will set 84 ~ 91 bit of QPC to 1. Without FIELD_LOC(), it may look
like:

	#define QPCEX_SRC_ID_START 84
	#define QPCEX_SRC_ID_END 94

	hr_reg_set(context->ext, QPCEX_SRC_ID_END, QPCEX_SRC_ID_START);

> 2. hr_reg_set and  _hr_reg_set are the same.

'field' will be spilted into two parts: 'field_h' and 'field_l':

	#define _hr_reg_set(arr, field_h, field_l)
	...

	#define hr_reg_set(arr, field) _hr_reg_set(arr, field)

> 3. In both patches field_h and field_l are the same.

This is because we want hr_reg_set() to be used both for setting bits and for
setting fields. In this series, we just use it to set bit.

> 4. "do {} while (0)" without need.

OK, I will remove the do-while and replace BUILD_BUG_ON with BUILD_BUG_ON_ZERO:

	#define _hr_reg_set(arr, field_h, field_l)                                  \
		(arr)[(field_h) / 32] |=                                            \
			cpu_to_le32(GENMASK((field_h) % 32, (field_l) % 32)) +      \
			BUILD_BUG_ON_ZERO(((field_h) / 32) != ((field_l) / 32)) +   \
			BUILD_BUG_ON_ZERO((field_h) / 32 >= ARRAY_SIZE(arr));

Weihang

> 
> Thanks
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 for-next 1/2] RDMA/hns: Add support for CQ stash
  2020-11-17  6:37     ` liweihang
@ 2020-11-17  7:20       ` Leon Romanovsky
  2020-11-17  8:35         ` liweihang
  0 siblings, 1 reply; 11+ messages in thread
From: Leon Romanovsky @ 2020-11-17  7:20 UTC (permalink / raw)
  To: liweihang; +Cc: dledford, jgg, linux-rdma, Linuxarm

On Tue, Nov 17, 2020 at 06:37:58AM +0000, liweihang wrote:
> On 2020/11/16 21:47, Leon Romanovsky wrote:
> > On Mon, Nov 16, 2020 at 07:58:38PM +0800, Weihang Li wrote:
> >> From: Lang Cheng <chenglang@huawei.com>
> >>
> >> Stash is a mechanism that uses the core information carried by the ARM AXI
> >> bus to access the L3 cache. It can be used to improve the performance by
> >> increasing the hit ratio of L3 cache. CQs need to enable stash by default.
> >>
> >> Signed-off-by: Lang Cheng <chenglang@huawei.com>
> >> Signed-off-by: Weihang Li <liweihang@huawei.com>
> >> ---
> >>  drivers/infiniband/hw/hns/hns_roce_common.h | 12 +++++++++
> >>  drivers/infiniband/hw/hns/hns_roce_device.h |  1 +
> >>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  3 +++
> >>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  | 39 +++++++++++++++++------------
> >>  4 files changed, 39 insertions(+), 16 deletions(-)
> >>
> >> diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
> >> index f5669ff..8d96c4e 100644
> >> --- a/drivers/infiniband/hw/hns/hns_roce_common.h
> >> +++ b/drivers/infiniband/hw/hns/hns_roce_common.h
> >> @@ -53,6 +53,18 @@
> >>  #define roce_set_bit(origin, shift, val) \
> >>  	roce_set_field((origin), (1ul << (shift)), (shift), (val))
> >>
> >> +#define FIELD_LOC(field_h, field_l) field_h, field_l
> >> +
> >> +#define _hr_reg_set(arr, field_h, field_l)                                     \
> >> +	do {                                                                   \
> >> +		BUILD_BUG_ON(((field_h) / 32) != ((field_l) / 32));            \
> >> +		BUILD_BUG_ON((field_h) / 32 >= ARRAY_SIZE(arr));               \
> >> +		(arr)[(field_h) / 32] |=                                       \
> >> +			cpu_to_le32(GENMASK((field_h) % 32, (field_l) % 32));  \
> >> +	} while (0)
> >> +
> >> +#define hr_reg_set(arr, field) _hr_reg_set(arr, field)
> >
> > I afraid that it is too much.
>
> Hi Leon,
>
> Thanks for the comments.
>
> > 1. FIELD_LOC() macro to hide two fields.
>
> Jason has suggested us to simplify the function of setting/getting bit/field in
> hns driver like IBA_SET and IBA_GET.
>
> https://patchwork.kernel.org/project/linux-rdma/patch/1589982799-28728-3-git-send-email-liweihang@huawei.com/
>
> So we try to make it easier and clearer to define a bitfield for developers.

Jason asked to use genmask and FIELD_PREP, but you invented something else.

Thanks

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 for-next 1/2] RDMA/hns: Add support for CQ stash
  2020-11-17  7:20       ` Leon Romanovsky
@ 2020-11-17  8:35         ` liweihang
  2020-11-17  8:50           ` Leon Romanovsky
  0 siblings, 1 reply; 11+ messages in thread
From: liweihang @ 2020-11-17  8:35 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: dledford, jgg, linux-rdma, Linuxarm

On 2020/11/17 15:21, Leon Romanovsky wrote:
> On Tue, Nov 17, 2020 at 06:37:58AM +0000, liweihang wrote:
>> On 2020/11/16 21:47, Leon Romanovsky wrote:
>>> On Mon, Nov 16, 2020 at 07:58:38PM +0800, Weihang Li wrote:
>>>> From: Lang Cheng <chenglang@huawei.com>
>>>>
>>>> Stash is a mechanism that uses the core information carried by the ARM AXI
>>>> bus to access the L3 cache. It can be used to improve the performance by
>>>> increasing the hit ratio of L3 cache. CQs need to enable stash by default.
>>>>
>>>> Signed-off-by: Lang Cheng <chenglang@huawei.com>
>>>> Signed-off-by: Weihang Li <liweihang@huawei.com>
>>>> ---
>>>>  drivers/infiniband/hw/hns/hns_roce_common.h | 12 +++++++++
>>>>  drivers/infiniband/hw/hns/hns_roce_device.h |  1 +
>>>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  3 +++
>>>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  | 39 +++++++++++++++++------------
>>>>  4 files changed, 39 insertions(+), 16 deletions(-)
>>>>
>>>> diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
>>>> index f5669ff..8d96c4e 100644
>>>> --- a/drivers/infiniband/hw/hns/hns_roce_common.h
>>>> +++ b/drivers/infiniband/hw/hns/hns_roce_common.h
>>>> @@ -53,6 +53,18 @@
>>>>  #define roce_set_bit(origin, shift, val) \
>>>>  	roce_set_field((origin), (1ul << (shift)), (shift), (val))
>>>>
>>>> +#define FIELD_LOC(field_h, field_l) field_h, field_l
>>>> +
>>>> +#define _hr_reg_set(arr, field_h, field_l)                                     \
>>>> +	do {                                                                   \
>>>> +		BUILD_BUG_ON(((field_h) / 32) != ((field_l) / 32));            \
>>>> +		BUILD_BUG_ON((field_h) / 32 >= ARRAY_SIZE(arr));               \
>>>> +		(arr)[(field_h) / 32] |=                                       \
>>>> +			cpu_to_le32(GENMASK((field_h) % 32, (field_l) % 32));  \
>>>> +	} while (0)
>>>> +
>>>> +#define hr_reg_set(arr, field) _hr_reg_set(arr, field)
>>>
>>> I afraid that it is too much.
>>
>> Hi Leon,
>>
>> Thanks for the comments.
>>
>>> 1. FIELD_LOC() macro to hide two fields.
>>
>> Jason has suggested us to simplify the function of setting/getting bit/field in
>> hns driver like IBA_SET and IBA_GET.
>>
>> https://patchwork.kernel.org/project/linux-rdma/patch/1589982799-28728-3-git-send-email-liweihang@huawei.com/
>>
>> So we try to make it easier and clearer to define a bitfield for developers.
> 
> Jason asked to use genmask and FIELD_PREP, but you invented something else.
> 
> Thanks
> 

We use them in another interface 'hr_reg_write(arr, field, val)' which hasn't been
used in this series.

Does it make any unacceptable mistake? We would appreciate any suggestions :)

Thanks
Weihang



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 for-next 1/2] RDMA/hns: Add support for CQ stash
  2020-11-17  8:35         ` liweihang
@ 2020-11-17  8:50           ` Leon Romanovsky
  2020-11-18 10:49             ` liweihang
  0 siblings, 1 reply; 11+ messages in thread
From: Leon Romanovsky @ 2020-11-17  8:50 UTC (permalink / raw)
  To: liweihang; +Cc: dledford, jgg, linux-rdma, Linuxarm

On Tue, Nov 17, 2020 at 08:35:55AM +0000, liweihang wrote:
> On 2020/11/17 15:21, Leon Romanovsky wrote:
> > On Tue, Nov 17, 2020 at 06:37:58AM +0000, liweihang wrote:
> >> On 2020/11/16 21:47, Leon Romanovsky wrote:
> >>> On Mon, Nov 16, 2020 at 07:58:38PM +0800, Weihang Li wrote:
> >>>> From: Lang Cheng <chenglang@huawei.com>
> >>>>
> >>>> Stash is a mechanism that uses the core information carried by the ARM AXI
> >>>> bus to access the L3 cache. It can be used to improve the performance by
> >>>> increasing the hit ratio of L3 cache. CQs need to enable stash by default.
> >>>>
> >>>> Signed-off-by: Lang Cheng <chenglang@huawei.com>
> >>>> Signed-off-by: Weihang Li <liweihang@huawei.com>
> >>>> ---
> >>>>  drivers/infiniband/hw/hns/hns_roce_common.h | 12 +++++++++
> >>>>  drivers/infiniband/hw/hns/hns_roce_device.h |  1 +
> >>>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  3 +++
> >>>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  | 39 +++++++++++++++++------------
> >>>>  4 files changed, 39 insertions(+), 16 deletions(-)
> >>>>
> >>>> diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
> >>>> index f5669ff..8d96c4e 100644
> >>>> --- a/drivers/infiniband/hw/hns/hns_roce_common.h
> >>>> +++ b/drivers/infiniband/hw/hns/hns_roce_common.h
> >>>> @@ -53,6 +53,18 @@
> >>>>  #define roce_set_bit(origin, shift, val) \
> >>>>  	roce_set_field((origin), (1ul << (shift)), (shift), (val))
> >>>>
> >>>> +#define FIELD_LOC(field_h, field_l) field_h, field_l
> >>>> +
> >>>> +#define _hr_reg_set(arr, field_h, field_l)                                     \
> >>>> +	do {                                                                   \
> >>>> +		BUILD_BUG_ON(((field_h) / 32) != ((field_l) / 32));            \
> >>>> +		BUILD_BUG_ON((field_h) / 32 >= ARRAY_SIZE(arr));               \
> >>>> +		(arr)[(field_h) / 32] |=                                       \
> >>>> +			cpu_to_le32(GENMASK((field_h) % 32, (field_l) % 32));  \
> >>>> +	} while (0)
> >>>> +
> >>>> +#define hr_reg_set(arr, field) _hr_reg_set(arr, field)
> >>>
> >>> I afraid that it is too much.
> >>
> >> Hi Leon,
> >>
> >> Thanks for the comments.
> >>
> >>> 1. FIELD_LOC() macro to hide two fields.
> >>
> >> Jason has suggested us to simplify the function of setting/getting bit/field in
> >> hns driver like IBA_SET and IBA_GET.
> >>
> >> https://patchwork.kernel.org/project/linux-rdma/patch/1589982799-28728-3-git-send-email-liweihang@huawei.com/
> >>
> >> So we try to make it easier and clearer to define a bitfield for developers.
> >
> > Jason asked to use genmask and FIELD_PREP, but you invented something else.
> >
> > Thanks
> >
>
> We use them in another interface 'hr_reg_write(arr, field, val)' which hasn't been
> used in this series.
>
> Does it make any unacceptable mistake? We would appreciate any suggestions :)

The invention of FIELD_LOC() and hr_reg_set equal to __hr_reg_set are unacceptable.
Pass directly your field_h and field_l to hr_reg_set().

Thanks


>
> Thanks
> Weihang
>
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 for-next 1/2] RDMA/hns: Add support for CQ stash
  2020-11-17  8:50           ` Leon Romanovsky
@ 2020-11-18 10:49             ` liweihang
  2020-11-18 20:07               ` Jason Gunthorpe
  0 siblings, 1 reply; 11+ messages in thread
From: liweihang @ 2020-11-18 10:49 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: dledford, jgg, linux-rdma, Linuxarm

On 2020/11/17 16:50, Leon Romanovsky wrote:
> On Tue, Nov 17, 2020 at 08:35:55AM +0000, liweihang wrote:
>> On 2020/11/17 15:21, Leon Romanovsky wrote:
>>> On Tue, Nov 17, 2020 at 06:37:58AM +0000, liweihang wrote:
>>>> On 2020/11/16 21:47, Leon Romanovsky wrote:
>>>>> On Mon, Nov 16, 2020 at 07:58:38PM +0800, Weihang Li wrote:
>>>>>> From: Lang Cheng <chenglang@huawei.com>
>>>>>>
>>>>>> Stash is a mechanism that uses the core information carried by the ARM AXI
>>>>>> bus to access the L3 cache. It can be used to improve the performance by
>>>>>> increasing the hit ratio of L3 cache. CQs need to enable stash by default.
>>>>>>
>>>>>> Signed-off-by: Lang Cheng <chenglang@huawei.com>
>>>>>> Signed-off-by: Weihang Li <liweihang@huawei.com>
>>>>>> ---
>>>>>>  drivers/infiniband/hw/hns/hns_roce_common.h | 12 +++++++++
>>>>>>  drivers/infiniband/hw/hns/hns_roce_device.h |  1 +
>>>>>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  3 +++
>>>>>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  | 39 +++++++++++++++++------------
>>>>>>  4 files changed, 39 insertions(+), 16 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
>>>>>> index f5669ff..8d96c4e 100644
>>>>>> --- a/drivers/infiniband/hw/hns/hns_roce_common.h
>>>>>> +++ b/drivers/infiniband/hw/hns/hns_roce_common.h
>>>>>> @@ -53,6 +53,18 @@
>>>>>>  #define roce_set_bit(origin, shift, val) \
>>>>>>  	roce_set_field((origin), (1ul << (shift)), (shift), (val))
>>>>>>
>>>>>> +#define FIELD_LOC(field_h, field_l) field_h, field_l
>>>>>> +
>>>>>> +#define _hr_reg_set(arr, field_h, field_l)                                     \
>>>>>> +	do {                                                                   \
>>>>>> +		BUILD_BUG_ON(((field_h) / 32) != ((field_l) / 32));            \
>>>>>> +		BUILD_BUG_ON((field_h) / 32 >= ARRAY_SIZE(arr));               \
>>>>>> +		(arr)[(field_h) / 32] |=                                       \
>>>>>> +			cpu_to_le32(GENMASK((field_h) % 32, (field_l) % 32));  \
>>>>>> +	} while (0)
>>>>>> +
>>>>>> +#define hr_reg_set(arr, field) _hr_reg_set(arr, field)
>>>>>
>>>>> I afraid that it is too much.
>>>>
>>>> Hi Leon,
>>>>
>>>> Thanks for the comments.
>>>>
>>>>> 1. FIELD_LOC() macro to hide two fields.
>>>>
>>>> Jason has suggested us to simplify the function of setting/getting bit/field in
>>>> hns driver like IBA_SET and IBA_GET.
>>>>
>>>> https://patchwork.kernel.org/project/linux-rdma/patch/1589982799-28728-3-git-send-email-liweihang@huawei.com/
>>>>
>>>> So we try to make it easier and clearer to define a bitfield for developers.
>>>
>>> Jason asked to use genmask and FIELD_PREP, but you invented something else.
>>>
>>> Thanks
>>>
>>
>> We use them in another interface 'hr_reg_write(arr, field, val)' which hasn't been
>> used in this series.
>>
>> Does it make any unacceptable mistake? We would appreciate any suggestions :)
> 
> The invention of FIELD_LOC() and hr_reg_set equal to __hr_reg_set are unacceptable.
> Pass directly your field_h and field_l to hr_reg_set().
> 
> Thanks
> 

Hi Leon,

We let hr_reg_set equal() to __hr_reg_set() because if not, there will be a compile error:

../hns_roce_hw_v2.c:4566:41: error: macro "_hr_reg_set" requires 3 arguments, but only 2 given
_hr_reg_set(cq_context->raw, CQC_STASH);

There are similar codes in iba.h, I think they are of the same reason:

	#define _IBA_SET(field_struct, field_offset, field_mask, num_bits, ptr, value) \
		({                                                                     \
			field_struct *_ptr = ptr;                                      \
			_iba_set##num_bits((void *)_ptr + (field_offset), field_mask,  \
					   FIELD_PREP(field_mask, value));             \
		})
	#define IBA_SET(field, ptr, value) _IBA_SET(field, ptr, value)

	#define IBA_FIELD64_LOC(field_struct, byte_offset)                             \
		field_struct, byte_offset, GENMASK_ULL(63, 0), 64

	#define CM_FIELD64_LOC(field_struct, byte_offset)                              \
		IBA_FIELD64_LOC(field_struct, (byte_offset + sizeof(struct ib_mad_hdr)))

	#define CM_REQ_LOCAL_CA_GUID CM_FIELD64_LOC(struct cm_req_msg, 16)

	IBA_SET(CM_REQ_LOCAL_CA_GUID, req_msg,
		be64_to_cpu(cm_id_priv->id.device->node_guid));

As I said, we want to use a single symbol to represent a field to make it
easier and clearer to define and set a bitfield for developers.

Let's compare the following implementations:

	#define _hr_reg_set(arr, field_h, field_l) \
		(arr)[(field_h) / 32] |= \
			cpu_to_le32(GENMASK((field_h) % 32, (field_l) % 32)) + \
			BUILD_BUG_ON_ZERO(((field_h) / 32) != ((field_l) / 32)) + \
			BUILD_BUG_ON_ZERO((field_h) / 32 >= ARRAY_SIZE(arr))

1)
	#define hr_reg_set(arr, field) _hr_reg_set(arr, field)

	#define QPCEX_PASID_EN FIELD_LOC(111, 95)
	hr_reg_set(context->ext, QPCEX_PASID_EN);

In this way, we just need to define a single symbol and use it easily.

2)
	#define QPCEX_PASID_EN_H 111
	#define QPCEX_PASID_EN_L 95
	_hr_reg_set(context->ext, QPCEX_PASID_EN_H, QPCEX_PASID_EN_L);

We have to define and pass 2 symbols.

3)
	#define QPCEX_PASID_EN_H 111
	#define QPCEX_PASID_EN 95
	#define hr_reg_set(arr, field) _hr_reg_set(arr, field, field##_H)
	hr_reg_set(context->ext, QPCEX_PASID_EN);

We have to define 2 symbols and use 1 symbols when setting field.

4)
	#define _hr_reg_set(arr, field_h, field_l) \
		...

	#define QPCEX_PASID_EN_M GENMASK(111, 95) //mask
	#define QPCEX_PASID_EN 95 // shift
	#define hr_reg_set(arr, field) _hr_reg_set(arr, field, field##_M)
	hr_reg_set(context->ext, QPCEX_PASID_EN);

We use mask and shift, similar with the way #3.

I think the first way is the best, and it doesn't seem to have any side effects.

Thanks
Weihang

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 for-next 1/2] RDMA/hns: Add support for CQ stash
  2020-11-18 10:49             ` liweihang
@ 2020-11-18 20:07               ` Jason Gunthorpe
  2020-11-20  9:01                 ` liweihang
  0 siblings, 1 reply; 11+ messages in thread
From: Jason Gunthorpe @ 2020-11-18 20:07 UTC (permalink / raw)
  To: liweihang; +Cc: Leon Romanovsky, dledford, linux-rdma, Linuxarm

On Wed, Nov 18, 2020 at 10:49:14AM +0000, liweihang wrote:
> On 2020/11/17 16:50, Leon Romanovsky wrote:
> > On Tue, Nov 17, 2020 at 08:35:55AM +0000, liweihang wrote:
> >> On 2020/11/17 15:21, Leon Romanovsky wrote:
> >>> On Tue, Nov 17, 2020 at 06:37:58AM +0000, liweihang wrote:
> >>>> On 2020/11/16 21:47, Leon Romanovsky wrote:
> >>>>> On Mon, Nov 16, 2020 at 07:58:38PM +0800, Weihang Li wrote:
> >>>>>> From: Lang Cheng <chenglang@huawei.com>
> >>>>>>
> >>>>>> Stash is a mechanism that uses the core information carried by the ARM AXI
> >>>>>> bus to access the L3 cache. It can be used to improve the performance by
> >>>>>> increasing the hit ratio of L3 cache. CQs need to enable stash by default.
> >>>>>>
> >>>>>> Signed-off-by: Lang Cheng <chenglang@huawei.com>
> >>>>>> Signed-off-by: Weihang Li <liweihang@huawei.com>
> >>>>>>  drivers/infiniband/hw/hns/hns_roce_common.h | 12 +++++++++
> >>>>>>  drivers/infiniband/hw/hns/hns_roce_device.h |  1 +
> >>>>>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  3 +++
> >>>>>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  | 39 +++++++++++++++++------------
> >>>>>>  4 files changed, 39 insertions(+), 16 deletions(-)
> >>>>>>
> >>>>>> diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
> >>>>>> index f5669ff..8d96c4e 100644
> >>>>>> +++ b/drivers/infiniband/hw/hns/hns_roce_common.h
> >>>>>> @@ -53,6 +53,18 @@
> >>>>>>  #define roce_set_bit(origin, shift, val) \
> >>>>>>  	roce_set_field((origin), (1ul << (shift)), (shift), (val))
> >>>>>>
> >>>>>> +#define FIELD_LOC(field_h, field_l) field_h, field_l
> >>>>>> +
> >>>>>> +#define _hr_reg_set(arr, field_h, field_l)                                     \
> >>>>>> +	do {                                                                   \
> >>>>>> +		BUILD_BUG_ON(((field_h) / 32) != ((field_l) / 32));            \
> >>>>>> +		BUILD_BUG_ON((field_h) / 32 >= ARRAY_SIZE(arr));               \
> >>>>>> +		(arr)[(field_h) / 32] |=                                       \
> >>>>>> +			cpu_to_le32(GENMASK((field_h) % 32, (field_l) % 32));  \
> >>>>>> +	} while (0)
> >>>>>> +
> >>>>>> +#define hr_reg_set(arr, field) _hr_reg_set(arr, field)
> >>>>>
> >>>>> I afraid that it is too much.
> >>>>
> >>>> Hi Leon,
> >>>>
> >>>> Thanks for the comments.
> >>>>
> >>>>> 1. FIELD_LOC() macro to hide two fields.
> >>>>
> >>>> Jason has suggested us to simplify the function of setting/getting bit/field in
> >>>> hns driver like IBA_SET and IBA_GET.
> >>>>
> >>>> https://patchwork.kernel.org/project/linux-rdma/patch/1589982799-28728-3-git-send-email-liweihang@huawei.com/
> >>>>
> >>>> So we try to make it easier and clearer to define a bitfield for developers.
> >>>
> >>> Jason asked to use genmask and FIELD_PREP, but you invented something else.
> >>>
> >>> Thanks
> >>>
> >>
> >> We use them in another interface 'hr_reg_write(arr, field, val)' which hasn't been
> >> used in this series.
> >>
> >> Does it make any unacceptable mistake? We would appreciate any suggestions :)
> > 
> > The invention of FIELD_LOC() and hr_reg_set equal to __hr_reg_set are unacceptable.
> > Pass directly your field_h and field_l to hr_reg_set().
> > 
> > Thanks
> > 
> 
> Hi Leon,
> 
> We let hr_reg_set equal() to __hr_reg_set() because if not, there will be a compile error:
> 
> .../hns_roce_hw_v2.c:4566:41: error: macro "_hr_reg_set" requires 3 arguments, but only 2 given
> _hr_reg_set(cq_context->raw, CQC_STASH);

Yes, it is very un-intuitive but the rules for CPP require the extra
macro pass to generate the correct expansion. Otherwise cpp will try
to pass the value with commas in as a single argument, what we need
here is to expand the commads first and have them break up into macro
arguments as it goes down the macro chain.

> Let's compare the following implementations:
> 
> 	#define _hr_reg_set(arr, field_h, field_l) \
> 		(arr)[(field_h) / 32] |= \
> 			cpu_to_le32(GENMASK((field_h) % 32, (field_l) % 32)) + \
> 			BUILD_BUG_ON_ZERO(((field_h) / 32) != ((field_l) / 32)) + \
> 			BUILD_BUG_ON_ZERO((field_h) / 32 >= ARRAY_SIZE(arr))
> 
> 1)
> 	#define hr_reg_set(arr, field) _hr_reg_set(arr, field)
> 
> 	#define QPCEX_PASID_EN FIELD_LOC(111, 95)
> 	hr_reg_set(context->ext, QPCEX_PASID_EN);

It is also weird that something called set can only write all ones to
the field.

It feels saner to have a set that accepts a value and if the all ones
case is something very common then use a macro to compute it from the
field name.

Jason

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 for-next 1/2] RDMA/hns: Add support for CQ stash
  2020-11-18 20:07               ` Jason Gunthorpe
@ 2020-11-20  9:01                 ` liweihang
  0 siblings, 0 replies; 11+ messages in thread
From: liweihang @ 2020-11-20  9:01 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Leon Romanovsky, dledford, linux-rdma, Linuxarm

On 2020/11/19 4:08, Jason Gunthorpe wrote:
> On Wed, Nov 18, 2020 at 10:49:14AM +0000, liweihang wrote:
>> On 2020/11/17 16:50, Leon Romanovsky wrote:
>>> On Tue, Nov 17, 2020 at 08:35:55AM +0000, liweihang wrote:
>>>> On 2020/11/17 15:21, Leon Romanovsky wrote:
>>>>> On Tue, Nov 17, 2020 at 06:37:58AM +0000, liweihang wrote:
>>>>>> On 2020/11/16 21:47, Leon Romanovsky wrote:
>>>>>>> On Mon, Nov 16, 2020 at 07:58:38PM +0800, Weihang Li wrote:
>>>>>>>> From: Lang Cheng <chenglang@huawei.com>
>>>>>>>>
>>>>>>>> Stash is a mechanism that uses the core information carried by the ARM AXI
>>>>>>>> bus to access the L3 cache. It can be used to improve the performance by
>>>>>>>> increasing the hit ratio of L3 cache. CQs need to enable stash by default.
>>>>>>>>
>>>>>>>> Signed-off-by: Lang Cheng <chenglang@huawei.com>
>>>>>>>> Signed-off-by: Weihang Li <liweihang@huawei.com>
>>>>>>>>  drivers/infiniband/hw/hns/hns_roce_common.h | 12 +++++++++
>>>>>>>>  drivers/infiniband/hw/hns/hns_roce_device.h |  1 +
>>>>>>>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  3 +++
>>>>>>>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  | 39 +++++++++++++++++------------
>>>>>>>>  4 files changed, 39 insertions(+), 16 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
>>>>>>>> index f5669ff..8d96c4e 100644
>>>>>>>> +++ b/drivers/infiniband/hw/hns/hns_roce_common.h
>>>>>>>> @@ -53,6 +53,18 @@
>>>>>>>>  #define roce_set_bit(origin, shift, val) \
>>>>>>>>  	roce_set_field((origin), (1ul << (shift)), (shift), (val))
>>>>>>>>
>>>>>>>> +#define FIELD_LOC(field_h, field_l) field_h, field_l
>>>>>>>> +
>>>>>>>> +#define _hr_reg_set(arr, field_h, field_l)                                     \
>>>>>>>> +	do {                                                                   \
>>>>>>>> +		BUILD_BUG_ON(((field_h) / 32) != ((field_l) / 32));            \
>>>>>>>> +		BUILD_BUG_ON((field_h) / 32 >= ARRAY_SIZE(arr));               \
>>>>>>>> +		(arr)[(field_h) / 32] |=                                       \
>>>>>>>> +			cpu_to_le32(GENMASK((field_h) % 32, (field_l) % 32));  \
>>>>>>>> +	} while (0)
>>>>>>>> +
>>>>>>>> +#define hr_reg_set(arr, field) _hr_reg_set(arr, field)
>>>>>>>
>>>>>>> I afraid that it is too much.
>>>>>>
>>>>>> Hi Leon,
>>>>>>
>>>>>> Thanks for the comments.
>>>>>>
>>>>>>> 1. FIELD_LOC() macro to hide two fields.
>>>>>>
>>>>>> Jason has suggested us to simplify the function of setting/getting bit/field in
>>>>>> hns driver like IBA_SET and IBA_GET.
>>>>>>
>>>>>> https://patchwork.kernel.org/project/linux-rdma/patch/1589982799-28728-3-git-send-email-liweihang@huawei.com/
>>>>>>
>>>>>> So we try to make it easier and clearer to define a bitfield for developers.
>>>>>
>>>>> Jason asked to use genmask and FIELD_PREP, but you invented something else.
>>>>>
>>>>> Thanks
>>>>>
>>>>
>>>> We use them in another interface 'hr_reg_write(arr, field, val)' which hasn't been
>>>> used in this series.
>>>>
>>>> Does it make any unacceptable mistake? We would appreciate any suggestions :)
>>>
>>> The invention of FIELD_LOC() and hr_reg_set equal to __hr_reg_set are unacceptable.
>>> Pass directly your field_h and field_l to hr_reg_set().
>>>
>>> Thanks
>>>
>>
>> Hi Leon,
>>
>> We let hr_reg_set equal() to __hr_reg_set() because if not, there will be a compile error:
>>
>> .../hns_roce_hw_v2.c:4566:41: error: macro "_hr_reg_set" requires 3 arguments, but only 2 given
>> _hr_reg_set(cq_context->raw, CQC_STASH);
> 
> Yes, it is very un-intuitive but the rules for CPP require the extra
> macro pass to generate the correct expansion. Otherwise cpp will try
> to pass the value with commas in as a single argument, what we need
> here is to expand the commads first and have them break up into macro
> arguments as it goes down the macro chain.
> 

Thanks for your explanation :)

>> Let's compare the following implementations:
>>
>> 	#define _hr_reg_set(arr, field_h, field_l) \
>> 		(arr)[(field_h) / 32] |= \
>> 			cpu_to_le32(GENMASK((field_h) % 32, (field_l) % 32)) + \
>> 			BUILD_BUG_ON_ZERO(((field_h) / 32) != ((field_l) / 32)) + \
>> 			BUILD_BUG_ON_ZERO((field_h) / 32 >= ARRAY_SIZE(arr))
>>
>> 1)
>> 	#define hr_reg_set(arr, field) _hr_reg_set(arr, field)
>>
>> 	#define QPCEX_PASID_EN FIELD_LOC(111, 95)
>> 	hr_reg_set(context->ext, QPCEX_PASID_EN);
> 
> It is also weird that something called set can only write all ones to
> the field.
> 
> It feels saner to have a set that accepts a value and if the all ones
> case is something very common then use a macro to compute it from the
> field name.
> 
> Jason
> 

OK, We will achieve hr_reg_enable(arr, field) for setting a single bit
and hr_reg_write(arr, field, val) for filling a value into a field.

Thanks
Weihang



^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2020-11-20  9:01 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-16 11:58 [PATCH v2 for-next 0/2] RDMA/hns: Add supports for stash Weihang Li
2020-11-16 11:58 ` [PATCH v2 for-next 1/2] RDMA/hns: Add support for CQ stash Weihang Li
2020-11-16 13:46   ` Leon Romanovsky
2020-11-17  6:37     ` liweihang
2020-11-17  7:20       ` Leon Romanovsky
2020-11-17  8:35         ` liweihang
2020-11-17  8:50           ` Leon Romanovsky
2020-11-18 10:49             ` liweihang
2020-11-18 20:07               ` Jason Gunthorpe
2020-11-20  9:01                 ` liweihang
2020-11-16 11:58 ` [PATCH v2 for-next 2/2] RDMA/hns: Add support for QP stash Weihang Li

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.