All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH for-next 0/3] RDMA/hns: Add support for stash
@ 2020-09-30  9:34 Weihang Li
  2020-09-30  9:34 ` [PATCH for-next 1/3] RDMA/hns: Add support for CQ stash Weihang Li
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Weihang Li @ 2020-09-30  9:34 UTC (permalink / raw)
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm

Stash is a mechanism that uses the core information carried by the ARM AXI
bus to access the L3 cache. The CPU and I/O subsystems can access the L3
cache consistently by enabling stash, so the performance can be improved.

Lang Cheng (3):
  RDMA/hns: Add support for CQ stash
  RDMA/hns: Add new interfaces to set/clear/read fields in QPC
  RDMA/hns: Add support for QP stash

 drivers/infiniband/hw/hns/hns_roce_common.h | 26 ++++++++++++++++++++++++++
 drivers/infiniband/hw/hns/hns_roce_device.h |  1 +
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  9 +++++++++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  5 +++++
 4 files changed, 41 insertions(+)

-- 
2.8.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH for-next 1/3] RDMA/hns: Add support for CQ stash
  2020-09-30  9:34 [PATCH for-next 0/3] RDMA/hns: Add support for stash Weihang Li
@ 2020-09-30  9:34 ` Weihang Li
  2020-09-30  9:34 ` [PATCH for-next 2/3] RDMA/hns: Add new interfaces to set/clear/read fields in QPC Weihang Li
  2020-09-30  9:34 ` [PATCH for-next 3/3] RDMA/hns: Add support for QP stash Weihang Li
  2 siblings, 0 replies; 6+ messages in thread
From: Weihang Li @ 2020-09-30  9:34 UTC (permalink / raw)
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm

From: Lang Cheng <chenglang@huawei.com>

Stash is a mechanism that uses the core information carried by the ARM AXI
bus to access the L3 cache. It can be used to improve the performance by
increasing the hit ratio of L3 cache. CQs need to enable stash by default.

Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h | 1 +
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 3 +++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 6d2acff..87d3e57 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -221,6 +221,7 @@ enum {
 	HNS_ROCE_CAP_FLAG_FRMR                  = BIT(8),
 	HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL		= BIT(9),
 	HNS_ROCE_CAP_FLAG_ATOMIC		= BIT(10),
+	HNS_ROCE_CAP_FLAG_STASH			= BIT(17),
 };
 
 #define HNS_ROCE_DB_TYPE_COUNT			2
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 6d30850..154afc0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -3059,6 +3059,9 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
 		       V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size ==
 		       HNS_ROCE_V3_CQE_SIZE ? 1 : 0);
 
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH)
+		roce_set_bit(cq_context->byte_8_cqn, V2_CQC_BYTE_8_STASH_S, 1);
+
 	cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0]));
 
 	roce_set_field(cq_context->byte_16_hop_addr,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 29c9dd4..cfa8caa 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -315,6 +315,8 @@ struct hns_roce_v2_cq_context {
 #define V2_CQC_BYTE_8_CQE_SIZE_S 27
 #define V2_CQC_BYTE_8_CQE_SIZE_M GENMASK(28, 27)
 
+#define V2_CQC_BYTE_8_STASH_S 31
+
 #define	V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S 0
 #define V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M GENMASK(19, 0)
 
-- 
2.8.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH for-next 2/3] RDMA/hns: Add new interfaces to set/clear/read fields in QPC
  2020-09-30  9:34 [PATCH for-next 0/3] RDMA/hns: Add support for stash Weihang Li
  2020-09-30  9:34 ` [PATCH for-next 1/3] RDMA/hns: Add support for CQ stash Weihang Li
@ 2020-09-30  9:34 ` Weihang Li
  2020-10-06 19:55   ` Jason Gunthorpe
  2020-09-30  9:34 ` [PATCH for-next 3/3] RDMA/hns: Add support for QP stash Weihang Li
  2 siblings, 1 reply; 6+ messages in thread
From: Weihang Li @ 2020-09-30  9:34 UTC (permalink / raw)
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm

From: Lang Cheng <chenglang@huawei.com>

For a field in the extended QPC, four new interfaces are added:
- hr_reg_set(arr, field) sets all bits of the field to 1,
- hr_reg_clear(arr, field) clears all bits of the field to 0,
- hr_reg_write(arr, field, val) writes a new value to the field,
- hr_reg_read(arr, field) reads the value of the field.
'arr' is the name of the extended QPC array, and 'field' is the global bit
offset of the field within the whole array.

Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_common.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index f5669ff..ab2386d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -53,6 +53,32 @@
 #define roce_set_bit(origin, shift, val) \
 	roce_set_field((origin), (1ul << (shift)), (shift), (val))
 
+#define hr_reg_set(arr, field)                                                 \
+	((arr)[(field) / 32] |=                                                \
+	 cpu_to_le32((field##_W) +                                             \
+		     BUILD_BUG_ON_ZERO((field) / 32 >= ARRAY_SIZE(arr))))
+
+#define hr_reg_clear(arr, field)                                               \
+	((arr)[(field) / 32] &=                                                \
+	 ~cpu_to_le32((field##_W) +                                            \
+		      BUILD_BUG_ON_ZERO((field) / 32 >= ARRAY_SIZE(arr))))
+
+#define hr_reg_write(arr, field, val)                                          \
+	do {                                                                   \
+		BUILD_BUG_ON((field) / 32 >= ARRAY_SIZE(arr));                 \
+		(arr)[(field) / 32] &= ~cpu_to_le32(field##_W);                \
+		(arr)[(field) / 32] |= cpu_to_le32(                            \
+			((u32)(val) << ((field) % 32)) & (field##_W));         \
+	} while (0)
+
+#define hr_reg_read(arr, field)                                                \
+	(((le32_to_cpu((arr)[(field) / 32]) & (field##_W)) >> (field) % 32) +  \
+	 BUILD_BUG_ON_ZERO((field) / 32 >= ARRAY_SIZE(arr)))
+
+#define V3_GENMASK(h, l)                                                       \
+	GENMASK(((h) + BUILD_BUG_ON_ZERO(((h) / 32) != ((l) / 32))) % 32,      \
+		((l) + BUILD_BUG_ON_ZERO((h) < (l))) % 32)
+
 #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3
 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4
 
-- 
2.8.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH for-next 3/3] RDMA/hns: Add support for QP stash
  2020-09-30  9:34 [PATCH for-next 0/3] RDMA/hns: Add support for stash Weihang Li
  2020-09-30  9:34 ` [PATCH for-next 1/3] RDMA/hns: Add support for CQ stash Weihang Li
  2020-09-30  9:34 ` [PATCH for-next 2/3] RDMA/hns: Add new interfaces to set/clear/read fields in QPC Weihang Li
@ 2020-09-30  9:34 ` Weihang Li
  2 siblings, 0 replies; 6+ messages in thread
From: Weihang Li @ 2020-09-30  9:34 UTC (permalink / raw)
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm

From: Lang Cheng <chenglang@huawei.com>

Stash is a mechanism that uses the core information carried by the ARM AXI
bus to access the L3 cache. It can be used to improve the performance by
increasing the hit ratio of L3 cache. QPs need to enable stash by default.

Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 ++++++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 3 +++
 2 files changed, 9 insertions(+)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 154afc0..d561e98 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -3856,6 +3856,12 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 	hr_qp->access_flags = attr->qp_access_flags;
 	roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
 		       V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn);
+
+	if (hr_dev->caps.qpc_sz < HNS_ROCE_V3_QPC_SZ)
+		return;
+
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH)
+		hr_reg_set(context->ext, QPCEX_STASH);
 }
 
 static void modify_qp_init_to_init(struct ib_qp *ibqp,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index cfa8caa..1692586 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -889,6 +889,9 @@ struct hns_roce_v2_qp_context {
 #define	V2_QPC_BYTE_256_SQ_FLUSH_IDX_S 16
 #define V2_QPC_BYTE_256_SQ_FLUSH_IDX_M GENMASK(31, 16)
 
+#define QPCEX_STASH 82
+#define QPCEX_STASH_W V3_GENMASK(82, 82)
+
 #define	V2_QP_RWE_S 1 /* rdma write enable */
 #define	V2_QP_RRE_S 2 /* rdma read enable */
 #define	V2_QP_ATE_S 3 /* rdma atomic enable */
-- 
2.8.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH for-next 2/3] RDMA/hns: Add new interfaces to set/clear/read fields in QPC
  2020-09-30  9:34 ` [PATCH for-next 2/3] RDMA/hns: Add new interfaces to set/clear/read fields in QPC Weihang Li
@ 2020-10-06 19:55   ` Jason Gunthorpe
  2020-10-09  2:01     ` liweihang
  0 siblings, 1 reply; 6+ messages in thread
From: Jason Gunthorpe @ 2020-10-06 19:55 UTC (permalink / raw)
  To: Weihang Li; +Cc: dledford, leon, linux-rdma, linuxarm

On Wed, Sep 30, 2020 at 05:34:11PM +0800, Weihang Li wrote:
> From: Lang Cheng <chenglang@huawei.com>
> 
> For a field in extended QPC, there are four newly added interfaces:
> - hr_reg_set(arr, field) can set all bits to 1,
> - hr_reg_clear(arr, field) can clear all bits to 0,
> - hr_reg_write(arr, field, val) can write a new value,
> - hr_reg_read(arr, field) can read the value.
> 'arr' is the array name of extended QPC, and 'field' is the global bit
> offset of the whole array.
> 
> Signed-off-by: Lang Cheng <chenglang@huawei.com>
> Signed-off-by: Weihang Li <liweihang@huawei.com>
>  drivers/infiniband/hw/hns/hns_roce_common.h | 26 ++++++++++++++++++++++++++
>  1 file changed, 26 insertions(+)
> 
> diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
> index f5669ff..ab2386d 100644
> +++ b/drivers/infiniband/hw/hns/hns_roce_common.h
> @@ -53,6 +53,32 @@
>  #define roce_set_bit(origin, shift, val) \
>  	roce_set_field((origin), (1ul << (shift)), (shift), (val))
>  
> +#define hr_reg_set(arr, field)                                                 \
> +	((arr)[(field) / 32] |=                                                \
> +	 cpu_to_le32((field##_W) +                                             \
> +		     BUILD_BUG_ON_ZERO((field) / 32 >= ARRAY_SIZE(arr))))
> +
> +#define hr_reg_clear(arr, field)                                               \
> +	((arr)[(field) / 32] &=                                                \
> +	 ~cpu_to_le32((field##_W) +                                            \
> +		      BUILD_BUG_ON_ZERO((field) / 32 >= ARRAY_SIZE(arr))))
> +
> +#define hr_reg_write(arr, field, val)                                          \
> +	do {                                                                   \
> +		BUILD_BUG_ON((field) / 32 >= ARRAY_SIZE(arr));                 \
> +		(arr)[(field) / 32] &= ~cpu_to_le32(field##_W);                \
> +		(arr)[(field) / 32] |= cpu_to_le32(                            \
> +			((u32)(val) << ((field) % 32)) & (field##_W));         \
> +	} while (0)
> +
> +#define hr_reg_read(arr, field)                                                \
> +	(((le32_to_cpu((arr)[(field) / 32]) & (field##_W)) >> (field) % 32) +  \
> +	 BUILD_BUG_ON_ZERO((field) / 32 >= ARRAY_SIZE(arr)))

Why add these functions that are not used?

Jason

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH for-next 2/3] RDMA/hns: Add new interfaces to set/clear/read fields in QPC
  2020-10-06 19:55   ` Jason Gunthorpe
@ 2020-10-09  2:01     ` liweihang
  0 siblings, 0 replies; 6+ messages in thread
From: liweihang @ 2020-10-09  2:01 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: dledford, leon, linux-rdma, Linuxarm

On 2020/10/7 3:56, Jason Gunthorpe wrote:
> On Wed, Sep 30, 2020 at 05:34:11PM +0800, Weihang Li wrote:
>> From: Lang Cheng <chenglang@huawei.com>
>>
>> For a field in extended QPC, there are four newly added interfaces:
>> - hr_reg_set(arr, field) can set all bits to 1,
>> - hr_reg_clear(arr, field) can clear all bits to 0,
>> - hr_reg_write(arr, field, val) can write a new value,
>> - hr_reg_read(arr, field) can read the value.
>> 'arr' is the array name of extended QPC, and 'field' is the global bit
>> offset of the whole array.
>>
>> Signed-off-by: Lang Cheng <chenglang@huawei.com>
>> Signed-off-by: Weihang Li <liweihang@huawei.com>
>>  drivers/infiniband/hw/hns/hns_roce_common.h | 26 ++++++++++++++++++++++++++
>>  1 file changed, 26 insertions(+)
>>
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
>> index f5669ff..ab2386d 100644
>> +++ b/drivers/infiniband/hw/hns/hns_roce_common.h
>> @@ -53,6 +53,32 @@
>>  #define roce_set_bit(origin, shift, val) \
>>  	roce_set_field((origin), (1ul << (shift)), (shift), (val))
>>  
>> +#define hr_reg_set(arr, field)                                                 \
>> +	((arr)[(field) / 32] |=                                                \
>> +	 cpu_to_le32((field##_W) +                                             \
>> +		     BUILD_BUG_ON_ZERO((field) / 32 >= ARRAY_SIZE(arr))))
>> +
>> +#define hr_reg_clear(arr, field)                                               \
>> +	((arr)[(field) / 32] &=                                                \
>> +	 ~cpu_to_le32((field##_W) +                                            \
>> +		      BUILD_BUG_ON_ZERO((field) / 32 >= ARRAY_SIZE(arr))))
>> +
>> +#define hr_reg_write(arr, field, val)                                          \
>> +	do {                                                                   \
>> +		BUILD_BUG_ON((field) / 32 >= ARRAY_SIZE(arr));                 \
>> +		(arr)[(field) / 32] &= ~cpu_to_le32(field##_W);                \
>> +		(arr)[(field) / 32] |= cpu_to_le32(                            \
>> +			((u32)(val) << ((field) % 32)) & (field##_W));         \
>> +	} while (0)
>> +
>> +#define hr_reg_read(arr, field)                                                \
>> +	(((le32_to_cpu((arr)[(field) / 32]) & (field##_W)) >> (field) % 32) +  \
>> +	 BUILD_BUG_ON_ZERO((field) / 32 >= ARRAY_SIZE(arr)))
> 
> Why add these functions that are not used?
> 
> Jason
> 

hr_reg_set() is used in patch 3/3 of this series; the others are prepared for a
follow-up series. I will move hr_reg_set() into patch #3 and add the other
interfaces only when they are required.

Thanks
Weihang


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-10-09  2:01 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-30  9:34 [PATCH for-next 0/3] RDMA/hns: Add support for stash Weihang Li
2020-09-30  9:34 ` [PATCH for-next 1/3] RDMA/hns: Add support for CQ stash Weihang Li
2020-09-30  9:34 ` [PATCH for-next 2/3] RDMA/hns: Add new interfaces to set/clear/read fields in QPC Weihang Li
2020-10-06 19:55   ` Jason Gunthorpe
2020-10-09  2:01     ` liweihang
2020-09-30  9:34 ` [PATCH for-next 3/3] RDMA/hns: Add support for QP stash Weihang Li

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.