All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH for-next 0/2] RDMA/hns: Add support for userspace Direct WQE
@ 2021-05-28  9:19 Weihang Li
  2021-05-28  9:19 ` [PATCH for-next 1/2] RDMA/hns: Refactor hns uar mmap flow Weihang Li
  2021-05-28  9:19 ` [PATCH for-next 2/2] RDMA/hns: Support direct WQE of userspace Weihang Li
  0 siblings, 2 replies; 5+ messages in thread
From: Weihang Li @ 2021-05-28  9:19 UTC (permalink / raw)
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm, Weihang Li

Direct wqe is a mechanism to fill wqe directly into the hardware. In the
case of light load, the wqe will be filled into pcie bar space of the
hardware, this will reduce one memory access operation and therefore
reduce the latency. 

This series first refactor current uar mmap process to add branch for
direct wqe, then the feature is enabled.

The related userspace series is named "libhns: Add support for direct WQE".

Xi Wang (1):
  RDMA/hns: Refactor hns uar mmap flow

Yixing Liu (1):
  RDMA/hns: Support direct WQE of userspace

 drivers/infiniband/hw/hns/hns_roce_device.h |  7 ++-
 drivers/infiniband/hw/hns/hns_roce_main.c   | 72 +++++++++++++++++++++++++++--
 drivers/infiniband/hw/hns/hns_roce_pd.c     |  8 +++-
 drivers/infiniband/hw/hns/hns_roce_qp.c     |  5 ++
 include/uapi/rdma/hns-abi.h                 |  6 +++
 5 files changed, 89 insertions(+), 9 deletions(-)

-- 
2.7.4


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH for-next 1/2] RDMA/hns: Refactor hns uar mmap flow
  2021-05-28  9:19 [PATCH for-next 0/2] RDMA/hns: Add support for userspace Direct WQE Weihang Li
@ 2021-05-28  9:19 ` Weihang Li
  2021-06-02 11:16   ` Leon Romanovsky
  2021-05-28  9:19 ` [PATCH for-next 2/2] RDMA/hns: Support direct WQE of userspace Weihang Li
  1 sibling, 1 reply; 5+ messages in thread
From: Weihang Li @ 2021-05-28  9:19 UTC (permalink / raw)
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm, Xi Wang, Weihang Li

From: Xi Wang <wangxi11@huawei.com>

Classify the uar address by wrapping the uar type and start page as offset
for hns rdma io mmap.

Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_main.c | 27 ++++++++++++++++++++++++---
 include/uapi/rdma/hns-abi.h               |  4 ++++
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 6c6e82b..00dbbf1 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -338,12 +338,23 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
 	hns_roce_uar_free(to_hr_dev(ibcontext->device), &context->uar);
 }
 
-static int hns_roce_mmap(struct ib_ucontext *context,
-			 struct vm_area_struct *vma)
+/* command value is offset[15:8] */
+static inline int hns_roce_mmap_get_command(unsigned long offset)
+{
+	return (offset >> 8) & 0xff;
+}
+
+/* index value is offset[63:16] | offset[7:0] */
+static inline unsigned long hns_roce_mmap_get_index(unsigned long offset)
+{
+	return ((offset >> 16) << 8) | (offset & 0xff);
+}
+
+static int mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(context->device);
 
-	switch (vma->vm_pgoff) {
+	switch (hns_roce_mmap_get_index(vma->vm_pgoff)) {
 	case 0:
 		return rdma_user_mmap_io(context, vma,
 					 to_hr_ucontext(context)->uar.pfn,
@@ -370,6 +381,16 @@ static int hns_roce_mmap(struct ib_ucontext *context,
 	}
 }
 
+static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
+{
+	switch (hns_roce_mmap_get_command(vma->vm_pgoff)) {
+	case HNS_ROCE_MMAP_REGULAR_PAGE:
+		return mmap_uar(uctx, vma);
+	default:
+		return -EINVAL;
+	}
+}
+
 static int hns_roce_port_immutable(struct ib_device *ib_dev, u32 port_num,
 				   struct ib_port_immutable *immutable)
 {
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index 42b1776..18529d7 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -94,4 +94,8 @@ struct hns_roce_ib_alloc_pd_resp {
 	__u32 pdn;
 };
 
+enum {
+	HNS_ROCE_MMAP_REGULAR_PAGE,
+};
+
 #endif /* HNS_ABI_USER_H */
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH for-next 2/2] RDMA/hns: Support direct WQE of userspace
  2021-05-28  9:19 [PATCH for-next 0/2] RDMA/hns: Add support for userspace Direct WQE Weihang Li
  2021-05-28  9:19 ` [PATCH for-next 1/2] RDMA/hns: Refactor hns uar mmap flow Weihang Li
@ 2021-05-28  9:19 ` Weihang Li
  1 sibling, 0 replies; 5+ messages in thread
From: Weihang Li @ 2021-05-28  9:19 UTC (permalink / raw)
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm, Yixing Liu, Weihang Li

From: Yixing Liu <liuyixing1@huawei.com>

Enable direct WQE of userspace and add address mapping for it.

Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |  7 ++--
 drivers/infiniband/hw/hns/hns_roce_main.c   | 59 +++++++++++++++++++++++++----
 drivers/infiniband/hw/hns/hns_roce_pd.c     |  8 +++-
 drivers/infiniband/hw/hns/hns_roce_qp.c     |  5 +++
 include/uapi/rdma/hns-abi.h                 |  2 +
 5 files changed, 68 insertions(+), 13 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 111cab5..ee2726e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -187,6 +187,7 @@ enum {
 	HNS_ROCE_CAP_FLAG_FRMR                  = BIT(8),
 	HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL		= BIT(9),
 	HNS_ROCE_CAP_FLAG_ATOMIC		= BIT(10),
+	HNS_ROCE_CAP_FLAG_DIRECT_WQE		= BIT(12),
 	HNS_ROCE_CAP_FLAG_SDI_MODE		= BIT(14),
 	HNS_ROCE_CAP_FLAG_STASH			= BIT(17),
 };
@@ -226,6 +227,7 @@ enum {
 
 struct hns_roce_uar {
 	u64		pfn;
+	u64		dwqe_page;
 	unsigned long	index;
 	unsigned long	logic_idx;
 };
@@ -608,10 +610,6 @@ struct hns_roce_work {
 	u32 queue_num;
 };
 
-enum {
-	HNS_ROCE_QP_CAP_DIRECT_WQE = BIT(5),
-};
-
 struct hns_roce_qp {
 	struct ib_qp		ibqp;
 	struct hns_roce_wq	rq;
@@ -656,6 +654,7 @@ struct hns_roce_qp {
 	struct list_head	node;		/* all qps are on a list */
 	struct list_head	rq_node;	/* all recv qps are on a list */
 	struct list_head	sq_node;	/* all send qps are on a list */
+	bool			has_mmaped;	/* mark qp of direct wqe */
 };
 
 struct hns_roce_ib_iboe {
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 00dbbf1..c240c9f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -331,13 +331,6 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 	return ret;
 }
 
-static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
-{
-	struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
-
-	hns_roce_uar_free(to_hr_dev(ibcontext->device), &context->uar);
-}
-
 /* command value is offset[15:8] */
 static inline int hns_roce_mmap_get_command(unsigned long offset)
 {
@@ -350,6 +343,56 @@ static inline unsigned long hns_roce_mmap_get_index(unsigned long offset)
 	return ((offset >> 16) << 8) | (offset & 0xff);
 }
 
+static int mmap_dwqe(struct ib_ucontext *uctx, struct vm_area_struct *vma)
+{
+	struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
+	struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct hns_roce_qp *hr_qp;
+	unsigned long pgoff;
+	unsigned long qpn;
+	phys_addr_t pfn;
+	pgprot_t prot;
+	int ret;
+
+	pgoff = hns_roce_mmap_get_index(vma->vm_pgoff);
+	qpn = pgoff / (HNS_ROCE_DWQE_SIZE / PAGE_SIZE);
+	hr_qp = __hns_roce_qp_lookup(hr_dev, qpn);
+	if (!hr_qp) {
+		ibdev_err(ibdev, "failed to find QP.\n");
+		return -EINVAL;
+	}
+
+	if (hr_qp->ibqp.pd->uobject->context != uctx) {
+		ibdev_err(ibdev,
+			  "the QP is not owned by the context, QPN = %lu.\n",
+			  hr_qp->qpn);
+		return -EINVAL;
+	}
+
+	if (hr_qp->has_mmaped) {
+		ibdev_err(ibdev,
+			  "the QP has been already mapped, QPN = %lu.\n",
+			  hr_qp->qpn);
+		return -EINVAL;
+	}
+
+	hr_qp->has_mmaped = true;
+	pfn = context->uar.dwqe_page + pgoff;
+	prot = pgprot_device(vma->vm_page_prot);
+
+	ret = rdma_user_mmap_io(uctx, vma, pfn, HNS_ROCE_DWQE_SIZE, prot, NULL);
+
+	return ret;
+}
+
+static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+	struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
+
+	hns_roce_uar_free(to_hr_dev(ibcontext->device), &context->uar);
+}
+
 static int mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(context->device);
@@ -386,6 +429,8 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
 	switch (hns_roce_mmap_get_command(vma->vm_pgoff)) {
 	case HNS_ROCE_MMAP_REGULAR_PAGE:
 		return mmap_uar(uctx, vma);
+	case HNS_ROCE_MMAP_DWQE_PAGE:
+		return mmap_dwqe(uctx, vma);
 	default:
 		return -EINVAL;
 	}
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index a5813bf..620eb25 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -112,8 +112,12 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
 		}
 		uar->pfn = ((res->start) >> PAGE_SHIFT) + uar->index;
 	} else {
-		uar->pfn = ((pci_resource_start(hr_dev->pci_dev, 2))
-			   >> PAGE_SHIFT);
+		uar->pfn = pci_resource_start(hr_dev->pci_dev, 2) >> PAGE_SHIFT;
+
+		if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE)
+			uar->dwqe_page =
+				pci_resource_start(hr_dev->pci_dev, 4) >>
+				PAGE_SHIFT;
 	}
 
 	return 0;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index c6e120e..c34e0ee 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -768,6 +768,10 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 		goto err_inline;
 	}
 
+	if ((PAGE_SIZE <= HNS_ROCE_DWQE_SIZE) &&
+	    hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE)
+		hr_qp->en_flags |= HNS_ROCE_QP_CAP_DIRECT_WQE;
+
 	return 0;
 err_inline:
 	free_rq_inline_buf(hr_qp);
@@ -1060,6 +1064,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 	}
 
 	if (udata) {
+		resp.cap_flags = hr_qp->en_flags;
 		ret = ib_copy_to_udata(udata, &resp,
 				       min(udata->outlen, sizeof(resp)));
 		if (ret) {
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index 18529d7..248c611 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -77,6 +77,7 @@ enum hns_roce_qp_cap_flags {
 	HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0,
 	HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1,
 	HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2,
+	HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5,
 };
 
 struct hns_roce_ib_create_qp_resp {
@@ -96,6 +97,7 @@ struct hns_roce_ib_alloc_pd_resp {
 
 enum {
 	HNS_ROCE_MMAP_REGULAR_PAGE,
+	HNS_ROCE_MMAP_DWQE_PAGE,
 };
 
 #endif /* HNS_ABI_USER_H */
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH for-next 1/2] RDMA/hns: Refactor hns uar mmap flow
  2021-05-28  9:19 ` [PATCH for-next 1/2] RDMA/hns: Refactor hns uar mmap flow Weihang Li
@ 2021-06-02 11:16   ` Leon Romanovsky
  2021-06-03  3:12     ` liweihang
  0 siblings, 1 reply; 5+ messages in thread
From: Leon Romanovsky @ 2021-06-02 11:16 UTC (permalink / raw)
  To: Weihang Li; +Cc: dledford, jgg, linux-rdma, linuxarm, Xi Wang

On Fri, May 28, 2021 at 05:19:04PM +0800, Weihang Li wrote:
> From: Xi Wang <wangxi11@huawei.com>
> 
> Classify the uar address by wrapping the uar type and start page as offset
> for hns rdma io mmap.
> 
> Signed-off-by: Xi Wang <wangxi11@huawei.com>
> Signed-off-by: Weihang Li <liweihang@huawei.com>
> ---
>  drivers/infiniband/hw/hns/hns_roce_main.c | 27 ++++++++++++++++++++++++---
>  include/uapi/rdma/hns-abi.h               |  4 ++++
>  2 files changed, 28 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
> index 6c6e82b..00dbbf1 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_main.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_main.c
> @@ -338,12 +338,23 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
>  	hns_roce_uar_free(to_hr_dev(ibcontext->device), &context->uar);
>  }
>  
> -static int hns_roce_mmap(struct ib_ucontext *context,
> -			 struct vm_area_struct *vma)
> +/* command value is offset[15:8] */
> +static inline int hns_roce_mmap_get_command(unsigned long offset)
> +{
> +	return (offset >> 8) & 0xff;
> +}
> +
> +/* index value is offset[63:16] | offset[7:0] */
> +static inline unsigned long hns_roce_mmap_get_index(unsigned long offset)
> +{
> +	return ((offset >> 16) << 8) | (offset & 0xff);
> +}

Let's follow the common practice and don't introduce inline functions in .c files.

Thanks

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH for-next 1/2] RDMA/hns: Refactor hns uar mmap flow
  2021-06-02 11:16   ` Leon Romanovsky
@ 2021-06-03  3:12     ` liweihang
  0 siblings, 0 replies; 5+ messages in thread
From: liweihang @ 2021-06-03  3:12 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: dledford, jgg, linux-rdma, Linuxarm, wangxi (M)

On 2021/6/2 19:16, Leon Romanovsky wrote:
> On Fri, May 28, 2021 at 05:19:04PM +0800, Weihang Li wrote:
>> From: Xi Wang <wangxi11@huawei.com>
>>
>> Classify the uar address by wrapping the uar type and start page as offset
>> for hns rdma io mmap.
>>
>> Signed-off-by: Xi Wang <wangxi11@huawei.com>
>> Signed-off-by: Weihang Li <liweihang@huawei.com>
>> ---
>>  drivers/infiniband/hw/hns/hns_roce_main.c | 27 ++++++++++++++++++++++++---
>>  include/uapi/rdma/hns-abi.h               |  4 ++++
>>  2 files changed, 28 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
>> index 6c6e82b..00dbbf1 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_main.c
>> +++ b/drivers/infiniband/hw/hns/hns_roce_main.c
>> @@ -338,12 +338,23 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
>>  	hns_roce_uar_free(to_hr_dev(ibcontext->device), &context->uar);
>>  }
>>  
>> -static int hns_roce_mmap(struct ib_ucontext *context,
>> -			 struct vm_area_struct *vma)
>> +/* command value is offset[15:8] */
>> +static inline int hns_roce_mmap_get_command(unsigned long offset)
>> +{
>> +	return (offset >> 8) & 0xff;
>> +}
>> +
>> +/* index value is offset[63:16] | offset[7:0] */
>> +static inline unsigned long hns_roce_mmap_get_index(unsigned long offset)
>> +{
>> +	return ((offset >> 16) << 8) | (offset & 0xff);
>> +}
> 
> Let's follow the common practice and don't introduce inline functions in .c files.
> 
> Thanks
> 

Sure, thanks.

Weihang

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-06-03  3:12 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-28  9:19 [PATCH for-next 0/2] RDMA/hns: Add support for userspace Direct WQE Weihang Li
2021-05-28  9:19 ` [PATCH for-next 1/2] RDMA/hns: Refactor hns uar mmap flow Weihang Li
2021-06-02 11:16   ` Leon Romanovsky
2021-06-03  3:12     ` liweihang
2021-05-28  9:19 ` [PATCH for-next 2/2] RDMA/hns: Support direct WQE of userspace Weihang Li

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.