* [PATCH for-next 0/9] Codes optimization for hip08
@ 2019-07-08 13:41 Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 1/9] RDMA/hns: Package the flow of creating cq Lijun Ou
` (9 more replies)
0 siblings, 10 replies; 11+ messages in thread
From: Lijun Ou @ 2019-07-08 13:41 UTC (permalink / raw)
To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm
Here are codes optimization in order to reduce complexity and
add readability.
Lijun Ou (6):
RDMA/hns: Package the flow of creating cq
RDMA/hns: Refactor the code of creating srq
RDMA/hns: Refactor for hns_roce_v2_modify_qp function
RDMA/hns: Use a separated function for setting extend sge paramters
RDMA/hns: Package for hns_roce_rereg_user_mr function
RDMA/hns: Refactor hem table mhop check and calculation
Xi Wang (1):
RDMA/hns: optimize the duplicated code for qpc setting flow
Yixian Liu (1):
RDMA/hns: Refactor eq table init for hip08
chenglang (1):
RDMA/hns: Optimize hns_roce_mhop_alloc function.
drivers/infiniband/hw/hns/hns_roce_cq.c | 186 +++++---
drivers/infiniband/hw/hns/hns_roce_device.h | 2 -
drivers/infiniband/hw/hns/hns_roce_hem.c | 180 ++++----
drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 628 ++++++++++++++++------------
drivers/infiniband/hw/hns/hns_roce_mr.c | 428 +++++++++++--------
drivers/infiniband/hw/hns/hns_roce_qp.c | 61 ++-
drivers/infiniband/hw/hns/hns_roce_srq.c | 310 ++++++++------
7 files changed, 1015 insertions(+), 780 deletions(-)
--
1.9.1
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH for-next 1/9] RDMA/hns: Package the flow of creating cq
2019-07-08 13:41 [PATCH for-next 0/9] Codes optimization for hip08 Lijun Ou
@ 2019-07-08 13:41 ` Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 2/9] RDMA/hns: Refactor the code of creating srq Lijun Ou
` (8 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Lijun Ou @ 2019-07-08 13:41 UTC (permalink / raw)
To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm
Here moves the relatived codes of creating cq into the separated
functions in order to comprehensibility.
Signed-off-by: Lijun Ou <oulijun@huawei.com>
---
drivers/infiniband/hw/hns/hns_roce_cq.c | 186 +++++++++++++++++++++-----------
1 file changed, 124 insertions(+), 62 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 4e50c22..507d3c4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -298,21 +298,132 @@ static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev,
&buf->hr_buf);
}
+static int create_user_cq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_cq_resp *resp,
+ struct hns_roce_uar *uar,
+ int cq_entries)
+{
+ struct hns_roce_ib_create_cq ucmd;
+ struct device *dev = hr_dev->dev;
+ int ret;
+ struct hns_roce_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct hns_roce_ucontext, ibucontext);
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+ dev_err(dev, "Failed to copy_from_udata.\n");
+ return -EFAULT;
+ }
+
+ /* Get user space address, write it into mtt table */
+ ret = hns_roce_ib_get_cq_umem(hr_dev, udata, &hr_cq->hr_buf,
+ &hr_cq->umem, ucmd.buf_addr,
+ cq_entries);
+ if (ret) {
+ dev_err(dev, "Failed to get_cq_umem.\n");
+ return ret;
+ }
+
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
+ (udata->outlen >= sizeof(*resp))) {
+ ret = hns_roce_db_map_user(context, udata, ucmd.db_addr,
+ &hr_cq->db);
+ if (ret) {
+ dev_err(dev, "cq record doorbell map failed!\n");
+ goto err_mtt;
+ }
+ hr_cq->db_en = 1;
+ resp->cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB;
+ }
+
+ /* Get user space parameters */
+ uar = &context->uar;
+
+ return 0;
+
+err_mtt:
+ hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+ ib_umem_release(hr_cq->umem);
+
+ return ret;
+}
+
+static int create_kernel_cq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq, struct hns_roce_uar *uar,
+ int cq_entries)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
+ ret = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1);
+ if (ret)
+ return ret;
+
+ hr_cq->set_ci_db = hr_cq->db.db_record;
+ *hr_cq->set_ci_db = 0;
+ hr_cq->db_en = 1;
+ }
+
+ /* Init mtt table and write buff address to mtt table */
+ ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf, cq_entries);
+ if (ret) {
+ dev_err(dev, "Failed to alloc_cq_buf.\n");
+ goto err_db;
+ }
+
+ uar = &hr_dev->priv_uar;
+ hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset +
+ DB_REG_OFFSET * uar->index;
+
+ return 0;
+
+err_db:
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)
+ hns_roce_free_db(hr_dev, &hr_cq->db);
+
+ return ret;
+}
+
+static void destroy_user_cq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_cq_resp *resp)
+{
+ struct hns_roce_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct hns_roce_ucontext, ibucontext);
+
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
+ (udata->outlen >= sizeof(*resp)))
+ hns_roce_db_unmap_user(context, &hr_cq->db);
+
+ hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+ ib_umem_release(hr_cq->umem);
+}
+
+static void destroy_kernel_cq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq)
+{
+ hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+ hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, hr_cq->ib_cq.cqe);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)
+ hns_roce_free_db(hr_dev, &hr_cq->db);
+}
+
int hns_roce_ib_create_cq(struct ib_cq *ib_cq,
const struct ib_cq_init_attr *attr,
struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
struct device *dev = hr_dev->dev;
- struct hns_roce_ib_create_cq ucmd;
struct hns_roce_ib_create_cq_resp resp = {};
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
struct hns_roce_uar *uar = NULL;
int vector = attr->comp_vector;
int cq_entries = attr->cqe;
int ret;
- struct hns_roce_ucontext *context = rdma_udata_to_drv_context(
- udata, struct hns_roce_ucontext, ibucontext);
if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) {
dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n",
@@ -328,57 +439,18 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq,
spin_lock_init(&hr_cq->lock);
if (udata) {
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
- dev_err(dev, "Failed to copy_from_udata.\n");
- ret = -EFAULT;
- goto err_cq;
- }
-
- /* Get user space address, write it into mtt table */
- ret = hns_roce_ib_get_cq_umem(hr_dev, udata, &hr_cq->hr_buf,
- &hr_cq->umem, ucmd.buf_addr,
- cq_entries);
+ ret = create_user_cq(hr_dev, hr_cq, udata, &resp, uar,
+ cq_entries);
if (ret) {
- dev_err(dev, "Failed to get_cq_umem.\n");
+ dev_err(dev, "Create cq failed in user mode!\n");
goto err_cq;
}
-
- if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
- (udata->outlen >= sizeof(resp))) {
- ret = hns_roce_db_map_user(context, udata, ucmd.db_addr,
- &hr_cq->db);
- if (ret) {
- dev_err(dev, "cq record doorbell map failed!\n");
- goto err_mtt;
- }
- hr_cq->db_en = 1;
- resp.cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB;
- }
-
- /* Get user space parameters */
- uar = &context->uar;
} else {
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
- ret = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1);
- if (ret)
- goto err_cq;
-
- hr_cq->set_ci_db = hr_cq->db.db_record;
- *hr_cq->set_ci_db = 0;
- hr_cq->db_en = 1;
- }
-
- /* Init mmt table and write buff address to mtt table */
- ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf,
- cq_entries);
+ ret = create_kernel_cq(hr_dev, hr_cq, uar, cq_entries);
if (ret) {
- dev_err(dev, "Failed to alloc_cq_buf.\n");
- goto err_db;
+ dev_err(dev, "Create cq failed in kernel mode!\n");
+ goto err_cq;
}
-
- uar = &hr_dev->priv_uar;
- hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset +
- DB_REG_OFFSET * uar->index;
}
/* Allocate cq index, fill cq_context */
@@ -416,20 +488,10 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq,
hns_roce_free_cq(hr_dev, hr_cq);
err_dbmap:
- if (udata && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
- (udata->outlen >= sizeof(resp)))
- hns_roce_db_unmap_user(context, &hr_cq->db);
-
-err_mtt:
- hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
- ib_umem_release(hr_cq->umem);
- if (!udata)
- hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf,
- hr_cq->ib_cq.cqe);
-
-err_db:
- if (!udata && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB))
- hns_roce_free_db(hr_dev, &hr_cq->db);
+ if (udata)
+ destroy_user_cq(hr_dev, hr_cq, udata, &resp);
+ else
+ destroy_kernel_cq(hr_dev, hr_cq);
err_cq:
return ret;
--
1.9.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH for-next 2/9] RDMA/hns: Refactor the code of creating srq
2019-07-08 13:41 [PATCH for-next 0/9] Codes optimization for hip08 Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 1/9] RDMA/hns: Package the flow of creating cq Lijun Ou
@ 2019-07-08 13:41 ` Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 3/9] RDMA/hns: Refactor for hns_roce_v2_modify_qp function Lijun Ou
` (7 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Lijun Ou @ 2019-07-08 13:41 UTC (permalink / raw)
To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm
Here moves the relatived codes of creating user srq and
kernel srq into the two independent functions as well as
removes some unused codes and optimizes some codes.
Signed-off-by: Lijun Ou <oulijun@huawei.com>
---
drivers/infiniband/hw/hns/hns_roce_srq.c | 310 ++++++++++++++++++-------------
1 file changed, 183 insertions(+), 127 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 38bb548..c011422 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -175,6 +175,91 @@ static void hns_roce_srq_free(struct hns_roce_dev *hr_dev,
hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
}
+static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata,
+ int srq_buf_size)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+ struct hns_roce_ib_create_srq ucmd;
+ u32 page_shift;
+ u32 npages;
+ int ret;
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+ return -EFAULT;
+
+ srq->umem = ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0);
+ if (IS_ERR(srq->umem))
+ return PTR_ERR(srq->umem);
+
+ if (hr_dev->caps.srqwqe_buf_pg_sz) {
+ npages = (ib_umem_page_count(srq->umem) +
+ (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) /
+ (1 << hr_dev->caps.srqwqe_buf_pg_sz);
+ page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
+ ret = hns_roce_mtt_init(hr_dev, npages, page_shift, &srq->mtt);
+ } else
+ ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(srq->umem),
+ PAGE_SHIFT, &srq->mtt);
+ if (ret)
+ goto err_user_buf;
+
+ ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem);
+ if (ret)
+ goto err_user_srq_mtt;
+
+ /* config index queue BA */
+ srq->idx_que.umem = ib_umem_get(udata, ucmd.que_addr,
+ srq->idx_que.buf_size, 0, 0);
+ if (IS_ERR(srq->idx_que.umem)) {
+ dev_err(hr_dev->dev, "ib_umem_get error for index queue\n");
+ ret = PTR_ERR(srq->idx_que.umem);
+ goto err_user_srq_mtt;
+ }
+
+ if (hr_dev->caps.idx_buf_pg_sz) {
+ npages = (ib_umem_page_count(srq->idx_que.umem) +
+ (1 << hr_dev->caps.idx_buf_pg_sz) - 1) /
+ (1 << hr_dev->caps.idx_buf_pg_sz);
+ page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
+ ret = hns_roce_mtt_init(hr_dev, npages, page_shift,
+ &srq->idx_que.mtt);
+ } else {
+ ret = hns_roce_mtt_init(hr_dev,
+ ib_umem_page_count(srq->idx_que.umem),
+ PAGE_SHIFT,
+ &srq->idx_que.mtt);
+ }
+
+ if (ret) {
+ dev_err(hr_dev->dev, "hns_roce_mtt_init error for idx que\n");
+ goto err_user_idx_mtt;
+ }
+
+ ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt,
+ srq->idx_que.umem);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "hns_roce_ib_umem_write_mtt error for idx que\n");
+ goto err_user_idx_buf;
+ }
+
+ return 0;
+
+err_user_idx_buf:
+ hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+
+err_user_idx_mtt:
+ ib_umem_release(srq->idx_que.umem);
+
+err_user_srq_mtt:
+ hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+
+err_user_buf:
+ ib_umem_release(srq->umem);
+
+ return ret;
+}
+
static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
u32 page_shift)
{
@@ -196,6 +281,93 @@ static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
return 0;
}
+static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+ u32 page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
+ int ret;
+
+ if (hns_roce_buf_alloc(hr_dev, srq_buf_size, (1 << page_shift) * 2,
+ &srq->buf, page_shift))
+ return -ENOMEM;
+
+ srq->head = 0;
+ srq->tail = srq->max - 1;
+
+ ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, srq->buf.page_shift,
+ &srq->mtt);
+ if (ret)
+ goto err_kernel_buf;
+
+ ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf);
+ if (ret)
+ goto err_kernel_srq_mtt;
+
+ page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
+ ret = hns_roce_create_idx_que(srq->ibsrq.pd, srq, page_shift);
+ if (ret) {
+ dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n", ret);
+ goto err_kernel_srq_mtt;
+ }
+
+ /* Init mtt table for idx_que */
+ ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages,
+ srq->idx_que.idx_buf.page_shift,
+ &srq->idx_que.mtt);
+ if (ret)
+ goto err_kernel_create_idx;
+
+ /* Write buffer address into the mtt table */
+ ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt,
+ &srq->idx_que.idx_buf);
+ if (ret)
+ goto err_kernel_idx_buf;
+
+ srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL);
+ if (!srq->wrid) {
+ ret = -ENOMEM;
+ goto err_kernel_idx_buf;
+ }
+
+ return 0;
+
+err_kernel_idx_buf:
+ hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+
+err_kernel_create_idx:
+ hns_roce_buf_free(hr_dev, srq->idx_que.buf_size,
+ &srq->idx_que.idx_buf);
+ kfree(srq->idx_que.bitmap);
+
+err_kernel_srq_mtt:
+ hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+
+err_kernel_buf:
+ hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
+
+ return ret;
+}
+
+static void destroy_user_srq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq)
+{
+ hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+ ib_umem_release(srq->idx_que.umem);
+ hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+ ib_umem_release(srq->umem);
+}
+
+static void destroy_kernel_srq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq, int srq_buf_size)
+{
+ kvfree(srq->wrid);
+ hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+ hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, &srq->idx_que.idx_buf);
+ kfree(srq->idx_que.bitmap);
+ hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+ hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
+}
+
int hns_roce_create_srq(struct ib_srq *ib_srq,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata)
@@ -205,9 +377,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
struct hns_roce_srq *srq = to_hr_srq(ib_srq);
int srq_desc_size;
int srq_buf_size;
- u32 page_shift;
int ret = 0;
- u32 npages;
u32 cqn;
/* Check the actual SRQ wqe and SRQ sge num */
@@ -233,115 +403,16 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX;
if (udata) {
- struct hns_roce_ib_create_srq ucmd;
-
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
- return -EFAULT;
-
- srq->umem =
- ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0);
- if (IS_ERR(srq->umem))
- return PTR_ERR(srq->umem);
-
- if (hr_dev->caps.srqwqe_buf_pg_sz) {
- npages = (ib_umem_page_count(srq->umem) +
- (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) /
- (1 << hr_dev->caps.srqwqe_buf_pg_sz);
- page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
- ret = hns_roce_mtt_init(hr_dev, npages,
- page_shift,
- &srq->mtt);
- } else
- ret = hns_roce_mtt_init(hr_dev,
- ib_umem_page_count(srq->umem),
- PAGE_SHIFT, &srq->mtt);
- if (ret)
- goto err_buf;
-
- ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem);
- if (ret)
- goto err_srq_mtt;
-
- /* config index queue BA */
- srq->idx_que.umem = ib_umem_get(udata, ucmd.que_addr,
- srq->idx_que.buf_size, 0, 0);
- if (IS_ERR(srq->idx_que.umem)) {
- dev_err(hr_dev->dev,
- "ib_umem_get error for index queue\n");
- ret = PTR_ERR(srq->idx_que.umem);
- goto err_srq_mtt;
- }
-
- if (hr_dev->caps.idx_buf_pg_sz) {
- npages = (ib_umem_page_count(srq->idx_que.umem) +
- (1 << hr_dev->caps.idx_buf_pg_sz) - 1) /
- (1 << hr_dev->caps.idx_buf_pg_sz);
- page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
- ret = hns_roce_mtt_init(hr_dev, npages,
- page_shift, &srq->idx_que.mtt);
- } else {
- ret = hns_roce_mtt_init(
- hr_dev, ib_umem_page_count(srq->idx_que.umem),
- PAGE_SHIFT, &srq->idx_que.mtt);
- }
-
- if (ret) {
- dev_err(hr_dev->dev,
- "hns_roce_mtt_init error for idx que\n");
- goto err_idx_mtt;
- }
-
- ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt,
- srq->idx_que.umem);
+ ret = create_user_srq(srq, udata, srq_buf_size);
if (ret) {
- dev_err(hr_dev->dev,
- "hns_roce_ib_umem_write_mtt error for idx que\n");
- goto err_idx_buf;
+ dev_err(hr_dev->dev, "Create user srq failed\n");
+ goto err_srq;
}
} else {
- page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
- if (hns_roce_buf_alloc(hr_dev, srq_buf_size,
- (1 << page_shift) * 2, &srq->buf,
- page_shift))
- return -ENOMEM;
-
- srq->head = 0;
- srq->tail = srq->max - 1;
-
- ret = hns_roce_mtt_init(hr_dev, srq->buf.npages,
- srq->buf.page_shift, &srq->mtt);
- if (ret)
- goto err_buf;
-
- ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf);
- if (ret)
- goto err_srq_mtt;
-
- page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
- ret = hns_roce_create_idx_que(ib_srq->pd, srq, page_shift);
+ ret = create_kernel_srq(srq, srq_buf_size);
if (ret) {
- dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n",
- ret);
- goto err_srq_mtt;
- }
-
- /* Init mtt table for idx_que */
- ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages,
- srq->idx_que.idx_buf.page_shift,
- &srq->idx_que.mtt);
- if (ret)
- goto err_create_idx;
-
- /* Write buffer address into the mtt table */
- ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt,
- &srq->idx_que.idx_buf);
- if (ret)
- goto err_idx_buf;
-
- srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL);
- if (!srq->wrid) {
- ret = -ENOMEM;
- goto err_idx_buf;
+ dev_err(hr_dev->dev, "Create kernel srq failed\n");
+ goto err_srq;
}
}
@@ -373,27 +444,12 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
hns_roce_srq_free(hr_dev, srq);
err_wrid:
- kvfree(srq->wrid);
-
-err_idx_buf:
- hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
-
-err_idx_mtt:
- ib_umem_release(srq->idx_que.umem);
-
-err_create_idx:
- hns_roce_buf_free(hr_dev, srq->idx_que.buf_size,
- &srq->idx_que.idx_buf);
- bitmap_free(srq->idx_que.bitmap);
-
-err_srq_mtt:
- hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
-
-err_buf:
- ib_umem_release(srq->umem);
- if (!udata)
- hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
+ if (udata)
+ destroy_user_srq(hr_dev, srq);
+ else
+ destroy_kernel_srq(hr_dev, srq, srq_buf_size);
+err_srq:
return ret;
}
--
1.9.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH for-next 3/9] RDMA/hns: Refactor for hns_roce_v2_modify_qp function
2019-07-08 13:41 [PATCH for-next 0/9] Codes optimization for hip08 Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 1/9] RDMA/hns: Package the flow of creating cq Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 2/9] RDMA/hns: Refactor the code of creating srq Lijun Ou
@ 2019-07-08 13:41 ` Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 4/9] RDMA/hns: Use a separated function for setting extend sge paramters Lijun Ou
` (6 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Lijun Ou @ 2019-07-08 13:41 UTC (permalink / raw)
To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm
This patch packages some lines which exist hns_roce_v2_modify_qp
function into the new function. The codes refactored mainly
include some absolute fields of qp context and some optional
fields of qp context.
Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Xi Wang <wangxi11@huawei.com>
---
drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 356 +++++++++++++++++------------
1 file changed, 206 insertions(+), 150 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index b76e3be..edc5dd2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -3974,30 +3974,119 @@ static inline bool hns_roce_v2_check_qp_stat(enum ib_qp_state cur_state,
}
-static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
- const struct ib_qp_attr *attr,
- int attr_mask, enum ib_qp_state cur_state,
- enum ib_qp_state new_state)
+static int hns_roce_v2_set_path(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
{
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct hns_roce_v2_qp_context *context;
- struct hns_roce_v2_qp_context *qpc_mask;
- struct device *dev = hr_dev->dev;
- int ret = -EINVAL;
+ const struct ib_gid_attr *gid_attr = NULL;
+ int is_roce_protocol;
+ bool is_udp = false;
+ u16 vlan = 0xffff;
+ u8 ib_port;
+ u8 hr_port;
+ int ret;
- context = kcalloc(2, sizeof(*context), GFP_ATOMIC);
- if (!context)
- return -ENOMEM;
+ ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num : hr_qp->port + 1;
+ hr_port = ib_port - 1;
+ is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) &&
+ rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
+
+ if (is_roce_protocol) {
+ gid_attr = attr->ah_attr.grh.sgid_attr;
+ ret = rdma_read_gid_l2_fields(gid_attr, &vlan, NULL);
+ if (ret)
+ return ret;
+
+ if (gid_attr)
+ is_udp = (gid_attr->gid_type ==
+ IB_GID_TYPE_ROCE_UDP_ENCAP);
+ }
+
+ if (vlan < VLAN_CFI_MASK) {
+ roce_set_bit(context->byte_76_srqn_op_en,
+ V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1);
+ roce_set_bit(qpc_mask->byte_76_srqn_op_en,
+ V2_QPC_BYTE_76_RQ_VLAN_EN_S, 0);
+ roce_set_bit(context->byte_168_irrl_idx,
+ V2_QPC_BYTE_168_SQ_VLAN_EN_S, 1);
+ roce_set_bit(qpc_mask->byte_168_irrl_idx,
+ V2_QPC_BYTE_168_SQ_VLAN_EN_S, 0);
+ }
+
+ roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
+ V2_QPC_BYTE_24_VLAN_ID_S, vlan);
+ roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
+ V2_QPC_BYTE_24_VLAN_ID_S, 0);
+
+ if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) {
+ dev_err(hr_dev->dev, "sgid_index(%u) too large. max is %d\n",
+ grh->sgid_index, hr_dev->caps.gid_table_len[hr_port]);
+ return -EINVAL;
+ }
+
+ if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) {
+ dev_err(hr_dev->dev, "ah attr is not RDMA roce type\n");
+ return -EINVAL;
+ }
+
+ roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M,
+ V2_QPC_BYTE_52_UDPSPN_S,
+ is_udp ? 0x12b7 : 0);
+
+ roce_set_field(qpc_mask->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M,
+ V2_QPC_BYTE_52_UDPSPN_S, 0);
+
+ roce_set_field(context->byte_20_smac_sgid_idx,
+ V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S,
+ grh->sgid_index);
+
+ roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
+ V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, 0);
+
+ roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M,
+ V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit);
+ roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M,
+ V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
+
+ if (hr_dev->pci_dev->revision == 0x21 && is_udp)
+ roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
+ V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2);
+ else
+ roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
+ V2_QPC_BYTE_24_TC_S, grh->traffic_class);
+ roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
+ V2_QPC_BYTE_24_TC_S, 0);
+ roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
+ V2_QPC_BYTE_28_FL_S, grh->flow_label);
+ roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
+ V2_QPC_BYTE_28_FL_S, 0);
+ memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
+ memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
+ roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
+ V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr));
+ roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
+ V2_QPC_BYTE_28_SL_S, 0);
+ hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+
+ return 0;
+}
+
+static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask,
+ enum ib_qp_state cur_state,
+ enum ib_qp_state new_state,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ int ret = 0;
- qpc_mask = context + 1;
- /*
- * In v2 engine, software pass context and context mask to hardware
- * when modifying qp. If software need modify some fields in context,
- * we should set all bits of the relevant fields in context mask to
- * 0 at the same time, else set them to 0x1.
- */
- memset(qpc_mask, 0xff, sizeof(*qpc_mask));
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
memset(qpc_mask, 0, sizeof(*qpc_mask));
modify_qp_reset_to_init(ibqp, attr, attr_mask, context,
@@ -4019,134 +4108,30 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
/* Nothing */
;
} else {
- dev_err(dev, "Illegal state for QP!\n");
+ dev_err(hr_dev->dev, "Illegal state for QP!\n");
ret = -EINVAL;
goto out;
}
- /* When QP state is err, SQ and RQ WQE should be flushed */
- if (new_state == IB_QPS_ERR) {
- roce_set_field(context->byte_160_sq_ci_pi,
- V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
- V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S,
- hr_qp->sq.head);
- roce_set_field(qpc_mask->byte_160_sq_ci_pi,
- V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
- V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0);
+out:
+ return ret;
+}
- if (!ibqp->srq) {
- roce_set_field(context->byte_84_rq_ci_pi,
- V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
- V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S,
- hr_qp->rq.head);
- roce_set_field(qpc_mask->byte_84_rq_ci_pi,
- V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
- V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0);
- }
- }
+static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ int ret = 0;
if (attr_mask & IB_QP_AV) {
- const struct ib_global_route *grh =
- rdma_ah_read_grh(&attr->ah_attr);
- const struct ib_gid_attr *gid_attr = NULL;
- int is_roce_protocol;
- u16 vlan = 0xffff;
- u8 ib_port;
- u8 hr_port;
-
- ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num :
- hr_qp->port + 1;
- hr_port = ib_port - 1;
- is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) &&
- rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
-
- if (is_roce_protocol) {
- gid_attr = attr->ah_attr.grh.sgid_attr;
- ret = rdma_read_gid_l2_fields(gid_attr, &vlan, NULL);
- if (ret)
- goto out;
- }
-
- if (vlan < VLAN_CFI_MASK) {
- roce_set_bit(context->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1);
- roce_set_bit(qpc_mask->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_RQ_VLAN_EN_S, 0);
- roce_set_bit(context->byte_168_irrl_idx,
- V2_QPC_BYTE_168_SQ_VLAN_EN_S, 1);
- roce_set_bit(qpc_mask->byte_168_irrl_idx,
- V2_QPC_BYTE_168_SQ_VLAN_EN_S, 0);
- }
-
- roce_set_field(context->byte_24_mtu_tc,
- V2_QPC_BYTE_24_VLAN_ID_M,
- V2_QPC_BYTE_24_VLAN_ID_S, vlan);
- roce_set_field(qpc_mask->byte_24_mtu_tc,
- V2_QPC_BYTE_24_VLAN_ID_M,
- V2_QPC_BYTE_24_VLAN_ID_S, 0);
-
- if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) {
- dev_err(hr_dev->dev,
- "sgid_index(%u) too large. max is %d\n",
- grh->sgid_index,
- hr_dev->caps.gid_table_len[hr_port]);
- ret = -EINVAL;
- goto out;
- }
-
- if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) {
- dev_err(hr_dev->dev, "ah attr is not RDMA roce type\n");
- ret = -EINVAL;
- goto out;
- }
-
- roce_set_field(context->byte_52_udpspn_dmac,
- V2_QPC_BYTE_52_UDPSPN_M, V2_QPC_BYTE_52_UDPSPN_S,
- (gid_attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) ?
- 0 : 0x12b7);
-
- roce_set_field(qpc_mask->byte_52_udpspn_dmac,
- V2_QPC_BYTE_52_UDPSPN_M,
- V2_QPC_BYTE_52_UDPSPN_S, 0);
-
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SGID_IDX_M,
- V2_QPC_BYTE_20_SGID_IDX_S, grh->sgid_index);
-
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SGID_IDX_M,
- V2_QPC_BYTE_20_SGID_IDX_S, 0);
-
- roce_set_field(context->byte_24_mtu_tc,
- V2_QPC_BYTE_24_HOP_LIMIT_M,
- V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit);
- roce_set_field(qpc_mask->byte_24_mtu_tc,
- V2_QPC_BYTE_24_HOP_LIMIT_M,
- V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
-
- if (hr_dev->pci_dev->revision == 0x21 &&
- gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
- roce_set_field(context->byte_24_mtu_tc,
- V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
- grh->traffic_class >> 2);
- else
- roce_set_field(context->byte_24_mtu_tc,
- V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
- grh->traffic_class);
- roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
- V2_QPC_BYTE_24_TC_S, 0);
- roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
- V2_QPC_BYTE_28_FL_S, grh->flow_label);
- roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
- V2_QPC_BYTE_28_FL_S, 0);
- memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
- memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
- roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
- V2_QPC_BYTE_28_SL_S,
- rdma_ah_get_sl(&attr->ah_attr));
- roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
- V2_QPC_BYTE_28_SL_S, 0);
- hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+ ret = hns_roce_v2_set_path(ibqp, attr, attr_mask, context,
+ qpc_mask);
+ if (ret)
+ return ret;
}
if (attr_mask & IB_QP_TIMEOUT) {
@@ -4158,7 +4143,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S,
0);
} else {
- dev_warn(dev, "Local ACK timeout shall be 0 to 30.\n");
+ dev_warn(hr_dev->dev,
+ "Local ACK timeout shall be 0 to 30.\n");
}
}
@@ -4196,6 +4182,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
V2_QPC_BYTE_244_RNR_CNT_S, 0);
}
+ /* RC&UC&UD required attr */
if (attr_mask & IB_QP_SQ_PSN) {
roce_set_field(context->byte_172_sq_psn,
V2_QPC_BYTE_172_SQ_CUR_PSN_M,
@@ -4295,6 +4282,83 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
hr_qp->qkey = attr->qkey;
}
+ return ret;
+}
+
+static void hns_roce_v2_record_opt_fields(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ hr_qp->atomic_rd_en = attr->qp_access_flags;
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ hr_qp->resp_depth = attr->max_dest_rd_atomic;
+ if (attr_mask & IB_QP_PORT) {
+ hr_qp->port = attr->port_num - 1;
+ hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
+ }
+}
+
+static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask, enum ib_qp_state cur_state,
+ enum ib_qp_state new_state)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct hns_roce_v2_qp_context *context;
+ struct hns_roce_v2_qp_context *qpc_mask;
+ struct device *dev = hr_dev->dev;
+ int ret = -EINVAL;
+
+ context = kcalloc(2, sizeof(*context), GFP_ATOMIC);
+ if (!context)
+ return -ENOMEM;
+
+ qpc_mask = context + 1;
+ /*
+ * In v2 engine, software pass context and context mask to hardware
+ * when modifying qp. If software need modify some fields in context,
+ * we should set all bits of the relevant fields in context mask to
+ * 0 at the same time, else set them to 0x1.
+ */
+ memset(qpc_mask, 0xff, sizeof(*qpc_mask));
+ ret = hns_roce_v2_set_abs_fields(ibqp, attr, attr_mask, cur_state,
+ new_state, context, qpc_mask);
+ if (ret)
+ goto out;
+
+ /* When QP state is err, SQ and RQ WQE should be flushed */
+ if (new_state == IB_QPS_ERR) {
+ roce_set_field(context->byte_160_sq_ci_pi,
+ V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
+ V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S,
+ hr_qp->sq.head);
+ roce_set_field(qpc_mask->byte_160_sq_ci_pi,
+ V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
+ V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0);
+
+ if (!ibqp->srq) {
+ roce_set_field(context->byte_84_rq_ci_pi,
+ V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
+ V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S,
+ hr_qp->rq.head);
+ roce_set_field(qpc_mask->byte_84_rq_ci_pi,
+ V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
+ V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0);
+ }
+ }
+
+ /* Configure the optional fields */
+ ret = hns_roce_v2_set_opt_fields(ibqp, attr, attr_mask, context,
+ qpc_mask);
+ if (ret)
+ goto out;
+
roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S,
ibqp->srq ? 1 : 0);
roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
@@ -4316,15 +4380,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
hr_qp->state = new_state;
- if (attr_mask & IB_QP_ACCESS_FLAGS)
- hr_qp->atomic_rd_en = attr->qp_access_flags;
-
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
- hr_qp->resp_depth = attr->max_dest_rd_atomic;
- if (attr_mask & IB_QP_PORT) {
- hr_qp->port = attr->port_num - 1;
- hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
- }
+ hns_roce_v2_record_opt_fields(ibqp, attr, attr_mask);
if (new_state == IB_QPS_RESET && !ibqp->uobject) {
hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn,
--
1.9.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH for-next 4/9] RDMA/hns: Use a separated function for setting extend sge paramters
2019-07-08 13:41 [PATCH for-next 0/9] Codes optimization for hip08 Lijun Ou
` (2 preceding siblings ...)
2019-07-08 13:41 ` [PATCH for-next 3/9] RDMA/hns: Refactor for hns_roce_v2_modify_qp function Lijun Ou
@ 2019-07-08 13:41 ` Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 5/9] RDMA/hns: optimize the duplicated code for qpc setting flow Lijun Ou
` (5 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Lijun Ou @ 2019-07-08 13:41 UTC (permalink / raw)
To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm
Here moves the related lines of setting extend sge size into
the separate function as well as removes the unused variables.
Signed-off-by: Lijun Ou <oulijun@huawei.com>
---
drivers/infiniband/hw/hns/hns_roce_device.h | 2 -
drivers/infiniband/hw/hns/hns_roce_qp.c | 61 ++++++++++++++++++-----------
2 files changed, 38 insertions(+), 25 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index a548b28..b39497a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -654,8 +654,6 @@ struct hns_roce_qp {
u32 doorbell_qpn;
__le32 sq_signal_bits;
u32 sq_next_wqe;
- int sq_max_wqes_per_wr;
- int sq_spare_wqes;
struct hns_roce_wq sq;
struct ib_umem *umem;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index c109602..d35033b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -502,6 +502,35 @@ static int calc_wqe_bt_page_shift(struct hns_roce_dev *hr_dev,
return bt_pg_shift - PAGE_SHIFT;
}
+static int set_extend_sge_param(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ struct device *dev = hr_dev->dev;
+
+ if (hr_qp->sq.max_gs > 2) {
+ hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
+ (hr_qp->sq.max_gs - 2));
+ hr_qp->sge.sge_shift = 4;
+ }
+
+ /* ud sqwqe's sge use extend sge */
+ if (hr_dev->caps.max_sq_sg > 2 && hr_qp->ibqp.qp_type == IB_QPT_GSI) {
+ hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
+ hr_qp->sq.max_gs);
+ hr_qp->sge.sge_shift = 4;
+ }
+
+ if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) {
+ if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
+ dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n",
+ hr_qp->sge.sge_cnt);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
struct ib_qp_cap *cap,
struct hns_roce_qp *hr_qp)
@@ -510,6 +539,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
u32 page_size;
u32 max_cnt;
int size;
+ int ret;
if (cap->max_send_wr > hr_dev->caps.max_wqes ||
cap->max_send_sge > hr_dev->caps.max_sq_sg ||
@@ -519,8 +549,6 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
}
hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
- hr_qp->sq_max_wqes_per_wr = 1;
- hr_qp->sq_spare_wqes = 0;
if (hr_dev->caps.min_wqes)
max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes);
@@ -540,25 +568,10 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
else
hr_qp->sq.max_gs = max_cnt;
- if (hr_qp->sq.max_gs > 2) {
- hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
- (hr_qp->sq.max_gs - 2));
- hr_qp->sge.sge_shift = 4;
- }
-
- /* ud sqwqe's sge use extend sge */
- if (hr_dev->caps.max_sq_sg > 2 && hr_qp->ibqp.qp_type == IB_QPT_GSI) {
- hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
- hr_qp->sq.max_gs);
- hr_qp->sge.sge_shift = 4;
- }
-
- if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) {
- if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
- dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n",
- hr_qp->sge.sge_cnt);
- return -EINVAL;
- }
+ ret = set_extend_sge_param(hr_dev, hr_qp);
+ if (ret) {
+ dev_err(dev, "set extend sge parameters fail\n");
+ return ret;
}
/* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
@@ -943,11 +956,13 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
hns_roce_free_db(hr_dev, &hr_qp->rdb);
err_rq_sge_list:
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
+ hns_roce_qp_has_rq(init_attr))
kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
err_wqe_list:
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
+ hns_roce_qp_has_rq(init_attr))
kfree(hr_qp->rq_inl_buf.wqe_list);
err_out:
--
1.9.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH for-next 5/9] RDMA/hns: optimize the duplicated code for qpc setting flow
2019-07-08 13:41 [PATCH for-next 0/9] Codes optimization for hip08 Lijun Ou
` (3 preceding siblings ...)
2019-07-08 13:41 ` [PATCH for-next 4/9] RDMA/hns: Use a separated function for setting extend sge paramters Lijun Ou
@ 2019-07-08 13:41 ` Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 6/9] RDMA/hns: Optimize hns_roce_mhop_alloc function Lijun Ou
` (4 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Lijun Ou @ 2019-07-08 13:41 UTC (permalink / raw)
To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm
From: Xi Wang <wangxi11@huawei.com>
Currently, more than 20 lines of duplicate code exist in function
'modify_qp_init_to_init' and function 'modify_qp_reset_to_init',
which affects the readability of the code.
Signed-off-by: Xi Wang <wangxi11@huawei.com>
---
drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 96 ++++++++++++------------------
1 file changed, 38 insertions(+), 58 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index edc5dd2..c2ddfc1 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -3118,6 +3118,43 @@ static void set_access_flags(struct hns_roce_qp *hr_qp,
roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
}
+static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
+ roce_set_field(context->byte_4_sqpn_tst,
+ V2_QPC_BYTE_4_SGE_SHIFT_M,
+ V2_QPC_BYTE_4_SGE_SHIFT_S,
+ ilog2((unsigned int)hr_qp->sge.sge_cnt));
+ else
+ roce_set_field(context->byte_4_sqpn_tst,
+ V2_QPC_BYTE_4_SGE_SHIFT_M,
+ V2_QPC_BYTE_4_SGE_SHIFT_S,
+ hr_qp->sq.max_gs >
+ HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ?
+ ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
+
+ roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
+ V2_QPC_BYTE_4_SGE_SHIFT_S, 0);
+
+ roce_set_field(context->byte_20_smac_sgid_idx,
+ V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S,
+ ilog2((unsigned int)hr_qp->sq.wqe_cnt));
+ roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
+ V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0);
+
+ roce_set_field(context->byte_20_smac_sgid_idx,
+ V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
+ (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT ||
+ hr_qp->ibqp.srq) ? 0 :
+ ilog2((unsigned int)hr_qp->rq.wqe_cnt));
+
+ roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
+ V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
+}
+
static void modify_qp_reset_to_init(struct ib_qp *ibqp,
const struct ib_qp_attr *attr,
int attr_mask,
@@ -3138,21 +3175,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
V2_QPC_BYTE_4_TST_S, 0);
- if (ibqp->qp_type == IB_QPT_GSI)
- roce_set_field(context->byte_4_sqpn_tst,
- V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S,
- ilog2((unsigned int)hr_qp->sge.sge_cnt));
- else
- roce_set_field(context->byte_4_sqpn_tst,
- V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S,
- hr_qp->sq.max_gs > 2 ?
- ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
-
- roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S, 0);
-
roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M,
V2_QPC_BYTE_4_SQPN_S, hr_qp->qpn);
roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M,
@@ -3168,19 +3190,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQWS_M,
V2_QPC_BYTE_20_RQWS_S, 0);
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0);
-
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
- (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
- hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 :
- ilog2((unsigned int)hr_qp->rq.wqe_cnt));
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
+ set_qpc_wqe_cnt(hr_qp, context, qpc_mask);
/* No VLAN need to set 0xFFF */
roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
@@ -3456,22 +3466,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
V2_QPC_BYTE_4_TST_S, 0);
- if (ibqp->qp_type == IB_QPT_GSI)
- roce_set_field(context->byte_4_sqpn_tst,
- V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S,
- ilog2((unsigned int)hr_qp->sge.sge_cnt));
- else
- roce_set_field(context->byte_4_sqpn_tst,
- V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S,
- hr_qp->sq.max_gs >
- HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ?
- ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
-
- roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S, 0);
-
if (attr_mask & IB_QP_ACCESS_FLAGS) {
roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
!!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ));
@@ -3506,20 +3500,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
0);
}
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0);
-
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
- (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
- hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 :
- ilog2((unsigned int)hr_qp->rq.wqe_cnt));
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
-
roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn);
roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
--
1.9.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH for-next 6/9] RDMA/hns: Optimize hns_roce_mhop_alloc function.
2019-07-08 13:41 [PATCH for-next 0/9] Codes optimization for hip08 Lijun Ou
` (4 preceding siblings ...)
2019-07-08 13:41 ` [PATCH for-next 5/9] RDMA/hns: optimize the duplicated code for qpc setting flow Lijun Ou
@ 2019-07-08 13:41 ` Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 7/9] RDMA/hns: Package for hns_roce_rereg_user_mr function Lijun Ou
` (3 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Lijun Ou @ 2019-07-08 13:41 UTC (permalink / raw)
To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm
From: chenglang <chenglang@huawei.com>
Here packages some lines for allocating multi-hop addressing
into the independent functions in order to add the readability.
Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Lijun Ou <oulijun@huawei.com>
---
drivers/infiniband/hw/hns/hns_roce_mr.c | 275 +++++++++++++++++++-------------
1 file changed, 160 insertions(+), 115 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 549e1a3..c4b758c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -347,155 +347,208 @@ static void hns_roce_loop_free(struct hns_roce_dev *hr_dev,
mr->pbl_bt_l0 = NULL;
mr->pbl_l0_dma_addr = 0;
}
+static int pbl_1hop_alloc(struct hns_roce_dev *hr_dev, int npages,
+ struct hns_roce_mr *mr, u32 pbl_bt_sz)
+{
+ struct device *dev = hr_dev->dev;
-/* PBL multi hop addressing */
-static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
- struct hns_roce_mr *mr)
+ if (npages > pbl_bt_sz / 8) {
+ dev_err(dev, "npages %d is larger than buf_pg_sz!",
+ npages);
+ return -EINVAL;
+ }
+ mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
+ &(mr->pbl_dma_addr),
+ GFP_KERNEL);
+ if (!mr->pbl_buf)
+ return -ENOMEM;
+
+ mr->pbl_size = npages;
+ mr->pbl_ba = mr->pbl_dma_addr;
+ mr->pbl_hop_num = 1;
+ mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
+ mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
+ return 0;
+
+}
+
+
+static int pbl_2hop_alloc(struct hns_roce_dev *hr_dev, int npages,
+ struct hns_roce_mr *mr, u32 pbl_bt_sz)
{
struct device *dev = hr_dev->dev;
- int mr_alloc_done = 0;
int npages_allocated;
- int i = 0, j = 0;
- u32 pbl_bt_sz;
- u32 mhop_num;
u64 pbl_last_bt_num;
u64 pbl_bt_cnt = 0;
- u64 bt_idx;
u64 size;
+ int i;
- mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
- pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
- if (mhop_num == HNS_ROCE_HOP_NUM_0)
- return 0;
-
- /* hop_num = 1 */
- if (mhop_num == 1) {
- if (npages > pbl_bt_sz / 8) {
- dev_err(dev, "npages %d is larger than buf_pg_sz!",
- npages);
- return -EINVAL;
+ /* alloc L1 BT */
+ for (i = 0; i < pbl_bt_sz / 8; i++) {
+ if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
+ size = pbl_bt_sz;
+ } else {
+ npages_allocated = i * (pbl_bt_sz / 8);
+ size = (npages - npages_allocated) * 8;
}
- mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
- &(mr->pbl_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_buf)
+ mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
+ &(mr->pbl_l1_dma_addr[i]),
+ GFP_KERNEL);
+ if (!mr->pbl_bt_l1[i]) {
+ hns_roce_loop_free(hr_dev, mr, 1, i, 0);
return -ENOMEM;
+ }
- mr->pbl_size = npages;
- mr->pbl_ba = mr->pbl_dma_addr;
- mr->pbl_hop_num = mhop_num;
- mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
- mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
- return 0;
+ *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
+
+ pbl_bt_cnt++;
+ if (pbl_bt_cnt >= pbl_last_bt_num)
+ break;
}
- mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8,
- sizeof(*mr->pbl_l1_dma_addr),
+ mr->l0_chunk_last_num = i + 1;
+
+ return 0;
+}
+
+static int pbl_3hop_alloc(struct hns_roce_dev *hr_dev, int npages,
+ struct hns_roce_mr *mr, u32 pbl_bt_sz)
+{
+ struct device *dev = hr_dev->dev;
+ int mr_alloc_done = 0;
+ int npages_allocated;
+ u64 pbl_last_bt_num;
+ u64 pbl_bt_cnt = 0;
+ u64 bt_idx;
+ u64 size;
+ int i;
+ int j = 0;
+
+ pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
+
+ mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
+ sizeof(*mr->pbl_l2_dma_addr),
GFP_KERNEL);
- if (!mr->pbl_l1_dma_addr)
+ if (!mr->pbl_l2_dma_addr)
return -ENOMEM;
- mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1),
+ mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
+ sizeof(*mr->pbl_bt_l2),
GFP_KERNEL);
- if (!mr->pbl_bt_l1)
- goto err_kcalloc_bt_l1;
-
- if (mhop_num == 3) {
- mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
- sizeof(*mr->pbl_l2_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_l2_dma_addr)
- goto err_kcalloc_l2_dma;
+ if (!mr->pbl_bt_l2)
+ goto err_kcalloc_bt_l2;
+
+ /* alloc L1, L2 BT */
+ for (i = 0; i < pbl_bt_sz / 8; i++) {
+ mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
+ &(mr->pbl_l1_dma_addr[i]),
+ GFP_KERNEL);
+ if (!mr->pbl_bt_l1[i]) {
+ hns_roce_loop_free(hr_dev, mr, 1, i, 0);
+ goto err_dma_alloc_l0;
+ }
- mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
- sizeof(*mr->pbl_bt_l2),
- GFP_KERNEL);
- if (!mr->pbl_bt_l2)
- goto err_kcalloc_bt_l2;
- }
+ *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
- /* alloc L0 BT */
- mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz,
- &(mr->pbl_l0_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_bt_l0)
- goto err_dma_alloc_l0;
+ for (j = 0; j < pbl_bt_sz / 8; j++) {
+ bt_idx = i * pbl_bt_sz / 8 + j;
- if (mhop_num == 2) {
- /* alloc L1 BT */
- for (i = 0; i < pbl_bt_sz / 8; i++) {
if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
size = pbl_bt_sz;
} else {
- npages_allocated = i * (pbl_bt_sz / 8);
+ npages_allocated = bt_idx *
+ (pbl_bt_sz / 8);
size = (npages - npages_allocated) * 8;
}
- mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
- &(mr->pbl_l1_dma_addr[i]),
- GFP_KERNEL);
- if (!mr->pbl_bt_l1[i]) {
- hns_roce_loop_free(hr_dev, mr, 1, i, 0);
+ mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
+ dev, size,
+ &(mr->pbl_l2_dma_addr[bt_idx]),
+ GFP_KERNEL);
+ if (!mr->pbl_bt_l2[bt_idx]) {
+ hns_roce_loop_free(hr_dev, mr, 2, i, j);
goto err_dma_alloc_l0;
}
- *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
+ *(mr->pbl_bt_l1[i] + j) =
+ mr->pbl_l2_dma_addr[bt_idx];
pbl_bt_cnt++;
- if (pbl_bt_cnt >= pbl_last_bt_num)
+ if (pbl_bt_cnt >= pbl_last_bt_num) {
+ mr_alloc_done = 1;
break;
- }
- } else if (mhop_num == 3) {
- /* alloc L1, L2 BT */
- for (i = 0; i < pbl_bt_sz / 8; i++) {
- mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
- &(mr->pbl_l1_dma_addr[i]),
- GFP_KERNEL);
- if (!mr->pbl_bt_l1[i]) {
- hns_roce_loop_free(hr_dev, mr, 1, i, 0);
- goto err_dma_alloc_l0;
}
+ }
- *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
+ if (mr_alloc_done)
+ break;
+ }
- for (j = 0; j < pbl_bt_sz / 8; j++) {
- bt_idx = i * pbl_bt_sz / 8 + j;
+ mr->l0_chunk_last_num = i + 1;
+ mr->l1_chunk_last_num = j + 1;
- if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
- size = pbl_bt_sz;
- } else {
- npages_allocated = bt_idx *
- (pbl_bt_sz / 8);
- size = (npages - npages_allocated) * 8;
- }
- mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
- dev, size,
- &(mr->pbl_l2_dma_addr[bt_idx]),
- GFP_KERNEL);
- if (!mr->pbl_bt_l2[bt_idx]) {
- hns_roce_loop_free(hr_dev, mr, 2, i, j);
- goto err_dma_alloc_l0;
- }
- *(mr->pbl_bt_l1[i] + j) =
- mr->pbl_l2_dma_addr[bt_idx];
+ return 0;
- pbl_bt_cnt++;
- if (pbl_bt_cnt >= pbl_last_bt_num) {
- mr_alloc_done = 1;
- break;
- }
- }
+err_dma_alloc_l0:
+ kfree(mr->pbl_bt_l2);
+ mr->pbl_bt_l2 = NULL;
- if (mr_alloc_done)
- break;
- }
+err_kcalloc_bt_l2:
+ kfree(mr->pbl_l2_dma_addr);
+ mr->pbl_l2_dma_addr = NULL;
+
+ return -ENOMEM;
+}
+
+
+/* PBL multi hop addressing */
+static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
+ struct hns_roce_mr *mr)
+{
+ struct device *dev = hr_dev->dev;
+ u32 pbl_bt_sz;
+ u32 mhop_num;
+
+ mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
+ pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
+
+ if (mhop_num == HNS_ROCE_HOP_NUM_0)
+ return 0;
+
+ /* hop_num = 1 */
+ if (mhop_num == 1)
+ return pbl_1hop_alloc(hr_dev, npages, mr, pbl_bt_sz);
+
+ mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8,
+ sizeof(*mr->pbl_l1_dma_addr),
+ GFP_KERNEL);
+ if (!mr->pbl_l1_dma_addr)
+ return -ENOMEM;
+
+ mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1),
+ GFP_KERNEL);
+ if (!mr->pbl_bt_l1)
+ goto err_kcalloc_bt_l1;
+
+ /* alloc L0 BT */
+ mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz,
+ &(mr->pbl_l0_dma_addr),
+ GFP_KERNEL);
+ if (!mr->pbl_bt_l0)
+ goto err_kcalloc_l2_dma;
+
+ if (mhop_num == 2) {
+ if (pbl_2hop_alloc(hr_dev, npages, mr, pbl_bt_sz))
+ goto err_kcalloc_l2_dma;
+ }
+
+ if (mhop_num == 3) {
+ if (pbl_3hop_alloc(hr_dev, npages, mr, pbl_bt_sz))
+ goto err_kcalloc_l2_dma;
}
- mr->l0_chunk_last_num = i + 1;
- if (mhop_num == 3)
- mr->l1_chunk_last_num = j + 1;
mr->pbl_size = npages;
mr->pbl_ba = mr->pbl_l0_dma_addr;
@@ -505,14 +558,6 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
return 0;
-err_dma_alloc_l0:
- kfree(mr->pbl_bt_l2);
- mr->pbl_bt_l2 = NULL;
-
-err_kcalloc_bt_l2:
- kfree(mr->pbl_l2_dma_addr);
- mr->pbl_l2_dma_addr = NULL;
-
err_kcalloc_l2_dma:
kfree(mr->pbl_bt_l1);
mr->pbl_bt_l1 = NULL;
--
1.9.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH for-next 7/9] RDMA/hns: Package for hns_roce_rereg_user_mr function
2019-07-08 13:41 [PATCH for-next 0/9] Codes optimization for hip08 Lijun Ou
` (5 preceding siblings ...)
2019-07-08 13:41 ` [PATCH for-next 6/9] RDMA/hns: Optimize hns_roce_mhop_alloc function Lijun Ou
@ 2019-07-08 13:41 ` Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 8/9] RDMA/hns: Refactor hem table mhop check and calculation Lijun Ou
` (2 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Lijun Ou @ 2019-07-08 13:41 UTC (permalink / raw)
To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm
Here moves some codes of hns_roce_rereg_user_mr functioon into
an independent function in oder to optimize complexity.
Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Lijun Ou <oulijun@huawei.com>
---
drivers/infiniband/hw/hns/hns_roce_mr.c | 153 +++++++++++++++++++-------------
1 file changed, 89 insertions(+), 64 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index c4b758c..0cfa946 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -1206,6 +1206,83 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(ret);
}
+static int rereg_mr_trans(struct ib_mr *ibmr, int flags,
+ u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct hns_roce_cmd_mailbox *mailbox,
+ u32 pdn, struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
+ struct hns_roce_mr *mr = to_hr_mr(ibmr);
+ struct device *dev = hr_dev->dev;
+ int npages;
+ int ret;
+
+ if (mr->size != ~0ULL) {
+ npages = ib_umem_page_count(mr->umem);
+
+ if (hr_dev->caps.pbl_hop_num)
+ hns_roce_mhop_free(hr_dev, mr);
+ else
+ dma_free_coherent(dev, npages * 8,
+ mr->pbl_buf, mr->pbl_dma_addr);
+ }
+ ib_umem_release(mr->umem);
+
+ mr->umem = ib_umem_get(udata, start, length, mr_access_flags, 0);
+ if (IS_ERR(mr->umem)) {
+ ret = PTR_ERR(mr->umem);
+ mr->umem = NULL;
+ return -ENOMEM;
+ }
+ npages = ib_umem_page_count(mr->umem);
+
+ if (hr_dev->caps.pbl_hop_num) {
+ ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
+ if (ret)
+ goto release_umem;
+ } else {
+ mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
+ &(mr->pbl_dma_addr),
+ GFP_KERNEL);
+ if (!mr->pbl_buf) {
+ ret = -ENOMEM;
+ goto release_umem;
+ }
+ }
+
+ ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
+ mr_access_flags, virt_addr,
+ length, mailbox->buf);
+ if (ret)
+ goto release_umem;
+
+
+ ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
+ if (ret) {
+ if (mr->size != ~0ULL) {
+ npages = ib_umem_page_count(mr->umem);
+
+ if (hr_dev->caps.pbl_hop_num)
+ hns_roce_mhop_free(hr_dev, mr);
+ else
+ dma_free_coherent(dev, npages * 8,
+ mr->pbl_buf,
+ mr->pbl_dma_addr);
+ }
+
+ goto release_umem;
+ }
+
+ return 0;
+
+release_umem:
+ ib_umem_release(mr->umem);
+ return ret;
+
+}
+
+
int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
struct ib_udata *udata)
@@ -1216,7 +1293,6 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
struct device *dev = hr_dev->dev;
unsigned long mtpt_idx;
u32 pdn = 0;
- int npages;
int ret;
if (!mr->enabled)
@@ -1243,73 +1319,25 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
pdn = to_hr_pd(pd)->pdn;
if (flags & IB_MR_REREG_TRANS) {
- if (mr->size != ~0ULL) {
- npages = ib_umem_page_count(mr->umem);
-
- if (hr_dev->caps.pbl_hop_num)
- hns_roce_mhop_free(hr_dev, mr);
- else
- dma_free_coherent(dev, npages * 8, mr->pbl_buf,
- mr->pbl_dma_addr);
- }
- ib_umem_release(mr->umem);
-
- mr->umem =
- ib_umem_get(udata, start, length, mr_access_flags, 0);
- if (IS_ERR(mr->umem)) {
- ret = PTR_ERR(mr->umem);
- mr->umem = NULL;
+ ret = rereg_mr_trans(ibmr, flags,
+ start, length,
+ virt_addr, mr_access_flags,
+ mailbox, pdn, udata);
+ if (ret)
goto free_cmd_mbox;
- }
- npages = ib_umem_page_count(mr->umem);
-
- if (hr_dev->caps.pbl_hop_num) {
- ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
- if (ret)
- goto release_umem;
- } else {
- mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
- &(mr->pbl_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_buf) {
- ret = -ENOMEM;
- goto release_umem;
- }
- }
- }
-
- ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
- mr_access_flags, virt_addr,
- length, mailbox->buf);
- if (ret) {
- if (flags & IB_MR_REREG_TRANS)
- goto release_umem;
- else
+ } else {
+ ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
+ mr_access_flags, virt_addr,
+ length, mailbox->buf);
+ if (ret)
goto free_cmd_mbox;
}
- if (flags & IB_MR_REREG_TRANS) {
- ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
- if (ret) {
- if (mr->size != ~0ULL) {
- npages = ib_umem_page_count(mr->umem);
-
- if (hr_dev->caps.pbl_hop_num)
- hns_roce_mhop_free(hr_dev, mr);
- else
- dma_free_coherent(dev, npages * 8,
- mr->pbl_buf,
- mr->pbl_dma_addr);
- }
-
- goto release_umem;
- }
- }
-
ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, mtpt_idx);
if (ret) {
dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
- goto release_umem;
+ ib_umem_release(mr->umem);
+ goto free_cmd_mbox;
}
mr->enabled = 1;
@@ -1320,9 +1348,6 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
return 0;
-release_umem:
- ib_umem_release(mr->umem);
-
free_cmd_mbox:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
--
1.9.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH for-next 8/9] RDMA/hns: Refactor hem table mhop check and calculation
2019-07-08 13:41 [PATCH for-next 0/9] Codes optimization for hip08 Lijun Ou
` (6 preceding siblings ...)
2019-07-08 13:41 ` [PATCH for-next 7/9] RDMA/hns: Package for hns_roce_rereg_user_mr function Lijun Ou
@ 2019-07-08 13:41 ` Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 9/9] RDMA/hns: Refactor eq table init for hip08 Lijun Ou
2019-07-25 15:42 ` [PATCH for-next 0/9] Codes optimization " Jason Gunthorpe
9 siblings, 0 replies; 11+ messages in thread
From: Lijun Ou @ 2019-07-08 13:41 UTC (permalink / raw)
To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm
The calculation of mhop for hem is duplicated in
hns_roce_init_hem_table and hns_roce_calc_hem_mhop,
extracting it from them to a separated function. Moreover,
this patch refactor hns_roce_check_whether_mhop to
reduce complexity.
Signed-off-by: Yixian Liu <liuyixian@huawei.com>
Signed-off-by: Lijun Ou <oulijun@huawei.com>
---
drivers/infiniband/hw/hns/hns_roce_hem.c | 180 ++++++++++++-------------------
1 file changed, 70 insertions(+), 110 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index f4da5bd..6150311 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -41,20 +41,47 @@
bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
{
- if ((hr_dev->caps.qpc_hop_num && type == HEM_TYPE_QPC) ||
- (hr_dev->caps.mpt_hop_num && type == HEM_TYPE_MTPT) ||
- (hr_dev->caps.cqc_hop_num && type == HEM_TYPE_CQC) ||
- (hr_dev->caps.srqc_hop_num && type == HEM_TYPE_SRQC) ||
- (hr_dev->caps.sccc_hop_num && type == HEM_TYPE_SCCC) ||
- (hr_dev->caps.qpc_timer_hop_num && type == HEM_TYPE_QPC_TIMER) ||
- (hr_dev->caps.cqc_timer_hop_num && type == HEM_TYPE_CQC_TIMER) ||
- (hr_dev->caps.cqe_hop_num && type == HEM_TYPE_CQE) ||
- (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT) ||
- (hr_dev->caps.srqwqe_hop_num && type == HEM_TYPE_SRQWQE) ||
- (hr_dev->caps.idx_hop_num && type == HEM_TYPE_IDX))
- return true;
-
- return false;
+ int hop_num = 0;
+
+ switch (type) {
+ case HEM_TYPE_QPC:
+ hop_num = hr_dev->caps.qpc_hop_num;
+ break;
+ case HEM_TYPE_MTPT:
+ hop_num = hr_dev->caps.mpt_hop_num;
+ break;
+ case HEM_TYPE_CQC:
+ hop_num = hr_dev->caps.cqc_hop_num;
+ break;
+ case HEM_TYPE_SRQC:
+ hop_num = hr_dev->caps.srqc_hop_num;
+ break;
+ case HEM_TYPE_SCCC:
+ hop_num = hr_dev->caps.sccc_hop_num;
+ break;
+ case HEM_TYPE_QPC_TIMER:
+ hop_num = hr_dev->caps.qpc_timer_hop_num;
+ break;
+ case HEM_TYPE_CQC_TIMER:
+ hop_num = hr_dev->caps.cqc_timer_hop_num;
+ break;
+ case HEM_TYPE_CQE:
+ hop_num = hr_dev->caps.cqe_hop_num;
+ break;
+ case HEM_TYPE_MTT:
+ hop_num = hr_dev->caps.mtt_hop_num;
+ break;
+ case HEM_TYPE_SRQWQE:
+ hop_num = hr_dev->caps.srqwqe_hop_num;
+ break;
+ case HEM_TYPE_IDX:
+ hop_num = hr_dev->caps.idx_hop_num;
+ break;
+ default:
+ return false;
+ }
+
+ return hop_num ? true : false;
}
static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 start_idx,
@@ -92,17 +119,13 @@ static int hns_roce_get_bt_num(u32 table_type, u32 hop_num)
return 0;
}
-int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table, unsigned long *obj,
- struct hns_roce_hem_mhop *mhop)
+static int get_hem_table_config(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_mhop *mhop,
+ u32 type)
{
struct device *dev = hr_dev->dev;
- u32 chunk_ba_num;
- u32 table_idx;
- u32 bt_num;
- u32 chunk_size;
- switch (table->type) {
+ switch (type) {
case HEM_TYPE_QPC:
mhop->buf_chunk_size = 1 << (hr_dev->caps.qpc_buf_pg_sz
+ PAGE_SHIFT);
@@ -193,10 +216,26 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
break;
default:
dev_err(dev, "Table %d not support multi-hop addressing!\n",
- table->type);
+ type);
return -EINVAL;
}
+ return 0;
+}
+
+int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long *obj,
+ struct hns_roce_hem_mhop *mhop)
+{
+ struct device *dev = hr_dev->dev;
+ u32 chunk_ba_num;
+ u32 table_idx;
+ u32 bt_num;
+ u32 chunk_size;
+
+ if (get_hem_table_config(hr_dev, mhop, table->type))
+ return -EINVAL;
+
if (!obj)
return 0;
@@ -887,7 +926,6 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
unsigned long obj_size, unsigned long nobj,
int use_lowmem)
{
- struct device *dev = hr_dev->dev;
unsigned long obj_per_chunk;
unsigned long num_hem;
@@ -900,99 +938,21 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
if (!table->hem)
return -ENOMEM;
} else {
+ struct hns_roce_hem_mhop mhop = {};
unsigned long buf_chunk_size;
unsigned long bt_chunk_size;
unsigned long bt_chunk_num;
unsigned long num_bt_l0 = 0;
u32 hop_num;
- switch (type) {
- case HEM_TYPE_QPC:
- buf_chunk_size = 1 << (hr_dev->caps.qpc_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.qpc_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.qpc_bt_num;
- hop_num = hr_dev->caps.qpc_hop_num;
- break;
- case HEM_TYPE_MTPT:
- buf_chunk_size = 1 << (hr_dev->caps.mpt_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.mpt_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.mpt_bt_num;
- hop_num = hr_dev->caps.mpt_hop_num;
- break;
- case HEM_TYPE_CQC:
- buf_chunk_size = 1 << (hr_dev->caps.cqc_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.cqc_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.cqc_bt_num;
- hop_num = hr_dev->caps.cqc_hop_num;
- break;
- case HEM_TYPE_SCCC:
- buf_chunk_size = 1 << (hr_dev->caps.sccc_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.sccc_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.sccc_bt_num;
- hop_num = hr_dev->caps.sccc_hop_num;
- break;
- case HEM_TYPE_QPC_TIMER:
- buf_chunk_size = 1 << (hr_dev->caps.qpc_timer_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.qpc_timer_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.qpc_timer_bt_num;
- hop_num = hr_dev->caps.qpc_timer_hop_num;
- break;
- case HEM_TYPE_CQC_TIMER:
- buf_chunk_size = 1 << (hr_dev->caps.cqc_timer_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.cqc_timer_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.cqc_timer_bt_num;
- hop_num = hr_dev->caps.cqc_timer_hop_num;
- break;
- case HEM_TYPE_SRQC:
- buf_chunk_size = 1 << (hr_dev->caps.srqc_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.srqc_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.srqc_bt_num;
- hop_num = hr_dev->caps.srqc_hop_num;
- break;
- case HEM_TYPE_MTT:
- buf_chunk_size = 1 << (hr_dev->caps.mtt_ba_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = buf_chunk_size;
- hop_num = hr_dev->caps.mtt_hop_num;
- break;
- case HEM_TYPE_CQE:
- buf_chunk_size = 1 << (hr_dev->caps.cqe_ba_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = buf_chunk_size;
- hop_num = hr_dev->caps.cqe_hop_num;
- break;
- case HEM_TYPE_SRQWQE:
- buf_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = buf_chunk_size;
- hop_num = hr_dev->caps.srqwqe_hop_num;
- break;
- case HEM_TYPE_IDX:
- buf_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = buf_chunk_size;
- hop_num = hr_dev->caps.idx_hop_num;
- break;
- default:
- dev_err(dev,
- "Table %d not support to init hem table here!\n",
- type);
+ if (get_hem_table_config(hr_dev, &mhop, type))
return -EINVAL;
- }
+
+ buf_chunk_size = mhop.buf_chunk_size;
+ bt_chunk_size = mhop.bt_chunk_size;
+ num_bt_l0 = mhop.ba_l0_num;
+ hop_num = mhop.hop_num;
+
obj_per_chunk = buf_chunk_size / obj_size;
num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk;
bt_chunk_num = bt_chunk_size / BA_BYTE_LEN;
--
1.9.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH for-next 9/9] RDMA/hns: Refactor eq table init for hip08
2019-07-08 13:41 [PATCH for-next 0/9] Codes optimization for hip08 Lijun Ou
` (7 preceding siblings ...)
2019-07-08 13:41 ` [PATCH for-next 8/9] RDMA/hns: Refactor hem table mhop check and calculation Lijun Ou
@ 2019-07-08 13:41 ` Lijun Ou
2019-07-25 15:42 ` [PATCH for-next 0/9] Codes optimization " Jason Gunthorpe
9 siblings, 0 replies; 11+ messages in thread
From: Lijun Ou @ 2019-07-08 13:41 UTC (permalink / raw)
To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm
From: Yixian Liu <liuyixian@huawei.com>
To make the code more readable, here moves the part of
naming irq and request irq out of eq table init into an
seperate function.
Signed-off-by: Yixian Liu <liuyixian@huawei.com>
Signed-off-by: Lijun Ou <oulijun@huawei.com>
---
drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 176 ++++++++++++++++++-----------
1 file changed, 107 insertions(+), 69 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index c2ddfc1..83c58be 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -5735,6 +5735,94 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
return ret;
}
+static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
+ int comp_num, int aeq_num, int other_num)
+{
+ struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
+ int i, j;
+ int ret;
+
+ for (i = 0; i < irq_num; i++) {
+ hr_dev->irq_names[i] = kzalloc(HNS_ROCE_INT_NAME_LEN,
+ GFP_KERNEL);
+ if (!hr_dev->irq_names[i]) {
+ ret = -ENOMEM;
+ goto err_kzalloc_failed;
+ }
+ }
+
+ /* irq contains: abnormal + AEQ + CEQ*/
+ for (j = 0; j < irq_num; j++)
+ if (j < other_num)
+ snprintf((char *)hr_dev->irq_names[j],
+ HNS_ROCE_INT_NAME_LEN, "hns-abn-%d", j);
+ else if (j < (other_num + aeq_num))
+ snprintf((char *)hr_dev->irq_names[j],
+ HNS_ROCE_INT_NAME_LEN, "hns-aeq-%d",
+ j - other_num);
+ else
+ snprintf((char *)hr_dev->irq_names[j],
+ HNS_ROCE_INT_NAME_LEN, "hns-ceq-%d",
+ j - other_num - aeq_num);
+
+ for (j = 0; j < irq_num; j++) {
+ if (j < other_num)
+ ret = request_irq(hr_dev->irq[j],
+ hns_roce_v2_msix_interrupt_abn,
+ 0, hr_dev->irq_names[j], hr_dev);
+
+ else if (j < (other_num + comp_num))
+ ret = request_irq(eq_table->eq[j - other_num].irq,
+ hns_roce_v2_msix_interrupt_eq,
+ 0, hr_dev->irq_names[j + aeq_num],
+ &eq_table->eq[j - other_num]);
+ else
+ ret = request_irq(eq_table->eq[j - other_num].irq,
+ hns_roce_v2_msix_interrupt_eq,
+ 0, hr_dev->irq_names[j - comp_num],
+ &eq_table->eq[j - other_num]);
+ if (ret) {
+ dev_err(hr_dev->dev, "Request irq error!\n");
+ goto err_request_failed;
+ }
+ }
+
+ return 0;
+
+err_request_failed:
+ for (j -= 1; j >= 0; j--)
+ if (j < other_num)
+ free_irq(hr_dev->irq[j], hr_dev);
+ else
+ free_irq(eq_table->eq[j - other_num].irq,
+ &eq_table->eq[j - other_num]);
+
+err_kzalloc_failed:
+ for (i -= 1; i >= 0; i--)
+ kfree(hr_dev->irq_names[i]);
+
+ return ret;
+}
+
+static void __hns_roce_free_irq(struct hns_roce_dev *hr_dev)
+{
+ int irq_num;
+ int eq_num;
+ int i;
+
+ eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
+ irq_num = eq_num + hr_dev->caps.num_other_vectors;
+
+ for (i = 0; i < hr_dev->caps.num_other_vectors; i++)
+ free_irq(hr_dev->irq[i], hr_dev);
+
+ for (i = 0; i < eq_num; i++)
+ free_irq(hr_dev->eq_table.eq[i].irq, &hr_dev->eq_table.eq[i]);
+
+ for (i = 0; i < irq_num; i++)
+ kfree(hr_dev->irq_names[i]);
+}
+
static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
@@ -5746,7 +5834,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
int other_num;
int comp_num;
int aeq_num;
- int i, j, k;
+ int i;
int ret;
other_num = hr_dev->caps.num_other_vectors;
@@ -5760,27 +5848,18 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
if (!eq_table->eq)
return -ENOMEM;
- for (i = 0; i < irq_num; i++) {
- hr_dev->irq_names[i] = kzalloc(HNS_ROCE_INT_NAME_LEN,
- GFP_KERNEL);
- if (!hr_dev->irq_names[i]) {
- ret = -ENOMEM;
- goto err_failed_kzalloc;
- }
- }
-
/* create eq */
- for (j = 0; j < eq_num; j++) {
- eq = &eq_table->eq[j];
+ for (i = 0; i < eq_num; i++) {
+ eq = &eq_table->eq[i];
eq->hr_dev = hr_dev;
- eq->eqn = j;
- if (j < comp_num) {
+ eq->eqn = i;
+ if (i < comp_num) {
/* CEQ */
eq_cmd = HNS_ROCE_CMD_CREATE_CEQC;
eq->type_flag = HNS_ROCE_CEQ;
eq->entries = hr_dev->caps.ceqe_depth;
eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE;
- eq->irq = hr_dev->irq[j + other_num + aeq_num];
+ eq->irq = hr_dev->irq[i + other_num + aeq_num];
eq->eq_max_cnt = HNS_ROCE_CEQ_DEFAULT_BURST_NUM;
eq->eq_period = HNS_ROCE_CEQ_DEFAULT_INTERVAL;
} else {
@@ -5789,7 +5868,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
eq->type_flag = HNS_ROCE_AEQ;
eq->entries = hr_dev->caps.aeqe_depth;
eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE;
- eq->irq = hr_dev->irq[j - comp_num + other_num];
+ eq->irq = hr_dev->irq[i - comp_num + other_num];
eq->eq_max_cnt = HNS_ROCE_AEQ_DEFAULT_BURST_NUM;
eq->eq_period = HNS_ROCE_AEQ_DEFAULT_INTERVAL;
}
@@ -5804,40 +5883,11 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
/* enable irq */
hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_ENABLE);
- /* irq contains: abnormal + AEQ + CEQ*/
- for (k = 0; k < irq_num; k++)
- if (k < other_num)
- snprintf((char *)hr_dev->irq_names[k],
- HNS_ROCE_INT_NAME_LEN, "hns-abn-%d", k);
- else if (k < (other_num + aeq_num))
- snprintf((char *)hr_dev->irq_names[k],
- HNS_ROCE_INT_NAME_LEN, "hns-aeq-%d",
- k - other_num);
- else
- snprintf((char *)hr_dev->irq_names[k],
- HNS_ROCE_INT_NAME_LEN, "hns-ceq-%d",
- k - other_num - aeq_num);
-
- for (k = 0; k < irq_num; k++) {
- if (k < other_num)
- ret = request_irq(hr_dev->irq[k],
- hns_roce_v2_msix_interrupt_abn,
- 0, hr_dev->irq_names[k], hr_dev);
-
- else if (k < (other_num + comp_num))
- ret = request_irq(eq_table->eq[k - other_num].irq,
- hns_roce_v2_msix_interrupt_eq,
- 0, hr_dev->irq_names[k + aeq_num],
- &eq_table->eq[k - other_num]);
- else
- ret = request_irq(eq_table->eq[k - other_num].irq,
- hns_roce_v2_msix_interrupt_eq,
- 0, hr_dev->irq_names[k - comp_num],
- &eq_table->eq[k - other_num]);
- if (ret) {
- dev_err(dev, "Request irq error!\n");
- goto err_request_irq_fail;
- }
+ ret = __hns_roce_request_irq(hr_dev, irq_num, comp_num,
+ aeq_num, other_num);
+ if (ret) {
+ dev_err(dev, "Request irq failed.\n");
+ goto err_request_irq_fail;
}
hr_dev->irq_workq =
@@ -5845,26 +5895,20 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
if (!hr_dev->irq_workq) {
dev_err(dev, "Create irq workqueue failed!\n");
ret = -ENOMEM;
- goto err_request_irq_fail;
+ goto err_create_wq_fail;
}
return 0;
+err_create_wq_fail:
+ __hns_roce_free_irq(hr_dev);
+
err_request_irq_fail:
- for (k -= 1; k >= 0; k--)
- if (k < other_num)
- free_irq(hr_dev->irq[k], hr_dev);
- else
- free_irq(eq_table->eq[k - other_num].irq,
- &eq_table->eq[k - other_num]);
+ hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_DISABLE);
err_create_eq_fail:
- for (j -= 1; j >= 0; j--)
- hns_roce_v2_free_eq(hr_dev, &eq_table->eq[j]);
-
-err_failed_kzalloc:
for (i -= 1; i >= 0; i--)
- kfree(hr_dev->irq_names[i]);
+ hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]);
kfree(eq_table->eq);
return ret;
@@ -5883,20 +5927,14 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
/* Disable irq */
hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_DISABLE);
- for (i = 0; i < hr_dev->caps.num_other_vectors; i++)
- free_irq(hr_dev->irq[i], hr_dev);
+ __hns_roce_free_irq(hr_dev);
for (i = 0; i < eq_num; i++) {
hns_roce_v2_destroy_eqc(hr_dev, i);
- free_irq(eq_table->eq[i].irq, &eq_table->eq[i]);
-
hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]);
}
- for (i = 0; i < irq_num; i++)
- kfree(hr_dev->irq_names[i]);
-
kfree(eq_table->eq);
flush_workqueue(hr_dev->irq_workq);
--
1.9.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH for-next 0/9] Codes optimization for hip08
2019-07-08 13:41 [PATCH for-next 0/9] Codes optimization for hip08 Lijun Ou
` (8 preceding siblings ...)
2019-07-08 13:41 ` [PATCH for-next 9/9] RDMA/hns: Refactor eq table init for hip08 Lijun Ou
@ 2019-07-25 15:42 ` Jason Gunthorpe
9 siblings, 0 replies; 11+ messages in thread
From: Jason Gunthorpe @ 2019-07-25 15:42 UTC (permalink / raw)
To: Lijun Ou; +Cc: dledford, leon, linux-rdma, linuxarm
On Mon, Jul 08, 2019 at 09:41:16PM +0800, Lijun Ou wrote:
> Here are codes optimization in order to reduce complexity and
> add readability.
>
> Lijun Ou (6):
> RDMA/hns: Package the flow of creating cq
> RDMA/hns: Refactor the code of creating srq
> RDMA/hns: Refactor for hns_roce_v2_modify_qp function
> RDMA/hns: Use a separated function for setting extend sge paramters
> RDMA/hns: Package for hns_roce_rereg_user_mr function
> RDMA/hns: Refactor hem table mhop check and calculation
>
> Xi Wang (1):
> RDMA/hns: optimize the duplicated code for qpc setting flow
>
> Yixian Liu (1):
> RDMA/hns: Refactor eq table init for hip08
>
> chenglang (1):
> RDMA/hns: Optimize hns_roce_mhop_alloc function.
Applied to for-next
Thanks,
Jason
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2019-07-25 15:42 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-08 13:41 [PATCH for-next 0/9] Codes optimization for hip08 Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 1/9] RDMA/hns: Package the flow of creating cq Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 2/9] RDMA/hns: Refactor the code of creating srq Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 3/9] RDMA/hns: Refactor for hns_roce_v2_modify_qp function Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 4/9] RDMA/hns: Use a separated function for setting extend sge paramters Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 5/9] RDMA/hns: optimize the duplicated code for qpc setting flow Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 6/9] RDMA/hns: Optimize hns_roce_mhop_alloc function Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 7/9] RDMA/hns: Package for hns_roce_rereg_user_mr function Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 8/9] RDMA/hns: Refactor hem table mhop check and calculation Lijun Ou
2019-07-08 13:41 ` [PATCH for-next 9/9] RDMA/hns: Refactor eq table init for hip08 Lijun Ou
2019-07-25 15:42 ` [PATCH for-next 0/9] Codes optimization " Jason Gunthorpe
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.