From: Weihang Li <liweihang@huawei.com>
To: Leon Romanovsky <leon@kernel.org>
Cc: <dledford@redhat.com>, <jgg@ziepe.ca>,
<linux-rdma@vger.kernel.org>, <linuxarm@huawei.com>
Subject: Re: [PATCH for-next] RDMA/hns: Optimize eqe buffer allocation flow
Date: Wed, 5 Feb 2020 14:04:09 +0800 [thread overview]
Message-ID: <10b7a08c-e069-0751-8bde-e5d19521c0b2@huawei.com> (raw)
In-Reply-To: <20200127055205.GH3870@unreal>
On 2020/1/27 13:52, Leon Romanovsky wrote:
> On Sun, Jan 26, 2020 at 10:58:35PM +0800, Weihang Li wrote:
>> From: Xi Wang <wangxi11@huawei.com>
>>
>> The eqe has a private multi-hop addressing implementation, but there is
>> already a set of interfaces in the hns driver that can achieve this.
>>
>> So, simplify the eqe buffer allocation process by using the mtr interface
>> and remove large amount of repeated logic.
>>
>> Signed-off-by: Xi Wang <wangxi11@huawei.com>
>> Signed-off-by: Weihang Li <liweihang@huawei.com>
>> ---
>> drivers/infiniband/hw/hns/hns_roce_device.h | 10 +-
>> drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 481 ++++++----------------------
>> 2 files changed, 108 insertions(+), 383 deletions(-)
>>
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
>> index a4c45bf..dab3f3c 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
>> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
>> @@ -757,14 +757,8 @@ struct hns_roce_eq {
>> int eqe_ba_pg_sz;
>> int eqe_buf_pg_sz;
>> int hop_num;
>> - u64 *bt_l0; /* Base address table for L0 */
>> - u64 **bt_l1; /* Base address table for L1 */
>> - u64 **buf;
>> - dma_addr_t l0_dma;
>> - dma_addr_t *l1_dma;
>> - dma_addr_t *buf_dma;
>> - u32 l0_last_num; /* L0 last chunk num */
>> - u32 l1_last_num; /* L1 last chunk num */
>> + struct hns_roce_mtr mtr;
>> + struct hns_roce_buf buf;
>> int eq_max_cnt;
>> int eq_period;
>> int shift;
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> index c462b19..88f2e76 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> @@ -5287,44 +5287,24 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
>> hns_roce_write64(hr_dev, doorbell, eq->doorbell);
>> }
>>
>> -static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
>> +static inline void *get_eqe_buf(struct hns_roce_eq *eq, unsigned long offset)
>> {
>> u32 buf_chk_sz;
>> - unsigned long off;
>>
>> buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> - off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
>> -
>> - return (struct hns_roce_aeqe *)((char *)(eq->buf_list->buf) +
>> - off % buf_chk_sz);
>> -}
>> -
>> -static struct hns_roce_aeqe *mhop_get_aeqe(struct hns_roce_eq *eq, u32 entry)
>> -{
>> - u32 buf_chk_sz;
>> - unsigned long off;
>> -
>> - buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> -
>> - off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
>> -
>> - if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
>> - return (struct hns_roce_aeqe *)((u8 *)(eq->bt_l0) +
>> - off % buf_chk_sz);
>> + if (eq->buf.nbufs == 1)
>> + return eq->buf.direct.buf + offset % buf_chk_sz;
>> else
>> - return (struct hns_roce_aeqe *)((u8 *)
>> - (eq->buf[off / buf_chk_sz]) + off % buf_chk_sz);
>> + return eq->buf.page_list[offset / buf_chk_sz].buf +
>> + offset % buf_chk_sz;
>> }
>>
>> static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
>> {
>> struct hns_roce_aeqe *aeqe;
>>
>> - if (!eq->hop_num)
>> - aeqe = get_aeqe_v2(eq, eq->cons_index);
>> - else
>> - aeqe = mhop_get_aeqe(eq, eq->cons_index);
>> -
>> + aeqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
>> + HNS_ROCE_AEQ_ENTRY_SIZE);
>> return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
>> !!(eq->cons_index & eq->entries)) ? aeqe : NULL;
>> }
>> @@ -5417,44 +5397,12 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
>> return aeqe_found;
>> }
>>
>> -static struct hns_roce_ceqe *get_ceqe_v2(struct hns_roce_eq *eq, u32 entry)
>> -{
>> - u32 buf_chk_sz;
>> - unsigned long off;
>> -
>> - buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> - off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
>> -
>> - return (struct hns_roce_ceqe *)((char *)(eq->buf_list->buf) +
>> - off % buf_chk_sz);
>> -}
>> -
>> -static struct hns_roce_ceqe *mhop_get_ceqe(struct hns_roce_eq *eq, u32 entry)
>> -{
>> - u32 buf_chk_sz;
>> - unsigned long off;
>> -
>> - buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> -
>> - off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
>> -
>> - if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
>> - return (struct hns_roce_ceqe *)((u8 *)(eq->bt_l0) +
>> - off % buf_chk_sz);
>> - else
>> - return (struct hns_roce_ceqe *)((u8 *)(eq->buf[off /
>> - buf_chk_sz]) + off % buf_chk_sz);
>> -}
>> -
>> static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
>> {
>> struct hns_roce_ceqe *ceqe;
>>
>> - if (!eq->hop_num)
>> - ceqe = get_ceqe_v2(eq, eq->cons_index);
>> - else
>> - ceqe = mhop_get_ceqe(eq, eq->cons_index);
>> -
>> + ceqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
>> + HNS_ROCE_CEQ_ENTRY_SIZE);
>> return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
>> (!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
>> }
>> @@ -5614,90 +5562,11 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn)
>> dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn);
>> }
>>
>> -static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev,
>> - struct hns_roce_eq *eq)
>> +static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
>> {
>> - struct device *dev = hr_dev->dev;
>> - u64 idx;
>> - u64 size;
>> - u32 buf_chk_sz;
>> - u32 bt_chk_sz;
>> - u32 mhop_num;
>> - int eqe_alloc;
>> - int i = 0;
>> - int j = 0;
>> -
>> - mhop_num = hr_dev->caps.eqe_hop_num;
>> - buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
>> - bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
>> -
>> - if (mhop_num == HNS_ROCE_HOP_NUM_0) {
>> - dma_free_coherent(dev, (unsigned int)(eq->entries *
>> - eq->eqe_size), eq->bt_l0, eq->l0_dma);
>> - return;
>> - }
>> -
>> - dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
>> - if (mhop_num == 1) {
>> - for (i = 0; i < eq->l0_last_num; i++) {
>> - if (i == eq->l0_last_num - 1) {
>> - eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
>> - size = (eq->entries - eqe_alloc) * eq->eqe_size;
>> - dma_free_coherent(dev, size, eq->buf[i],
>> - eq->buf_dma[i]);
>> - break;
>> - }
>> - dma_free_coherent(dev, buf_chk_sz, eq->buf[i],
>> - eq->buf_dma[i]);
>> - }
>> - } else if (mhop_num == 2) {
>> - for (i = 0; i < eq->l0_last_num; i++) {
>> - dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
>> - eq->l1_dma[i]);
>> -
>> - for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
>> - idx = i * (bt_chk_sz / BA_BYTE_LEN) + j;
>> - if ((i == eq->l0_last_num - 1)
>> - && j == eq->l1_last_num - 1) {
>> - eqe_alloc = (buf_chk_sz / eq->eqe_size)
>> - * idx;
>> - size = (eq->entries - eqe_alloc)
>> - * eq->eqe_size;
>> - dma_free_coherent(dev, size,
>> - eq->buf[idx],
>> - eq->buf_dma[idx]);
>> - break;
>> - }
>> - dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
>> - eq->buf_dma[idx]);
>> - }
>> - }
>> - }
>> - kfree(eq->buf_dma);
>> - kfree(eq->buf);
>> - kfree(eq->l1_dma);
>> - kfree(eq->bt_l1);
>> - eq->buf_dma = NULL;
>> - eq->buf = NULL;
>> - eq->l1_dma = NULL;
>> - eq->bt_l1 = NULL;
>> -}
>> -
>> -static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev,
>> - struct hns_roce_eq *eq)
>> -{
>> - u32 buf_chk_sz;
>> -
>> - buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> -
>> - if (hr_dev->caps.eqe_hop_num) {
>> - hns_roce_mhop_free_eq(hr_dev, eq);
>> - return;
>> - }
>> -
>> - dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf,
>> - eq->buf_list->map);
>> - kfree(eq->buf_list);
>> + if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0)
>> + hns_roce_mtr_cleanup(hr_dev, &eq->mtr);
>> + hns_roce_buf_free(hr_dev, eq->buf.size, &eq->buf);
>> }
>>
>> static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>> @@ -5705,6 +5574,8 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>> void *mb_buf)
>> {
>> struct hns_roce_eq_context *eqc;
>> + u64 ba[MTT_MIN_COUNT] = { 0 };
>> + int count;
>>
>> eqc = mb_buf;
>> memset(eqc, 0, sizeof(struct hns_roce_eq_context));
>> @@ -5720,10 +5591,23 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>> eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz;
>> eq->shift = ilog2((unsigned int)eq->entries);
>>
>> - if (!eq->hop_num)
>> - eq->eqe_ba = eq->buf_list->map;
>> - else
>> - eq->eqe_ba = eq->l0_dma;
>> + /* if not muti-hop, eqe buffer only use one trunk */
>> + if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) {
>> + eq->eqe_ba = eq->buf.direct.map;
>> + eq->cur_eqe_ba = eq->eqe_ba;
>> + if (eq->buf.npages > 1)
>> + eq->nxt_eqe_ba = eq->eqe_ba + (1 << eq->eqe_buf_pg_sz);
>> + else
>> + eq->nxt_eqe_ba = eq->eqe_ba;
>> + } else {
>> + count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, ba,
>> + MTT_MIN_COUNT, &eq->eqe_ba);
>> + eq->cur_eqe_ba = ba[0];
>> + if (count > 1)
>> + eq->nxt_eqe_ba = ba[1];
>> + else
>> + eq->nxt_eqe_ba = ba[0];
>> + }
>>
>> /* set eqc state */
>> roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQ_ST_M, HNS_ROCE_EQC_EQ_ST_S,
>> @@ -5821,220 +5705,97 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>> HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44);
>> }
>>
>> -static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
>> - struct hns_roce_eq *eq)
>> +static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
>> + u32 page_shift)
>> {
>> - struct device *dev = hr_dev->dev;
>> - int eq_alloc_done = 0;
>> - int eq_buf_cnt = 0;
>> - int eqe_alloc;
>> - u32 buf_chk_sz;
>> - u32 bt_chk_sz;
>> - u32 mhop_num;
>> - u64 size;
>> - u64 idx;
>> + struct hns_roce_buf_region region = {};
>> + dma_addr_t *buf_list = NULL;
>> int ba_num;
>> - int bt_num;
>> - int record_i;
>> - int record_j;
>> - int i = 0;
>> - int j = 0;
>> -
>> - mhop_num = hr_dev->caps.eqe_hop_num;
>> - buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
>> - bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
>> + int ret;
>>
>> ba_num = DIV_ROUND_UP(PAGE_ALIGN(eq->entries * eq->eqe_size),
>> - buf_chk_sz);
>> - bt_num = DIV_ROUND_UP(ba_num, bt_chk_sz / BA_BYTE_LEN);
>> + 1 << page_shift);
>> + hns_roce_init_buf_region(®ion, hr_dev->caps.eqe_hop_num, 0, ba_num);
>>
>> - if (mhop_num == HNS_ROCE_HOP_NUM_0) {
>> - if (eq->entries > buf_chk_sz / eq->eqe_size) {
>> - dev_err(dev, "eq entries %d is larger than buf_pg_sz!",
>> - eq->entries);
>> - return -EINVAL;
>> - }
>> - eq->bt_l0 = dma_alloc_coherent(dev, eq->entries * eq->eqe_size,
>> - &(eq->l0_dma), GFP_KERNEL);
>> - if (!eq->bt_l0)
>> - return -ENOMEM;
>> -
>> - eq->cur_eqe_ba = eq->l0_dma;
>> - eq->nxt_eqe_ba = 0;
>> + /* alloc a tmp list for storing eq buf address */
>> + ret = hns_roce_alloc_buf_list(®ion, &buf_list, 1);
>> + if (ret) {
>> + dev_err(hr_dev->dev, "alloc eq buf_list error\n");
>
> The same comment like we gave for bnxt driver, no dev_* prints inside
> driver, use ibdev_*.
>
> Thanks
>
Hi Leon,
map_eq_buf() is called before ib_register_device(), so we can't use
ibdev_* here.
Thanks for your reminder; another patch that replaces the remaining dev_* calls in
the hns driver with ibdev_* is in preparation.
Weihang
> .
>
next prev parent reply other threads:[~2020-02-05 6:04 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-01-26 14:58 [PATCH for-next] RDMA/hns: Optimize eqe buffer allocation flow Weihang Li
2020-01-27 5:52 ` Leon Romanovsky
2020-01-27 7:47 ` Weihang Li
2020-02-05 6:04 ` Weihang Li [this message]
2020-02-10 9:25 ` Leon Romanovsky
2020-02-10 9:48 ` Weihang Li
2020-02-10 10:21 ` Leon Romanovsky
2020-02-10 11:26 ` Weihang Li
2020-02-12 8:18 ` Leon Romanovsky
2020-02-13 2:16 ` Weihang Li
2020-02-11 18:27 ` Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=10b7a08c-e069-0751-8bde-e5d19521c0b2@huawei.com \
--to=liweihang@huawei.com \
--cc=dledford@redhat.com \
--cc=jgg@ziepe.ca \
--cc=leon@kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=linuxarm@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).