From mboxrd@z Thu Jan 1 00:00:00 1970
Subject: Re: [PATCH for-next] RDMA/hns: Optimize eqe buffer allocation flow
From: Weihang Li
To: Leon Romanovsky
Date: Mon, 27 Jan 2020 15:47:40 +0800
Message-ID: <2a4b95d4-4d76-b969-4954-99138e20d181@huawei.com>
In-Reply-To: <20200127055205.GH3870@unreal>
References: <20200126145835.11368-1-liweihang@huawei.com> <20200127055205.GH3870@unreal>
X-Mailing-List: linux-rdma@vger.kernel.org

On 2020/1/27 13:52, Leon Romanovsky wrote:
> On Sun, Jan 26, 2020 at 10:58:35PM +0800, Weihang Li wrote:
>> From: Xi Wang
>>
>> The eqe has a private multi-hop addressing implementation, but there is
>> already a set of interfaces in the hns driver that can achieve this.
>>
>> So, simplify the eqe buffer allocation process by using the mtr interface
>> and remove a large amount of repeated logic.
>>
>> Signed-off-by: Xi Wang
>> Signed-off-by: Weihang Li
>> ---
>>  drivers/infiniband/hw/hns/hns_roce_device.h |  10 +-
>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 481 ++++++----------------
>>  2 files changed, 108 insertions(+), 383 deletions(-)
>>
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
>> index a4c45bf..dab3f3c 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
>> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
>> @@ -757,14 +757,8 @@ struct hns_roce_eq {
>>  	int eqe_ba_pg_sz;
>>  	int eqe_buf_pg_sz;
>>  	int hop_num;
>> -	u64 *bt_l0;	/* Base address table for L0 */
>> -	u64 **bt_l1;	/* Base address table for L1 */
>> -	u64 **buf;
>> -	dma_addr_t l0_dma;
>> -	dma_addr_t *l1_dma;
>> -	dma_addr_t *buf_dma;
>> -	u32 l0_last_num; /* L0 last chunk num */
>> -	u32 l1_last_num; /* L1 last chunk num */
>> +	struct hns_roce_mtr mtr;
>> +	struct hns_roce_buf buf;
>>  	int eq_max_cnt;
>>  	int eq_period;
>>  	int shift;
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> index c462b19..88f2e76 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> @@ -5287,44 +5287,24 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
>>  	hns_roce_write64(hr_dev, doorbell, eq->doorbell);
>>  }
>>
>> -static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
>> +static inline void *get_eqe_buf(struct hns_roce_eq *eq, unsigned long offset)
>>  {
>>  	u32 buf_chk_sz;
>> -	unsigned long off;
>>
>>  	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
>> -
>> -	return (struct hns_roce_aeqe *)((char *)(eq->buf_list->buf) +
>> -		off % buf_chk_sz);
>> -}
>> -
>> -static struct hns_roce_aeqe *mhop_get_aeqe(struct hns_roce_eq *eq, u32 entry)
>> -{
>> -	u32 buf_chk_sz;
>> -	unsigned long off;
>> -
>> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> -
>> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
>> -
>> -	if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
>> -		return (struct hns_roce_aeqe *)((u8 *)(eq->bt_l0) +
>> -			off % buf_chk_sz);
>> +	if (eq->buf.nbufs == 1)
>> +		return eq->buf.direct.buf + offset % buf_chk_sz;
>>  	else
>> -		return (struct hns_roce_aeqe *)((u8 *)
>> -			(eq->buf[off / buf_chk_sz]) + off % buf_chk_sz);
>> +		return eq->buf.page_list[offset / buf_chk_sz].buf +
>> +		       offset % buf_chk_sz;
>>  }
>>
>>  static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
>>  {
>>  	struct hns_roce_aeqe *aeqe;
>>
>> -	if (!eq->hop_num)
>> -		aeqe = get_aeqe_v2(eq, eq->cons_index);
>> -	else
>> -		aeqe = mhop_get_aeqe(eq, eq->cons_index);
>> -
>> +	aeqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
>> +			   HNS_ROCE_AEQ_ENTRY_SIZE);
>>  	return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
>>  		!!(eq->cons_index & eq->entries)) ? aeqe : NULL;
>>  }
>> @@ -5417,44 +5397,12 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
>>  	return aeqe_found;
>>  }
>>
>> -static struct hns_roce_ceqe *get_ceqe_v2(struct hns_roce_eq *eq, u32 entry)
>> -{
>> -	u32 buf_chk_sz;
>> -	unsigned long off;
>> -
>> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
>> -
>> -	return (struct hns_roce_ceqe *)((char *)(eq->buf_list->buf) +
>> -		off % buf_chk_sz);
>> -}
>> -
>> -static struct hns_roce_ceqe *mhop_get_ceqe(struct hns_roce_eq *eq, u32 entry)
>> -{
>> -	u32 buf_chk_sz;
>> -	unsigned long off;
>> -
>> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> -
>> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
>> -
>> -	if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
>> -		return (struct hns_roce_ceqe *)((u8 *)(eq->bt_l0) +
>> -			off % buf_chk_sz);
>> -	else
>> -		return (struct hns_roce_ceqe *)((u8 *)(eq->buf[off /
>> -			buf_chk_sz]) + off % buf_chk_sz);
>> -}
>> -
>>  static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
>>  {
>>  	struct hns_roce_ceqe *ceqe;
>>
>> -	if (!eq->hop_num)
>> -		ceqe = get_ceqe_v2(eq, eq->cons_index);
>> -	else
>> -		ceqe = mhop_get_ceqe(eq, eq->cons_index);
>> -
>> +	ceqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
>> +			   HNS_ROCE_CEQ_ENTRY_SIZE);
>>  	return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
>>  		(!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
>>  }
>> @@ -5614,90 +5562,11 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn)
>>  		dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn);
>>  }
>>
>> -static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev,
>> -				  struct hns_roce_eq *eq)
>> +static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
>>  {
>> -	struct device *dev = hr_dev->dev;
>> -	u64 idx;
>> -	u64 size;
>> -	u32 buf_chk_sz;
>> -	u32 bt_chk_sz;
>> -	u32 mhop_num;
>> -	int eqe_alloc;
>> -	int i = 0;
>> -	int j = 0;
>> -
>> -	mhop_num = hr_dev->caps.eqe_hop_num;
>> -	buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
>> -	bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
>> -
>> -	if (mhop_num == HNS_ROCE_HOP_NUM_0) {
>> -		dma_free_coherent(dev, (unsigned int)(eq->entries *
>> -				  eq->eqe_size), eq->bt_l0, eq->l0_dma);
>> -		return;
>> -	}
>> -
>> -	dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
>> -	if (mhop_num == 1) {
>> -		for (i = 0; i < eq->l0_last_num; i++) {
>> -			if (i == eq->l0_last_num - 1) {
>> -				eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
>> -				size = (eq->entries - eqe_alloc) * eq->eqe_size;
>> -				dma_free_coherent(dev, size, eq->buf[i],
>> -						  eq->buf_dma[i]);
>> -				break;
>> -			}
>> -			dma_free_coherent(dev, buf_chk_sz, eq->buf[i],
>> -					  eq->buf_dma[i]);
>> -		}
>> -	} else if (mhop_num == 2) {
>> -		for (i = 0; i < eq->l0_last_num; i++) {
>> -			dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
>> -					  eq->l1_dma[i]);
>> -
>> -			for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
>> -				idx = i * (bt_chk_sz / BA_BYTE_LEN) + j;
>> -				if ((i == eq->l0_last_num - 1)
>> -				     && j == eq->l1_last_num - 1) {
>> -					eqe_alloc = (buf_chk_sz / eq->eqe_size)
>> -						    * idx;
>> -					size = (eq->entries - eqe_alloc)
>> -						* eq->eqe_size;
>> -					dma_free_coherent(dev, size,
>> -							  eq->buf[idx],
>> -							  eq->buf_dma[idx]);
>> -					break;
>> -				}
>> -				dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
>> -						  eq->buf_dma[idx]);
>> -			}
>> -		}
>> -	}
>> -	kfree(eq->buf_dma);
>> -	kfree(eq->buf);
>> -	kfree(eq->l1_dma);
>> -	kfree(eq->bt_l1);
>> -	eq->buf_dma = NULL;
>> -	eq->buf = NULL;
>> -	eq->l1_dma = NULL;
>> -	eq->bt_l1 = NULL;
>> -}
>> -
>> -static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev,
>> -				struct hns_roce_eq *eq)
>> -{
>> -	u32 buf_chk_sz;
>> -
>> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> -
>> -	if (hr_dev->caps.eqe_hop_num) {
>> -		hns_roce_mhop_free_eq(hr_dev, eq);
>> -		return;
>> -	}
>> -
>> -	dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf,
>> -			  eq->buf_list->map);
>> -	kfree(eq->buf_list);
>> +	if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0)
>> +		hns_roce_mtr_cleanup(hr_dev, &eq->mtr);
>> +	hns_roce_buf_free(hr_dev, eq->buf.size, &eq->buf);
>>  }
>>
>>  static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>> @@ -5705,6 +5574,8 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>>  				void *mb_buf)
>>  {
>>  	struct hns_roce_eq_context *eqc;
>> +	u64 ba[MTT_MIN_COUNT] = { 0 };
>> +	int count;
>>
>>  	eqc = mb_buf;
>>  	memset(eqc, 0, sizeof(struct hns_roce_eq_context));
>> @@ -5720,10 +5591,23 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>>  	eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz;
>>  	eq->shift = ilog2((unsigned int)eq->entries);
>>
>> -	if (!eq->hop_num)
>> -		eq->eqe_ba = eq->buf_list->map;
>> -	else
>> -		eq->eqe_ba = eq->l0_dma;
>> +	/* if not muti-hop, eqe buffer only use one trunk */
>> +	if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) {
>> +		eq->eqe_ba = eq->buf.direct.map;
>> +		eq->cur_eqe_ba = eq->eqe_ba;
>> +		if (eq->buf.npages > 1)
>> +			eq->nxt_eqe_ba = eq->eqe_ba + (1 << eq->eqe_buf_pg_sz);
>> +		else
>> +			eq->nxt_eqe_ba = eq->eqe_ba;
>> +	} else {
>> +		count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, ba,
>> +					  MTT_MIN_COUNT, &eq->eqe_ba);
>> +		eq->cur_eqe_ba = ba[0];
>> +		if (count > 1)
>> +			eq->nxt_eqe_ba = ba[1];
>> +		else
>> +			eq->nxt_eqe_ba = ba[0];
>> +	}
>>
>>  	/* set eqc state */
>>  	roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQ_ST_M, HNS_ROCE_EQC_EQ_ST_S,
>> @@ -5821,220 +5705,97 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>>  		       HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44);
>>  }
>>
>> -static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
>> -				  struct hns_roce_eq *eq)
>> +static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
>> +		      u32 page_shift)
>>  {
>> -	struct device *dev = hr_dev->dev;
>> -	int eq_alloc_done = 0;
>> -	int eq_buf_cnt = 0;
>> -	int eqe_alloc;
>> -	u32 buf_chk_sz;
>> -	u32 bt_chk_sz;
>> -	u32 mhop_num;
>> -	u64 size;
>> -	u64 idx;
>> +	struct hns_roce_buf_region region = {};
>> +	dma_addr_t *buf_list = NULL;
>>  	int ba_num;
>> -	int bt_num;
>> -	int record_i;
>> -	int record_j;
>> -	int i = 0;
>> -	int j = 0;
>> -
>> -	mhop_num = hr_dev->caps.eqe_hop_num;
>> -	buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
>> -	bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
>> +	int ret;
>>
>>  	ba_num = DIV_ROUND_UP(PAGE_ALIGN(eq->entries * eq->eqe_size),
>> -			      buf_chk_sz);
>> -	bt_num = DIV_ROUND_UP(ba_num, bt_chk_sz / BA_BYTE_LEN);
>> +			      1 << page_shift);
>> +	hns_roce_init_buf_region(&region, hr_dev->caps.eqe_hop_num, 0, ba_num);
>>
>> -	if (mhop_num == HNS_ROCE_HOP_NUM_0) {
>> -		if (eq->entries > buf_chk_sz / eq->eqe_size) {
>> -			dev_err(dev, "eq entries %d is larger than buf_pg_sz!",
>> -				eq->entries);
>> -			return -EINVAL;
>> -		}
>> -		eq->bt_l0 = dma_alloc_coherent(dev, eq->entries * eq->eqe_size,
>> -					       &(eq->l0_dma), GFP_KERNEL);
>> -		if (!eq->bt_l0)
>> -			return -ENOMEM;
>> -
>> -		eq->cur_eqe_ba = eq->l0_dma;
>> -		eq->nxt_eqe_ba = 0;
>> +	/* alloc a tmp list for storing eq buf address */
>> +	ret = hns_roce_alloc_buf_list(&region, &buf_list, 1);
>> +	if (ret) {
>> +		dev_err(hr_dev->dev, "alloc eq buf_list error\n");
>
> The same comment we gave for the bnxt driver applies here: no dev_*
> prints inside the driver, use ibdev_* instead.
>
> Thanks

OK, thank you.

Weihang
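
For reference, the rework being requested would look roughly like the sketch
below. It only illustrates the ibdev_* logging style; the message text and
the surrounding error path are assumptions, not the actual v2 patch:

	/* Sketch only: report the failure through the ib_device so the log
	 * names the RDMA device rather than the parent PCI device. The
	 * message wording and return path here are hypothetical.
	 */
	ret = hns_roce_alloc_buf_list(&region, &buf_list, 1);
	if (ret) {
		ibdev_err(&hr_dev->ib_dev,
			  "failed to alloc eq buf_list, ret = %d\n", ret);
		return ret;
	}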
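
For context on the commit message's claim that one set of interfaces now
covers both event queues, here is the consumer-side lookup from the diff
above with added explanatory comments. The code is taken unchanged from the
patch; only the comments are editorial:

	static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
	{
		struct hns_roce_aeqe *aeqe;

		/* One helper now resolves the EQE for both AEQ and CEQ: the
		 * byte offset is derived from the software consumer index,
		 * and get_eqe_buf() looks it up either in the single direct
		 * buffer or in the per-page list.
		 */
		aeqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
				   HNS_ROCE_AEQ_ENTRY_SIZE);

		/* The EQE is reported only while the hardware-written owner
		 * bit differs from the software wrap parity tracked by
		 * (cons_index & entries); otherwise the queue is empty.
		 */
		return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
			!!(eq->cons_index & eq->entries)) ? aeqe : NULL;
	}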