Linux-RDMA Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH for-next] RDMA/hns: Optimize eqe buffer allocation flow
@ 2020-01-26 14:58 Weihang Li
  2020-01-27  5:52 ` Leon Romanovsky
  2020-02-11 18:27 ` Jason Gunthorpe
  0 siblings, 2 replies; 11+ messages in thread
From: Weihang Li @ 2020-01-26 14:58 UTC (permalink / raw)
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm

From: Xi Wang <wangxi11@huawei.com>

The eqe has a private multi-hop addressing implementation, but there is
already a set of interfaces in the hns driver that can achieve this.

So, simplify the eqe buffer allocation process by using the mtr interface
and remove a large amount of repeated logic.

Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |  10 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 481 ++++++----------------------
 2 files changed, 108 insertions(+), 383 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index a4c45bf..dab3f3c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -757,14 +757,8 @@ struct hns_roce_eq {
 	int				eqe_ba_pg_sz;
 	int				eqe_buf_pg_sz;
 	int				hop_num;
-	u64				*bt_l0;	/* Base address table for L0 */
-	u64				**bt_l1; /* Base address table for L1 */
-	u64				**buf;
-	dma_addr_t			l0_dma;
-	dma_addr_t			*l1_dma;
-	dma_addr_t			*buf_dma;
-	u32				l0_last_num; /* L0 last chunk num */
-	u32				l1_last_num; /* L1 last chunk num */
+	struct hns_roce_mtr		mtr;
+	struct hns_roce_buf		buf;
 	int				eq_max_cnt;
 	int				eq_period;
 	int				shift;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index c462b19..88f2e76 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -5287,44 +5287,24 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
 	hns_roce_write64(hr_dev, doorbell, eq->doorbell);
 }
 
-static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
+static inline void *get_eqe_buf(struct hns_roce_eq *eq, unsigned long offset)
 {
 	u32 buf_chk_sz;
-	unsigned long off;
 
 	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
-	off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
-
-	return (struct hns_roce_aeqe *)((char *)(eq->buf_list->buf) +
-		off % buf_chk_sz);
-}
-
-static struct hns_roce_aeqe *mhop_get_aeqe(struct hns_roce_eq *eq, u32 entry)
-{
-	u32 buf_chk_sz;
-	unsigned long off;
-
-	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
-
-	off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
-
-	if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
-		return (struct hns_roce_aeqe *)((u8 *)(eq->bt_l0) +
-			off % buf_chk_sz);
+	if (eq->buf.nbufs == 1)
+		return eq->buf.direct.buf + offset % buf_chk_sz;
 	else
-		return (struct hns_roce_aeqe *)((u8 *)
-			(eq->buf[off / buf_chk_sz]) + off % buf_chk_sz);
+		return eq->buf.page_list[offset / buf_chk_sz].buf +
+		       offset % buf_chk_sz;
 }
 
 static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
 {
 	struct hns_roce_aeqe *aeqe;
 
-	if (!eq->hop_num)
-		aeqe = get_aeqe_v2(eq, eq->cons_index);
-	else
-		aeqe = mhop_get_aeqe(eq, eq->cons_index);
-
+	aeqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
+			   HNS_ROCE_AEQ_ENTRY_SIZE);
 	return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
 		!!(eq->cons_index & eq->entries)) ? aeqe : NULL;
 }
@@ -5417,44 +5397,12 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 	return aeqe_found;
 }
 
-static struct hns_roce_ceqe *get_ceqe_v2(struct hns_roce_eq *eq, u32 entry)
-{
-	u32 buf_chk_sz;
-	unsigned long off;
-
-	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
-	off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
-
-	return (struct hns_roce_ceqe *)((char *)(eq->buf_list->buf) +
-		off % buf_chk_sz);
-}
-
-static struct hns_roce_ceqe *mhop_get_ceqe(struct hns_roce_eq *eq, u32 entry)
-{
-	u32 buf_chk_sz;
-	unsigned long off;
-
-	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
-
-	off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
-
-	if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
-		return (struct hns_roce_ceqe *)((u8 *)(eq->bt_l0) +
-			off % buf_chk_sz);
-	else
-		return (struct hns_roce_ceqe *)((u8 *)(eq->buf[off /
-			buf_chk_sz]) + off % buf_chk_sz);
-}
-
 static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
 {
 	struct hns_roce_ceqe *ceqe;
 
-	if (!eq->hop_num)
-		ceqe = get_ceqe_v2(eq, eq->cons_index);
-	else
-		ceqe = mhop_get_ceqe(eq, eq->cons_index);
-
+	ceqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
+			   HNS_ROCE_CEQ_ENTRY_SIZE);
 	return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
 		(!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
 }
@@ -5614,90 +5562,11 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn)
 		dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn);
 }
 
-static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev,
-				  struct hns_roce_eq *eq)
+static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
 {
-	struct device *dev = hr_dev->dev;
-	u64 idx;
-	u64 size;
-	u32 buf_chk_sz;
-	u32 bt_chk_sz;
-	u32 mhop_num;
-	int eqe_alloc;
-	int i = 0;
-	int j = 0;
-
-	mhop_num = hr_dev->caps.eqe_hop_num;
-	buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
-	bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
-
-	if (mhop_num == HNS_ROCE_HOP_NUM_0) {
-		dma_free_coherent(dev, (unsigned int)(eq->entries *
-				  eq->eqe_size), eq->bt_l0, eq->l0_dma);
-		return;
-	}
-
-	dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
-	if (mhop_num == 1) {
-		for (i = 0; i < eq->l0_last_num; i++) {
-			if (i == eq->l0_last_num - 1) {
-				eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
-				size = (eq->entries - eqe_alloc) * eq->eqe_size;
-				dma_free_coherent(dev, size, eq->buf[i],
-						  eq->buf_dma[i]);
-				break;
-			}
-			dma_free_coherent(dev, buf_chk_sz, eq->buf[i],
-					  eq->buf_dma[i]);
-		}
-	} else if (mhop_num == 2) {
-		for (i = 0; i < eq->l0_last_num; i++) {
-			dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
-					  eq->l1_dma[i]);
-
-			for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
-				idx = i * (bt_chk_sz / BA_BYTE_LEN) + j;
-				if ((i == eq->l0_last_num - 1)
-				     && j == eq->l1_last_num - 1) {
-					eqe_alloc = (buf_chk_sz / eq->eqe_size)
-						    * idx;
-					size = (eq->entries - eqe_alloc)
-						* eq->eqe_size;
-					dma_free_coherent(dev, size,
-							  eq->buf[idx],
-							  eq->buf_dma[idx]);
-					break;
-				}
-				dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
-						  eq->buf_dma[idx]);
-			}
-		}
-	}
-	kfree(eq->buf_dma);
-	kfree(eq->buf);
-	kfree(eq->l1_dma);
-	kfree(eq->bt_l1);
-	eq->buf_dma = NULL;
-	eq->buf = NULL;
-	eq->l1_dma = NULL;
-	eq->bt_l1 = NULL;
-}
-
-static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev,
-				struct hns_roce_eq *eq)
-{
-	u32 buf_chk_sz;
-
-	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
-
-	if (hr_dev->caps.eqe_hop_num) {
-		hns_roce_mhop_free_eq(hr_dev, eq);
-		return;
-	}
-
-	dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf,
-			  eq->buf_list->map);
-	kfree(eq->buf_list);
+	if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0)
+		hns_roce_mtr_cleanup(hr_dev, &eq->mtr);
+	hns_roce_buf_free(hr_dev, eq->buf.size, &eq->buf);
 }
 
 static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
@@ -5705,6 +5574,8 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
 				void *mb_buf)
 {
 	struct hns_roce_eq_context *eqc;
+	u64 ba[MTT_MIN_COUNT] = { 0 };
+	int count;
 
 	eqc = mb_buf;
 	memset(eqc, 0, sizeof(struct hns_roce_eq_context));
@@ -5720,10 +5591,23 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
 	eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz;
 	eq->shift = ilog2((unsigned int)eq->entries);
 
-	if (!eq->hop_num)
-		eq->eqe_ba = eq->buf_list->map;
-	else
-		eq->eqe_ba = eq->l0_dma;
+	/* if not muti-hop, eqe buffer only use one trunk */
+	if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) {
+		eq->eqe_ba = eq->buf.direct.map;
+		eq->cur_eqe_ba = eq->eqe_ba;
+		if (eq->buf.npages > 1)
+			eq->nxt_eqe_ba = eq->eqe_ba + (1 << eq->eqe_buf_pg_sz);
+		else
+			eq->nxt_eqe_ba = eq->eqe_ba;
+	} else {
+		count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, ba,
+					  MTT_MIN_COUNT, &eq->eqe_ba);
+		eq->cur_eqe_ba = ba[0];
+		if (count > 1)
+			eq->nxt_eqe_ba = ba[1];
+		else
+			eq->nxt_eqe_ba = ba[0];
+	}
 
 	/* set eqc state */
 	roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQ_ST_M, HNS_ROCE_EQC_EQ_ST_S,
@@ -5821,220 +5705,97 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
 		       HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44);
 }
 
-static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
-				  struct hns_roce_eq *eq)
+static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
+		      u32 page_shift)
 {
-	struct device *dev = hr_dev->dev;
-	int eq_alloc_done = 0;
-	int eq_buf_cnt = 0;
-	int eqe_alloc;
-	u32 buf_chk_sz;
-	u32 bt_chk_sz;
-	u32 mhop_num;
-	u64 size;
-	u64 idx;
+	struct hns_roce_buf_region region = {};
+	dma_addr_t *buf_list = NULL;
 	int ba_num;
-	int bt_num;
-	int record_i;
-	int record_j;
-	int i = 0;
-	int j = 0;
-
-	mhop_num = hr_dev->caps.eqe_hop_num;
-	buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
-	bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
+	int ret;
 
 	ba_num = DIV_ROUND_UP(PAGE_ALIGN(eq->entries * eq->eqe_size),
-			      buf_chk_sz);
-	bt_num = DIV_ROUND_UP(ba_num, bt_chk_sz / BA_BYTE_LEN);
+			      1 << page_shift);
+	hns_roce_init_buf_region(&region, hr_dev->caps.eqe_hop_num, 0, ba_num);
 
-	if (mhop_num == HNS_ROCE_HOP_NUM_0) {
-		if (eq->entries > buf_chk_sz / eq->eqe_size) {
-			dev_err(dev, "eq entries %d is larger than buf_pg_sz!",
-				eq->entries);
-			return -EINVAL;
-		}
-		eq->bt_l0 = dma_alloc_coherent(dev, eq->entries * eq->eqe_size,
-					       &(eq->l0_dma), GFP_KERNEL);
-		if (!eq->bt_l0)
-			return -ENOMEM;
-
-		eq->cur_eqe_ba = eq->l0_dma;
-		eq->nxt_eqe_ba = 0;
+	/* alloc a tmp list for storing eq buf address */
+	ret = hns_roce_alloc_buf_list(&region, &buf_list, 1);
+	if (ret) {
+		dev_err(hr_dev->dev, "alloc eq buf_list error\n");
+		return ret;
+	}
 
-		return 0;
+	ba_num = hns_roce_get_kmem_bufs(hr_dev, buf_list, region.count,
+					region.offset, &eq->buf);
+	if (ba_num != region.count) {
+		dev_err(hr_dev->dev, "get eqe buf err,expect %d,ret %d.\n",
+			region.count, ba_num);
+		ret = -ENOBUFS;
+		goto done;
 	}
 
-	eq->buf_dma = kcalloc(ba_num, sizeof(*eq->buf_dma), GFP_KERNEL);
-	if (!eq->buf_dma)
-		return -ENOMEM;
-	eq->buf = kcalloc(ba_num, sizeof(*eq->buf), GFP_KERNEL);
-	if (!eq->buf)
-		goto err_kcalloc_buf;
-
-	if (mhop_num == 2) {
-		eq->l1_dma = kcalloc(bt_num, sizeof(*eq->l1_dma), GFP_KERNEL);
-		if (!eq->l1_dma)
-			goto err_kcalloc_l1_dma;
-
-		eq->bt_l1 = kcalloc(bt_num, sizeof(*eq->bt_l1), GFP_KERNEL);
-		if (!eq->bt_l1)
-			goto err_kcalloc_bt_l1;
-	}
-
-	/* alloc L0 BT */
-	eq->bt_l0 = dma_alloc_coherent(dev, bt_chk_sz, &eq->l0_dma, GFP_KERNEL);
-	if (!eq->bt_l0)
-		goto err_dma_alloc_l0;
-
-	if (mhop_num == 1) {
-		if (ba_num > (bt_chk_sz / BA_BYTE_LEN))
-			dev_err(dev, "ba_num %d is too large for 1 hop\n",
-				ba_num);
-
-		/* alloc buf */
-		for (i = 0; i < bt_chk_sz / BA_BYTE_LEN; i++) {
-			if (eq_buf_cnt + 1 < ba_num) {
-				size = buf_chk_sz;
-			} else {
-				eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
-				size = (eq->entries - eqe_alloc) * eq->eqe_size;
-			}
-			eq->buf[i] = dma_alloc_coherent(dev, size,
-							&(eq->buf_dma[i]),
-							GFP_KERNEL);
-			if (!eq->buf[i])
-				goto err_dma_alloc_buf;
+	hns_roce_mtr_init(&eq->mtr, PAGE_SHIFT + hr_dev->caps.eqe_ba_pg_sz,
+			  page_shift);
+	ret = hns_roce_mtr_attach(hr_dev, &eq->mtr, &buf_list, &region, 1);
+	if (ret)
+		dev_err(hr_dev->dev, "mtr attatch error for eqe\n");
 
-			*(eq->bt_l0 + i) = eq->buf_dma[i];
+	goto done;
 
-			eq_buf_cnt++;
-			if (eq_buf_cnt >= ba_num)
-				break;
-		}
-		eq->cur_eqe_ba = eq->buf_dma[0];
-		if (ba_num > 1)
-			eq->nxt_eqe_ba = eq->buf_dma[1];
-
-	} else if (mhop_num == 2) {
-		/* alloc L1 BT and buf */
-		for (i = 0; i < bt_chk_sz / BA_BYTE_LEN; i++) {
-			eq->bt_l1[i] = dma_alloc_coherent(dev, bt_chk_sz,
-							  &(eq->l1_dma[i]),
-							  GFP_KERNEL);
-			if (!eq->bt_l1[i])
-				goto err_dma_alloc_l1;
-			*(eq->bt_l0 + i) = eq->l1_dma[i];
-
-			for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
-				idx = i * bt_chk_sz / BA_BYTE_LEN + j;
-				if (eq_buf_cnt + 1 < ba_num) {
-					size = buf_chk_sz;
-				} else {
-					eqe_alloc = (buf_chk_sz / eq->eqe_size)
-						    * idx;
-					size = (eq->entries - eqe_alloc)
-						* eq->eqe_size;
-				}
-				eq->buf[idx] = dma_alloc_coherent(dev, size,
-								  &(eq->buf_dma[idx]),
-								  GFP_KERNEL);
-				if (!eq->buf[idx])
-					goto err_dma_alloc_buf;
-
-				*(eq->bt_l1[i] + j) = eq->buf_dma[idx];
-
-				eq_buf_cnt++;
-				if (eq_buf_cnt >= ba_num) {
-					eq_alloc_done = 1;
-					break;
-				}
-			}
+	hns_roce_mtr_cleanup(hr_dev, &eq->mtr);
+done:
+	hns_roce_free_buf_list(&buf_list, 1);
 
-			if (eq_alloc_done)
-				break;
-		}
-		eq->cur_eqe_ba = eq->buf_dma[0];
-		if (ba_num > 1)
-			eq->nxt_eqe_ba = eq->buf_dma[1];
-	}
+	return ret;
+}
 
-	eq->l0_last_num = i + 1;
-	if (mhop_num == 2)
-		eq->l1_last_num = j + 1;
+static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
+{
+	struct hns_roce_buf *buf = &eq->buf;
+	bool is_mhop = false;
+	u32 page_shift;
+	u32 mhop_num;
+	u32 max_size;
+	int ret;
 
-	return 0;
+	page_shift = PAGE_SHIFT + hr_dev->caps.eqe_buf_pg_sz;
+	mhop_num = hr_dev->caps.eqe_hop_num;
+	if (!mhop_num) {
+		max_size = 1 << page_shift;
+		buf->size = max_size;
+	} else if (mhop_num == HNS_ROCE_HOP_NUM_0) {
+		max_size = eq->entries * eq->eqe_size;
+		buf->size = max_size;
+	} else {
+		max_size = 1 << page_shift;
+		buf->size = PAGE_ALIGN(eq->entries * eq->eqe_size);
+		is_mhop = true;
+	}
 
-err_dma_alloc_l1:
-	dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
-	eq->bt_l0 = NULL;
-	eq->l0_dma = 0;
-	for (i -= 1; i >= 0; i--) {
-		dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
-				  eq->l1_dma[i]);
-
-		for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
-			idx = i * bt_chk_sz / BA_BYTE_LEN + j;
-			dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
-					  eq->buf_dma[idx]);
-		}
+	ret = hns_roce_buf_alloc(hr_dev, buf->size, max_size, buf, page_shift);
+	if (ret) {
+		dev_err(hr_dev->dev, "alloc eq buf error\n");
+		return ret;
 	}
-	goto err_dma_alloc_l0;
-
-err_dma_alloc_buf:
-	dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
-	eq->bt_l0 = NULL;
-	eq->l0_dma = 0;
-
-	if (mhop_num == 1)
-		for (i -= 1; i >= 0; i--)
-			dma_free_coherent(dev, buf_chk_sz, eq->buf[i],
-					  eq->buf_dma[i]);
-	else if (mhop_num == 2) {
-		record_i = i;
-		record_j = j;
-		for (; i >= 0; i--) {
-			dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
-					  eq->l1_dma[i]);
-
-			for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
-				if (i == record_i && j >= record_j)
-					break;
-
-				idx = i * bt_chk_sz / BA_BYTE_LEN + j;
-				dma_free_coherent(dev, buf_chk_sz,
-						  eq->buf[idx],
-						  eq->buf_dma[idx]);
-			}
+
+	if (is_mhop) {
+		ret = map_eq_buf(hr_dev, eq, page_shift);
+		if (ret) {
+			dev_err(hr_dev->dev, "map roce buf error\n");
+			goto err_alloc;
 		}
 	}
 
-err_dma_alloc_l0:
-	kfree(eq->bt_l1);
-	eq->bt_l1 = NULL;
-
-err_kcalloc_bt_l1:
-	kfree(eq->l1_dma);
-	eq->l1_dma = NULL;
-
-err_kcalloc_l1_dma:
-	kfree(eq->buf);
-	eq->buf = NULL;
-
-err_kcalloc_buf:
-	kfree(eq->buf_dma);
-	eq->buf_dma = NULL;
-
-	return -ENOMEM;
+	return 0;
+err_alloc:
+	hns_roce_buf_free(hr_dev, buf->size, buf);
+	return ret;
 }
 
 static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
 				 struct hns_roce_eq *eq,
 				 unsigned int eq_cmd)
 {
-	struct device *dev = hr_dev->dev;
 	struct hns_roce_cmd_mailbox *mailbox;
-	u32 buf_chk_sz = 0;
 	int ret;
 
 	/* Allocate mailbox memory */
@@ -6042,38 +5803,17 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
 	if (IS_ERR(mailbox))
 		return PTR_ERR(mailbox);
 
-	if (!hr_dev->caps.eqe_hop_num) {
-		buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
-
-		eq->buf_list = kzalloc(sizeof(struct hns_roce_buf_list),
-				       GFP_KERNEL);
-		if (!eq->buf_list) {
-			ret = -ENOMEM;
-			goto free_cmd_mbox;
-		}
-
-		eq->buf_list->buf = dma_alloc_coherent(dev, buf_chk_sz,
-						       &(eq->buf_list->map),
-						       GFP_KERNEL);
-		if (!eq->buf_list->buf) {
-			ret = -ENOMEM;
-			goto err_alloc_buf;
-		}
-
-	} else {
-		ret = hns_roce_mhop_alloc_eq(hr_dev, eq);
-		if (ret) {
-			ret = -ENOMEM;
-			goto free_cmd_mbox;
-		}
+	ret = alloc_eq_buf(hr_dev, eq);
+	if (ret) {
+		ret = -ENOMEM;
+		goto free_cmd_mbox;
 	}
-
 	hns_roce_config_eqc(hr_dev, eq, mailbox->buf);
 
 	ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0,
 				eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS);
 	if (ret) {
-		dev_err(dev, "[mailbox cmd] create eqc failed.\n");
+		dev_err(hr_dev->dev, "[mailbox cmd] create eqc failed.\n");
 		goto err_cmd_mbox;
 	}
 
@@ -6082,16 +5822,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
 	return 0;
 
 err_cmd_mbox:
-	if (!hr_dev->caps.eqe_hop_num)
-		dma_free_coherent(dev, buf_chk_sz, eq->buf_list->buf,
-				  eq->buf_list->map);
-	else {
-		hns_roce_mhop_free_eq(hr_dev, eq);
-		goto free_cmd_mbox;
-	}
-
-err_alloc_buf:
-	kfree(eq->buf_list);
+	free_eq_buf(hr_dev, eq);
 
 free_cmd_mbox:
 	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
@@ -6271,7 +6002,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
 
 err_create_eq_fail:
 	for (i -= 1; i >= 0; i--)
-		hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]);
+		free_eq_buf(hr_dev, &eq_table->eq[i]);
 	kfree(eq_table->eq);
 
 	return ret;
@@ -6293,7 +6024,7 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
 	for (i = 0; i < eq_num; i++) {
 		hns_roce_v2_destroy_eqc(hr_dev, i);
 
-		hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]);
+		free_eq_buf(hr_dev, &eq_table->eq[i]);
 	}
 
 	kfree(eq_table->eq);
-- 
2.8.1



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH for-next] RDMA/hns: Optimize eqe buffer allocation flow
  2020-01-26 14:58 [PATCH for-next] RDMA/hns: Optimize eqe buffer allocation flow Weihang Li
@ 2020-01-27  5:52 ` Leon Romanovsky
  2020-01-27  7:47   ` Weihang Li
  2020-02-05  6:04   ` Weihang Li
  2020-02-11 18:27 ` Jason Gunthorpe
  1 sibling, 2 replies; 11+ messages in thread
From: Leon Romanovsky @ 2020-01-27  5:52 UTC (permalink / raw)
  To: Weihang Li; +Cc: dledford, jgg, linux-rdma, linuxarm

On Sun, Jan 26, 2020 at 10:58:35PM +0800, Weihang Li wrote:
> From: Xi Wang <wangxi11@huawei.com>
>
> The eqe has a private multi-hop addressing implementation, but there is
> already a set of interfaces in the hns driver that can achieve this.
>
> So, simplify the eqe buffer allocation process by using the mtr interface
> and remove a large amount of repeated logic.
>
> Signed-off-by: Xi Wang <wangxi11@huawei.com>
> Signed-off-by: Weihang Li <liweihang@huawei.com>
> ---
>  drivers/infiniband/hw/hns/hns_roce_device.h |  10 +-
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 481 ++++++----------------------
>  2 files changed, 108 insertions(+), 383 deletions(-)
>
> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
> index a4c45bf..dab3f3c 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
> @@ -757,14 +757,8 @@ struct hns_roce_eq {
>  	int				eqe_ba_pg_sz;
>  	int				eqe_buf_pg_sz;
>  	int				hop_num;
> -	u64				*bt_l0;	/* Base address table for L0 */
> -	u64				**bt_l1; /* Base address table for L1 */
> -	u64				**buf;
> -	dma_addr_t			l0_dma;
> -	dma_addr_t			*l1_dma;
> -	dma_addr_t			*buf_dma;
> -	u32				l0_last_num; /* L0 last chunk num */
> -	u32				l1_last_num; /* L1 last chunk num */
> +	struct hns_roce_mtr		mtr;
> +	struct hns_roce_buf		buf;
>  	int				eq_max_cnt;
>  	int				eq_period;
>  	int				shift;
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> index c462b19..88f2e76 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> @@ -5287,44 +5287,24 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
>  	hns_roce_write64(hr_dev, doorbell, eq->doorbell);
>  }
>
> -static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
> +static inline void *get_eqe_buf(struct hns_roce_eq *eq, unsigned long offset)
>  {
>  	u32 buf_chk_sz;
> -	unsigned long off;
>
>  	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
> -
> -	return (struct hns_roce_aeqe *)((char *)(eq->buf_list->buf) +
> -		off % buf_chk_sz);
> -}
> -
> -static struct hns_roce_aeqe *mhop_get_aeqe(struct hns_roce_eq *eq, u32 entry)
> -{
> -	u32 buf_chk_sz;
> -	unsigned long off;
> -
> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
> -
> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
> -
> -	if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
> -		return (struct hns_roce_aeqe *)((u8 *)(eq->bt_l0) +
> -			off % buf_chk_sz);
> +	if (eq->buf.nbufs == 1)
> +		return eq->buf.direct.buf + offset % buf_chk_sz;
>  	else
> -		return (struct hns_roce_aeqe *)((u8 *)
> -			(eq->buf[off / buf_chk_sz]) + off % buf_chk_sz);
> +		return eq->buf.page_list[offset / buf_chk_sz].buf +
> +		       offset % buf_chk_sz;
>  }
>
>  static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
>  {
>  	struct hns_roce_aeqe *aeqe;
>
> -	if (!eq->hop_num)
> -		aeqe = get_aeqe_v2(eq, eq->cons_index);
> -	else
> -		aeqe = mhop_get_aeqe(eq, eq->cons_index);
> -
> +	aeqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
> +			   HNS_ROCE_AEQ_ENTRY_SIZE);
>  	return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
>  		!!(eq->cons_index & eq->entries)) ? aeqe : NULL;
>  }
> @@ -5417,44 +5397,12 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
>  	return aeqe_found;
>  }
>
> -static struct hns_roce_ceqe *get_ceqe_v2(struct hns_roce_eq *eq, u32 entry)
> -{
> -	u32 buf_chk_sz;
> -	unsigned long off;
> -
> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
> -
> -	return (struct hns_roce_ceqe *)((char *)(eq->buf_list->buf) +
> -		off % buf_chk_sz);
> -}
> -
> -static struct hns_roce_ceqe *mhop_get_ceqe(struct hns_roce_eq *eq, u32 entry)
> -{
> -	u32 buf_chk_sz;
> -	unsigned long off;
> -
> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
> -
> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
> -
> -	if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
> -		return (struct hns_roce_ceqe *)((u8 *)(eq->bt_l0) +
> -			off % buf_chk_sz);
> -	else
> -		return (struct hns_roce_ceqe *)((u8 *)(eq->buf[off /
> -			buf_chk_sz]) + off % buf_chk_sz);
> -}
> -
>  static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
>  {
>  	struct hns_roce_ceqe *ceqe;
>
> -	if (!eq->hop_num)
> -		ceqe = get_ceqe_v2(eq, eq->cons_index);
> -	else
> -		ceqe = mhop_get_ceqe(eq, eq->cons_index);
> -
> +	ceqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
> +			   HNS_ROCE_CEQ_ENTRY_SIZE);
>  	return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
>  		(!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
>  }
> @@ -5614,90 +5562,11 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn)
>  		dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn);
>  }
>
> -static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev,
> -				  struct hns_roce_eq *eq)
> +static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
>  {
> -	struct device *dev = hr_dev->dev;
> -	u64 idx;
> -	u64 size;
> -	u32 buf_chk_sz;
> -	u32 bt_chk_sz;
> -	u32 mhop_num;
> -	int eqe_alloc;
> -	int i = 0;
> -	int j = 0;
> -
> -	mhop_num = hr_dev->caps.eqe_hop_num;
> -	buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
> -	bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
> -
> -	if (mhop_num == HNS_ROCE_HOP_NUM_0) {
> -		dma_free_coherent(dev, (unsigned int)(eq->entries *
> -				  eq->eqe_size), eq->bt_l0, eq->l0_dma);
> -		return;
> -	}
> -
> -	dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
> -	if (mhop_num == 1) {
> -		for (i = 0; i < eq->l0_last_num; i++) {
> -			if (i == eq->l0_last_num - 1) {
> -				eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
> -				size = (eq->entries - eqe_alloc) * eq->eqe_size;
> -				dma_free_coherent(dev, size, eq->buf[i],
> -						  eq->buf_dma[i]);
> -				break;
> -			}
> -			dma_free_coherent(dev, buf_chk_sz, eq->buf[i],
> -					  eq->buf_dma[i]);
> -		}
> -	} else if (mhop_num == 2) {
> -		for (i = 0; i < eq->l0_last_num; i++) {
> -			dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
> -					  eq->l1_dma[i]);
> -
> -			for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
> -				idx = i * (bt_chk_sz / BA_BYTE_LEN) + j;
> -				if ((i == eq->l0_last_num - 1)
> -				     && j == eq->l1_last_num - 1) {
> -					eqe_alloc = (buf_chk_sz / eq->eqe_size)
> -						    * idx;
> -					size = (eq->entries - eqe_alloc)
> -						* eq->eqe_size;
> -					dma_free_coherent(dev, size,
> -							  eq->buf[idx],
> -							  eq->buf_dma[idx]);
> -					break;
> -				}
> -				dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
> -						  eq->buf_dma[idx]);
> -			}
> -		}
> -	}
> -	kfree(eq->buf_dma);
> -	kfree(eq->buf);
> -	kfree(eq->l1_dma);
> -	kfree(eq->bt_l1);
> -	eq->buf_dma = NULL;
> -	eq->buf = NULL;
> -	eq->l1_dma = NULL;
> -	eq->bt_l1 = NULL;
> -}
> -
> -static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev,
> -				struct hns_roce_eq *eq)
> -{
> -	u32 buf_chk_sz;
> -
> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
> -
> -	if (hr_dev->caps.eqe_hop_num) {
> -		hns_roce_mhop_free_eq(hr_dev, eq);
> -		return;
> -	}
> -
> -	dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf,
> -			  eq->buf_list->map);
> -	kfree(eq->buf_list);
> +	if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0)
> +		hns_roce_mtr_cleanup(hr_dev, &eq->mtr);
> +	hns_roce_buf_free(hr_dev, eq->buf.size, &eq->buf);
>  }
>
>  static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
> @@ -5705,6 +5574,8 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>  				void *mb_buf)
>  {
>  	struct hns_roce_eq_context *eqc;
> +	u64 ba[MTT_MIN_COUNT] = { 0 };
> +	int count;
>
>  	eqc = mb_buf;
>  	memset(eqc, 0, sizeof(struct hns_roce_eq_context));
> @@ -5720,10 +5591,23 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>  	eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz;
>  	eq->shift = ilog2((unsigned int)eq->entries);
>
> -	if (!eq->hop_num)
> -		eq->eqe_ba = eq->buf_list->map;
> -	else
> -		eq->eqe_ba = eq->l0_dma;
> +	/* if not muti-hop, eqe buffer only use one trunk */
> +	if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) {
> +		eq->eqe_ba = eq->buf.direct.map;
> +		eq->cur_eqe_ba = eq->eqe_ba;
> +		if (eq->buf.npages > 1)
> +			eq->nxt_eqe_ba = eq->eqe_ba + (1 << eq->eqe_buf_pg_sz);
> +		else
> +			eq->nxt_eqe_ba = eq->eqe_ba;
> +	} else {
> +		count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, ba,
> +					  MTT_MIN_COUNT, &eq->eqe_ba);
> +		eq->cur_eqe_ba = ba[0];
> +		if (count > 1)
> +			eq->nxt_eqe_ba = ba[1];
> +		else
> +			eq->nxt_eqe_ba = ba[0];
> +	}
>
>  	/* set eqc state */
>  	roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQ_ST_M, HNS_ROCE_EQC_EQ_ST_S,
> @@ -5821,220 +5705,97 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>  		       HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44);
>  }
>
> -static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
> -				  struct hns_roce_eq *eq)
> +static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
> +		      u32 page_shift)
>  {
> -	struct device *dev = hr_dev->dev;
> -	int eq_alloc_done = 0;
> -	int eq_buf_cnt = 0;
> -	int eqe_alloc;
> -	u32 buf_chk_sz;
> -	u32 bt_chk_sz;
> -	u32 mhop_num;
> -	u64 size;
> -	u64 idx;
> +	struct hns_roce_buf_region region = {};
> +	dma_addr_t *buf_list = NULL;
>  	int ba_num;
> -	int bt_num;
> -	int record_i;
> -	int record_j;
> -	int i = 0;
> -	int j = 0;
> -
> -	mhop_num = hr_dev->caps.eqe_hop_num;
> -	buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
> -	bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
> +	int ret;
>
>  	ba_num = DIV_ROUND_UP(PAGE_ALIGN(eq->entries * eq->eqe_size),
> -			      buf_chk_sz);
> -	bt_num = DIV_ROUND_UP(ba_num, bt_chk_sz / BA_BYTE_LEN);
> +			      1 << page_shift);
> +	hns_roce_init_buf_region(&region, hr_dev->caps.eqe_hop_num, 0, ba_num);
>
> -	if (mhop_num == HNS_ROCE_HOP_NUM_0) {
> -		if (eq->entries > buf_chk_sz / eq->eqe_size) {
> -			dev_err(dev, "eq entries %d is larger than buf_pg_sz!",
> -				eq->entries);
> -			return -EINVAL;
> -		}
> -		eq->bt_l0 = dma_alloc_coherent(dev, eq->entries * eq->eqe_size,
> -					       &(eq->l0_dma), GFP_KERNEL);
> -		if (!eq->bt_l0)
> -			return -ENOMEM;
> -
> -		eq->cur_eqe_ba = eq->l0_dma;
> -		eq->nxt_eqe_ba = 0;
> +	/* alloc a tmp list for storing eq buf address */
> +	ret = hns_roce_alloc_buf_list(&region, &buf_list, 1);
> +	if (ret) {
> +		dev_err(hr_dev->dev, "alloc eq buf_list error\n");

The same comment as we gave for the bnxt driver: no dev_* prints inside
the driver; use ibdev_* instead.

Thanks

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH for-next] RDMA/hns: Optimize eqe buffer allocation flow
  2020-01-27  5:52 ` Leon Romanovsky
@ 2020-01-27  7:47   ` Weihang Li
  2020-02-05  6:04   ` Weihang Li
  1 sibling, 0 replies; 11+ messages in thread
From: Weihang Li @ 2020-01-27  7:47 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: dledford, jgg, linux-rdma, linuxarm



On 2020/1/27 13:52, Leon Romanovsky wrote:
> On Sun, Jan 26, 2020 at 10:58:35PM +0800, Weihang Li wrote:
>> From: Xi Wang <wangxi11@huawei.com>
>>
>> The eqe has a private multi-hop addressing implementation, but there is
>> already a set of interfaces in the hns driver that can achieve this.
>>
>> So, simplify the eqe buffer allocation process by using the mtr interface
>> and remove large amount of repeated logic.
>>
>> Signed-off-by: Xi Wang <wangxi11@huawei.com>
>> Signed-off-by: Weihang Li <liweihang@huawei.com>
>> ---
>>  drivers/infiniband/hw/hns/hns_roce_device.h |  10 +-
>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 481 ++++++----------------------
>>  2 files changed, 108 insertions(+), 383 deletions(-)
>>
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
>> index a4c45bf..dab3f3c 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
>> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
>> @@ -757,14 +757,8 @@ struct hns_roce_eq {
>>  	int				eqe_ba_pg_sz;
>>  	int				eqe_buf_pg_sz;
>>  	int				hop_num;
>> -	u64				*bt_l0;	/* Base address table for L0 */
>> -	u64				**bt_l1; /* Base address table for L1 */
>> -	u64				**buf;
>> -	dma_addr_t			l0_dma;
>> -	dma_addr_t			*l1_dma;
>> -	dma_addr_t			*buf_dma;
>> -	u32				l0_last_num; /* L0 last chunk num */
>> -	u32				l1_last_num; /* L1 last chunk num */
>> +	struct hns_roce_mtr		mtr;
>> +	struct hns_roce_buf		buf;
>>  	int				eq_max_cnt;
>>  	int				eq_period;
>>  	int				shift;
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> index c462b19..88f2e76 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> @@ -5287,44 +5287,24 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
>>  	hns_roce_write64(hr_dev, doorbell, eq->doorbell);
>>  }
>>
>> -static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
>> +static inline void *get_eqe_buf(struct hns_roce_eq *eq, unsigned long offset)
>>  {
>>  	u32 buf_chk_sz;
>> -	unsigned long off;
>>
>>  	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
>> -
>> -	return (struct hns_roce_aeqe *)((char *)(eq->buf_list->buf) +
>> -		off % buf_chk_sz);
>> -}
>> -
>> -static struct hns_roce_aeqe *mhop_get_aeqe(struct hns_roce_eq *eq, u32 entry)
>> -{
>> -	u32 buf_chk_sz;
>> -	unsigned long off;
>> -
>> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> -
>> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
>> -
>> -	if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
>> -		return (struct hns_roce_aeqe *)((u8 *)(eq->bt_l0) +
>> -			off % buf_chk_sz);
>> +	if (eq->buf.nbufs == 1)
>> +		return eq->buf.direct.buf + offset % buf_chk_sz;
>>  	else
>> -		return (struct hns_roce_aeqe *)((u8 *)
>> -			(eq->buf[off / buf_chk_sz]) + off % buf_chk_sz);
>> +		return eq->buf.page_list[offset / buf_chk_sz].buf +
>> +		       offset % buf_chk_sz;
>>  }
>>
>>  static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
>>  {
>>  	struct hns_roce_aeqe *aeqe;
>>
>> -	if (!eq->hop_num)
>> -		aeqe = get_aeqe_v2(eq, eq->cons_index);
>> -	else
>> -		aeqe = mhop_get_aeqe(eq, eq->cons_index);
>> -
>> +	aeqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
>> +			   HNS_ROCE_AEQ_ENTRY_SIZE);
>>  	return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
>>  		!!(eq->cons_index & eq->entries)) ? aeqe : NULL;
>>  }
>> @@ -5417,44 +5397,12 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
>>  	return aeqe_found;
>>  }
>>
>> -static struct hns_roce_ceqe *get_ceqe_v2(struct hns_roce_eq *eq, u32 entry)
>> -{
>> -	u32 buf_chk_sz;
>> -	unsigned long off;
>> -
>> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
>> -
>> -	return (struct hns_roce_ceqe *)((char *)(eq->buf_list->buf) +
>> -		off % buf_chk_sz);
>> -}
>> -
>> -static struct hns_roce_ceqe *mhop_get_ceqe(struct hns_roce_eq *eq, u32 entry)
>> -{
>> -	u32 buf_chk_sz;
>> -	unsigned long off;
>> -
>> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> -
>> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
>> -
>> -	if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
>> -		return (struct hns_roce_ceqe *)((u8 *)(eq->bt_l0) +
>> -			off % buf_chk_sz);
>> -	else
>> -		return (struct hns_roce_ceqe *)((u8 *)(eq->buf[off /
>> -			buf_chk_sz]) + off % buf_chk_sz);
>> -}
>> -
>>  static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
>>  {
>>  	struct hns_roce_ceqe *ceqe;
>>
>> -	if (!eq->hop_num)
>> -		ceqe = get_ceqe_v2(eq, eq->cons_index);
>> -	else
>> -		ceqe = mhop_get_ceqe(eq, eq->cons_index);
>> -
>> +	ceqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
>> +			   HNS_ROCE_CEQ_ENTRY_SIZE);
>>  	return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
>>  		(!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
>>  }
>> @@ -5614,90 +5562,11 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn)
>>  		dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn);
>>  }
>>
>> -static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev,
>> -				  struct hns_roce_eq *eq)
>> +static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
>>  {
>> -	struct device *dev = hr_dev->dev;
>> -	u64 idx;
>> -	u64 size;
>> -	u32 buf_chk_sz;
>> -	u32 bt_chk_sz;
>> -	u32 mhop_num;
>> -	int eqe_alloc;
>> -	int i = 0;
>> -	int j = 0;
>> -
>> -	mhop_num = hr_dev->caps.eqe_hop_num;
>> -	buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
>> -	bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
>> -
>> -	if (mhop_num == HNS_ROCE_HOP_NUM_0) {
>> -		dma_free_coherent(dev, (unsigned int)(eq->entries *
>> -				  eq->eqe_size), eq->bt_l0, eq->l0_dma);
>> -		return;
>> -	}
>> -
>> -	dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
>> -	if (mhop_num == 1) {
>> -		for (i = 0; i < eq->l0_last_num; i++) {
>> -			if (i == eq->l0_last_num - 1) {
>> -				eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
>> -				size = (eq->entries - eqe_alloc) * eq->eqe_size;
>> -				dma_free_coherent(dev, size, eq->buf[i],
>> -						  eq->buf_dma[i]);
>> -				break;
>> -			}
>> -			dma_free_coherent(dev, buf_chk_sz, eq->buf[i],
>> -					  eq->buf_dma[i]);
>> -		}
>> -	} else if (mhop_num == 2) {
>> -		for (i = 0; i < eq->l0_last_num; i++) {
>> -			dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
>> -					  eq->l1_dma[i]);
>> -
>> -			for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
>> -				idx = i * (bt_chk_sz / BA_BYTE_LEN) + j;
>> -				if ((i == eq->l0_last_num - 1)
>> -				     && j == eq->l1_last_num - 1) {
>> -					eqe_alloc = (buf_chk_sz / eq->eqe_size)
>> -						    * idx;
>> -					size = (eq->entries - eqe_alloc)
>> -						* eq->eqe_size;
>> -					dma_free_coherent(dev, size,
>> -							  eq->buf[idx],
>> -							  eq->buf_dma[idx]);
>> -					break;
>> -				}
>> -				dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
>> -						  eq->buf_dma[idx]);
>> -			}
>> -		}
>> -	}
>> -	kfree(eq->buf_dma);
>> -	kfree(eq->buf);
>> -	kfree(eq->l1_dma);
>> -	kfree(eq->bt_l1);
>> -	eq->buf_dma = NULL;
>> -	eq->buf = NULL;
>> -	eq->l1_dma = NULL;
>> -	eq->bt_l1 = NULL;
>> -}
>> -
>> -static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev,
>> -				struct hns_roce_eq *eq)
>> -{
>> -	u32 buf_chk_sz;
>> -
>> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> -
>> -	if (hr_dev->caps.eqe_hop_num) {
>> -		hns_roce_mhop_free_eq(hr_dev, eq);
>> -		return;
>> -	}
>> -
>> -	dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf,
>> -			  eq->buf_list->map);
>> -	kfree(eq->buf_list);
>> +	if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0)
>> +		hns_roce_mtr_cleanup(hr_dev, &eq->mtr);
>> +	hns_roce_buf_free(hr_dev, eq->buf.size, &eq->buf);
>>  }
>>
>>  static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>> @@ -5705,6 +5574,8 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>>  				void *mb_buf)
>>  {
>>  	struct hns_roce_eq_context *eqc;
>> +	u64 ba[MTT_MIN_COUNT] = { 0 };
>> +	int count;
>>
>>  	eqc = mb_buf;
>>  	memset(eqc, 0, sizeof(struct hns_roce_eq_context));
>> @@ -5720,10 +5591,23 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>>  	eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz;
>>  	eq->shift = ilog2((unsigned int)eq->entries);
>>
>> -	if (!eq->hop_num)
>> -		eq->eqe_ba = eq->buf_list->map;
>> -	else
>> -		eq->eqe_ba = eq->l0_dma;
>> +	/* if not muti-hop, eqe buffer only use one trunk */
>> +	if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) {
>> +		eq->eqe_ba = eq->buf.direct.map;
>> +		eq->cur_eqe_ba = eq->eqe_ba;
>> +		if (eq->buf.npages > 1)
>> +			eq->nxt_eqe_ba = eq->eqe_ba + (1 << eq->eqe_buf_pg_sz);
>> +		else
>> +			eq->nxt_eqe_ba = eq->eqe_ba;
>> +	} else {
>> +		count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, ba,
>> +					  MTT_MIN_COUNT, &eq->eqe_ba);
>> +		eq->cur_eqe_ba = ba[0];
>> +		if (count > 1)
>> +			eq->nxt_eqe_ba = ba[1];
>> +		else
>> +			eq->nxt_eqe_ba = ba[0];
>> +	}
>>
>>  	/* set eqc state */
>>  	roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQ_ST_M, HNS_ROCE_EQC_EQ_ST_S,
>> @@ -5821,220 +5705,97 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>>  		       HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44);
>>  }
>>
>> -static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
>> -				  struct hns_roce_eq *eq)
>> +static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
>> +		      u32 page_shift)
>>  {
>> -	struct device *dev = hr_dev->dev;
>> -	int eq_alloc_done = 0;
>> -	int eq_buf_cnt = 0;
>> -	int eqe_alloc;
>> -	u32 buf_chk_sz;
>> -	u32 bt_chk_sz;
>> -	u32 mhop_num;
>> -	u64 size;
>> -	u64 idx;
>> +	struct hns_roce_buf_region region = {};
>> +	dma_addr_t *buf_list = NULL;
>>  	int ba_num;
>> -	int bt_num;
>> -	int record_i;
>> -	int record_j;
>> -	int i = 0;
>> -	int j = 0;
>> -
>> -	mhop_num = hr_dev->caps.eqe_hop_num;
>> -	buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
>> -	bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
>> +	int ret;
>>
>>  	ba_num = DIV_ROUND_UP(PAGE_ALIGN(eq->entries * eq->eqe_size),
>> -			      buf_chk_sz);
>> -	bt_num = DIV_ROUND_UP(ba_num, bt_chk_sz / BA_BYTE_LEN);
>> +			      1 << page_shift);
>> +	hns_roce_init_buf_region(&region, hr_dev->caps.eqe_hop_num, 0, ba_num);
>>
>> -	if (mhop_num == HNS_ROCE_HOP_NUM_0) {
>> -		if (eq->entries > buf_chk_sz / eq->eqe_size) {
>> -			dev_err(dev, "eq entries %d is larger than buf_pg_sz!",
>> -				eq->entries);
>> -			return -EINVAL;
>> -		}
>> -		eq->bt_l0 = dma_alloc_coherent(dev, eq->entries * eq->eqe_size,
>> -					       &(eq->l0_dma), GFP_KERNEL);
>> -		if (!eq->bt_l0)
>> -			return -ENOMEM;
>> -
>> -		eq->cur_eqe_ba = eq->l0_dma;
>> -		eq->nxt_eqe_ba = 0;
>> +	/* alloc a tmp list for storing eq buf address */
>> +	ret = hns_roce_alloc_buf_list(&region, &buf_list, 1);
>> +	if (ret) {
>> +		dev_err(hr_dev->dev, "alloc eq buf_list error\n");
> 
> The same comment like we gave for bnxt driver, no dev_* prints inside
> driver, use ibdev_*.
> 
> Thanks
> 
> .

OK, thank you.

Weihang

> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH for-next] RDMA/hns: Optimize eqe buffer allocation flow
  2020-01-27  5:52 ` Leon Romanovsky
  2020-01-27  7:47   ` Weihang Li
@ 2020-02-05  6:04   ` Weihang Li
  2020-02-10  9:25     ` Leon Romanovsky
  1 sibling, 1 reply; 11+ messages in thread
From: Weihang Li @ 2020-02-05  6:04 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: dledford, jgg, linux-rdma, linuxarm



On 2020/1/27 13:52, Leon Romanovsky wrote:
> On Sun, Jan 26, 2020 at 10:58:35PM +0800, Weihang Li wrote:
>> From: Xi Wang <wangxi11@huawei.com>
>>
>> The eqe has a private multi-hop addressing implementation, but there is
>> already a set of interfaces in the hns driver that can achieve this.
>>
>> So, simplify the eqe buffer allocation process by using the mtr interface
>> and remove large amount of repeated logic.
>>
>> Signed-off-by: Xi Wang <wangxi11@huawei.com>
>> Signed-off-by: Weihang Li <liweihang@huawei.com>
>> ---
>>  drivers/infiniband/hw/hns/hns_roce_device.h |  10 +-
>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 481 ++++++----------------------
>>  2 files changed, 108 insertions(+), 383 deletions(-)
>>
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
>> index a4c45bf..dab3f3c 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
>> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
>> @@ -757,14 +757,8 @@ struct hns_roce_eq {
>>  	int				eqe_ba_pg_sz;
>>  	int				eqe_buf_pg_sz;
>>  	int				hop_num;
>> -	u64				*bt_l0;	/* Base address table for L0 */
>> -	u64				**bt_l1; /* Base address table for L1 */
>> -	u64				**buf;
>> -	dma_addr_t			l0_dma;
>> -	dma_addr_t			*l1_dma;
>> -	dma_addr_t			*buf_dma;
>> -	u32				l0_last_num; /* L0 last chunk num */
>> -	u32				l1_last_num; /* L1 last chunk num */
>> +	struct hns_roce_mtr		mtr;
>> +	struct hns_roce_buf		buf;
>>  	int				eq_max_cnt;
>>  	int				eq_period;
>>  	int				shift;
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> index c462b19..88f2e76 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> @@ -5287,44 +5287,24 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
>>  	hns_roce_write64(hr_dev, doorbell, eq->doorbell);
>>  }
>>
>> -static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
>> +static inline void *get_eqe_buf(struct hns_roce_eq *eq, unsigned long offset)
>>  {
>>  	u32 buf_chk_sz;
>> -	unsigned long off;
>>
>>  	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
>> -
>> -	return (struct hns_roce_aeqe *)((char *)(eq->buf_list->buf) +
>> -		off % buf_chk_sz);
>> -}
>> -
>> -static struct hns_roce_aeqe *mhop_get_aeqe(struct hns_roce_eq *eq, u32 entry)
>> -{
>> -	u32 buf_chk_sz;
>> -	unsigned long off;
>> -
>> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> -
>> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
>> -
>> -	if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
>> -		return (struct hns_roce_aeqe *)((u8 *)(eq->bt_l0) +
>> -			off % buf_chk_sz);
>> +	if (eq->buf.nbufs == 1)
>> +		return eq->buf.direct.buf + offset % buf_chk_sz;
>>  	else
>> -		return (struct hns_roce_aeqe *)((u8 *)
>> -			(eq->buf[off / buf_chk_sz]) + off % buf_chk_sz);
>> +		return eq->buf.page_list[offset / buf_chk_sz].buf +
>> +		       offset % buf_chk_sz;
>>  }
>>
>>  static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
>>  {
>>  	struct hns_roce_aeqe *aeqe;
>>
>> -	if (!eq->hop_num)
>> -		aeqe = get_aeqe_v2(eq, eq->cons_index);
>> -	else
>> -		aeqe = mhop_get_aeqe(eq, eq->cons_index);
>> -
>> +	aeqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
>> +			   HNS_ROCE_AEQ_ENTRY_SIZE);
>>  	return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
>>  		!!(eq->cons_index & eq->entries)) ? aeqe : NULL;
>>  }
>> @@ -5417,44 +5397,12 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
>>  	return aeqe_found;
>>  }
>>
>> -static struct hns_roce_ceqe *get_ceqe_v2(struct hns_roce_eq *eq, u32 entry)
>> -{
>> -	u32 buf_chk_sz;
>> -	unsigned long off;
>> -
>> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
>> -
>> -	return (struct hns_roce_ceqe *)((char *)(eq->buf_list->buf) +
>> -		off % buf_chk_sz);
>> -}
>> -
>> -static struct hns_roce_ceqe *mhop_get_ceqe(struct hns_roce_eq *eq, u32 entry)
>> -{
>> -	u32 buf_chk_sz;
>> -	unsigned long off;
>> -
>> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> -
>> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
>> -
>> -	if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
>> -		return (struct hns_roce_ceqe *)((u8 *)(eq->bt_l0) +
>> -			off % buf_chk_sz);
>> -	else
>> -		return (struct hns_roce_ceqe *)((u8 *)(eq->buf[off /
>> -			buf_chk_sz]) + off % buf_chk_sz);
>> -}
>> -
>>  static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
>>  {
>>  	struct hns_roce_ceqe *ceqe;
>>
>> -	if (!eq->hop_num)
>> -		ceqe = get_ceqe_v2(eq, eq->cons_index);
>> -	else
>> -		ceqe = mhop_get_ceqe(eq, eq->cons_index);
>> -
>> +	ceqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
>> +			   HNS_ROCE_CEQ_ENTRY_SIZE);
>>  	return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
>>  		(!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
>>  }
>> @@ -5614,90 +5562,11 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn)
>>  		dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn);
>>  }
>>
>> -static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev,
>> -				  struct hns_roce_eq *eq)
>> +static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
>>  {
>> -	struct device *dev = hr_dev->dev;
>> -	u64 idx;
>> -	u64 size;
>> -	u32 buf_chk_sz;
>> -	u32 bt_chk_sz;
>> -	u32 mhop_num;
>> -	int eqe_alloc;
>> -	int i = 0;
>> -	int j = 0;
>> -
>> -	mhop_num = hr_dev->caps.eqe_hop_num;
>> -	buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
>> -	bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
>> -
>> -	if (mhop_num == HNS_ROCE_HOP_NUM_0) {
>> -		dma_free_coherent(dev, (unsigned int)(eq->entries *
>> -				  eq->eqe_size), eq->bt_l0, eq->l0_dma);
>> -		return;
>> -	}
>> -
>> -	dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
>> -	if (mhop_num == 1) {
>> -		for (i = 0; i < eq->l0_last_num; i++) {
>> -			if (i == eq->l0_last_num - 1) {
>> -				eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
>> -				size = (eq->entries - eqe_alloc) * eq->eqe_size;
>> -				dma_free_coherent(dev, size, eq->buf[i],
>> -						  eq->buf_dma[i]);
>> -				break;
>> -			}
>> -			dma_free_coherent(dev, buf_chk_sz, eq->buf[i],
>> -					  eq->buf_dma[i]);
>> -		}
>> -	} else if (mhop_num == 2) {
>> -		for (i = 0; i < eq->l0_last_num; i++) {
>> -			dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
>> -					  eq->l1_dma[i]);
>> -
>> -			for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
>> -				idx = i * (bt_chk_sz / BA_BYTE_LEN) + j;
>> -				if ((i == eq->l0_last_num - 1)
>> -				     && j == eq->l1_last_num - 1) {
>> -					eqe_alloc = (buf_chk_sz / eq->eqe_size)
>> -						    * idx;
>> -					size = (eq->entries - eqe_alloc)
>> -						* eq->eqe_size;
>> -					dma_free_coherent(dev, size,
>> -							  eq->buf[idx],
>> -							  eq->buf_dma[idx]);
>> -					break;
>> -				}
>> -				dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
>> -						  eq->buf_dma[idx]);
>> -			}
>> -		}
>> -	}
>> -	kfree(eq->buf_dma);
>> -	kfree(eq->buf);
>> -	kfree(eq->l1_dma);
>> -	kfree(eq->bt_l1);
>> -	eq->buf_dma = NULL;
>> -	eq->buf = NULL;
>> -	eq->l1_dma = NULL;
>> -	eq->bt_l1 = NULL;
>> -}
>> -
>> -static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev,
>> -				struct hns_roce_eq *eq)
>> -{
>> -	u32 buf_chk_sz;
>> -
>> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
>> -
>> -	if (hr_dev->caps.eqe_hop_num) {
>> -		hns_roce_mhop_free_eq(hr_dev, eq);
>> -		return;
>> -	}
>> -
>> -	dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf,
>> -			  eq->buf_list->map);
>> -	kfree(eq->buf_list);
>> +	if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0)
>> +		hns_roce_mtr_cleanup(hr_dev, &eq->mtr);
>> +	hns_roce_buf_free(hr_dev, eq->buf.size, &eq->buf);
>>  }
>>
>>  static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>> @@ -5705,6 +5574,8 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>>  				void *mb_buf)
>>  {
>>  	struct hns_roce_eq_context *eqc;
>> +	u64 ba[MTT_MIN_COUNT] = { 0 };
>> +	int count;
>>
>>  	eqc = mb_buf;
>>  	memset(eqc, 0, sizeof(struct hns_roce_eq_context));
>> @@ -5720,10 +5591,23 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>>  	eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz;
>>  	eq->shift = ilog2((unsigned int)eq->entries);
>>
>> -	if (!eq->hop_num)
>> -		eq->eqe_ba = eq->buf_list->map;
>> -	else
>> -		eq->eqe_ba = eq->l0_dma;
>> +	/* if not muti-hop, eqe buffer only use one trunk */
>> +	if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) {
>> +		eq->eqe_ba = eq->buf.direct.map;
>> +		eq->cur_eqe_ba = eq->eqe_ba;
>> +		if (eq->buf.npages > 1)
>> +			eq->nxt_eqe_ba = eq->eqe_ba + (1 << eq->eqe_buf_pg_sz);
>> +		else
>> +			eq->nxt_eqe_ba = eq->eqe_ba;
>> +	} else {
>> +		count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, ba,
>> +					  MTT_MIN_COUNT, &eq->eqe_ba);
>> +		eq->cur_eqe_ba = ba[0];
>> +		if (count > 1)
>> +			eq->nxt_eqe_ba = ba[1];
>> +		else
>> +			eq->nxt_eqe_ba = ba[0];
>> +	}
>>
>>  	/* set eqc state */
>>  	roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQ_ST_M, HNS_ROCE_EQC_EQ_ST_S,
>> @@ -5821,220 +5705,97 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
>>  		       HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44);
>>  }
>>
>> -static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
>> -				  struct hns_roce_eq *eq)
>> +static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
>> +		      u32 page_shift)
>>  {
>> -	struct device *dev = hr_dev->dev;
>> -	int eq_alloc_done = 0;
>> -	int eq_buf_cnt = 0;
>> -	int eqe_alloc;
>> -	u32 buf_chk_sz;
>> -	u32 bt_chk_sz;
>> -	u32 mhop_num;
>> -	u64 size;
>> -	u64 idx;
>> +	struct hns_roce_buf_region region = {};
>> +	dma_addr_t *buf_list = NULL;
>>  	int ba_num;
>> -	int bt_num;
>> -	int record_i;
>> -	int record_j;
>> -	int i = 0;
>> -	int j = 0;
>> -
>> -	mhop_num = hr_dev->caps.eqe_hop_num;
>> -	buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
>> -	bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
>> +	int ret;
>>
>>  	ba_num = DIV_ROUND_UP(PAGE_ALIGN(eq->entries * eq->eqe_size),
>> -			      buf_chk_sz);
>> -	bt_num = DIV_ROUND_UP(ba_num, bt_chk_sz / BA_BYTE_LEN);
>> +			      1 << page_shift);
>> +	hns_roce_init_buf_region(&region, hr_dev->caps.eqe_hop_num, 0, ba_num);
>>
>> -	if (mhop_num == HNS_ROCE_HOP_NUM_0) {
>> -		if (eq->entries > buf_chk_sz / eq->eqe_size) {
>> -			dev_err(dev, "eq entries %d is larger than buf_pg_sz!",
>> -				eq->entries);
>> -			return -EINVAL;
>> -		}
>> -		eq->bt_l0 = dma_alloc_coherent(dev, eq->entries * eq->eqe_size,
>> -					       &(eq->l0_dma), GFP_KERNEL);
>> -		if (!eq->bt_l0)
>> -			return -ENOMEM;
>> -
>> -		eq->cur_eqe_ba = eq->l0_dma;
>> -		eq->nxt_eqe_ba = 0;
>> +	/* alloc a tmp list for storing eq buf address */
>> +	ret = hns_roce_alloc_buf_list(&region, &buf_list, 1);
>> +	if (ret) {
>> +		dev_err(hr_dev->dev, "alloc eq buf_list error\n");
> 
> The same comment like we gave for bnxt driver, no dev_* prints inside
> driver, use ibdev_*.
> 
> Thanks
> 

Hi Leon,

map_eq_buf() is called before ib_register_device(), so we can't use
ibdev_* here.

Thanks for your reminder; another patch that replaces the other dev_*
calls in the hns driver with ibdev_* is being prepared.

Weihang

> .
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH for-next] RDMA/hns: Optimize eqe buffer allocation flow
  2020-02-05  6:04   ` Weihang Li
@ 2020-02-10  9:25     ` Leon Romanovsky
  2020-02-10  9:48       ` Weihang Li
  0 siblings, 1 reply; 11+ messages in thread
From: Leon Romanovsky @ 2020-02-10  9:25 UTC (permalink / raw)
  To: Weihang Li; +Cc: dledford, jgg, linux-rdma, linuxarm

On Wed, Feb 05, 2020 at 02:04:09PM +0800, Weihang Li wrote:
>
>
> On 2020/1/27 13:52, Leon Romanovsky wrote:
> > On Sun, Jan 26, 2020 at 10:58:35PM +0800, Weihang Li wrote:
> >> From: Xi Wang <wangxi11@huawei.com>
> >>
> >> The eqe has a private multi-hop addressing implementation, but there is
> >> already a set of interfaces in the hns driver that can achieve this.
> >>
> >> So, simplify the eqe buffer allocation process by using the mtr interface
> >> and remove large amount of repeated logic.
> >>
> >> Signed-off-by: Xi Wang <wangxi11@huawei.com>
> >> Signed-off-by: Weihang Li <liweihang@huawei.com>
> >> ---
> >>  drivers/infiniband/hw/hns/hns_roce_device.h |  10 +-
> >>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 481 ++++++----------------------
> >>  2 files changed, 108 insertions(+), 383 deletions(-)
> >>
> >> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
> >> index a4c45bf..dab3f3c 100644
> >> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
> >> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
> >> @@ -757,14 +757,8 @@ struct hns_roce_eq {
> >>  	int				eqe_ba_pg_sz;
> >>  	int				eqe_buf_pg_sz;
> >>  	int				hop_num;
> >> -	u64				*bt_l0;	/* Base address table for L0 */
> >> -	u64				**bt_l1; /* Base address table for L1 */
> >> -	u64				**buf;
> >> -	dma_addr_t			l0_dma;
> >> -	dma_addr_t			*l1_dma;
> >> -	dma_addr_t			*buf_dma;
> >> -	u32				l0_last_num; /* L0 last chunk num */
> >> -	u32				l1_last_num; /* L1 last chunk num */
> >> +	struct hns_roce_mtr		mtr;
> >> +	struct hns_roce_buf		buf;
> >>  	int				eq_max_cnt;
> >>  	int				eq_period;
> >>  	int				shift;
> >> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> >> index c462b19..88f2e76 100644
> >> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> >> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> >> @@ -5287,44 +5287,24 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
> >>  	hns_roce_write64(hr_dev, doorbell, eq->doorbell);
> >>  }
> >>
> >> -static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
> >> +static inline void *get_eqe_buf(struct hns_roce_eq *eq, unsigned long offset)
> >>  {
> >>  	u32 buf_chk_sz;
> >> -	unsigned long off;
> >>
> >>  	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
> >> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
> >> -
> >> -	return (struct hns_roce_aeqe *)((char *)(eq->buf_list->buf) +
> >> -		off % buf_chk_sz);
> >> -}
> >> -
> >> -static struct hns_roce_aeqe *mhop_get_aeqe(struct hns_roce_eq *eq, u32 entry)
> >> -{
> >> -	u32 buf_chk_sz;
> >> -	unsigned long off;
> >> -
> >> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
> >> -
> >> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
> >> -
> >> -	if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
> >> -		return (struct hns_roce_aeqe *)((u8 *)(eq->bt_l0) +
> >> -			off % buf_chk_sz);
> >> +	if (eq->buf.nbufs == 1)
> >> +		return eq->buf.direct.buf + offset % buf_chk_sz;
> >>  	else
> >> -		return (struct hns_roce_aeqe *)((u8 *)
> >> -			(eq->buf[off / buf_chk_sz]) + off % buf_chk_sz);
> >> +		return eq->buf.page_list[offset / buf_chk_sz].buf +
> >> +		       offset % buf_chk_sz;
> >>  }
> >>
> >>  static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
> >>  {
> >>  	struct hns_roce_aeqe *aeqe;
> >>
> >> -	if (!eq->hop_num)
> >> -		aeqe = get_aeqe_v2(eq, eq->cons_index);
> >> -	else
> >> -		aeqe = mhop_get_aeqe(eq, eq->cons_index);
> >> -
> >> +	aeqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
> >> +			   HNS_ROCE_AEQ_ENTRY_SIZE);
> >>  	return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
> >>  		!!(eq->cons_index & eq->entries)) ? aeqe : NULL;
> >>  }
> >> @@ -5417,44 +5397,12 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
> >>  	return aeqe_found;
> >>  }
> >>
> >> -static struct hns_roce_ceqe *get_ceqe_v2(struct hns_roce_eq *eq, u32 entry)
> >> -{
> >> -	u32 buf_chk_sz;
> >> -	unsigned long off;
> >> -
> >> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
> >> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
> >> -
> >> -	return (struct hns_roce_ceqe *)((char *)(eq->buf_list->buf) +
> >> -		off % buf_chk_sz);
> >> -}
> >> -
> >> -static struct hns_roce_ceqe *mhop_get_ceqe(struct hns_roce_eq *eq, u32 entry)
> >> -{
> >> -	u32 buf_chk_sz;
> >> -	unsigned long off;
> >> -
> >> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
> >> -
> >> -	off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
> >> -
> >> -	if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
> >> -		return (struct hns_roce_ceqe *)((u8 *)(eq->bt_l0) +
> >> -			off % buf_chk_sz);
> >> -	else
> >> -		return (struct hns_roce_ceqe *)((u8 *)(eq->buf[off /
> >> -			buf_chk_sz]) + off % buf_chk_sz);
> >> -}
> >> -
> >>  static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
> >>  {
> >>  	struct hns_roce_ceqe *ceqe;
> >>
> >> -	if (!eq->hop_num)
> >> -		ceqe = get_ceqe_v2(eq, eq->cons_index);
> >> -	else
> >> -		ceqe = mhop_get_ceqe(eq, eq->cons_index);
> >> -
> >> +	ceqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
> >> +			   HNS_ROCE_CEQ_ENTRY_SIZE);
> >>  	return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
> >>  		(!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
> >>  }
> >> @@ -5614,90 +5562,11 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn)
> >>  		dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn);
> >>  }
> >>
> >> -static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev,
> >> -				  struct hns_roce_eq *eq)
> >> +static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
> >>  {
> >> -	struct device *dev = hr_dev->dev;
> >> -	u64 idx;
> >> -	u64 size;
> >> -	u32 buf_chk_sz;
> >> -	u32 bt_chk_sz;
> >> -	u32 mhop_num;
> >> -	int eqe_alloc;
> >> -	int i = 0;
> >> -	int j = 0;
> >> -
> >> -	mhop_num = hr_dev->caps.eqe_hop_num;
> >> -	buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
> >> -	bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
> >> -
> >> -	if (mhop_num == HNS_ROCE_HOP_NUM_0) {
> >> -		dma_free_coherent(dev, (unsigned int)(eq->entries *
> >> -				  eq->eqe_size), eq->bt_l0, eq->l0_dma);
> >> -		return;
> >> -	}
> >> -
> >> -	dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
> >> -	if (mhop_num == 1) {
> >> -		for (i = 0; i < eq->l0_last_num; i++) {
> >> -			if (i == eq->l0_last_num - 1) {
> >> -				eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
> >> -				size = (eq->entries - eqe_alloc) * eq->eqe_size;
> >> -				dma_free_coherent(dev, size, eq->buf[i],
> >> -						  eq->buf_dma[i]);
> >> -				break;
> >> -			}
> >> -			dma_free_coherent(dev, buf_chk_sz, eq->buf[i],
> >> -					  eq->buf_dma[i]);
> >> -		}
> >> -	} else if (mhop_num == 2) {
> >> -		for (i = 0; i < eq->l0_last_num; i++) {
> >> -			dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
> >> -					  eq->l1_dma[i]);
> >> -
> >> -			for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
> >> -				idx = i * (bt_chk_sz / BA_BYTE_LEN) + j;
> >> -				if ((i == eq->l0_last_num - 1)
> >> -				     && j == eq->l1_last_num - 1) {
> >> -					eqe_alloc = (buf_chk_sz / eq->eqe_size)
> >> -						    * idx;
> >> -					size = (eq->entries - eqe_alloc)
> >> -						* eq->eqe_size;
> >> -					dma_free_coherent(dev, size,
> >> -							  eq->buf[idx],
> >> -							  eq->buf_dma[idx]);
> >> -					break;
> >> -				}
> >> -				dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
> >> -						  eq->buf_dma[idx]);
> >> -			}
> >> -		}
> >> -	}
> >> -	kfree(eq->buf_dma);
> >> -	kfree(eq->buf);
> >> -	kfree(eq->l1_dma);
> >> -	kfree(eq->bt_l1);
> >> -	eq->buf_dma = NULL;
> >> -	eq->buf = NULL;
> >> -	eq->l1_dma = NULL;
> >> -	eq->bt_l1 = NULL;
> >> -}
> >> -
> >> -static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev,
> >> -				struct hns_roce_eq *eq)
> >> -{
> >> -	u32 buf_chk_sz;
> >> -
> >> -	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
> >> -
> >> -	if (hr_dev->caps.eqe_hop_num) {
> >> -		hns_roce_mhop_free_eq(hr_dev, eq);
> >> -		return;
> >> -	}
> >> -
> >> -	dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf,
> >> -			  eq->buf_list->map);
> >> -	kfree(eq->buf_list);
> >> +	if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0)
> >> +		hns_roce_mtr_cleanup(hr_dev, &eq->mtr);
> >> +	hns_roce_buf_free(hr_dev, eq->buf.size, &eq->buf);
> >>  }
> >>
> >>  static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
> >> @@ -5705,6 +5574,8 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
> >>  				void *mb_buf)
> >>  {
> >>  	struct hns_roce_eq_context *eqc;
> >> +	u64 ba[MTT_MIN_COUNT] = { 0 };
> >> +	int count;
> >>
> >>  	eqc = mb_buf;
> >>  	memset(eqc, 0, sizeof(struct hns_roce_eq_context));
> >> @@ -5720,10 +5591,23 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
> >>  	eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz;
> >>  	eq->shift = ilog2((unsigned int)eq->entries);
> >>
> >> -	if (!eq->hop_num)
> >> -		eq->eqe_ba = eq->buf_list->map;
> >> -	else
> >> -		eq->eqe_ba = eq->l0_dma;
> >> +	/* if not muti-hop, eqe buffer only use one trunk */
> >> +	if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) {
> >> +		eq->eqe_ba = eq->buf.direct.map;
> >> +		eq->cur_eqe_ba = eq->eqe_ba;
> >> +		if (eq->buf.npages > 1)
> >> +			eq->nxt_eqe_ba = eq->eqe_ba + (1 << eq->eqe_buf_pg_sz);
> >> +		else
> >> +			eq->nxt_eqe_ba = eq->eqe_ba;
> >> +	} else {
> >> +		count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, ba,
> >> +					  MTT_MIN_COUNT, &eq->eqe_ba);
> >> +		eq->cur_eqe_ba = ba[0];
> >> +		if (count > 1)
> >> +			eq->nxt_eqe_ba = ba[1];
> >> +		else
> >> +			eq->nxt_eqe_ba = ba[0];
> >> +	}
> >>
> >>  	/* set eqc state */
> >>  	roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQ_ST_M, HNS_ROCE_EQC_EQ_ST_S,
> >> @@ -5821,220 +5705,97 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
> >>  		       HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44);
> >>  }
> >>
> >> -static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
> >> -				  struct hns_roce_eq *eq)
> >> +static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
> >> +		      u32 page_shift)
> >>  {
> >> -	struct device *dev = hr_dev->dev;
> >> -	int eq_alloc_done = 0;
> >> -	int eq_buf_cnt = 0;
> >> -	int eqe_alloc;
> >> -	u32 buf_chk_sz;
> >> -	u32 bt_chk_sz;
> >> -	u32 mhop_num;
> >> -	u64 size;
> >> -	u64 idx;
> >> +	struct hns_roce_buf_region region = {};
> >> +	dma_addr_t *buf_list = NULL;
> >>  	int ba_num;
> >> -	int bt_num;
> >> -	int record_i;
> >> -	int record_j;
> >> -	int i = 0;
> >> -	int j = 0;
> >> -
> >> -	mhop_num = hr_dev->caps.eqe_hop_num;
> >> -	buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
> >> -	bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
> >> +	int ret;
> >>
> >>  	ba_num = DIV_ROUND_UP(PAGE_ALIGN(eq->entries * eq->eqe_size),
> >> -			      buf_chk_sz);
> >> -	bt_num = DIV_ROUND_UP(ba_num, bt_chk_sz / BA_BYTE_LEN);
> >> +			      1 << page_shift);
> >> +	hns_roce_init_buf_region(&region, hr_dev->caps.eqe_hop_num, 0, ba_num);
> >>
> >> -	if (mhop_num == HNS_ROCE_HOP_NUM_0) {
> >> -		if (eq->entries > buf_chk_sz / eq->eqe_size) {
> >> -			dev_err(dev, "eq entries %d is larger than buf_pg_sz!",
> >> -				eq->entries);
> >> -			return -EINVAL;
> >> -		}
> >> -		eq->bt_l0 = dma_alloc_coherent(dev, eq->entries * eq->eqe_size,
> >> -					       &(eq->l0_dma), GFP_KERNEL);
> >> -		if (!eq->bt_l0)
> >> -			return -ENOMEM;
> >> -
> >> -		eq->cur_eqe_ba = eq->l0_dma;
> >> -		eq->nxt_eqe_ba = 0;
> >> +	/* alloc a tmp list for storing eq buf address */
> >> +	ret = hns_roce_alloc_buf_list(&region, &buf_list, 1);
> >> +	if (ret) {
> >> +		dev_err(hr_dev->dev, "alloc eq buf_list error\n");
> >
> > The same comment like we gave for bnxt driver, no dev_* prints inside
> > driver, use ibdev_*.
> >
> > Thanks
> >
>
> Hi Leon,
>
> map_eq_buf() is called before ib_register_device(), so we can't use
> ibdev_* here.

As long as map_eq_buf() is called after ib_alloc_device(), you will be fine.

Thanks

>
> Thanks for your reminder, another patch that replace other dev_* in
> hns driver with ibdev_* is on preparing.
>
> Weihang
>
> > .
> >
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH for-next] RDMA/hns: Optimize eqe buffer allocation flow
  2020-02-10  9:25     ` Leon Romanovsky
@ 2020-02-10  9:48       ` Weihang Li
  2020-02-10 10:21         ` Leon Romanovsky
  0 siblings, 1 reply; 11+ messages in thread
From: Weihang Li @ 2020-02-10  9:48 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: dledford, jgg, linux-rdma, linuxarm



On 2020/2/10 17:25, Leon Romanovsky wrote:
>>>> -		if (!eq->bt_l0)
>>>> -			return -ENOMEM;
>>>> -
>>>> -		eq->cur_eqe_ba = eq->l0_dma;
>>>> -		eq->nxt_eqe_ba = 0;
>>>> +	/* alloc a tmp list for storing eq buf address */
>>>> +	ret = hns_roce_alloc_buf_list(&region, &buf_list, 1);
>>>> +	if (ret) {
>>>> +		dev_err(hr_dev->dev, "alloc eq buf_list error\n");
>>> The same comment like we gave for bnxt driver, no dev_* prints inside
>>> driver, use ibdev_*.
>>>
>>> Thanks
>>>
>> Hi Leon,
>>
>> map_eq_buf() is called before ib_register_device(), so we can't use
>> ibdev_* here.
> As long as map_eq_buf() is called after ib_alloc_device(), you will be fine.
> 
> Thanks

Hi Leon,

eq is used to queue hardware events, so it should be ready before the hardware is initialized.
That's why we can't call map_eq_buf() after ib_alloc_device().

Thanks
Weihang

> 
>> Thanks for your reminder, another patch that replace other dev_* in
>> hns driver with ibdev_* is on preparing.
>>
>> Weihang
>>
>>> .
>>>
> .
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH for-next] RDMA/hns: Optimize eqe buffer allocation flow
  2020-02-10  9:48       ` Weihang Li
@ 2020-02-10 10:21         ` Leon Romanovsky
  2020-02-10 11:26           ` Weihang Li
  0 siblings, 1 reply; 11+ messages in thread
From: Leon Romanovsky @ 2020-02-10 10:21 UTC (permalink / raw)
  To: Weihang Li; +Cc: dledford, jgg, linux-rdma, linuxarm

On Mon, Feb 10, 2020 at 05:48:05PM +0800, Weihang Li wrote:
>
>
> On 2020/2/10 17:25, Leon Romanovsky wrote:
> >>>> -		if (!eq->bt_l0)
> >>>> -			return -ENOMEM;
> >>>> -
> >>>> -		eq->cur_eqe_ba = eq->l0_dma;
> >>>> -		eq->nxt_eqe_ba = 0;
> >>>> +	/* alloc a tmp list for storing eq buf address */
> >>>> +	ret = hns_roce_alloc_buf_list(&region, &buf_list, 1);
> >>>> +	if (ret) {
> >>>> +		dev_err(hr_dev->dev, "alloc eq buf_list error\n");
> >>> The same comment like we gave for bnxt driver, no dev_* prints inside
> >>> driver, use ibdev_*.
> >>>
> >>> Thanks
> >>>
> >> Hi Leon,
> >>
> >> map_eq_buf() is called before ib_register_device(), so we can't use
> >> ibdev_* here.
> > As long as map_eq_buf() is called after ib_alloc_device(), you will be fine.
> >
> > Thanks
>
> Hi Leon,
>
> eq is used to queue hardware event, it should be ready before hardware is initialized.
> So we can't call map_eq_buf() after ib_alloc_device().

How can it be that your newly added function has hns_roce_dev in the
signature and you didn't call to ib_alloc_device()?

 +static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
 +                u32 page_shift)

Thanks

>
> Thanks
> Weihang
>
> >
> >> Thanks for your reminder, another patch that replace other dev_* in
> >> hns driver with ibdev_* is on preparing.
> >>
> >> Weihang
> >>
> >>> .
> >>>
> > .
> >
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH for-next] RDMA/hns: Optimize eqe buffer allocation flow
  2020-02-10 10:21         ` Leon Romanovsky
@ 2020-02-10 11:26           ` Weihang Li
  2020-02-12  8:18             ` Leon Romanovsky
  0 siblings, 1 reply; 11+ messages in thread
From: Weihang Li @ 2020-02-10 11:26 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: dledford, jgg, linux-rdma, linuxarm



On 2020/2/10 18:21, Leon Romanovsky wrote:
> On Mon, Feb 10, 2020 at 05:48:05PM +0800, Weihang Li wrote:
>>
>>
>> On 2020/2/10 17:25, Leon Romanovsky wrote:
>>>>>> -		if (!eq->bt_l0)
>>>>>> -			return -ENOMEM;
>>>>>> -
>>>>>> -		eq->cur_eqe_ba = eq->l0_dma;
>>>>>> -		eq->nxt_eqe_ba = 0;
>>>>>> +	/* alloc a tmp list for storing eq buf address */
>>>>>> +	ret = hns_roce_alloc_buf_list(&region, &buf_list, 1);
>>>>>> +	if (ret) {
>>>>>> +		dev_err(hr_dev->dev, "alloc eq buf_list error\n");
>>>>> The same comment like we gave for bnxt driver, no dev_* prints inside
>>>>> driver, use ibdev_*.
>>>>>
>>>>> Thanks
>>>>>
>>>> Hi Leon,
>>>>
>>>> map_eq_buf() is called before ib_register_device(), so we can't use
>>>> ibdev_* here.
>>> As long as map_eq_buf() is called after ib_alloc_device(), you will be fine.
>>>
>>> Thanks
>>
>> Hi Leon,
>>
>> eq is used to queue hardware event, it should be ready before hardware is initialized.
>> So we can't call map_eq_buf() after ib_alloc_device().
> 
> How can it be that your newly added function has hns_roce_dev in the
> signature and you didn't call to ib_alloc_device()?
> 
>  +static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
>  +                u32 page_shift)
> 
> Thanks
> 

Sorry, I confused ib_alloc_device() and ib_register_device(). What I was about to say is
ib_register_device().

Order of these functions in hns driver is:

1. ib_alloc_device()
2. map_eq_buf()
3. ib_register_device()

Refer to code in __ibdev_printk():

	else if (ibdev)
		printk("%s%s: %pV",
		       level, dev_name(&ibdev->dev), vaf);


If we call ibdev_*() before ib_register_device(), it will print "null" for the device
name. I made a simple test, and it prints like this:

[   41.400347] (null): -------------- This is a test!----------

Because map_eq_buf() must be finished before ib_register_device(), I think we have
to use dev_*() in it.

>>
>> Thanks
>> Weihang
>>
>>>
>>>> Thanks for your reminder, another patch that replace other dev_* in
>>>> hns driver with ibdev_* is on preparing.
>>>>
>>>> Weihang
>>>>
>>>>> .
>>>>>
>>> .
>>>
>>
> 
> .
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH for-next] RDMA/hns: Optimize eqe buffer allocation flow
  2020-01-26 14:58 [PATCH for-next] RDMA/hns: Optimize eqe buffer allocation flow Weihang Li
  2020-01-27  5:52 ` Leon Romanovsky
@ 2020-02-11 18:27 ` Jason Gunthorpe
  1 sibling, 0 replies; 11+ messages in thread
From: Jason Gunthorpe @ 2020-02-11 18:27 UTC (permalink / raw)
  To: Weihang Li; +Cc: dledford, leon, linux-rdma, linuxarm

On Sun, Jan 26, 2020 at 10:58:35PM +0800, Weihang Li wrote:
> From: Xi Wang <wangxi11@huawei.com>
> 
> The eqe has a private multi-hop addressing implementation, but there is
> already a set of interfaces in the hns driver that can achieve this.
> 
> So, simplify the eqe buffer allocation process by using the mtr interface
> and remove large amount of repeated logic.
> 
> Signed-off-by: Xi Wang <wangxi11@huawei.com>
> Signed-off-by: Weihang Li <liweihang@huawei.com>
> ---
>  drivers/infiniband/hw/hns/hns_roce_device.h |  10 +-
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 481 ++++++----------------------
>  2 files changed, 108 insertions(+), 383 deletions(-)

Applied to for-next thanks

Jason

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH for-next] RDMA/hns: Optimize eqe buffer allocation flow
  2020-02-10 11:26           ` Weihang Li
@ 2020-02-12  8:18             ` Leon Romanovsky
  2020-02-13  2:16               ` Weihang Li
  0 siblings, 1 reply; 11+ messages in thread
From: Leon Romanovsky @ 2020-02-12  8:18 UTC (permalink / raw)
  To: Weihang Li; +Cc: dledford, jgg, linux-rdma, linuxarm

On Mon, Feb 10, 2020 at 07:26:59PM +0800, Weihang Li wrote:
>
>
> On 2020/2/10 18:21, Leon Romanovsky wrote:
> > On Mon, Feb 10, 2020 at 05:48:05PM +0800, Weihang Li wrote:
> >>
> >>
> >> On 2020/2/10 17:25, Leon Romanovsky wrote:
> >>>>>> -		if (!eq->bt_l0)
> >>>>>> -			return -ENOMEM;
> >>>>>> -
> >>>>>> -		eq->cur_eqe_ba = eq->l0_dma;
> >>>>>> -		eq->nxt_eqe_ba = 0;
> >>>>>> +	/* alloc a tmp list for storing eq buf address */
> >>>>>> +	ret = hns_roce_alloc_buf_list(&region, &buf_list, 1);
> >>>>>> +	if (ret) {
> >>>>>> +		dev_err(hr_dev->dev, "alloc eq buf_list error\n");
> >>>>> The same comment like we gave for bnxt driver, no dev_* prints inside
> >>>>> driver, use ibdev_*.
> >>>>>
> >>>>> Thanks
> >>>>>
> >>>> Hi Leon,
> >>>>
> >>>> map_eq_buf() is called before ib_register_device(), so we can't use
> >>>> ibdev_* here.
> >>> As long as map_eq_buf() is called after ib_alloc_device(), you will be fine.
> >>>
> >>> Thanks
> >>
> >> Hi Leon,
> >>
> >> eq is used to queue hardware event, it should be ready before hardware is initialized.
> >> So we can't call map_eq_buf() after ib_alloc_device().
> >
> > How can it be that your newly added function has hns_roce_dev in the
> > signature and you didn't call to ib_alloc_device()?
> >
> >  +static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
> >  +                u32 page_shift)
> >
> > Thanks
> >
>
> Sorry, I confused ib_alloc_device() and ib_register_device(). What I was about to say is
> ib_register_device().
>
> Order of these functions in hns driver is:
>
> 1. ib_alloc_device()
> 2. map_eq_buf()
> 3. ib_register_device()
>
> Refer to code in __ibdev_printk():
>
> 	else if (ibdev)
> 		printk("%s%s: %pV",
> 		       level, dev_name(&ibdev->dev), vaf);
>
>
> If we called ibdev_*() before ib_register_device(), it will print "null" for the device
> name. And I make a simple test, it will print like this:
>
> [   41.400347] (null): -------------- This is a test!----------
>
> Because map_eq_buf() should be finished before ib_register_device(), so I think we have
> to use dev_*() in it.

Interesting, I wonder why "ibdev->dev" is set so late. I'm afraid that it
is a bug in hns.

Thanks

>
> >>
> >> Thanks
> >> Weihang
> >>
> >>>
> >>>> Thanks for your reminder, another patch that replace other dev_* in
> >>>> hns driver with ibdev_* is on preparing.
> >>>>
> >>>> Weihang
> >>>>
> >>>>> .
> >>>>>
> >>> .
> >>>
> >>
> >
> > .
> >
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH for-next] RDMA/hns: Optimize eqe buffer allocation flow
  2020-02-12  8:18             ` Leon Romanovsky
@ 2020-02-13  2:16               ` Weihang Li
  0 siblings, 0 replies; 11+ messages in thread
From: Weihang Li @ 2020-02-13  2:16 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: dledford, jgg, linux-rdma, linuxarm



On 2020/2/12 16:18, Leon Romanovsky wrote:
> On Mon, Feb 10, 2020 at 07:26:59PM +0800, Weihang Li wrote:
>>
>>
>> On 2020/2/10 18:21, Leon Romanovsky wrote:
>>> On Mon, Feb 10, 2020 at 05:48:05PM +0800, Weihang Li wrote:
>>>>
>>>>
>>>> On 2020/2/10 17:25, Leon Romanovsky wrote:
>>>>>>>> -		if (!eq->bt_l0)
>>>>>>>> -			return -ENOMEM;
>>>>>>>> -
>>>>>>>> -		eq->cur_eqe_ba = eq->l0_dma;
>>>>>>>> -		eq->nxt_eqe_ba = 0;
>>>>>>>> +	/* alloc a tmp list for storing eq buf address */
>>>>>>>> +	ret = hns_roce_alloc_buf_list(&region, &buf_list, 1);
>>>>>>>> +	if (ret) {
>>>>>>>> +		dev_err(hr_dev->dev, "alloc eq buf_list error\n");
>>>>>>> The same comment like we gave for bnxt driver, no dev_* prints inside
>>>>>>> driver, use ibdev_*.
>>>>>>>
>>>>>>> Thanks
>>>>>>>
>>>>>> Hi Leon,
>>>>>>
>>>>>> map_eq_buf() is called before ib_register_device(), so we can't use
>>>>>> ibdev_* here.
>>>>> As long as map_eq_buf() is called after ib_alloc_device(), you will be fine.
>>>>>
>>>>> Thanks
>>>>
>>>> Hi Leon,
>>>>
>>>> eq is used to queue hardware event, it should be ready before hardware is initialized.
>>>> So we can't call map_eq_buf() after ib_alloc_device().
>>>
>>> How can it be that your newly added function has hns_roce_dev in the
>>> signature and you didn't call to ib_alloc_device()?
>>>
>>>  +static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
>>>  +                u32 page_shift)
>>>
>>> Thanks
>>>
>>
>> Sorry, I confused ib_alloc_device() and ib_register_device(). What I was about to say is
>> ib_register_device().
>>
>> Order of these functions in hns driver is:
>>
>> 1. ib_alloc_device()
>> 2. map_eq_buf()
>> 3. ib_register_device()
>>
>> Refer to code in __ibdev_printk():
>>
>> 	else if (ibdev)
>> 		printk("%s%s: %pV",
>> 		       level, dev_name(&ibdev->dev), vaf);
>>
>>
>> If we called ibdev_*() before ib_register_device(), it will print "null" for the device
>> name. And I make a simple test, it will print like this:
>>
>> [   41.400347] (null): -------------- This is a test!----------
>>
>> Because map_eq_buf() should be finished before ib_register_device(), so I think we have
>> to use dev_*() in it.
> 
> Interesting, I wonder why "ibdev->dev" is set so late. I afraid that it
> is a bug in hns.
> 
> Thanks
> 

I will check the code in hns and ib core to find out the reasons. Thank you.

Weihang

>>
>>>>
>>>> Thanks
>>>> Weihang
>>>>
>>>>>
>>>>>> Thanks for your reminder, another patch that replace other dev_* in
>>>>>> hns driver with ibdev_* is on preparing.
>>>>>>
>>>>>> Weihang
>>>>>>
>>>>>>> .
>>>>>>>
>>>>> .
>>>>>
>>>>
>>>
>>> .
>>>
>>
> 
> .
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, back to index

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-26 14:58 [PATCH for-next] RDMA/hns: Optimize eqe buffer allocation flow Weihang Li
2020-01-27  5:52 ` Leon Romanovsky
2020-01-27  7:47   ` Weihang Li
2020-02-05  6:04   ` Weihang Li
2020-02-10  9:25     ` Leon Romanovsky
2020-02-10  9:48       ` Weihang Li
2020-02-10 10:21         ` Leon Romanovsky
2020-02-10 11:26           ` Weihang Li
2020-02-12  8:18             ` Leon Romanovsky
2020-02-13  2:16               ` Weihang Li
2020-02-11 18:27 ` Jason Gunthorpe

Linux-RDMA Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-rdma/0 linux-rdma/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-rdma linux-rdma/ https://lore.kernel.org/linux-rdma \
		linux-rdma@vger.kernel.org
	public-inbox-index linux-rdma

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-rdma


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git