linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/4] habanalabs: print va_range in vm node debugfs
@ 2021-10-25 12:36 Oded Gabbay
  2021-10-25 12:36 ` [PATCH 2/4] habanalabs: revise and document use of boot status flags Oded Gabbay
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Oded Gabbay @ 2021-10-25 12:36 UTC (permalink / raw)
  To: linux-kernel; +Cc: Yuri Nudelman

From: Yuri Nudelman <ynudelman@habana.ai>

VA range info could assist in debugging VA allocation bugs.

Signed-off-by: Yuri Nudelman <ynudelman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/debugfs.c | 25 ++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index 1f2a3dc6c4e2..a239c5679f95 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -235,6 +235,8 @@ static int vm_show(struct seq_file *s, void *data)
 	struct hl_vm_hash_node *hnode;
 	struct hl_userptr *userptr;
 	struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
+	struct hl_va_range *va_range;
+	struct hl_vm_va_block *va_block;
 	enum vm_type *vm_type;
 	bool once = true;
 	u64 j;
@@ -314,6 +316,29 @@ static int vm_show(struct seq_file *s, void *data)
 
 	spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
 
+	mutex_lock(&dev_entry->hdev->fpriv_list_lock);
+	ctx = dev_entry->hdev->compute_ctx;
+	if (ctx)
+		hl_ctx_get(dev_entry->hdev, ctx);
+	mutex_unlock(&dev_entry->hdev->fpriv_list_lock);
+	if (ctx) {
+		seq_puts(s, "\nVA ranges:\n\n");
+		for (i = HL_VA_RANGE_TYPE_HOST ; i < HL_VA_RANGE_TYPE_MAX ; ++i) {
+			va_range = ctx->va_range[i];
+			seq_printf(s, "   va_range %d\n", i);
+			seq_puts(s, "---------------------\n");
+			mutex_lock(&va_range->lock);
+			list_for_each_entry(va_block, &va_range->list, node) {
+				seq_printf(s, "%#16llx - %#16llx (%#llx)\n",
+					   va_block->start, va_block->end,
+					   va_block->size);
+			}
+			mutex_unlock(&va_range->lock);
+			seq_puts(s, "\n");
+		}
+		hl_ctx_put(ctx);
+	}
+
 	if (!once)
 		seq_puts(s, "\n");
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/4] habanalabs: revise and document use of boot status flags
  2021-10-25 12:36 [PATCH 1/4] habanalabs: print va_range in vm node debugfs Oded Gabbay
@ 2021-10-25 12:36 ` Oded Gabbay
  2021-10-25 12:36 ` [PATCH 3/4] habanalabs/gaudi: fix debugfs dma channel selection Oded Gabbay
  2021-10-25 12:36 ` [PATCH 4/4] habanalabs: wrong VA size calculation Oded Gabbay
  2 siblings, 0 replies; 4+ messages in thread
From: Oded Gabbay @ 2021-10-25 12:36 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ohad Sharabi

From: Ohad Sharabi <osharabi@habana.ai>

The boot status flag "SRAM available" can be set by f/w Linux (in the
general case) or by f/w uboot (in some specific debug scenario) but
never by f/w preboot.

Hence, when polling the boot status flags in the preboot stage we do not
want to poll on "SRAM Avialable".

The special case in which uboot set this flag is when we are running
special debug scenario without Linux. In this case, at some point during
the boot, the uboot relocates its code to the DRAM and then set the
specified flag.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 24 ++++++++++++++++----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 025707a21882..482bed152c39 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -1101,7 +1101,6 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev,
 		(status == CPU_BOOT_STATUS_DRAM_RDY) ||
 		(status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
 		(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
-		(status == CPU_BOOT_STATUS_SRAM_AVAIL) ||
 		(status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
 		FW_CPU_STATUS_POLL_INTERVAL_USEC,
 		timeout);
@@ -2055,12 +2054,20 @@ static int hl_fw_dynamic_wait_for_boot_fit_active(struct hl_device *hdev,
 
 	dyn_loader = &fw_loader->dynamic_loader;
 
-	/* Make sure CPU boot-loader is running */
+	/*
+	 * Make sure CPU boot-loader is running
+	 * Note that the CPU_BOOT_STATUS_SRAM_AVAIL is generally set by Linux
+	 * yet there is a debug scenario in which we loading uboot (without Linux)
+	 * which at later stage is relocated to DRAM. In this case we expect
+	 * uboot to set the CPU_BOOT_STATUS_SRAM_AVAIL and so we add it to the
+	 * poll flags
+	 */
 	rc = hl_poll_timeout(
 		hdev,
 		le32_to_cpu(dyn_loader->comm_desc.cpu_dyn_regs.cpu_boot_status),
 		status,
-		(status == CPU_BOOT_STATUS_READY_TO_BOOT),
+		(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
+		(status == CPU_BOOT_STATUS_SRAM_AVAIL),
 		FW_CPU_STATUS_POLL_INTERVAL_USEC,
 		dyn_loader->wait_for_bl_timeout);
 	if (rc) {
@@ -2081,7 +2088,7 @@ static int hl_fw_dynamic_wait_for_linux_active(struct hl_device *hdev,
 
 	dyn_loader = &fw_loader->dynamic_loader;
 
-	/* Make sure CPU boot-loader is running */
+	/* Make sure CPU linux is running */
 
 	rc = hl_poll_timeout(
 		hdev,
@@ -2415,7 +2422,14 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
 		WREG32(msg_to_cpu_reg, KMD_MSG_NA);
 	}
 
-	/* Make sure CPU boot-loader is running */
+	/*
+	 * Make sure CPU boot-loader is running
+	 * Note that the CPU_BOOT_STATUS_SRAM_AVAIL is generally set by Linux
+	 * yet there is a debug scenario in which we loading uboot (without Linux)
+	 * which at later stage is relocated to DRAM. In this case we expect
+	 * uboot to set the CPU_BOOT_STATUS_SRAM_AVAIL and so we add it to the
+	 * poll flags
+	 */
 	rc = hl_poll_timeout(
 		hdev,
 		cpu_boot_status_reg,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 3/4] habanalabs/gaudi: fix debugfs dma channel selection
  2021-10-25 12:36 [PATCH 1/4] habanalabs: print va_range in vm node debugfs Oded Gabbay
  2021-10-25 12:36 ` [PATCH 2/4] habanalabs: revise and document use of boot status flags Oded Gabbay
@ 2021-10-25 12:36 ` Oded Gabbay
  2021-10-25 12:36 ` [PATCH 4/4] habanalabs: wrong VA size calculation Oded Gabbay
  2 siblings, 0 replies; 4+ messages in thread
From: Oded Gabbay @ 2021-10-25 12:36 UTC (permalink / raw)
  To: linux-kernel; +Cc: Guy Zadicario

From: Guy Zadicario <gzadicario@habana.ai>

Do not use a dma channel for debugfs requested transfer if it's
QM is not idle.

Signed-off-by: Guy Zadicario <gzadicario@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index d2b7ecb45497..92d55a0a10c1 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -6430,6 +6430,7 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
 {
 	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
 	struct gaudi_device *gaudi = hdev->asic_specific;
+	u32 qm_glbl_sts0, qm_cgm_sts;
 	u64 dma_offset, qm_offset;
 	dma_addr_t dma_addr;
 	void *kernel_addr;
@@ -6454,14 +6455,20 @@ static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
 	dma_offset = dma_id * DMA_CORE_OFFSET;
 	qm_offset = dma_id * DMA_QMAN_OFFSET;
 	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
-	is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
+	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
+	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
+	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
+		      IS_DMA_IDLE(dma_core_sts0);
 
 	if (!is_eng_idle) {
 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
 		dma_offset = dma_id * DMA_CORE_OFFSET;
 		qm_offset = dma_id * DMA_QMAN_OFFSET;
 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
-		is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
+		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
+		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
+		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
+			      IS_DMA_IDLE(dma_core_sts0);
 
 		if (!is_eng_idle) {
 			dev_err_ratelimited(hdev->dev,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 4/4] habanalabs: wrong VA size calculation
  2021-10-25 12:36 [PATCH 1/4] habanalabs: print va_range in vm node debugfs Oded Gabbay
  2021-10-25 12:36 ` [PATCH 2/4] habanalabs: revise and document use of boot status flags Oded Gabbay
  2021-10-25 12:36 ` [PATCH 3/4] habanalabs/gaudi: fix debugfs dma channel selection Oded Gabbay
@ 2021-10-25 12:36 ` Oded Gabbay
  2 siblings, 0 replies; 4+ messages in thread
From: Oded Gabbay @ 2021-10-25 12:36 UTC (permalink / raw)
  To: linux-kernel; +Cc: Yuri Nudelman

From: Yuri Nudelman <ynudelman@habana.ai>

VA blocks are currently stored in an inconsistent way. Sometimes block
end is inclusive, sometimes exclusive. This leads to wrong size
calculations in certain cases, plus could lead to a segmentation fault
in case mapping process fails in the middle and we try to roll it back.
Need to make this consistent - start inclusive till end inclusive.

For example, the regions table may now look like this:
    0x0000 - 0x1fff : allocated
    0x2000 - 0x2fff : free
    0x3000 - 0x3fff : allocated

Signed-off-by: Yuri Nudelman <ynudelman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../misc/habanalabs/common/command_buffer.c   |  2 +-
 drivers/misc/habanalabs/common/habanalabs.h   | 16 ++------------
 drivers/misc/habanalabs/common/memory.c       | 22 ++++++++++++-------
 3 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index 8132a84698d5..41a12bcd26e5 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -57,7 +57,7 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
 		}
 
 		va_block->start = virt_addr;
-		va_block->end = virt_addr + page_size;
+		va_block->end = virt_addr + page_size - 1;
 		va_block->size = page_size;
 		list_add_tail(&va_block->node, &cb->va_block_list);
 	}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index a2002cbf794b..4f3c228c9b9d 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2757,21 +2757,9 @@ static inline bool hl_mem_area_inside_range(u64 address, u64 size,
 static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
 				u64 range_start_address, u64 range_end_address)
 {
-	u64 end_address = address + size;
+	u64 end_address = address + size - 1;
 
-	if ((address >= range_start_address) &&
-			(address < range_end_address))
-		return true;
-
-	if ((end_address >= range_start_address) &&
-			(end_address < range_end_address))
-		return true;
-
-	if ((address < range_start_address) &&
-			(end_address >= range_end_address))
-		return true;
-
-	return false;
+	return ((address <= range_end_address) && (range_start_address <= end_address));
 }
 
 int hl_device_open(struct inode *inode, struct file *filp);
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 504973330e2e..97434149f5f8 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -475,7 +475,7 @@ static int add_va_block_locked(struct hl_device *hdev,
 		struct list_head *va_list, u64 start, u64 end)
 {
 	struct hl_vm_va_block *va_block, *res = NULL;
-	u64 size = end - start;
+	u64 size = end - start + 1;
 
 	print_va_list_locked(hdev, va_list);
 
@@ -642,7 +642,7 @@ static u64 get_va_block(struct hl_device *hdev,
 				continue;
 		}
 
-		valid_size = va_block->end - valid_start;
+		valid_size = va_block->end - valid_start + 1;
 		if (valid_size < size)
 			continue;
 
@@ -705,7 +705,7 @@ static u64 get_va_block(struct hl_device *hdev,
 
 	if (new_va_block->size > size) {
 		new_va_block->start += size;
-		new_va_block->size = new_va_block->end - new_va_block->start;
+		new_va_block->size = new_va_block->end - new_va_block->start + 1;
 	} else {
 		list_del(&new_va_block->node);
 		kfree(new_va_block);
@@ -2386,8 +2386,14 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
 			start += PAGE_SIZE;
 		}
 
-		if (end & (PAGE_SIZE - 1))
-			end &= PAGE_MASK;
+		/*
+		 * The end of the range is inclusive, hence we need to align it
+		 * to the end of the last full page in the range. For example if
+		 * end = 0x3ff5 with page size 0x1000, we need to align it to
+		 * 0x2fff. The remainig 0xff5 bytes do not form a full page.
+		 */
+		if ((end + 1) & (PAGE_SIZE - 1))
+			end = ((end + 1) & PAGE_MASK) - 1;
 	}
 
 	if (start >= end) {
@@ -2562,14 +2568,14 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
 		return 0;
 
 	dram_range_start = prop->dmmu.start_addr;
-	dram_range_end = prop->dmmu.end_addr;
+	dram_range_end = prop->dmmu.end_addr - 1;
 	dram_page_size = prop->dram_page_size ?
 				prop->dram_page_size : prop->dmmu.page_size;
 	host_range_start = prop->pmmu.start_addr;
-	host_range_end = prop->pmmu.end_addr;
+	host_range_end = prop->pmmu.end_addr - 1;
 	host_page_size = prop->pmmu.page_size;
 	host_huge_range_start = prop->pmmu_huge.start_addr;
-	host_huge_range_end = prop->pmmu_huge.end_addr;
+	host_huge_range_end = prop->pmmu_huge.end_addr - 1;
 	host_huge_page_size = prop->pmmu_huge.page_size;
 
 	return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-10-25 12:36 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-25 12:36 [PATCH 1/4] habanalabs: print va_range in vm node debugfs Oded Gabbay
2021-10-25 12:36 ` [PATCH 2/4] habanalabs: revise and document use of boot status flags Oded Gabbay
2021-10-25 12:36 ` [PATCH 3/4] habanalabs/gaudi: fix debugfs dma channel selection Oded Gabbay
2021-10-25 12:36 ` [PATCH 4/4] habanalabs: wrong VA size calculation Oded Gabbay

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).