linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] habanalabs: flush only at the end of the map/unmap
@ 2019-12-14 20:57 Oded Gabbay
  0 siblings, 0 replies; only message in thread
From: Oded Gabbay @ 2019-12-14 20:57 UTC (permalink / raw)
  To: linux-kernel, oshpigelman, ttayar; +Cc: gregkh

From: Pawel Piskorski <ppiskorski@habana.ai>

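Optimize hl_mmu_map() and hl_mmu_unmap() by not calling flush(ctx) inside
the per-page loop. Both functions now take a flush_pte argument, so a
caller that maps or unmaps a range of pages can request a single flush
on the last page instead of one flush per page.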

Signed-off-by: Pawel Piskorski <ppiskorski@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/goya/goya.c  | 20 +++++++------
 drivers/misc/habanalabs/habanalabs.h |  6 ++--
 drivers/misc/habanalabs/memory.c     |  9 ++++--
 drivers/misc/habanalabs/mmu.c        | 42 ++++++++++++++++++----------
 4 files changed, 50 insertions(+), 27 deletions(-)

diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index c8d16aa4382c..08d4bef86e23 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -4737,7 +4737,8 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 
 	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
 		rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
-				prop->dram_base_address + off, PAGE_SIZE_2MB);
+				prop->dram_base_address + off, PAGE_SIZE_2MB,
+				(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
 		if (rc) {
 			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
 				prop->dram_base_address + off);
@@ -4747,7 +4748,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 
 	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
 		rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
-			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB);
+			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);
 
 		if (rc) {
 			dev_err(hdev->dev,
@@ -4760,7 +4761,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 			rc = hl_mmu_map(hdev->kernel_ctx,
 				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
 				hdev->cpu_accessible_dma_address + cpu_off,
-				PAGE_SIZE_4KB);
+				PAGE_SIZE_4KB, true);
 			if (rc) {
 				dev_err(hdev->dev,
 					"Map failed for CPU accessible memory\n");
@@ -4786,14 +4787,15 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
 		if (hl_mmu_unmap(hdev->kernel_ctx,
 				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
-				PAGE_SIZE_4KB))
+				PAGE_SIZE_4KB, true))
 			dev_warn_ratelimited(hdev->dev,
 				"failed to unmap address 0x%llx\n",
 				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
 unmap:
 	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
 		if (hl_mmu_unmap(hdev->kernel_ctx,
-				prop->dram_base_address + off, PAGE_SIZE_2MB))
+				prop->dram_base_address + off, PAGE_SIZE_2MB,
+				true))
 			dev_warn_ratelimited(hdev->dev,
 				"failed to unmap address 0x%llx\n",
 				prop->dram_base_address + off);
@@ -4818,14 +4820,15 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
 
 	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
 		if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
-				PAGE_SIZE_2MB))
+				PAGE_SIZE_2MB, true))
 			dev_warn(hdev->dev,
 				"Failed to unmap CPU accessible memory\n");
 	} else {
 		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
 			if (hl_mmu_unmap(hdev->kernel_ctx,
 					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
-					PAGE_SIZE_4KB))
+					PAGE_SIZE_4KB,
+					(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
 				dev_warn_ratelimited(hdev->dev,
 					"failed to unmap address 0x%llx\n",
 					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
@@ -4833,7 +4836,8 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
 
 	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
 		if (hl_mmu_unmap(hdev->kernel_ctx,
-				prop->dram_base_address + off, PAGE_SIZE_2MB))
+				prop->dram_base_address + off, PAGE_SIZE_2MB,
+				(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
 			dev_warn_ratelimited(hdev->dev,
 					"Failed to unmap address 0x%llx\n",
 					prop->dram_base_address + off);
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 00c949f4ccd1..df34227dea31 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -1573,8 +1573,10 @@ int hl_mmu_init(struct hl_device *hdev);
 void hl_mmu_fini(struct hl_device *hdev);
 int hl_mmu_ctx_init(struct hl_ctx *ctx);
 void hl_mmu_ctx_fini(struct hl_ctx *ctx);
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size);
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size);
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+		u32 page_size, bool flush_pte);
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+		bool flush_pte);
 void hl_mmu_swap_out(struct hl_ctx *ctx);
 void hl_mmu_swap_in(struct hl_ctx *ctx);
 
diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
index 6c72cb4eff54..b612b1ad0aac 100644
--- a/drivers/misc/habanalabs/memory.c
+++ b/drivers/misc/habanalabs/memory.c
@@ -747,7 +747,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
 		paddr = phys_pg_pack->pages[i];
 
-		rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size);
+		rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
+				(i + 1) == phys_pg_pack->npages);
 		if (rc) {
 			dev_err(hdev->dev,
 				"map failed for handle %u, npages: %llu, mapped: %llu",
@@ -765,7 +766,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 err:
 	next_vaddr = vaddr;
 	for (i = 0 ; i < mapped_pg_cnt ; i++) {
-		if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+		if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+					(i + 1) == mapped_pg_cnt))
 			dev_warn_ratelimited(hdev->dev,
 				"failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
 					phys_pg_pack->handle, next_vaddr,
@@ -794,7 +796,8 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 	next_vaddr = vaddr;
 
 	for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
-		if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+		if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+				       (i + 1) == phys_pg_pack->npages))
 			dev_warn_ratelimited(hdev->dev,
 			"unmap failed for vaddr: 0x%llx\n", next_vaddr);
 
diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c
index 6262b26e2086..006eee47909d 100644
--- a/drivers/misc/habanalabs/mmu.c
+++ b/drivers/misc/habanalabs/mmu.c
@@ -637,29 +637,27 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
 			clear_hop3 = true;
 
 		if (!clear_hop3)
-			goto flush;
+			goto mapped;
 
 		clear_pte(ctx, hop3_pte_addr);
 
 		if (put_pte(ctx, hop3_addr))
-			goto flush;
+			goto mapped;
 
 		clear_pte(ctx, hop2_pte_addr);
 
 		if (put_pte(ctx, hop2_addr))
-			goto flush;
+			goto mapped;
 
 		clear_pte(ctx, hop1_pte_addr);
 
 		if (put_pte(ctx, hop1_addr))
-			goto flush;
+			goto mapped;
 
 		clear_pte(ctx, hop0_pte_addr);
 	}
 
-flush:
-	flush(ctx);
-
+mapped:
 	return 0;
 
 not_mapped:
@@ -675,6 +673,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
  * @ctx: pointer to the context structure
  * @virt_addr: virt addr to map from
  * @page_size: size of the page to unmap
+ * @flush_pte: whether to do a PCI flush
  *
  * This function does the following:
  * - Check that the virt addr is mapped
@@ -685,15 +684,19 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
  * changes the MMU hash, it must be protected by a lock.
  * However, because it maps only a single page, the lock should be implemented
  * in a higher level in order to protect the entire mapping of the memory area
+ *
+ * For optimization reasons, the PCI flush may be requested only once, after
+ * unmapping a large area.
  */
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+		bool flush_pte)
 {
 	struct hl_device *hdev = ctx->hdev;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct hl_mmu_properties *mmu_prop;
 	u64 real_virt_addr;
 	u32 real_page_size, npages;
-	int i, rc;
+	int i, rc = 0;
 	bool is_dram_addr;
 
 	if (!hdev->mmu_enable)
@@ -729,12 +732,15 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
 	for (i = 0 ; i < npages ; i++) {
 		rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
 		if (rc)
-			return rc;
+			break;
 
 		real_virt_addr += real_page_size;
 	}
 
-	return 0;
+	if (flush_pte)
+		flush(ctx);
+
+	return rc;
 }
 
 static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
@@ -885,8 +891,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 		get_pte(ctx, hop3_addr);
 	}
 
-	flush(ctx);
-
 	return 0;
 
 err:
@@ -909,6 +913,7 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
  * @virt_addr: virt addr to map from
  * @phys_addr: phys addr to map to
  * @page_size: physical page size
+ * @flush_pte: whether to do a PCI flush
  *
  * This function does the following:
  * - Check that the virt addr is not mapped
@@ -919,8 +924,12 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
  * changes the MMU hash, it must be protected by a lock.
  * However, because it maps only a single page, the lock should be implemented
  * in a higher level in order to protect the entire mapping of the memory area
+ *
+ * For optimization reasons, the PCI flush may be requested only once, after
+ * mapping a large area.
  */
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
+		bool flush_pte)
 {
 	struct hl_device *hdev = ctx->hdev;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -976,6 +985,9 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
 		mapped_cnt++;
 	}
 
+	if (flush_pte)
+		flush(ctx);
+
 	return 0;
 
 err:
@@ -988,6 +1000,8 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
 		real_virt_addr += real_page_size;
 	}
 
+	flush(ctx);
+
 	return rc;
 }
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2019-12-14 20:58 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-14 20:57 [PATCH] habanalabs: flush only at the end of the map/unmap Oded Gabbay

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).