All of lore.kernel.org
 help / color / mirror / Atom feed
From: Oded Gabbay <ogabbay@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: Alon Mizrahi <amizrahi@habana.ai>
Subject: [PATCH 7/7] habanalabs: use mmu cache range invalidation
Date: Sat, 17 Apr 2021 12:06:38 +0300	[thread overview]
Message-ID: <20210417090638.9802-7-ogabbay@kernel.org> (raw)
In-Reply-To: <20210417090638.9802-1-ogabbay@kernel.org>

From: Alon Mizrahi <amizrahi@habana.ai>

Use mmu cache range invalidation instead of entire cache invalidation
because it yields better performance.

In GOYA and GAUDI, always use entire cache invalidation because these
ASICs don't support range invalidation.

Signed-off-by: Alon Mizrahi <amizrahi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h |  2 +-
 drivers/misc/habanalabs/common/memory.c     |  8 ++--
 drivers/misc/habanalabs/gaudi/gaudi.c       | 49 ++------------------
 drivers/misc/habanalabs/goya/goya.c         | 51 ++-------------------
 4 files changed, 16 insertions(+), 94 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 9ee7a583b614..07c2713ba372 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1035,7 +1035,7 @@ struct hl_asic_funcs {
 	int (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard,
 					u32 flags);
 	int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
-			u32 asid, u64 va, u64 size);
+				u32 flags, u32 asid, u64 va, u64 size);
 	int (*send_heartbeat)(struct hl_device *hdev);
 	void (*set_clock_gating)(struct hl_device *hdev);
 	void (*disable_clock_gating)(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 2938cbbafbbc..b92878d76f23 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -1117,7 +1117,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 		goto map_err;
 	}
 
-	rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type);
+	rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, false,
+		*vm_type, ctx->asid, ret_vaddr, phys_pg_pack->total_size);
 
 	mutex_unlock(&ctx->mmu_lock);
 
@@ -1261,8 +1262,9 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	 * at the loop end rather than for each iteration
 	 */
 	if (!ctx_free)
-		rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
-								*vm_type);
+		rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, true,
+				*vm_type, ctx->asid, vaddr,
+				phys_pg_pack->total_size);
 
 	mutex_unlock(&ctx->mmu_lock);
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 45c40549ded4..1d5930578e07 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7863,52 +7863,13 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
 }
 
 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
-				bool is_hard, u32 asid, u64 va, u64 size)
+						bool is_hard, u32 flags,
+						u32 asid, u64 va, u64 size)
 {
-	struct gaudi_device *gaudi = hdev->asic_specific;
-	u32 status, timeout_usec;
-	u32 inv_data;
-	u32 pi;
-	int rc;
-
-	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
-		hdev->hard_reset_pending)
-		return 0;
-
-	if (hdev->pldm)
-		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
-	else
-		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
-
-	/*
-	 * TODO: currently invalidate entire L0 & L1 as in regular hard
-	 * invalidation. Need to apply invalidation of specific cache
-	 * lines with mask of ASID & VA & size.
-	 * Note that L1 with be flushed entirely in any case.
+	/* Treat as invalidate all because there is no range invalidation
+	 * in Gaudi
 	 */
-
-	/* L0 & L1 invalidation */
-	inv_data = RREG32(mmSTLB_CACHE_INV);
-	/* PI is 8 bit */
-	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
-	WREG32(mmSTLB_CACHE_INV,
-		(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
-
-	rc = hl_poll_timeout(
-		hdev,
-		mmSTLB_INV_CONSUMER_INDEX,
-		status,
-		status == pi,
-		1000,
-		timeout_usec);
-
-	if (rc) {
-		dev_err_ratelimited(hdev->dev,
-					"MMU cache invalidation timeout\n");
-		hl_device_reset(hdev, HL_RESET_HARD);
-	}
-
-	return rc;
+	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
 }
 
 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index d554304b6868..678fbbc6521b 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5178,54 +5178,13 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
 }
 
 static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
-				bool is_hard, u32 asid, u64 va, u64 size)
+						bool is_hard, u32 flags,
+						u32 asid, u64 va, u64 size)
 {
-	struct goya_device *goya = hdev->asic_specific;
-	u32 status, timeout_usec, inv_data, pi;
-	int rc;
-
-	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
-		hdev->hard_reset_pending)
-		return 0;
-
-	/* no need in L1 only invalidation in Goya */
-	if (!is_hard)
-		return 0;
-
-	if (hdev->pldm)
-		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
-	else
-		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
-
-	/*
-	 * TODO: currently invalidate entire L0 & L1 as in regular hard
-	 * invalidation. Need to apply invalidation of specific cache lines with
-	 * mask of ASID & VA & size.
-	 * Note that L1 with be flushed entirely in any case.
+	/* Treat as invalidate all because there is no range invalidation
+	 * in Goya
 	 */
-
-	/* L0 & L1 invalidation */
-	inv_data = RREG32(mmSTLB_CACHE_INV);
-	/* PI is 8 bit */
-	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
-	WREG32(mmSTLB_CACHE_INV,
-			(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
-
-	rc = hl_poll_timeout(
-		hdev,
-		mmSTLB_INV_CONSUMER_INDEX,
-		status,
-		status == pi,
-		1000,
-		timeout_usec);
-
-	if (rc) {
-		dev_err_ratelimited(hdev->dev,
-					"MMU cache invalidation timeout\n");
-		hl_device_reset(hdev, HL_RESET_HARD);
-	}
-
-	return rc;
+	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
 }
 
 int goya_send_heartbeat(struct hl_device *hdev)
-- 
2.25.1


      parent reply	other threads:[~2021-04-17  9:06 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-17  9:06 [PATCH 1/7] habanalabs: expose ASIC specific PLL index Oded Gabbay
2021-04-17  9:06 ` [PATCH 2/7] habanalabs: increase ELBI reset timeout for PLDM Oded Gabbay
2021-04-17  9:06 ` [PATCH 3/7] habanalabs: update firmware files to latest Oded Gabbay
2021-04-17  9:06 ` [PATCH 4/7] habanalabs: prepare preboot stage to dynamic f/w load Oded Gabbay
2021-04-17  9:06 ` [PATCH 5/7] habanalabs: request f/w in separate function Oded Gabbay
2021-04-17  9:06 ` [PATCH 6/7] habanalabs: refactor init device cpu code Oded Gabbay
2021-04-17  9:06 ` Oded Gabbay [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210417090638.9802-7-ogabbay@kernel.org \
    --to=ogabbay@kernel.org \
    --cc=amizrahi@habana.ai \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.