All of lore.kernel.org
 help / color / mirror / Atom feed
From: Oded Gabbay <ogabbay@kernel.org>
To: linux-kernel@vger.kernel.org
Subject: [PATCH 3/3] habanalabs/gaudi: refactor reset code
Date: Mon, 24 May 2021 11:46:19 +0300	[thread overview]
Message-ID: <20210524084619.4680-3-ogabbay@kernel.org> (raw)
In-Reply-To: <20210524084619.4680-1-ogabbay@kernel.org>

After all the latest changes to the reset code, there were some
redundancy and errors in the flows.

If the Linux FIT is loaded to the ASIC CPU, we need to communicate
with it only via GIC. If it is not loaded, we need to either use
COMMS protocol (for newer f/w) or MSG_TO_CPU register (for older f/w).

In addition, if we halted the device CPU then we need to mark that
the driver will do the reset, regardless of the capabilities.

Also, to prevent false errors, we need to keep track whether the
device CPU was already halted. If so, we shouldn't try to halt it
again.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c  | 47 ++++++++++++++++----------
 drivers/misc/habanalabs/gaudi/gaudiP.h |  5 +++
 2 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 836465dccc61..5ca4c8f86801 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1931,11 +1931,11 @@ static void gaudi_disable_msi(struct hl_device *hdev)
 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
 }
 
-static void gaudi_fw_hard_reset(struct hl_device *hdev)
+static void gaudi_ask_hard_reset_without_linux(struct hl_device *hdev)
 {
 	int rc;
 
-	if (hdev->asic_prop.dynamic_fw_load && !hdev->fw_loader.linux_loaded) {
+	if (hdev->asic_prop.dynamic_fw_load) {
 		rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
 				COMMS_RST_DEV, 0, false,
 				hdev->fw_loader.cpu_timeout);
@@ -1946,12 +1946,16 @@ static void gaudi_fw_hard_reset(struct hl_device *hdev)
 	}
 }
 
-static void gaudi_fw_halt_cpu(struct hl_device *hdev)
+static void gaudi_ask_halt_machine_without_linux(struct hl_device *hdev)
 {
+	struct gaudi_device *gaudi = hdev->asic_specific;
 	int rc;
 
+	if (gaudi && gaudi->device_cpu_is_halted)
+		return;
+
 	/* Stop device CPU to make sure nothing bad happens */
-	if (hdev->asic_prop.dynamic_fw_load && !hdev->fw_loader.linux_loaded) {
+	if (hdev->asic_prop.dynamic_fw_load) {
 		rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
 				COMMS_GOTO_WFE, 0, true,
 				hdev->fw_loader.cpu_timeout);
@@ -1961,6 +1965,9 @@ static void gaudi_fw_halt_cpu(struct hl_device *hdev)
 		WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
 		msleep(GAUDI_CPU_RESET_WAIT_MSEC);
 	}
+
+	if (gaudi)
+		gaudi->device_cpu_is_halted = true;
 }
 
 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
@@ -4110,8 +4117,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 {
 	struct cpu_dyn_regs *dyn_regs =
 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
-	struct gaudi_device *gaudi = hdev->asic_specific;
 	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
+	struct gaudi_device *gaudi = hdev->asic_specific;
+	bool driver_performs_reset;
 
 	if (!hard_reset) {
 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
@@ -4126,32 +4134,34 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
 	}
 
+	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
+					!hdev->asic_prop.hard_reset_done_by_fw);
+
 	/* Set device to handle FLR by H/W as we will put the device CPU to
 	 * halt mode
 	 */
-	if (!hdev->asic_prop.fw_security_enabled &&
-				!hdev->asic_prop.hard_reset_done_by_fw)
+	if (driver_performs_reset)
 		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
 
-	/* I don't know what is the state of the CPU so make sure it is
-	 * stopped in any means necessary
+	/* If linux is loaded in the device CPU we need to communicate with it
+	 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
+	 * registers in case of old F/Ws
 	 */
-	if (hdev->asic_prop.hard_reset_done_by_fw)
-		gaudi_fw_hard_reset(hdev);
-	else
-		gaudi_fw_halt_cpu(hdev);
-
 	if (hdev->fw_loader.linux_loaded) {
 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
 				le32_to_cpu(dyn_regs->gic_host_irq_ctrl);
 
 		WREG32(irq_handler_offset, GAUDI_EVENT_HALT_MACHINE);
+	} else {
+		if (hdev->asic_prop.hard_reset_done_by_fw)
+			gaudi_ask_hard_reset_without_linux(hdev);
+		else
+			gaudi_ask_halt_machine_without_linux(hdev);
 	}
 
-	if (!hdev->asic_prop.fw_security_enabled &&
-				!hdev->asic_prop.hard_reset_done_by_fw) {
+	if (driver_performs_reset) {
 
 		/* Configure the reset registers. Must be done as early as
 		 * possible in case we fail during H/W initialization
@@ -4185,8 +4195,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
 
 		/* Restart BTL/BLR upon hard-reset */
-		if (!hdev->asic_prop.fw_security_enabled)
-			WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
+		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
 
 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
@@ -4223,6 +4232,8 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 				HW_CAP_CLK_GATE);
 
 		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
+
+		gaudi->device_cpu_is_halted = false;
 	}
 }
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index 5929be81ec23..48637a6343bb 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -314,6 +314,10 @@ struct gaudi_internal_qman_info {
  *                  Multi MSI is possible only with IOMMU enabled.
  * @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an
  *                    8-bit value so use u8.
+ * @device_cpu_is_halted: Flag to indicate whether the device CPU was already
+ *                        halted. We can't halt it again because the COMMS
+ *                        protocol will throw an error. Relevant only for
+ *                        cases where Linux was not loaded to device CPU
  */
 struct gaudi_device {
 	int (*cpucp_info_get)(struct hl_device *hdev);
@@ -335,6 +339,7 @@ struct gaudi_device {
 	u32				hw_cap_initialized;
 	u8				multi_msi_mode;
 	u8				mmu_cache_inv_pi;
+	u8				device_cpu_is_halted;
 };
 
 void gaudi_init_security(struct hl_device *hdev);
-- 
2.25.1


      parent reply	other threads:[~2021-05-24  8:46 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-24  8:46 [PATCH 1/3] habanalabs/gaudi: use COMMS to reset device / halt CPU Oded Gabbay
2021-05-24  8:46 ` [PATCH 2/3] habanalabs: track security status using positive logic Oded Gabbay
2021-05-24  8:46 ` Oded Gabbay [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210524084619.4680-3-ogabbay@kernel.org \
    --to=ogabbay@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.