linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] habanalabs/gaudi: use COMMS to reset device / halt CPU
@ 2021-05-24  8:46 Oded Gabbay
  2021-05-24  8:46 ` [PATCH 2/3] habanalabs: track security status using positive logic Oded Gabbay
  2021-05-24  8:46 ` [PATCH 3/3] habanalabs/gaudi: refactor reset code Oded Gabbay
  0 siblings, 2 replies; 3+ messages in thread
From: Oded Gabbay @ 2021-05-24  8:46 UTC (permalink / raw)
  To: linux-kernel; +Cc: Koby Elbaz

From: Koby Elbaz <kelbaz@habana.ai>

This is needed because the legacy FW 'communication' protocol will soon
become obsolete.
Because COMMS is a boot protocol, communicating through it is supported
only until Linux is loaded to the device CPU. Once Linux is loaded, we
fall back to the former implementation.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c |  2 +-
 drivers/misc/habanalabs/common/habanalabs.h  |  5 ++-
 drivers/misc/habanalabs/gaudi/gaudi.c        | 36 ++++++++++++++++++--
 3 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 399d64e4f4c2..c19acefdb7e4 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -1390,7 +1390,7 @@ static int hl_fw_dynamic_send_clear_cmd(struct hl_device *hdev,
  *             leftovers between command
  * NOOP command: necessary to avoid loop on the clear command by the FW
  */
-static int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev,
+int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev,
 				struct fw_load_mgr *fw_loader,
 				enum comms_cmd cmd, unsigned int size,
 				bool wait_ok, u32 timeout)
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 433262bfb7e6..f1ff4d503cf2 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2574,7 +2574,10 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
 				u32 sts_boot_dev_sts0_reg,
 				u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
 				u32 boot_err1_reg, u32 timeout);
-
+int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev,
+				struct fw_load_mgr *fw_loader,
+				enum comms_cmd cmd, unsigned int size,
+				bool wait_ok, u32 timeout);
 int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
 			bool is_wc[3]);
 int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 9be3809d4d0d..e155fae5edcb 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1931,6 +1931,38 @@ static void gaudi_disable_msi(struct hl_device *hdev)
 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
 }
 
+static void gaudi_fw_hard_reset(struct hl_device *hdev)
+{
+	int rc;
+
+	if (hdev->asic_prop.dynamic_fw_load && !hdev->fw_loader.linux_loaded) {
+		rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
+				COMMS_RST_DEV, 0, false,
+				hdev->fw_loader.cpu_timeout);
+		if (rc)
+			dev_warn(hdev->dev, "Failed sending COMMS_RST_DEV\n");
+	} else {
+		WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV);
+	}
+}
+
+static void gaudi_fw_halt_cpu(struct hl_device *hdev)
+{
+	int rc;
+
+	/* Stop device CPU to make sure nothing bad happens */
+	if (hdev->asic_prop.dynamic_fw_load && !hdev->fw_loader.linux_loaded) {
+		rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
+				COMMS_GOTO_WFE, 0, true,
+				hdev->fw_loader.cpu_timeout);
+		if (rc)
+			dev_warn(hdev->dev, "Failed sending COMMS_GOTO_WFE\n");
+	} else {
+		WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
+		msleep(GAUDI_CPU_RESET_WAIT_MSEC);
+	}
+}
+
 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
@@ -4106,9 +4138,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 	 * stopped in any means necessary
 	 */
 	if (hdev->asic_prop.hard_reset_done_by_fw)
-		WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV);
+		gaudi_fw_hard_reset(hdev);
 	else
-		WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
+		gaudi_fw_halt_cpu(hdev);
 
 	if (hdev->fw_loader.linux_loaded) {
 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/3] habanalabs: track security status using positive logic
  2021-05-24  8:46 [PATCH 1/3] habanalabs/gaudi: use COMMS to reset device / halt CPU Oded Gabbay
@ 2021-05-24  8:46 ` Oded Gabbay
  2021-05-24  8:46 ` [PATCH 3/3] habanalabs/gaudi: refactor reset code Oded Gabbay
  1 sibling, 0 replies; 3+ messages in thread
From: Oded Gabbay @ 2021-05-24  8:46 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ohad Sharabi

From: Ohad Sharabi <osharabi@habana.ai>

Using negative logic (i.e. fw_security_disabled) is confusing.

Modify the flag to use positive logic (fw_security_enabled).

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c  |  2 +-
 drivers/misc/habanalabs/common/habanalabs.h   |  6 +--
 .../misc/habanalabs/common/habanalabs_drv.c   |  6 +--
 drivers/misc/habanalabs/gaudi/gaudi.c         | 48 +++++++++----------
 .../misc/habanalabs/gaudi/gaudi_coresight.c   |  2 +-
 .../misc/habanalabs/gaudi/gaudi_security.c    | 15 +++---
 drivers/misc/habanalabs/goya/goya.c           | 20 ++++----
 drivers/misc/habanalabs/goya/goya_coresight.c |  2 +-
 8 files changed, 51 insertions(+), 50 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index c19acefdb7e4..4cc6690a3e26 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -1142,7 +1142,7 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
 			prop->hard_reset_done_by_fw ? "enabled" : "disabled");
 
 	dev_dbg(hdev->dev, "firmware-level security is %s\n",
-			prop->fw_security_disabled ? "disabled" : "enabled");
+			prop->fw_security_enabled ? "enabled" : "disabled");
 
 	dev_dbg(hdev->dev, "GIC controller is %s\n",
 			prop->gic_interrupts_enable ? "enabled" : "disabled");
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index f1ff4d503cf2..e751868b3ed3 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -456,8 +456,8 @@ struct hl_mmu_properties {
  * @user_interrupt_count: number of user interrupts.
  * @tpc_enabled_mask: which TPCs are enabled.
  * @completion_queues_count: number of completion queues.
- * @fw_security_disabled: true if security measures are disabled in firmware,
- *                        false otherwise
+ * @fw_security_enabled: true if security measures are enabled in firmware,
+ *                       false otherwise
  * @fw_cpu_boot_dev_sts0_valid: status bits are valid and can be fetched from
  *                              BOOT_DEV_STS0
  * @fw_cpu_boot_dev_sts1_valid: status bits are valid and can be fetched from
@@ -531,7 +531,7 @@ struct asic_fixed_properties {
 	u16				user_interrupt_count;
 	u8				tpc_enabled_mask;
 	u8				completion_queues_count;
-	u8				fw_security_disabled;
+	u8				fw_security_enabled;
 	u8				fw_cpu_boot_dev_sts0_valid;
 	u8				fw_cpu_boot_dev_sts1_valid;
 	u8				dram_supports_virtual_memory;
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 339a1860c1e7..bd67d4ceab56 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -308,10 +308,10 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 	}
 
 	if (pdev)
-		hdev->asic_prop.fw_security_disabled =
-				!is_asic_secured(hdev->asic_type);
+		hdev->asic_prop.fw_security_enabled =
+					is_asic_secured(hdev->asic_type);
 	else
-		hdev->asic_prop.fw_security_disabled = true;
+		hdev->asic_prop.fw_security_enabled = false;
 
 	/* Assign status description string */
 	strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index e155fae5edcb..836465dccc61 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -687,7 +687,7 @@ static int gaudi_early_init(struct hl_device *hdev)
 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
 
 	/* If FW security is enabled at this point it means no access to ELBI */
-	if (!hdev->asic_prop.fw_security_disabled) {
+	if (hdev->asic_prop.fw_security_enabled) {
 		hdev->asic_prop.iatu_done_by_fw = true;
 
 		/*
@@ -763,7 +763,14 @@ static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
 	int rc;
 
-	if (hdev->asic_prop.fw_security_disabled) {
+	if (hdev->asic_prop.fw_security_enabled) {
+		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
+
+		if (rc)
+			return rc;
+
+		freq = pll_freq_arr[2];
+	} else {
 		/* Backward compatibility */
 		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
 		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
@@ -791,13 +798,6 @@ static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
 				div_sel);
 			freq = 0;
 		}
-	} else {
-		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
-
-		if (rc)
-			return rc;
-
-		freq = pll_freq_arr[2];
 	}
 
 	prop->psoc_timestamp_frequency = freq;
@@ -1525,7 +1525,7 @@ static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
 	hdev->cpu_pci_msb_addr =
 		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
 
-	if (hdev->asic_prop.fw_security_disabled)
+	if (!hdev->asic_prop.fw_security_enabled)
 		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
 
 free_dma_mem_arr:
@@ -1725,7 +1725,7 @@ static int gaudi_sw_init(struct hl_device *hdev)
 free_cpu_accessible_dma_pool:
 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
 free_cpu_dma_mem:
-	if (hdev->asic_prop.fw_security_disabled)
+	if (!hdev->asic_prop.fw_security_enabled)
 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
 					hdev->cpu_pci_msb_addr);
 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
@@ -1747,7 +1747,7 @@ static int gaudi_sw_fini(struct hl_device *hdev)
 
 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
 
-	if (hdev->asic_prop.fw_security_disabled)
+	if (!hdev->asic_prop.fw_security_enabled)
 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
 					hdev->cpu_pci_msb_addr);
 
@@ -1967,7 +1967,7 @@ static void gaudi_init_scrambler_sram(struct hl_device *hdev)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
 
-	if (!hdev->asic_prop.fw_security_disabled)
+	if (hdev->asic_prop.fw_security_enabled)
 		return;
 
 	if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid &&
@@ -2039,7 +2039,7 @@ static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
 
-	if (!hdev->asic_prop.fw_security_disabled)
+	if (hdev->asic_prop.fw_security_enabled)
 		return;
 
 	if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid &&
@@ -2109,7 +2109,7 @@ static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
 
 static void gaudi_init_e2e(struct hl_device *hdev)
 {
-	if (!hdev->asic_prop.fw_security_disabled)
+	if (hdev->asic_prop.fw_security_enabled)
 		return;
 
 	if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid &&
@@ -2484,7 +2484,7 @@ static void gaudi_init_hbm_cred(struct hl_device *hdev)
 {
 	uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
 
-	if (!hdev->asic_prop.fw_security_disabled)
+	if (hdev->asic_prop.fw_security_enabled)
 		return;
 
 	if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid &&
@@ -3602,7 +3602,7 @@ static void gaudi_set_clock_gating(struct hl_device *hdev)
 	if (hdev->in_debug)
 		return;
 
-	if (!hdev->asic_prop.fw_security_disabled)
+	if (hdev->asic_prop.fw_security_enabled)
 		return;
 
 	for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
@@ -3662,7 +3662,7 @@ static void gaudi_disable_clock_gating(struct hl_device *hdev)
 	u32 qman_offset;
 	int i;
 
-	if (!hdev->asic_prop.fw_security_disabled)
+	if (hdev->asic_prop.fw_security_enabled)
 		return;
 
 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
@@ -3897,7 +3897,7 @@ static int gaudi_init_cpu(struct hl_device *hdev)
 	 * The device CPU works with 40 bits addresses.
 	 * This register sets the extension to 50 bits.
 	 */
-	if (hdev->asic_prop.fw_security_disabled)
+	if (!hdev->asic_prop.fw_security_enabled)
 		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
 
 	rc = hl_fw_init_cpu(hdev);
@@ -3991,7 +3991,7 @@ static void gaudi_pre_hw_init(struct hl_device *hdev)
 	/* Perform read from the device to make sure device is up */
 	RREG32(mmHW_STATE);
 
-	if (hdev->asic_prop.fw_security_disabled) {
+	if (!hdev->asic_prop.fw_security_enabled) {
 		/* Set the access through PCI bars (Linux driver only) as
 		 * secured
 		 */
@@ -4129,7 +4129,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 	/* Set device to handle FLR by H/W as we will put the device CPU to
 	 * halt mode
 	 */
-	if (hdev->asic_prop.fw_security_disabled &&
+	if (!hdev->asic_prop.fw_security_enabled &&
 				!hdev->asic_prop.hard_reset_done_by_fw)
 		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
@@ -4150,7 +4150,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 		WREG32(irq_handler_offset, GAUDI_EVENT_HALT_MACHINE);
 	}
 
-	if (hdev->asic_prop.fw_security_disabled &&
+	if (!hdev->asic_prop.fw_security_enabled &&
 				!hdev->asic_prop.hard_reset_done_by_fw) {
 
 		/* Configure the reset registers. Must be done as early as
@@ -4185,7 +4185,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
 
 		/* Restart BTL/BLR upon hard-reset */
-		if (hdev->asic_prop.fw_security_disabled)
+		if (!hdev->asic_prop.fw_security_enabled)
 			WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
 
 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
@@ -7573,7 +7573,7 @@ static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
 		return 0;
 	}
 
-	if (!hdev->asic_prop.fw_security_disabled) {
+	if (hdev->asic_prop.fw_security_enabled) {
 		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
 		return 0;
 	}
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
index 6e56fa1c6c69..9e271fd9f0d2 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
@@ -634,7 +634,7 @@ static int gaudi_config_etr(struct hl_device *hdev,
 		WREG32(mmPSOC_ETR_BUFWM, 0x3FFC);
 		WREG32(mmPSOC_ETR_RSZ, input->buffer_size);
 		WREG32(mmPSOC_ETR_MODE, input->sink_mode);
-		if (hdev->asic_prop.fw_security_disabled) {
+		if (!hdev->asic_prop.fw_security_enabled) {
 			/* make ETR not privileged */
 			val = FIELD_PREP(
 					PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK, 0);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_security.c b/drivers/misc/habanalabs/gaudi/gaudi_security.c
index 9a706c5980ef..0d3240f1f7d7 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_security.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_security.c
@@ -1448,7 +1448,7 @@ static void gaudi_init_dma_protection_bits(struct hl_device *hdev)
 	u32 pb_addr, mask;
 	u8 word_offset;
 
-	if (hdev->asic_prop.fw_security_disabled) {
+	if (!hdev->asic_prop.fw_security_enabled) {
 		gaudi_pb_set_block(hdev, mmDMA_IF_E_S_BASE);
 		gaudi_pb_set_block(hdev, mmDMA_IF_E_S_DOWN_CH0_BASE);
 		gaudi_pb_set_block(hdev, mmDMA_IF_E_S_DOWN_CH1_BASE);
@@ -9135,7 +9135,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
 	u32 pb_addr, mask;
 	u8 word_offset;
 
-	if (hdev->asic_prop.fw_security_disabled) {
+	if (!hdev->asic_prop.fw_security_enabled) {
 		gaudi_pb_set_block(hdev, mmTPC0_E2E_CRED_BASE);
 		gaudi_pb_set_block(hdev, mmTPC1_E2E_CRED_BASE);
 		gaudi_pb_set_block(hdev, mmTPC2_E2E_CRED_BASE);
@@ -12818,7 +12818,7 @@ static void gaudi_init_protection_bits(struct hl_device *hdev)
 	 * secured
 	 */
 
-	if (hdev->asic_prop.fw_security_disabled) {
+	if (!hdev->asic_prop.fw_security_enabled) {
 		gaudi_pb_set_block(hdev, mmIF_E_PLL_BASE);
 		gaudi_pb_set_block(hdev, mmMESH_W_PLL_BASE);
 		gaudi_pb_set_block(hdev, mmSRAM_W_PLL_BASE);
@@ -13023,7 +13023,7 @@ void gaudi_init_security(struct hl_device *hdev)
 	 * property configuration of MME SBAB and ACC to be non-privileged and
 	 * non-secured
 	 */
-	if (hdev->asic_prop.fw_security_disabled) {
+	if (!hdev->asic_prop.fw_security_enabled) {
 		WREG32(mmMME0_SBAB_PROT, 0x2);
 		WREG32(mmMME0_ACC_PROT, 0x2);
 		WREG32(mmMME1_SBAB_PROT, 0x2);
@@ -13032,11 +13032,12 @@ void gaudi_init_security(struct hl_device *hdev)
 		WREG32(mmMME2_ACC_PROT, 0x2);
 		WREG32(mmMME3_SBAB_PROT, 0x2);
 		WREG32(mmMME3_ACC_PROT, 0x2);
-	}
 
-	/* On RAZWI, 0 will be returned from RR and 0xBABA0BAD from PB */
-	if (hdev->asic_prop.fw_security_disabled)
+		/*
+		 * On RAZWI, 0 will be returned from RR and 0xBABA0BAD from PB
+		 */
 		WREG32(0xC01B28, 0x1);
+	}
 
 	gaudi_init_range_registers_lbw(hdev);
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 3d7a760cf2ba..bcefc372a689 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -619,7 +619,7 @@ static int goya_early_init(struct hl_device *hdev)
 	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
 
 	/* If FW security is enabled at this point it means no access to ELBI */
-	if (!hdev->asic_prop.fw_security_disabled) {
+	if (hdev->asic_prop.fw_security_enabled) {
 		hdev->asic_prop.iatu_done_by_fw = true;
 		goto pci_init;
 	}
@@ -726,7 +726,15 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev)
 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
 	int rc;
 
-	if (hdev->asic_prop.fw_security_disabled) {
+	if (hdev->asic_prop.fw_security_enabled) {
+		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
+				pll_freq_arr);
+
+		if (rc)
+			return;
+
+		freq = pll_freq_arr[1];
+	} else {
 		div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
 		div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
 		nr = RREG32(mmPSOC_PCI_PLL_NR);
@@ -753,14 +761,6 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev)
 				div_sel);
 			freq = 0;
 		}
-	} else {
-		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
-				pll_freq_arr);
-
-		if (rc)
-			return;
-
-		freq = pll_freq_arr[1];
 	}
 
 	prop->psoc_timestamp_frequency = freq;
diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c
index 6b7445cca580..c55c100fdd24 100644
--- a/drivers/misc/habanalabs/goya/goya_coresight.c
+++ b/drivers/misc/habanalabs/goya/goya_coresight.c
@@ -434,7 +434,7 @@ static int goya_config_etr(struct hl_device *hdev,
 		WREG32(mmPSOC_ETR_BUFWM, 0x3FFC);
 		WREG32(mmPSOC_ETR_RSZ, input->buffer_size);
 		WREG32(mmPSOC_ETR_MODE, input->sink_mode);
-		if (hdev->asic_prop.fw_security_disabled) {
+		if (!hdev->asic_prop.fw_security_enabled) {
 			/* make ETR not privileged */
 			val = FIELD_PREP(PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK, 0);
 			/* make ETR non-secured (inverted logic) */
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 3/3] habanalabs/gaudi: refactor reset code
  2021-05-24  8:46 [PATCH 1/3] habanalabs/gaudi: use COMMS to reset device / halt CPU Oded Gabbay
  2021-05-24  8:46 ` [PATCH 2/3] habanalabs: track security status using positive logic Oded Gabbay
@ 2021-05-24  8:46 ` Oded Gabbay
  1 sibling, 0 replies; 3+ messages in thread
From: Oded Gabbay @ 2021-05-24  8:46 UTC (permalink / raw)
  To: linux-kernel

After all the latest changes to the reset code, there was some
redundancy and there were some errors in the flows.

If the Linux FIT is loaded to the ASIC CPU, we need to communicate
with it only via GIC. If it is not loaded, we need to either use
COMMS protocol (for newer f/w) or MSG_TO_CPU register (for older f/w).

In addition, if we halted the device CPU then we need to mark that
the driver will do the reset, regardless of the capabilities.

Also, to prevent false errors, we need to keep track of whether the
device CPU was already halted. If so, we shouldn't try to halt it
again.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c  | 47 ++++++++++++++++----------
 drivers/misc/habanalabs/gaudi/gaudiP.h |  5 +++
 2 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 836465dccc61..5ca4c8f86801 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1931,11 +1931,11 @@ static void gaudi_disable_msi(struct hl_device *hdev)
 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
 }
 
-static void gaudi_fw_hard_reset(struct hl_device *hdev)
+static void gaudi_ask_hard_reset_without_linux(struct hl_device *hdev)
 {
 	int rc;
 
-	if (hdev->asic_prop.dynamic_fw_load && !hdev->fw_loader.linux_loaded) {
+	if (hdev->asic_prop.dynamic_fw_load) {
 		rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
 				COMMS_RST_DEV, 0, false,
 				hdev->fw_loader.cpu_timeout);
@@ -1946,12 +1946,16 @@ static void gaudi_fw_hard_reset(struct hl_device *hdev)
 	}
 }
 
-static void gaudi_fw_halt_cpu(struct hl_device *hdev)
+static void gaudi_ask_halt_machine_without_linux(struct hl_device *hdev)
 {
+	struct gaudi_device *gaudi = hdev->asic_specific;
 	int rc;
 
+	if (gaudi && gaudi->device_cpu_is_halted)
+		return;
+
 	/* Stop device CPU to make sure nothing bad happens */
-	if (hdev->asic_prop.dynamic_fw_load && !hdev->fw_loader.linux_loaded) {
+	if (hdev->asic_prop.dynamic_fw_load) {
 		rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
 				COMMS_GOTO_WFE, 0, true,
 				hdev->fw_loader.cpu_timeout);
@@ -1961,6 +1965,9 @@ static void gaudi_fw_halt_cpu(struct hl_device *hdev)
 		WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
 		msleep(GAUDI_CPU_RESET_WAIT_MSEC);
 	}
+
+	if (gaudi)
+		gaudi->device_cpu_is_halted = true;
 }
 
 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
@@ -4110,8 +4117,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 {
 	struct cpu_dyn_regs *dyn_regs =
 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
-	struct gaudi_device *gaudi = hdev->asic_specific;
 	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
+	struct gaudi_device *gaudi = hdev->asic_specific;
+	bool driver_performs_reset;
 
 	if (!hard_reset) {
 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
@@ -4126,32 +4134,34 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
 	}
 
+	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
+					!hdev->asic_prop.hard_reset_done_by_fw);
+
 	/* Set device to handle FLR by H/W as we will put the device CPU to
 	 * halt mode
 	 */
-	if (!hdev->asic_prop.fw_security_enabled &&
-				!hdev->asic_prop.hard_reset_done_by_fw)
+	if (driver_performs_reset)
 		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
 
-	/* I don't know what is the state of the CPU so make sure it is
-	 * stopped in any means necessary
+	/* If linux is loaded in the device CPU we need to communicate with it
+	 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
+	 * registers in case of old F/Ws
 	 */
-	if (hdev->asic_prop.hard_reset_done_by_fw)
-		gaudi_fw_hard_reset(hdev);
-	else
-		gaudi_fw_halt_cpu(hdev);
-
 	if (hdev->fw_loader.linux_loaded) {
 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
 				le32_to_cpu(dyn_regs->gic_host_irq_ctrl);
 
 		WREG32(irq_handler_offset, GAUDI_EVENT_HALT_MACHINE);
+	} else {
+		if (hdev->asic_prop.hard_reset_done_by_fw)
+			gaudi_ask_hard_reset_without_linux(hdev);
+		else
+			gaudi_ask_halt_machine_without_linux(hdev);
 	}
 
-	if (!hdev->asic_prop.fw_security_enabled &&
-				!hdev->asic_prop.hard_reset_done_by_fw) {
+	if (driver_performs_reset) {
 
 		/* Configure the reset registers. Must be done as early as
 		 * possible in case we fail during H/W initialization
@@ -4185,8 +4195,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
 
 		/* Restart BTL/BLR upon hard-reset */
-		if (!hdev->asic_prop.fw_security_enabled)
-			WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
+		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
 
 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
@@ -4223,6 +4232,8 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 				HW_CAP_CLK_GATE);
 
 		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
+
+		gaudi->device_cpu_is_halted = false;
 	}
 }
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index 5929be81ec23..48637a6343bb 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -314,6 +314,10 @@ struct gaudi_internal_qman_info {
  *                  Multi MSI is possible only with IOMMU enabled.
  * @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an
  *                    8-bit value so use u8.
+ * @device_cpu_is_halted: Flag to indicate whether the device CPU was already
+ *                        halted. We can't halt it again because the COMMS
+ *                        protocol will throw an error. Relevant only for
+ *                        cases where Linux was not loaded to device CPU
  */
 struct gaudi_device {
 	int (*cpucp_info_get)(struct hl_device *hdev);
@@ -335,6 +339,7 @@ struct gaudi_device {
 	u32				hw_cap_initialized;
 	u8				multi_msi_mode;
 	u8				mmu_cache_inv_pi;
+	u8				device_cpu_is_halted;
 };
 
 void gaudi_init_security(struct hl_device *hdev);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-05-24  8:46 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-24  8:46 [PATCH 1/3] habanalabs/gaudi: use COMMS to reset device / halt CPU Oded Gabbay
2021-05-24  8:46 ` [PATCH 2/3] habanalabs: track security status using positive logic Oded Gabbay
2021-05-24  8:46 ` [PATCH 3/3] habanalabs/gaudi: refactor reset code Oded Gabbay

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).