linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] habanalabs: advanced FW loading
@ 2020-11-04  8:36 Oded Gabbay
  2020-11-04  8:36 ` [PATCH 1/2] habanalabs: fetch security indication from FW Oded Gabbay
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Oded Gabbay @ 2020-11-04  8:36 UTC (permalink / raw)
  To: linux-kernel; +Cc: SW_Drivers, Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

Today driver is able to load a whole FW binary into a specific
location on ASIC. We add support for loading sections from the
same FW binary into different loactions.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 21 ++++++++++++++++----
 drivers/misc/habanalabs/common/habanalabs.h  |  2 +-
 drivers/misc/habanalabs/gaudi/gaudi.c        |  4 ++--
 drivers/misc/habanalabs/goya/goya.c          |  4 ++--
 4 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 1340afa8ce3b..647606f6cc2a 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -20,16 +20,18 @@
  * @hdev: pointer to hl_device structure.
  * @fw_name: the firmware image name
  * @dst: IO memory mapped address space to copy firmware to
+ * @src_offset: offset in src FW to copy from
+ * @size: amount of bytes to copy (0 to copy the whole binary)
  *
  * Copy fw code from firmware file to device memory.
  *
  * Return: 0 on success, non-zero for failure.
  */
 int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
-				void __iomem *dst)
+				void __iomem *dst, u32 src_offset, u32 size)
 {
 	const struct firmware *fw;
-	const u64 *fw_data;
+	const void *fw_data;
 	size_t fw_size;
 	int rc;
 
@@ -57,9 +59,20 @@ int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
 		goto out;
 	}
 
-	fw_data = (const u64 *) fw->data;
+	if (size - src_offset > fw_size) {
+		dev_err(hdev->dev,
+			"size to copy(%u) and offset(%u) are invalid\n",
+			size, src_offset);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	if (size)
+		fw_size = size;
+
+	fw_data = (const void *) fw->data;
 
-	memcpy_toio(dst, fw_data, fw_size);
+	memcpy_toio(dst, fw_data + src_offset, fw_size);
 
 out:
 	release_firmware(fw);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 70d166e4fe09..2b17c13a2e34 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2028,7 +2028,7 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev);
 void hl_mmu_v1_set_funcs(struct hl_device *hdev);
 
 int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
-				void __iomem *dst);
+				void __iomem *dst, u32 src_offset, u32 size);
 int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode);
 int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 				u16 len, u32 timeout, long *result);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 8b02d6341b4c..d8fdcae542c0 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -3541,7 +3541,7 @@ static int gaudi_load_firmware_to_device(struct hl_device *hdev)
 
 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
 
-	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
+	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
 }
 
 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
@@ -3550,7 +3550,7 @@ static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
 
 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
 
-	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
+	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
 }
 
 static void gaudi_read_device_fw_version(struct hl_device *hdev,
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 0afce8a834af..49075d994bbc 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -2315,7 +2315,7 @@ static int goya_load_firmware_to_device(struct hl_device *hdev)
 
 	dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
 
-	return hl_fw_load_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst);
+	return hl_fw_load_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst, 0, 0);
 }
 
 /*
@@ -2332,7 +2332,7 @@ static int goya_load_boot_fit_to_device(struct hl_device *hdev)
 
 	dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
 
-	return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst);
+	return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst, 0, 0);
 }
 
 /*
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 1/2] habanalabs: fetch security indication from FW
  2020-11-04  8:36 [PATCH] habanalabs: advanced FW loading Oded Gabbay
@ 2020-11-04  8:36 ` Oded Gabbay
  2020-11-04  8:36 ` [PATCH] habanalabs: fix cs counters structure Oded Gabbay
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Oded Gabbay @ 2020-11-04  8:36 UTC (permalink / raw)
  To: linux-kernel; +Cc: SW_Drivers, Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

Add support for fetching security indication from FW.
This indication is needed in order to skip unnecessary
initializations done by FW.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c  | 60 ++++++++++++---
 drivers/misc/habanalabs/common/habanalabs.h   | 23 ++++--
 drivers/misc/habanalabs/common/pci.c          | 10 ++-
 drivers/misc/habanalabs/gaudi/gaudi.c         | 15 ++--
 drivers/misc/habanalabs/goya/goya.c           | 17 +++--
 .../misc/habanalabs/include/common/cpucp_if.h | 30 ++++++++
 .../habanalabs/include/common/hl_boot_if.h    | 74 +++++++++++++++++++
 .../habanalabs/include/gaudi/gaudi_reg_map.h  |  2 +
 .../habanalabs/include/goya/goya_reg_map.h    |  2 +
 9 files changed, 204 insertions(+), 29 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 647606f6cc2a..8de6a8690b1b 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -448,9 +448,10 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
 	return rc;
 }
 
-static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
+static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
+		u32 cpu_security_boot_status_reg)
 {
-	u32 err_val;
+	u32 err_val, security_val;
 
 	/* Some of the firmware status codes are deprecated in newer f/w
 	 * versions. In those versions, the errors are reported
@@ -485,6 +486,11 @@ static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
 	if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
 		dev_err(hdev->dev,
 			"Device boot error - NIC F/W initialization failed\n");
+
+	security_val = RREG32(cpu_security_boot_status_reg);
+	if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
+		dev_info(hdev->dev, "Device security status %#x\n",
+				security_val);
 }
 
 static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
@@ -537,10 +543,12 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
 	}
 }
 
-int hl_fw_read_preboot_ver(struct hl_device *hdev, u32 cpu_boot_status_reg,
-				u32 boot_err0_reg, u32 timeout)
+int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
+		u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
+		u32 timeout)
 {
-	u32 status;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u32 status, security_status;
 	int rc;
 
 	if (!hdev->cpu_enable)
@@ -570,19 +578,43 @@ int hl_fw_read_preboot_ver(struct hl_device *hdev, u32 cpu_boot_status_reg,
 	if (rc) {
 		dev_err(hdev->dev, "Failed to read preboot version\n");
 		detect_cpu_boot_status(hdev, status);
-		fw_read_errors(hdev, boot_err0_reg);
+		fw_read_errors(hdev, boot_err0_reg,
+				cpu_security_boot_status_reg);
 		return -EIO;
 	}
 
 	hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
 
+	security_status = RREG32(cpu_security_boot_status_reg);
+
+	/* We read security status multiple times during boot:
+	 * 1. preboot - we check if fw security feature is supported
+	 * 2. boot cpu - we get boot cpu security status
+	 * 3. FW application - we get FW application security status
+	 *
+	 * Preboot:
+	 * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set
+	 * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
+	 */
+	if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
+		hdev->asic_prop.fw_security_status_valid = 1;
+		prop->fw_security_disabled =
+			!(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN);
+	} else {
+		hdev->asic_prop.fw_security_status_valid = 0;
+		prop->fw_security_disabled = true;
+	}
+
+	dev_info(hdev->dev, "firmware-level security is %s\n",
+		prop->fw_security_disabled ? "disabled" : "enabled");
+
 	return 0;
 }
 
 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 			u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
-			u32 boot_err0_reg, bool skip_bmc,
-			u32 cpu_timeout, u32 boot_fit_timeout)
+			u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
+			bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout)
 {
 	u32 status;
 	int rc;
@@ -652,6 +684,11 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 	/* Read U-Boot version now in case we will later fail */
 	hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
 
+	/* Read boot_cpu security bits */
+	if (hdev->asic_prop.fw_security_status_valid)
+		hdev->asic_prop.fw_boot_cpu_security_map =
+				RREG32(cpu_security_boot_status_reg);
+
 	if (rc) {
 		detect_cpu_boot_status(hdev, status);
 		rc = -EIO;
@@ -720,10 +757,15 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 		goto out;
 	}
 
+	/* Read FW application security bits */
+	if (hdev->asic_prop.fw_security_status_valid)
+		hdev->asic_prop.fw_app_security_map =
+				RREG32(cpu_security_boot_status_reg);
+
 	dev_info(hdev->dev, "Successfully loaded firmware to device\n");
 
 out:
-	fw_read_errors(hdev, boot_err0_reg);
+	fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
 
 	return rc;
 }
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 0fe108eef1a4..d814ff69eb01 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -372,6 +372,12 @@ struct hl_mmu_properties {
  * @cb_pool_cb_size: size of each CB in the CB pool.
  * @max_pending_cs: maximum of concurrent pending command submissions
  * @max_queues: maximum amount of queues in the system
+ * @fw_boot_cpu_security_map: bitmap representation of boot cpu security status
+ *                            reported by FW, bit description can be found in
+ *                            CPU_BOOT_DEV_STS*
+ * @fw_app_security_map: bitmap representation of application security status
+ *                       reported by FW, bit description can be found in
+ *                       CPU_BOOT_DEV_STS*
  * @collective_first_sob: first sync object available for collective use
  * @collective_first_mon: first monitor available for collective use
  * @sync_stream_first_sob: first sync object available for sync stream use
@@ -382,6 +388,8 @@ struct hl_mmu_properties {
  * @completion_queues_count: number of completion queues.
  * @fw_security_disabled: true if security measures are disabled in firmware,
  *                        false otherwise
+ * @fw_security_status_valid: security status bits are valid and can be fetched
+ *                            from BOOT_DEV_STS0
  */
 struct asic_fixed_properties {
 	struct hw_queue_properties	*hw_queues_props;
@@ -426,6 +434,8 @@ struct asic_fixed_properties {
 	u32				cb_pool_cb_size;
 	u32				max_pending_cs;
 	u32				max_queues;
+	u32				fw_boot_cpu_security_map;
+	u32				fw_app_security_map;
 	u16				collective_first_sob;
 	u16				collective_first_mon;
 	u16				sync_stream_first_sob;
@@ -435,6 +445,7 @@ struct asic_fixed_properties {
 	u8				tpc_enabled_mask;
 	u8				completion_queues_count;
 	u8				fw_security_disabled;
+	u8				fw_security_status_valid;
 };
 
 /**
@@ -2050,10 +2061,11 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
 			u64 *total_energy);
 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 			u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
-			u32 boot_err0_reg, bool skip_bmc,
-			u32 cpu_timeout, u32 boot_fit_timeout);
-int hl_fw_read_preboot_ver(struct hl_device *hdev, u32 cpu_boot_status_reg,
-				u32 boot_err0_reg, u32 timeout);
+			u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
+			bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout);
+int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
+		u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
+		u32 timeout);
 
 int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
 			bool is_wc[3]);
@@ -2063,7 +2075,8 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
 int hl_pci_set_outbound_region(struct hl_device *hdev,
 		struct hl_outbound_pci_region *pci_region);
 int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg,
-		u32 boot_err0_reg, u32 preboot_ver_timeout);
+		u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
+		u32 preboot_ver_timeout);
 void hl_pci_fini(struct hl_device *hdev);
 
 long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
diff --git a/drivers/misc/habanalabs/common/pci.c b/drivers/misc/habanalabs/common/pci.c
index 4327e5704ebb..211f3190f8d7 100644
--- a/drivers/misc/habanalabs/common/pci.c
+++ b/drivers/misc/habanalabs/common/pci.c
@@ -339,6 +339,8 @@ static int hl_pci_set_dma_mask(struct hl_device *hdev)
  * hl_pci_init() - PCI initialization code.
  * @hdev: Pointer to hl_device structure.
  * @cpu_boot_status_reg: status register of the device's CPU
+ * @cpu_security_boot_status_reg: status register of device's CPU security
+ *                                configuration
  * @boot_err0_reg: boot error register of the device's CPU
  * @preboot_ver_timeout: how much to wait before bailing out on reading
  *                       the preboot version
@@ -348,7 +350,8 @@ static int hl_pci_set_dma_mask(struct hl_device *hdev)
  * Return: 0 on success, non-zero for failure.
  */
 int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg,
-		u32 boot_err0_reg, u32 preboot_ver_timeout)
+		u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
+		u32 preboot_ver_timeout)
 {
 	struct pci_dev *pdev = hdev->pdev;
 	int rc;
@@ -384,8 +387,9 @@ int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg,
 	 * version to determine whether we run with a security-enabled firmware
 	 * The check will be done in each ASIC's specific code
 	 */
-	rc = hl_fw_read_preboot_ver(hdev, cpu_boot_status_reg, boot_err0_reg,
-					preboot_ver_timeout);
+	rc = hl_fw_read_preboot_status(hdev, cpu_boot_status_reg,
+			cpu_security_boot_status_reg, boot_err0_reg,
+			preboot_ver_timeout);
 	if (rc)
 		goto unmap_pci_bars;
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index e5a2b7da0a20..e874c1765922 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -514,6 +514,10 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
 			prop->sync_stream_first_mon +
 			(num_sync_stream_queues * HL_RSVD_MONS);
 
+	/* disable fw security for now, set it in a later stage */
+	prop->fw_security_disabled = true;
+	prop->fw_security_status_valid = false;
+
 	return 0;
 }
 
@@ -638,13 +642,13 @@ static int gaudi_early_init(struct hl_device *hdev)
 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
 
 	rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
-			mmCPU_BOOT_ERR0, GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
+			mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
+			GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
 	if (rc)
 		goto free_queue_props;
 
-	/* GAUDI Firmware does not yet support security */
-	prop->fw_security_disabled = true;
-	dev_info(hdev->dev, "firmware-level security is disabled\n");
+	dev_info(hdev->dev, "firmware-level security is %s\n",
+		hdev->asic_prop.fw_security_disabled ? "disabled" : "enabled");
 
 	return 0;
 
@@ -2319,7 +2323,6 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)
 	int tpc_id, i;
 
 	gaudi_init_e2e(hdev);
-
 	gaudi_init_hbm_cred(hdev);
 
 	hdev->asic_funcs->disable_clock_gating(hdev);
@@ -3608,7 +3611,7 @@ static int gaudi_init_cpu(struct hl_device *hdev)
 	rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
 			mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
 			mmCPU_CMD_STATUS_TO_HOST,
-			mmCPU_BOOT_ERR0,
+			mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
 			!hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
 			GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 49075d994bbc..5b0f02a1b8ee 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -456,6 +456,10 @@ int goya_get_fixed_properties(struct hl_device *hdev)
 
 	prop->max_pending_cs = GOYA_MAX_PENDING_CS;
 
+	/* disable fw security for now, set it in a later stage */
+	prop->fw_security_disabled = true;
+	prop->fw_security_status_valid = false;
+
 	return 0;
 }
 
@@ -601,14 +605,11 @@ static int goya_early_init(struct hl_device *hdev)
 	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
 
 	rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
-			mmCPU_BOOT_ERR0, GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
+			mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
+			GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
 	if (rc)
 		goto free_queue_props;
 
-	/* Goya Firmware does not support security */
-	prop->fw_security_disabled = true;
-	dev_info(hdev->dev, "firmware-level security is disabled\n");
-
 	if (!hdev->pldm) {
 		val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
 		if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
@@ -616,6 +617,9 @@ static int goya_early_init(struct hl_device *hdev)
 				"PCI strap is not configured correctly, PCI bus errors may occur\n");
 	}
 
+	dev_info(hdev->dev, "firmware-level security is %s\n",
+		hdev->asic_prop.fw_security_disabled ? "disabled" : "enabled");
+
 	return 0;
 
 free_queue_props:
@@ -2397,7 +2401,8 @@ static int goya_init_cpu(struct hl_device *hdev)
 
 	rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
 			mmPSOC_GLOBAL_CONF_UBOOT_MAGIC,
-			mmCPU_CMD_STATUS_TO_HOST, mmCPU_BOOT_ERR0,
+			mmCPU_CMD_STATUS_TO_HOST,
+			mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
 			false, GOYA_CPU_TIMEOUT_USEC,
 			GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
 
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index 782b8b8636be..1c1e2b394457 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -420,6 +420,29 @@ enum cpucp_card_types {
 	cpucp_card_type_pmc
 };
 
+#define CPUCP_SEC_CONF_ENABLED_SHIFT	0
+#define CPUCP_SEC_CONF_ENABLED_MASK	0x00000001
+
+#define CPUCP_SEC_CONF_FLASH_WP_SHIFT	1
+#define CPUCP_SEC_CONF_FLASH_WP_MASK	0x00000002
+
+#define CPUCP_SEC_CONF_EEPROM_WP_SHIFT	2
+#define CPUCP_SEC_CONF_EEPROM_WP_MASK	0x00000004
+
+/**
+ * struct cpucp_security_info - Security information.
+ * @config: configuration bit field
+ * @keys_num: number of stored keys
+ * @revoked_keys: revoked keys bit field
+ * @min_svn: minimal security version
+ */
+struct cpucp_security_info {
+	__u8 config;
+	__u8 keys_num;
+	__u8 revoked_keys;
+	__u8 min_svn;
+};
+
 /**
  * struct cpucp_info - Info from CpuCP that is necessary to the host's driver
  * @sensors: available sensors description.
@@ -435,6 +458,7 @@ enum cpucp_card_types {
  * @cpucp_version: CpuCP S/W version.
  * @dram_size: available DRAM size.
  * @card_name: card name that will be displayed in HWMON subsystem on the host
+ * @sec_info: security information
  */
 struct cpucp_info {
 	struct cpucp_sensor sensors[CPUCP_MAX_SENSORS];
@@ -450,6 +474,12 @@ struct cpucp_info {
 	__le32 reserved2;
 	__le64 dram_size;
 	char card_name[CARD_NAME_MAX_LEN];
+	__le64 reserved3;
+	__le64 reserved4;
+	__u8 reserved5;
+	__u8 pad[7];
+	struct cpucp_security_info sec_info;
+	__le32 reserved6;
 };
 
 struct cpucp_mac_addr {
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index bb67cafc6e00..d928ad93cd80 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -53,6 +53,9 @@
  *					trust), boot authentication (chain of
  *					trust), data packets authentication.
  *
+ * CPU_BOOT_ERR0_EFUSE_FAIL		Reading from eFuse failed.
+ *					The PCI device ID might be wrong.
+ *
  * CPU_BOOT_ERR0_ENABLED		Error registers enabled.
  *					This is a main indication that the
  *					running FW populates the error
@@ -68,8 +71,79 @@
 #define CPU_BOOT_ERR0_NIC_FW_FAIL		(1 << 6)
 #define CPU_BOOT_ERR0_SECURITY_NOT_RDY		(1 << 7)
 #define CPU_BOOT_ERR0_SECURITY_FAIL		(1 << 8)
+#define CPU_BOOT_ERR0_EFUSE_FAIL		(1 << 9)
 #define CPU_BOOT_ERR0_ENABLED			(1 << 31)
 
+/*
+ * BOOT DEVICE STATUS bits in BOOT_DEVICE_STS registers
+ *
+ * CPU_BOOT_DEV_STS0_SECURITY_EN	Security is Enabled.
+ *					This is an indication for security
+ *					enabled in FW, which means that
+ *					all conditions for security are met:
+ *					device is indicated as security enabled,
+ *					registers are protected, and device
+ *					uses keys for image verification.
+ *					Initialized at: preboot
+ *
+ * CPU_BOOT_DEV_STS0_DEBUG_EN		Debug is enabled.
+ *					Enabled when JTAG or DEBUG is enabled
+ *					in FW.
+ *					Initialized at: preboot
+ *
+ * CPU_BOOT_DEV_STS0_WATCHDOG_EN	Watchdog is enabled.
+ *					Watchdog is enabled in FW.
+ *					Initialized at: preboot
+ *
+ * CPU_BOOT_DEV_STS0_DRAM_INIT_EN	DRAM initialization is enabled.
+ *					DRAM initialization has been done in FW.
+ *					Initialized at: u-boot
+ *
+ * CPU_BOOT_DEV_STS0_BMC_WAIT_EN	Waiting for BMC data enabled.
+ *					If set, it means that during boot,
+ *					FW waited for BMC data.
+ *					Initialized at: u-boot
+ *
+ * CPU_BOOT_DEV_STS0_E2E_CRED_EN	E2E credits initialized.
+ *					FW initialized E2E credits.
+ *					Initialized at: u-boot
+ *
+ * CPU_BOOT_DEV_STS0_HBM_CRED_EN	HBM credits initialized.
+ *					FW initialized HBM credits.
+ *					Initialized at: u-boot
+ *
+ * CPU_BOOT_DEV_STS0_RL_EN		Rate limiter initialized.
+ *					FW initialized rate limiter.
+ *					Initialized at: u-boot
+ *
+ * CPU_BOOT_DEV_STS0_SRAM_SCR_EN	SRAM scrambler enabled.
+ *					FW initialized SRAM scrambler.
+ *					Initialized at: linux
+ *
+ * CPU_BOOT_DEV_STS0_DRAM_SCR_EN	DRAM scrambler enabled.
+ *					FW initialized DRAM scrambler.
+ *					Initialized at: u-boot
+ *
+ * CPU_BOOT_DEV_STS0_ENABLED		Device status register enabled.
+ *					This is a main indication that the
+ *					running FW populates the device status
+ *					register. Meaning the device status
+ *					bits are not garbage, but actual
+ *					statuses.
+ *					Initialized at: preboot
+ */
+#define CPU_BOOT_DEV_STS0_SECURITY_EN			(1 << 0)
+#define CPU_BOOT_DEV_STS0_DEBUG_EN			(1 << 1)
+#define CPU_BOOT_DEV_STS0_WATCHDOG_EN			(1 << 2)
+#define CPU_BOOT_DEV_STS0_DRAM_INIT_EN			(1 << 3)
+#define CPU_BOOT_DEV_STS0_BMC_WAIT_EN			(1 << 4)
+#define CPU_BOOT_DEV_STS0_E2E_CRED_EN			(1 << 5)
+#define CPU_BOOT_DEV_STS0_HBM_CRED_EN			(1 << 6)
+#define CPU_BOOT_DEV_STS0_RL_EN				(1 << 7)
+#define CPU_BOOT_DEV_STS0_SRAM_SCR_EN			(1 << 8)
+#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN			(1 << 9)
+#define CPU_BOOT_DEV_STS0_ENABLED			(1 << 31)
+
 enum cpu_boot_status {
 	CPU_BOOT_STATUS_NA = 0,		/* Default value after reset of chip */
 	CPU_BOOT_STATUS_IN_WFE = 1,
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h b/drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h
index 977fb341a6e7..137afedf5f15 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h
@@ -12,6 +12,8 @@
  * PSOC scratch-pad registers
  */
 #define mmHW_STATE			mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
+#define mmCPU_BOOT_DEV_STS0		mmPSOC_GLOBAL_CONF_SCRATCHPAD_20
+#define mmCPU_BOOT_DEV_STS1		mmPSOC_GLOBAL_CONF_SCRATCHPAD_21
 #define mmFUSE_VER_OFFSET		mmPSOC_GLOBAL_CONF_SCRATCHPAD_22
 #define mmCPU_CMD_STATUS_TO_HOST	mmPSOC_GLOBAL_CONF_SCRATCHPAD_23
 #define mmCPU_BOOT_ERR0			mmPSOC_GLOBAL_CONF_SCRATCHPAD_24
diff --git a/drivers/misc/habanalabs/include/goya/goya_reg_map.h b/drivers/misc/habanalabs/include/goya/goya_reg_map.h
index e56124265a05..f3ab282cafa4 100644
--- a/drivers/misc/habanalabs/include/goya/goya_reg_map.h
+++ b/drivers/misc/habanalabs/include/goya/goya_reg_map.h
@@ -22,6 +22,8 @@
 #define mmCPU_CQ_BASE_ADDR_LOW		mmPSOC_GLOBAL_CONF_SCRATCHPAD_8
 #define mmCPU_CQ_BASE_ADDR_HIGH		mmPSOC_GLOBAL_CONF_SCRATCHPAD_9
 #define mmCPU_CQ_LENGTH			mmPSOC_GLOBAL_CONF_SCRATCHPAD_10
+#define mmCPU_BOOT_DEV_STS0		mmPSOC_GLOBAL_CONF_SCRATCHPAD_20
+#define mmCPU_BOOT_DEV_STS1		mmPSOC_GLOBAL_CONF_SCRATCHPAD_21
 #define mmFUSE_VER_OFFSET		mmPSOC_GLOBAL_CONF_SCRATCHPAD_22
 #define mmCPU_CMD_STATUS_TO_HOST	mmPSOC_GLOBAL_CONF_SCRATCHPAD_23
 #define mmCPU_BOOT_ERR0			mmPSOC_GLOBAL_CONF_SCRATCHPAD_24
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH] habanalabs: fix cs counters structure
  2020-11-04  8:36 [PATCH] habanalabs: advanced FW loading Oded Gabbay
  2020-11-04  8:36 ` [PATCH 1/2] habanalabs: fetch security indication from FW Oded Gabbay
@ 2020-11-04  8:36 ` Oded Gabbay
  2020-11-04  8:36 ` [PATCH] habanalabs/gaudi: monitor device memory usage Oded Gabbay
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Oded Gabbay @ 2020-11-04  8:36 UTC (permalink / raw)
  To: linux-kernel; +Cc: SW_Drivers, farah kassabri

From: farah kassabri <fkassabri@habana.ai>

Fix cs counters structure in uapi to be one flat structure instead
of two instances of the same other structure.
use atomic read/increment for context counters so we could use
one structure for both aggregated and context counters.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../habanalabs/common/command_submission.c    | 18 +++--
 drivers/misc/habanalabs/common/habanalabs.h   | 73 ++++++++++---------
 .../misc/habanalabs/common/habanalabs_ioctl.c | 35 ++++++---
 drivers/misc/habanalabs/common/hw_queue.c     |  5 +-
 drivers/misc/habanalabs/gaudi/gaudi.c         |  4 +-
 include/uapi/misc/habanalabs.h                | 35 +++++----
 6 files changed, 95 insertions(+), 75 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 746005f7f991..e9529f3efc1b 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -462,7 +462,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	if (other && !completion_done(&other->completion)) {
 		dev_dbg_ratelimited(hdev->dev,
 			"Rejecting CS because of too many in-flights CS\n");
-		ctx->cs_counters.max_cs_in_flight_drop_cnt++;
+		atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
 		atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
 		rc = -EAGAIN;
 		goto free_fence;
@@ -720,7 +720,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 		rc = validate_queue_index(hdev, chunk, &queue_type,
 						&is_kernel_allocated_cb);
 		if (rc) {
-			hpriv->ctx->cs_counters.parsing_drop_cnt++;
+			atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
 			atomic64_inc(&cntr->parsing_drop_cnt);
 			goto free_cs_object;
 		}
@@ -728,7 +728,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 		if (is_kernel_allocated_cb) {
 			cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
 			if (!cb) {
-				hpriv->ctx->cs_counters.parsing_drop_cnt++;
+				atomic64_inc(
+				&hpriv->ctx->cs_counters.parsing_drop_cnt);
 				atomic64_inc(&cntr->parsing_drop_cnt);
 				rc = -EINVAL;
 				goto free_cs_object;
@@ -743,7 +744,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 		job = hl_cs_allocate_job(hdev, queue_type,
 						is_kernel_allocated_cb);
 		if (!job) {
-			hpriv->ctx->cs_counters.out_of_mem_drop_cnt++;
+			atomic64_inc(
+			&hpriv->ctx->cs_counters.out_of_mem_drop_cnt);
 			atomic64_inc(&cntr->out_of_mem_drop_cnt);
 			dev_err(hdev->dev, "Failed to allocate a new job\n");
 			rc = -ENOMEM;
@@ -777,7 +779,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 
 		rc = cs_parser(hpriv, job);
 		if (rc) {
-			hpriv->ctx->cs_counters.parsing_drop_cnt++;
+			atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
 			atomic64_inc(&cntr->parsing_drop_cnt);
 			dev_err(hdev->dev,
 				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
@@ -787,7 +789,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 	}
 
 	if (int_queues_only) {
-		hpriv->ctx->cs_counters.parsing_drop_cnt++;
+		atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
 		atomic64_inc(&cntr->parsing_drop_cnt);
 		dev_err(hdev->dev,
 			"Reject CS %d.%llu because only internal queues jobs are present\n",
@@ -880,7 +882,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
 
 	job = hl_cs_allocate_job(hdev, q_type, true);
 	if (!job) {
-		ctx->cs_counters.out_of_mem_drop_cnt++;
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
 		dev_err(hdev->dev, "Failed to allocate a new job\n");
 		return -ENOMEM;
@@ -894,7 +896,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
 	cb = hl_cb_kernel_create(hdev, cb_size,
 				q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
 	if (!cb) {
-		ctx->cs_counters.out_of_mem_drop_cnt++;
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
 		kfree(job);
 		return -EFAULT;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 2b17c13a2e34..0fe108eef1a4 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -936,6 +936,22 @@ struct hl_va_range {
 	u64			end_addr;
 };
 
+/**
+ * struct hl_cs_counters_atomic - command submission counters
+ * @out_of_mem_drop_cnt: dropped due to memory allocation issue
+ * @parsing_drop_cnt: dropped due to error in packet parsing
+ * @queue_full_drop_cnt: dropped due to queue full
+ * @device_in_reset_drop_cnt: dropped due to device in reset
+ * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
+ */
+struct hl_cs_counters_atomic {
+	atomic64_t out_of_mem_drop_cnt;
+	atomic64_t parsing_drop_cnt;
+	atomic64_t queue_full_drop_cnt;
+	atomic64_t device_in_reset_drop_cnt;
+	atomic64_t max_cs_in_flight_drop_cnt;
+};
+
 /**
  * struct hl_ctx - user/kernel context.
  * @mem_hash: holds mapping from virtual address to virtual memory area
@@ -954,6 +970,7 @@ struct hl_va_range {
  * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
  *            MMU hash or walking the PGT requires talking this lock.
  * @debugfs_list: node in debugfs list of contexts.
+ * @cs_counters: context command submission counters.
  * @cb_va_pool: device VA pool for command buffers which are mapped to the
  *              device's MMU.
  * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
@@ -976,26 +993,26 @@ struct hl_va_range {
 struct hl_ctx {
 	DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
 	DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS);
-	struct hl_fpriv		*hpriv;
-	struct hl_device	*hdev;
-	struct kref		refcount;
-	struct hl_fence		**cs_pending;
-	struct hl_va_range	*host_va_range;
-	struct hl_va_range	*host_huge_va_range;
-	struct hl_va_range	*dram_va_range;
-	struct mutex		mem_hash_lock;
-	struct mutex		mmu_lock;
-	struct list_head	debugfs_list;
-	struct hl_cs_counters	cs_counters;
-	struct gen_pool		*cb_va_pool;
-	u64			cs_sequence;
-	u64			*dram_default_hops;
-	spinlock_t		cs_lock;
-	atomic64_t		dram_phys_mem;
-	atomic_t		thread_ctx_switch_token;
-	u32			thread_ctx_switch_wait_token;
-	u32			asid;
-	u32			handle;
+	struct hl_fpriv			*hpriv;
+	struct hl_device		*hdev;
+	struct kref			refcount;
+	struct hl_fence			**cs_pending;
+	struct hl_va_range		*host_va_range;
+	struct hl_va_range		*host_huge_va_range;
+	struct hl_va_range		*dram_va_range;
+	struct mutex			mem_hash_lock;
+	struct mutex			mmu_lock;
+	struct list_head		debugfs_list;
+	struct hl_cs_counters_atomic	cs_counters;
+	struct gen_pool			*cb_va_pool;
+	u64				cs_sequence;
+	u64				*dram_default_hops;
+	spinlock_t			cs_lock;
+	atomic64_t			dram_phys_mem;
+	atomic_t			thread_ctx_switch_token;
+	u32				thread_ctx_switch_wait_token;
+	u32				asid;
+	u32				handle;
 };
 
 /**
@@ -1164,22 +1181,6 @@ struct hl_cs_parser {
 	u8			contains_dma_pkt;
 };
 
-/**
- * struct hl_info_cs_counters - command submission counters
- * @out_of_mem_drop_cnt: dropped due to memory allocation issue
- * @parsing_drop_cnt: dropped due to error in packet parsing
- * @queue_full_drop_cnt: dropped due to queue full
- * @device_in_reset_drop_cnt: dropped due to device in reset
- * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
- */
-struct hl_cs_counters_atomic {
-	atomic64_t out_of_mem_drop_cnt;
-	atomic64_t parsing_drop_cnt;
-	atomic64_t queue_full_drop_cnt;
-	atomic64_t device_in_reset_drop_cnt;
-	atomic64_t max_cs_in_flight_drop_cnt;
-};
-
 /*
  * MEMORY STRUCTURE
  */
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 350a768309bd..1d8bea626e78 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -315,7 +315,7 @@ static int clk_throttle_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 {
 	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
-	struct hl_info_cs_counters cs_counters = { {0} };
+	struct hl_info_cs_counters cs_counters = {0};
 	struct hl_device *hdev = hpriv->hdev;
 	struct hl_cs_counters_atomic *cntr;
 	u32 max_size = args->return_size;
@@ -325,23 +325,34 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 	if ((!max_size) || (!out))
 		return -EINVAL;
 
-	memcpy(&cs_counters.cs_counters, &hdev->aggregated_cs_counters,
-			sizeof(struct hl_cs_counters));
-
-	cs_counters.cs_counters.out_of_mem_drop_cnt =
+	cs_counters.total_out_of_mem_drop_cnt =
 			atomic64_read(&cntr->out_of_mem_drop_cnt);
-	cs_counters.cs_counters.parsing_drop_cnt =
+	cs_counters.total_parsing_drop_cnt =
 			atomic64_read(&cntr->parsing_drop_cnt);
-	cs_counters.cs_counters.queue_full_drop_cnt =
+	cs_counters.total_queue_full_drop_cnt =
 			atomic64_read(&cntr->queue_full_drop_cnt);
-	cs_counters.cs_counters.device_in_reset_drop_cnt =
+	cs_counters.total_device_in_reset_drop_cnt =
 			atomic64_read(&cntr->device_in_reset_drop_cnt);
-	cs_counters.cs_counters.max_cs_in_flight_drop_cnt =
+	cs_counters.total_max_cs_in_flight_drop_cnt =
 			atomic64_read(&cntr->max_cs_in_flight_drop_cnt);
 
-	if (hpriv->ctx)
-		memcpy(&cs_counters.ctx_cs_counters, &hpriv->ctx->cs_counters,
-				sizeof(struct hl_cs_counters));
+	if (hpriv->ctx) {
+		cs_counters.ctx_out_of_mem_drop_cnt =
+				atomic64_read(
+				&hpriv->ctx->cs_counters.out_of_mem_drop_cnt);
+		cs_counters.ctx_parsing_drop_cnt =
+				atomic64_read(
+				&hpriv->ctx->cs_counters.parsing_drop_cnt);
+		cs_counters.ctx_queue_full_drop_cnt =
+				atomic64_read(
+				&hpriv->ctx->cs_counters.queue_full_drop_cnt);
+		cs_counters.ctx_device_in_reset_drop_cnt =
+				atomic64_read(
+			&hpriv->ctx->cs_counters.device_in_reset_drop_cnt);
+		cs_counters.ctx_max_cs_in_flight_drop_cnt =
+				atomic64_read(
+			&hpriv->ctx->cs_counters.max_cs_in_flight_drop_cnt);
+	}
 
 	return copy_to_user(out, &cs_counters,
 		min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0;
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 8a15db724116..239e2ba0545f 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -524,7 +524,7 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 	hdev->asic_funcs->hw_queues_lock(hdev);
 
 	if (hl_device_disabled_or_in_reset(hdev)) {
-		ctx->cs_counters.device_in_reset_drop_cnt++;
+		atomic64_inc(&ctx->cs_counters.device_in_reset_drop_cnt);
 		atomic64_inc(&cntr->device_in_reset_drop_cnt);
 		dev_err(hdev->dev,
 			"device is disabled or in reset, CS rejected!\n");
@@ -558,7 +558,8 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 			}
 
 			if (rc) {
-				ctx->cs_counters.queue_full_drop_cnt++;
+				atomic64_inc(
+					&ctx->cs_counters.queue_full_drop_cnt);
 				atomic64_inc(&cntr->queue_full_drop_cnt);
 				goto unroll_cq_resv;
 			}
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index d8fdcae542c0..e5a2b7da0a20 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1141,7 +1141,7 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev,
 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
 	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
 	if (!job) {
-		ctx->cs_counters.out_of_mem_drop_cnt++;
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
 		dev_err(hdev->dev, "Failed to allocate a new job\n");
 		return -ENOMEM;
@@ -1151,7 +1151,7 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev,
 	cb = hl_cb_kernel_create(hdev, cb_size,
 			hdev->mmu_enable && !patched_cb);
 	if (!cb) {
-		ctx->cs_counters.out_of_mem_drop_cnt++;
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
 		kfree(job);
 		return -EFAULT;
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 0185311b679b..61f8f9144b54 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -407,23 +407,28 @@ struct hl_info_sync_manager {
 
 /**
  * struct hl_info_cs_counters - command submission counters
- * @out_of_mem_drop_cnt: dropped due to memory allocation issue
- * @parsing_drop_cnt: dropped due to error in packet parsing
- * @queue_full_drop_cnt: dropped due to queue full
- * @device_in_reset_drop_cnt: dropped due to device in reset
- * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
+ * @total_out_of_mem_drop_cnt: total dropped due to memory allocation issue
+ * @ctx_out_of_mem_drop_cnt: context dropped due to memory allocation issue
+ * @total_parsing_drop_cnt: total dropped due to error in packet parsing
+ * @ctx_parsing_drop_cnt: context dropped due to error in packet parsing
+ * @total_queue_full_drop_cnt: total dropped due to queue full
+ * @ctx_queue_full_drop_cnt: context dropped due to queue full
+ * @total_device_in_reset_drop_cnt: total dropped due to device in reset
+ * @ctx_device_in_reset_drop_cnt: context dropped due to device in reset
+ * @total_max_cs_in_flight_drop_cnt: total dropped due to maximum CS in-flight
+ * @ctx_max_cs_in_flight_drop_cnt: context dropped due to maximum CS in-flight
  */
-struct hl_cs_counters {
-	__u64 out_of_mem_drop_cnt;
-	__u64 parsing_drop_cnt;
-	__u64 queue_full_drop_cnt;
-	__u64 device_in_reset_drop_cnt;
-	__u64 max_cs_in_flight_drop_cnt;
-};
-
 struct hl_info_cs_counters {
-	struct hl_cs_counters cs_counters;
-	struct hl_cs_counters ctx_cs_counters;
+	__u64 total_out_of_mem_drop_cnt;
+	__u64 ctx_out_of_mem_drop_cnt;
+	__u64 total_parsing_drop_cnt;
+	__u64 ctx_parsing_drop_cnt;
+	__u64 total_queue_full_drop_cnt;
+	__u64 ctx_queue_full_drop_cnt;
+	__u64 total_device_in_reset_drop_cnt;
+	__u64 ctx_device_in_reset_drop_cnt;
+	__u64 total_max_cs_in_flight_drop_cnt;
+	__u64 ctx_max_cs_in_flight_drop_cnt;
 };
 
 enum gaudi_dcores {
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH] habanalabs/gaudi: monitor device memory usage
  2020-11-04  8:36 [PATCH] habanalabs: advanced FW loading Oded Gabbay
  2020-11-04  8:36 ` [PATCH 1/2] habanalabs: fetch security indication from FW Oded Gabbay
  2020-11-04  8:36 ` [PATCH] habanalabs: fix cs counters structure Oded Gabbay
@ 2020-11-04  8:36 ` Oded Gabbay
  2020-11-04  8:36 ` [PATCH] habanalabs/gaudi: remove unreachable code Oded Gabbay
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Oded Gabbay @ 2020-11-04  8:36 UTC (permalink / raw)
  To: linux-kernel; +Cc: SW_Drivers

In GAUDI we don't have an MMU towards the HBM device memory. Therefore,
the user access that memory directly through physical address (via the
different engines) without the need to go through the driver to
allocate/free memory on the HBM.

For system monitoring purposes, the driver will keep track of the HBM
usage. This can be done as long as the user accurately reports the
allocations and releases of HBM memory, through the existing MEMORY
IOCTL uapi.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/memory.c | 56 ++++++++++++++++++++++---
 1 file changed, 50 insertions(+), 6 deletions(-)

diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 75dd18771868..f885812d9939 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -1236,18 +1236,35 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 
 	switch (args->in.op) {
 	case HL_MEM_OP_ALLOC:
-		if (!hdev->dram_supports_virtual_memory) {
-			dev_err(hdev->dev, "DRAM alloc is not supported\n");
-			rc = -EINVAL;
-			goto out;
-		}
-
 		if (args->in.alloc.mem_size == 0) {
 			dev_err(hdev->dev,
 				"alloc size must be larger than 0\n");
 			rc = -EINVAL;
 			goto out;
 		}
+
+		/* If DRAM does not support virtual memory the driver won't
+		 * handle the allocation/freeing of that memory. However, for
+		 * system administration/monitoring purposes, the driver will
+		 * keep track of the amount of DRAM memory that is allocated
+		 * and freed by the user. Because this code totally relies on
+		 * the user's input, the driver can't ensure the validity
+		 * of this accounting.
+		 */
+		if (!hdev->dram_supports_virtual_memory) {
+			atomic64_add(args->in.alloc.mem_size,
+					&ctx->dram_phys_mem);
+			atomic64_add(args->in.alloc.mem_size,
+					&hdev->dram_used_mem);
+
+			dev_dbg(hdev->dev, "DRAM alloc is not supported\n");
+			rc = 0;
+
+			memset(args, 0, sizeof(*args));
+			args->out.handle = 0;
+			goto out;
+		}
+
 		rc = alloc_device_memory(ctx, &args->in, &handle);
 
 		memset(args, 0, sizeof(*args));
@@ -1255,6 +1272,26 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 		break;
 
 	case HL_MEM_OP_FREE:
+		/* If DRAM does not support virtual memory the driver won't
+		 * handle the allocation/freeing of that memory. However, for
+		 * system administration/monitoring purposes, the driver will
+		 * keep track of the amount of DRAM memory that is allocated
+		 * and freed by the user. Because this code totally relies on
+		 * the user's input, the driver can't ensure the validity
+		 * of this accounting.
+		 */
+		if (!hdev->dram_supports_virtual_memory) {
+			atomic64_sub(args->in.alloc.mem_size,
+					&ctx->dram_phys_mem);
+			atomic64_sub(args->in.alloc.mem_size,
+					&hdev->dram_used_mem);
+
+			dev_dbg(hdev->dev, "DRAM alloc is not supported\n");
+			rc = 0;
+
+			goto out;
+		}
+
 		rc = free_device_memory(ctx, args->in.free.handle);
 		break;
 
@@ -1773,6 +1810,13 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
 
 	mutex_destroy(&ctx->mem_hash_lock);
 	hl_mmu_ctx_fini(ctx);
+
+	/* In this case we need to clear the global accounting of DRAM usage
+	 * because the user notifies us on allocations. If the user is no more,
+	 * all DRAM is available
+	 */
+	if (!ctx->hdev->dram_supports_virtual_memory)
+		atomic64_set(&ctx->hdev->dram_used_mem, 0);
 }
 
 /*
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH] habanalabs/gaudi: remove unreachable code
  2020-11-04  8:36 [PATCH] habanalabs: advanced FW loading Oded Gabbay
                   ` (2 preceding siblings ...)
  2020-11-04  8:36 ` [PATCH] habanalabs/gaudi: monitor device memory usage Oded Gabbay
@ 2020-11-04  8:36 ` Oded Gabbay
  2020-11-04  8:36 ` [PATCH] habanalabs: initialize variable before use Oded Gabbay
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Oded Gabbay @ 2020-11-04  8:36 UTC (permalink / raw)
  To: linux-kernel; +Cc: SW_Drivers, Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

Remove unreachable code in gaudi collective flow.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index f73e508cf14c..8b02d6341b4c 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1224,10 +1224,8 @@ static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
 
 	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
 		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
-	else if (collective_engine_id == GAUDI_ENGINE_ID_TPC_7)
-		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
 	else
-		return -EINVAL;
+		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
 
 	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
 	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH] habanalabs: initialize variable before use
  2020-11-04  8:36 [PATCH] habanalabs: advanced FW loading Oded Gabbay
                   ` (3 preceding siblings ...)
  2020-11-04  8:36 ` [PATCH] habanalabs/gaudi: remove unreachable code Oded Gabbay
@ 2020-11-04  8:36 ` Oded Gabbay
  2020-11-04  8:36 ` [PATCH] habanalabs: make sure cs type is valid in cs_ioctl_signal_wait Oded Gabbay
  2020-11-04  8:36 ` [PATCH 2/2] habanalabs/gaudi: add support for FW security Oded Gabbay
  6 siblings, 0 replies; 8+ messages in thread
From: Oded Gabbay @ 2020-11-04  8:36 UTC (permalink / raw)
  To: linux-kernel; +Cc: SW_Drivers

GCC 7.3.1 20180303 (Red Hat 7.3.1-5) complains that collective_engine_id
might be used uninitialized.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/command_submission.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 46c36b385c95..746005f7f991 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -942,7 +942,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	struct hl_cs_compl *sig_waitcs_cmpl;
 	struct hl_cs *cs;
 	enum hl_queue_type q_type;
-	u32 size_to_copy, q_idx, collective_engine_id;
+	u32 size_to_copy, q_idx, collective_engine_id = 0;
 	u64 signal_seq;
 	int rc;
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH] habanalabs: make sure cs type is valid in cs_ioctl_signal_wait
  2020-11-04  8:36 [PATCH] habanalabs: advanced FW loading Oded Gabbay
                   ` (4 preceding siblings ...)
  2020-11-04  8:36 ` [PATCH] habanalabs: initialize variable before use Oded Gabbay
@ 2020-11-04  8:36 ` Oded Gabbay
  2020-11-04  8:36 ` [PATCH 2/2] habanalabs/gaudi: add support for FW security Oded Gabbay
  6 siblings, 0 replies; 8+ messages in thread
From: Oded Gabbay @ 2020-11-04  8:36 UTC (permalink / raw)
  To: linux-kernel; +Cc: SW_Drivers

Although we get a valid cs type from the callee, in case new values
will be added in the future, it is best to check the expected values
in that function.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/command_submission.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index f5bf8684f8aa..46c36b385c95 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -1070,9 +1070,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
 		rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
 				q_idx);
-	else
+	else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
 		rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
 				cs, q_idx, collective_engine_id);
+	else
+		rc = -EINVAL;
 
 	if (rc)
 		goto free_cs_object;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] habanalabs/gaudi: add support for FW security
  2020-11-04  8:36 [PATCH] habanalabs: advanced FW loading Oded Gabbay
                   ` (5 preceding siblings ...)
  2020-11-04  8:36 ` [PATCH] habanalabs: make sure cs type is valid in cs_ioctl_signal_wait Oded Gabbay
@ 2020-11-04  8:36 ` Oded Gabbay
  6 siblings, 0 replies; 8+ messages in thread
From: Oded Gabbay @ 2020-11-04  8:36 UTC (permalink / raw)
  To: linux-kernel; +Cc: SW_Drivers, Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

Skip relevant HW configurations once FW security is enabled
because these configurations are being performed by FW.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c         | 123 ++++++++++++------
 .../misc/habanalabs/gaudi/gaudi_security.c    |  83 ++++++------
 2 files changed, 128 insertions(+), 78 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index e874c1765922..9017c712766c 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1410,7 +1410,8 @@ static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
 	hdev->cpu_pci_msb_addr =
 		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
 
-	GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
+	if (hdev->asic_prop.fw_security_disabled)
+		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
 
 free_dma_mem_arr:
 	for (j = 0 ; j < i ; j++)
@@ -1564,8 +1565,9 @@ static int gaudi_sw_init(struct hl_device *hdev)
 free_cpu_accessible_dma_pool:
 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
 free_cpu_dma_mem:
-	GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
-				hdev->cpu_pci_msb_addr);
+	if (hdev->asic_prop.fw_security_disabled)
+		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
+					hdev->cpu_pci_msb_addr);
 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
 			HL_CPU_ACCESSIBLE_MEM_SIZE,
 			hdev->cpu_accessible_dma_mem,
@@ -1585,8 +1587,10 @@ static int gaudi_sw_fini(struct hl_device *hdev)
 
 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
 
-	GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
+	if (hdev->asic_prop.fw_security_disabled)
+		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
 					hdev->cpu_pci_msb_addr);
+
 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
 			HL_CPU_ACCESSIBLE_MEM_SIZE,
 			hdev->cpu_accessible_dma_mem,
@@ -1772,6 +1776,14 @@ static void gaudi_init_scrambler_sram(struct hl_device *hdev)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
 
+	if (!hdev->asic_prop.fw_security_disabled)
+		return;
+
+	if (hdev->asic_prop.fw_security_status_valid &&
+			(hdev->asic_prop.fw_app_security_map &
+					CPU_BOOT_DEV_STS0_SRAM_SCR_EN))
+		return;
+
 	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
 		return;
 
@@ -1836,6 +1848,14 @@ static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
 
+	if (!hdev->asic_prop.fw_security_disabled)
+		return;
+
+	if (hdev->asic_prop.fw_security_status_valid &&
+			(hdev->asic_prop.fw_boot_cpu_security_map &
+					CPU_BOOT_DEV_STS0_DRAM_SCR_EN))
+		return;
+
 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
 		return;
 
@@ -1898,6 +1918,14 @@ static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
 
 static void gaudi_init_e2e(struct hl_device *hdev)
 {
+	if (!hdev->asic_prop.fw_security_disabled)
+		return;
+
+	if (hdev->asic_prop.fw_security_status_valid &&
+			(hdev->asic_prop.fw_boot_cpu_security_map &
+					CPU_BOOT_DEV_STS0_E2E_CRED_EN))
+		return;
+
 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
@@ -2265,6 +2293,14 @@ static void gaudi_init_hbm_cred(struct hl_device *hdev)
 {
 	uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
 
+	if (!hdev->asic_prop.fw_security_disabled)
+		return;
+
+	if (hdev->asic_prop.fw_security_status_valid &&
+			(hdev->asic_prop.fw_boot_cpu_security_map &
+					CPU_BOOT_DEV_STS0_HBM_CRED_EN))
+		return;
+
 	hbm0_wr = 0x33333333;
 	hbm0_rd = 0x77777777;
 	hbm1_wr = 0x55555555;
@@ -3606,7 +3642,8 @@ static int gaudi_init_cpu(struct hl_device *hdev)
 	 * The device CPU works with 40 bits addresses.
 	 * This register sets the extension to 50 bits.
 	 */
-	WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
+	if (hdev->asic_prop.fw_security_disabled)
+		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
 
 	rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
 			mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
@@ -3691,17 +3728,19 @@ static void gaudi_pre_hw_init(struct hl_device *hdev)
 	/* Perform read from the device to make sure device is up */
 	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
 
-	/* Set the access through PCI bars (Linux driver only) as
-	 * secured
-	 */
-	WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
-			(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
-			PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
+	if (hdev->asic_prop.fw_security_disabled) {
+		/* Set the access through PCI bars (Linux driver only) as
+		 * secured
+		 */
+		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
+				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
+				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
 
-	/* Perform read to flush the waiting writes to ensure
-	 * configuration was set in the device
-	 */
-	RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
+		/* Perform read to flush the waiting writes to ensure
+		 * configuration was set in the device
+		 */
+		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
+	}
 
 	/*
 	 * Let's mark in the H/W that we have reached this point. We check
@@ -3710,32 +3749,33 @@ static void gaudi_pre_hw_init(struct hl_device *hdev)
 	 * cleared by the H/W upon H/W reset
 	 */
 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
+	if (hdev->asic_prop.fw_security_disabled) {
+		/* Configure the reset registers. Must be done as early as
+		 * possible in case we fail during H/W initialization
+		 */
+		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
+						(CFG_RST_H_DMA_MASK |
+						CFG_RST_H_MME_MASK |
+						CFG_RST_H_SM_MASK |
+						CFG_RST_H_TPC_7_MASK));
 
-	/* Configure the reset registers. Must be done as early as possible
-	 * in case we fail during H/W initialization
-	 */
-	WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
-					(CFG_RST_H_DMA_MASK |
-					CFG_RST_H_MME_MASK |
-					CFG_RST_H_SM_MASK |
-					CFG_RST_H_TPC_7_MASK));
-
-	WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
-
-	WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
-					(CFG_RST_H_HBM_MASK |
-					CFG_RST_H_TPC_7_MASK |
-					CFG_RST_H_NIC_MASK |
-					CFG_RST_H_SM_MASK |
-					CFG_RST_H_DMA_MASK |
-					CFG_RST_H_MME_MASK |
-					CFG_RST_H_CPU_MASK |
-					CFG_RST_H_MMU_MASK));
-
-	WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
-					(CFG_RST_L_IF_MASK |
-					CFG_RST_L_PSOC_MASK |
-					CFG_RST_L_TPC_MASK));
+		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
+
+		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
+						(CFG_RST_H_HBM_MASK |
+						CFG_RST_H_TPC_7_MASK |
+						CFG_RST_H_NIC_MASK |
+						CFG_RST_H_SM_MASK |
+						CFG_RST_H_DMA_MASK |
+						CFG_RST_H_MME_MASK |
+						CFG_RST_H_CPU_MASK |
+						CFG_RST_H_MMU_MASK));
+
+		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
+						(CFG_RST_L_IF_MASK |
+						CFG_RST_L_PSOC_MASK |
+						CFG_RST_L_TPC_MASK));
+	}
 }
 
 static int gaudi_hw_init(struct hl_device *hdev)
@@ -3851,7 +3891,8 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 	WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
 
 	/* Restart BTL/BLR upon hard-reset */
-	WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
+	if (hdev->asic_prop.fw_security_disabled)
+		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
 
 	WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_security.c b/drivers/misc/habanalabs/gaudi/gaudi_security.c
index 8a921ab56557..e10181692d0b 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_security.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_security.c
@@ -1448,21 +1448,23 @@ static void gaudi_init_dma_protection_bits(struct hl_device *hdev)
 	u32 pb_addr, mask;
 	u8 word_offset;
 
-	gaudi_pb_set_block(hdev, mmDMA_IF_E_S_BASE);
-	gaudi_pb_set_block(hdev, mmDMA_IF_E_S_DOWN_CH0_BASE);
-	gaudi_pb_set_block(hdev, mmDMA_IF_E_S_DOWN_CH1_BASE);
-	gaudi_pb_set_block(hdev, mmDMA_E_PLL_BASE);
-	gaudi_pb_set_block(hdev, mmDMA_IF_E_S_DOWN_BASE);
-
-	gaudi_pb_set_block(hdev, mmDMA_IF_W_N_BASE);
-	gaudi_pb_set_block(hdev, mmDMA_IF_W_N_DOWN_CH0_BASE);
-	gaudi_pb_set_block(hdev, mmDMA_IF_W_N_DOWN_CH1_BASE);
-	gaudi_pb_set_block(hdev, mmDMA_IF_W_N_DOWN_BASE);
-
-	gaudi_pb_set_block(hdev, mmDMA_IF_E_N_BASE);
-	gaudi_pb_set_block(hdev, mmDMA_IF_E_N_DOWN_CH0_BASE);
-	gaudi_pb_set_block(hdev, mmDMA_IF_E_N_DOWN_CH1_BASE);
-	gaudi_pb_set_block(hdev, mmDMA_IF_E_N_DOWN_BASE);
+	if (hdev->asic_prop.fw_security_disabled) {
+		gaudi_pb_set_block(hdev, mmDMA_IF_E_S_BASE);
+		gaudi_pb_set_block(hdev, mmDMA_IF_E_S_DOWN_CH0_BASE);
+		gaudi_pb_set_block(hdev, mmDMA_IF_E_S_DOWN_CH1_BASE);
+		gaudi_pb_set_block(hdev, mmDMA_E_PLL_BASE);
+		gaudi_pb_set_block(hdev, mmDMA_IF_E_S_DOWN_BASE);
+
+		gaudi_pb_set_block(hdev, mmDMA_IF_W_N_BASE);
+		gaudi_pb_set_block(hdev, mmDMA_IF_W_N_DOWN_CH0_BASE);
+		gaudi_pb_set_block(hdev, mmDMA_IF_W_N_DOWN_CH1_BASE);
+		gaudi_pb_set_block(hdev, mmDMA_IF_W_N_DOWN_BASE);
+
+		gaudi_pb_set_block(hdev, mmDMA_IF_E_N_BASE);
+		gaudi_pb_set_block(hdev, mmDMA_IF_E_N_DOWN_CH0_BASE);
+		gaudi_pb_set_block(hdev, mmDMA_IF_E_N_DOWN_CH1_BASE);
+		gaudi_pb_set_block(hdev, mmDMA_IF_E_N_DOWN_BASE);
+	}
 
 	WREG32(mmDMA0_QM_BASE - CFG_BASE + PROT_BITS_OFFS + 0x7C, 0);
 	WREG32(mmDMA1_QM_BASE - CFG_BASE + PROT_BITS_OFFS + 0x7C, 0);
@@ -9133,14 +9135,16 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
 	u32 pb_addr, mask;
 	u8 word_offset;
 
-	gaudi_pb_set_block(hdev, mmTPC0_E2E_CRED_BASE);
-	gaudi_pb_set_block(hdev, mmTPC1_E2E_CRED_BASE);
-	gaudi_pb_set_block(hdev, mmTPC2_E2E_CRED_BASE);
-	gaudi_pb_set_block(hdev, mmTPC3_E2E_CRED_BASE);
-	gaudi_pb_set_block(hdev, mmTPC4_E2E_CRED_BASE);
-	gaudi_pb_set_block(hdev, mmTPC5_E2E_CRED_BASE);
-	gaudi_pb_set_block(hdev, mmTPC6_E2E_CRED_BASE);
-	gaudi_pb_set_block(hdev, mmTPC7_E2E_CRED_BASE);
+	if (hdev->asic_prop.fw_security_disabled) {
+		gaudi_pb_set_block(hdev, mmTPC0_E2E_CRED_BASE);
+		gaudi_pb_set_block(hdev, mmTPC1_E2E_CRED_BASE);
+		gaudi_pb_set_block(hdev, mmTPC2_E2E_CRED_BASE);
+		gaudi_pb_set_block(hdev, mmTPC3_E2E_CRED_BASE);
+		gaudi_pb_set_block(hdev, mmTPC4_E2E_CRED_BASE);
+		gaudi_pb_set_block(hdev, mmTPC5_E2E_CRED_BASE);
+		gaudi_pb_set_block(hdev, mmTPC6_E2E_CRED_BASE);
+		gaudi_pb_set_block(hdev, mmTPC7_E2E_CRED_BASE);
+	}
 
 	WREG32(mmTPC0_QM_BASE - CFG_BASE + PROT_BITS_OFFS + 0x7C, 0);
 	WREG32(mmTPC0_CFG_BASE - CFG_BASE + PROT_BITS_OFFS + 0x7C, 0);
@@ -12822,11 +12826,13 @@ static void gaudi_init_protection_bits(struct hl_device *hdev)
 	 * secured
 	 */
 
-	gaudi_pb_set_block(hdev, mmIF_E_PLL_BASE);
-	gaudi_pb_set_block(hdev, mmMESH_W_PLL_BASE);
-	gaudi_pb_set_block(hdev, mmSRAM_W_PLL_BASE);
-	gaudi_pb_set_block(hdev, mmMESH_E_PLL_BASE);
-	gaudi_pb_set_block(hdev, mmSRAM_E_PLL_BASE);
+	if (hdev->asic_prop.fw_security_disabled) {
+		gaudi_pb_set_block(hdev, mmIF_E_PLL_BASE);
+		gaudi_pb_set_block(hdev, mmMESH_W_PLL_BASE);
+		gaudi_pb_set_block(hdev, mmSRAM_W_PLL_BASE);
+		gaudi_pb_set_block(hdev, mmMESH_E_PLL_BASE);
+		gaudi_pb_set_block(hdev, mmSRAM_E_PLL_BASE);
+	}
 
 	gaudi_init_dma_protection_bits(hdev);
 
@@ -13025,17 +13031,20 @@ void gaudi_init_security(struct hl_device *hdev)
 	 * property configuration of MME SBAB and ACC to be non-privileged and
 	 * non-secured
 	 */
-	WREG32(mmMME0_SBAB_PROT, 0x2);
-	WREG32(mmMME0_ACC_PROT, 0x2);
-	WREG32(mmMME1_SBAB_PROT, 0x2);
-	WREG32(mmMME1_ACC_PROT, 0x2);
-	WREG32(mmMME2_SBAB_PROT, 0x2);
-	WREG32(mmMME2_ACC_PROT, 0x2);
-	WREG32(mmMME3_SBAB_PROT, 0x2);
-	WREG32(mmMME3_ACC_PROT, 0x2);
+	if (hdev->asic_prop.fw_security_disabled) {
+		WREG32(mmMME0_SBAB_PROT, 0x2);
+		WREG32(mmMME0_ACC_PROT, 0x2);
+		WREG32(mmMME1_SBAB_PROT, 0x2);
+		WREG32(mmMME1_ACC_PROT, 0x2);
+		WREG32(mmMME2_SBAB_PROT, 0x2);
+		WREG32(mmMME2_ACC_PROT, 0x2);
+		WREG32(mmMME3_SBAB_PROT, 0x2);
+		WREG32(mmMME3_ACC_PROT, 0x2);
+	}
 
 	/* On RAZWI, 0 will be returned from RR and 0xBABA0BAD from PB */
-	WREG32(0xC01B28, 0x1);
+	if (hdev->asic_prop.fw_security_disabled)
+		WREG32(0xC01B28, 0x1);
 
 	gaudi_init_range_registers_lbw(hdev);
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-11-04  8:37 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-04  8:36 [PATCH] habanalabs: advanced FW loading Oded Gabbay
2020-11-04  8:36 ` [PATCH 1/2] habanalabs: fetch security indication from FW Oded Gabbay
2020-11-04  8:36 ` [PATCH] habanalabs: fix cs counters structure Oded Gabbay
2020-11-04  8:36 ` [PATCH] habanalabs/gaudi: monitor device memory usage Oded Gabbay
2020-11-04  8:36 ` [PATCH] habanalabs/gaudi: remove unreachable code Oded Gabbay
2020-11-04  8:36 ` [PATCH] habanalabs: initialize variable before use Oded Gabbay
2020-11-04  8:36 ` [PATCH] habanalabs: make sure cs type is valid in cs_ioctl_signal_wait Oded Gabbay
2020-11-04  8:36 ` [PATCH 2/2] habanalabs/gaudi: add support for FW security Oded Gabbay

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).