All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/7] habanalabs/gaudi: clear QM errors only if not in stop_on_err mode
@ 2021-04-08 15:22 Oded Gabbay
  2021-04-08 15:22 ` [PATCH 2/7] habanalabs: send dynamic msi-x indexes to f/w Oded Gabbay
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Oded Gabbay @ 2021-04-08 15:22 UTC (permalink / raw)
  To: linux-kernel; +Cc: Tomer Tayar

From: Tomer Tayar <ttayar@habana.ai>

Clearing QM errors by the driver will prevent these H/W blocks from
stopping in case they are configured to stop on errors, so perform this
clearing only if this mode is not in use.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 03d3fb643e79..791434278904 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7086,7 +7086,8 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
 		}
 
 		/* Write 1 clear errors */
-		WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
+		if (!hdev->stop_on_err)
+			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
 	}
 
 	arb_err_val = RREG32(arb_err_addr);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/7] habanalabs: send dynamic msi-x indexes to f/w
  2021-04-08 15:22 [PATCH 1/7] habanalabs/gaudi: clear QM errors only if not in stop_on_err mode Oded Gabbay
@ 2021-04-08 15:22 ` Oded Gabbay
  2021-04-08 15:22 ` [PATCH 3/7] habanalabs: move dram scrub to free sequence Oded Gabbay
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2021-04-08 15:22 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ohad Sharabi

From: Ohad Sharabi <osharabi@habana.ai>

In order to minimize hard coded values between F/W and the driver, we
send msi-x indexes dynamically to the F/W.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c  | 67 +++++++++++++++++
 drivers/misc/habanalabs/common/habanalabs.h   |  4 +
 drivers/misc/habanalabs/gaudi/gaudi.c         |  2 +-
 drivers/misc/habanalabs/goya/goya.c           |  2 +-
 .../misc/habanalabs/include/common/cpucp_if.h | 75 ++++++++++++++-----
 5 files changed, 131 insertions(+), 19 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index d81a8d537373..532a2fd7bfb4 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -422,6 +422,73 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev,
 	return rc;
 }
 
+static int hl_fw_send_msi_info_msg(struct hl_device *hdev)
+{
+	struct cpucp_array_data_packet *pkt;
+	size_t total_pkt_size, data_size;
+	u64 result;
+	int rc;
+
+	/* skip sending this info for unsupported ASICs */
+	if (!hdev->asic_funcs->get_msi_info)
+		return 0;
+
+	data_size = CPUCP_NUM_OF_MSI_TYPES * sizeof(u32);
+	total_pkt_size = sizeof(struct cpucp_array_data_packet) + data_size;
+
+	/* data should be aligned to 8 bytes in order to CPU-CP to copy it */
+	total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
+
+	/* total_pkt_size is casted to u16 later on */
+	if (total_pkt_size > USHRT_MAX) {
+		dev_err(hdev->dev, "CPUCP array data is too big\n");
+		return -EINVAL;
+	}
+
+	pkt = kzalloc(total_pkt_size, GFP_KERNEL);
+	if (!pkt)
+		return -ENOMEM;
+
+	pkt->length = cpu_to_le32(CPUCP_NUM_OF_MSI_TYPES);
+
+	hdev->asic_funcs->get_msi_info((u32 *)&pkt->data);
+
+	pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_MSI_INFO_SET <<
+						CPUCP_PKT_CTL_OPCODE_SHIFT);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *)pkt,
+						total_pkt_size, 0, &result);
+
+	/*
+	 * in case packet result is invalid it means that FW does not support
+	 * this feature and will use default/hard coded MSI values. no reason
+	 * to stop the boot
+	 */
+	if (rc && result == cpucp_packet_invalid)
+		rc = 0;
+
+	if (rc)
+		dev_err(hdev->dev, "failed to send CPUCP array data\n");
+
+	kfree(pkt);
+
+	return rc;
+}
+
+int hl_fw_cpucp_handshake(struct hl_device *hdev,
+			u32 cpu_security_boot_status_reg,
+			u32 boot_err0_reg)
+{
+	int rc;
+
+	rc = hl_fw_cpucp_info_get(hdev, cpu_security_boot_status_reg,
+					boot_err0_reg);
+	if (rc)
+		return rc;
+
+	return hl_fw_send_msi_info_msg(hdev);
+}
+
 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
 {
 	struct cpucp_packet pkt = {};
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index d89ae4c3d634..867986ef4588 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1049,6 +1049,7 @@ struct hl_asic_funcs {
 	int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
 			u32 block_id, u32 block_size);
 	void (*enable_events_from_fw)(struct hl_device *hdev);
+	void (*get_msi_info)(u32 *table);
 };
 
 
@@ -2374,6 +2375,9 @@ int hl_fw_send_heartbeat(struct hl_device *hdev);
 int hl_fw_cpucp_info_get(struct hl_device *hdev,
 			u32 cpu_security_boot_status_reg,
 			u32 boot_err0_reg);
+int hl_fw_cpucp_handshake(struct hl_device *hdev,
+			u32 cpu_security_boot_status_reg,
+			u32 boot_err0_reg);
 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
 int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
 		struct hl_info_pci_counters *counters);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 791434278904..62e3c63bec20 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7966,7 +7966,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
 		return 0;
 
-	rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
+	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
 	if (rc)
 		return rc;
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 175b7b0af450..9d49ba649db0 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5245,7 +5245,7 @@ int goya_cpucp_info_get(struct hl_device *hdev)
 	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
 		return 0;
 
-	rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
+	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
 	if (rc)
 		return rc;
 
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index 88e8ce6e0694..20a710f7a369 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -302,6 +302,27 @@ enum pq_init_status {
  * CPUCP_PACKET_POWER_GET
  *       Fetch the present power consumption of the device (Current * Voltage).
  *
+ * CPUCP_PACKET_NIC_PFC_SET -
+ *       Enable/Disable the NIC PFC feature. The packet's arguments specify the
+ *       NIC port, relevant lanes to configure and one bit indication for
+ *       enable/disable.
+ *
+ * CPUCP_PACKET_NIC_FAULT_GET -
+ *       Fetch the current indication for local/remote faults from the NIC MAC.
+ *       The result is 32-bit value of the relevant register.
+ *
+ * CPUCP_PACKET_NIC_LPBK_SET -
+ *       Enable/Disable the MAC loopback feature. The packet's arguments specify
+ *       the NIC port, relevant lanes to configure and one bit indication for
+ *       enable/disable.
+ *
+ * CPUCP_PACKET_NIC_MAC_INIT -
+ *       Configure the NIC MAC channels. The packet's arguments specify the
+ *       NIC port and the speed.
+ *
+ * CPUCP_PACKET_MSI_INFO_SET -
+ *       set the index number for each supported msi type going from
+ *       host to device
  */
 
 enum cpucp_packet_id {
@@ -337,6 +358,11 @@ enum cpucp_packet_id {
 	CPUCP_PACKET_PLL_INFO_GET,		/* internal */
 	CPUCP_PACKET_NIC_STATUS,		/* internal */
 	CPUCP_PACKET_POWER_GET,			/* internal */
+	CPUCP_PACKET_NIC_PFC_SET,		/* internal */
+	CPUCP_PACKET_NIC_FAULT_GET,		/* internal */
+	CPUCP_PACKET_NIC_LPBK_SET,		/* internal */
+	CPUCP_PACKET_NIC_MAC_CFG,		/* internal */
+	CPUCP_PACKET_MSI_INFO_SET,		/* internal */
 };
 
 #define CPUCP_PACKET_FENCE_VAL	0xFE8CE7A5
@@ -408,6 +434,12 @@ struct cpucp_unmask_irq_arr_packet {
 	__le32 irqs[0];
 };
 
+struct cpucp_array_data_packet {
+	struct cpucp_packet cpucp_pkt;
+	__le32 length;
+	__le32 data[0];
+};
+
 enum cpucp_packet_rc {
 	cpucp_packet_success,
 	cpucp_packet_invalid,
@@ -476,6 +508,22 @@ enum cpucp_pll_type_attributes {
 	cpucp_pll_pci,
 };
 
+/*
+ * MSI type enumeration table for all ASICs and future SW versions.
+ * For future ASIC-LKD compatibility, we can only add new enumerations.
+ * at the end of the table (before CPUCP_NUM_OF_MSI_TYPES).
+ * Changing the order of entries or removing entries is not allowed.
+ */
+enum cpucp_msi_type {
+	CPUCP_EVENT_QUEUE_MSI_TYPE,
+	CPUCP_NIC_PORT1_MSI_TYPE,
+	CPUCP_NIC_PORT3_MSI_TYPE,
+	CPUCP_NIC_PORT5_MSI_TYPE,
+	CPUCP_NIC_PORT7_MSI_TYPE,
+	CPUCP_NIC_PORT9_MSI_TYPE,
+	CPUCP_NUM_OF_MSI_TYPES
+};
+
 /*
  * PLL enumeration table used for all ASICs and future SW versions.
  * For future ASIC-LKD compatibility, we can only add new enumerations.
@@ -492,23 +540,16 @@ enum pll_index {
 	TPC_PLL = 6,
 	IF_PLL = 7,
 	SRAM_PLL = 8,
-	NS_DCORE_PLL = 9,
-	MESH_DCORE_PLL = 10,
-	HBM_PLL = 11,
-	TPC_DCORE_PLL = 12,
-	VIDEO_DCORE_PLL = 13,
-	SRAM_DCORE_PLL = 14,
-	NIC_PHY_DCORE_PLL = 15,
-	MSS_DCORE_PLL = 16,
-	DMA_DCORE_PLL = 17,
-	SIF_PLL = 18,
-	DDR_PLL = 19,
-	VID_PLL = 20,
-	BANK_PLL = 21,
-	MMU_PLL = 22,
-	IC_PLL = 23,
-	MC_PLL = 24,
-	EMMC_PLL = 25,
+	NS_PLL = 9,
+	HBM_PLL = 10,
+	MSS_PLL = 11,
+	DDR_PLL = 12,
+	VID_PLL = 13,
+	BANK_PLL = 14,
+	MMU_PLL = 15,
+	IC_PLL = 16,
+	MC_PLL = 17,
+	EMMC_PLL = 18,
 	PLL_MAX
 };
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/7] habanalabs: move dram scrub to free sequence
  2021-04-08 15:22 [PATCH 1/7] habanalabs/gaudi: clear QM errors only if not in stop_on_err mode Oded Gabbay
  2021-04-08 15:22 ` [PATCH 2/7] habanalabs: send dynamic msi-x indexes to f/w Oded Gabbay
@ 2021-04-08 15:22 ` Oded Gabbay
  2021-04-08 15:22 ` [PATCH 4/7] habanalabs/gaudi: derive security status from pci id Oded Gabbay
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2021-04-08 15:22 UTC (permalink / raw)
  To: linux-kernel; +Cc: Bharat Jauhari

From: Bharat Jauhari <bjauhari@habana.ai>

DRAM scrubbing can take time, hence it adds to latency during allocation.
To minimize latency during initialization, scrubbing is moved to the
release call.
If scrubbing fails, it means the device is in a bad state, hence a HARD
reset is initiated.

Signed-off-by: Bharat Jauhari <bjauhari@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/memory.c | 87 ++++++++++++++-----------
 1 file changed, 48 insertions(+), 39 deletions(-)

diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 6530fddbbc21..2938cbbafbbc 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -81,16 +81,6 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 				num_pgs, total_size);
 			return -ENOMEM;
 		}
-
-		if (hdev->memory_scrub) {
-			rc = hdev->asic_funcs->scrub_device_mem(hdev, paddr,
-					total_size);
-			if (rc) {
-				dev_err(hdev->dev,
-					"Failed to scrub contiguous device memory\n");
-				goto pages_pack_err;
-			}
-		}
 	}
 
 	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
@@ -128,17 +118,6 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 				goto page_err;
 			}
 
-			if (hdev->memory_scrub) {
-				rc = hdev->asic_funcs->scrub_device_mem(hdev,
-						phys_pg_pack->pages[i],
-						page_size);
-				if (rc) {
-					dev_err(hdev->dev,
-						"Failed to scrub device memory\n");
-					goto page_err;
-				}
-			}
-
 			num_curr_pgs++;
 		}
 	}
@@ -280,37 +259,67 @@ static void dram_pg_pool_do_release(struct kref *ref)
  * @phys_pg_pack: physical page pack to free.
  *
  * This function does the following:
- * - For DRAM memory only, iterate over the pack and free each physical block
- *   structure by returning it to the general pool.
+ * - For DRAM memory only
+ *   - iterate over the pack, scrub and free each physical block structure by
+ *     returning it to the general pool.
+ *     In case of error during scrubbing, initiate hard reset.
+ *     Once hard reset is triggered, scrubbing is bypassed while freeing the
+ *     memory continues.
  * - Free the hl_vm_phys_pg_pack structure.
  */
-static void free_phys_pg_pack(struct hl_device *hdev,
+static int free_phys_pg_pack(struct hl_device *hdev,
 				struct hl_vm_phys_pg_pack *phys_pg_pack)
 {
 	struct hl_vm *vm = &hdev->vm;
 	u64 i;
+	int rc = 0;
+
+	if (phys_pg_pack->created_from_userptr)
+		goto end;
 
-	if (!phys_pg_pack->created_from_userptr) {
-		if (phys_pg_pack->contiguous) {
-			gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
+	if (phys_pg_pack->contiguous) {
+		if (hdev->memory_scrub && !hdev->disabled) {
+			rc = hdev->asic_funcs->scrub_device_mem(hdev,
+					phys_pg_pack->pages[0],
 					phys_pg_pack->total_size);
+			if (rc)
+				dev_err(hdev->dev,
+					"Failed to scrub contiguous device memory\n");
+		}
 
-			for (i = 0; i < phys_pg_pack->npages ; i++)
-				kref_put(&vm->dram_pg_pool_refcount,
-					dram_pg_pool_do_release);
-		} else {
-			for (i = 0 ; i < phys_pg_pack->npages ; i++) {
-				gen_pool_free(vm->dram_pg_pool,
+		gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
+			phys_pg_pack->total_size);
+
+		for (i = 0; i < phys_pg_pack->npages ; i++)
+			kref_put(&vm->dram_pg_pool_refcount,
+				dram_pg_pool_do_release);
+	} else {
+		for (i = 0 ; i < phys_pg_pack->npages ; i++) {
+			if (hdev->memory_scrub && !hdev->disabled && rc == 0) {
+				rc = hdev->asic_funcs->scrub_device_mem(
+						hdev,
 						phys_pg_pack->pages[i],
 						phys_pg_pack->page_size);
-				kref_put(&vm->dram_pg_pool_refcount,
-					dram_pg_pool_do_release);
+				if (rc)
+					dev_err(hdev->dev,
+						"Failed to scrub device memory\n");
 			}
+			gen_pool_free(vm->dram_pg_pool,
+				phys_pg_pack->pages[i],
+				phys_pg_pack->page_size);
+			kref_put(&vm->dram_pg_pool_refcount,
+				dram_pg_pool_do_release);
 		}
 	}
 
+	if (rc && !hdev->disabled)
+		hl_device_reset(hdev, HL_RESET_HARD);
+
+end:
 	kvfree(phys_pg_pack->pages);
 	kfree(phys_pg_pack);
+
+	return rc;
 }
 
 /**
@@ -349,7 +358,7 @@ static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args)
 		atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
 		atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);
 
-		free_phys_pg_pack(hdev, phys_pg_pack);
+		return free_phys_pg_pack(hdev, phys_pg_pack);
 	} else {
 		spin_unlock(&vm->idr_lock);
 		dev_err(hdev->dev,
@@ -1131,9 +1140,9 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	*device_addr = ret_vaddr;
 
 	if (is_userptr)
-		free_phys_pg_pack(hdev, phys_pg_pack);
+		rc = free_phys_pg_pack(hdev, phys_pg_pack);
 
-	return 0;
+	return rc;
 
 map_err:
 	if (add_va_block(hdev, va_range, ret_vaddr,
@@ -1286,7 +1295,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	kfree(hnode);
 
 	if (is_userptr) {
-		free_phys_pg_pack(hdev, phys_pg_pack);
+		rc = free_phys_pg_pack(hdev, phys_pg_pack);
 		dma_unmap_host_va(hdev, userptr);
 	}
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/7] habanalabs/gaudi: derive security status from pci id
  2021-04-08 15:22 [PATCH 1/7] habanalabs/gaudi: clear QM errors only if not in stop_on_err mode Oded Gabbay
  2021-04-08 15:22 ` [PATCH 2/7] habanalabs: send dynamic msi-x indexes to f/w Oded Gabbay
  2021-04-08 15:22 ` [PATCH 3/7] habanalabs: move dram scrub to free sequence Oded Gabbay
@ 2021-04-08 15:22 ` Oded Gabbay
  2021-04-08 15:22 ` [PATCH 5/7] habanalabs/gaudi: skip iATU if F/W security is enabled Oded Gabbay
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2021-04-08 15:22 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

As the F/W security indication must be available before the driver
accesses the PCI bus, F/W security should be derived from the PCI ID
rather than be fetched during the boot handshake with the F/W.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c       |  4 ++++
 drivers/misc/habanalabs/common/firmware_if.c  |  6 +++---
 drivers/misc/habanalabs/common/habanalabs.h   |  4 +++-
 .../misc/habanalabs/common/habanalabs_drv.c   | 21 +++++++++++++++++++
 drivers/misc/habanalabs/common/mmu/mmu.c      |  1 +
 drivers/misc/habanalabs/common/sysfs.c        |  3 +++
 drivers/misc/habanalabs/gaudi/gaudi.c         |  2 --
 drivers/misc/habanalabs/goya/goya.c           |  2 --
 8 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 2ed4f2bedc08..00e92b678828 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -327,6 +327,10 @@ static int device_early_init(struct hl_device *hdev)
 		gaudi_set_asic_funcs(hdev);
 		strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
 		break;
+	case ASIC_GAUDI_SEC:
+		gaudi_set_asic_funcs(hdev);
+		strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name));
+		break;
 	default:
 		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
 			hdev->asic_type);
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 532a2fd7bfb4..652571d3b8e6 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -819,16 +819,16 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
 	if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
 		prop->fw_security_status_valid = 1;
 
+		/* FW security should be derived from PCI ID, we keep this
+		 * check for backward compatibility
+		 */
 		if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)
 			prop->fw_security_disabled = false;
-		else
-			prop->fw_security_disabled = true;
 
 		if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
 			prop->hard_reset_done_by_fw = true;
 	} else {
 		prop->fw_security_status_valid = 0;
-		prop->fw_security_disabled = true;
 	}
 
 	dev_dbg(hdev->dev, "Firmware preboot security status %#x\n",
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 867986ef4588..c1b46126c522 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -766,11 +766,13 @@ struct hl_eq {
  * @ASIC_INVALID: Invalid ASIC type.
  * @ASIC_GOYA: Goya device.
  * @ASIC_GAUDI: Gaudi device.
+ * @ASIC_GAUDI_SEC: Gaudi secured device (HL-2000).
  */
 enum hl_asic_type {
 	ASIC_INVALID,
 	ASIC_GOYA,
-	ASIC_GAUDI
+	ASIC_GAUDI,
+	ASIC_GAUDI_SEC
 };
 
 struct hl_cs_parser;
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 59896566dca1..7135f1e03864 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -47,10 +47,12 @@ MODULE_PARM_DESC(memory_scrub,
 
 #define PCI_IDS_GOYA			0x0001
 #define PCI_IDS_GAUDI			0x1000
+#define PCI_IDS_GAUDI_SEC		0x1010
 
 static const struct pci_device_id ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, ids);
@@ -74,6 +76,9 @@ static enum hl_asic_type get_asic_type(u16 device)
 	case PCI_IDS_GAUDI:
 		asic_type = ASIC_GAUDI;
 		break;
+	case PCI_IDS_GAUDI_SEC:
+		asic_type = ASIC_GAUDI_SEC;
+		break;
 	default:
 		asic_type = ASIC_INVALID;
 		break;
@@ -82,6 +87,16 @@ static enum hl_asic_type get_asic_type(u16 device)
 	return asic_type;
 }
 
+static bool is_asic_secured(enum hl_asic_type asic_type)
+{
+	switch (asic_type) {
+	case ASIC_GAUDI_SEC:
+		return true;
+	default:
+		return false;
+	}
+}
+
 /*
  * hl_device_open - open function for habanalabs device
  *
@@ -287,6 +302,12 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 		hdev->asic_type = asic_type;
 	}
 
+	if (pdev)
+		hdev->asic_prop.fw_security_disabled =
+				!is_asic_secured(pdev->device);
+	else
+		hdev->asic_prop.fw_security_disabled = true;
+
 	/* Assign status description string */
 	strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
 					"disabled", HL_STR_MAX);
diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c
index ae1778103e23..b37189956b14 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu.c
@@ -591,6 +591,7 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
 	switch (hdev->asic_type) {
 	case ASIC_GOYA:
 	case ASIC_GAUDI:
+	case ASIC_GAUDI_SEC:
 		hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]);
 		break;
 	default:
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index c7ac5dc0cda4..9fa61573a89d 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -257,6 +257,9 @@ static ssize_t device_type_show(struct device *dev,
 	case ASIC_GAUDI:
 		str = "GAUDI";
 		break;
+	case ASIC_GAUDI_SEC:
+		str = "GAUDI SEC";
+		break;
 	default:
 		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
 				hdev->asic_type);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 62e3c63bec20..841748392e49 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -575,8 +575,6 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
 	for (i = 0 ; i < HL_MAX_DCORES ; i++)
 		prop->first_available_cq[i] = USHRT_MAX;
 
-	/* disable fw security for now, set it in a later stage */
-	prop->fw_security_disabled = true;
 	prop->fw_security_status_valid = false;
 	prop->hard_reset_done_by_fw = false;
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 9d49ba649db0..44dd4d8d8822 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -484,8 +484,6 @@ int goya_get_fixed_properties(struct hl_device *hdev)
 	for (i = 0 ; i < HL_MAX_DCORES ; i++)
 		prop->first_available_cq[i] = USHRT_MAX;
 
-	/* disable fw security for now, set it in a later stage */
-	prop->fw_security_disabled = true;
 	prop->fw_security_status_valid = false;
 	prop->hard_reset_done_by_fw = false;
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 5/7] habanalabs/gaudi: skip iATU if F/W security is enabled
  2021-04-08 15:22 [PATCH 1/7] habanalabs/gaudi: clear QM errors only if not in stop_on_err mode Oded Gabbay
                   ` (2 preceding siblings ...)
  2021-04-08 15:22 ` [PATCH 4/7] habanalabs/gaudi: derive security status from pci id Oded Gabbay
@ 2021-04-08 15:22 ` Oded Gabbay
  2021-04-08 15:22 ` [PATCH 6/7] habanalabs: print on f/w boot unknown error Oded Gabbay
  2021-04-08 15:22 ` [PATCH 7/7] habanalabs: update to latest F/W communication header Oded Gabbay
  5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2021-04-08 15:22 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

As part of securing GAUDI, the F/W will configure the PCI iATU
regions. If the driver identifies a secured PCI ID, it will know to
skip iATU configuration at a very early stage.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h |  3 ++
 drivers/misc/habanalabs/common/pci/pci.c    | 52 +++++++++++++++++++++
 drivers/misc/habanalabs/gaudi/gaudi.c       | 23 +++++++++
 drivers/misc/habanalabs/goya/goya.c         | 24 +++++++++-
 4 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index c1b46126c522..44e89da30b4a 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -445,6 +445,7 @@ struct hl_mmu_properties {
  * @dram_supports_virtual_memory: is there an MMU towards the DRAM
  * @hard_reset_done_by_fw: true if firmware is handling hard reset flow
  * @num_functional_hbms: number of functional HBMs in each DCORE.
+ * @iatu_done_by_fw: true if iATU configuration is being done by FW.
  */
 struct asic_fixed_properties {
 	struct hw_queue_properties	*hw_queues_props;
@@ -508,6 +509,7 @@ struct asic_fixed_properties {
 	u8				dram_supports_virtual_memory;
 	u8				hard_reset_done_by_fw;
 	u8				num_functional_hbms;
+	u8				iatu_done_by_fw;
 };
 
 /**
@@ -2400,6 +2402,7 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
 
 int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
 			bool is_wc[3]);
+int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data);
 int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
 int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
 		struct hl_inbound_pci_region *pci_region);
diff --git a/drivers/misc/habanalabs/common/pci/pci.c b/drivers/misc/habanalabs/common/pci/pci.c
index b799f9258fb0..e941b7eef346 100644
--- a/drivers/misc/habanalabs/common/pci/pci.c
+++ b/drivers/misc/habanalabs/common/pci/pci.c
@@ -85,6 +85,58 @@ static void hl_pci_bars_unmap(struct hl_device *hdev)
 	pci_release_regions(pdev);
 }
 
+int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data)
+{
+	struct pci_dev *pdev = hdev->pdev;
+	ktime_t timeout;
+	u64 msec;
+	u32 val;
+
+	if (hdev->pldm)
+		msec = HL_PLDM_PCI_ELBI_TIMEOUT_MSEC;
+	else
+		msec = HL_PCI_ELBI_TIMEOUT_MSEC;
+
+	/* Clear previous status */
+	pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0);
+
+	pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr);
+	pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL, 0);
+
+	timeout = ktime_add_ms(ktime_get(), msec);
+	for (;;) {
+		pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val);
+		if (val & PCI_CONFIG_ELBI_STS_MASK)
+			break;
+		if (ktime_compare(ktime_get(), timeout) > 0) {
+			pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS,
+						&val);
+			break;
+		}
+
+		usleep_range(300, 500);
+	}
+
+	if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) {
+		pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data);
+
+		return 0;
+	}
+
+	if (val & PCI_CONFIG_ELBI_STS_ERR) {
+		dev_err(hdev->dev, "Error reading from ELBI\n");
+		return -EIO;
+	}
+
+	if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
+		dev_err(hdev->dev, "ELBI read didn't finish in time\n");
+		return -EIO;
+	}
+
+	dev_err(hdev->dev, "ELBI read has undefined bits in status\n");
+	return -EIO;
+}
+
 /**
  * hl_pci_elbi_write() - Write through the ELBI interface.
  * @hdev: Pointer to hl_device structure.
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 841748392e49..8730b691ec61 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -629,6 +629,11 @@ static int gaudi_init_iatu(struct hl_device *hdev)
 	struct hl_outbound_pci_region outbound_region;
 	int rc;
 
+	if (hdev->asic_prop.iatu_done_by_fw) {
+		hdev->asic_funcs->set_dma_mask_from_fw(hdev);
+		return 0;
+	}
+
 	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
 	inbound_region.mode = PCI_BAR_MATCH_MODE;
 	inbound_region.bar = SRAM_BAR_ID;
@@ -673,6 +678,7 @@ static int gaudi_early_init(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct pci_dev *pdev = hdev->pdev;
+	u32 fw_boot_status;
 	int rc;
 
 	rc = gaudi_get_fixed_properties(hdev);
@@ -706,6 +712,23 @@ static int gaudi_early_init(struct hl_device *hdev)
 
 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
 
+	/* If FW security is enabled at this point it means no access to ELBI */
+	if (!hdev->asic_prop.fw_security_disabled) {
+		hdev->asic_prop.iatu_done_by_fw = true;
+		goto pci_init;
+	}
+
+	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
+				&fw_boot_status);
+	if (rc)
+		goto free_queue_props;
+
+	/* Check whether FW is configuring iATU */
+	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
+			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
+		hdev->asic_prop.iatu_done_by_fw = true;
+
+pci_init:
 	rc = hl_pci_init(hdev);
 	if (rc)
 		goto free_queue_props;
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 44dd4d8d8822..e27338f4aad2 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -555,6 +555,11 @@ static int goya_init_iatu(struct hl_device *hdev)
 	struct hl_outbound_pci_region outbound_region;
 	int rc;
 
+	if (hdev->asic_prop.iatu_done_by_fw) {
+		hdev->asic_funcs->set_dma_mask_from_fw(hdev);
+		return 0;
+	}
+
 	/* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
 	inbound_region.mode = PCI_BAR_MATCH_MODE;
 	inbound_region.bar = SRAM_CFG_BAR_ID;
@@ -602,7 +607,7 @@ static int goya_early_init(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct pci_dev *pdev = hdev->pdev;
-	u32 val;
+	u32 fw_boot_status, val;
 	int rc;
 
 	rc = goya_get_fixed_properties(hdev);
@@ -636,6 +641,23 @@ static int goya_early_init(struct hl_device *hdev)
 
 	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
 
+	/* If FW security is enabled at this point it means no access to ELBI */
+	if (!hdev->asic_prop.fw_security_disabled) {
+		hdev->asic_prop.iatu_done_by_fw = true;
+		goto pci_init;
+	}
+
+	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
+				&fw_boot_status);
+	if (rc)
+		goto free_queue_props;
+
+	/* Check whether FW is configuring iATU */
+	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
+			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
+		hdev->asic_prop.iatu_done_by_fw = true;
+
+pci_init:
 	rc = hl_pci_init(hdev);
 	if (rc)
 		goto free_queue_props;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 6/7] habanalabs: print on f/w boot unknown error
  2021-04-08 15:22 [PATCH 1/7] habanalabs/gaudi: clear QM errors only if not in stop_on_err mode Oded Gabbay
                   ` (3 preceding siblings ...)
  2021-04-08 15:22 ` [PATCH 5/7] habanalabs/gaudi: skip iATU if F/W security is enabled Oded Gabbay
@ 2021-04-08 15:22 ` Oded Gabbay
  2021-04-08 15:22 ` [PATCH 7/7] habanalabs: update to latest F/W communication header Oded Gabbay
  5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2021-04-08 15:22 UTC (permalink / raw)
  To: linux-kernel

We need to print a message to the kernel log in case we encounter
an unknown error in the f/w boot to help the user understand what
happened.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 652571d3b8e6..4c096b6132b5 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -351,8 +351,12 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
 		dev_dbg(hdev->dev, "Device security status %#x\n",
 				security_val);
 
-	if (err_val & ~CPU_BOOT_ERR0_ENABLED)
+	if (err_val & ~CPU_BOOT_ERR0_ENABLED) {
+		dev_err(hdev->dev,
+			"Device boot error - unknown error 0x%08x\n",
+			err_val);
 		return -EIO;
+	}
 
 	return 0;
 }
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 7/7] habanalabs: update to latest F/W communication header
  2021-04-08 15:22 [PATCH 1/7] habanalabs/gaudi: clear QM errors only if not in stop_on_err mode Oded Gabbay
                   ` (4 preceding siblings ...)
  2021-04-08 15:22 ` [PATCH 6/7] habanalabs: print on f/w boot unknown error Oded Gabbay
@ 2021-04-08 15:22 ` Oded Gabbay
  5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2021-04-08 15:22 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ohad Sharabi

From: Ohad Sharabi <osharabi@habana.ai>

Update files to the latest version from the F/W team.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../misc/habanalabs/include/common/cpucp_if.h |   3 +-
 .../habanalabs/include/common/hl_boot_if.h    | 198 ++++++++++++++++++
 2 files changed, 200 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index 20a710f7a369..27cd0ba99aa3 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -11,6 +11,8 @@
 #include <linux/types.h>
 #include <linux/if_ether.h>
 
+#include "hl_boot_if.h"
+
 #define NUM_HBM_PSEUDO_CH				2
 #define NUM_HBM_CH_PER_DEV				8
 #define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_SHIFT		0
@@ -564,7 +566,6 @@ struct eq_generic_event {
  */
 
 #define CARD_NAME_MAX_LEN		16
-#define VERSION_MAX_LEN			128
 #define CPUCP_MAX_SENSORS		128
 #define CPUCP_MAX_NICS			128
 #define CPUCP_LANES_PER_NIC		4
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index 980b432fd76e..e0a259e0495c 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -13,6 +13,8 @@
 
 #define BOOT_FIT_SRAM_OFFSET		0x200000
 
+#define VERSION_MAX_LEN			128
+
 /*
  * CPU error bits in BOOT_ERROR registers
  *
@@ -251,6 +253,7 @@ enum kmd_msg {
 	KMD_MSG_SKIP_BMC,
 	RESERVED,
 	KMD_MSG_RST_DEV,
+	KMD_MSG_LAST
 };
 
 enum cpu_msg_status {
@@ -259,4 +262,199 @@ enum cpu_msg_status {
 	CPU_MSG_ERR,
 };
 
+/* communication registers mapping - consider ABI when changing */
+struct cpu_dyn_regs {
+	uint32_t cpu_pq_base_addr_low;
+	uint32_t cpu_pq_base_addr_high;
+	uint32_t cpu_pq_length;
+	uint32_t cpu_pq_init_status;
+	uint32_t cpu_eq_base_addr_low;
+	uint32_t cpu_eq_base_addr_high;
+	uint32_t cpu_eq_length;
+	uint32_t cpu_eq_ci;
+	uint32_t cpu_cq_base_addr_low;
+	uint32_t cpu_cq_base_addr_high;
+	uint32_t cpu_cq_length;
+	uint32_t cpu_pf_pq_pi;
+	uint32_t cpu_boot_dev_sts0;
+	uint32_t cpu_boot_dev_sts1;
+	uint32_t cpu_boot_err0;
+	uint32_t cpu_boot_err1;
+	uint32_t cpu_boot_status;
+	uint32_t fw_upd_sts;
+	uint32_t fw_upd_cmd;
+	uint32_t fw_upd_pending_sts;
+	uint32_t fuse_ver_offset;
+	uint32_t preboot_ver_offset;
+	uint32_t uboot_ver_offset;
+	uint32_t hw_state;
+	uint32_t kmd_msg_to_cpu;
+	uint32_t cpu_cmd_status_to_host;
+	uint32_t reserved1[32];		/* reserve for future use */
+};
+
+/* HCDM - Habana Communications Descriptor Magic */
+#define HL_COMMS_DESC_MAGIC	0x4843444D
+#define HL_COMMS_DESC_VER	1
+
+/* this is the comms descriptor header - meta data */
+struct comms_desc_header {
+	uint32_t magic;		/* magic for validation */
+	uint32_t crc32;		/* CRC32 of the descriptor w/o header */
+	uint16_t size;		/* size of the descriptor w/o header */
+	uint8_t version;	/* descriptor version */
+	uint8_t reserved[5];	/* pad to 64 bit */
+};
+
+/* this is the main FW descriptor - consider ABI when changing */
+struct lkd_fw_comms_desc {
+	struct comms_desc_header header;
+	struct cpu_dyn_regs cpu_dyn_regs;
+	char fuse_ver[VERSION_MAX_LEN];
+	char cur_fw_ver[VERSION_MAX_LEN];
+	/* can be used for 1 more version w/o ABI change */
+	char reserved0[VERSION_MAX_LEN];
+	uint64_t img_addr;	/* address for next FW component load */
+};
+
+/*
+ * LKD commands:
+ *
+ * COMMS_NOOP			Used to clear the command register and no actual
+ *				command is sent.
+ *
+ * COMMS_CLR_STS		Clear status command - FW should clear the
+ *				status register. Used for synchronization
+ *				between the commands as part of the race free
+ *				protocol.
+ *
+ * COMMS_RST_STATE		Reset the current communication state which is
+ *				kept by FW for proper responses.
+ *				Should be used in the beginning of the
+ *				communication cycle to clean any leftovers from
+ *				previous communication attempts.
+ *
+ * COMMS_PREP_DESC		Prepare descriptor for setting up the
+ *				communication and other dynamic data:
+ *				struct lkd_fw_comms_desc.
+ *				This command has a parameter stating the next FW
+ *				component size, so the FW can actually prepare a
+ *				space for it and in the status response provide
+ *				the descriptor offset. The offset of the next FW
+ *				data component is a part of the descriptor
+ *				structure.
+ *
+ * COMMS_DATA_RDY		The FW data has been uploaded and is ready for
+ *				validation.
+ *
+ * COMMS_EXEC			Execute the next FW component.
+ *
+ * COMMS_RST_DEV		Reset the device.
+ *
+ * COMMS_GOTO_WFE		Execute WFE command. Allowed only on non-secure
+ *				devices.
+ *
+ * COMMS_SKIP_BMC		Perform actions required for BMC-less servers.
+ *				Do not wait for BMC response.
+ *
+ * COMMS_LOW_PLL_OPP		Initialize PLLs for low OPP.
+ */
+enum comms_cmd {
+	COMMS_NOOP = 0,
+	COMMS_CLR_STS = 1,
+	COMMS_RST_STATE = 2,
+	COMMS_PREP_DESC = 3,
+	COMMS_DATA_RDY = 4,
+	COMMS_EXEC = 5,
+	COMMS_RST_DEV = 6,
+	COMMS_GOTO_WFE = 7,
+	COMMS_SKIP_BMC = 8,
+	COMMS_LOW_PLL_OPP = 9,
+	COMMS_INVLD_LAST
+};
+
+#define COMMS_COMMAND_SIZE_SHIFT	0
+#define COMMS_COMMAND_SIZE_MASK		0x1FFFFFF
+#define COMMS_COMMAND_CMD_SHIFT		27
+#define COMMS_COMMAND_CMD_MASK		0xF8000000
+
+/*
+ * LKD command to FW register structure
+ * @size	- FW component size
+ * @cmd		- command from enum comms_cmd
+ */
+struct comms_command {
+	union {		/* bit fields are only for FW use */
+		struct {
+			unsigned int size :25;		/* 32MB max. */
+			unsigned int reserved :2;
+			enum comms_cmd cmd :5;		/* 32 commands */
+		};
+		unsigned int val;
+	};
+};
+
+/*
+ * FW status
+ *
+ * COMMS_STS_NOOP		Used to clear the status register and no actual
+ *				status is provided.
+ *
+ * COMMS_STS_ACK		Command has been received and recognized.
+ *
+ * COMMS_STS_OK			Command execution has finished successfully.
+ *
+ * COMMS_STS_ERR		Command execution was unsuccessful and resulted
+ *				in error.
+ *
+ * COMMS_STS_VALID_ERR		FW validation has failed.
+ *
+ * COMMS_STS_TIMEOUT_ERR	Command execution has timed out.
+ */
+enum comms_sts {
+	COMMS_STS_NOOP = 0,
+	COMMS_STS_ACK = 1,
+	COMMS_STS_OK = 2,
+	COMMS_STS_ERR = 3,
+	COMMS_STS_VALID_ERR = 4,
+	COMMS_STS_TIMEOUT_ERR = 5,
+	COMMS_STS_INVLD_LAST
+};
+
+/* RAM types for FW components loading - defines the base address */
+enum comms_ram_types {
+	COMMS_SRAM = 0,
+	COMMS_DRAM = 1,
+};
+
+#define COMMS_STATUS_OFFSET_SHIFT	0
+#define COMMS_STATUS_OFFSET_MASK	0x03FFFFFF
+#define COMMS_STATUS_OFFSET_ALIGN_SHIFT	2
+#define COMMS_STATUS_RAM_TYPE_SHIFT	26
+#define COMMS_STATUS_RAM_TYPE_MASK	0x0C000000
+#define COMMS_STATUS_STATUS_SHIFT	28
+#define COMMS_STATUS_STATUS_MASK	0xF0000000
+
+/*
+ * FW status to LKD register structure
+ * @offset	- an offset from the base of the ram_type shifted right by
+ *		  2 bits (always aligned to 32 bits).
+ *		  Allows a maximum addressable offset of 256MB from RAM base.
+ *		  Example: for real offset in RAM of 0x800000 (8MB), the value
+ *		  in offset field is (0x800000 >> 2) = 0x200000.
+ * @ram_type	- the RAM type that should be used for offset from
+ *		  enum comms_ram_types
+ * @status	- status from enum comms_sts
+ */
+struct comms_status {
+	union {		/* bit fields are only for FW use */
+		struct {
+			unsigned int offset :26;
+			unsigned int ram_type :2;
+			enum comms_sts status :4;	/* 16 statuses */
+		};
+		unsigned int val;
+	};
+};
+
 #endif /* HL_BOOT_IF_H */
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-04-08 15:23 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-08 15:22 [PATCH 1/7] habanalabs/gaudi: clear QM errors only if not in stop_on_err mode Oded Gabbay
2021-04-08 15:22 ` [PATCH 2/7] habanalabs: send dynamic msi-x indexes to f/w Oded Gabbay
2021-04-08 15:22 ` [PATCH 3/7] habanalabs: move dram scrub to free sequence Oded Gabbay
2021-04-08 15:22 ` [PATCH 4/7] habanalabs/gaudi: derive security status from pci id Oded Gabbay
2021-04-08 15:22 ` [PATCH 5/7] habanalabs/gaudi: skip iATU if F/W security is enabled Oded Gabbay
2021-04-08 15:22 ` [PATCH 6/7] habanalabs: print on f/w boot unknown error Oded Gabbay
2021-04-08 15:22 ` [PATCH 7/7] habanalabs: update to latest F/W communication header Oded Gabbay

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.