linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/5] habanalabs: notify before f/w loading
@ 2021-05-15 16:31 Oded Gabbay
  2021-05-15 16:31 ` [PATCH 2/5] habanalabs/gaudi: send hard reset cause to preboot Oded Gabbay
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Oded Gabbay @ 2021-05-15 16:31 UTC (permalink / raw)
  To: linux-kernel

An information print notifying on starting to load the f/w was removed
by mistake when moving to the new dynamic f/w loading mechanism.

Restore that print, as F/W loading usually takes between 10 and 20
seconds and the print lets the user know the status of the driver
load.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 3969351b5513..8922d4a43919 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -1951,6 +1951,9 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 	struct cpu_dyn_regs *dyn_regs;
 	int rc;
 
+	dev_info(hdev->dev,
+		"Loading firmware to device, may take some time...\n");
+
 	dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;
 
 	rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_RST_STATE,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/5] habanalabs/gaudi: send hard reset cause to preboot
  2021-05-15 16:31 [PATCH 1/5] habanalabs: notify before f/w loading Oded Gabbay
@ 2021-05-15 16:31 ` Oded Gabbay
  2021-05-15 16:31 ` [PATCH 3/5] habanalabs: check if asic secured with asic type Oded Gabbay
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Oded Gabbay @ 2021-05-15 16:31 UTC (permalink / raw)
  To: linux-kernel; +Cc: Koby Elbaz

From: Koby Elbaz <kelbaz@habana.ai>

LKD should provide hard reset cause to preboot prior to
loading any FW components (in case needed).
The current implementation is based on the new FW 'COMMS' protocol.
In case 'COMMS' is disabled, the reset cause won't be sent.
Currently, only 2 reset causes are shared: HEARTBEAT & TDR.

Sending the reset cause will provide the missing watchdog
info that the firmware needs to provide to the BMC.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../habanalabs/common/command_submission.c    |   2 +-
 drivers/misc/habanalabs/common/device.c       |  18 ++-
 drivers/misc/habanalabs/common/firmware_if.c  | 127 +++++++++++++++++-
 drivers/misc/habanalabs/common/habanalabs.h   |   7 +
 .../misc/habanalabs/common/habanalabs_drv.c   |   2 +
 .../habanalabs/include/common/hl_boot_if.h    |  60 ++++++++-
 6 files changed, 208 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index af3c497defb1..ecd96fbe3150 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -614,7 +614,7 @@ static void cs_timedout(struct work_struct *work)
 	cs_put(cs);
 
 	if (hdev->reset_on_lockup)
-		hl_device_reset(hdev, 0);
+		hl_device_reset(hdev, HL_RESET_TDR);
 	else
 		hdev->needs_reset = true;
 }
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 00e92b678828..bc58a91bf50a 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -900,6 +900,19 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 		if (rc)
 			return 0;
 
+		/*
+		 * 'reset cause' is being updated here, because getting here
+		 * means that it's the 1st time and the last time we're here
+		 * ('in_reset' makes sure of it). This makes sure that
+		 * 'reset_cause' will continue holding its 1st recorded reason!
+		 */
+		if (flags & HL_RESET_HEARTBEAT)
+			hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
+		else if (flags & HL_RESET_TDR)
+			hdev->curr_reset_cause = HL_RESET_CAUSE_TDR;
+		else
+			hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
+
 		/*
 		 * if reset is due to heartbeat, device CPU is no responsive in
 		 * which case no point sending PCI disable message to it
@@ -943,9 +956,8 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 		hdev->process_kill_trial_cnt = 0;
 
 		/*
-		 * Because the reset function can't run from interrupt or
-		 * from heartbeat work, we need to call the reset function
-		 * from a dedicated work
+		 * Because the reset function can't run from heartbeat work,
+		 * we need to call the reset function from a dedicated work.
 		 */
 		queue_delayed_work(hdev->device_reset_work.wq,
 			&hdev->device_reset_work.reset_work, 0);
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 8922d4a43919..2d5a849a377e 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -103,6 +103,41 @@ static int hl_fw_copy_fw_to_device(struct hl_device *hdev,
 	return 0;
 }
 
+/**
+ * hl_fw_copy_msg_to_device() - copy message to device
+ *
+ * @hdev: pointer to hl_device structure.
+ * @msg: message
+ * @dst: IO memory mapped address space to copy the message to
+ * @src_offset: offset in src message to copy from
+ * @size: amount of bytes to copy (0 to copy the whole message)
+ *
+ * Return: 0 on success, -EINVAL if src_offset + size exceeds the message.
+ */
+static int hl_fw_copy_msg_to_device(struct hl_device *hdev,
+		struct lkd_msg_comms *msg, void __iomem *dst,
+		u32 src_offset, u32 size)
+{
+	void *msg_data;
+
+	/* size 0 indicates to copy the whole message */
+	if (!size)
+		size = sizeof(struct lkd_msg_comms);
+
+	if (src_offset + size > sizeof(struct lkd_msg_comms)) {
+		dev_err(hdev->dev,
+			"size to copy(%u) and offset(%u) are invalid\n",
+			size, src_offset);
+		return -EINVAL;
+	}
+
+	msg_data = (void *) msg;
+
+	memcpy_toio(dst, msg_data + src_offset, size);
+
+	return 0;
+}
+
 /**
  * hl_fw_load_fw_to_device() - Load F/W code to device's memory.
  *
@@ -1698,6 +1733,36 @@ static int hl_fw_dynamic_copy_image(struct hl_device *hdev,
 	return rc;
 }
 
+/**
+ * hl_fw_dynamic_copy_msg - copy msg to memory allocated by the FW
+ *
+ * @hdev: pointer to the habanalabs device structure
+ * @msg: message
+ * @fw_loader: managing structure for loading device's FW
+ */
+static int hl_fw_dynamic_copy_msg(struct hl_device *hdev,
+		struct lkd_msg_comms *msg, struct fw_load_mgr *fw_loader)
+{
+	struct lkd_fw_comms_desc *fw_desc;
+	struct pci_mem_region *region;
+	void __iomem *dest;
+	u64 addr;
+	int rc;
+
+	fw_desc = &fw_loader->dynamic_loader.comm_desc;
+	addr = le64_to_cpu(fw_desc->img_addr);
+
+	/* the message is copied into the loader's image_region */
+	region = fw_loader->dynamic_loader.image_region;
+
+	dest = hdev->pcie_bar[region->bar_id] + region->offset_in_bar +
+					(addr - region->region_base);
+
+	rc = hl_fw_copy_msg_to_device(hdev, msg, dest, 0, 0);
+
+	return rc;
+}
+
 /**
  * hl_fw_boot_fit_update_state - update internal data structures after boot-fit
  *                               is loaded
@@ -1771,7 +1836,6 @@ static int hl_fw_dynamic_load_image(struct hl_device *hdev,
 	} else {
 		cur_fwc = FW_COMP_BOOT_FIT;
 		fw_name = fw_loader->linux_img.image_name;
-
 	}
 
 	/* request FW in order to communicate to FW the size to be allocated */
@@ -1927,6 +1991,57 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
 	dev_info(hdev->dev, "Successfully loaded firmware to device\n");
 }
 
+/**
+ * hl_fw_dynamic_report_reset_cause - send a COMMS message with the cause
+ *                                    of the newly triggered hard reset
+ *
+ * @hdev: pointer to the habanalabs device structure
+ * @fw_loader: managing structure for loading device's FW
+ * @reset_cause: enumerated cause for the recent hard reset
+ *
+ * Return: 0 on success, otherwise non-zero error code
+ */
+static int hl_fw_dynamic_report_reset_cause(struct hl_device *hdev,
+		struct fw_load_mgr *fw_loader,
+		enum comms_reset_cause reset_cause)
+{
+	struct lkd_msg_comms msg;
+	int rc;
+
+	memset(&msg, 0, sizeof(msg));
+
+	/* NOTE(review): size is sizeof(header), not payload size - confirm */
+	msg.header.type = HL_COMMS_RESET_CAUSE_TYPE;
+	msg.header.size = cpu_to_le16(sizeof(struct comms_msg_header));
+	msg.header.magic = cpu_to_le32(HL_COMMS_MSG_MAGIC);
+
+	msg.reset_cause = reset_cause;
+
+	rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader,
+			sizeof(struct lkd_msg_comms));
+	if (rc)
+		return rc;
+
+	/* copy message to space allocated by FW */
+	rc = hl_fw_dynamic_copy_msg(hdev, &msg, fw_loader);
+	if (rc)
+		return rc;
+
+	rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_DATA_RDY,
+						0, true,
+						fw_loader->cpu_timeout);
+	if (rc)
+		return rc;
+
+	rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_EXEC,
+						0, true,
+						fw_loader->cpu_timeout);
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
 /**
  * hl_fw_dynamic_init_cpu - initialize the device CPU using dynamic protocol
  *
@@ -1962,6 +2077,16 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 	if (rc)
 		goto protocol_err;
 
+	if (hdev->curr_reset_cause) {
+		rc = hl_fw_dynamic_report_reset_cause(hdev, fw_loader,
+				hdev->curr_reset_cause);
+		if (rc)
+			goto protocol_err;
+
+		/* Clear current reset cause */
+		hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
+	}
+
 	if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) {
 		/* update the preboot state */
 		hl_fw_preboot_update_state(hdev);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 7e13a198a946..a046180254c8 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -115,10 +115,14 @@ enum hl_mmu_page_table_location {
  *
  * - HL_RESET_HEARTBEAT
  *       Set if reset is due to heartbeat
+ *
+ * - HL_RESET_TDR
+ *       Set if reset is due to TDR
  */
 #define HL_RESET_HARD			(1 << 0)
 #define HL_RESET_FROM_RESET_THREAD	(1 << 1)
 #define HL_RESET_HEARTBEAT		(1 << 2)
+#define HL_RESET_TDR			(1 << 3)
 
 #define HL_MAX_SOBS_PER_MONITOR	8
 
@@ -2163,6 +2167,8 @@ struct hl_mmu_funcs {
  * @device_fini_pending: true if device_fini was called and might be
  *                       waiting for the reset thread to finish
  * @supports_staged_submission: true if staged submissions are supported
+ * @curr_reset_cause: saves an enumerated reset cause when a hard reset is
+ *                    triggered, and cleared after it is shared with preboot.
  */
 struct hl_device {
 	struct pci_dev			*pdev;
@@ -2273,6 +2279,7 @@ struct hl_device {
 	u8				process_kill_trial_cnt;
 	u8				device_fini_pending;
 	u8				supports_staged_submission;
+	u8				curr_reset_cause;
 
 	/* Parameters for bring-up */
 	u64				nic_ports_mask;
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 64d1530db985..dc92401e7a3f 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -330,6 +330,8 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 
 	set_driver_behavior_per_device(hdev);
 
+	hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
+
 	if (timeout_locked)
 		hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
 	else
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index 9baa56acf473..e9d86673109c 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -302,19 +302,43 @@ struct cpu_dyn_regs {
 	__le32 reserved1[32];		/* reserve for future use */
 };
 
+/* TODO: remove the desc magic after the code is updated to use message */
 /* HCDM - Habana Communications Descriptor Magic */
 #define HL_COMMS_DESC_MAGIC	0x4843444D
 #define HL_COMMS_DESC_VER	1
 
+/* HCMv - Habana Communications Message + header version */
+#define HL_COMMS_MSG_MAGIC_VER(ver)	(0x48434D00 | ((ver) & 0xff))
+#define HL_COMMS_MSG_MAGIC_V0		HL_COMMS_DESC_MAGIC
+#define HL_COMMS_MSG_MAGIC_V1		HL_COMMS_MSG_MAGIC_VER(1)
+
+#define HL_COMMS_MSG_MAGIC		HL_COMMS_MSG_MAGIC_V1
+
+enum comms_msg_type {
+	HL_COMMS_DESC_TYPE = 0,
+	HL_COMMS_RESET_CAUSE_TYPE = 1,
+};
+
+/* TODO: remove this struct after the code is updated to use comms_msg_header */
 /* this is the comms descriptor header - meta data */
 struct comms_desc_header {
 	__le32 magic;		/* magic for validation */
 	__le32 crc32;		/* CRC32 of the descriptor w/o header */
 	__le16 size;		/* size of the descriptor w/o header */
-	__u8 version;	/* descriptor version */
+	__u8 version;		/* descriptor version */
 	__u8 reserved[5];	/* pad to 64 bit */
 };
 
+/* this is the comms message header - meta data */
+struct comms_msg_header {
+	__le32 magic;		/* magic for validation */
+	__le32 crc32;		/* CRC32 of the message w/o header */
+	__le16 size;		/* size of the message w/o header */
+	__u8 version;		/* message payload version */
+	__u8 type;		/* message type */
+	__u8 reserved[4];	/* pad to 64 bit */
+};
+
 /* this is the main FW descriptor - consider ABI when changing */
 struct lkd_fw_comms_desc {
 	struct comms_desc_header header;
@@ -323,7 +347,37 @@ struct lkd_fw_comms_desc {
 	char cur_fw_ver[VERSION_MAX_LEN];
 	/* can be used for 1 more version w/o ABI change */
 	char reserved0[VERSION_MAX_LEN];
-	__le64 img_addr;	/* address for next FW component load */
+	/* address for next FW component load */
+	__le64 img_addr;
+};
+
+enum comms_reset_cause {
+	HL_RESET_CAUSE_UNKNOWN = 0,
+	HL_RESET_CAUSE_HEARTBEAT = 1,
+	HL_RESET_CAUSE_TDR = 2,
+};
+
+#define RESET_CAUSE_PADDING	7
+
+/* this is the comms message descriptor */
+struct lkd_msg_comms {
+	struct comms_msg_header header;
+	/* union for future expansions of new messages */
+	union {
+		struct {
+			struct cpu_dyn_regs cpu_dyn_regs;
+			char fuse_ver[VERSION_MAX_LEN];
+			char cur_fw_ver[VERSION_MAX_LEN];
+			/* can be used for 1 more version w/o ABI change */
+			char reserved0[VERSION_MAX_LEN];
+			/* address for next FW component load */
+			__le64 img_addr;
+		};
+		struct {
+			__u8 reset_cause;
+			__u8 reserved[RESET_CAUSE_PADDING]; /* 64 bit pad */
+		};
+	};
 };
 
 /*
@@ -395,7 +449,7 @@ enum comms_cmd {
 struct comms_command {
 	union {		/* bit fields are only for FW use */
 		struct {
-			u32 size :25;		/* 32MB max. */
+			u32 size :25;			/* 32MB max. */
 			u32 reserved :2;
 			enum comms_cmd cmd :5;		/* 32 commands */
 		};
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/5] habanalabs: check if asic secured with asic type
  2021-05-15 16:31 [PATCH 1/5] habanalabs: notify before f/w loading Oded Gabbay
  2021-05-15 16:31 ` [PATCH 2/5] habanalabs/gaudi: send hard reset cause to preboot Oded Gabbay
@ 2021-05-15 16:31 ` Oded Gabbay
  2021-05-15 16:31 ` [PATCH 4/5] habanalabs/gaudi: read GIC sts after FW is loaded Oded Gabbay
  2021-05-15 16:31 ` [PATCH 5/5] habanalabs/gaudi: do not move HBM bar if iATU done by FW Oded Gabbay
  3 siblings, 0 replies; 5+ messages in thread
From: Oded Gabbay @ 2021-05-15 16:31 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ohad Sharabi

From: Ohad Sharabi <osharabi@habana.ai>

Fix an issue in which the input to the function is_asic_secured() was the
PCI device ID instead of the asic_type enumeration.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index dc92401e7a3f..df1e91f810cc 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -309,7 +309,7 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 
 	if (pdev)
 		hdev->asic_prop.fw_security_disabled =
-				!is_asic_secured(pdev->device);
+				!is_asic_secured(hdev->asic_type);
 	else
 		hdev->asic_prop.fw_security_disabled = true;
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 4/5] habanalabs/gaudi: read GIC sts after FW is loaded
  2021-05-15 16:31 [PATCH 1/5] habanalabs: notify before f/w loading Oded Gabbay
  2021-05-15 16:31 ` [PATCH 2/5] habanalabs/gaudi: send hard reset cause to preboot Oded Gabbay
  2021-05-15 16:31 ` [PATCH 3/5] habanalabs: check if asic secured with asic type Oded Gabbay
@ 2021-05-15 16:31 ` Oded Gabbay
  2021-05-15 16:31 ` [PATCH 5/5] habanalabs/gaudi: do not move HBM bar if iATU done by FW Oded Gabbay
  3 siblings, 0 replies; 5+ messages in thread
From: Oded Gabbay @ 2021-05-15 16:31 UTC (permalink / raw)
  To: linux-kernel; +Cc: Koby Elbaz

From: Koby Elbaz <kelbaz@habana.ai>

Reading of GIC privileged status will be done after F/W is loaded,
because privileged GIC capability is only available with the correct
ARMCP version, and after it's loaded.
Such versions necessarily support COMMS, so GIC alternatives (SP regs)
will be read directly from dynamic regs.

In addition, initialization of the DMA QMANs will occur after F/W is
loaded, since it depends on the GIC configuration.

In case F/W isn't loaded there's no problem since either way
there won't be any GIC IRQ handling.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c  | 16 ++---
 drivers/misc/habanalabs/gaudi/gaudi.c         | 59 +++++++++++++------
 .../habanalabs/include/common/hl_boot_if.h    | 10 +++-
 3 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 2d5a849a377e..b00f763bcda6 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -1123,9 +1123,6 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
 
 		if (cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
 			prop->hard_reset_done_by_fw = true;
-
-		if (cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN)
-			prop->gic_interrupts_enable = false;
 	} else {
 		prop->fw_cpu_boot_dev_sts0_valid = 0;
 	}
@@ -1143,11 +1140,8 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
 	dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n",
 			prop->hard_reset_done_by_fw ? "enabled" : "disabled");
 
-	dev_info(hdev->dev, "firmware-level security is %s\n",
+	dev_dbg(hdev->dev, "firmware-level security is %s\n",
 			prop->fw_security_disabled ? "disabled" : "enabled");
-
-	dev_info(hdev->dev, "GIC controller is %s\n",
-			prop->gic_interrupts_enable ? "enabled" : "disabled");
 }
 
 static int hl_fw_static_read_preboot_status(struct hl_device *hdev)
@@ -1971,9 +1965,17 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
 				CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
 			prop->hard_reset_done_by_fw = true;
 
+		if (prop->fw_app_cpu_boot_dev_sts0 &
+				CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN)
+			prop->gic_interrupts_enable = false;
+
 		dev_dbg(hdev->dev,
 			"Firmware application CPU status0 %#x\n",
 			prop->fw_app_cpu_boot_dev_sts0);
+
+		dev_dbg(hdev->dev, "GIC controller is %s\n",
+				prop->gic_interrupts_enable ?
+						"enabled" : "disabled");
 	}
 
 	if (prop->fw_cpu_boot_dev_sts1_valid) {
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index cf76edc49382..375f7893b84a 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -2534,6 +2534,8 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)
 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
 					int qman_id, dma_addr_t qman_pq_addr)
 {
+	struct cpu_dyn_regs *dyn_regs =
+			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
 	u32 q_off, dma_qm_offset;
@@ -2588,7 +2590,7 @@ static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
 	if (qman_id == 0) {
 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
-				mmGIC_DMA_QM_IRQ_CTRL_POLL_REG;
+				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
 
 		/* Configure RAZWI IRQ */
 		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
@@ -2624,6 +2626,8 @@ static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
 
 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
 {
+	struct cpu_dyn_regs *dyn_regs =
+			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
 	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
 	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
 	u32 irq_handler_offset;
@@ -2643,7 +2647,7 @@ static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
 
 	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
-			mmGIC_DMA_CR_IRQ_CTRL_POLL_REG;
+			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
 
 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
 		lower_32_bits(CFG_BASE + irq_handler_offset));
@@ -2712,6 +2716,8 @@ static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
 					int qman_id, u64 qman_base_addr)
 {
+	struct cpu_dyn_regs *dyn_regs =
+			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
 	u32 dma_qm_err_cfg, irq_handler_offset;
@@ -2756,8 +2762,8 @@ static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
 							QMAN_CPDMA_DST_OFFSET);
 	} else {
 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
-					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
-					mmGIC_DMA_QM_IRQ_CTRL_POLL_REG;
+				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
+				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
 
 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
 							QMAN_LDMA_SIZE_OFFSET);
@@ -2853,6 +2859,8 @@ static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
 					int qman_id, u64 qman_base_addr)
 {
+	struct cpu_dyn_regs *dyn_regs =
+			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
 	u32 mtr_base_lo, mtr_base_hi;
 	u32 so_base_lo, so_base_hi;
 	u32 irq_handler_offset;
@@ -2888,8 +2896,8 @@ static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
 							QMAN_CPDMA_DST_OFFSET);
 	} else {
 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
-					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
-					mmGIC_MME_QM_IRQ_CTRL_POLL_REG;
+				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
+				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
 
 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
 							QMAN_LDMA_SIZE_OFFSET);
@@ -2978,6 +2986,8 @@ static void gaudi_init_mme_qmans(struct hl_device *hdev)
 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
 				int qman_id, u64 qman_base_addr)
 {
+	struct cpu_dyn_regs *dyn_regs =
+			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
 	u32 tpc_qm_err_cfg, irq_handler_offset;
@@ -3023,8 +3033,8 @@ static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
 							QMAN_CPDMA_DST_OFFSET);
 	} else {
 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
-					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
-					mmGIC_TPC_QM_IRQ_CTRL_POLL_REG;
+				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
+				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
 
 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
 							QMAN_LDMA_SIZE_OFFSET);
@@ -3129,6 +3139,8 @@ static void gaudi_init_tpc_qmans(struct hl_device *hdev)
 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
 				int qman_id, u64 qman_base_addr, int nic_id)
 {
+	struct cpu_dyn_regs *dyn_regs =
+			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
 	u32 nic_qm_err_cfg, irq_handler_offset;
@@ -3180,8 +3192,8 @@ static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
 
 	if (qman_id == 0) {
 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
-					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
-					mmGIC_NIC_QM_IRQ_CTRL_POLL_REG;
+				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
+				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
 
 		/* Configure RAZWI IRQ */
 		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
@@ -3858,6 +3870,8 @@ static int gaudi_init_cpu(struct hl_device *hdev)
 
 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
 {
+	struct cpu_dyn_regs *dyn_regs =
+			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct gaudi_device *gaudi = hdev->asic_specific;
 	u32 status, irq_handler_offset;
@@ -3902,7 +3916,7 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
 
 	irq_handler_offset = prop->gic_interrupts_enable ?
 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
-			mmGIC_HOST_IRQ_CTRL_POLL_REG;
+			le32_to_cpu(dyn_regs->gic_host_irq_ctrl);
 
 	WREG32(irq_handler_offset, GAUDI_EVENT_PI_UPDATE);
 
@@ -3964,9 +3978,6 @@ static int gaudi_hw_init(struct hl_device *hdev)
 
 	gaudi_pre_hw_init(hdev);
 
-	gaudi_init_pci_dma_qmans(hdev);
-
-	gaudi_init_hbm_dma_qmans(hdev);
 
 	rc = gaudi_init_cpu(hdev);
 	if (rc) {
@@ -3995,6 +4006,10 @@ static int gaudi_hw_init(struct hl_device *hdev)
 
 	gaudi_init_security(hdev);
 
+	gaudi_init_pci_dma_qmans(hdev);
+
+	gaudi_init_hbm_dma_qmans(hdev);
+
 	gaudi_init_mme_qmans(hdev);
 
 	gaudi_init_tpc_qmans(hdev);
@@ -4034,6 +4049,8 @@ static int gaudi_hw_init(struct hl_device *hdev)
 
 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 {
+	struct cpu_dyn_regs *dyn_regs =
+			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
 	struct gaudi_device *gaudi = hdev->asic_specific;
 	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
 
@@ -4067,8 +4084,8 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 		WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
 
 	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
-				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
-				mmGIC_HOST_IRQ_CTRL_POLL_REG;
+			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
+			le32_to_cpu(dyn_regs->gic_host_irq_ctrl);
 
 	WREG32(irq_handler_offset, GAUDI_EVENT_HALT_MACHINE);
 
@@ -4182,6 +4199,8 @@ static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
 
 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
 {
+	struct cpu_dyn_regs *dyn_regs =
+			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
 	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
 	struct gaudi_device *gaudi = hdev->asic_specific;
 	int dma_id;
@@ -4592,8 +4611,8 @@ static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
 		mb();
 
 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
-					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
-					mmGIC_HOST_IRQ_CTRL_POLL_REG;
+				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
+				le32_to_cpu(dyn_regs->gic_host_irq_ctrl);
 
 		WREG32(irq_handler_offset, GAUDI_EVENT_PI_UPDATE);
 	}
@@ -8820,9 +8839,11 @@ static int gaudi_block_mmap(struct hl_device *hdev,
 
 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
 {
+	struct cpu_dyn_regs *dyn_regs =
+			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
 	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
-			mmGIC_HOST_IRQ_CTRL_POLL_REG;
+			le32_to_cpu(dyn_regs->gic_host_irq_ctrl);
 
 	WREG32(irq_handler_offset, GAUDI_EVENT_INTS_REGISTER);
 }
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index e9d86673109c..9266c44d8c6c 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -195,7 +195,7 @@
  *					previleged entity. FW sets this status
  *					bit for host. If this bit is set then
  *					GIC can not be accessed from host.
- *					Initialized in: preboot
+ *					Initialized in: armcpd
  *
  * CPU_BOOT_DEV_STS0_ENABLED		Device status register enabled.
  *					This is a main indication that the
@@ -299,7 +299,13 @@ struct cpu_dyn_regs {
 	__le32 hw_state;
 	__le32 kmd_msg_to_cpu;
 	__le32 cpu_cmd_status_to_host;
-	__le32 reserved1[32];		/* reserve for future use */
+	__le32 gic_host_irq_ctrl;
+	__le32 gic_tpc_qm_irq_ctrl;
+	__le32 gic_mme_qm_irq_ctrl;
+	__le32 gic_dma_qm_irq_ctrl;
+	__le32 gic_nic_qm_irq_ctrl;
+	__le32 gic_dma_core_irq_ctrl;
+	__le32 reserved1[26];		/* reserve for future use */
 };
 
 /* TODO: remove the desc magic after the code is updated to use message */
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 5/5] habanalabs/gaudi: do not move HBM bar if iATU done by FW
  2021-05-15 16:31 [PATCH 1/5] habanalabs: notify before f/w loading Oded Gabbay
                   ` (2 preceding siblings ...)
  2021-05-15 16:31 ` [PATCH 4/5] habanalabs/gaudi: read GIC sts after FW is loaded Oded Gabbay
@ 2021-05-15 16:31 ` Oded Gabbay
  3 siblings, 0 replies; 5+ messages in thread
From: Oded Gabbay @ 2021-05-15 16:31 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

When iATU configuration is done by FW, the driver should not try to
move the HBM bar.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 375f7893b84a..6654f95caecd 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -580,6 +580,9 @@ static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
 	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
 		return old_addr;
 
+	if (hdev->asic_prop.iatu_done_by_fw)
+		return U64_MAX;
+
 	/* Inbound Region 2 - Bar 4 - Point to HBM */
 	pci_region.mode = PCI_BAR_MATCH_MODE;
 	pci_region.bar = HBM_BAR_ID;
@@ -3974,10 +3977,27 @@ static void gaudi_pre_hw_init(struct hl_device *hdev)
 
 static int gaudi_hw_init(struct hl_device *hdev)
 {
+	struct gaudi_device *gaudi = hdev->asic_specific;
 	int rc;
 
 	gaudi_pre_hw_init(hdev);
 
+	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
+	 * So we set it here and if anyone tries to move it later to
+	 * a different address, there will be an error
+	 */
+	if (hdev->asic_prop.iatu_done_by_fw)
+		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
+
+	/*
+	 * Before pushing u-boot/linux to device, need to set the hbm bar to
+	 * base address of dram
+	 */
+	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
+		dev_err(hdev->dev,
+			"failed to map HBM bar to DRAM base address\n");
+		return -EIO;
+	}
 
 	rc = gaudi_init_cpu(hdev);
 	if (rc) {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-05-15 16:31 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-15 16:31 [PATCH 1/5] habanalabs: notify before f/w loading Oded Gabbay
2021-05-15 16:31 ` [PATCH 2/5] habanalabs/gaudi: send hard reset cause to preboot Oded Gabbay
2021-05-15 16:31 ` [PATCH 3/5] habanalabs: check if asic secured with asic type Oded Gabbay
2021-05-15 16:31 ` [PATCH 4/5] habanalabs/gaudi: read GIC sts after FW is loaded Oded Gabbay
2021-05-15 16:31 ` [PATCH 5/5] habanalabs/gaudi: do not move HBM bar if iATU done by FW Oded Gabbay

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).