All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] habanalabs: set memory scrubbing to disabled by default
@ 2021-05-21 17:06 Oded Gabbay
  2021-05-21 17:06 ` [PATCH 2/4] habanalabs: check running index in eqe control Oded Gabbay
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Oded Gabbay @ 2021-05-21 17:06 UTC (permalink / raw)
  To: linux-kernel

Scrubbing memory after every unmap is very costly in terms of
performance. If a user wants it, they can enable it, but the default
should prioritize performance.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index df1e91f810cc..339a1860c1e7 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -29,7 +29,7 @@ static DEFINE_MUTEX(hl_devs_idr_lock);
 
 static int timeout_locked = 30;
 static int reset_on_lockup = 1;
-static int memory_scrub = 1;
+static int memory_scrub;
 static ulong boot_error_status_mask = ULONG_MAX;
 
 module_param(timeout_locked, int, 0444);
@@ -42,7 +42,7 @@ MODULE_PARM_DESC(reset_on_lockup,
 
 module_param(memory_scrub, int, 0444);
 MODULE_PARM_DESC(memory_scrub,
-	"Scrub device memory in various states (0 = no, 1 = yes, default yes)");
+	"Scrub device memory in various states (0 = no, 1 = yes, default no)");
 
 module_param(boot_error_status_mask, ulong, 0444);
 MODULE_PARM_DESC(boot_error_status_mask,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/4] habanalabs: check running index in eqe control
  2021-05-21 17:06 [PATCH 1/4] habanalabs: set memory scrubbing to disabled by default Oded Gabbay
@ 2021-05-21 17:06 ` Oded Gabbay
  2021-05-21 17:06 ` [PATCH 3/4] habanalabs: read preboot status bits in an earlier stage Oded Gabbay
  2021-05-21 17:06 ` [PATCH 4/4] habanalabs/gaudi: disable GIC usage if security is enabled Oded Gabbay
  2 siblings, 0 replies; 4+ messages in thread
From: Oded Gabbay @ 2021-05-21 17:06 UTC (permalink / raw)
  To: linux-kernel

To harden the event queue mechanism, we add a running index to the
control header of the entry.

The firmware writes the index in each entry and the driver verifies
that the index of the current entry is larger by 1 than the index of
the previous entry.

If it isn't, the driver will treat the entry as if it wasn't valid:
it won't process the entry, but it won't skip over it either.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c  |  9 ++++++-
 drivers/misc/habanalabs/common/habanalabs.h   |  7 ++++++
 drivers/misc/habanalabs/common/irq.c          | 24 ++++++++++++++++---
 .../misc/habanalabs/include/common/cpucp_if.h |  3 +++
 .../habanalabs/include/common/hl_boot_if.h    | 11 ++++++++-
 5 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index b00f763bcda6..17173020ff53 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -617,10 +617,17 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev,
 		goto out;
 	}
 
+	/* assume EQ code doesn't need to check eqe index */
+	hdev->event_queue.check_eqe_index = false;
+
 	/* Read FW application security bits again */
-	if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid)
+	if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid) {
 		hdev->asic_prop.fw_app_cpu_boot_dev_sts0 =
 						RREG32(sts_boot_dev_sts0_reg);
+		if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
+				CPU_BOOT_DEV_STS0_EQ_INDEX_EN)
+			hdev->event_queue.check_eqe_index = true;
+	}
 
 	if (hdev->asic_prop.fw_cpu_boot_dev_sts1_valid)
 		hdev->asic_prop.fw_app_cpu_boot_dev_sts1 =
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index a046180254c8..c3f41f0b609f 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -780,12 +780,19 @@ struct hl_user_pending_interrupt {
  * @kernel_address: holds the queue's kernel virtual address
  * @bus_address: holds the queue's DMA address
  * @ci: ci inside the queue
+ * @prev_eqe_index: the index of the previous event queue entry. The index of
+ *                  the current entry must be +1 of the previous one.
+ * @check_eqe_index: do we need to check the index of the current entry vs. the
+ *                   previous one. This is for backward compatibility with older
+ *                   firmwares
  */
 struct hl_eq {
 	struct hl_device	*hdev;
 	void			*kernel_address;
 	dma_addr_t		bus_address;
 	u32			ci;
+	u32			prev_eqe_index;
+	bool			check_eqe_index;
 };
 
 
diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c
index 27129868c711..39b14a933393 100644
--- a/drivers/misc/habanalabs/common/irq.c
+++ b/drivers/misc/habanalabs/common/irq.c
@@ -207,17 +207,33 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
 	struct hl_eq_entry *eq_entry;
 	struct hl_eq_entry *eq_base;
 	struct hl_eqe_work *handle_eqe_work;
+	bool entry_ready;
+	u32 cur_eqe;
+	u16 cur_eqe_index;
 
 	eq_base = eq->kernel_address;
 
 	while (1) {
-		bool entry_ready =
-			((le32_to_cpu(eq_base[eq->ci].hdr.ctl) &
-				EQ_CTL_READY_MASK) >> EQ_CTL_READY_SHIFT);
+		cur_eqe = le32_to_cpu(eq_base[eq->ci].hdr.ctl);
+		entry_ready = !!FIELD_GET(EQ_CTL_READY_MASK, cur_eqe);
 
 		if (!entry_ready)
 			break;
 
+		cur_eqe_index = FIELD_GET(EQ_CTL_INDEX_MASK, cur_eqe);
+		if ((hdev->event_queue.check_eqe_index) &&
+				(((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK)
+							!= cur_eqe_index)) {
+			dev_dbg(hdev->dev,
+				"EQE 0x%x in queue is ready but index does not match %d!=%d",
+				eq_base[eq->ci].hdr.ctl,
+				((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK),
+				cur_eqe_index);
+			break;
+		}
+
+		eq->prev_eqe_index++;
+
 		eq_entry = &eq_base[eq->ci];
 
 		/*
@@ -341,6 +357,7 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
 	q->hdev = hdev;
 	q->kernel_address = p;
 	q->ci = 0;
+	q->prev_eqe_index = 0;
 
 	return 0;
 }
@@ -365,6 +382,7 @@ void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q)
 void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q)
 {
 	q->ci = 0;
+	q->prev_eqe_index = 0;
 
 	/*
 	 * It's not enough to just reset the PI/CI because the H/W may have
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index 4f1123102968..c7da62243619 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -103,6 +103,9 @@ struct hl_eq_entry {
 #define EQ_CTL_EVENT_TYPE_SHIFT		16
 #define EQ_CTL_EVENT_TYPE_MASK		0x03FF0000
 
+#define EQ_CTL_INDEX_SHIFT		0
+#define EQ_CTL_INDEX_MASK		0x0000FFFF
+
 enum pq_init_status {
 	PQ_INIT_STATUS_NA = 0,
 	PQ_INIT_STATUS_READY_FOR_CP,
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index 9266c44d8c6c..6d0c1ddb4304 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -195,7 +195,15 @@
  *					previleged entity. FW sets this status
  *					bit for host. If this bit is set then
  *					GIC can not be accessed from host.
- *					Initialized in: armcpd
+ *					Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_EQ_INDEX_EN	Event Queue (EQ) index is a running
+ *					index for each new event sent to host.
+ *					This is used as a method in host to
+ *					identify that the waiting event in
+ *					queue is actually a new event which
+ *					was not served before.
+ *					Initialized in: linux
  *
  * CPU_BOOT_DEV_STS0_ENABLED		Device status register enabled.
  *					This is a main indication that the
@@ -226,6 +234,7 @@
 #define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN		(1 << 17)
 #define CPU_BOOT_DEV_STS0_DYN_PLL_EN			(1 << 19)
 #define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN		(1 << 20)
+#define CPU_BOOT_DEV_STS0_EQ_INDEX_EN			(1 << 21)
 #define CPU_BOOT_DEV_STS0_ENABLED			(1 << 31)
 #define CPU_BOOT_DEV_STS1_ENABLED			(1 << 31)
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 3/4] habanalabs: read preboot status bits in an earlier stage
  2021-05-21 17:06 [PATCH 1/4] habanalabs: set memory scrubbing to disabled by default Oded Gabbay
  2021-05-21 17:06 ` [PATCH 2/4] habanalabs: check running index in eqe control Oded Gabbay
@ 2021-05-21 17:06 ` Oded Gabbay
  2021-05-21 17:06 ` [PATCH 4/4] habanalabs/gaudi: disable GIC usage if security is enabled Oded Gabbay
  2 siblings, 0 replies; 4+ messages in thread
From: Oded Gabbay @ 2021-05-21 17:06 UTC (permalink / raw)
  To: linux-kernel; +Cc: Koby Elbaz

From: Koby Elbaz <kelbaz@habana.ai>

On newer releases, the host won't be able to trigger an interrupt directly
to the ASIC GIC controller.
To be able to decide whether the GIC can or cannot be used, we must read
the device's preboot status bits in a stage that precedes the possible
first use of the GIC (when the device is in a dirty state).

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 17173020ff53..cdec7212f377 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -1159,8 +1159,6 @@ static int hl_fw_static_read_preboot_status(struct hl_device *hdev)
 	if (rc)
 		return rc;
 
-	hl_fw_preboot_update_state(hdev);
-
 	return 0;
 }
 
@@ -1189,6 +1187,8 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
 	if (rc)
 		return rc;
 
+	hl_fw_preboot_update_state(hdev);
+
 	/* no need to read preboot status in dynamic load */
 	if (hdev->asic_prop.dynamic_fw_load)
 		return 0;
@@ -1864,9 +1864,6 @@ static int hl_fw_dynamic_load_image(struct hl_device *hdev,
 		hl_fw_boot_fit_update_state(hdev,
 				le32_to_cpu(dyn_regs->cpu_boot_dev_sts0),
 				le32_to_cpu(dyn_regs->cpu_boot_dev_sts1));
-	} else {
-		/* update state during preboot handshake */
-		hl_fw_preboot_update_state(hdev);
 	}
 
 	/* copy boot fit to space allocated by FW */
@@ -2097,9 +2094,6 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 	}
 
 	if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) {
-		/* update the preboot state */
-		hl_fw_preboot_update_state(hdev);
-
 		rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, 0);
 		if (rc)
 			goto protocol_err;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 4/4] habanalabs/gaudi: disable GIC usage if security is enabled
  2021-05-21 17:06 [PATCH 1/4] habanalabs: set memory scrubbing to disabled by default Oded Gabbay
  2021-05-21 17:06 ` [PATCH 2/4] habanalabs: check running index in eqe control Oded Gabbay
  2021-05-21 17:06 ` [PATCH 3/4] habanalabs: read preboot status bits in an earlier stage Oded Gabbay
@ 2021-05-21 17:06 ` Oded Gabbay
  2 siblings, 0 replies; 4+ messages in thread
From: Oded Gabbay @ 2021-05-21 17:06 UTC (permalink / raw)
  To: linux-kernel; +Cc: Koby Elbaz

From: Koby Elbaz <kelbaz@habana.ai>

Security is set based on PCI ID, and after reading preboot status bits.
GIC usage is set in both scenarios since GIC can't be used when security
is enabled.
Moreover, writing to GIC/SP is enabled only after Linux is fully loaded.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 25 +++++++++++---------
 drivers/misc/habanalabs/common/habanalabs.h  |  2 ++
 drivers/misc/habanalabs/gaudi/gaudi.c        | 17 +++++++++----
 3 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index cdec7212f377..399d64e4f4c2 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -1115,19 +1115,13 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
 	 *                     b. Check whether hard reset is done by fw app
 	 *
 	 * Preboot:
-	 * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set
+	 * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED). If set, then-
 	 * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
-	 * Check GIC privileged bit (CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN)
+	 * If set, then mark GIC controller to be disabled.
 	 */
 	if (cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_ENABLED) {
 		prop->fw_cpu_boot_dev_sts0_valid = 1;
 
-		/* FW security should be derived from PCI ID, we keep this
-		 * check for backward compatibility
-		 */
-		if (cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_SECURITY_EN)
-			prop->fw_security_disabled = false;
-
 		if (cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
 			prop->hard_reset_done_by_fw = true;
 	} else {
@@ -1149,6 +1143,9 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
 
 	dev_dbg(hdev->dev, "firmware-level security is %s\n",
 			prop->fw_security_disabled ? "disabled" : "enabled");
+
+	dev_dbg(hdev->dev, "GIC controller is %s\n",
+			prop->gic_interrupts_enable ? "enabled" : "disabled");
 }
 
 static int hl_fw_static_read_preboot_status(struct hl_device *hdev)
@@ -1941,9 +1938,13 @@ static int hl_fw_dynamic_wait_for_linux_active(struct hl_device *hdev,
 }
 
 /**
- * hl_fw_linux_update_state - update internal data structures after loading
- *                            Linux
- *
+ * hl_fw_linux_update_state -	update internal data structures after Linux
+ *				is loaded.
+ *				Note: Linux initialization is comprised mainly
+ *				of two stages - loading kernel (SRAM_AVAIL)
+ *				& loading ARMCP.
+ *				Therefore reading boot device status in any of
+ *				these stages might result in different values.
  *
  * @hdev: pointer to the habanalabs device structure
  * @cpu_boot_dev_sts0_reg: register holding CPU boot dev status 0
@@ -1957,6 +1958,8 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 
+	hdev->fw_loader.linux_loaded = true;
+
 	/* Clear reset status since we need to read again from app */
 	prop->hard_reset_done_by_fw = false;
 
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index c3f41f0b609f..433262bfb7e6 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -956,6 +956,7 @@ struct fw_image_props {
  * @skip_bmc: should BMC be skipped
  * @sram_bar_id: SRAM bar ID
  * @dram_bar_id: DRAM bar ID
+ * @linux_loaded: true if linux was loaded so far
  */
 struct fw_load_mgr {
 	union {
@@ -969,6 +970,7 @@ struct fw_load_mgr {
 	u8 skip_bmc;
 	u8 sram_bar_id;
 	u8 dram_bar_id;
+	u8 linux_loaded;
 };
 
 /**
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 6654f95caecd..9be3809d4d0d 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -689,6 +689,12 @@ static int gaudi_early_init(struct hl_device *hdev)
 	/* If FW security is enabled at this point it means no access to ELBI */
 	if (!hdev->asic_prop.fw_security_disabled) {
 		hdev->asic_prop.iatu_done_by_fw = true;
+
+		/*
+		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
+		 * decision can only be taken based on PCI ID security.
+		 */
+		hdev->asic_prop.gic_interrupts_enable = false;
 		goto pci_init;
 	}
 
@@ -3829,6 +3835,7 @@ static void gaudi_init_firmware_loader(struct hl_device *hdev)
 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
 
 	/* fill common fields */
+	fw_loader->linux_loaded = false;
 	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
 	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
 	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
@@ -4103,11 +4110,13 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 	else
 		WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
 
-	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
-			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
-			le32_to_cpu(dyn_regs->gic_host_irq_ctrl);
+	if (hdev->fw_loader.linux_loaded) {
+		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
+				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
+				le32_to_cpu(dyn_regs->gic_host_irq_ctrl);
 
-	WREG32(irq_handler_offset, GAUDI_EVENT_HALT_MACHINE);
+		WREG32(irq_handler_offset, GAUDI_EVENT_HALT_MACHINE);
+	}
 
 	if (hdev->asic_prop.fw_security_disabled &&
 				!hdev->asic_prop.hard_reset_done_by_fw) {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-05-21 17:07 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-21 17:06 [PATCH 1/4] habanalabs: set memory scrubbing to disabled by default Oded Gabbay
2021-05-21 17:06 ` [PATCH 2/4] habanalabs: check running index in eqe control Oded Gabbay
2021-05-21 17:06 ` [PATCH 3/4] habanalabs: read preboot status bits in an earlier stage Oded Gabbay
2021-05-21 17:06 ` [PATCH 4/4] habanalabs/gaudi: disable GIC usage if security is enabled Oded Gabbay

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.