All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/12] habanalabs: fixes to the poll-timeout macros
@ 2022-07-11  6:29 Oded Gabbay
  2022-07-11  6:29 ` [PATCH 02/12] habanalabs: add a value field to hl_fw_send_pci_access_msg() Oded Gabbay
                   ` (10 more replies)
  0 siblings, 11 replies; 12+ messages in thread
From: Oded Gabbay @ 2022-07-11  6:29 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ohad Sharabi

From: Ohad Sharabi <osharabi@habana.ai>

- use conventional internal macro variables (double underscore prefix)
- adjust address casting
- when polling a register over ELBI, use an ELBI read rather than a BAR
  read on the error condition
- remove unused macro

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h | 119 +++++++++++++++-----
 1 file changed, 90 insertions(+), 29 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 72cb12f2068a..3c51eaca521c 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2473,9 +2473,11 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 /* Timeout should be longer when working with simulator but cap the
  * increased timeout to some maximum
  */
-#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
+#define hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, elbi) \
 ({ \
 	ktime_t __timeout; \
+	u32 __elbi_read; \
+	int __rc = 0; \
 	if (hdev->pdev) \
 		__timeout = ktime_add_us(ktime_get(), timeout_us); \
 	else \
@@ -2484,19 +2486,103 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 					(u64) HL_SIM_MAX_TIMEOUT_US)); \
 	might_sleep_if(sleep_us); \
 	for (;;) { \
-		(val) = RREG32(addr); \
+		if (elbi) { \
+			__rc = hl_pci_elbi_read(hdev, addr, &__elbi_read); \
+			if (__rc) \
+				break; \
+			(val) = __elbi_read; \
+		} else {\
+			(val) = RREG32((u32)addr); \
+		} \
 		if (cond) \
 			break; \
 		if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
-			(val) = RREG32(addr); \
+			if (elbi) { \
+				__rc = hl_pci_elbi_read(hdev, addr, &__elbi_read); \
+				if (__rc) \
+					break; \
+				(val) = __elbi_read; \
+			} else {\
+				(val) = RREG32((u32)addr); \
+			} \
 			break; \
 		} \
 		if (sleep_us) \
 			usleep_range((sleep_us >> 2) + 1, sleep_us); \
 	} \
-	(cond) ? 0 : -ETIMEDOUT; \
+	__rc ? __rc : ((cond) ? 0 : -ETIMEDOUT); \
 })
 
+#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
+		hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, false)
+
+#define hl_poll_timeout_elbi(hdev, addr, val, cond, sleep_us, timeout_us) \
+		hl_poll_timeout_common(hdev, addr, val, cond, sleep_us, timeout_us, true)
+
+/*
+ * poll array of register addresses.
+ * condition is satisfied if all registers values match the expected value.
+ * once some register in the array satisfies the condition it will not be polled again,
+ * this is done both for efficiency and because some registers are "clear on read".
+ * TODO: use read from PCI bar in other places in the code (SW-91406)
+ */
+#define hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+						timeout_us, elbi) \
+({ \
+	ktime_t __timeout; \
+	u64 __elem_bitmask; \
+	u32 __read_val;	\
+	u8 __arr_idx;	\
+	int __rc = 0; \
+	\
+	if (hdev->pdev) \
+		__timeout = ktime_add_us(ktime_get(), timeout_us); \
+	else \
+		__timeout = ktime_add_us(ktime_get(),\
+				min(((u64)timeout_us * 10), \
+					(u64) HL_SIM_MAX_TIMEOUT_US)); \
+	\
+	might_sleep_if(sleep_us); \
+	if (arr_size >= 64) \
+		__rc = -EINVAL; \
+	else \
+		__elem_bitmask = BIT_ULL(arr_size) - 1; \
+	for (;;) { \
+		if (__rc) \
+			break; \
+		for (__arr_idx = 0; __arr_idx < (arr_size); __arr_idx++) {	\
+			if (!(__elem_bitmask & BIT_ULL(__arr_idx)))	\
+				continue;	\
+			if (elbi) { \
+				__rc = hl_pci_elbi_read(hdev, (addr_arr)[__arr_idx], &__read_val); \
+				if (__rc) \
+					break; \
+			} else { \
+				__read_val = RREG32((u32)(addr_arr)[__arr_idx]); \
+			} \
+			if (__read_val == (expected_val))	\
+				__elem_bitmask &= ~BIT_ULL(__arr_idx);	\
+		}	\
+		if (__rc || (__elem_bitmask == 0)) \
+			break; \
+		if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) \
+			break; \
+		if (sleep_us) \
+			usleep_range((sleep_us >> 2) + 1, sleep_us); \
+	} \
+	__rc ? __rc : ((__elem_bitmask == 0) ? 0 : -ETIMEDOUT); \
+})
+
+#define hl_poll_reg_array_timeout(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+					timeout_us) \
+	hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+						timeout_us, false)
+
+#define hl_poll_reg_array_timeout_elbi(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+					timeout_us) \
+	hl_poll_reg_array_timeout_common(hdev, addr_arr, arr_size, expected_val, sleep_us, \
+						timeout_us, true)
+
 /*
  * address in this macro points always to a memory location in the
  * host's (server's) memory. That location is updated asynchronously
@@ -2540,31 +2626,6 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 	(cond) ? 0 : -ETIMEDOUT; \
 })
 
-#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \
-					timeout_us) \
-({ \
-	ktime_t __timeout; \
-	if (hdev->pdev) \
-		__timeout = ktime_add_us(ktime_get(), timeout_us); \
-	else \
-		__timeout = ktime_add_us(ktime_get(),\
-				min((u64)(timeout_us * 10), \
-					(u64) HL_SIM_MAX_TIMEOUT_US)); \
-	might_sleep_if(sleep_us); \
-	for (;;) { \
-		(val) = readl(addr); \
-		if (cond) \
-			break; \
-		if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
-			(val) = readl(addr); \
-			break; \
-		} \
-		if (sleep_us) \
-			usleep_range((sleep_us >> 2) + 1, sleep_us); \
-	} \
-	(cond) ? 0 : -ETIMEDOUT; \
-})
-
 #define HL_USR_MAPPED_BLK_INIT(blk, base, sz) \
 ({ \
 	struct user_mapped_block *p = blk; \
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 02/12] habanalabs: add a value field to hl_fw_send_pci_access_msg()
  2022-07-11  6:29 [PATCH 01/12] habanalabs: fixes to the poll-timeout macros Oded Gabbay
@ 2022-07-11  6:29 ` Oded Gabbay
  2022-07-11  6:29 ` [PATCH 03/12] habanalabs/gaudi2: configure virtual MSI-X doorbell interface Oded Gabbay
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Oded Gabbay @ 2022-07-11  6:29 UTC (permalink / raw)
  To: linux-kernel; +Cc: Tomer Tayar

From: Tomer Tayar <ttayar@habana.ai>

For gaudi2 we need to send a value to F/W as part of the
PCI_ACCESS packet.
As a preparation, modify hl_fw_send_pci_access_msg() to have a 'value'
field.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c      | 5 ++---
 drivers/misc/habanalabs/common/firmware_if.c | 6 +++---
 drivers/misc/habanalabs/common/habanalabs.h  | 2 +-
 drivers/misc/habanalabs/gaudi/gaudi.c        | 6 +++---
 drivers/misc/habanalabs/gaudi2/gaudi2.c      | 6 +++---
 drivers/misc/habanalabs/goya/goya.c          | 4 ++--
 6 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 5a76d7689abe..4391eb22ddb8 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1259,8 +1259,7 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
 		 * of heartbeat, the device CPU is marked as disable
 		 * so this message won't be sent
 		 */
-		if (hl_fw_send_pci_access_msg(hdev,
-				CPUCP_PACKET_DISABLE_PCI_ACCESS))
+		if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0))
 			dev_warn(hdev->dev,
 				"Failed to disable PCI access by F/W\n");
 	}
@@ -2054,7 +2053,7 @@ void hl_device_fini(struct hl_device *hdev)
 	 * message won't be send. Also, in case of heartbeat, the device CPU is
 	 * marked as disable so this message won't be sent
 	 */
-	hl_fw_send_pci_access_msg(hdev,	CPUCP_PACKET_DISABLE_PCI_ACCESS);
+	hl_fw_send_pci_access_msg(hdev,	CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
 
 	/* Mark device as disabled */
 	hdev->disabled = true;
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index fd8dd332a59a..f80a8c9d2cc8 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -242,14 +242,14 @@ int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
 	return rc;
 }
 
-int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
+int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode, u64 value)
 {
 	struct cpucp_packet pkt = {};
 
 	pkt.ctl = cpu_to_le32(opcode << CPUCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.value = cpu_to_le64(value);
 
-	return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
-						sizeof(pkt), 0, NULL);
+	return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
 }
 
 int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 3c51eaca521c..0e45f2be13ed 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -3631,7 +3631,7 @@ bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr);
 
 int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
 				void __iomem *dst, u32 src_offset, u32 size);
-int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode);
+int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode, u64 value);
 int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 				u16 len, u32 timeout, u64 *result);
 int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 1f84dd6f3adb..8c1b1824d1a9 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1632,7 +1632,7 @@ static int gaudi_late_init(struct hl_device *hdev)
 		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
 	}
 
-	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
+	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
 		return rc;
@@ -1677,7 +1677,7 @@ static int gaudi_late_init(struct hl_device *hdev)
 	return 0;
 
 disable_pci_access:
-	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
+	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
 
 	return rc;
 }
@@ -4236,7 +4236,7 @@ static int gaudi_suspend(struct hl_device *hdev)
 {
 	int rc;
 
-	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
+	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
 	if (rc)
 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
 
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 26f105a84e6b..d9a85ea56f75 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -2675,7 +2675,7 @@ static int gaudi2_late_init(struct hl_device *hdev)
 {
 	int rc;
 
-	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
+	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
 		return rc;
@@ -2694,7 +2694,7 @@ static int gaudi2_late_init(struct hl_device *hdev)
 	return 0;
 
 disable_pci_access:
-	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
+	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
 
 	return rc;
 }
@@ -5323,7 +5323,7 @@ static int gaudi2_suspend(struct hl_device *hdev)
 {
 	int rc;
 
-	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
+	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
 	if (rc)
 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 0c333b42225a..216570938b91 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -888,7 +888,7 @@ int goya_late_init(struct hl_device *hdev)
 	 */
 	WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));
 
-	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
+	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to enable PCI access from CPU %d\n", rc);
@@ -2878,7 +2878,7 @@ int goya_suspend(struct hl_device *hdev)
 {
 	int rc;
 
-	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
+	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
 	if (rc)
 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 03/12] habanalabs/gaudi2: configure virtual MSI-X doorbell interface
  2022-07-11  6:29 [PATCH 01/12] habanalabs: fixes to the poll-timeout macros Oded Gabbay
  2022-07-11  6:29 ` [PATCH 02/12] habanalabs: add a value field to hl_fw_send_pci_access_msg() Oded Gabbay
@ 2022-07-11  6:29 ` Oded Gabbay
  2022-07-11  6:29 ` [PATCH 04/12] habanalabs/gaudi2: replace defines for reserved sob/mob with enums Oded Gabbay
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Oded Gabbay @ 2022-07-11  6:29 UTC (permalink / raw)
  To: linux-kernel; +Cc: Tomer Tayar

From: Tomer Tayar <ttayar@habana.ai>

Due to a watchdog timer in the LBW path, writes to the MSI-X doorbell
can return sporadic error responses.
To work-around this issue, a virtual MSI-X doorbell on the HBW path is
configured, using the MSI-X AXI slave interface in the PCIe controller.
Upon an access to a configured HBW host address, the controller will
generate an MSI-X interrupt instead of treating the access as a regular
host memory access.

This patch allocates the dedicated host memory page and communicates its
address to F/W, so it will configure the relevant address match
registers in the controller, and will use this address to generate MSI-X
interrupts for F/W events.

Following patches will handle other initiators in the device, to move
them to use the virtual MSI-X doorbell.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c       | 23 ++++++++++++++++---
 drivers/misc/habanalabs/gaudi2/gaudi2P.h      |  5 ++++
 drivers/misc/habanalabs/gaudi2/gaudi2_masks.h |  3 +++
 3 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index d9a85ea56f75..539cb88a88e4 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -2673,9 +2673,11 @@ static void gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
 
 static int gaudi2_late_init(struct hl_device *hdev)
 {
+	struct gaudi2_device *gaudi2 = hdev->asic_specific;
 	int rc;
 
-	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
+	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
+					gaudi2->virt_msix_db_dma_addr);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
 		return rc;
@@ -2922,6 +2924,7 @@ static inline int gaudi2_get_non_zero_random_int(void)
 
 static int gaudi2_sw_init(struct hl_device *hdev)
 {
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct gaudi2_device *gaudi2;
 	int i, rc;
 
@@ -2982,6 +2985,14 @@ static int gaudi2_sw_init(struct hl_device *hdev)
 		goto free_cpu_accessible_dma_pool;
 	}
 
+	gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
+								&gaudi2->virt_msix_db_dma_addr);
+	if (!gaudi2->virt_msix_db_cpu_addr) {
+		dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
+		rc = -ENOMEM;
+		goto free_cpu_accessible_dma_pool;
+	}
+
 	spin_lock_init(&gaudi2->hw_queues_lock);
 	spin_lock_init(&gaudi2->kdma_lock);
 
@@ -2990,7 +3001,7 @@ static int gaudi2_sw_init(struct hl_device *hdev)
 							GFP_KERNEL | __GFP_ZERO);
 	if (!gaudi2->scratchpad_kernel_address) {
 		rc = -ENOMEM;
-		goto free_cpu_accessible_dma_pool;
+		goto free_virt_msix_db_mem;
 	}
 
 	gaudi2_user_mapped_blocks_init(hdev);
@@ -2999,15 +3010,18 @@ static int gaudi2_sw_init(struct hl_device *hdev)
 	gaudi2_user_interrupt_setup(hdev);
 
 	hdev->supports_coresight = true;
-	hdev->asic_prop.supports_soft_reset = true;
 	hdev->supports_sync_stream = true;
 	hdev->supports_cb_mapping = true;
 	hdev->supports_wait_for_multi_cs = false;
 
+	prop->supports_soft_reset = true;
+
 	hdev->asic_funcs->set_pci_memory_regions(hdev);
 
 	return 0;
 
+free_virt_msix_db_mem:
+	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
 free_cpu_accessible_dma_pool:
 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
 free_cpu_dma_mem:
@@ -3022,8 +3036,11 @@ static int gaudi2_sw_init(struct hl_device *hdev)
 
 static int gaudi2_sw_fini(struct hl_device *hdev)
 {
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
 
+	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
+
 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
 
 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2P.h b/drivers/misc/habanalabs/gaudi2/gaudi2P.h
index dc0094a2a911..012413d7df9a 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2P.h
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2P.h
@@ -439,6 +439,8 @@ struct dup_block_ctx {
  *                             currently used for HBW QMAN writes which is
  *                             redundant.
  * @scratchpad_bus_address: scratchpad bus address
+ * @virt_msix_db_cpu_addr: host memory page for the virtual MSI-X doorbell.
+ * @virt_msix_db_dma_addr: bus address of the page for the virtual MSI-X doorbell.
  * @dram_bar_cur_addr: current address of DRAM PCI bar.
  * @hw_cap_initialized: This field contains a bit per H/W engine. When that
  *                      engine is initialized, that bit is set by the driver to
@@ -499,6 +501,9 @@ struct gaudi2_device {
 	void				*scratchpad_kernel_address;
 	dma_addr_t			scratchpad_bus_address;
 
+	void				*virt_msix_db_cpu_addr;
+	dma_addr_t			virt_msix_db_dma_addr;
+
 	u64				dram_bar_cur_addr;
 	u64				hw_cap_initialized;
 	u64				active_hw_arc;
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h b/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h
index 19ec1f130bef..3fd5cf4a8645 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h
@@ -132,4 +132,7 @@
 #define ROT_MSS_HALT_RSB_MASK	BIT(1)
 #define ROT_MSS_HALT_MRSB_MASK	BIT(2)
 
+#define PCIE_DBI_MSIX_ADDRESS_MATCH_LOW_OFF_MSIX_ADDRESS_MATCH_EN_SHIFT	0
+#define PCIE_DBI_MSIX_ADDRESS_MATCH_LOW_OFF_MSIX_ADDRESS_MATCH_EN_MASK	0x1
+
 #endif /* GAUDI2_MASKS_H_ */
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 04/12] habanalabs/gaudi2: replace defines for reserved sob/mob with enums
  2022-07-11  6:29 [PATCH 01/12] habanalabs: fixes to the poll-timeout macros Oded Gabbay
  2022-07-11  6:29 ` [PATCH 02/12] habanalabs: add a value field to hl_fw_send_pci_access_msg() Oded Gabbay
  2022-07-11  6:29 ` [PATCH 03/12] habanalabs/gaudi2: configure virtual MSI-X doorbell interface Oded Gabbay
@ 2022-07-11  6:29 ` Oded Gabbay
  2022-07-11  6:29 ` [PATCH 05/12] habanalabs/gaudi2: modify CS completion CQ to use virtual MSI-X doorbell Oded Gabbay
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Oded Gabbay @ 2022-07-11  6:29 UTC (permalink / raw)
  To: linux-kernel; +Cc: Tomer Tayar

From: Tomer Tayar <ttayar@habana.ai>

Following patches are going to add more reserved sync objects and
monitors.
To make the counting of these reserved resources simpler, replace the
existing RESERVED_* defines with enumerations.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c       | 48 ++++++++++---------
 drivers/misc/habanalabs/gaudi2/gaudi2P.h      | 26 ++++++----
 .../misc/habanalabs/gaudi2/gaudi2_security.c  |  4 +-
 3 files changed, 46 insertions(+), 32 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 539cb88a88e4..398bc0957417 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -1867,8 +1867,8 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
 	prop->user_dec_intr_count = NUMBER_OF_DEC;
 	prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
 	prop->completion_mode = HL_COMPLETION_MODE_CS;
-	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOBS;
-	prop->sync_stream_first_mon = GAUDI2_RESERVED_MONITORS;
+	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
+	prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
 
 	prop->sram_base_address = SRAM_BASE_ADDR;
 	prop->sram_size = SRAM_SIZE;
@@ -1988,10 +1988,10 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
 
 	prop->mme_master_slave_mode = 1;
 
-	prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOBS +
+	prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
 					(num_sync_stream_queues * HL_RSVD_SOBS);
 
-	prop->first_available_user_mon[0] = GAUDI2_RESERVED_MONITORS +
+	prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
 					(num_sync_stream_queues * HL_RSVD_MONS);
 
 	prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
@@ -3533,7 +3533,7 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
 	}
 
 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
-	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_COMPLETION];
+	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
 	rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
@@ -3643,7 +3643,7 @@ static void gaudi2_disable_msix(struct hl_device *hdev)
 	}
 
 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
-	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_COMPLETION];
+	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
 	free_irq(irq, cq);
 
 	pci_free_irq_vectors(hdev->pdev);
@@ -4139,7 +4139,7 @@ static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
 	u32 pq_id;
 
 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
-		hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_COMPLETION;
+		hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;
 
 	gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
 	gaudi2_init_qman_cp(hdev, reg_base);
@@ -5762,31 +5762,33 @@ static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
 }
 
-static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 index, u32 cq_id,
+static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
 						u32 mon_payload, u32 sync_value)
 {
-	u32 sync_group_id, mode, mon_arm;
-	int offset = index * 4;
+	u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
 	u8 mask;
 
+	sob_offset = sob_id * 4;
+	mon_offset = mon_id * 4;
+
 	/* Reset the SOB value */
-	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset, 0);
+	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
 
 	/* Configure this address with CQ_ID 0 because CQ_EN is set */
-	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + offset, cq_id);
+	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
 
 	/* Configure this address with CS index because CQ_EN is set */
-	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + offset, mon_payload);
+	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
 
-	sync_group_id = index / 8;
-	mask = ~(1 << (index & 0x7));
+	sync_group_id = sob_id / 8;
+	mask = ~(1 << (sob_id & 0x7));
 	mode = 1; /* comparison mode is "equal to" */
 
 	mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
-	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + offset, mon_arm);
+	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
 }
 
 /* This is an internal helper function used by gaudi2_send_job_to_kdma only */
@@ -5800,11 +5802,12 @@ static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
 	u64 comp_addr;
 	int rc;
 
-	gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMP,
+	gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
+				GAUDI2_RESERVED_MON_KDMA_COMPLETION,
 				GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
 
 	comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
-			(GAUDI2_RESERVED_SOB_KDMA_COMP * sizeof(u32));
+			(GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
 
 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
@@ -9125,24 +9128,25 @@ static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
 {
 	struct hl_device *hdev = cs->ctx->hdev;
 	int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
-	u32 mon_payload;
+	u32 mon_payload, sob_id, mon_id;
 
 	if (!cs_needs_completion(cs))
 		return 0;
 
 	/*
-	 * First 1024 SOB/MON are reserved for driver for QMAN auto completion
+	 * First 64 SOB/MON are reserved for driver for QMAN auto completion
 	 * mechanism. Each SOB/MON pair are used for a pending CS with the same
 	 * cyclic index. The SOB value is increased when each of the CS jobs is
 	 * completed. When the SOB reaches the number of CS jobs, the monitor
 	 * generates MSI-X interrupt.
 	 */
 
+	sob_id = mon_id = index;
 	mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
 				(1 << CQ_ENTRY_READY_SHIFT) | index;
 
-	gaudi2_arm_cq_monitor(hdev, index, GAUDI2_RESERVED_CQ_COMPLETION,
-						mon_payload, cs->jobs_cnt);
+	gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
+				cs->jobs_cnt);
 
 	return 0;
 }
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2P.h b/drivers/misc/habanalabs/gaudi2/gaudi2P.h
index 012413d7df9a..826d24284dc5 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2P.h
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2P.h
@@ -80,12 +80,6 @@
 
 #define GAUDI2_MAX_PENDING_CS		64
 
-/* Sob/Mon per CS + Sob/Mon for KDMA completion */
-#define GAUDI2_RESERVED_SOBS		(GAUDI2_MAX_PENDING_CS + 1)
-#define GAUDI2_RESERVED_MONITORS	(GAUDI2_MAX_PENDING_CS + 1)
-#define GAUDI2_RESERVED_SOB_KDMA_COMP	(GAUDI2_MAX_PENDING_CS)
-#define GAUDI2_RESERVED_MON_KDMA_COMP	(GAUDI2_MAX_PENDING_CS)
-
 #if !IS_MAX_PENDING_CS_VALID(GAUDI2_MAX_PENDING_CS)
 #error "GAUDI2_MAX_PENDING_CS must be power of 2 and greater than 1"
 #endif
@@ -249,10 +243,26 @@
 
 #define GAUDI2_ARC_PCI_MSB_ADDR(addr)	(((addr) & GENMASK_ULL(49, 28)) >> 28)
 
+enum gaudi2_reserved_sob_id {
+	GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST,
+	GAUDI2_RESERVED_SOB_CS_COMPLETION_LAST =
+			GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST + GAUDI2_MAX_PENDING_CS - 1,
+	GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
+	GAUDI2_RESERVED_SOB_NUMBER
+};
+
+enum gaudi2_reserved_mon_id {
+	GAUDI2_RESERVED_MON_CS_COMPLETION_FIRST,
+	GAUDI2_RESERVED_MON_CS_COMPLETION_LAST =
+			GAUDI2_RESERVED_MON_CS_COMPLETION_FIRST + GAUDI2_MAX_PENDING_CS - 1,
+	GAUDI2_RESERVED_MON_KDMA_COMPLETION,
+	GAUDI2_RESERVED_MON_NUMBER
+};
+
 enum gaudi2_reserved_cq_id {
-	GAUDI2_RESERVED_CQ_COMPLETION,
+	GAUDI2_RESERVED_CQ_CS_COMPLETION,
 	GAUDI2_RESERVED_CQ_KDMA_COMPLETION,
-	GAUDI2_RESERVED_CQ_NUMBER,
+	GAUDI2_RESERVED_CQ_NUMBER
 };
 
 /*
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2_security.c b/drivers/misc/habanalabs/gaudi2/gaudi2_security.c
index afca8352a223..89a06ff5ba34 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2_security.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2_security.c
@@ -2703,8 +2703,8 @@ static int gaudi2_init_pb_sm_objs(struct hl_device *hdev)
 	if (!sec_array)
 		return -ENOMEM;
 
-	first_sob = GAUDI2_RESERVED_SOBS;
-	first_mon = GAUDI2_RESERVED_MONITORS;
+	first_sob = GAUDI2_RESERVED_SOB_NUMBER;
+	first_mon = GAUDI2_RESERVED_MON_NUMBER;
 
 	/* 8192 SOB_OBJs skipping first GAUDI2_MAX_PENDING_CS of them */
 	for (j = i = first_sob ; i < DCORE_NUM_OF_SOB ; i++, j++)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 05/12] habanalabs/gaudi2: modify CS completion CQ to use virtual MSI-X doorbell
  2022-07-11  6:29 [PATCH 01/12] habanalabs: fixes to the poll-timeout macros Oded Gabbay
                   ` (2 preceding siblings ...)
  2022-07-11  6:29 ` [PATCH 04/12] habanalabs/gaudi2: replace defines for reserved sob/mob with enums Oded Gabbay
@ 2022-07-11  6:29 ` Oded Gabbay
  2022-07-11  6:29 ` [PATCH 06/12] habanalabs/gaudi2: modify decoder " Oded Gabbay
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Oded Gabbay @ 2022-07-11  6:29 UTC (permalink / raw)
  To: linux-kernel; +Cc: Tomer Tayar

From: Tomer Tayar <ttayar@habana.ai>

Modify the CQ which is used for CS completion, to use the virtual MSI-X
doorbell.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 398bc0957417..60e11007e3ea 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -4254,7 +4254,7 @@ static void gaudi2_init_edma(struct hl_device *hdev)
 
 static void gaudi2_init_sm(struct hl_device *hdev)
 {
-	u64 msix_db_reg = CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF;
+	struct gaudi2_device *gaudi2 = hdev->asic_specific;
 	u64 cq_address;
 	u32 reg_val;
 	int i;
@@ -4272,8 +4272,21 @@ static void gaudi2_init_sm(struct hl_device *hdev)
 
 	/* Init CQ0 DB */
 	/* Configure the monitor to trigger MSI-X interrupt */
-	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(msix_db_reg));
-	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(msix_db_reg));
+	/* TODO:
+	 * Remove the if statement when virtual MSI-X doorbell is supported in simulator (SW-93022)
+	 * and in F/W (SW-93024).
+	 */
+	if (!hdev->pdev || hdev->asic_prop.fw_security_enabled) {
+		u64 msix_db_reg = CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF;
+
+		WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(msix_db_reg));
+		WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(msix_db_reg));
+	} else {
+		WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0,
+				lower_32_bits(gaudi2->virt_msix_db_dma_addr));
+		WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0,
+				upper_32_bits(gaudi2->virt_msix_db_dma_addr));
+	}
 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);
 
 	for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 06/12] habanalabs/gaudi2: modify decoder to use virtual MSI-X doorbell
  2022-07-11  6:29 [PATCH 01/12] habanalabs: fixes to the poll-timeout macros Oded Gabbay
                   ` (3 preceding siblings ...)
  2022-07-11  6:29 ` [PATCH 05/12] habanalabs/gaudi2: modify CS completion CQ to use virtual MSI-X doorbell Oded Gabbay
@ 2022-07-11  6:29 ` Oded Gabbay
  2022-07-11  6:29 ` [PATCH 07/12] habanalabs/gaudi2: map virtual MSI-X doorbell memory for user Oded Gabbay
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Oded Gabbay @ 2022-07-11  6:29 UTC (permalink / raw)
  To: linux-kernel; +Cc: Tomer Tayar

From: Tomer Tayar <ttayar@habana.ai>

Modify the decoder wrapper blocks to generate interrupts using the
virtual MSI-X doorbell.

As a decoder wrapper block cannot write directly to HBW upon completion,
it writes instead to SOB which is monitored by a master monitor.
When resolved, this monitor will be the one to actually write to the
virtual MSI-X doorbell.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c       | 141 +++++++++++++++++-
 drivers/misc/habanalabs/gaudi2/gaudi2P.h      |  15 ++
 drivers/misc/habanalabs/gaudi2/gaudi2_masks.h |   3 +
 .../include/gaudi2/asic_reg/gaudi2_regs.h     |   6 +
 4 files changed, 160 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 60e11007e3ea..760ce5b7f955 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -4252,6 +4252,109 @@ static void gaudi2_init_edma(struct hl_device *hdev)
 	}
 }
 
+/*
+ * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
+ * @hdev: pointer to habanalabs device structure.
+ * @sob_id: sync object ID.
+ * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
+ * @interrupt_id: interrupt ID.
+ *
+ * Some initiators cannot have HBW address in their completion address registers, and thus cannot
+ * write directly to the HBW host memory of the virtual MSI-X doorbell.
+ * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
+ *
+ * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
+ * In addition to the HBW write, the other 2 messages are for preparing the monitor for the next
+ * completion, by decrementing the sync object value and re-arming the monitor.
+ */
+static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
+							u32 first_mon_id, u32 interrupt_id)
+{
+	u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
+	struct gaudi2_device *gaudi2 = hdev->asic_specific;
+	u64 addr;
+	u8 mask;
+
+	/* Reset the SOB value */
+	sob_offset = sob_id * sizeof(u32);
+	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
+
+	/* Configure 3 monitors:
+	 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
+	 * 2. Decrement SOB value by 1.
+	 * 3. Re-arm the master monitor.
+	 */
+
+	first_mon_offset = first_mon_id * sizeof(u32);
+
+	/* 2nd monitor: Decrement SOB value by 1 */
+	mon_offset = first_mon_offset + sizeof(u32);
+
+	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
+	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
+	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
+
+	payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
+			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
+			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
+	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
+
+	/* 3rd monitor: Re-arm the master monitor */
+	mon_offset = first_mon_offset + 2 * sizeof(u32);
+
+	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
+	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
+	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
+
+	sob_group = sob_id / 8;
+	mask = ~BIT(sob_id & 0x7);
+	mode = 0; /* comparison mode is "greater than or equal to" */
+	arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
+			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
+			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
+			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);
+
+	payload = arm;
+	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
+
+	/* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
+	mon_offset = first_mon_offset;
+
+	config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
+	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);
+
+	addr = gaudi2->virt_msix_db_dma_addr;
+	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
+	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
+
+	payload = interrupt_id;
+	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
+
+	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
+}
+
+static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
+{
+	u32 decoder_id, sob_id, first_mon_id, interrupt_id;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+	/* Decoder normal/abnormal interrupts */
+	for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
+		if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
+			continue;
+
+		sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
+		first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
+		interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
+		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
+
+		sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
+		first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
+		interrupt_id += 1;
+		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
+	}
+}
+
 static void gaudi2_init_sm(struct hl_device *hdev)
 {
 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
@@ -4304,6 +4407,9 @@ static void gaudi2_init_sm(struct hl_device *hdev)
 	/* Configure kernel ASID and MMU BP*/
 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);
+
+	/* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
+	gaudi2_prepare_sm_for_virt_msix_db(hdev);
 }
 
 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
@@ -4452,10 +4558,35 @@ static void gaudi2_init_rotator(struct hl_device *hdev)
 	}
 }
 
-static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 msix_id)
+static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
 {
-	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, msix_id);
-	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, msix_id + 1);
+	u32 sob_id;
+
+	/* TODO:
+	 * Remove when virtual MSI-X doorbell is supported in simulator (SW-93022) and in F/W
+	 * (SW-93024).
+	 */
+	if (!hdev->pdev || hdev->asic_prop.fw_security_enabled) {
+		u32 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
+
+		WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR, mmPCIE_DBI_MSIX_DOORBELL_OFF);
+		WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, interrupt_id);
+		WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR, mmPCIE_DBI_MSIX_DOORBELL_OFF);
+		WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, interrupt_id + 1);
+		return;
+	}
+
+	/* VCMD normal interrupt */
+	sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
+	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
+			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
+	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
+
+	/* VCMD abnormal interrupt */
+	sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
+	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
+			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
+	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
 }
 
 static void gaudi2_init_dec(struct hl_device *hdev)
@@ -4485,7 +4616,7 @@ static void gaudi2_init_dec(struct hl_device *hdev)
 			msix_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM +
 				(dcore_id * NUM_OF_DEC_PER_DCORE + dec_id) * 2;
 
-			gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, msix_id);
+			gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
 
 			gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
 		}
@@ -4500,7 +4631,7 @@ static void gaudi2_init_dec(struct hl_device *hdev)
 
 		msix_id = GAUDI2_IRQ_NUM_SHARED_DEC0_NRM + (dec_id * 2);
 
-		gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, msix_id);
+		gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
 
 		gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
 	}
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2P.h b/drivers/misc/habanalabs/gaudi2/gaudi2P.h
index 826d24284dc5..e4bc4009f05b 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2P.h
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2P.h
@@ -243,11 +243,20 @@
 
 #define GAUDI2_ARC_PCI_MSB_ADDR(addr)	(((addr) & GENMASK_ULL(49, 28)) >> 28)
 
+#define GAUDI2_SOB_INCREMENT_BY_ONE	(FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1) | \
+					FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1))
+
 enum gaudi2_reserved_sob_id {
 	GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST,
 	GAUDI2_RESERVED_SOB_CS_COMPLETION_LAST =
 			GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST + GAUDI2_MAX_PENDING_CS - 1,
 	GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
+	GAUDI2_RESERVED_SOB_DEC_NRM_FIRST,
+	GAUDI2_RESERVED_SOB_DEC_NRM_LAST =
+			GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + NUMBER_OF_DEC - 1,
+	GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST,
+	GAUDI2_RESERVED_SOB_DEC_ABNRM_LAST =
+			GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + NUMBER_OF_DEC - 1,
 	GAUDI2_RESERVED_SOB_NUMBER
 };
 
@@ -256,6 +265,12 @@ enum gaudi2_reserved_mon_id {
 	GAUDI2_RESERVED_MON_CS_COMPLETION_LAST =
 			GAUDI2_RESERVED_MON_CS_COMPLETION_FIRST + GAUDI2_MAX_PENDING_CS - 1,
 	GAUDI2_RESERVED_MON_KDMA_COMPLETION,
+	GAUDI2_RESERVED_MON_DEC_NRM_FIRST,
+	GAUDI2_RESERVED_MON_DEC_NRM_LAST =
+			GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * NUMBER_OF_DEC - 1,
+	GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST,
+	GAUDI2_RESERVED_MON_DEC_ABNRM_LAST =
+			GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * NUMBER_OF_DEC - 1,
 	GAUDI2_RESERVED_MON_NUMBER
 };
 
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h b/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h
index 3fd5cf4a8645..eed16d642a5a 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h
@@ -135,4 +135,7 @@
 #define PCIE_DBI_MSIX_ADDRESS_MATCH_LOW_OFF_MSIX_ADDRESS_MATCH_EN_SHIFT	0
 #define PCIE_DBI_MSIX_ADDRESS_MATCH_LOW_OFF_MSIX_ADDRESS_MATCH_EN_MASK	0x1
 
+#define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_SHIFT	15
+#define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK		0x8000
+
 #endif /* GAUDI2_MASKS_H_ */
diff --git a/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h b/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
index e5fe9d5e07f5..d0e2c68a639f 100644
--- a/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
+++ b/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
@@ -415,9 +415,15 @@
 #define SFT_DCORE_OFFSET (mmSFT1_HBW_RTR_IF0_RTR_CTRL_BASE - mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE)
 #define SFT_IF_OFFSET (mmSFT0_HBW_RTR_IF1_RTR_CTRL_BASE - mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE)
 
+#define BRDG_CTRL_NRM_MSIX_LBW_AWADDR	\
+	(mmDCORE0_VDEC0_BRDG_CTRL_NRM_MSIX_LBW_AWADDR - mmDCORE0_VDEC0_BRDG_CTRL_BASE)
+
 #define BRDG_CTRL_NRM_MSIX_LBW_WDATA	\
 	(mmDCORE0_VDEC0_BRDG_CTRL_NRM_MSIX_LBW_WDATA - mmDCORE0_VDEC0_BRDG_CTRL_BASE)
 
+#define BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR	\
+	(mmDCORE0_VDEC0_BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR - mmDCORE0_VDEC0_BRDG_CTRL_BASE)
+
 #define BRDG_CTRL_ABNRM_MSIX_LBW_WDATA	\
 	(mmDCORE0_VDEC0_BRDG_CTRL_ABNRM_MSIX_LBW_WDATA - mmDCORE0_VDEC0_BRDG_CTRL_BASE)
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 07/12] habanalabs/gaudi2: map virtual MSI-X doorbell memory for user
  2022-07-11  6:29 [PATCH 01/12] habanalabs: fixes to the poll-timeout macros Oded Gabbay
                   ` (4 preceding siblings ...)
  2022-07-11  6:29 ` [PATCH 06/12] habanalabs/gaudi2: modify decoder " Oded Gabbay
@ 2022-07-11  6:29 ` Oded Gabbay
  2022-07-11  6:29 ` [PATCH 08/12] habanalabs: expose only valid debugfs nodes Oded Gabbay
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Oded Gabbay @ 2022-07-11  6:29 UTC (permalink / raw)
  To: linux-kernel; +Cc: Tomer Tayar

From: Tomer Tayar <ttayar@habana.ai>

Upon the initialization of a user context, map the host memory page of
the virtual MSI-X doorbell in the device MMU.
A reserved VA is used for this purpose, so user can use it directly
without any allocation/map operation.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c       | 43 ++++++++++++++++++-
 .../misc/habanalabs/include/gaudi2/gaudi2.h   |  3 ++
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 760ce5b7f955..eba8b0d674c3 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -1920,7 +1920,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
 	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
 	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
 
-	prop->hints_host_reserved_va_range.start_addr =	RESERVED_VA_RANGE_FOR_ARC_ON_HOST_START;
+	prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
 	prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
 	prop->hints_host_hpage_reserved_va_range.start_addr =
 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
@@ -9241,6 +9241,35 @@ static void gaudi2_restore_user_registers(struct hl_device *hdev)
 	gaudi2_restore_user_qm_registers(hdev);
 }
 
+static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct gaudi2_device *gaudi2 = hdev->asic_specific;
+	int rc;
+
+	rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
+				gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
+	if (rc)
+		dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
+			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
+
+	return rc;
+}
+
+static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	int rc;
+
+	rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
+				prop->pmmu.page_size, true);
+	if (rc)
+		dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
+			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
+}
+
 static int gaudi2_ctx_init(struct hl_ctx *ctx)
 {
 	int rc;
@@ -9257,7 +9286,15 @@ static int gaudi2_ctx_init(struct hl_ctx *ctx)
 	else
 		gaudi2_restore_user_registers(ctx->hdev);
 
-	return gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
+	rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
+	if (rc)
+		return rc;
+
+	rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
+	if (rc)
+		gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
+
+	return rc;
 }
 
 static void gaudi2_ctx_fini(struct hl_ctx *ctx)
@@ -9266,6 +9303,8 @@ static void gaudi2_ctx_fini(struct hl_ctx *ctx)
 		return;
 
 	gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
+
+	gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
 }
 
 static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
diff --git a/drivers/misc/habanalabs/include/gaudi2/gaudi2.h b/drivers/misc/habanalabs/include/gaudi2/gaudi2.h
index 071fc5a820f7..5b4f9e108798 100644
--- a/drivers/misc/habanalabs/include/gaudi2/gaudi2.h
+++ b/drivers/misc/habanalabs/include/gaudi2/gaudi2.h
@@ -54,6 +54,9 @@
 #define RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START	0x1001500000000000ull
 #define RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END	0x10016FFFFFFFFFFFull
 
+#define RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START	0xFFF077FFFFFF0000ull
+#define RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_END	0xFFF077FFFFFFFFFFull
+
 #define RESERVED_VA_RANGE_FOR_ARC_ON_HOST_START	0xFFF0780000000000ull
 #define RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END	0xFFF07FFFFFFFFFFFull
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 08/12] habanalabs: expose only valid debugfs nodes
  2022-07-11  6:29 [PATCH 01/12] habanalabs: fixes to the poll-timeout macros Oded Gabbay
                   ` (5 preceding siblings ...)
  2022-07-11  6:29 ` [PATCH 07/12] habanalabs/gaudi2: map virtual MSI-X doorbell memory for user Oded Gabbay
@ 2022-07-11  6:29 ` Oded Gabbay
  2022-07-11  6:30 ` [PATCH 09/12] habanalabs: fix update of is_in_soft_reset Oded Gabbay
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Oded Gabbay @ 2022-07-11  6:29 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

In case security is enabled on the device, some debugfs nodes will
fail. Hence, we do not expose them.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../ABI/testing/debugfs-driver-habanalabs     | 24 +++--
 drivers/misc/habanalabs/common/debugfs.c      | 94 ++++++++++---------
 2 files changed, 66 insertions(+), 52 deletions(-)

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index d9580f5d08a0..c915bf17b293 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -130,14 +130,16 @@ Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Sets I2C device address for I2C transaction that is generated
-                by the device's CPU
+                by the device's CPU. Not available when device is loaded with secured
+                firmware
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/i2c_bus
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Sets I2C bus address for I2C transaction that is generated by
-                the device's CPU
+                the device's CPU. Not available when device is loaded with secured
+                firmware
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/i2c_data
 Date:           Jan 2019
@@ -145,39 +147,45 @@ KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Triggers an I2C transaction that is generated by the device's
                 CPU. Writing to this file generates a write transaction while
-                reading from the file generates a read transaction
+                reading from the file generates a read transaction. Not available
+                when device is loaded with secured firmware
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/i2c_len
 Date:           Dec 2021
 KernelVersion:  5.17
 Contact:        obitton@habana.ai
 Description:    Sets I2C length in bytes for I2C transaction that is generated by
-                the device's CPU
+                the device's CPU. Not available when device is loaded with secured
+                firmware
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/i2c_reg
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Sets I2C register id for I2C transaction that is generated by
-                the device's CPU
+                the device's CPU. Not available when device is loaded with secured
+                firmware
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/led0
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
-Description:    Sets the state of the first S/W led on the device
+Description:    Sets the state of the first S/W led on the device. Not available
+                when device is loaded with secured firmware
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/led1
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
-Description:    Sets the state of the second S/W led on the device
+Description:    Sets the state of the second S/W led on the device. Not available
+                when device is loaded with secured firmware
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/led2
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
-Description:    Sets the state of the third S/W led on the device
+Description:    Sets the state of the third S/W led on the device. Not available
+                when device is loaded with secured firmware
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/memory_scrub
 Date:           May 2022
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index 3da39662abd9..64439f33a19b 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -1562,6 +1562,53 @@ static const struct file_operations hl_debugfs_fops = {
 	.release = single_release,
 };
 
+static void add_secured_nodes(struct hl_dbg_device_entry *dev_entry)
+{
+	debugfs_create_u8("i2c_bus",
+				0644,
+				dev_entry->root,
+				&dev_entry->i2c_bus);
+
+	debugfs_create_u8("i2c_addr",
+				0644,
+				dev_entry->root,
+				&dev_entry->i2c_addr);
+
+	debugfs_create_u8("i2c_reg",
+				0644,
+				dev_entry->root,
+				&dev_entry->i2c_reg);
+
+	debugfs_create_u8("i2c_len",
+				0644,
+				dev_entry->root,
+				&dev_entry->i2c_len);
+
+	debugfs_create_file("i2c_data",
+				0644,
+				dev_entry->root,
+				dev_entry,
+				&hl_i2c_data_fops);
+
+	debugfs_create_file("led0",
+				0200,
+				dev_entry->root,
+				dev_entry,
+				&hl_led0_fops);
+
+	debugfs_create_file("led1",
+				0200,
+				dev_entry->root,
+				dev_entry,
+				&hl_led1_fops);
+
+	debugfs_create_file("led2",
+				0200,
+				dev_entry->root,
+				dev_entry,
+				&hl_led2_fops);
+}
+
 void hl_debugfs_add_device(struct hl_device *hdev)
 {
 	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
@@ -1632,50 +1679,6 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 				dev_entry,
 				&hl_power_fops);
 
-	debugfs_create_u8("i2c_bus",
-				0644,
-				dev_entry->root,
-				&dev_entry->i2c_bus);
-
-	debugfs_create_u8("i2c_addr",
-				0644,
-				dev_entry->root,
-				&dev_entry->i2c_addr);
-
-	debugfs_create_u8("i2c_reg",
-				0644,
-				dev_entry->root,
-				&dev_entry->i2c_reg);
-
-	debugfs_create_u8("i2c_len",
-				0644,
-				dev_entry->root,
-				&dev_entry->i2c_len);
-
-	debugfs_create_file("i2c_data",
-				0644,
-				dev_entry->root,
-				dev_entry,
-				&hl_i2c_data_fops);
-
-	debugfs_create_file("led0",
-				0200,
-				dev_entry->root,
-				dev_entry,
-				&hl_led0_fops);
-
-	debugfs_create_file("led1",
-				0200,
-				dev_entry->root,
-				dev_entry,
-				&hl_led1_fops);
-
-	debugfs_create_file("led2",
-				0200,
-				dev_entry->root,
-				dev_entry,
-				&hl_led2_fops);
-
 	debugfs_create_file("device",
 				0200,
 				dev_entry->root,
@@ -1754,6 +1757,9 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 		entry->info_ent = &hl_debugfs_list[i];
 		entry->dev_entry = dev_entry;
 	}
+
+	if (!hdev->asic_prop.fw_security_enabled)
+		add_secured_nodes(dev_entry);
 }
 
 void hl_debugfs_remove_device(struct hl_device *hdev)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 09/12] habanalabs: fix update of is_in_soft_reset
  2022-07-11  6:29 [PATCH 01/12] habanalabs: fixes to the poll-timeout macros Oded Gabbay
                   ` (6 preceding siblings ...)
  2022-07-11  6:29 ` [PATCH 08/12] habanalabs: expose only valid debugfs nodes Oded Gabbay
@ 2022-07-11  6:30 ` Oded Gabbay
  2022-07-11  6:30 ` [PATCH 10/12] habanalabs: add status of reset after device release Oded Gabbay
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Oded Gabbay @ 2022-07-11  6:30 UTC (permalink / raw)
  To: linux-kernel

reset_info.is_in_soft_reset should be updated both before in_reset
and inside the spin lock of the reset info structure.

The reasons are:

- When we are inside soft reset, it implies we are in reset. Therefore,
  if someone checks if we are in soft reset, he can deduce we are
  in reset, while the opposite is not correct and might be misleading.

- Both these flags are changed together so they must be changed
  inside the reset info spinlock.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 4391eb22ddb8..5bc291c11e9b 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1346,7 +1346,14 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 			spin_unlock(&hdev->reset_info.lock);
 			return 0;
 		}
+
+		/* This still allows the completion of some KDMA ops.
+		 * Update this before in_reset because is_in_soft_reset implies we are in reset
+		 */
+		hdev->reset_info.is_in_soft_reset = !hard_reset;
+
 		hdev->reset_info.in_reset = 1;
+
 		spin_unlock(&hdev->reset_info.lock);
 
 		if (delay_reset)
@@ -1354,9 +1361,6 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 
 		handle_reset_trigger(hdev, flags);
 
-		/* This still allows the completion of some KDMA ops */
-		hdev->reset_info.is_in_soft_reset = !hard_reset;
-
 		/* This also blocks future CS/VM/JOB completion operations */
 		hdev->disabled = true;
 
@@ -1565,7 +1569,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 	}
 
 	spin_lock(&hdev->reset_info.lock);
-	hdev->reset_info.is_in_soft_reset = false;
+	hdev->reset_info.is_in_soft_reset = 0;
 
 	/* Schedule hard reset only if requested and if not already in hard reset.
 	 * We keep 'in_reset' enabled, so no other reset can go in during the hard
@@ -1612,18 +1616,22 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 
 out_err:
 	hdev->disabled = true;
-	hdev->reset_info.is_in_soft_reset = false;
+
+	spin_lock(&hdev->reset_info.lock);
+	hdev->reset_info.is_in_soft_reset = 0;
 
 	if (hard_reset) {
 		dev_err(hdev->dev, "Failed to reset! Device is NOT usable\n");
 		hdev->reset_info.hard_reset_cnt++;
 	} else if (reset_upon_device_release) {
+		spin_unlock(&hdev->reset_info.lock);
 		dev_err(hdev->dev, "Failed to reset device after user release\n");
 		flags |= HL_DRV_RESET_HARD;
 		flags &= ~HL_DRV_RESET_DEV_RELEASE;
 		hard_reset = true;
 		goto again;
 	} else {
+		spin_unlock(&hdev->reset_info.lock);
 		dev_err(hdev->dev, "Failed to do soft-reset\n");
 		hdev->reset_info.soft_reset_cnt++;
 		flags |= HL_DRV_RESET_HARD;
@@ -1633,6 +1641,8 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 
 	hdev->reset_info.in_reset = 0;
 
+	spin_unlock(&hdev->reset_info.lock);
+
 	return rc;
 }
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 10/12] habanalabs: add status of reset after device release
  2022-07-11  6:29 [PATCH 01/12] habanalabs: fixes to the poll-timeout macros Oded Gabbay
                   ` (7 preceding siblings ...)
  2022-07-11  6:30 ` [PATCH 09/12] habanalabs: fix update of is_in_soft_reset Oded Gabbay
@ 2022-07-11  6:30 ` Oded Gabbay
  2022-07-11  6:30 ` [PATCH 11/12] habanalabs: rename soft reset to compute reset Oded Gabbay
  2022-07-11  6:30 ` [PATCH 12/12] habanalabs: move h/w dirty message to debug Oded Gabbay
  10 siblings, 0 replies; 12+ messages in thread
From: Oded Gabbay @ 2022-07-11  6:30 UTC (permalink / raw)
  To: linux-kernel

The user might want to know that the device is in reset after device
release, which, unlike a regular reset, is not an erroneous event.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c         | 17 +++++++++++------
 drivers/misc/habanalabs/common/habanalabs_drv.c |  6 +++++-
 include/uapi/misc/habanalabs.h                  |  5 ++++-
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 5bc291c11e9b..19c049046383 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -271,16 +271,20 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
 {
 	enum hl_device_status status;
 
-	if (hdev->reset_info.in_reset)
-		status = HL_DEVICE_STATUS_IN_RESET;
-	else if (hdev->reset_info.needs_reset)
+	if (hdev->reset_info.in_reset) {
+		if (hdev->reset_info.is_in_soft_reset)
+			status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE;
+		else
+			status = HL_DEVICE_STATUS_IN_RESET;
+	} else if (hdev->reset_info.needs_reset) {
 		status = HL_DEVICE_STATUS_NEEDS_RESET;
-	else if (hdev->disabled)
+	} else if (hdev->disabled) {
 		status = HL_DEVICE_STATUS_MALFUNCTION;
-	else if (!hdev->init_done)
+	} else if (!hdev->init_done) {
 		status = HL_DEVICE_STATUS_IN_DEVICE_CREATION;
-	else
+	} else {
 		status = HL_DEVICE_STATUS_OPERATIONAL;
+	}
 
 	return status;
 }
@@ -296,6 +300,7 @@ bool hl_device_operational(struct hl_device *hdev,
 
 	switch (current_status) {
 	case HL_DEVICE_STATUS_IN_RESET:
+	case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE:
 	case HL_DEVICE_STATUS_MALFUNCTION:
 	case HL_DEVICE_STATUS_NEEDS_RESET:
 		return false;
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index d900bae86168..f733ead605e7 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -165,7 +165,8 @@ int hl_device_open(struct inode *inode, struct file *filp)
 			"Can't open %s because it is %s\n",
 			dev_name(hdev->dev), hdev->status[status]);
 
-		if (status == HL_DEVICE_STATUS_IN_RESET)
+		if (status == HL_DEVICE_STATUS_IN_RESET ||
+					status == HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE)
 			rc = -EAGAIN;
 		else
 			rc = -EPERM;
@@ -395,6 +396,9 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
 	strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
 	strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
 					"in device creation", HL_STR_MAX);
+	strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
+					"in reset after device release", HL_STR_MAX);
+
 
 	/* First, we must find out which ASIC are we handling. This is needed
 	 * to configure the behavior of the driver (kernel parameters)
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 8c6ab71e7831..5d06d5c74dd1 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -684,6 +684,8 @@ enum hl_goya_dma_direction {
  * @HL_DEVICE_STATUS_NEEDS_RESET: Device needs reset because auto reset was disabled.
  * @HL_DEVICE_STATUS_IN_DEVICE_CREATION: Device is operational but its creation is still in
  *                                       progress.
+ * @HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE: Device is currently in a reset that was
+ *                                                  triggered because the user released the device
  * @HL_DEVICE_STATUS_LAST: Last status.
  */
 enum hl_device_status {
@@ -692,7 +694,8 @@ enum hl_device_status {
 	HL_DEVICE_STATUS_MALFUNCTION,
 	HL_DEVICE_STATUS_NEEDS_RESET,
 	HL_DEVICE_STATUS_IN_DEVICE_CREATION,
-	HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION
+	HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE,
+	HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE
 };
 
 enum hl_server_type {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 11/12] habanalabs: rename soft reset to compute reset
  2022-07-11  6:29 [PATCH 01/12] habanalabs: fixes to the poll-timeout macros Oded Gabbay
                   ` (8 preceding siblings ...)
  2022-07-11  6:30 ` [PATCH 10/12] habanalabs: add status of reset after device release Oded Gabbay
@ 2022-07-11  6:30 ` Oded Gabbay
  2022-07-11  6:30 ` [PATCH 12/12] habanalabs: move h/w dirty message to debug Oded Gabbay
  10 siblings, 0 replies; 12+ messages in thread
From: Oded Gabbay @ 2022-07-11  6:30 UTC (permalink / raw)
  To: linux-kernel

A compute reset can be either the traditional inference soft reset,
which is supported only in Goya, or the new reset upon device release,
which is supported in Gaudi2 and above.

Therefore, wherever suitable, use the terminology of compute reset
instead of soft reset.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c       | 28 +++++++++----------
 drivers/misc/habanalabs/common/firmware_if.c  |  4 +--
 drivers/misc/habanalabs/common/habanalabs.h   | 16 +++++------
 .../misc/habanalabs/common/habanalabs_ioctl.c |  2 +-
 drivers/misc/habanalabs/common/irq.c          |  2 +-
 drivers/misc/habanalabs/common/sysfs.c        |  2 +-
 drivers/misc/habanalabs/gaudi2/gaudi2.c       |  4 +--
 drivers/misc/habanalabs/goya/goya.c           |  2 +-
 8 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 19c049046383..b30aeb1c657f 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -272,7 +272,7 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
 	enum hl_device_status status;
 
 	if (hdev->reset_info.in_reset) {
-		if (hdev->reset_info.is_in_soft_reset)
+		if (hdev->reset_info.in_compute_reset)
 			status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE;
 		else
 			status = HL_DEVICE_STATUS_IN_RESET;
@@ -1306,7 +1306,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 	skip_wq_flush = !!(flags & HL_DRV_RESET_DEV_RELEASE);
 	delay_reset = !!(flags & HL_DRV_RESET_DELAY);
 
-	if (!hard_reset && !hdev->asic_prop.supports_soft_reset) {
+	if (!hard_reset && !hdev->asic_prop.supports_compute_reset) {
 		hard_instead_soft = true;
 		hard_reset = true;
 	}
@@ -1329,7 +1329,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 	}
 
 	if (hard_instead_soft)
-		dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");
+		dev_dbg(hdev->dev, "Doing hard-reset instead of compute reset\n");
 
 do_reset:
 	/* Re-entry of reset thread */
@@ -1345,17 +1345,17 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 		/* Block future CS/VM/JOB completion operations */
 		spin_lock(&hdev->reset_info.lock);
 		if (hdev->reset_info.in_reset) {
-			/* We only allow scheduling of a hard reset during soft reset */
-			if (hard_reset && hdev->reset_info.is_in_soft_reset)
+			/* We only allow scheduling of a hard reset during compute reset */
+			if (hard_reset && hdev->reset_info.in_compute_reset)
 				hdev->reset_info.hard_reset_schedule_flags = flags;
 			spin_unlock(&hdev->reset_info.lock);
 			return 0;
 		}
 
 		/* This still allows the completion of some KDMA ops
-		 * Update this before in_reset because is_in_soft_reset implies we are in reset
+		 * Update this before in_reset because in_compute_reset implies we are in reset
 		 */
-		hdev->reset_info.is_in_soft_reset = !hard_reset;
+		hdev->reset_info.in_compute_reset = !hard_reset;
 
 		hdev->reset_info.in_reset = 1;
 
@@ -1562,7 +1562,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 				dev_err(hdev->dev,
 					"Failed late init in reset after device release\n");
 			else
-				dev_err(hdev->dev, "Failed late init after soft reset\n");
+				dev_err(hdev->dev, "Failed late init after compute reset\n");
 			goto out_err;
 		}
 	}
@@ -1574,7 +1574,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 	}
 
 	spin_lock(&hdev->reset_info.lock);
-	hdev->reset_info.is_in_soft_reset = 0;
+	hdev->reset_info.in_compute_reset = 0;
 
 	/* Schedule hard reset only if requested and if not already in hard reset.
 	 * We keep 'in_reset' enabled, so no other reset can go in during the hard
@@ -1604,11 +1604,11 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 		 */
 		hdev->asic_funcs->enable_events_from_fw(hdev);
 	} else if (!reset_upon_device_release) {
-		hdev->reset_info.soft_reset_cnt++;
+		hdev->reset_info.compute_reset_cnt++;
 	}
 
 	if (schedule_hard_reset) {
-		dev_info(hdev->dev, "Performing hard reset scheduled during soft reset\n");
+		dev_info(hdev->dev, "Performing hard reset scheduled during compute reset\n");
 		flags = hdev->reset_info.hard_reset_schedule_flags;
 		hdev->reset_info.hard_reset_schedule_flags = 0;
 		hdev->disabled = true;
@@ -1623,7 +1623,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 	hdev->disabled = true;
 
 	spin_lock(&hdev->reset_info.lock);
-	hdev->reset_info.is_in_soft_reset = 0;
+	hdev->reset_info.in_compute_reset = 0;
 
 	if (hard_reset) {
 		dev_err(hdev->dev, "Failed to reset! Device is NOT usable\n");
@@ -1637,8 +1637,8 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 		goto again;
 	} else {
 		spin_unlock(&hdev->reset_info.lock);
-		dev_err(hdev->dev, "Failed to do soft-reset\n");
-		hdev->reset_info.soft_reset_cnt++;
+		dev_err(hdev->dev, "Failed to do compute reset\n");
+		hdev->reset_info.compute_reset_cnt++;
 		flags |= HL_DRV_RESET_HARD;
 		hard_reset = true;
 		goto again;
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index f80a8c9d2cc8..608ca67527a5 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -275,7 +275,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 	mutex_lock(&hdev->send_cpu_message_lock);
 
 	/* CPU-CP messages can be sent during soft-reset */
-	if (hdev->disabled && !hdev->reset_info.is_in_soft_reset) {
+	if (hdev->disabled && !hdev->reset_info.in_compute_reset) {
 		rc = 0;
 		goto out;
 	}
@@ -314,7 +314,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 		/* If FW performed reset just before sending it a packet, we will get a timeout.
 		 * This is expected behavior, hence no need for error message.
 		 */
-		if (!hl_device_operational(hdev, NULL) && !hdev->reset_info.is_in_soft_reset)
+		if (!hl_device_operational(hdev, NULL) && !hdev->reset_info.in_compute_reset)
 			dev_dbg(hdev->dev, "Device CPU packet timeout (0x%x) due to FW reset\n",
 					tmp);
 		else
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 0e45f2be13ed..d59bba9e55c9 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -647,7 +647,7 @@ struct hl_hints_range {
  *                         false otherwise.
  * @use_get_power_for_reset_history: To support backward compatibility for Goya
  *                                   and Gaudi
- * @supports_soft_reset: is soft reset supported.
+ * @supports_compute_reset: is a reset which is not a hard-reset supported by this asic.
  * @allow_inference_soft_reset: true if the ASIC supports soft reset that is
  *                              initiated by user or TDR. This is only true
  *                              in inference ASICs, as there is no real-world
@@ -760,7 +760,7 @@ struct asic_fixed_properties {
 	u8				dynamic_fw_load;
 	u8				gic_interrupts_enable;
 	u8				use_get_power_for_reset_history;
-	u8				supports_soft_reset;
+	u8				supports_compute_reset;
 	u8				allow_inference_soft_reset;
 	u8				configurable_stop_on_err;
 	u8				set_max_power_on_device_init;
@@ -2960,12 +2960,12 @@ struct last_error_session_info {
 /**
  * struct hl_reset_info - holds current device reset information.
  * @lock: lock to protect critical reset flows.
- * @soft_reset_cnt: number of soft reset since the driver was loaded.
- * @hard_reset_cnt: number of hard reset since the driver was loaded.
- * @hard_reset_schedule_flags: hard reset is scheduled to after current soft reset,
+ * @compute_reset_cnt: number of compute resets since the driver was loaded.
+ * @hard_reset_cnt: number of hard resets since the driver was loaded.
+ * @hard_reset_schedule_flags: hard reset is scheduled to after current compute reset,
  *                             here we hold the hard reset flags.
  * @in_reset: is device in reset flow.
- * @is_in_soft_reset: Device is currently in soft reset process.
+ * @in_compute_reset: Device is currently in reset but not in hard-reset.
  * @needs_reset: true if reset_on_lockup is false and device should be reset
  *               due to lockup.
  * @hard_reset_pending: is there a hard reset work pending.
@@ -2980,11 +2980,11 @@ struct last_error_session_info {
  */
 struct hl_reset_info {
 	spinlock_t	lock;
-	u32		soft_reset_cnt;
+	u32		compute_reset_cnt;
 	u32		hard_reset_cnt;
 	u32		hard_reset_schedule_flags;
 	u8		in_reset;
-	u8		is_in_soft_reset;
+	u8		in_compute_reset;
 	u8		needs_reset;
 	u8		hard_reset_pending;
 
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 96e12ab7a924..6a30bd98ab5e 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -294,7 +294,7 @@ static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args)
 		return -EINVAL;
 
 	reset_count.hard_reset_cnt = hdev->reset_info.hard_reset_cnt;
-	reset_count.soft_reset_cnt = hdev->reset_info.soft_reset_cnt;
+	reset_count.soft_reset_cnt = hdev->reset_info.compute_reset_cnt;
 
 	return copy_to_user(out, &reset_count,
 		min((size_t) max_size, sizeof(reset_count))) ? -EFAULT : 0;
diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c
index d60dafb03a8e..94d537fd4fde 100644
--- a/drivers/misc/habanalabs/common/irq.c
+++ b/drivers/misc/habanalabs/common/irq.c
@@ -411,7 +411,7 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
 		 */
 		dma_rmb();
 
-		if (hdev->disabled && !hdev->reset_info.is_in_soft_reset) {
+		if (hdev->disabled && !hdev->reset_info.in_compute_reset) {
 			dev_warn(hdev->dev, "Device disabled but received an EQ event\n");
 			goto skip_irq;
 		}
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index edebdf960785..6c5271f01160 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -291,7 +291,7 @@ static ssize_t soft_reset_cnt_show(struct device *dev,
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
 
-	return sprintf(buf, "%d\n", hdev->reset_info.soft_reset_cnt);
+	return sprintf(buf, "%d\n", hdev->reset_info.compute_reset_cnt);
 }
 
 static ssize_t hard_reset_cnt_show(struct device *dev,
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index eba8b0d674c3..2ce1fed8ef26 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -2392,7 +2392,7 @@ static int gaudi2_cpucp_info_get(struct hl_device *hdev)
 	/* No point of asking this information again when not doing hard reset, as the device
 	 * CPU hasn't been reset
 	 */
-	if (hdev->reset_info.is_in_soft_reset)
+	if (hdev->reset_info.in_compute_reset)
 		return 0;
 
 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
@@ -3014,7 +3014,7 @@ static int gaudi2_sw_init(struct hl_device *hdev)
 	hdev->supports_cb_mapping = true;
 	hdev->supports_wait_for_multi_cs = false;
 
-	prop->supports_soft_reset = true;
+	prop->supports_compute_reset = true;
 
 	hdev->asic_funcs->set_pci_memory_regions(hdev);
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 216570938b91..573f8f7df976 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -1038,7 +1038,7 @@ static int goya_sw_init(struct hl_device *hdev)
 
 	spin_lock_init(&goya->hw_queues_lock);
 	hdev->supports_coresight = true;
-	hdev->asic_prop.supports_soft_reset = true;
+	hdev->asic_prop.supports_compute_reset = true;
 	hdev->asic_prop.allow_inference_soft_reset = true;
 	hdev->supports_wait_for_multi_cs = false;
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 12/12] habanalabs: move h/w dirty message to debug
  2022-07-11  6:29 [PATCH 01/12] habanalabs: fixes to the poll-timeout macros Oded Gabbay
                   ` (9 preceding siblings ...)
  2022-07-11  6:30 ` [PATCH 11/12] habanalabs: rename soft reset to compute reset Oded Gabbay
@ 2022-07-11  6:30 ` Oded Gabbay
  10 siblings, 0 replies; 12+ messages in thread
From: Oded Gabbay @ 2022-07-11  6:30 UTC (permalink / raw)
  To: linux-kernel

H/W being dirty during initialization is completely expected in case
f/w tools are used before loading the driver. As it is not an error,
and as it doesn't give any meaningful information to the user,
there is no point in printing it at info level, so demote it to debug.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c   | 3 +--
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 2 +-
 drivers/misc/habanalabs/goya/goya.c     | 3 +--
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 8c1b1824d1a9..cb2988e2c7a8 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -870,8 +870,7 @@ static int gaudi_early_init(struct hl_device *hdev)
 	}
 
 	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
-		dev_info(hdev->dev,
-			"H/W state is dirty, must reset before initializing\n");
+		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
 		hdev->asic_funcs->hw_fini(hdev, true, false);
 	}
 
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 2ce1fed8ef26..1dfd923543d0 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -2536,7 +2536,7 @@ static int gaudi2_early_init(struct hl_device *hdev)
 	}
 
 	if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
-		dev_info(hdev->dev, "H/W state is dirty, must reset before initializing\n");
+		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
 		hdev->asic_funcs->hw_fini(hdev, true, false);
 	}
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 573f8f7df976..db4487c33582 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -673,8 +673,7 @@ static int goya_early_init(struct hl_device *hdev)
 	}
 
 	if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
-		dev_info(hdev->dev,
-			"H/W state is dirty, must reset before initializing\n");
+		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
 		hdev->asic_funcs->hw_fini(hdev, true, false);
 	}
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2022-07-11  6:30 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-11  6:29 [PATCH 01/12] habanalabs: fixes to the poll-timeout macros Oded Gabbay
2022-07-11  6:29 ` [PATCH 02/12] habanalabs: add a value field to hl_fw_send_pci_access_msg() Oded Gabbay
2022-07-11  6:29 ` [PATCH 03/12] habanalabs/gaudi2: configure virtual MSI-X doorbell interface Oded Gabbay
2022-07-11  6:29 ` [PATCH 04/12] habanalabs/gaudi2: replace defines for reserved sob/mob with enums Oded Gabbay
2022-07-11  6:29 ` [PATCH 05/12] habanalabs/gaudi2: modify CS completion CQ to use virtual MSI-X doorbell Oded Gabbay
2022-07-11  6:29 ` [PATCH 06/12] habanalabs/gaudi2: modify decoder " Oded Gabbay
2022-07-11  6:29 ` [PATCH 07/12] habanalabs/gaudi2: map virtual MSI-X doorbell memory for user Oded Gabbay
2022-07-11  6:29 ` [PATCH 08/12] habanalabs: expose only valid debugfs nodes Oded Gabbay
2022-07-11  6:30 ` [PATCH 09/12] habanalabs: fix update of is_in_soft_reset Oded Gabbay
2022-07-11  6:30 ` [PATCH 10/12] habanalabs: add status of reset after device release Oded Gabbay
2022-07-11  6:30 ` [PATCH 11/12] habanalabs: rename soft reset to compute reset Oded Gabbay
2022-07-11  6:30 ` [PATCH 12/12] habanalabs: move h/w dirty message to debug Oded Gabbay

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.