linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/6] habanalabs/gaudi: don't use disabled ports in collective wait
@ 2021-06-06  8:23 Oded Gabbay
  2021-06-06  8:23 ` [PATCH 2/6] habanalabs/gaudi: add FW alive event support Oded Gabbay
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Oded Gabbay @ 2021-06-06  8:23 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

In the collective wait, we put jobs on the QMANs of all the NICs. The
code takes into account whether a port is disabled only in the case of
a PCI card. When this info arrives from the f/w, the code doesn't take
it into account, and it tries to schedule jobs on NICs that aren't
enabled, which is a bug.

To fix this, after the f/w sends us the list of disabled ports, we
update the state of the QMANs according to that list. In addition,
we need to update the HW_CAP bits so the collective wait operation
will not try to use those QMANs. We also need to update the collective
master monitor mask.

Moreover, we need to add protection against such cases in the future
and against the user trying to submit work to those QMANs.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 219 +++++++++-----------------
 1 file changed, 71 insertions(+), 148 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 007248946b63..68b1187c9c1f 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1000,9 +1000,27 @@ static void gaudi_sob_group_reset_error(struct kref *ref)
 		hw_sob_group->base_sob_id);
 }
 
+static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
+{
+	struct gaudi_collective_properties *prop;
+	int i;
+
+	prop = &gaudi->collective_props;
+
+	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
+
+	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
+		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
+			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
+					BIT(i % HL_MAX_SOBS_PER_MONITOR);
+	/* Set collective engine bit */
+	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
+				BIT(i % HL_MAX_SOBS_PER_MONITOR);
+}
+
 static int gaudi_collective_init(struct hl_device *hdev)
 {
-	u32 i, master_monitor_sobs, sob_id, reserved_sobs_per_group;
+	u32 i, sob_id, reserved_sobs_per_group;
 	struct gaudi_collective_properties *prop;
 	struct gaudi_device *gaudi;
 
@@ -1028,22 +1046,7 @@ static int gaudi_collective_init(struct hl_device *hdev)
 		gaudi_collective_map_sobs(hdev, i);
 	}
 
-	prop->mstr_sob_mask[0] = 0;
-	master_monitor_sobs = HL_MAX_SOBS_PER_MONITOR;
-	for (i = 0 ; i < master_monitor_sobs ; i++)
-		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
-			prop->mstr_sob_mask[0] |= BIT(i);
-
-	prop->mstr_sob_mask[1] = 0;
-	master_monitor_sobs =
-		NIC_NUMBER_OF_ENGINES - HL_MAX_SOBS_PER_MONITOR;
-	for (i = 0 ; i < master_monitor_sobs; i++) {
-		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
-			prop->mstr_sob_mask[1] |= BIT(i);
-	}
-
-	/* Set collective engine bit */
-	prop->mstr_sob_mask[1] |= BIT(i);
+	gaudi_collective_mstr_sob_mask_set(gaudi);
 
 	return 0;
 }
@@ -4272,8 +4275,8 @@ static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
 	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
 	struct gaudi_device *gaudi = hdev->asic_specific;
-	int dma_id;
 	bool invalid_queue = false;
+	int dma_id;
 
 	switch (hw_queue_id) {
 	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
@@ -4499,164 +4502,84 @@ static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
 		db_reg_offset = mmTPC7_QM_PQ_PI_3;
 		break;
 
-	case GAUDI_QUEUE_ID_NIC_0_0:
-		db_reg_offset = mmNIC0_QM0_PQ_PI_0;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_0_1:
-		db_reg_offset = mmNIC0_QM0_PQ_PI_1;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_0_2:
-		db_reg_offset = mmNIC0_QM0_PQ_PI_2;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_0_3:
-		db_reg_offset = mmNIC0_QM0_PQ_PI_3;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_1_0:
-		db_reg_offset = mmNIC0_QM1_PQ_PI_0;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_1_1:
-		db_reg_offset = mmNIC0_QM1_PQ_PI_1;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_1_2:
-		db_reg_offset = mmNIC0_QM1_PQ_PI_2;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_1_3:
-		db_reg_offset = mmNIC0_QM1_PQ_PI_3;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_2_0:
-		db_reg_offset = mmNIC1_QM0_PQ_PI_0;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_2_1:
-		db_reg_offset = mmNIC1_QM0_PQ_PI_1;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_2_2:
-		db_reg_offset = mmNIC1_QM0_PQ_PI_2;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_2_3:
-		db_reg_offset = mmNIC1_QM0_PQ_PI_3;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_3_0:
-		db_reg_offset = mmNIC1_QM1_PQ_PI_0;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_3_1:
-		db_reg_offset = mmNIC1_QM1_PQ_PI_1;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_3_2:
-		db_reg_offset = mmNIC1_QM1_PQ_PI_2;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_3_3:
-		db_reg_offset = mmNIC1_QM1_PQ_PI_3;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_4_0:
-		db_reg_offset = mmNIC2_QM0_PQ_PI_0;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_4_1:
-		db_reg_offset = mmNIC2_QM0_PQ_PI_1;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_4_2:
-		db_reg_offset = mmNIC2_QM0_PQ_PI_2;
-		break;
+	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
+		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
+			invalid_queue = true;
 
-	case GAUDI_QUEUE_ID_NIC_4_3:
-		db_reg_offset = mmNIC2_QM0_PQ_PI_3;
+		q_off = ((hw_queue_id - 1) & 0x3) * 4;
+		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
 		break;
 
-	case GAUDI_QUEUE_ID_NIC_5_0:
-		db_reg_offset = mmNIC2_QM1_PQ_PI_0;
-		break;
+	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
+		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
+			invalid_queue = true;
 
-	case GAUDI_QUEUE_ID_NIC_5_1:
-		db_reg_offset = mmNIC2_QM1_PQ_PI_1;
+		q_off = ((hw_queue_id - 1) & 0x3) * 4;
+		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
 		break;
 
-	case GAUDI_QUEUE_ID_NIC_5_2:
-		db_reg_offset = mmNIC2_QM1_PQ_PI_2;
-		break;
+	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
+		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
+			invalid_queue = true;
 
-	case GAUDI_QUEUE_ID_NIC_5_3:
-		db_reg_offset = mmNIC2_QM1_PQ_PI_3;
+		q_off = ((hw_queue_id - 1) & 0x3) * 4;
+		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
 		break;
 
-	case GAUDI_QUEUE_ID_NIC_6_0:
-		db_reg_offset = mmNIC3_QM0_PQ_PI_0;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_6_1:
-		db_reg_offset = mmNIC3_QM0_PQ_PI_1;
-		break;
+	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
+		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
+			invalid_queue = true;
 
-	case GAUDI_QUEUE_ID_NIC_6_2:
-		db_reg_offset = mmNIC3_QM0_PQ_PI_2;
+		q_off = ((hw_queue_id - 1) & 0x3) * 4;
+		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
 		break;
 
-	case GAUDI_QUEUE_ID_NIC_6_3:
-		db_reg_offset = mmNIC3_QM0_PQ_PI_3;
-		break;
+	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
+		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
+			invalid_queue = true;
 
-	case GAUDI_QUEUE_ID_NIC_7_0:
-		db_reg_offset = mmNIC3_QM1_PQ_PI_0;
+		q_off = ((hw_queue_id - 1) & 0x3) * 4;
+		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
 		break;
 
-	case GAUDI_QUEUE_ID_NIC_7_1:
-		db_reg_offset = mmNIC3_QM1_PQ_PI_1;
-		break;
+	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
+		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
+			invalid_queue = true;
 
-	case GAUDI_QUEUE_ID_NIC_7_2:
-		db_reg_offset = mmNIC3_QM1_PQ_PI_2;
+		q_off = ((hw_queue_id - 1) & 0x3) * 4;
+		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
 		break;
 
-	case GAUDI_QUEUE_ID_NIC_7_3:
-		db_reg_offset = mmNIC3_QM1_PQ_PI_3;
-		break;
-
-	case GAUDI_QUEUE_ID_NIC_8_0:
-		db_reg_offset = mmNIC4_QM0_PQ_PI_0;
-		break;
+	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
+		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
+			invalid_queue = true;
 
-	case GAUDI_QUEUE_ID_NIC_8_1:
-		db_reg_offset = mmNIC4_QM0_PQ_PI_1;
+		q_off = ((hw_queue_id - 1) & 0x3) * 4;
+		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
 		break;
 
-	case GAUDI_QUEUE_ID_NIC_8_2:
-		db_reg_offset = mmNIC4_QM0_PQ_PI_2;
-		break;
+	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
+		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
+			invalid_queue = true;
 
-	case GAUDI_QUEUE_ID_NIC_8_3:
-		db_reg_offset = mmNIC4_QM0_PQ_PI_3;
+		q_off = ((hw_queue_id - 1) & 0x3) * 4;
+		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
 		break;
 
-	case GAUDI_QUEUE_ID_NIC_9_0:
-		db_reg_offset = mmNIC4_QM1_PQ_PI_0;
-		break;
+	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
+		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
+			invalid_queue = true;
 
-	case GAUDI_QUEUE_ID_NIC_9_1:
-		db_reg_offset = mmNIC4_QM1_PQ_PI_1;
+		q_off = ((hw_queue_id - 1) & 0x3) * 4;
+		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
 		break;
 
-	case GAUDI_QUEUE_ID_NIC_9_2:
-		db_reg_offset = mmNIC4_QM1_PQ_PI_2;
-		break;
+	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
+		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
+			invalid_queue = true;
 
-	case GAUDI_QUEUE_ID_NIC_9_3:
-		db_reg_offset = mmNIC4_QM1_PQ_PI_3;
+		q_off = ((hw_queue_id - 1) & 0x3) * 4;
+		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
 		break;
 
 	default:
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/6] habanalabs/gaudi: add FW alive event support
  2021-06-06  8:23 [PATCH 1/6] habanalabs/gaudi: don't use disabled ports in collective wait Oded Gabbay
@ 2021-06-06  8:23 ` Oded Gabbay
  2021-06-06  8:23 ` [PATCH 3/6] habanalabs: add debug flag to prevent failure on timeout Oded Gabbay
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Oded Gabbay @ 2021-06-06  8:23 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

In order for the driver to be aware of process or thread crashes inside
GAUDI's CPU, we introduce a new event which contains all the relevant
information. Upon receiving the event, the driver will dump the
information and reset the device.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c             | 15 +++++++++++++++
 drivers/misc/habanalabs/include/common/cpucp_if.h | 15 +++++++++++++++
 .../habanalabs/include/gaudi/gaudi_async_events.h |  1 +
 .../include/gaudi/gaudi_async_ids_map_extended.h  |  2 +-
 4 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 68b1187c9c1f..9a4b0b495a29 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7451,6 +7451,16 @@ static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
 			sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
 }
 
+static void gaudi_print_fw_alive_info(struct hl_device *hdev,
+					struct hl_eq_fw_alive *fw_alive)
+{
+	dev_err(hdev->dev,
+		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
+		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
+		"Minor" : "Critical", fw_alive->process_id,
+		fw_alive->thread_id, fw_alive->uptime_seconds);
+}
+
 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
@@ -7905,6 +7915,11 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
 		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
 		goto reset_device;
 
+	case GAUDI_EVENT_FW_ALIVE_S:
+		gaudi_print_irq_info(hdev, event_type, false);
+		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
+		goto reset_device;
+
 	default:
 		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
 				event_type);
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index c7da62243619..d4dc189a6c92 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -84,6 +84,20 @@ struct hl_eq_sm_sei_data {
 	__u8 pad[3];
 };
 
+enum hl_fw_alive_severity {
+	FW_ALIVE_SEVERITY_MINOR,
+	FW_ALIVE_SEVERITY_CRITICAL
+};
+
+struct hl_eq_fw_alive {
+	__le64 uptime_seconds;
+	__le32 process_id;
+	__le32 thread_id;
+	/* enum hl_fw_alive_severity */
+	__u8 severity;
+	__u8 pad[7];
+};
+
 struct hl_eq_entry {
 	struct hl_eq_header hdr;
 	union {
@@ -91,6 +105,7 @@ struct hl_eq_entry {
 		struct hl_eq_hbm_ecc_data hbm_ecc_data;
 		struct hl_eq_sm_sei_data sm_sei_data;
 		struct cpucp_pkt_sync_err pkt_sync_err;
+		struct hl_eq_fw_alive fw_alive;
 		__le64 data[7];
 	};
 };
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
index e8651abf84f2..f66c759952e4 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
@@ -303,6 +303,7 @@ enum gaudi_async_event_id {
 	GAUDI_EVENT_NIC3_QP1 = 619,
 	GAUDI_EVENT_NIC4_QP0 = 620,
 	GAUDI_EVENT_NIC4_QP1 = 621,
+	GAUDI_EVENT_FW_ALIVE_S = 645,
 	GAUDI_EVENT_DEV_RESET_REQ = 646,
 	GAUDI_EVENT_PKT_QUEUE_OUT_SYNC = 647,
 	GAUDI_EVENT_FIX_POWER_ENV_S = 658,
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h
index 3dc79c131805..e87554ab0102 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h
@@ -669,7 +669,7 @@ static struct gaudi_async_events_ids_map gaudi_irq_map_table[] = {
 	{ .fc_id = 642, .cpu_id = 491, .valid = 0, .name = "" },
 	{ .fc_id = 643, .cpu_id = 492, .valid = 0, .name = "" },
 	{ .fc_id = 644, .cpu_id = 493, .valid = 0, .name = "" },
-	{ .fc_id = 645, .cpu_id = 494, .valid = 0, .name = "" },
+	{ .fc_id = 645, .cpu_id = 494, .valid = 1, .name = "FW_ALIVE_S" },
 	{ .fc_id = 646, .cpu_id = 495, .valid = 1, .name = "DEV_RESET_REQ" },
 	{ .fc_id = 647, .cpu_id = 496, .valid = 1,
 		.name = "PKT_QUEUE_OUT_SYNC" },
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/6] habanalabs: add debug flag to prevent failure on timeout
  2021-06-06  8:23 [PATCH 1/6] habanalabs/gaudi: don't use disabled ports in collective wait Oded Gabbay
  2021-06-06  8:23 ` [PATCH 2/6] habanalabs/gaudi: add FW alive event support Oded Gabbay
@ 2021-06-06  8:23 ` Oded Gabbay
  2021-06-06  8:23 ` [PATCH 4/6] habanalabs: reset device upon FD close if not idle Oded Gabbay
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Oded Gabbay @ 2021-06-06  8:23 UTC (permalink / raw)
  To: linux-kernel; +Cc: Yuri Nudelman

From: Yuri Nudelman <ynudelman@habana.ai>

Sometimes it is useful to allow the command to continue running even
though the timeout has occurred, to differentiate between commands that
are really stuck and those that are just very time-consuming. This can
be achieved by passing a new debug flag alongside the cs,
HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT.

In any case, if the timeout occurs, a warning shall be printed, but
this shall not fail the submission.

Signed-off-by: Yuri Nudelman <ynudelman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../habanalabs/common/command_submission.c    | 25 +++++++++++++++----
 drivers/misc/habanalabs/common/habanalabs.h   |  5 ++++
 include/uapi/misc/habanalabs.h                |  1 +
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index ecd96fbe3150..516d190164e4 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -556,6 +556,13 @@ static void cs_do_release(struct kref *ref)
 	else if (!cs->submitted)
 		cs->fence->error = -EBUSY;
 
+	if (unlikely(cs->skip_reset_on_timeout)) {
+		dev_err(hdev->dev,
+			"Command submission %llu completed after %llu (s)\n",
+			cs->sequence,
+			(jiffies - cs->submission_time_jiffies) / HZ);
+	}
+
 	if (cs->timestamp)
 		cs->fence->timestamp = ktime_get();
 	complete_all(&cs->fence->completion);
@@ -571,6 +578,8 @@ static void cs_timedout(struct work_struct *work)
 	int rc;
 	struct hl_cs *cs = container_of(work, struct hl_cs,
 						 work_tdr.work);
+	bool skip_reset_on_timeout = cs->skip_reset_on_timeout;
+
 	rc = cs_get_unless_zero(cs);
 	if (!rc)
 		return;
@@ -581,7 +590,8 @@ static void cs_timedout(struct work_struct *work)
 	}
 
 	/* Mark the CS is timed out so we won't try to cancel its TDR */
-	cs->timedout = true;
+	if (likely(!skip_reset_on_timeout))
+		cs->timedout = true;
 
 	hdev = cs->ctx->hdev;
 
@@ -613,10 +623,12 @@ static void cs_timedout(struct work_struct *work)
 
 	cs_put(cs);
 
-	if (hdev->reset_on_lockup)
-		hl_device_reset(hdev, HL_RESET_TDR);
-	else
-		hdev->needs_reset = true;
+	if (likely(!skip_reset_on_timeout)) {
+		if (hdev->reset_on_lockup)
+			hl_device_reset(hdev, HL_RESET_TDR);
+		else
+			hdev->needs_reset = true;
+	}
 }
 
 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
@@ -650,6 +662,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	cs->type = cs_type;
 	cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
 	cs->timeout_jiffies = timeout;
+	cs->skip_reset_on_timeout =
+		!!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT);
+	cs->submission_time_jiffies = jiffies;
 	INIT_LIST_HEAD(&cs->job_list);
 	INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
 	kref_init(&cs->refcount);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index e751868b3ed3..56d2f41f8893 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1421,6 +1421,7 @@ struct hl_userptr {
  * @staged_sequence: the sequence of the staged submission this CS is part of,
  *                   relevant only if staged_cs is set.
  * @timeout_jiffies: cs timeout in jiffies.
+ * @submission_time_jiffies: submission time of the cs
  * @type: CS_TYPE_*.
  * @submitted: true if CS was submitted to H/W.
  * @completed: true if CS was completed by device.
@@ -1433,6 +1434,8 @@ struct hl_userptr {
  * @staged_first: true if this is the first staged CS and we need to receive
  *                timeout for this CS.
  * @staged_cs: true if this CS is part of a staged submission.
+ * @skip_reset_on_timeout: true if we shall not reset the device in case
+ *                         timeout occurs (debug scenario).
  */
 struct hl_cs {
 	u16			*jobs_in_queue_cnt;
@@ -1450,6 +1453,7 @@ struct hl_cs {
 	u64			sequence;
 	u64			staged_sequence;
 	u64			timeout_jiffies;
+	u64			submission_time_jiffies;
 	enum hl_cs_type		type;
 	u8			submitted;
 	u8			completed;
@@ -1460,6 +1464,7 @@ struct hl_cs {
 	u8			staged_last;
 	u8			staged_first;
 	u8			staged_cs;
+	u8			skip_reset_on_timeout;
 };
 
 /**
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 6d2d34c9f375..a47485a8d411 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -664,6 +664,7 @@ struct hl_cs_chunk {
 #define HL_CS_FLAGS_STAGED_SUBMISSION_FIRST	0x80
 #define HL_CS_FLAGS_STAGED_SUBMISSION_LAST	0x100
 #define HL_CS_FLAGS_CUSTOM_TIMEOUT		0x200
+#define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT	0x400
 
 #define HL_CS_STATUS_SUCCESS		0
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 4/6] habanalabs: reset device upon FD close if not idle
  2021-06-06  8:23 [PATCH 1/6] habanalabs/gaudi: don't use disabled ports in collective wait Oded Gabbay
  2021-06-06  8:23 ` [PATCH 2/6] habanalabs/gaudi: add FW alive event support Oded Gabbay
  2021-06-06  8:23 ` [PATCH 3/6] habanalabs: add debug flag to prevent failure on timeout Oded Gabbay
@ 2021-06-06  8:23 ` Oded Gabbay
  2021-06-06  8:23 ` [PATCH 5/6] habanalabs: skip valid test for boot_dev_sts regs Oded Gabbay
  2021-06-06  8:23 ` [PATCH 6/6] habanalabs: fix mask to obtain page offset Oded Gabbay
  4 siblings, 0 replies; 6+ messages in thread
From: Oded Gabbay @ 2021-06-06  8:23 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

If the device is not idle after the user closes the FD, we must reset
the device, because otherwise the next user who tries to open the FD
will encounter a non-functional device.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/context.c      |  9 ---------
 drivers/misc/habanalabs/common/device.c       | 20 ++++++++++++++++---
 drivers/misc/habanalabs/common/habanalabs.h   |  1 +
 .../misc/habanalabs/common/habanalabs_drv.c   |  1 +
 4 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index 62d705889ca8..19b6b045219e 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -12,7 +12,6 @@
 static void hl_ctx_fini(struct hl_ctx *ctx)
 {
 	struct hl_device *hdev = ctx->hdev;
-	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
 	int i;
 
 	/* Release all allocated pending cb's, those cb's were never
@@ -57,14 +56,6 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 
 		/* Scrub both SRAM and DRAM */
 		hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
-
-		if ((!hdev->pldm) && (hdev->pdev) &&
-				(!hdev->asic_funcs->is_device_idle(hdev,
-					idle_mask,
-					HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)))
-			dev_notice(hdev->dev,
-					"device not idle after user context is closed (0x%llx, 0x%llx)\n",
-						idle_mask[0], idle_mask[1]);
 	} else {
 		dev_dbg(hdev->dev, "closing kernel context\n");
 		hdev->asic_funcs->ctx_fini(ctx);
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index bc58a91bf50a..0056282cec94 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -51,6 +51,8 @@ bool hl_device_operational(struct hl_device *hdev,
 
 static void hpriv_release(struct kref *ref)
 {
+	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
+	bool device_is_idle = true;
 	struct hl_fpriv *hpriv;
 	struct hl_device *hdev;
 
@@ -71,7 +73,19 @@ static void hpriv_release(struct kref *ref)
 
 	kfree(hpriv);
 
-	if (hdev->reset_upon_device_release)
+	if ((!hdev->pldm) && (hdev->pdev) &&
+			(!hdev->asic_funcs->is_device_idle(hdev,
+				idle_mask,
+				HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL))) {
+		dev_err(hdev->dev,
+			"device not idle after user context is closed (0x%llx_%llx)\n",
+			idle_mask[1], idle_mask[0]);
+
+		device_is_idle = false;
+	}
+
+	if ((hdev->reset_if_device_not_idle && !device_is_idle)
+			|| hdev->reset_upon_device_release)
 		hl_device_reset(hdev, 0);
 }
 
@@ -1108,8 +1122,8 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 	if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
 			HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
 		dev_err(hdev->dev,
-			"device is not idle (mask %#llx %#llx) after reset\n",
-			idle_mask[0], idle_mask[1]);
+			"device is not idle (mask 0x%llx_%llx) after reset\n",
+			idle_mask[1], idle_mask[0]);
 		rc = -EIO;
 		goto out_err;
 	}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 56d2f41f8893..bcb5bfdd7f20 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2311,6 +2311,7 @@ struct hl_device {
 	u8				rl_enable;
 	u8				reset_on_preboot_fail;
 	u8				reset_upon_device_release;
+	u8				reset_if_device_not_idle;
 };
 
 
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 137e7dc63d3b..b55dd1c55166 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -264,6 +264,7 @@ static void set_driver_behavior_per_device(struct hl_device *hdev)
 	hdev->bmc_enable = 1;
 	hdev->hard_reset_on_fw_events = 1;
 	hdev->reset_on_preboot_fail = 1;
+	hdev->reset_if_device_not_idle = 1;
 
 	hdev->reset_pcilink = 0;
 	hdev->axi_drain = 0;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 5/6] habanalabs: skip valid test for boot_dev_sts regs
  2021-06-06  8:23 [PATCH 1/6] habanalabs/gaudi: don't use disabled ports in collective wait Oded Gabbay
                   ` (2 preceding siblings ...)
  2021-06-06  8:23 ` [PATCH 4/6] habanalabs: reset device upon FD close if not idle Oded Gabbay
@ 2021-06-06  8:23 ` Oded Gabbay
  2021-06-06  8:23 ` [PATCH 6/6] habanalabs: fix mask to obtain page offset Oded Gabbay
  4 siblings, 0 replies; 6+ messages in thread
From: Oded Gabbay @ 2021-06-06  8:23 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ohad Sharabi

From: Ohad Sharabi <osharabi@habana.ai>

Get rid of the need to check if boot_dev_sts is valid on every access
to the value read from these registers.

This is done by storing the register value in the hdev props ONLY if
the register is enabled.

This way, if the register is NOT enabled, none of the capability bits
will be set.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 56 +++++++++++---------
 drivers/misc/habanalabs/gaudi/gaudi.c        | 25 ++++-----
 2 files changed, 40 insertions(+), 41 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 40e91985cb48..9412e6707906 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -216,9 +216,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 		goto out;
 	}
 
-	if (prop->fw_cpu_boot_dev_sts0_valid &&
-				(prop->fw_app_cpu_boot_dev_sts0 &
-					CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN))
+	if (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
 		expected_ack_val = queue->pi;
 	else
 		expected_ack_val = CPUCP_PACKET_FENCE_VAL;
@@ -838,8 +836,8 @@ int get_used_pll_index(struct hl_device *hdev, u32 input_pll_index,
 	bool dynamic_pll;
 	int fw_pll_idx;
 
-	dynamic_pll = prop->fw_cpu_boot_dev_sts0_valid &&
-		(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_DYN_PLL_EN);
+	dynamic_pll = !!(prop->fw_app_cpu_boot_dev_sts0 &
+						CPU_BOOT_DEV_STS0_DYN_PLL_EN);
 
 	if (!dynamic_pll) {
 		/*
@@ -988,7 +986,7 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev,
 					u32 timeout)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	u32 status;
+	u32 status, reg_val;
 	int rc;
 
 	/* Need to check two possible scenarios:
@@ -1026,14 +1024,30 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev,
 		return -EIO;
 	}
 
-	prop->fw_preboot_cpu_boot_dev_sts0 = RREG32(sts_boot_dev_sts0_reg);
-	prop->fw_preboot_cpu_boot_dev_sts1 = RREG32(sts_boot_dev_sts1_reg);
+	/*
+	 * the registers DEV_STS* contain FW capabilities/features.
+	 * We can rely on this registers only if bit CPU_BOOT_DEV_STS*_ENABLED
+	 * is set.
+	 * In the first read of this register we store the value of this
+	 * register ONLY if the register is enabled (which will be propagated
+	 * to next stages) and also mark the register as valid.
+	 * In case it is not enabled the stored value will be left 0- all
+	 * caps/features are off
+	 */
+	reg_val = RREG32(sts_boot_dev_sts0_reg);
+	if (reg_val & CPU_BOOT_DEV_STS0_ENABLED) {
+		prop->fw_cpu_boot_dev_sts0_valid = true;
+		prop->fw_preboot_cpu_boot_dev_sts0 = reg_val;
+	}
 
-	if (prop->fw_preboot_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_ENABLED)
-		prop->dynamic_fw_load = !!(prop->fw_preboot_cpu_boot_dev_sts0 &
+	reg_val = RREG32(sts_boot_dev_sts1_reg);
+	if (reg_val & CPU_BOOT_DEV_STS1_ENABLED) {
+		prop->fw_cpu_boot_dev_sts1_valid = true;
+		prop->fw_preboot_cpu_boot_dev_sts1 = reg_val;
+	}
+
+	prop->dynamic_fw_load = !!(prop->fw_preboot_cpu_boot_dev_sts0 &
 						CPU_BOOT_DEV_STS0_FW_LD_COM_EN);
-	else
-		prop->dynamic_fw_load = 0;
 
 	/* initialize FW loader once we know what load protocol is used */
 	hdev->asic_funcs->init_firmware_loader(hdev);
@@ -1105,7 +1119,7 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
 	cpu_boot_dev_sts0 = prop->fw_preboot_cpu_boot_dev_sts0;
 	cpu_boot_dev_sts1 = prop->fw_preboot_cpu_boot_dev_sts1;
 
-	/* We read security status multiple times during boot:
+	/* We read boot_dev_sts registers multiple times during boot:
 	 * 1. preboot - a. Check whether the security status bits are valid
 	 *              b. Check whether fw security is enabled
 	 *              c. Check whether hard reset is done by preboot
@@ -1119,18 +1133,8 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
 	 * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
 	 * If set, then mark GIC controller to be disabled.
 	 */
-	if (cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_ENABLED) {
-		prop->fw_cpu_boot_dev_sts0_valid = 1;
-
-		if (cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
-			prop->hard_reset_done_by_fw = true;
-	} else {
-		prop->fw_cpu_boot_dev_sts0_valid = 0;
-	}
-
-	/* place holder for STS1 as no statuses are defined yet */
-	prop->fw_cpu_boot_dev_sts1_valid =
-			!!(cpu_boot_dev_sts1 & CPU_BOOT_DEV_STS1_ENABLED);
+	prop->hard_reset_done_by_fw =
+		!!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
 
 	dev_dbg(hdev->dev, "Firmware preboot boot device status0 %#x\n",
 							cpu_boot_dev_sts0);
@@ -1781,7 +1785,7 @@ static void hl_fw_boot_fit_update_state(struct hl_device *hdev,
 	prop->hard_reset_done_by_fw = false;
 
 	/* Read boot_cpu status bits */
-	if (prop->fw_cpu_boot_dev_sts0_valid) {
+	if (prop->fw_preboot_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_ENABLED) {
 		prop->fw_bootfit_cpu_boot_dev_sts0 =
 				RREG32(cpu_boot_dev_sts0_reg);
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 9a4b0b495a29..792d8a382c10 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1980,9 +1980,8 @@ static void gaudi_init_scrambler_sram(struct hl_device *hdev)
 	if (hdev->asic_prop.fw_security_enabled)
 		return;
 
-	if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid &&
-			(hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
-					CPU_BOOT_DEV_STS0_SRAM_SCR_EN))
+	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
+						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
 		return;
 
 	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
@@ -2052,9 +2051,8 @@ static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
 	if (hdev->asic_prop.fw_security_enabled)
 		return;
 
-	if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid &&
-			(hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
-					CPU_BOOT_DEV_STS0_DRAM_SCR_EN))
+	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
+					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
 		return;
 
 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
@@ -2122,9 +2120,8 @@ static void gaudi_init_e2e(struct hl_device *hdev)
 	if (hdev->asic_prop.fw_security_enabled)
 		return;
 
-	if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid &&
-			(hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
-					CPU_BOOT_DEV_STS0_E2E_CRED_EN))
+	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
+					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
 		return;
 
 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
@@ -2497,9 +2494,8 @@ static void gaudi_init_hbm_cred(struct hl_device *hdev)
 	if (hdev->asic_prop.fw_security_enabled)
 		return;
 
-	if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid &&
-			(hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
-					CPU_BOOT_DEV_STS0_HBM_CRED_EN))
+	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
+						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
 		return;
 
 	hbm0_wr = 0x33333333;
@@ -7477,9 +7473,8 @@ static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
 	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
 	int err = 0;
 
-	if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid &&
-			(hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
-				CPU_BOOT_DEV_STS0_HBM_ECC_EN)) {
+	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
+					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
 		if (!hbm_ecc_data) {
 			dev_err(hdev->dev, "No FW ECC data");
 			return 0;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 6/6] habanalabs: fix mask to obtain page offset
  2021-06-06  8:23 [PATCH 1/6] habanalabs/gaudi: don't use disabled ports in collective wait Oded Gabbay
                   ` (3 preceding siblings ...)
  2021-06-06  8:23 ` [PATCH 5/6] habanalabs: skip valid test for boot_dev_sts regs Oded Gabbay
@ 2021-06-06  8:23 ` Oded Gabbay
  4 siblings, 0 replies; 6+ messages in thread
From: Oded Gabbay @ 2021-06-06  8:23 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ohad Sharabi

From: Ohad Sharabi <osharabi@habana.ai>

When converting a virtual address to a physical one, we need to add the
correct offset to the physical page.

For this we need to use a mask that includes ALL bits of the page offset.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/mmu/mmu.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c
index b37189956b14..792d25b79ea6 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu.c
@@ -501,12 +501,20 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
 
 	if ((hops->range_type == HL_VA_RANGE_TYPE_DRAM) &&
 			!is_power_of_2(prop->dram_page_size)) {
-		u32 bit;
+		unsigned long dram_page_size = prop->dram_page_size;
 		u64 page_offset_mask;
 		u64 phys_addr_mask;
+		u32 bit;
 
-		bit = __ffs64((u64)prop->dram_page_size);
-		page_offset_mask = ((1ull << bit) - 1);
+		/*
+		 * find last set bit in page_size to cover all bits of page
+		 * offset. note that 1 has to be added to bit index.
+		 * note that the internal ulong variable is used to avoid
+		 * alignment issue.
+		 */
+		bit = find_last_bit(&dram_page_size,
+					sizeof(dram_page_size) * BITS_PER_BYTE) + 1;
+		page_offset_mask = (BIT_ULL(bit) - 1);
 		phys_addr_mask = ~page_offset_mask;
 		*phys_addr = (tmp_phys_addr & phys_addr_mask) |
 				(virt_addr & page_offset_mask);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-06-06  8:23 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-06  8:23 [PATCH 1/6] habanalabs/gaudi: don't use disabled ports in collective wait Oded Gabbay
2021-06-06  8:23 ` [PATCH 2/6] habanalabs/gaudi: add FW alive event support Oded Gabbay
2021-06-06  8:23 ` [PATCH 3/6] habanalabs: add debug flag to prevent failure on timeout Oded Gabbay
2021-06-06  8:23 ` [PATCH 4/6] habanalabs: reset device upon FD close if not idle Oded Gabbay
2021-06-06  8:23 ` [PATCH 5/6] habanalabs: skip valid test for boot_dev_sts regs Oded Gabbay
2021-06-06  8:23 ` [PATCH 6/6] habanalabs: fix mask to obtain page offset Oded Gabbay

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).