* [PATCH 1/4] accel/habanalabs: fix a missing-braces compilation warning
@ 2023-03-19 9:58 Oded Gabbay
2023-03-19 9:58 ` [PATCH 2/4] accel/habanalabs: add handling for unexpected user event Oded Gabbay
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Oded Gabbay @ 2023-03-19 9:58 UTC (permalink / raw)
To: dri-devel; +Cc: Tomer Tayar
From: Tomer Tayar <ttayar@habana.ai>
Replace initialization of "struct cpucp_packet" from "{0} to "{}" to
avoid a "missing braces around initializer" compilation warning.
Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/accel/habanalabs/common/firmware_if.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c
index da892d8fb3d6..7ea611392f8c 100644
--- a/drivers/accel/habanalabs/common/firmware_if.c
+++ b/drivers/accel/habanalabs/common/firmware_if.c
@@ -3152,7 +3152,7 @@ int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_in
int hl_fw_send_generic_request(struct hl_device *hdev, enum hl_passthrough_type sub_opcode,
dma_addr_t buff, u32 *size)
{
- struct cpucp_packet pkt = {0};
+ struct cpucp_packet pkt = {};
u64 result;
int rc = 0;
--
2.40.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/4] accel/habanalabs: add handling for unexpected user event
2023-03-19 9:58 [PATCH 1/4] accel/habanalabs: fix a missing-braces compilation warning Oded Gabbay
@ 2023-03-19 9:58 ` Oded Gabbay
2023-03-19 9:58 ` [PATCH 3/4] accel/habanalabs: change razwi handle after fw fix Oded Gabbay
2023-03-19 9:58 ` [PATCH 4/4] accel/habanalabs: remove redundant TODOs Oded Gabbay
2 siblings, 0 replies; 4+ messages in thread
From: Oded Gabbay @ 2023-03-19 9:58 UTC (permalink / raw)
To: dri-devel; +Cc: Ofir Bitton
From: Ofir Bitton <obitton@habana.ai>
In order for the user to be aware of unexpected events in Gaudi2 that
aren't assigned to a specific engine, we are adding the handling of
this dedicated interrupt.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/accel/habanalabs/common/habanalabs.h | 7 +++++-
drivers/accel/habanalabs/common/irq.c | 8 ++++++
drivers/accel/habanalabs/gaudi2/gaudi2.c | 25 +++++++++++++++++++
drivers/accel/habanalabs/gaudi2/gaudi2P.h | 7 ++++--
.../accel/habanalabs/include/gaudi2/gaudi2.h | 2 ++
5 files changed, 46 insertions(+), 3 deletions(-)
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index c01677ed3c07..1636f6a700b9 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -662,6 +662,7 @@ struct hl_hints_range {
* @user_interrupt_count: number of user interrupts.
* @user_dec_intr_count: number of decoder interrupts exposed to user.
* @tpc_interrupt_id: interrupt id for TPC to use in order to raise events towards the host.
+ * @unexpected_user_error_interrupt_id: interrupt id used to indicate an unexpected user error.
* @cache_line_size: device cache line size.
* @server_type: Server type that the ASIC is currently installed in.
* The value is according to enum hl_server_type in uapi file.
@@ -792,6 +793,7 @@ struct asic_fixed_properties {
u16 user_interrupt_count;
u16 user_dec_intr_count;
u16 tpc_interrupt_id;
+ u16 unexpected_user_error_interrupt_id;
u16 cache_line_size;
u16 server_type;
u8 completion_queues_count;
@@ -1101,7 +1103,8 @@ struct hl_cq {
enum hl_user_interrupt_type {
HL_USR_INTERRUPT_CQ = 0,
HL_USR_INTERRUPT_DECODER,
- HL_USR_INTERRUPT_TPC
+ HL_USR_INTERRUPT_TPC,
+ HL_USR_INTERRUPT_UNEXPECTED
};
/**
@@ -3155,6 +3158,7 @@ struct hl_reset_info {
* interrupt, driver will monitor the list of fences
* registered to this interrupt.
* @tpc_interrupt: single TPC interrupt for all TPCs.
+ * @unexpected_error_interrupt: single interrupt for unexpected user error indication.
* @common_user_cq_interrupt: common user CQ interrupt for all user CQ interrupts.
* upon any user CQ interrupt, driver will monitor the
* list of fences registered to this common structure.
@@ -3340,6 +3344,7 @@ struct hl_device {
struct hl_cq *completion_queue;
struct hl_user_interrupt *user_interrupt;
struct hl_user_interrupt tpc_interrupt;
+ struct hl_user_interrupt unexpected_error_interrupt;
struct hl_user_interrupt common_user_cq_interrupt;
struct hl_user_interrupt common_decoder_interrupt;
struct hl_cs **shadow_cs_queue;
diff --git a/drivers/accel/habanalabs/common/irq.c b/drivers/accel/habanalabs/common/irq.c
index 8c6705cf958e..fab1abc5c910 100644
--- a/drivers/accel/habanalabs/common/irq.c
+++ b/drivers/accel/habanalabs/common/irq.c
@@ -340,6 +340,11 @@ static void handle_tpc_interrupt(struct hl_device *hdev)
hl_device_cond_reset(hdev, flags, event_mask);
}
+static void handle_unexpected_user_interrupt(struct hl_device *hdev)
+{
+ dev_err_ratelimited(hdev->dev, "Received unexpected user error interrupt\n");
+}
+
/**
* hl_irq_handler_user_interrupt - irq handler for user interrupts
*
@@ -385,6 +390,9 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg)
case HL_USR_INTERRUPT_TPC:
handle_tpc_interrupt(hdev);
break;
+ case HL_USR_INTERRUPT_UNEXPECTED:
+ handle_unexpected_user_interrupt(hdev);
+ break;
default:
break;
}
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 40563e29be8d..cff1d4588913 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -2440,6 +2440,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
prop->tpc_interrupt_id = GAUDI2_IRQ_NUM_TPC_ASSERT;
+ prop->unexpected_user_error_interrupt_id = GAUDI2_IRQ_NUM_UNEXPECTED_ERROR;
prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
@@ -3346,6 +3347,10 @@ static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
/* Initialize TPC interrupt */
HL_USR_INTR_STRUCT_INIT(hdev->tpc_interrupt, hdev, 0, HL_USR_INTERRUPT_TPC);
+ /* Initialize general purpose interrupt */
+ HL_USR_INTR_STRUCT_INIT(hdev->unexpected_error_interrupt, hdev, 0,
+ HL_USR_INTERRUPT_UNEXPECTED);
+
/* Initialize common user CQ interrupt */
HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ);
@@ -4005,6 +4010,8 @@ static const char *gaudi2_irq_name(u16 irq_number)
return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
case GAUDI2_IRQ_NUM_TPC_ASSERT:
return "gaudi2 tpc assert";
+ case GAUDI2_IRQ_NUM_UNEXPECTED_ERROR:
+ return "gaudi2 tpc assert";
case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
return "gaudi2 user completion";
default:
@@ -4125,6 +4132,15 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
goto free_dec_irq;
}
+ irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
+ rc = request_irq(irq, hl_irq_handler_user_interrupt, 0,
+ gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
+ &hdev->unexpected_error_interrupt);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to request IRQ %d", irq);
+ goto free_tpc_irq;
+ }
+
for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
user_irq_init_cnt < prop->user_interrupt_count;
i++, j++, user_irq_init_cnt++) {
@@ -4151,6 +4167,11 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
irq = pci_irq_vector(hdev->pdev, i);
free_irq(irq, &hdev->user_interrupt[j]);
}
+ irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
+ free_irq(irq, &hdev->unexpected_error_interrupt);
+free_tpc_irq:
+ irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
+ free_irq(irq, &hdev->tpc_interrupt);
free_dec_irq:
gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_DEC_LAST + 1);
free_event_irq:
@@ -4185,6 +4206,7 @@ static void gaudi2_sync_irqs(struct hl_device *hdev)
}
synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT));
+ synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR));
for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
i++, j++) {
@@ -4215,6 +4237,9 @@ static void gaudi2_disable_msix(struct hl_device *hdev)
irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
free_irq(irq, &hdev->tpc_interrupt);
+ irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
+ free_irq(irq, &hdev->unexpected_error_interrupt);
+
for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2P.h b/drivers/accel/habanalabs/gaudi2/gaudi2P.h
index f79958b24811..0742046810f9 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2P.h
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2P.h
@@ -387,6 +387,8 @@ enum gaudi2_edma_id {
* We have 64 CQ's per dcore, CQ0 in dcore 0 is reserved for legacy mode
*/
#define GAUDI2_NUM_USER_INTERRUPTS 255
+#define GAUDI2_NUM_RESERVED_INTERRUPTS 1
+#define GAUDI2_TOTAL_USER_INTERRUPTS (GAUDI2_NUM_USER_INTERRUPTS + GAUDI2_NUM_RESERVED_INTERRUPTS)
enum gaudi2_irq_num {
GAUDI2_IRQ_NUM_EVENT_QUEUE = GAUDI2_EVENT_QUEUE_MSIX_IDX,
@@ -416,8 +418,9 @@ enum gaudi2_irq_num {
GAUDI2_IRQ_NUM_NIC_PORT_LAST = (GAUDI2_IRQ_NUM_NIC_PORT_FIRST + NIC_NUMBER_OF_PORTS - 1),
GAUDI2_IRQ_NUM_TPC_ASSERT,
GAUDI2_IRQ_NUM_RESERVED_FIRST,
- GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_NUM_USER_INTERRUPTS - 1),
- GAUDI2_IRQ_NUM_USER_FIRST,
+ GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_TOTAL_USER_INTERRUPTS - 1),
+ GAUDI2_IRQ_NUM_UNEXPECTED_ERROR = RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT,
+ GAUDI2_IRQ_NUM_USER_FIRST = GAUDI2_IRQ_NUM_UNEXPECTED_ERROR + 1,
GAUDI2_IRQ_NUM_USER_LAST = (GAUDI2_IRQ_NUM_USER_FIRST + GAUDI2_NUM_USER_INTERRUPTS - 1),
GAUDI2_IRQ_NUM_LAST = (GAUDI2_MSIX_ENTRIES - 1)
};
diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2.h
index 5b4f9e108798..0231d6c55b4a 100644
--- a/drivers/accel/habanalabs/include/gaudi2/gaudi2.h
+++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2.h
@@ -63,6 +63,8 @@
#define RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START 0xFFF0F80000000000ull
#define RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END 0xFFF0FFFFFFFFFFFFull
+#define RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT 256
+
#define GAUDI2_MSIX_ENTRIES 512
#define QMAN_PQ_ENTRY_SIZE 16 /* Bytes */
--
2.40.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/4] accel/habanalabs: change razwi handle after fw fix
2023-03-19 9:58 [PATCH 1/4] accel/habanalabs: fix a missing-braces compilation warning Oded Gabbay
2023-03-19 9:58 ` [PATCH 2/4] accel/habanalabs: add handling for unexpected user event Oded Gabbay
@ 2023-03-19 9:58 ` Oded Gabbay
2023-03-19 9:58 ` [PATCH 4/4] accel/habanalabs: remove redundant TODOs Oded Gabbay
2 siblings, 0 replies; 4+ messages in thread
From: Oded Gabbay @ 2023-03-19 9:58 UTC (permalink / raw)
To: dri-devel; +Cc: Dani Liberman
From: Dani Liberman <dliberman@habana.ai>
FW had one data route for tpc0 and tpc1 when running in secured mode
and a different one when running without secured mode. After fw fixed
this issue, both mode have the same data path.
Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/accel/habanalabs/common/habanalabs.h | 4 ++++
drivers/accel/habanalabs/gaudi2/gaudi2.c | 6 ++----
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 1636f6a700b9..a6f5c2152b0a 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -3547,6 +3547,10 @@ struct hl_ioctl_desc {
hl_ioctl_t *func;
};
+static inline bool hl_is_fw_ver_below_1_9(struct hl_device *hdev)
+{
+ return (hdev->fw_major_version < 42);
+}
/*
* Kernel module functions that can be accessed by entire module
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index cff1d4588913..b13f998ae09d 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -7973,10 +7973,8 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
case RAZWI_TPC:
hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx];
- /* TODO : remove this check and depend only on tpc routers table
- * when SW-118828 is resolved
- */
- if (!hdev->asic_prop.fw_security_enabled &&
+ if (hl_is_fw_ver_below_1_9(hdev) &&
+ !hdev->asic_prop.fw_security_enabled &&
((module_idx == 0) || (module_idx == 1)))
lbw_rtr_id = DCORE0_RTR0;
else
--
2.40.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 4/4] accel/habanalabs: remove redundant TODOs
2023-03-19 9:58 [PATCH 1/4] accel/habanalabs: fix a missing-braces compilation warning Oded Gabbay
2023-03-19 9:58 ` [PATCH 2/4] accel/habanalabs: add handling for unexpected user event Oded Gabbay
2023-03-19 9:58 ` [PATCH 3/4] accel/habanalabs: change razwi handle after fw fix Oded Gabbay
@ 2023-03-19 9:58 ` Oded Gabbay
2 siblings, 0 replies; 4+ messages in thread
From: Oded Gabbay @ 2023-03-19 9:58 UTC (permalink / raw)
To: dri-devel; +Cc: Ofir Bitton
From: Ofir Bitton <obitton@habana.ai>
As mmu refactor and nic resume are not relevant anymore, remove
their TODO comments.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/accel/habanalabs/gaudi2/gaudi2.c | 5 -----
1 file changed, 5 deletions(-)
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index b13f998ae09d..edcbda3d9b40 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -2340,7 +2340,6 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
prop->dmmu.last_mask = LAST_MASK;
prop->dmmu.host_resident = 1;
- /* TODO: will be duplicated until implementing per-MMU props */
prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
@@ -2356,7 +2355,6 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
prop->pmmu.host_resident = 1;
prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
prop->pmmu.last_mask = LAST_MASK;
- /* TODO: will be duplicated until implementing per-MMU props */
prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
@@ -6906,9 +6904,6 @@ static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
size_t irq_arr_size;
int rc;
- /* TODO: missing gaudi2_nic_resume.
- * Until implemented nic_hw_cap_initialized will remain zeroed
- */
gaudi2_init_arcs(hdev);
rc = gaudi2_scrub_arcs_dccm(hdev);
--
2.40.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2023-03-19 9:59 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-19 9:58 [PATCH 1/4] accel/habanalabs: fix a missing-braces compilation warning Oded Gabbay
2023-03-19 9:58 ` [PATCH 2/4] accel/habanalabs: add handling for unexpected user event Oded Gabbay
2023-03-19 9:58 ` [PATCH 3/4] accel/habanalabs: change razwi handle after fw fix Oded Gabbay
2023-03-19 9:58 ` [PATCH 4/4] accel/habanalabs: remove redundant TODOs Oded Gabbay
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).