* [PATCH 1/3] habanalabs: make set_pci_regions asic function
@ 2021-07-22 11:16 Oded Gabbay
2021-07-22 11:16 ` [PATCH 2/3] habanalabs: clear msg_to_cpu_reg to avoid misread after reset Oded Gabbay
2021-07-22 11:16 ` [PATCH 3/3] habanalabs: add validity check for event ID received from F/W Oded Gabbay
0 siblings, 2 replies; 3+ messages in thread
From: Oded Gabbay @ 2021-07-22 11:16 UTC (permalink / raw)
To: linux-kernel; +Cc: Ohad Sharabi
From: Ohad Sharabi <osharabi@habana.ai>
In order to better support variants of the same ASIC
the set_pci_regions function is now an ASIC function which
allows each ASIC to implement it internally, thus keeping
all definitions static to the file.
Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/misc/habanalabs/common/habanalabs.h | 2 ++
drivers/misc/habanalabs/gaudi/gaudi.c | 5 +++--
drivers/misc/habanalabs/goya/goya.c | 5 +++--
3 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 38617d4a34d5..75be25d47c98 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1160,6 +1160,7 @@ struct fw_load_mgr {
* @init_cpu_scrambler_dram: Enable CPU specific DRAM scrambling
* @state_dump_init: initialize constants required for state dump
* @get_sob_addr: get SOB base address offset.
+ * @set_pci_memory_regions: setting properties of PCI memory regions
*/
struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
@@ -1288,6 +1289,7 @@ struct hl_asic_funcs {
void (*init_cpu_scrambler_dram)(struct hl_device *hdev);
void (*state_dump_init)(struct hl_device *hdev);
u32 (*get_sob_addr)(struct hl_device *hdev, u32 sob_id);
+ void (*set_pci_memory_regions)(struct hl_device *hdev);
};
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 2fe14a138f37..0cb21440b5ff 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1867,7 +1867,7 @@ static int gaudi_sw_init(struct hl_device *hdev)
hdev->supports_staged_submission = true;
hdev->supports_wait_for_multi_cs = true;
- gaudi_set_pci_memory_regions(hdev);
+ hdev->asic_funcs->set_pci_memory_regions(hdev);
return 0;
@@ -9463,7 +9463,8 @@ static const struct hl_asic_funcs gaudi_funcs = {
.init_firmware_loader = gaudi_init_firmware_loader,
.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
.state_dump_init = gaudi_state_dump_init,
- .get_sob_addr = gaudi_get_sob_addr
+ .get_sob_addr = gaudi_get_sob_addr,
+ .set_pci_memory_regions = gaudi_set_pci_memory_regions
};
/**
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index fad130f6a118..9d66bdda8789 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -963,7 +963,7 @@ static int goya_sw_init(struct hl_device *hdev)
hdev->allow_external_soft_reset = true;
hdev->supports_wait_for_multi_cs = false;
- goya_set_pci_memory_regions(hdev);
+ hdev->asic_funcs->set_pci_memory_regions(hdev);
return 0;
@@ -5670,7 +5670,8 @@ static const struct hl_asic_funcs goya_funcs = {
.init_firmware_loader = goya_init_firmware_loader,
.init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram,
.state_dump_init = goya_state_dump_init,
- .get_sob_addr = &goya_get_sob_addr
+ .get_sob_addr = &goya_get_sob_addr,
+ .set_pci_memory_regions = goya_set_pci_memory_regions,
};
/*
--
2.25.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH 2/3] habanalabs: clear msg_to_cpu_reg to avoid misread after reset
2021-07-22 11:16 [PATCH 1/3] habanalabs: make set_pci_regions asic function Oded Gabbay
@ 2021-07-22 11:16 ` Oded Gabbay
2021-07-22 11:16 ` [PATCH 3/3] habanalabs: add validity check for event ID received from F/W Oded Gabbay
1 sibling, 0 replies; 3+ messages in thread
From: Oded Gabbay @ 2021-07-22 11:16 UTC (permalink / raw)
To: linux-kernel; +Cc: Koby Elbaz
From: Koby Elbaz <kelbaz@habana.ai>
For some ASICs, the f/w reads the msg_to_cpu_reg value after
reset, and for some it doesn't.
Therefore, to be sure f/w doesn't read a wrong value after reset, we
need to clear this register before the reset occurs.
Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/misc/habanalabs/common/firmware_if.c | 28 +++++++++-----------
1 file changed, 12 insertions(+), 16 deletions(-)
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 0be3f5414f0b..c232d197b57a 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -667,17 +667,15 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev,
hdev->event_queue.check_eqe_index = false;
/* Read FW application security bits again */
- if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid) {
- hdev->asic_prop.fw_app_cpu_boot_dev_sts0 =
- RREG32(sts_boot_dev_sts0_reg);
- if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
+ if (prop->fw_cpu_boot_dev_sts0_valid) {
+ prop->fw_app_cpu_boot_dev_sts0 = RREG32(sts_boot_dev_sts0_reg);
+ if (prop->fw_app_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_EQ_INDEX_EN)
hdev->event_queue.check_eqe_index = true;
}
- if (hdev->asic_prop.fw_cpu_boot_dev_sts1_valid)
- hdev->asic_prop.fw_app_cpu_boot_dev_sts1 =
- RREG32(sts_boot_dev_sts1_reg);
+ if (prop->fw_cpu_boot_dev_sts1_valid)
+ prop->fw_app_cpu_boot_dev_sts1 = RREG32(sts_boot_dev_sts1_reg);
out:
hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
@@ -1012,6 +1010,11 @@ void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev)
} else {
WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_GOTO_WFE);
msleep(static_loader->cpu_reset_wait_msec);
+
+ /* Must clear this register in order to prevent preboot
+ * from reading WFE after reboot
+ */
+ WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_NA);
}
hdev->device_cpu_is_halted = true;
@@ -1242,11 +1245,6 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
* b. Check whether hard reset is done by boot cpu
* 3. FW application - a. Fetch fw application security status
* b. Check whether hard reset is done by fw app
- *
- * Preboot:
- * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED). If set, then-
- * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
- * If set, then mark GIC controller to be disabled.
*/
prop->hard_reset_done_by_fw =
!!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
@@ -2126,8 +2124,7 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
/* Read FW application security bits */
if (prop->fw_cpu_boot_dev_sts0_valid) {
- prop->fw_app_cpu_boot_dev_sts0 =
- RREG32(cpu_boot_dev_sts0_reg);
+ prop->fw_app_cpu_boot_dev_sts0 = RREG32(cpu_boot_dev_sts0_reg);
if (prop->fw_app_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
@@ -2147,8 +2144,7 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
}
if (prop->fw_cpu_boot_dev_sts1_valid) {
- prop->fw_app_cpu_boot_dev_sts1 =
- RREG32(cpu_boot_dev_sts1_reg);
+ prop->fw_app_cpu_boot_dev_sts1 = RREG32(cpu_boot_dev_sts1_reg);
dev_dbg(hdev->dev,
"Firmware application CPU status1 %#x\n",
--
2.25.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH 3/3] habanalabs: add validity check for event ID received from F/W
2021-07-22 11:16 [PATCH 1/3] habanalabs: make set_pci_regions asic function Oded Gabbay
2021-07-22 11:16 ` [PATCH 2/3] habanalabs: clear msg_to_cpu_reg to avoid misread after reset Oded Gabbay
@ 2021-07-22 11:16 ` Oded Gabbay
1 sibling, 0 replies; 3+ messages in thread
From: Oded Gabbay @ 2021-07-22 11:16 UTC (permalink / raw)
To: linux-kernel; +Cc: Ofir Bitton
From: Ofir Bitton <obitton@habana.ai>
Currently there is no validity check for event ID received from F/W,
Thus exposing driver to memory overrun.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/misc/habanalabs/gaudi/gaudi.c | 6 ++++++
drivers/misc/habanalabs/goya/goya.c | 6 ++++++
2 files changed, 12 insertions(+)
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 0cb21440b5ff..655278e98618 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7962,6 +7962,12 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
u8 cause;
int rc;
+ if (event_type >= GAUDI_EVENT_SIZE) {
+ dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
+ event_type, GAUDI_EVENT_SIZE - 1);
+ return;
+ }
+
gaudi->events_stat[event_type]++;
gaudi->events_stat_aggregate[event_type]++;
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 9d66bdda8789..d54c700c31cd 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -4805,6 +4805,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
>> EQ_CTL_EVENT_TYPE_SHIFT);
struct goya_device *goya = hdev->asic_specific;
+ if (event_type >= GOYA_ASYNC_EVENT_ID_SIZE) {
+ dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
+ event_type, GOYA_ASYNC_EVENT_ID_SIZE - 1);
+ return;
+ }
+
goya->events_stat[event_type]++;
goya->events_stat_aggregate[event_type]++;
--
2.25.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2021-07-22 11:16 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-22 11:16 [PATCH 1/3] habanalabs: make set_pci_regions asic function Oded Gabbay
2021-07-22 11:16 ` [PATCH 2/3] habanalabs: clear msg_to_cpu_reg to avoid misread after reset Oded Gabbay
2021-07-22 11:16 ` [PATCH 3/3] habanalabs: add validity check for event ID received from F/W Oded Gabbay
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).