All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/7] habanalabs: missing mutex_unlock in process kill procedure
@ 2021-07-11 12:54 Oded Gabbay
  2021-07-11 12:54 ` [PATCH 2/7] habanalabs: rename cb_mmap to mmap Oded Gabbay
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Oded Gabbay @ 2021-07-11 12:54 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

Add the missing mutex unlock on the path where the driver gives up on
killing user processes.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 86426052a191..28fe15a28361 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -822,6 +822,7 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
 		} else {
 			dev_warn(hdev->dev,
 				"Can't get task struct for PID so giving up on killing process\n");
+			mutex_unlock(&hdev->fpriv_list_lock);
 			return -ETIME;
 		}
 	}
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/7] habanalabs: rename cb_mmap to mmap
  2021-07-11 12:54 [PATCH 1/7] habanalabs: missing mutex_unlock in process kill procedure Oded Gabbay
@ 2021-07-11 12:54 ` Oded Gabbay
  2021-07-11 12:54 ` [PATCH 3/7] habanalabs: fix nullifying of destroyed mmu pgt pool Oded Gabbay
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2021-07-11 12:54 UTC (permalink / raw)
  To: linux-kernel; +Cc: Zvika Yehudai

From: Zvika Yehudai <zyehudai@habana.ai>

This function will be used for more mmap operations than just
mmapping CBs.

Signed-off-by: Zvika Yehudai <zyehudai@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/command_buffer.c | 2 +-
 drivers/misc/habanalabs/common/habanalabs.h     | 4 ++--
 drivers/misc/habanalabs/gaudi/gaudi.c           | 4 ++--
 drivers/misc/habanalabs/goya/goya.c             | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index 719168c980a4..58afefcd74f3 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -552,7 +552,7 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
 
 	vma->vm_private_data = cb;
 
-	rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address,
+	rc = hdev->asic_funcs->mmap(hdev, vma, cb->kernel_address,
 					cb->bus_address, cb->size);
 	if (rc) {
 		spin_lock(&cb->lock);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 9aedea471ebe..6affad6bc1b9 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1024,7 +1024,7 @@ struct fw_load_mgr {
  *                hw_fini and before CS rollback.
  * @suspend: handles IP specific H/W or SW changes for suspend.
  * @resume: handles IP specific H/W or SW changes for resume.
- * @cb_mmap: maps a CB.
+ * @mmap: maps a memory.
  * @ring_doorbell: increment PI on a given QMAN.
  * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific
  *             function because the PQs are located in different memory areas
@@ -1143,7 +1143,7 @@ struct hl_asic_funcs {
 	void (*halt_engines)(struct hl_device *hdev, bool hard_reset);
 	int (*suspend)(struct hl_device *hdev);
 	int (*resume)(struct hl_device *hdev);
-	int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
+	int (*mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
 			void *cpu_addr, dma_addr_t dma_addr, size_t size);
 	void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
 	void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 7dd36d1cb39e..14d0f6d9a383 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -4299,7 +4299,7 @@ static int gaudi_resume(struct hl_device *hdev)
 	return gaudi_init_iatu(hdev);
 }
 
-static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
+static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
 {
 	int rc;
@@ -9313,7 +9313,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.halt_engines = gaudi_halt_engines,
 	.suspend = gaudi_suspend,
 	.resume = gaudi_resume,
-	.cb_mmap = gaudi_cb_mmap,
+	.mmap = gaudi_mmap,
 	.ring_doorbell = gaudi_ring_doorbell,
 	.pqe_write = gaudi_pqe_write,
 	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 4144a8445eef..017c913f7b5a 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -2798,7 +2798,7 @@ int goya_resume(struct hl_device *hdev)
 	return goya_init_iatu(hdev);
 }
 
-static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
+static int goya_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
 {
 	int rc;
@@ -5584,7 +5584,7 @@ static const struct hl_asic_funcs goya_funcs = {
 	.halt_engines = goya_halt_engines,
 	.suspend = goya_suspend,
 	.resume = goya_resume,
-	.cb_mmap = goya_cb_mmap,
+	.mmap = goya_mmap,
 	.ring_doorbell = goya_ring_doorbell,
 	.pqe_write = goya_pqe_write,
 	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/7] habanalabs: fix nullifying of destroyed mmu pgt pool
  2021-07-11 12:54 [PATCH 1/7] habanalabs: missing mutex_unlock in process kill procedure Oded Gabbay
  2021-07-11 12:54 ` [PATCH 2/7] habanalabs: rename cb_mmap to mmap Oded Gabbay
@ 2021-07-11 12:54 ` Oded Gabbay
  2021-07-11 12:54 ` [PATCH 4/7] habanalabs: mark linux image as not loaded after hw_fini Oded Gabbay
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2021-07-11 12:54 UTC (permalink / raw)
  To: linux-kernel; +Cc: Tomer Tayar

From: Tomer Tayar <ttayar@habana.ai>

In case of host-resident MMU, when the page tables pool is destroyed,
its pointer is not nullified correctly.
As a result, on a device fini which happens after a failing reset, the
already destroyed pool is accessed, which leads to a kernel panic.
The patch fixes the setting of the pool pointer to NULL.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/mmu/mmu_v1.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/misc/habanalabs/common/mmu/mmu_v1.c b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
index c5e93ff32586..0f536f79dd9c 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu_v1.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
@@ -470,13 +470,13 @@ static void hl_mmu_v1_fini(struct hl_device *hdev)
 	if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) {
 		kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
 		gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
-	}
 
-	/* Make sure that if we arrive here again without init was called we
-	 * won't cause kernel panic. This can happen for example if we fail
-	 * during hard reset code at certain points
-	 */
-	hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
+		/* Make sure that if we arrive here again without init was
+		 * called we won't cause kernel panic. This can happen for
+		 * example if we fail during hard reset code at certain points
+		 */
+		hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
+	}
 }
 
 /**
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/7] habanalabs: mark linux image as not loaded after hw_fini
  2021-07-11 12:54 [PATCH 1/7] habanalabs: missing mutex_unlock in process kill procedure Oded Gabbay
  2021-07-11 12:54 ` [PATCH 2/7] habanalabs: rename cb_mmap to mmap Oded Gabbay
  2021-07-11 12:54 ` [PATCH 3/7] habanalabs: fix nullifying of destroyed mmu pgt pool Oded Gabbay
@ 2021-07-11 12:54 ` Oded Gabbay
  2021-07-11 12:54 ` [PATCH 5/7] habanalabs: fix type of variable Oded Gabbay
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2021-07-11 12:54 UTC (permalink / raw)
  To: linux-kernel; +Cc: Tomer Tayar

From: Tomer Tayar <ttayar@habana.ai>

If hard reset fails after the call to hw_fini and before loading the
linux image to the device, a subsequent call to hw_fini should
communicate via COMMS (or MSG_TO_CPU regs for old FW versions).
However, the driver still tries in this case to communicate via the GIC,
and thus no hard reset is actually done.
To avoid that, the patch clears the linux_loaded flag after every call
to hw_fini.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 28fe15a28361..4fcd24e5a609 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1075,9 +1075,12 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 	hdev->asic_funcs->hw_fini(hdev, hard_reset);
 
 	if (hard_reset) {
+		hdev->fw_loader.linux_loaded = false;
+
 		/* Release kernel context */
 		if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
 			hdev->kernel_ctx = NULL;
+
 		hl_vm_fini(hdev);
 		hl_mmu_fini(hdev);
 		hl_eq_reset(hdev, &hdev->event_queue);
@@ -1611,6 +1614,8 @@ void hl_device_fini(struct hl_device *hdev)
 	/* Reset the H/W. It will be in idle state after this returns */
 	hdev->asic_funcs->hw_fini(hdev, true);
 
+	hdev->fw_loader.linux_loaded = false;
+
 	/* Release kernel context */
 	if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
 		dev_err(hdev->dev, "kernel ctx is still alive\n");
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 5/7] habanalabs: fix type of variable
  2021-07-11 12:54 [PATCH 1/7] habanalabs: missing mutex_unlock in process kill procedure Oded Gabbay
                   ` (2 preceding siblings ...)
  2021-07-11 12:54 ` [PATCH 4/7] habanalabs: mark linux image as not loaded after hw_fini Oded Gabbay
@ 2021-07-11 12:54 ` Oded Gabbay
  2021-07-11 12:54 ` [PATCH 6/7] habanalabs: add asic property of host dma offset Oded Gabbay
  2021-07-11 12:54 ` [PATCH 7/7] habanalabs: set dma max segment size Oded Gabbay
  5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2021-07-11 12:54 UTC (permalink / raw)
  To: linux-kernel

Recently, the size parameter in the userptr structure was changed to u64.
As a result, we need to change the type of the local range_size
in device_va_to_pa() to u64 to avoid overflow.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/debugfs.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index 51744e42b808..2c587af28f9b 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -491,11 +491,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size,
 	struct hl_vm_phys_pg_pack *phys_pg_pack;
 	struct hl_ctx *ctx = hdev->compute_ctx;
 	struct hl_vm_hash_node *hnode;
+	u64 end_address, range_size;
 	struct hl_userptr *userptr;
 	enum vm_type *vm_type;
 	bool valid = false;
-	u64 end_address;
-	u32 range_size;
 	int i, rc = 0;
 
 	if (!ctx) {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 6/7] habanalabs: add asic property of host dma offset
  2021-07-11 12:54 [PATCH 1/7] habanalabs: missing mutex_unlock in process kill procedure Oded Gabbay
                   ` (3 preceding siblings ...)
  2021-07-11 12:54 ` [PATCH 5/7] habanalabs: fix type of variable Oded Gabbay
@ 2021-07-11 12:54 ` Oded Gabbay
  2021-07-11 12:54 ` [PATCH 7/7] habanalabs: set dma max segment size Oded Gabbay
  5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2021-07-11 12:54 UTC (permalink / raw)
  To: linux-kernel

Each ASIC can have a different offset to add to a host dma address,
to enable the ASIC to access that host memory.

The usage for this can be common code so add this to the asic
property structure.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h | 3 +++
 drivers/misc/habanalabs/gaudi/gaudi.c       | 1 +
 drivers/misc/habanalabs/goya/goya.c         | 1 +
 3 files changed, 5 insertions(+)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 6affad6bc1b9..9782bb50931a 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -434,6 +434,8 @@ struct hl_hints_range {
  *                  the device's MMU.
  * @dram_hints_align_mask: dram va hint addresses alignment mask which is used
  *                  for hints validity check.
+ * @device_dma_offset_for_host_access: the offset to add to host DMA addresses
+ *                                     to enable the device to access them.
  * @mmu_pgt_size: MMU page tables total size.
  * @mmu_pte_size: PTE size in MMU page tables.
  * @mmu_hop_table_size: MMU hop table size.
@@ -527,6 +529,7 @@ struct asic_fixed_properties {
 	u64				cb_va_start_addr;
 	u64				cb_va_end_addr;
 	u64				dram_hints_align_mask;
+	u64				device_dma_offset_for_host_access;
 	u32				mmu_pgt_size;
 	u32				mmu_pte_size;
 	u32				mmu_hop_table_size;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 14d0f6d9a383..fdbe8155ef3c 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -537,6 +537,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
 						get_collective_mode(hdev, i);
 	}
 
+	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
 	prop->collective_first_sob = 0;
 	prop->collective_first_mon = 0;
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 017c913f7b5a..ae9871928369 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -389,6 +389,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
 		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER;
 	}
 
+	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
 
 	prop->dram_base_address = DRAM_PHYS_BASE;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 7/7] habanalabs: set dma max segment size
  2021-07-11 12:54 [PATCH 1/7] habanalabs: missing mutex_unlock in process kill procedure Oded Gabbay
                   ` (4 preceding siblings ...)
  2021-07-11 12:54 ` [PATCH 6/7] habanalabs: add asic property of host dma offset Oded Gabbay
@ 2021-07-11 12:54 ` Oded Gabbay
  5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2021-07-11 12:54 UTC (permalink / raw)
  To: linux-kernel

This is required of any device that is capable of performing DMA.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/pci/pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/misc/habanalabs/common/pci/pci.c b/drivers/misc/habanalabs/common/pci/pci.c
index d5bedf5ba011..0b5366cc84fd 100644
--- a/drivers/misc/habanalabs/common/pci/pci.c
+++ b/drivers/misc/habanalabs/common/pci/pci.c
@@ -436,6 +436,8 @@ int hl_pci_init(struct hl_device *hdev)
 		goto unmap_pci_bars;
 	}
 
+	dma_set_max_seg_size(&pdev->dev, U32_MAX);
+
 	return 0;
 
 unmap_pci_bars:
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-07-11 12:54 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-11 12:54 [PATCH 1/7] habanalabs: missing mutex_unlock in process kill procedure Oded Gabbay
2021-07-11 12:54 ` [PATCH 2/7] habanalabs: rename cb_mmap to mmap Oded Gabbay
2021-07-11 12:54 ` [PATCH 3/7] habanalabs: fix nullifying of destroyed mmu pgt pool Oded Gabbay
2021-07-11 12:54 ` [PATCH 4/7] habanalabs: mark linux image as not loaded after hw_fini Oded Gabbay
2021-07-11 12:54 ` [PATCH 5/7] habanalabs: fix type of variable Oded Gabbay
2021-07-11 12:54 ` [PATCH 6/7] habanalabs: add asic property of host dma offset Oded Gabbay
2021-07-11 12:54 ` [PATCH 7/7] habanalabs: set dma max segment size Oded Gabbay

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.