linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/7] habanalabs: save f/w preboot major version
@ 2022-03-24  9:33 Oded Gabbay
  2022-03-24  9:33 ` [PATCH 2/7] habanalabs: unified memory manager infrastructure Oded Gabbay
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Oded Gabbay @ 2022-03-24  9:33 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

We need this property for doing backward compatibility hacks against
the f/w.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 30 +++++++++++++-------
 drivers/misc/habanalabs/common/habanalabs.h  |  2 ++
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 2665919dbbdd..42dce28ca815 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -1911,7 +1911,7 @@ static int hl_fw_dynamic_request_descriptor(struct hl_device *hdev,
  * @fwc: the firmware component
  * @fw_version: fw component's version string
  */
-static void hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev,
+static int hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev,
 					enum hl_fw_component fwc,
 					const char *fw_version)
 {
@@ -1935,23 +1935,33 @@ static void hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev,
 						VERSION_MAX_LEN);
 		if (preboot_ver && preboot_ver != prop->preboot_ver) {
 			strscpy(btl_ver, prop->preboot_ver,
-				min((int) (preboot_ver - prop->preboot_ver),
-									31));
+				min((int) (preboot_ver - prop->preboot_ver), 31));
 			dev_info(hdev->dev, "%s\n", btl_ver);
 		}
 
 		preboot_ver = extract_fw_ver_from_str(prop->preboot_ver);
 		if (preboot_ver) {
-			dev_info(hdev->dev, "preboot version %s\n",
-								preboot_ver);
+			char major[8];
+			int rc;
+
+			dev_info(hdev->dev, "preboot version %s\n", preboot_ver);
+			sprintf(major, "%.2s", preboot_ver);
 			kfree(preboot_ver);
+
+			rc = kstrtou32(major, 10, &hdev->fw_major_version);
+			if (rc) {
+				dev_err(hdev->dev, "Error %d parsing preboot major version\n", rc);
+				return rc;
+			}
 		}
 
 		break;
 	default:
 		dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
-		return;
+		return -EINVAL;
 	}
+
+	return 0;
 }
 
 /**
@@ -2123,9 +2133,10 @@ static int hl_fw_dynamic_load_image(struct hl_device *hdev,
 		goto release_fw;
 
 	/* read preboot version */
-	hl_fw_dynamic_read_device_fw_version(hdev, cur_fwc,
+	rc = hl_fw_dynamic_read_device_fw_version(hdev, cur_fwc,
 				fw_loader->dynamic_loader.comm_desc.cur_fw_ver);
-
+	if (rc)
+		goto release_fw;
 
 	/* update state according to boot stage */
 	if (cur_fwc == FW_COMP_BOOT_FIT) {
@@ -2392,9 +2403,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 			goto protocol_err;
 
 		/* read preboot version */
-		hl_fw_dynamic_read_device_fw_version(hdev, FW_COMP_PREBOOT,
+		return hl_fw_dynamic_read_device_fw_version(hdev, FW_COMP_PREBOOT,
 				fw_loader->dynamic_loader.comm_desc.cur_fw_ver);
-		return 0;
 	}
 
 	/* load boot fit to FW */
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 3e7012f7b1a3..9c70d95883a2 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2647,6 +2647,7 @@ struct hl_reset_info {
  * @reset_info: holds current device reset information.
  * @last_error: holds information about last session in which CS timeout or razwi error occurred.
  * @stream_master_qid_arr: pointer to array with QIDs of master streams.
+ * @fw_major_version: major version of current loaded preboot
  * @dram_used_mem: current DRAM memory consumption.
  * @timeout_jiffies: device CS timeout value.
  * @max_power: the max power of the device, as configured by the sysadmin. This
@@ -2785,6 +2786,7 @@ struct hl_device {
 	struct hl_reset_info		reset_info;
 
 	u32				*stream_master_qid_arr;
+	u32				fw_major_version;
 	atomic64_t			dram_used_mem;
 	u64				timeout_jiffies;
 	u64				max_power;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/7] habanalabs: unified memory manager infrastructure
  2022-03-24  9:33 [PATCH 1/7] habanalabs: save f/w preboot major version Oded Gabbay
@ 2022-03-24  9:33 ` Oded Gabbay
  2022-03-24  9:33 ` [PATCH 3/7] habanalabs: convert ts to use unified memory manager Oded Gabbay
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2022-03-24  9:33 UTC (permalink / raw)
  To: linux-kernel; +Cc: Yuri Nudelman

From: Yuri Nudelman <ynudelman@habana.ai>

This is a part of overall refactoring attempt to separate nic and the
core drivers.
Currently, there are 4 different flows, that contain very similar code.
These are the ts, nic, hwblocks and cb alloc/map flows. The similar
aspect of all these flows is that they all contain a central store, with
memory buffers inside, supporting the following set of operations:

- Allocate buffer and return handle
- Get buffer from the store with handle
- Put the buffer (last put releases the buffer)
- Map the buffer to the user

This patch contains a generic data structure used to implement the above
memory buffer store interface. Conversion of the existing code to use
the new data structure will follow.

Signed-off-by: Yuri Nudelman <ynudelman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/Makefile     |   2 +-
 drivers/misc/habanalabs/common/habanalabs.h |  51 ++++
 drivers/misc/habanalabs/common/memory_mgr.c | 281 ++++++++++++++++++++
 3 files changed, 333 insertions(+), 1 deletion(-)
 create mode 100644 drivers/misc/habanalabs/common/memory_mgr.c

diff --git a/drivers/misc/habanalabs/common/Makefile b/drivers/misc/habanalabs/common/Makefile
index 6ebe3c7001ff..934a3a4aedc9 100644
--- a/drivers/misc/habanalabs/common/Makefile
+++ b/drivers/misc/habanalabs/common/Makefile
@@ -11,4 +11,4 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
 		common/command_buffer.o common/hw_queue.o common/irq.o \
 		common/sysfs.o common/hwmon.o common/memory.o \
 		common/command_submission.o common/firmware_if.o \
-		common/state_dump.o
+		common/state_dump.o common/memory_mgr.o
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 9c70d95883a2..1859ace28783 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -741,6 +741,57 @@ struct hl_ts_buff {
 	u32			user_buff_size;
 };
 
+struct hl_mmap_mem_buf;
+
+/**
+ * struct hl_mem_mgr - describes unified memory manager for mappable memory chunks.
+ * @dev: back pointer to the owning device
+ * @lock: protects handles
+ * @handles: an idr holding all active handles to the memory buffers in the system.
+ */
+struct hl_mem_mgr {
+	struct device *dev;
+	spinlock_t lock;
+	struct idr handles;
+};
+
+/**
+ * struct hl_mmap_mem_buf_ops - describes unified memory manager buffer behavior
+ * @alloc: callback executed on buffer allocation, shall allocate the memory,
+ *         set it under buffer private, and set mappable size.
+ * @mmap: callback executed on mmap, must map the buffer to vma
+ * @release: callback executed on release, must free the resources used by the buffer
+ */
+struct hl_mmap_mem_buf_ops {
+	int (*alloc)(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args);
+	int (*mmap)(struct hl_mmap_mem_buf *buf, struct vm_area_struct *vma, void *args);
+	void (*release)(struct hl_mmap_mem_buf *buf);
+};
+
+/**
+ * struct hl_mmap_mem_buf_ops - describes a single unified memory buffer
+ * @ops: buffer behavior
+ * @mmg: back pointer to the unified memory manager
+ * @refcount: reference counter for buffer users
+ * @private: pointer to buffer behavior private data
+ * @mmap: atomic boolean indicating whether or not the buffer is mapped right now
+ * @real_mapped_size: the actual size of buffer mapped, after part of it may be released,
+ *                   may change at runtime.
+ * @mappable_size: the original mappable size of the buffer, does not change after
+ *                 the allocation.
+ * @handle: the buffer id in mmg handles store
+ */
+struct hl_mmap_mem_buf {
+	struct hl_mmap_mem_buf_ops *ops;
+	struct hl_mem_mgr *mmg;
+	struct kref refcount;
+	void *private;
+	atomic_t mmap;
+	u64 real_mapped_size;
+	u64 mappable_size;
+	u32 handle;
+};
+
 /**
  * struct hl_cb - describes a Command Buffer.
  * @refcount: reference counter for usage of the CB.
diff --git a/drivers/misc/habanalabs/common/memory_mgr.c b/drivers/misc/habanalabs/common/memory_mgr.c
new file mode 100644
index 000000000000..3828ba44c660
--- /dev/null
+++ b/drivers/misc/habanalabs/common/memory_mgr.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2022 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+/**
+ * hl_mmap_mem_buf_get - increase the buffer refcount and return a pointer to
+ *                        the buffer descriptor.
+ *
+ * @mmg: parent unifed memory manager
+ * @handle: requested buffer handle
+ *
+ * @return Find the buffer in the store and return a pointer to its descriptor.
+ *         Increase buffer refcount. If not found - return NULL.
+ */
+struct hl_mmap_mem_buf *hl_mmap_mem_buf_get(struct hl_mem_mgr *mmg, u32 handle)
+{
+	struct hl_mmap_mem_buf *buf;
+
+	spin_lock(&mmg->lock);
+	buf = idr_find(&mmg->handles, handle);
+	if (!buf) {
+		spin_unlock(&mmg->lock);
+		dev_warn(mmg->dev,
+			 "Buff get failed, no match to handle %u\n", handle);
+		return NULL;
+	}
+	kref_get(&buf->refcount);
+	spin_unlock(&mmg->lock);
+	return buf;
+}
+
+/**
+ * @hl_mmap_mem_buf_release - release buffer
+ *
+ * @kref: kref that reached 0.
+ *
+ * Internal function, used as a kref release callback, when the last user of
+ * the buffer is released. Shall be called from an interrupt context.
+ */
+static void hl_mmap_mem_buf_release(struct kref *kref)
+{
+	struct hl_mmap_mem_buf *buf =
+		container_of(kref, struct hl_mmap_mem_buf, refcount);
+
+	spin_lock(&buf->mmg->lock);
+	idr_remove(&buf->mmg->handles, buf->handle);
+	spin_unlock(&buf->mmg->lock);
+
+	if (buf->ops->release)
+		buf->ops->release(buf);
+
+	kfree(buf);
+}
+
+/**
+ * @hl_mmap_mem_buf_put - decrease the reference to the buffer
+ *
+ * @buf: memory manager buffer descriptor
+ *
+ * Decrease the reference to the buffer, and release it if it was the last one.
+ * Shall be called from an interrupt context.
+ */
+int hl_mmap_mem_buf_put(struct hl_mmap_mem_buf *buf)
+{
+	return kref_put(&buf->refcount, hl_mmap_mem_buf_release);
+}
+
+/**
+ * @hl_mmap_mem_buf_alloc - allocate a new mappable buffer
+ *
+ * @mmg: parent unifed memory manager
+ * @behavior: behavior object describing this buffer polymorphic behavior
+ * @gfp: gfp flags to use for the memory allocations
+ * @args: additional args passed to behavior->alloc
+ *
+ * Allocate and register a new memory buffer inside the give memory manager.
+ * Return the pointer to the new buffer on success or NULL on failure.
+ */
+struct hl_mmap_mem_buf *
+hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg,
+		      struct hl_mmap_mem_buf_ops *behavior, gfp_t gfp,
+		      void *args)
+{
+	struct hl_mmap_mem_buf *buf;
+	int rc;
+
+	buf = kzalloc(sizeof(*buf), gfp);
+	if (!buf)
+		return NULL;
+
+	spin_lock(&mmg->lock);
+	rc = idr_alloc(&mmg->handles, buf, 1, 0, GFP_ATOMIC);
+	spin_unlock(&mmg->lock);
+	if (rc < 0) {
+		dev_err(mmg->dev,
+			"Failed to allocate IDR for a new buffer, rc=%d\n", rc);
+		goto free_buf;
+	}
+
+	buf->handle = rc;
+	buf->mmg = mmg;
+	buf->ops = behavior;
+	kref_init(&buf->refcount);
+
+	rc = buf->ops->alloc(buf, gfp, args);
+	if (rc) {
+		dev_err(mmg->dev, "Failure in buffer alloc callback %d\n",
+			rc);
+		goto remove_idr;
+	}
+
+	dev_dbg(mmg->dev, "Created buff object handle %u\n", buf->handle);
+	return buf;
+
+remove_idr:
+	spin_lock(&mmg->lock);
+	idr_remove(&mmg->handles, buf->handle);
+	spin_unlock(&mmg->lock);
+free_buf:
+	kfree(buf);
+	return NULL;
+}
+
+/**
+ * @hl_mmap_mem_buf_vm_close - handle mmap close
+ *
+ * @vma: the vma object for which mmap was closed.
+ *
+ * Put the memory buffer if it is no longer mapped.
+ */
+static void hl_mmap_mem_buf_vm_close(struct vm_area_struct *vma)
+{
+	struct hl_mmap_mem_buf *buf =
+		(struct hl_mmap_mem_buf *)vma->vm_private_data;
+	long new_mmap_size;
+
+	new_mmap_size = buf->real_mapped_size - (vma->vm_end - vma->vm_start);
+
+	if (new_mmap_size > 0) {
+		buf->real_mapped_size = new_mmap_size;
+		return;
+	}
+
+	atomic_set(&buf->mmap, 0);
+	hl_mmap_mem_buf_put(buf);
+	vma->vm_private_data = NULL;
+}
+
+static const struct vm_operations_struct hl_mmap_mem_buf_vm_ops = {
+	.close = hl_mmap_mem_buf_vm_close
+};
+
+/**
+ * @hl_mem_mgr_mmap - map the given buffer to the user
+ *
+ * @mmg: unifed memory manager
+ * @vma: the vma object for which mmap was closed.
+ * @args: additional args passed to behavior->mmap
+ *
+ * Map the buffer specified by the vma->vm_pgoff to the given vma.
+ */
+int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma,
+		    void *args)
+{
+	struct hl_mmap_mem_buf *buf;
+	u64 user_mem_size;
+	u32 handle;
+	int rc;
+
+	/* We use the page offset to hold the idr and thus we need to clear
+	 * it before doing the mmap itself
+	 */
+	handle = vma->vm_pgoff;
+	vma->vm_pgoff = 0;
+
+	/* Reference was taken here */
+	buf = hl_mmap_mem_buf_get(mmg, handle);
+	if (!buf) {
+		dev_err(mmg->dev,
+			"Memory mmap failed, no match to handle %u\n", handle);
+		return -EINVAL;
+	}
+
+	/* Validation check */
+	user_mem_size = vma->vm_end - vma->vm_start;
+	if (user_mem_size != ALIGN(buf->mappable_size, PAGE_SIZE)) {
+		dev_err(mmg->dev,
+			"Memory mmap failed, mmap VM size 0x%llx != 0x%llx allocated physical mem size\n",
+			user_mem_size, buf->mappable_size);
+		rc = -EINVAL;
+		goto put_mem;
+	}
+
+#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
+	if (!access_ok(VERIFY_WRITE, (void __user *)(uintptr_t)vma->vm_start,
+		       user_mem_size)) {
+#else
+	if (!access_ok((void __user *)(uintptr_t)vma->vm_start,
+		       user_mem_size)) {
+#endif
+		dev_err(mmg->dev, "user pointer is invalid - 0x%lx\n",
+			vma->vm_start);
+
+		rc = -EINVAL;
+		goto put_mem;
+	}
+
+	if (atomic_cmpxchg(&buf->mmap, 0, 1)) {
+		dev_err(mmg->dev,
+			"Memory mmap failed, already mmaped to user\n");
+		rc = -EINVAL;
+		goto put_mem;
+	}
+
+	vma->vm_ops = &hl_mmap_mem_buf_vm_ops;
+
+	/* Note: We're transferring the memory reference to vma->vm_private_data here. */
+
+	vma->vm_private_data = buf;
+
+	rc = buf->ops->mmap(buf, vma, args);
+	if (rc) {
+		atomic_set(&buf->mmap, 0);
+		goto put_mem;
+	}
+
+	buf->real_mapped_size = buf->mappable_size;
+	vma->vm_pgoff = handle;
+
+	return 0;
+
+put_mem:
+	hl_mmap_mem_buf_put(buf);
+	return rc;
+}
+
+/**
+ * @hl_mem_mgr_init - initialize unified memory manager
+ *
+ * @dev: owner device pointer
+ * @mmg: structure to initialize
+ *
+ * Initialize an instance of unified memory manager
+ */
+void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg)
+{
+	mmg->dev = dev;
+	spin_lock_init(&mmg->lock);
+	idr_init(&mmg->handles);
+}
+
+/**
+ * @hl_mem_mgr_fini - release unified memory manager
+ *
+ * @mmg: parent unifed memory manager
+ *
+ * Release the unified memory manager. Shall be called from an interrupt context.
+ */
+void hl_mem_mgr_fini(struct hl_mem_mgr *mmg)
+{
+	struct hl_mmap_mem_buf *buf;
+	struct idr *idp;
+	u32 id;
+
+	idp = &mmg->handles;
+
+	idr_for_each_entry(idp, buf, id) {
+		if (hl_mmap_mem_buf_put(buf) != 1)
+			dev_err(mmg->dev,
+				"Buff handle %u for CTX is still alive\n", id);
+	}
+
+	/* TODO: can it happen that some buffer is still in use at this point? */
+
+	idr_destroy(&mmg->handles);
+}
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/7] habanalabs: convert ts to use unified memory manager
  2022-03-24  9:33 [PATCH 1/7] habanalabs: save f/w preboot major version Oded Gabbay
  2022-03-24  9:33 ` [PATCH 2/7] habanalabs: unified memory manager infrastructure Oded Gabbay
@ 2022-03-24  9:33 ` Oded Gabbay
  2022-03-24  9:33 ` [PATCH 4/7] habanalabs: add MMU prefetch to ASIC-specific code Oded Gabbay
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2022-03-24  9:33 UTC (permalink / raw)
  To: linux-kernel; +Cc: Yuri Nudelman

From: Yuri Nudelman <ynudelman@habana.ai>

With the introduction of the unified memory manager infrastructure, the
timestamp buffers can be converted to use it.

Signed-off-by: Yuri Nudelman <ynudelman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../habanalabs/common/command_submission.c    |  44 ++--
 drivers/misc/habanalabs/common/device.c       |   4 +-
 drivers/misc/habanalabs/common/habanalabs.h   |  43 ++--
 .../misc/habanalabs/common/habanalabs_drv.c   |   4 +-
 drivers/misc/habanalabs/common/irq.c          |  10 +-
 drivers/misc/habanalabs/common/memory.c       | 221 +++---------------
 6 files changed, 91 insertions(+), 235 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 32051f05a05a..23370aa67bcd 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -957,9 +957,9 @@ wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
 
 	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
 	list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
-		if (pend->ts_reg_info.ts_buff) {
+		if (pend->ts_reg_info.buf) {
 			list_del(&pend->wait_list_node);
-			hl_ts_put(pend->ts_reg_info.ts_buff);
+			hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
 			hl_cb_put(pend->ts_reg_info.cq_cb);
 		} else {
 			pend->fence.error = -EIO;
@@ -2867,12 +2867,13 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 	return 0;
 }
 
-static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff,
+static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
 					struct hl_cb *cq_cb,
 					u64 ts_offset, u64 cq_offset, u64 target_value,
 					spinlock_t *wait_list_lock,
 					struct hl_user_pending_interrupt **pend)
 {
+	struct hl_ts_buff *ts_buff = buf->private;
 	struct hl_user_pending_interrupt *requested_offset_record =
 				(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
 				ts_offset;
@@ -2884,7 +2885,7 @@ static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff,
 
 	/* Validate ts_offset not exceeding last max */
 	if (requested_offset_record > cb_last) {
-		dev_err(ts_buff->hdev->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
+		dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
 								(u64)(uintptr_t)cb_last);
 		return -EINVAL;
 	}
@@ -2903,18 +2904,21 @@ static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff,
 			list_del(&requested_offset_record->wait_list_node);
 			spin_unlock_irqrestore(wait_list_lock, flags);
 
-			hl_ts_put(requested_offset_record->ts_reg_info.ts_buff);
+			hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf);
 			hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);
 
-			dev_dbg(ts_buff->hdev->dev, "ts node removed from interrupt list now can re-use\n");
+			dev_dbg(buf->mmg->dev,
+				"ts node removed from interrupt list now can re-use\n");
 		} else {
-			dev_dbg(ts_buff->hdev->dev, "ts node in middle of irq handling\n");
+			dev_dbg(buf->mmg->dev,
+				"ts node in middle of irq handling\n");
 
 			/* irq handling in the middle give it time to finish */
 			spin_unlock_irqrestore(wait_list_lock, flags);
 			usleep_range(1, 10);
 			if (++iter_counter == MAX_TS_ITER_NUM) {
-				dev_err(ts_buff->hdev->dev, "handling registration interrupt took too long!!\n");
+				dev_err(buf->mmg->dev,
+					"handling registration interrupt took too long!!\n");
 				return -EINVAL;
 			}
 
@@ -2926,7 +2930,7 @@ static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff,
 
 	/* Fill up the new registration node info */
 	requested_offset_record->ts_reg_info.in_use = 1;
-	requested_offset_record->ts_reg_info.ts_buff = ts_buff;
+	requested_offset_record->ts_reg_info.buf = buf;
 	requested_offset_record->ts_reg_info.cq_cb = cq_cb;
 	requested_offset_record->ts_reg_info.timestamp_kernel_addr =
 			(u64 *) ts_buff->user_buff_address + ts_offset;
@@ -2936,13 +2940,13 @@ static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff,
 
 	*pend = requested_offset_record;
 
-	dev_dbg(ts_buff->hdev->dev, "Found available node in TS kernel CB(0x%llx)\n",
+	dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB(0x%llx)\n",
 						(u64)(uintptr_t)requested_offset_record);
 	return 0;
 }
 
 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
-				struct hl_cb_mgr *cb_mgr, struct hl_ts_mgr *ts_mgr,
+				struct hl_cb_mgr *cb_mgr, struct hl_mem_mgr *mmg,
 				u64 timeout_us, u64 cq_counters_handle,	u64 cq_counters_offset,
 				u64 target_value, struct hl_user_interrupt *interrupt,
 				bool register_ts_record, u64 ts_handle, u64 ts_offset,
@@ -2950,7 +2954,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 {
 	u32 cq_patched_handle, ts_patched_handle;
 	struct hl_user_pending_interrupt *pend;
-	struct hl_ts_buff *ts_buff;
+	struct hl_mmap_mem_buf *buf;
 	struct hl_cb *cq_cb;
 	unsigned long timeout, flags;
 	long completion_rc;
@@ -2971,15 +2975,21 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 		dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n",
 					interrupt->interrupt_id, ts_offset, cq_counters_offset);
 
+		/* TODO:
+		 * See if this can be removed.
+		 * Embedding type in handle will no longer be needed as soon as we
+		 * switch to using a single memory manager for all memory types.
+		 * We may still need the page shift, though.
+		 */
 		ts_patched_handle = lower_32_bits(ts_handle >> PAGE_SHIFT);
-		ts_buff = hl_ts_get(hdev, ts_mgr, ts_patched_handle);
-		if (!ts_buff) {
+		buf = hl_mmap_mem_buf_get(mmg, ts_patched_handle);
+		if (!buf) {
 			rc = -EINVAL;
 			goto put_cq_cb;
 		}
 
 		/* Find first available record */
-		rc = ts_buff_get_kernel_ts_record(ts_buff, cq_cb, ts_offset,
+		rc = ts_buff_get_kernel_ts_record(buf, cq_cb, ts_offset,
 						cq_counters_offset, target_value,
 						&interrupt->wait_list_lock, &pend);
 		if (rc)
@@ -3086,7 +3096,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	return rc;
 
 put_ts_buff:
-	hl_ts_put(ts_buff);
+	hl_mmap_mem_buf_put(buf);
 put_cq_cb:
 	hl_cb_put(cq_cb);
 put_ctx:
@@ -3248,7 +3258,7 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 		interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt];
 
 	if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
-		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, &hpriv->ts_mem_mgr,
+		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, &hpriv->mem_mgr,
 				args->in.interrupt_timeout_us, args->in.cq_counters_handle,
 				args->in.cq_counters_offset,
 				args->in.target, interrupt,
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 3eb392b4308a..ef42646a9a3f 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -149,7 +149,7 @@ static int hl_device_release(struct inode *inode, struct file *filp)
 	hl_release_pending_user_interrupts(hpriv->hdev);
 
 	hl_cb_mgr_fini(hdev, &hpriv->cb_mgr);
-	hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
+	hl_mem_mgr_fini(&hpriv->mem_mgr);
 	hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
 
 	hdev->compute_ctx_in_release = 1;
@@ -218,7 +218,7 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
 		return hl_hw_block_mmap(hpriv, vma);
 
 	case HL_MMAP_TYPE_TS_BUFF:
-		return hl_ts_mmap(hpriv, vma);
+		return hl_mem_mgr_mmap(&hpriv->mem_mgr, vma, NULL);
 	}
 
 	return -EINVAL;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 1859ace28783..a6c6d9fd30db 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -719,26 +719,14 @@ struct hl_ts_mgr {
 
 /**
  * struct hl_ts_buff - describes a timestamp buffer.
- * @refcount: reference counter for usage of the buffer.
- * @hdev: pointer to device this buffer belongs to.
- * @mmap: true if the buff is currently mapped to user.
  * @kernel_buff_address: Holds the internal buffer's kernel virtual address.
  * @user_buff_address: Holds the user buffer's kernel virtual address.
- * @id: the buffer ID.
- * @mmap_size: Holds the buffer size that was mmaped.
  * @kernel_buff_size: Holds the internal kernel buffer size.
- * @user_buff_size: Holds the user buffer size.
  */
 struct hl_ts_buff {
-	struct kref		refcount;
-	struct hl_device	*hdev;
-	atomic_t		mmap;
 	void			*kernel_buff_address;
 	void			*user_buff_address;
-	u32			id;
-	u32			mmap_size;
 	u32			kernel_buff_size;
-	u32			user_buff_size;
 };
 
 struct hl_mmap_mem_buf;
@@ -973,12 +961,12 @@ struct hl_user_interrupt {
  * struct timestamp_reg_free_node - holds the timestamp registration free objects node
  * @free_objects_node: node in the list free_obj_jobs
  * @cq_cb: pointer to cq command buffer to be freed
- * @ts_buff: pointer to timestamp buffer to be freed
+ * @buf: pointer to timestamp buffer to be freed
  */
 struct timestamp_reg_free_node {
 	struct list_head	free_objects_node;
 	struct hl_cb		*cq_cb;
-	struct hl_ts_buff	*ts_buff;
+	struct hl_mmap_mem_buf	*buf;
 };
 
 /* struct timestamp_reg_work_obj - holds the timestamp registration free objects job
@@ -995,8 +983,8 @@ struct timestamp_reg_work_obj {
 };
 
 /* struct timestamp_reg_info - holds the timestamp registration related data.
- * @ts_buff: pointer to the timestamp buffer which include both user/kernel buffers.
- *           relevant only when doing timestamps records registration.
+ * @buf: pointer to the timestamp buffer which include both user/kernel buffers.
+ *       relevant only when doing timestamps records registration.
  * @cq_cb: pointer to CQ counter CB.
  * @timestamp_kernel_addr: timestamp handle address, where to set timestamp
  *                         relevant only when doing timestamps records
@@ -1007,7 +995,7 @@ struct timestamp_reg_work_obj {
  *          allocating records dynamically.
  */
 struct timestamp_reg_info {
-	struct hl_ts_buff	*ts_buff;
+	struct hl_mmap_mem_buf	*buf;
 	struct hl_cb		*cq_cb;
 	u64			*timestamp_kernel_addr;
 	u8			in_use;
@@ -1966,7 +1954,7 @@ struct hl_debug_params {
  * @ctx: current executing context. TODO: remove for multiple ctx per process
  * @ctx_mgr: context manager to handle multiple context for this FD.
  * @cb_mgr: command buffer manager to handle multiple buffers for this FD.
- * @ts_mem_mgr: timestamp registration manager for alloc/free/map timestamp buffers.
+ * @mem_mgr: manager descriptor for memory exportable via mmap
  * @debugfs_list: list of relevant ASIC debugfs.
  * @dev_node: node in the device list of file private data
  * @refcount: number of related contexts.
@@ -1979,7 +1967,7 @@ struct hl_fpriv {
 	struct hl_ctx		*ctx;
 	struct hl_ctx_mgr	ctx_mgr;
 	struct hl_cb_mgr	cb_mgr;
-	struct hl_ts_mgr	ts_mem_mgr;
+	struct hl_mem_mgr	mem_mgr;
 	struct list_head	debugfs_list;
 	struct list_head	dev_node;
 	struct kref		refcount;
@@ -3267,11 +3255,18 @@ __printf(4, 5) int hl_snprintf_resize(char **buf, size_t *size, size_t *offset,
 					const char *format, ...);
 char *hl_format_as_binary(char *buf, size_t buf_len, u32 n);
 const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type);
-void hl_ts_mgr_init(struct hl_ts_mgr *mgr);
-void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr);
-int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
-struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr, u32 handle);
-void hl_ts_put(struct hl_ts_buff *buff);
+
+void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg);
+void hl_mem_mgr_fini(struct hl_mem_mgr *mmg);
+int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma,
+		    void *args);
+struct hl_mmap_mem_buf *hl_mmap_mem_buf_get(struct hl_mem_mgr *mmg,
+						   u32 handle);
+int hl_mmap_mem_buf_put(struct hl_mmap_mem_buf *buf);
+struct hl_mmap_mem_buf *
+hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg,
+		      struct hl_mmap_mem_buf_ops *behavior, gfp_t gfp,
+		      void *args);
 
 #ifdef CONFIG_DEBUG_FS
 
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 95c000d7dc26..28549d34d99c 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -140,7 +140,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
 
 	hl_cb_mgr_init(&hpriv->cb_mgr);
 	hl_ctx_mgr_init(&hpriv->ctx_mgr);
-	hl_ts_mgr_init(&hpriv->ts_mem_mgr);
+	hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr);
 
 	hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
 
@@ -198,7 +198,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
 out_err:
 	mutex_unlock(&hdev->fpriv_list_lock);
 	hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
-	hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
+	hl_mem_mgr_fini(&hpriv->mem_mgr);
 	hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
 	filp->private_data = NULL;
 	mutex_destroy(&hpriv->restore_phase_mutex);
diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c
index e2bc128f2291..5b5a992460c9 100644
--- a/drivers/misc/habanalabs/common/irq.c
+++ b/drivers/misc/habanalabs/common/irq.c
@@ -152,11 +152,11 @@ static void hl_ts_free_objects(struct work_struct *work)
 	struct hl_device *hdev = job->hdev;
 
 	list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) {
-		dev_dbg(hdev->dev, "About to put refcount to ts_buff (%p) cq_cb(%p)\n",
-					free_obj->ts_buff,
+		dev_dbg(hdev->dev, "About to put refcount to buf (%p) cq_cb(%p)\n",
+					free_obj->buf,
 					free_obj->cq_cb);
 
-		hl_ts_put(free_obj->ts_buff);
+		hl_mmap_mem_buf_put(free_obj->buf);
 		hl_cb_put(free_obj->cq_cb);
 		kfree(free_obj);
 	}
@@ -210,7 +210,7 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi
 	/* Putting the refcount for ts_buff and cq_cb objects will be handled
 	 * in workqueue context, just add job to free_list.
 	 */
-	free_node->ts_buff = pend->ts_reg_info.ts_buff;
+	free_node->buf = pend->ts_reg_info.buf;
 	free_node->cq_cb = pend->ts_reg_info.cq_cb;
 	list_add(&free_node->free_objects_node, *free_list);
 
@@ -244,7 +244,7 @@ static void handle_user_cq(struct hl_device *hdev,
 	list_for_each_entry_safe(pend, temp_pend, &user_cq->wait_list_head, wait_list_node) {
 		if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
 				!pend->cq_kernel_addr) {
-			if (pend->ts_reg_info.ts_buff) {
+			if (pend->ts_reg_info.buf) {
 				if (!reg_node_handle_fail) {
 					rc = handle_registration_node(hdev, pend,
 									&ts_reg_free_list_head);
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 671633414504..a9bf1a5f4a86 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -2076,164 +2076,34 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
 	return rc;
 }
 
-static void ts_buff_release(struct kref *ref)
+static void ts_buff_release(struct hl_mmap_mem_buf *buf)
 {
-	struct hl_ts_buff *buff;
+	struct hl_ts_buff *ts_buff = buf->private;
 
-	buff = container_of(ref, struct hl_ts_buff, refcount);
-
-	vfree(buff->kernel_buff_address);
-	vfree(buff->user_buff_address);
-	kfree(buff);
-}
-
-struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr,
-					u32 handle)
-{
-	struct hl_ts_buff *buff;
-
-	spin_lock(&mgr->ts_lock);
-	buff = idr_find(&mgr->ts_handles, handle);
-	if (!buff) {
-		spin_unlock(&mgr->ts_lock);
-		dev_warn(hdev->dev,
-			"TS buff get failed, no match to handle 0x%x\n", handle);
-		return NULL;
-	}
-	kref_get(&buff->refcount);
-	spin_unlock(&mgr->ts_lock);
-
-	return buff;
-}
-
-void hl_ts_put(struct hl_ts_buff *buff)
-{
-	kref_put(&buff->refcount, ts_buff_release);
-}
-
-static void buff_vm_close(struct vm_area_struct *vma)
-{
-	struct hl_ts_buff *buff = (struct hl_ts_buff *) vma->vm_private_data;
-	long new_mmap_size;
-
-	new_mmap_size = buff->mmap_size - (vma->vm_end - vma->vm_start);
-
-	if (new_mmap_size > 0) {
-		buff->mmap_size = new_mmap_size;
-		return;
-	}
-
-	atomic_set(&buff->mmap, 0);
-	hl_ts_put(buff);
-	vma->vm_private_data = NULL;
+	vfree(ts_buff->kernel_buff_address);
+	vfree(ts_buff->user_buff_address);
+	kfree(ts_buff);
 }
 
-static const struct vm_operations_struct ts_buff_vm_ops = {
-	.close = buff_vm_close
-};
-
-int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
+static int hl_ts_mmap(struct hl_mmap_mem_buf *buf, struct vm_area_struct *vma, void *args)
 {
-	struct hl_device *hdev = hpriv->hdev;
-	struct hl_ts_buff *buff;
-	u32 handle, user_buff_size;
-	int rc;
-
-	/* We use the page offset to hold the idr and thus we need to clear
-	 * it before doing the mmap itself
-	 */
-	handle = vma->vm_pgoff;
-	vma->vm_pgoff = 0;
-
-	buff = hl_ts_get(hdev, &hpriv->ts_mem_mgr, handle);
-	if (!buff) {
-		dev_err(hdev->dev,
-			"TS buff mmap failed, no match to handle 0x%x\n", handle);
-		return -EINVAL;
-	}
-
-	/* Validation check */
-	user_buff_size = vma->vm_end - vma->vm_start;
-	if (user_buff_size != ALIGN(buff->user_buff_size, PAGE_SIZE)) {
-		dev_err(hdev->dev,
-			"TS buff mmap failed, mmap size 0x%x != 0x%x buff size\n",
-			user_buff_size, ALIGN(buff->user_buff_size, PAGE_SIZE));
-		rc = -EINVAL;
-		goto put_buff;
-	}
-
-#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
-	if (!access_ok(VERIFY_WRITE,
-		(void __user *) (uintptr_t) vma->vm_start, user_buff_size)) {
-#else
-	if (!access_ok((void __user *) (uintptr_t) vma->vm_start,
-						user_buff_size)) {
-#endif
-		dev_err(hdev->dev,
-			"user pointer is invalid - 0x%lx\n",
-			vma->vm_start);
-
-		rc = -EINVAL;
-		goto put_buff;
-	}
-
-	if (atomic_cmpxchg(&buff->mmap, 0, 1)) {
-		dev_err(hdev->dev, "TS buff memory mmap failed, already mmaped to user\n");
-		rc = -EINVAL;
-		goto put_buff;
-	}
+	struct hl_ts_buff *ts_buff = buf->private;
 
-	vma->vm_ops = &ts_buff_vm_ops;
-	vma->vm_private_data = buff;
 	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE;
-	rc = remap_vmalloc_range(vma, buff->user_buff_address, 0);
-	if (rc) {
-		atomic_set(&buff->mmap, 0);
-		goto put_buff;
-	}
-
-	buff->mmap_size = buff->user_buff_size;
-	vma->vm_pgoff = handle;
-
-	return 0;
-
-put_buff:
-	hl_ts_put(buff);
-	return rc;
-}
-
-void hl_ts_mgr_init(struct hl_ts_mgr *mgr)
-{
-	spin_lock_init(&mgr->ts_lock);
-	idr_init(&mgr->ts_handles);
-}
-
-void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr)
-{
-	struct hl_ts_buff *buff;
-	struct idr *idp;
-	u32 id;
-
-	idp = &mgr->ts_handles;
-
-	idr_for_each_entry(idp, buff, id) {
-		if (kref_put(&buff->refcount, ts_buff_release) != 1)
-			dev_err(hdev->dev, "TS buff handle %d for CTX is still alive\n",
-							id);
-	}
-
-	idr_destroy(&mgr->ts_handles);
+	return remap_vmalloc_range(vma, ts_buff->user_buff_address, 0);
 }
 
-static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_elements)
+static int hl_ts_alloc_buf(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
 {
 	struct hl_ts_buff *ts_buff = NULL;
-	u32 size;
+	u32 size, num_elements;
 	void *p;
 
+	num_elements = *(u32 *)args;
+
 	ts_buff = kzalloc(sizeof(*ts_buff), GFP_KERNEL);
 	if (!ts_buff)
-		return NULL;
+		return -ENOMEM;
 
 	/* Allocate the user buffer */
 	size = num_elements * sizeof(u64);
@@ -2242,7 +2112,7 @@ static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_eleme
 		goto free_mem;
 
 	ts_buff->user_buff_address = p;
-	ts_buff->user_buff_size = size;
+	buf->mappable_size = size;
 
 	/* Allocate the internal kernel buffer */
 	size = num_elements * sizeof(struct hl_user_pending_interrupt);
@@ -2253,15 +2123,23 @@ static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_eleme
 	ts_buff->kernel_buff_address = p;
 	ts_buff->kernel_buff_size = size;
 
-	return ts_buff;
+	buf->private = ts_buff;
+
+	return 0;
 
 free_user_buff:
 	vfree(ts_buff->user_buff_address);
 free_mem:
 	kfree(ts_buff);
-	return NULL;
+	return -ENOMEM;
 }
 
+struct hl_mmap_mem_buf_ops hl_ts_behavior = {
+	.mmap = hl_ts_mmap,
+	.alloc = hl_ts_alloc_buf,
+	.release = ts_buff_release,
+};
+
 /**
  * allocate_timestamps_buffers() - allocate timestamps buffers
  * This function will allocate ts buffer that will later on be mapped to the user
@@ -2278,54 +2156,27 @@ static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_eleme
  */
 static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle)
 {
-	struct hl_ts_mgr *ts_mgr = &hpriv->ts_mem_mgr;
-	struct hl_device *hdev = hpriv->hdev;
-	struct hl_ts_buff *ts_buff;
-	int rc = 0;
+	struct hl_mem_mgr *mmg = &hpriv->mem_mgr;
+	struct hl_mmap_mem_buf *buf;
 
 	if (args->num_of_elements > TS_MAX_ELEMENTS_NUM) {
-		dev_err(hdev->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n",
+		dev_err(mmg->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n",
 				args->num_of_elements, TS_MAX_ELEMENTS_NUM);
 		return -EINVAL;
 	}
 
-	/* Allocate ts buffer object
-	 * This object will contain two buffers one that will be mapped to the user
-	 * and another internal buffer for the driver use only, which won't be mapped
-	 * to the user.
-	 */
-	ts_buff = hl_ts_alloc_buff(hdev, args->num_of_elements);
-	if (!ts_buff) {
-		rc = -ENOMEM;
-		goto out_err;
-	}
-
-	spin_lock(&ts_mgr->ts_lock);
-	rc = idr_alloc(&ts_mgr->ts_handles, ts_buff, 1, 0, GFP_ATOMIC);
-	spin_unlock(&ts_mgr->ts_lock);
-	if (rc < 0) {
-		dev_err(hdev->dev, "Failed to allocate IDR for a new ts buffer\n");
-		goto release_ts_buff;
-	}
-
-	ts_buff->id = rc;
-	ts_buff->hdev = hdev;
-
-	kref_init(&ts_buff->refcount);
-
-	/* idr is 32-bit so we can safely OR it with a mask that is above 32 bit */
-	*handle = (u64) ts_buff->id | HL_MMAP_TYPE_TS_BUFF;
-	*handle <<= PAGE_SHIFT;
+	buf = hl_mmap_mem_buf_alloc(mmg, &hl_ts_behavior, GFP_KERNEL, &args->num_of_elements);
+	if (!buf)
+		return -ENOMEM;
 
-	dev_dbg(hdev->dev, "Created ts buff object handle(%u)\n", ts_buff->id);
+	/* TODO:
+	 * Remove HL_MMAP_TYPE_TS_BUFF.
+	 * Embedding type in handle will no longer be needed as soon as we
+	 * switch to using a single memory manager for all memory types.
+	 */
+	*handle = ((u64)buf->handle | HL_MMAP_TYPE_TS_BUFF) << PAGE_SHIFT;
 
 	return 0;
-
-release_ts_buff:
-	kref_put(&ts_buff->refcount, ts_buff_release);
-out_err:
-	*handle = 0;
-	return rc;
 }
 
 int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/7] habanalabs: add MMU prefetch to ASIC-specific code
  2022-03-24  9:33 [PATCH 1/7] habanalabs: save f/w preboot major version Oded Gabbay
  2022-03-24  9:33 ` [PATCH 2/7] habanalabs: unified memory manager infrastructure Oded Gabbay
  2022-03-24  9:33 ` [PATCH 3/7] habanalabs: convert ts to use unified memory manager Oded Gabbay
@ 2022-03-24  9:33 ` Oded Gabbay
  2022-03-24  9:33 ` [PATCH 5/7] habanalabs/gaudi: Use correct sram size macro for debugfs Oded Gabbay
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2022-03-24  9:33 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ohad Sharabi

From: Ohad Sharabi <osharabi@habana.ai>

This is necessary pre-requisite for future ASIC support, where MMU
TLB prefetch is supported.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h |  7 +++++--
 drivers/misc/habanalabs/common/memory.c     | 15 ++++++++++-----
 drivers/misc/habanalabs/common/mmu/mmu.c    | 11 +++++++++++
 drivers/misc/habanalabs/gaudi/gaudi.c       |  7 +++++++
 drivers/misc/habanalabs/goya/goya.c         |  7 +++++++
 5 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index a6c6d9fd30db..1f7758fed51e 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1283,8 +1283,8 @@ struct fw_load_mgr {
  * @write_pte: write MMU page table entry to DRAM.
  * @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft
  *                        (L1 only) or hard (L0 & L1) flush.
- * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
- *                              ASID-VA-size mask.
+ * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with ASID-VA-size mask.
+ * @mmu_prefetch_cache_range: pre-fetch specific MMU STLB cache lines with ASID-VA-size mask.
  * @send_heartbeat: send is-alive packet to CPU-CP and verify response.
  * @debug_coresight: perform certain actions on Coresight for debugging.
  * @is_device_idle: return true if device is idle, false otherwise.
@@ -1416,6 +1416,8 @@ struct hl_asic_funcs {
 					u32 flags);
 	int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
 				u32 flags, u32 asid, u64 va, u64 size);
+	int (*mmu_prefetch_cache_range)(struct hl_device *hdev, u32 flags, u32 asid, u64 va,
+				u64 size);
 	int (*send_heartbeat)(struct hl_device *hdev);
 	int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
 	bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr,
@@ -3143,6 +3145,7 @@ int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size);
 int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags);
 int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
 					u32 flags, u32 asid, u64 va, u64 size);
+int hl_mmu_prefetch_cache_range(struct hl_device *hdev, u32 flags, u32 asid, u64 va, u64 size);
 u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte);
 u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
 					u8 hop_idx, u64 hop_addr, u64 virt_addr);
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index a9bf1a5f4a86..5b09c15bff48 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -1241,20 +1241,23 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 
 	rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
 	if (rc) {
-		mutex_unlock(&ctx->mmu_lock);
-		dev_err(hdev->dev, "mapping page pack failed for handle %u\n",
-				handle);
+		dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle);
 		goto map_err;
 	}
 
 	rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV,
 				ctx->asid, ret_vaddr, phys_pg_pack->total_size);
+	if (rc)
+		goto map_err;
 
-	mutex_unlock(&ctx->mmu_lock);
-
+	/* already prefetch the relevant translations to the cache */
+	rc = hl_mmu_prefetch_cache_range(hdev, *vm_type, ctx->asid, ret_vaddr,
+						phys_pg_pack->total_size);
 	if (rc)
 		goto map_err;
 
+	mutex_unlock(&ctx->mmu_lock);
+
 	ret_vaddr += phys_pg_pack->offset;
 
 	hnode->ptr = vm_type;
@@ -1272,6 +1275,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	return rc;
 
 map_err:
+	mutex_unlock(&ctx->mmu_lock);
+
 	if (add_va_block(hdev, va_range, ret_vaddr,
 				ret_vaddr + phys_pg_pack->total_size - 1))
 		dev_warn(hdev->dev,
diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c
index 27c9088bf347..dcd59d76da27 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu.c
@@ -665,6 +665,17 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
 	return rc;
 }
 
+int hl_mmu_prefetch_cache_range(struct hl_device *hdev, u32 flags, u32 asid, u64 va, u64 size)
+{
+	int rc;
+
+	rc = hdev->asic_funcs->mmu_prefetch_cache_range(hdev, flags, asid, va, size);
+	if (rc)
+		dev_err_ratelimited(hdev->dev, "MMU cache range prefetch failed\n");
+
+	return rc;
+}
+
 u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
 {
 	return (curr_pte & PAGE_PRESENT_MASK) ? (curr_pte & HOP_PHYS_ADDR_MASK) : ULLONG_MAX;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 66d56efb55ee..68c066e39660 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -9377,6 +9377,12 @@ static void gaudi_get_valid_dram_page_orders(struct hl_info_dev_memalloc_page_si
 	info->page_order_bitmask = 0;
 }
 
+static int gaudi_mmu_prefetch_cache_range(struct hl_device *hdev, u32 flags, u32 asid, u64 va,
+						u64 size)
+{
+	return 0;
+}
+
 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
@@ -9444,6 +9450,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.write_pte = gaudi_write_pte,
 	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
 	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
+	.mmu_prefetch_cache_range = gaudi_mmu_prefetch_cache_range,
 	.send_heartbeat = gaudi_send_heartbeat,
 	.debug_coresight = gaudi_debug_coresight,
 	.is_device_idle = gaudi_is_device_idle,
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 0bc2b077d9f6..bc8431e4b50b 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5680,6 +5680,12 @@ static void goya_get_valid_dram_page_orders(struct hl_info_dev_memalloc_page_siz
 	info->page_order_bitmask = 0;
 }
 
+static int goya_mmu_prefetch_cache_range(struct hl_device *hdev, u32 flags, u32 asid, u64 va,
+					u64 size)
+{
+	return 0;
+}
+
 static const struct hl_asic_funcs goya_funcs = {
 	.early_init = goya_early_init,
 	.early_fini = goya_early_fini,
@@ -5724,6 +5730,7 @@ static const struct hl_asic_funcs goya_funcs = {
 	.write_pte = goya_write_pte,
 	.mmu_invalidate_cache = goya_mmu_invalidate_cache,
 	.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
+	.mmu_prefetch_cache_range = goya_mmu_prefetch_cache_range,
 	.send_heartbeat = goya_send_heartbeat,
 	.debug_coresight = goya_debug_coresight,
 	.is_device_idle = goya_is_device_idle,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 5/7] habanalabs/gaudi: Use correct sram size macro for debugfs
  2022-03-24  9:33 [PATCH 1/7] habanalabs: save f/w preboot major version Oded Gabbay
                   ` (2 preceding siblings ...)
  2022-03-24  9:33 ` [PATCH 4/7] habanalabs: add MMU prefetch to ASIC-specific code Oded Gabbay
@ 2022-03-24  9:33 ` Oded Gabbay
  2022-03-24  9:33 ` [PATCH 6/7] habanalabs: rephrase device out-of-memory message Oded Gabbay
  2022-03-24  9:33 ` [PATCH 7/7] habanalabs/gaudi: add debugfs to fetch internal sync status Oded Gabbay
  5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2022-03-24  9:33 UTC (permalink / raw)
  To: linux-kernel; +Cc: Dafna Hirschfeld

From: Dafna Hirschfeld <dhirschfeld@habana.ai>

We currently allow accessing the whole SRAM bar size with
the macro SRAM_BAR_SIZE, but the actual size of the sram
region is the macro SRAM_SIZE which is only a portion of
the whole bar size. So when accessing the sram through
debugfs, use the macro SRAM_SIZE for the sram size
which is the correct macro.

Signed-off-by: Dafna Hirschfeld <dhirschfeld@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 68c066e39660..2101abf1d092 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -6115,7 +6115,7 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
 
 		*val = RREG32(addr - CFG_BASE);
 
-	} else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
+	} else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
 
 		*val = readl(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
 
@@ -6158,7 +6158,7 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
 
 		WREG32(addr - CFG_BASE, val);
 
-	} else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
+	} else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
 
 		writel(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
 
@@ -6205,7 +6205,7 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
 		*val = (((u64) val_h) << 32) | val_l;
 
 	} else if ((addr >= SRAM_BASE_ADDR) &&
-			(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
+			(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
 
 		*val = readq(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
 
@@ -6250,7 +6250,7 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
 		WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
 
 	} else if ((addr >= SRAM_BASE_ADDR) &&
-			(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
+			(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
 
 		writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 6/7] habanalabs: rephrase device out-of-memory message
  2022-03-24  9:33 [PATCH 1/7] habanalabs: save f/w preboot major version Oded Gabbay
                   ` (3 preceding siblings ...)
  2022-03-24  9:33 ` [PATCH 5/7] habanalabs/gaudi: Use correct sram size macro for debugfs Oded Gabbay
@ 2022-03-24  9:33 ` Oded Gabbay
  2022-03-24  9:33 ` [PATCH 7/7] habanalabs/gaudi: add debugfs to fetch internal sync status Oded Gabbay
  5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2022-03-24  9:33 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ohad Sharabi

From: Ohad Sharabi <osharabi@habana.ai>

The out of memory message is rephrased to more subtle expression as out
of memory may be caused by the user in case of, for example, greedy
allocation.

In addition the user is also being notified by an error code.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/memory.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 5b09c15bff48..6bf5c4da99fb 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -117,7 +117,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 			paddr = (u64) (uintptr_t) gen_pool_alloc(vm->dram_pg_pool, total_size);
 		if (!paddr) {
 			dev_err(hdev->dev,
-				"failed to allocate %llu contiguous pages with total size of %llu\n",
+				"Cannot allocate %llu contiguous pages with total size of %llu\n",
 				num_pgs, total_size);
 			return -ENOMEM;
 		}
@@ -158,7 +158,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 										page_size);
 			if (!phys_pg_pack->pages[i]) {
 				dev_err(hdev->dev,
-					"Failed to allocate device memory (out of memory)\n");
+					"Cannot allocate device memory (out of memory)\n");
 				rc = -ENOMEM;
 				goto page_err;
 			}
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 7/7] habanalabs/gaudi: add debugfs to fetch internal sync status
  2022-03-24  9:33 [PATCH 1/7] habanalabs: save f/w preboot major version Oded Gabbay
                   ` (4 preceding siblings ...)
  2022-03-24  9:33 ` [PATCH 6/7] habanalabs: rephrase device out-of-memory message Oded Gabbay
@ 2022-03-24  9:33 ` Oded Gabbay
  5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2022-03-24  9:33 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ohad Sharabi

From: Ohad Sharabi <osharabi@habana.ai>

When Gaudi device is secured the monitors data in the configuration
space is blocked from PCI access.
As we need to enable user to get sync-manager monitors registers when
debugging, this patch adds a debugfs that dumps the information to a
binary file (blob).
When a root user will trigger the dump, the driver will send request to
the f/w to fill a data structure containing dump of all monitors
registers.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../ABI/testing/debugfs-driver-habanalabs     | 24 +++++
 drivers/misc/habanalabs/common/debugfs.c      | 87 ++++++++++++++++---
 drivers/misc/habanalabs/common/firmware_if.c  | 47 ++++++++++
 drivers/misc/habanalabs/common/habanalabs.h   | 13 ++-
 drivers/misc/habanalabs/gaudi/gaudi.c         | 11 +++
 drivers/misc/habanalabs/goya/goya.c           |  6 ++
 .../misc/habanalabs/include/common/cpucp_if.h | 38 ++++++++
 7 files changed, 210 insertions(+), 16 deletions(-)

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index bcf6915987e4..84bf3da2bb27 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -190,6 +190,30 @@ Description:    Check and display page fault or access violation mmu errors for
                 echo "0x200" > /sys/kernel/debug/habanalabs/hl0/mmu_error
                 cat /sys/kernel/debug/habanalabs/hl0/mmu_error
 
+What:           /sys/kernel/debug/habanalabs/hl<n>/monitor_dump
+Date:           Mar 2022
+KernelVersion:  5.19
+Contact:        osharabi@habana.ai
+Description:    Allows the root user to dump monitors status from the device's
+                protected config space.
+                This property is a binary blob that contains the result of the
+                monitors registers dump.
+                This custom interface is needed (instead of using the generic
+                Linux user-space PCI mapping) because this space is protected
+                and cannot be accessed using PCI read.
+                This interface doesn't support concurrency in the same device.
+                Only supported on GAUDI.
+
+What:           /sys/kernel/debug/habanalabs/hl<n>/monitor_dump_trig
+Date:           Mar 2022
+KernelVersion:  5.19
+Contact:        osharabi@habana.ai
+Description:    Triggers dump of monitor data. The value to trigger the operation
+                must be 1. Triggering the monitor dump operation initiates dump of
+                current registers values of all monitors.
+                When the write is finished, the user can read the "monitor_dump"
+                blob
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/set_power_state
 Date:           Jan 2019
 KernelVersion:  5.1
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index f18495545854..30c637eaf59b 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -829,23 +829,67 @@ static ssize_t hl_dma_size_write(struct file *f, const char __user *buf,
 	}
 
 	/* Free the previous allocation, if there was any */
-	entry->blob_desc.size = 0;
-	vfree(entry->blob_desc.data);
+	entry->data_dma_blob_desc.size = 0;
+	vfree(entry->data_dma_blob_desc.data);
 
-	entry->blob_desc.data = vmalloc(size);
-	if (!entry->blob_desc.data)
+	entry->data_dma_blob_desc.data = vmalloc(size);
+	if (!entry->data_dma_blob_desc.data)
 		return -ENOMEM;
 
 	rc = hdev->asic_funcs->debugfs_read_dma(hdev, addr, size,
-						entry->blob_desc.data);
+						entry->data_dma_blob_desc.data);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to DMA from 0x%010llx\n", addr);
-		vfree(entry->blob_desc.data);
-		entry->blob_desc.data = NULL;
+		vfree(entry->data_dma_blob_desc.data);
+		entry->data_dma_blob_desc.data = NULL;
 		return -EIO;
 	}
 
-	entry->blob_desc.size = size;
+	entry->data_dma_blob_desc.size = size;
+
+	return count;
+}
+
+static ssize_t hl_monitor_dump_trigger(struct file *f, const char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+	struct hl_device *hdev = entry->hdev;
+	u32 size, trig;
+	ssize_t rc;
+
+	if (hdev->reset_info.in_reset) {
+		dev_warn_ratelimited(hdev->dev, "Can't dump monitors during reset\n");
+		return 0;
+	}
+	rc = kstrtouint_from_user(buf, count, 10, &trig);
+	if (rc)
+		return rc;
+
+	if (trig != 1) {
+		dev_err(hdev->dev, "Must write 1 to trigger monitor dump\n");
+		return -EINVAL;
+	}
+
+	size = sizeof(struct cpucp_monitor_dump);
+
+	/* Free the previous allocation, if there was any */
+	entry->mon_dump_blob_desc.size = 0;
+	vfree(entry->mon_dump_blob_desc.data);
+
+	entry->mon_dump_blob_desc.data = vmalloc(size);
+	if (!entry->mon_dump_blob_desc.data)
+		return -ENOMEM;
+
+	rc = hdev->asic_funcs->get_monitor_dump(hdev, entry->mon_dump_blob_desc.data);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to dump monitors\n");
+		vfree(entry->mon_dump_blob_desc.data);
+		entry->mon_dump_blob_desc.data = NULL;
+		return -EIO;
+	}
+
+	entry->mon_dump_blob_desc.size = size;
 
 	return count;
 }
@@ -1235,6 +1279,11 @@ static const struct file_operations hl_dma_size_fops = {
 	.write = hl_dma_size_write
 };
 
+static const struct file_operations hl_monitor_dump_fops = {
+	.owner = THIS_MODULE,
+	.write = hl_monitor_dump_trigger
+};
+
 static const struct file_operations hl_i2c_data_fops = {
 	.owner = THIS_MODULE,
 	.read = hl_i2c_data_read,
@@ -1350,8 +1399,10 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 	if (!dev_entry->entry_arr)
 		return;
 
-	dev_entry->blob_desc.size = 0;
-	dev_entry->blob_desc.data = NULL;
+	dev_entry->data_dma_blob_desc.size = 0;
+	dev_entry->data_dma_blob_desc.data = NULL;
+	dev_entry->mon_dump_blob_desc.size = 0;
+	dev_entry->mon_dump_blob_desc.data = NULL;
 
 	INIT_LIST_HEAD(&dev_entry->file_list);
 	INIT_LIST_HEAD(&dev_entry->cb_list);
@@ -1470,7 +1521,18 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 	debugfs_create_blob("data_dma",
 				0400,
 				dev_entry->root,
-				&dev_entry->blob_desc);
+				&dev_entry->data_dma_blob_desc);
+
+	debugfs_create_file("monitor_dump_trig",
+				0200,
+				dev_entry->root,
+				dev_entry,
+				&hl_monitor_dump_fops);
+
+	debugfs_create_blob("monitor_dump",
+				0400,
+				dev_entry->root,
+				&dev_entry->mon_dump_blob_desc);
 
 	debugfs_create_x8("skip_reset_on_timeout",
 				0644,
@@ -1509,7 +1571,8 @@ void hl_debugfs_remove_device(struct hl_device *hdev)
 
 	mutex_destroy(&entry->file_mutex);
 
-	vfree(entry->blob_desc.data);
+	vfree(entry->data_dma_blob_desc.data);
+	vfree(entry->mon_dump_blob_desc.data);
 
 	for (i = 0; i < ARRAY_SIZE(entry->state_dump); ++i)
 		vfree(entry->state_dump[i]);
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 42dce28ca815..13e09183df18 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -821,6 +821,53 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
 	return rc;
 }
 
+int hl_fw_get_monitor_dump(struct hl_device *hdev, void *data)
+{
+	struct cpucp_monitor_dump *mon_dump_cpu_addr;
+	dma_addr_t mon_dump_dma_addr;
+	struct cpucp_packet pkt = {};
+	u32 *src_ptr, *dst_ptr;
+	size_t data_size;
+	u64 result;
+	int i, rc;
+
+	data_size = sizeof(struct cpucp_monitor_dump);
+	mon_dump_cpu_addr = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, data_size,
+										&mon_dump_dma_addr);
+	if (!mon_dump_cpu_addr) {
+		dev_err(hdev->dev,
+			"Failed to allocate DMA memory for CPU-CP monitor-dump packet\n");
+		return -ENOMEM;
+	}
+
+	memset(mon_dump_cpu_addr, 0, data_size);
+
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_MONITOR_DUMP_GET << CPUCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.addr = cpu_to_le64(mon_dump_dma_addr);
+	pkt.data_max_size = cpu_to_le32(data_size);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+							HL_CPUCP_MON_DUMP_TIMEOUT_USEC, &result);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to handle CPU-CP monitor-dump packet, error %d\n", rc);
+		goto out;
+	}
+
+	/* result contains the actual size */
+	src_ptr = (u32 *)mon_dump_cpu_addr;
+	dst_ptr = data;
+	for (i = 0; i < (data_size / sizeof(u32)); i++) {
+		*dst_ptr = le32_to_cpu(*src_ptr);
+		src_ptr++;
+		dst_ptr++;
+	}
+
+out:
+	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, data_size, mon_dump_cpu_addr);
+
+	return rc;
+}
+
 int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
 		struct hl_info_pci_counters *counters)
 {
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 1f7758fed51e..ece83b264b97 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -61,6 +61,7 @@
 
 #define HL_CPUCP_INFO_TIMEOUT_USEC	10000000 /* 10s */
 #define HL_CPUCP_EEPROM_TIMEOUT_USEC	10000000 /* 10s */
+#define HL_CPUCP_MON_DUMP_TIMEOUT_USEC	10000000 /* 10s */
 
 #define HL_FW_STATUS_POLL_INTERVAL_USEC		10000 /* 10ms */
 
@@ -1293,6 +1294,7 @@ struct fw_load_mgr {
  * @hw_queues_unlock: release H/W queues lock.
  * @get_pci_id: retrieve PCI ID.
  * @get_eeprom_data: retrieve EEPROM data from F/W.
+ * @get_monitor_dump: retrieve monitor registers dump from F/W.
  * @send_cpu_message: send message to F/W. If the message is timedout, the
  *                    driver will eventually reset the device. The timeout can
  *                    be determined by the calling function or it can be 0 and
@@ -1426,8 +1428,8 @@ struct hl_asic_funcs {
 	void (*hw_queues_lock)(struct hl_device *hdev);
 	void (*hw_queues_unlock)(struct hl_device *hdev);
 	u32 (*get_pci_id)(struct hl_device *hdev);
-	int (*get_eeprom_data)(struct hl_device *hdev, void *data,
-				size_t max_size);
+	int (*get_eeprom_data)(struct hl_device *hdev, void *data, size_t max_size);
+	int (*get_monitor_dump)(struct hl_device *hdev, void *data);
 	int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
 				u16 len, u32 timeout, u64 *result);
 	int (*pci_bars_map)(struct hl_device *hdev);
@@ -2021,7 +2023,8 @@ struct hl_debugfs_entry {
  * @userptr_spinlock: protects userptr_list.
  * @ctx_mem_hash_list: list of available contexts with MMU mappings.
  * @ctx_mem_hash_spinlock: protects cb_list.
- * @blob_desc: descriptor of blob
+ * @data_dma_blob_desc: data DMA descriptor of blob.
+ * @mon_dump_blob_desc: monitor dump descriptor of blob.
  * @state_dump: data of the system states in case of a bad cs.
  * @state_dump_sem: protects state_dump.
  * @addr: next address to read/write from/to in read/write32.
@@ -2050,7 +2053,8 @@ struct hl_dbg_device_entry {
 	spinlock_t			userptr_spinlock;
 	struct list_head		ctx_mem_hash_list;
 	spinlock_t			ctx_mem_hash_spinlock;
-	struct debugfs_blob_wrapper	blob_desc;
+	struct debugfs_blob_wrapper	data_dma_blob_desc;
+	struct debugfs_blob_wrapper	mon_dump_blob_desc;
 	char				*state_dump[HL_STATE_DUMP_HIST_LEN];
 	struct rw_semaphore		state_dump_sem;
 	u64				addr;
@@ -3183,6 +3187,7 @@ int hl_fw_cpucp_handshake(struct hl_device *hdev,
 				u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
 				u32 boot_err1_reg);
 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
+int hl_fw_get_monitor_dump(struct hl_device *hdev, void *data);
 int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
 		struct hl_info_pci_counters *counters);
 int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 2101abf1d092..fdcdf47087c8 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -8500,6 +8500,16 @@ static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
 	return hl_fw_get_eeprom_data(hdev, data, max_size);
 }
 
+static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
+{
+	struct gaudi_device *gaudi = hdev->asic_specific;
+
+	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
+		return 0;
+
+	return hl_fw_get_monitor_dump(hdev, data);
+}
+
 /*
  * this function should be used only during initialization and/or after reset,
  * when there are no active users.
@@ -9459,6 +9469,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.hw_queues_unlock = gaudi_hw_queues_unlock,
 	.get_pci_id = gaudi_get_pci_id,
 	.get_eeprom_data = gaudi_get_eeprom_data,
+	.get_monitor_dump = gaudi_get_monitor_dump,
 	.send_cpu_message = gaudi_send_cpu_message,
 	.pci_bars_map = gaudi_pci_bars_map,
 	.init_iatu = gaudi_init_iatu,
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index bc8431e4b50b..36b3cf57aaae 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5680,6 +5680,11 @@ static void goya_get_valid_dram_page_orders(struct hl_info_dev_memalloc_page_siz
 	info->page_order_bitmask = 0;
 }
 
+static int goya_get_monitor_dump(struct hl_device *hdev, void *data)
+{
+	return -EOPNOTSUPP;
+}
+
 static int goya_mmu_prefetch_cache_range(struct hl_device *hdev, u32 flags, u32 asid, u64 va,
 					u64 size)
 {
@@ -5739,6 +5744,7 @@ static const struct hl_asic_funcs goya_funcs = {
 	.hw_queues_unlock = goya_hw_queues_unlock,
 	.get_pci_id = goya_get_pci_id,
 	.get_eeprom_data = goya_get_eeprom_data,
+	.get_monitor_dump = goya_get_monitor_dump,
 	.send_cpu_message = goya_send_cpu_message,
 	.pci_bars_map = goya_pci_bars_map,
 	.init_iatu = goya_init_iatu,
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index f00db22f98fb..645fda75ed27 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -389,6 +389,14 @@ enum pq_init_status {
  *
  * CPUCP_PACKET_ENGINE_CORE_ASID_SET -
  *       Packet to perform engine core ASID configuration
+ *
+ * CPUCP_PACKET_MONITOR_DUMP_GET -
+ *       Get monitors registers dump from the CpuCP kernel.
+ *       The CPU will put the registers dump in the a buffer allocated by the driver
+ *       which address is passed via the CpuCp packet. In addition, the host's driver
+ *       passes the max size it allows the CpuCP to write to the structure, to prevent
+ *       data corruption in case of mismatched driver/FW versions.
+ *       Relevant only to Gaudi.
  */
 
 enum cpucp_packet_id {
@@ -439,6 +447,11 @@ enum cpucp_packet_id {
 	CPUCP_PACKET_POWER_SET,			/* internal */
 	CPUCP_PACKET_RESERVED,			/* not used */
 	CPUCP_PACKET_ENGINE_CORE_ASID_SET,	/* internal */
+	CPUCP_PACKET_RESERVED2,			/* not used */
+	CPUCP_PACKET_RESERVED3,			/* not used */
+	CPUCP_PACKET_RESERVED4,			/* not used */
+	CPUCP_PACKET_RESERVED5,			/* not used */
+	CPUCP_PACKET_MONITOR_DUMP_GET,		/* debugfs */
 };
 
 #define CPUCP_PACKET_FENCE_VAL	0xFE8CE7A5
@@ -889,4 +902,29 @@ struct cpucp_hbm_row_replaced_rows_info {
 	struct cpucp_hbm_row_info replaced_rows[CPUCP_HBM_ROW_REPLACE_MAX];
 };
 
+/*
+ * struct dcore_monitor_regs_data - DCORE monitor regs data.
+ * the structure follows sync manager block layout. relevant only to Gaudi.
+ * @mon_pay_addrl: array of payload address low bits.
+ * @mon_pay_addrh: array of payload address high bits.
+ * @mon_pay_data: array of payload data.
+ * @mon_arm: array of monitor arm.
+ * @mon_status: array of monitor status.
+ */
+struct dcore_monitor_regs_data {
+	__le32 mon_pay_addrl[512];
+	__le32 mon_pay_addrh[512];
+	__le32 mon_pay_data[512];
+	__le32 mon_arm[512];
+	__le32 mon_status[512];
+};
+
+/* contains SM data for each SYNC_MNGR (relevant only to Gaudi) */
+struct cpucp_monitor_dump {
+	struct dcore_monitor_regs_data sync_mngr_w_s;
+	struct dcore_monitor_regs_data sync_mngr_e_s;
+	struct dcore_monitor_regs_data sync_mngr_w_n;
+	struct dcore_monitor_regs_data sync_mngr_e_n;
+};
+
 #endif /* CPUCP_IF_H */
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-03-24  9:34 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-24  9:33 [PATCH 1/7] habanalabs: save f/w preboot major version Oded Gabbay
2022-03-24  9:33 ` [PATCH 2/7] habanalabs: unified memory manager infrastructure Oded Gabbay
2022-03-24  9:33 ` [PATCH 3/7] habanalabs: convert ts to use unified memory manager Oded Gabbay
2022-03-24  9:33 ` [PATCH 4/7] habanalabs: add MMU prefetch to ASIC-specific code Oded Gabbay
2022-03-24  9:33 ` [PATCH 5/7] habanalabs/gaudi: Use correct sram size macro for debugfs Oded Gabbay
2022-03-24  9:33 ` [PATCH 6/7] habanalabs: rephrase device out-of-memory message Oded Gabbay
2022-03-24  9:33 ` [PATCH 7/7] habanalabs/gaudi: add debugfs to fetch internal sync status Oded Gabbay

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).