linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/4] habanalabs: support HW blocks vm show
@ 2021-02-25 16:19 Oded Gabbay
  2021-02-25 16:19 ` [PATCH 2/4] habanalabs: return current power via INFO IOCTL Oded Gabbay
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Oded Gabbay @ 2021-02-25 16:19 UTC (permalink / raw)
  To: linux-kernel; +Cc: Sagiv Ozeri

From: Sagiv Ozeri <sozeri@habana.ai>

Improve "vm" debugfs node to print also the virtual addresses which are
currently mapped to HW blocks in the device.

Signed-off-by: Sagiv Ozeri <sozeri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../ABI/testing/debugfs-driver-habanalabs     |  2 +-
 drivers/misc/habanalabs/common/context.c      | 14 +++-
 drivers/misc/habanalabs/common/debugfs.c      | 16 +++++
 drivers/misc/habanalabs/common/habanalabs.h   | 24 +++++++
 drivers/misc/habanalabs/common/memory.c       | 65 +++++++++++++++++--
 5 files changed, 113 insertions(+), 8 deletions(-)

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index d447a611c41b..f9e233cbdc37 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -174,7 +174,7 @@ Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Displays a list with information about all the active virtual
-                address mappings per ASID
+                address mappings per ASID and all user mappings of HW blocks
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
 Date:           Mar 2020
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index cda871afb8f4..62d705889ca8 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -20,6 +20,11 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 	 */
 	hl_pending_cb_list_flush(ctx);
 
+	/* Release all allocated HW block mapped list entries and destroy
+	 * the mutex.
+	 */
+	hl_hw_block_mem_fini(ctx);
+
 	/*
 	 * If we arrived here, there are no jobs waiting for this context
 	 * on its queues so we can safely remove it.
@@ -160,13 +165,15 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 	if (!ctx->cs_pending)
 		return -ENOMEM;
 
+	hl_hw_block_mem_init(ctx);
+
 	if (is_kernel_ctx) {
 		ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
 		rc = hl_vm_ctx_init(ctx);
 		if (rc) {
 			dev_err(hdev->dev, "Failed to init mem ctx module\n");
 			rc = -ENOMEM;
-			goto err_free_cs_pending;
+			goto err_hw_block_mem_fini;
 		}
 
 		rc = hdev->asic_funcs->ctx_init(ctx);
@@ -179,7 +186,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 		if (!ctx->asid) {
 			dev_err(hdev->dev, "No free ASID, failed to create context\n");
 			rc = -ENOMEM;
-			goto err_free_cs_pending;
+			goto err_hw_block_mem_fini;
 		}
 
 		rc = hl_vm_ctx_init(ctx);
@@ -214,7 +221,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 err_asid_free:
 	if (ctx->asid != HL_KERNEL_ASID_ID)
 		hl_asid_free(hdev, ctx->asid);
-err_free_cs_pending:
+err_hw_block_mem_fini:
+	hl_hw_block_mem_fini(ctx);
 	kfree(ctx->cs_pending);
 
 	return rc;
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index df847a6d19f4..d4a49d6d1753 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -229,6 +229,7 @@ static int vm_show(struct seq_file *s, void *data)
 {
 	struct hl_debugfs_entry *entry = s->private;
 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+	struct hl_vm_hw_block_list_node *lnode;
 	struct hl_ctx *ctx;
 	struct hl_vm *vm;
 	struct hl_vm_hash_node *hnode;
@@ -272,6 +273,21 @@ static int vm_show(struct seq_file *s, void *data)
 		}
 		mutex_unlock(&ctx->mem_hash_lock);
 
+		if (ctx->asid != HL_KERNEL_ASID_ID &&
+		    !list_empty(&ctx->hw_block_mem_list)) {
+			seq_puts(s, "\nhw_block mappings:\n\n");
+			seq_puts(s, "    virtual address    size    HW block id\n");
+			seq_puts(s, "-------------------------------------------\n");
+			mutex_lock(&ctx->hw_block_list_lock);
+			list_for_each_entry(lnode, &ctx->hw_block_mem_list,
+					    node) {
+				seq_printf(s,
+					"    0x%-14lx   %-6u      %-9u\n",
+					lnode->vaddr, lnode->size, lnode->id);
+			}
+			mutex_unlock(&ctx->hw_block_list_lock);
+		}
+
 		vm = &ctx->hdev->vm;
 		spin_lock(&vm->idr_lock);
 
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 56c5fb16b409..9129582e9339 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1103,9 +1103,11 @@ struct hl_pending_cb {
  * @mem_hash_lock: protects the mem_hash.
  * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
  *            MMU hash or walking the PGT requires talking this lock.
+ * @hw_block_list_lock: protects the HW block memory list.
  * @debugfs_list: node in debugfs list of contexts.
  * pending_cb_list: list of pending command buffers waiting to be sent upon
  *                  next user command submission context.
+ * @hw_block_mem_list: list of HW block virtual mapped addresses.
  * @cs_counters: context command submission counters.
  * @cb_va_pool: device VA pool for command buffers which are mapped to the
  *              device's MMU.
@@ -1142,8 +1144,10 @@ struct hl_ctx {
 	struct hl_va_range		*va_range[HL_VA_RANGE_TYPE_MAX];
 	struct mutex			mem_hash_lock;
 	struct mutex			mmu_lock;
+	struct mutex			hw_block_list_lock;
 	struct list_head		debugfs_list;
 	struct list_head		pending_cb_list;
+	struct list_head		hw_block_mem_list;
 	struct hl_cs_counters_atomic	cs_counters;
 	struct gen_pool			*cb_va_pool;
 	u64				cs_sequence;
@@ -1362,6 +1366,23 @@ struct hl_vm_hash_node {
 	void			*ptr;
 };
 
+/**
+ * struct hl_vm_hw_block_list_node - list element from user virtual address to
+ *				HW block id.
+ * @node: node to hang on the list in context object.
+ * @ctx: the context this node belongs to.
+ * @vaddr: virtual address of the HW block.
+ * @size: size of the block.
+ * @id: HW block id (handle).
+ */
+struct hl_vm_hw_block_list_node {
+	struct list_head	node;
+	struct hl_ctx		*ctx;
+	unsigned long		vaddr;
+	u32			size;
+	u32			id;
+};
+
 /**
  * struct hl_vm_phys_pg_pack - physical page pack.
  * @vm_type: describes the type of the virtual area descriptor.
@@ -2280,6 +2301,9 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx);
 int hl_vm_init(struct hl_device *hdev);
 void hl_vm_fini(struct hl_device *hdev);
 
+void hl_hw_block_mem_init(struct hl_ctx *ctx);
+void hl_hw_block_mem_fini(struct hl_ctx *ctx);
+
 u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
 		enum hl_va_range_type type, u32 size, u32 alignment);
 int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 1f5910517b0e..c2679896aa50 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -1305,9 +1305,15 @@ static int map_block(struct hl_device *hdev, u64 address, u64 *handle,
 
 static void hw_block_vm_close(struct vm_area_struct *vma)
 {
-	struct hl_ctx *ctx = (struct hl_ctx *) vma->vm_private_data;
+	struct hl_vm_hw_block_list_node *lnode =
+		(struct hl_vm_hw_block_list_node *) vma->vm_private_data;
+	struct hl_ctx *ctx = lnode->ctx;
 
+	mutex_lock(&ctx->hw_block_list_lock);
+	list_del(&lnode->node);
+	mutex_unlock(&ctx->hw_block_list_lock);
 	hl_ctx_put(ctx);
+	kfree(lnode);
 	vma->vm_private_data = NULL;
 }
 
@@ -1325,7 +1331,9 @@ static const struct vm_operations_struct hw_block_vm_ops = {
  */
 int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
 {
+	struct hl_vm_hw_block_list_node *lnode;
 	struct hl_device *hdev = hpriv->hdev;
+	struct hl_ctx *ctx = hpriv->ctx;
 	u32 block_id, block_size;
 	int rc;
 
@@ -1351,17 +1359,31 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
 		return -EINVAL;
 	}
 
+	lnode = kzalloc(sizeof(*lnode), GFP_KERNEL);
+	if (!lnode)
+		return -ENOMEM;
+
 	vma->vm_ops = &hw_block_vm_ops;
-	vma->vm_private_data = hpriv->ctx;
+	vma->vm_private_data = lnode;
 
-	hl_ctx_get(hdev, hpriv->ctx);
+	hl_ctx_get(hdev, ctx);
 
 	rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
 	if (rc) {
-		hl_ctx_put(hpriv->ctx);
+		hl_ctx_put(ctx);
+		kfree(lnode);
 		return rc;
 	}
 
+	lnode->ctx = ctx;
+	lnode->vaddr = vma->vm_start;
+	lnode->size = block_size;
+	lnode->id = block_id;
+
+	mutex_lock(&ctx->hw_block_list_lock);
+	list_add_tail(&lnode->node, &ctx->hw_block_mem_list);
+	mutex_unlock(&ctx->hw_block_list_lock);
+
 	vma->vm_pgoff = block_id;
 
 	return 0;
@@ -2122,3 +2144,38 @@ void hl_vm_fini(struct hl_device *hdev)
 
 	vm->init_done = false;
 }
+
+/**
+ * hl_hw_block_mem_init() - HW block memory initialization.
+ * @ctx: pointer to the habanalabs context structure.
+ *
+ * This function initializes the HW block virtual mapped addresses list and
+ * it's lock.
+ */
+void hl_hw_block_mem_init(struct hl_ctx *ctx)
+{
+	mutex_init(&ctx->hw_block_list_lock);
+	INIT_LIST_HEAD(&ctx->hw_block_mem_list);
+}
+
+/**
+ * hl_hw_block_mem_fini() - HW block memory teardown.
+ * @ctx: pointer to the habanalabs context structure.
+ *
+ * This function clears the HW block virtual mapped addresses list and destroys
+ * it's lock.
+ */
+void hl_hw_block_mem_fini(struct hl_ctx *ctx)
+{
+	struct hl_vm_hw_block_list_node *lnode, *tmp;
+
+	if (!list_empty(&ctx->hw_block_mem_list))
+		dev_crit(ctx->hdev->dev, "HW block mem list isn't empty\n");
+
+	list_for_each_entry_safe(lnode, tmp, &ctx->hw_block_mem_list, node) {
+		list_del(&lnode->node);
+		kfree(lnode);
+	}
+
+	mutex_destroy(&ctx->hw_block_list_lock);
+}
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/4] habanalabs: return current power via INFO IOCTL
  2021-02-25 16:19 [PATCH 1/4] habanalabs: support HW blocks vm show Oded Gabbay
@ 2021-02-25 16:19 ` Oded Gabbay
  2021-02-25 16:19 ` [PATCH 3/4] habanalabs/gaudi: update extended async event header Oded Gabbay
  2021-02-25 16:19 ` [PATCH 4/4] habanalabs: replace GFP_ATOMIC with GFP_KERNEL Oded Gabbay
  2 siblings, 0 replies; 4+ messages in thread
From: Oded Gabbay @ 2021-02-25 16:19 UTC (permalink / raw)
  To: linux-kernel; +Cc: Sagiv Ozeri

From: Sagiv Ozeri <sozeri@habana.ai>

Add driver implementation for reading the current power from the device
CPU F/W.

Signed-off-by: Sagiv Ozeri <sozeri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c  | 23 +++++++++++++++++++
 drivers/misc/habanalabs/common/habanalabs.h   |  1 +
 .../misc/habanalabs/common/habanalabs_ioctl.c | 22 ++++++++++++++++++
 .../misc/habanalabs/include/common/cpucp_if.h |  5 ++++
 include/uapi/misc/habanalabs.h                |  9 ++++++++
 5 files changed, 60 insertions(+)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 6f3692bf5eff..2a58edaf984a 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -565,6 +565,29 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
 	return rc;
 }
 
+int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power)
+{
+	struct cpucp_packet pkt;
+	u64 result;
+	int rc;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+			HL_CPUCP_INFO_TIMEOUT_USEC, &result);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to read power, error %d\n", rc);
+		return rc;
+	}
+
+	*power = result;
+
+	return rc;
+}
+
 static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
 {
 	/* Some of the status codes below are deprecated in newer f/w
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 9129582e9339..33ada9425a62 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2363,6 +2363,7 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
 			u64 *total_energy);
 int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
 		u16 *pll_freq_arr);
+int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power);
 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 			u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
 			u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 95a0b144604d..3f762e021428 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -444,6 +444,25 @@ static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 		min((size_t) max_size, sizeof(freq_info))) ? -EFAULT : 0;
 }
 
+static int power_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	u32 max_size = args->return_size;
+	struct hl_power_info power_info = {0};
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	int rc;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	rc = hl_fw_cpucp_power_get(hdev, &power_info.power);
+	if (rc)
+		return rc;
+
+	return copy_to_user(out, &power_info,
+		min((size_t) max_size, sizeof(power_info))) ? -EFAULT : 0;
+}
+
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 				struct device *dev)
 {
@@ -524,6 +543,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_PLL_FREQUENCY:
 		return pll_frequency_info(hpriv, args);
 
+	case HL_INFO_POWER:
+		return power_info(hpriv, args);
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -ENOTTY;
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index bf4e7900d8c8..6ba480a316ce 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -296,6 +296,9 @@ enum pq_init_status {
  *       The result is composed of 4 outputs, each is 16-bit
  *       frequency in MHz.
  *
+ * CPUCP_PACKET_POWER_GET
+ *       Fetch the present power consumption of the device (Current * Voltage).
+ *
  */
 
 enum cpucp_packet_id {
@@ -329,6 +332,8 @@ enum cpucp_packet_id {
 	CPUCP_PACKET_PCIE_REPLAY_CNT_GET,	/* internal */
 	CPUCP_PACKET_TOTAL_ENERGY_GET,		/* internal */
 	CPUCP_PACKET_PLL_INFO_GET,		/* internal */
+	CPUCP_PACKET_NIC_STATUS,		/* internal */
+	CPUCP_PACKET_POWER_GET,			/* internal */
 };
 
 #define CPUCP_PACKET_FENCE_VAL	0xFE8CE7A5
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 05c7cf4e727e..92fd000ce0d3 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -297,6 +297,7 @@ enum hl_device_status {
 #define HL_INFO_SYNC_MANAGER		14
 #define HL_INFO_TOTAL_ENERGY		15
 #define HL_INFO_PLL_FREQUENCY		16
+#define HL_INFO_POWER			17
 
 #define HL_INFO_VERSION_MAX_LEN	128
 #define HL_INFO_CARD_NAME_MAX_LEN	16
@@ -410,6 +411,14 @@ struct hl_pll_frequency_info {
 	__u16 output[HL_PLL_NUM_OUTPUTS];
 };
 
+/**
+ * struct hl_power_info - power information
+ * @power: power consumption
+ */
+struct hl_power_info {
+	__u64 power;
+};
+
 /**
  * struct hl_info_sync_manager - sync manager information
  * @first_available_sync_object: first available sob
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 3/4] habanalabs/gaudi: update extended async event header
  2021-02-25 16:19 [PATCH 1/4] habanalabs: support HW blocks vm show Oded Gabbay
  2021-02-25 16:19 ` [PATCH 2/4] habanalabs: return current power via INFO IOCTL Oded Gabbay
@ 2021-02-25 16:19 ` Oded Gabbay
  2021-02-25 16:19 ` [PATCH 4/4] habanalabs: replace GFP_ATOMIC with GFP_KERNEL Oded Gabbay
  2 siblings, 0 replies; 4+ messages in thread
From: Oded Gabbay @ 2021-02-25 16:19 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

Update to the latest definition of the firmware

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../include/gaudi/gaudi_async_ids_map_extended.h       | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h
index 737176ba06fb..9106395eb920 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h
@@ -301,10 +301,10 @@ static struct gaudi_async_events_ids_map gaudi_irq_map_table[] = {
 	{ .fc_id = 274, .cpu_id = 128, .valid = 0, .name = "" },
 	{ .fc_id = 275, .cpu_id = 128, .valid = 0, .name = "" },
 	{ .fc_id = 276, .cpu_id = 128, .valid = 0, .name = "" },
-	{ .fc_id = 277, .cpu_id = 129, .valid = 0, .name = "" },
-	{ .fc_id = 278, .cpu_id = 129, .valid = 0, .name = "" },
-	{ .fc_id = 279, .cpu_id = 129, .valid = 0, .name = "" },
-	{ .fc_id = 280, .cpu_id = 129, .valid = 0, .name = "" },
+	{ .fc_id = 277, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_0" },
+	{ .fc_id = 278, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_1" },
+	{ .fc_id = 279, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_2" },
+	{ .fc_id = 280, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_3" },
 	{ .fc_id = 281, .cpu_id = 130, .valid = 0, .name = "" },
 	{ .fc_id = 282, .cpu_id = 131, .valid = 0, .name = "" },
 	{ .fc_id = 283, .cpu_id = 132, .valid = 0, .name = "" },
@@ -681,7 +681,7 @@ static struct gaudi_async_events_ids_map gaudi_irq_map_table[] = {
 	{ .fc_id = 654, .cpu_id = 503, .valid = 0, .name = "" },
 	{ .fc_id = 655, .cpu_id = 504, .valid = 0, .name = "" },
 	{ .fc_id = 656, .cpu_id = 505, .valid = 0, .name = "" },
-	{ .fc_id = 657, .cpu_id = 506, .valid = 0, .name = "" },
+	{ .fc_id = 657, .cpu_id = 506, .valid = 1, .name = "PKT_QUEUE_ASYNC" },
 	{ .fc_id = 658, .cpu_id = 507, .valid = 1, .name = "FIX_POWER_ENV_S" },
 	{ .fc_id = 659, .cpu_id = 508, .valid = 1, .name = "FIX_POWER_ENV_E" },
 	{ .fc_id = 660, .cpu_id = 509, .valid = 1,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 4/4] habanalabs: replace GFP_ATOMIC with GFP_KERNEL
  2021-02-25 16:19 [PATCH 1/4] habanalabs: support HW blocks vm show Oded Gabbay
  2021-02-25 16:19 ` [PATCH 2/4] habanalabs: return current power via INFO IOCTL Oded Gabbay
  2021-02-25 16:19 ` [PATCH 3/4] habanalabs/gaudi: update extended async event header Oded Gabbay
@ 2021-02-25 16:19 ` Oded Gabbay
  2 siblings, 0 replies; 4+ messages in thread
From: Oded Gabbay @ 2021-02-25 16:19 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

As there are incorrect assumptions in which some of the
initialization and data path flows cannot sleep, most allocations
are being done using GFP_ATOMIC.
We modify the code to use GFP_ATOMIC only when realy needed, as
sleepable flow should use GFP_KERNEL.
In addition add a fallback to allocate memory using GFP_KERNEL,
once ATOMIC allocation fails.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../misc/habanalabs/common/command_buffer.c   | 12 +++++++---
 .../habanalabs/common/command_submission.c    | 22 ++++++++++++++++++-
 drivers/misc/habanalabs/common/device.c       |  2 +-
 drivers/misc/habanalabs/common/memory.c       | 10 +++------
 drivers/misc/habanalabs/gaudi/gaudi.c         |  2 +-
 drivers/misc/habanalabs/goya/goya.c           |  2 +-
 6 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index d9adb9a5e4d8..719168c980a4 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -181,7 +181,7 @@ static void cb_release(struct kref *ref)
 static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
 					int ctx_id, bool internal_cb)
 {
-	struct hl_cb *cb;
+	struct hl_cb *cb = NULL;
 	u32 cb_offset;
 	void *p;
 
@@ -193,9 +193,10 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
 	 * the kernel's copy. Hence, we must never sleep in this code section
 	 * and must use GFP_ATOMIC for all memory allocations.
 	 */
-	if (ctx_id == HL_KERNEL_ASID_ID)
+	if (ctx_id == HL_KERNEL_ASID_ID && !hdev->disabled)
 		cb = kzalloc(sizeof(*cb), GFP_ATOMIC);
-	else
+
+	if (!cb)
 		cb = kzalloc(sizeof(*cb), GFP_KERNEL);
 
 	if (!cb)
@@ -214,6 +215,9 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
 	} else if (ctx_id == HL_KERNEL_ASID_ID) {
 		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
 						&cb->bus_address, GFP_ATOMIC);
+		if (!p)
+			p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
+					cb_size, &cb->bus_address, GFP_KERNEL);
 	} else {
 		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
 						&cb->bus_address,
@@ -310,6 +314,8 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
 
 	spin_lock(&mgr->cb_lock);
 	rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
+	if (rc < 0)
+		rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_KERNEL);
 	spin_unlock(&mgr->cb_lock);
 
 	if (rc < 0) {
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 98fdb98ff4e4..7d0d3e61a6e7 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -638,6 +638,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	cntr = &hdev->aggregated_cs_counters;
 
 	cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
+	if (!cs)
+		cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+
 	if (!cs) {
 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
@@ -657,6 +660,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	spin_lock_init(&cs->job_lock);
 
 	cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
+	if (!cs_cmpl)
+		cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_KERNEL);
+
 	if (!cs_cmpl) {
 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
@@ -698,6 +704,10 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 
 	cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
 			sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
+	if (!cs->jobs_in_queue_cnt)
+		cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
+				sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
+
 	if (!cs->jobs_in_queue_cnt) {
 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
@@ -927,6 +937,9 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
 	struct hl_cs_job *job;
 
 	job = kzalloc(sizeof(*job), GFP_ATOMIC);
+	if (!job)
+		job = kzalloc(sizeof(*job), GFP_KERNEL);
+
 	if (!job)
 		return NULL;
 
@@ -1029,6 +1042,9 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev,
 
 	*cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
 					GFP_ATOMIC);
+	if (!*cs_chunk_array)
+		*cs_chunk_array = kmalloc_array(num_chunks,
+					sizeof(**cs_chunk_array), GFP_KERNEL);
 	if (!*cs_chunk_array) {
 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 		atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
@@ -1483,6 +1499,10 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
 	signal_seq_arr = kmalloc_array(signal_seq_arr_len,
 					sizeof(*signal_seq_arr),
 					GFP_ATOMIC);
+	if (!signal_seq_arr)
+		signal_seq_arr = kmalloc_array(signal_seq_arr_len,
+					sizeof(*signal_seq_arr),
+					GFP_KERNEL);
 	if (!signal_seq_arr) {
 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 		atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
@@ -1931,7 +1951,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 
 	hl_ctx_get(hdev, ctx);
 
-	pend = kmalloc(sizeof(*pend), GFP_ATOMIC);
+	pend = kmalloc(sizeof(*pend), GFP_KERNEL);
 	if (!pend) {
 		hl_ctx_put(ctx);
 		return -ENOMEM;
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index b4321144379c..2cbe9d04c229 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -327,7 +327,7 @@ static int device_early_init(struct hl_device *hdev)
 	if (hdev->asic_prop.completion_queues_count) {
 		hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
 				sizeof(*hdev->cq_wq),
-				GFP_ATOMIC);
+				GFP_KERNEL);
 		if (!hdev->cq_wq) {
 			rc = -ENOMEM;
 			goto asid_fini;
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index c2679896aa50..1b69573369cf 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -145,7 +145,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 
 	spin_lock(&vm->idr_lock);
 	handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
-				GFP_ATOMIC);
+				GFP_KERNEL);
 	spin_unlock(&vm->idr_lock);
 
 	if (handle < 0) {
@@ -1596,7 +1596,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
 
 	rc = sg_alloc_table_from_pages(userptr->sgt,
 				       userptr->pages,
-				       npages, offset, size, GFP_ATOMIC);
+				       npages, offset, size, GFP_KERNEL);
 	if (rc < 0) {
 		dev_err(hdev->dev, "failed to create SG table from pages\n");
 		goto put_pages;
@@ -1646,11 +1646,7 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
 		return -EINVAL;
 	}
 
-	/*
-	 * This function can be called also from data path, hence use atomic
-	 * always as it is not a big allocation.
-	 */
-	userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC);
+	userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL);
 	if (!userptr->sgt)
 		return -ENOMEM;
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 4f39737b4808..eee9387c8a26 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -4873,7 +4873,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
 			parser->job_userptr_list, &userptr))
 		goto already_pinned;
 
-	userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
+	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
 	if (!userptr)
 		return -ENOMEM;
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 20abed6ef7e6..1eaf9c06227c 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -3221,7 +3221,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
 			parser->job_userptr_list, &userptr))
 		goto already_pinned;
 
-	userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
+	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
 	if (!userptr)
 		return -ENOMEM;
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-02-25 16:21 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-25 16:19 [PATCH 1/4] habanalabs: support HW blocks vm show Oded Gabbay
2021-02-25 16:19 ` [PATCH 2/4] habanalabs: return current power via INFO IOCTL Oded Gabbay
2021-02-25 16:19 ` [PATCH 3/4] habanalabs/gaudi: update extended async event header Oded Gabbay
2021-02-25 16:19 ` [PATCH 4/4] habanalabs: replace GFP_ATOMIC with GFP_KERNEL Oded Gabbay

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).