All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] habanalabs: change function to static
@ 2022-02-08 19:28 Oded Gabbay
  2022-02-08 19:28 ` [PATCH 2/4] habanalabs: enable stop-on-error debugfs setting per ASIC Oded Gabbay
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Oded Gabbay @ 2022-02-08 19:28 UTC (permalink / raw)
  To: linux-kernel

handle_registration_node() is called directly from the irq handler
in irq.c, so it can be static.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c
index c28f3a1c7a03..e2bc128f2291 100644
--- a/drivers/misc/habanalabs/common/irq.c
+++ b/drivers/misc/habanalabs/common/irq.c
@@ -175,7 +175,7 @@ static void hl_ts_free_objects(struct work_struct *work)
  * so here we'll be filling a list with nodes of "put" jobs and then will send this
  * list to a dedicated workqueue to do the actual put.
  */
-int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
+static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
 						struct list_head **free_list)
 {
 	struct timestamp_reg_free_node *free_node;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/4] habanalabs: enable stop-on-error debugfs setting per ASIC
  2022-02-08 19:28 [PATCH 1/4] habanalabs: change function to static Oded Gabbay
@ 2022-02-08 19:28 ` Oded Gabbay
  2022-02-08 19:28 ` [PATCH 3/4] habanalabs: use proper max_power variable for device utilization Oded Gabbay
  2022-02-08 19:28 ` [PATCH 4/4] habanalabs: set max power on device init per ASIC Oded Gabbay
  2 siblings, 0 replies; 4+ messages in thread
From: Oded Gabbay @ 2022-02-08 19:28 UTC (permalink / raw)
  To: linux-kernel; +Cc: Tomer Tayar

From: Tomer Tayar <ttayar@habana.ai>

On Goya and Gaudi, the stop-on-error configuration can be set via
debugfs. However, in future devices, this configuration will always be
enabled.
Modify the debugfs node to be allowed only for ASICs that support this
dynamic configuration.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 Documentation/ABI/testing/debugfs-driver-habanalabs | 1 +
 drivers/misc/habanalabs/common/debugfs.c            | 6 ++++++
 drivers/misc/habanalabs/common/habanalabs.h         | 2 ++
 drivers/misc/habanalabs/gaudi/gaudi.c               | 2 ++
 drivers/misc/habanalabs/goya/goya.c                 | 2 ++
 5 files changed, 13 insertions(+)

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index 783001a574b3..bcf6915987e4 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -222,6 +222,7 @@ KernelVersion:  5.6
 Contact:        ogabbay@kernel.org
 Description:    Sets the stop-on_error option for the device engines. Value of
                 "0" is for disable, otherwise enable.
+                Relevant only for GOYA and GAUDI.
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
 Date:           Sep 2021
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index e3ee5f45d20c..9f0aaf0ef43b 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -1071,6 +1071,9 @@ static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf,
 	char tmp_buf[200];
 	ssize_t rc;
 
+	if (!hdev->asic_prop.configurable_stop_on_err)
+		return -EOPNOTSUPP;
+
 	if (*ppos)
 		return 0;
 
@@ -1089,6 +1092,9 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
 	u32 value;
 	ssize_t rc;
 
+	if (!hdev->asic_prop.configurable_stop_on_err)
+		return -EOPNOTSUPP;
+
 	if (hdev->reset_info.in_reset) {
 		dev_warn_ratelimited(hdev->dev,
 				"Can't change stop on error during reset\n");
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index b06e2b0812b6..93116fe71ef6 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -561,6 +561,7 @@ struct hl_hints_range {
  *                              use-case of doing soft-reset in training (due
  *                              to the fact that training runs on multiple
  *                              devices)
+ * @configurable_stop_on_err: is stop-on-error option configurable via debugfs.
  */
 struct asic_fixed_properties {
 	struct hw_queue_properties	*hw_queues_props;
@@ -644,6 +645,7 @@ struct asic_fixed_properties {
 	u8				use_get_power_for_reset_history;
 	u8				supports_soft_reset;
 	u8				allow_inference_soft_reset;
+	u8				configurable_stop_on_err;
 };
 
 /**
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index f2242aa3baa2..61aa6dce6dde 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -669,6 +669,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
 
 	prop->use_get_power_for_reset_history = true;
 
+	prop->configurable_stop_on_err = true;
+
 	return 0;
 }
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 3785fb33260d..c8143b6616af 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -483,6 +483,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
 
 	prop->use_get_power_for_reset_history = true;
 
+	prop->configurable_stop_on_err = true;
+
 	return 0;
 }
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 3/4] habanalabs: use proper max_power variable for device utilization
  2022-02-08 19:28 [PATCH 1/4] habanalabs: change function to static Oded Gabbay
  2022-02-08 19:28 ` [PATCH 2/4] habanalabs: enable stop-on-error debugfs setting per ASIC Oded Gabbay
@ 2022-02-08 19:28 ` Oded Gabbay
  2022-02-08 19:28 ` [PATCH 4/4] habanalabs: set max power on device init per ASIC Oded Gabbay
  2 siblings, 0 replies; 4+ messages in thread
From: Oded Gabbay @ 2022-02-08 19:28 UTC (permalink / raw)
  To: linux-kernel; +Cc: Tomer Tayar

From: Tomer Tayar <ttayar@habana.ai>

The max_power variable which is used for calculating the device
utilization is the ASIC specific property which is set during init.
However, the max value can be modified via sysfs, and thus the updated
value in the device structure should be used instead.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 60bfd737404b..63e2449ad136 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -614,7 +614,7 @@ int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
 	u64 max_power, curr_power, dc_power, dividend;
 	int rc;
 
-	max_power = hdev->asic_prop.max_power_default;
+	max_power = hdev->max_power;
 	dc_power = hdev->asic_prop.dc_power_default;
 	rc = hl_fw_cpucp_power_get(hdev, &curr_power);
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 4/4] habanalabs: set max power on device init per ASIC
  2022-02-08 19:28 [PATCH 1/4] habanalabs: change function to static Oded Gabbay
  2022-02-08 19:28 ` [PATCH 2/4] habanalabs: enable stop-on-error debugfs setting per ASIC Oded Gabbay
  2022-02-08 19:28 ` [PATCH 3/4] habanalabs: use proper max_power variable for device utilization Oded Gabbay
@ 2022-02-08 19:28 ` Oded Gabbay
  2 siblings, 0 replies; 4+ messages in thread
From: Oded Gabbay @ 2022-02-08 19:28 UTC (permalink / raw)
  To: linux-kernel; +Cc: Tomer Tayar

From: Tomer Tayar <ttayar@habana.ai>

For current devices there is a need to send the max power value to F/W
during device init, for example because there might be several card
types.
In future devices, this info will be programmed in the device's EEPROM
and will be read by F/W, and hence the driver should not send it.

Modify the sending of the relevant message to be done only for ASIC
types that need it.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c     | 3 ++-
 drivers/misc/habanalabs/common/habanalabs.h | 2 ++
 drivers/misc/habanalabs/gaudi/gaudi.c       | 2 ++
 drivers/misc/habanalabs/goya/goya.c         | 2 ++
 4 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 63e2449ad136..8ea9dfe3f79b 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1541,7 +1541,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	/* Need to call this again because the max power might change,
 	 * depending on card type for certain ASICs
 	 */
-	hl_fw_set_max_power(hdev);
+	if (hdev->asic_prop.set_max_power_on_device_init)
+		hl_fw_set_max_power(hdev);
 
 	/*
 	 * hl_hwmon_init() must be called after device_late_init(), because only
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 93116fe71ef6..b5055ab528b8 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -562,6 +562,7 @@ struct hl_hints_range {
  *                              to the fact that training runs on multiple
  *                              devices)
  * @configurable_stop_on_err: is stop-on-error option configurable via debugfs.
+ * @set_max_power_on_device_init: true if need to set max power in F/W on device init.
  */
 struct asic_fixed_properties {
 	struct hw_queue_properties	*hw_queues_props;
@@ -646,6 +647,7 @@ struct asic_fixed_properties {
 	u8				supports_soft_reset;
 	u8				allow_inference_soft_reset;
 	u8				configurable_stop_on_err;
+	u8				set_max_power_on_device_init;
 };
 
 /**
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 61aa6dce6dde..ad640a268a3c 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -671,6 +671,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
 
 	prop->configurable_stop_on_err = true;
 
+	prop->set_max_power_on_device_init = true;
+
 	return 0;
 }
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index c8143b6616af..c08d96e43c9f 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -485,6 +485,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
 
 	prop->configurable_stop_on_err = true;
 
+	prop->set_max_power_on_device_init = true;
+
 	return 0;
 }
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-02-08 19:29 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-08 19:28 [PATCH 1/4] habanalabs: change function to static Oded Gabbay
2022-02-08 19:28 ` [PATCH 2/4] habanalabs: enable stop-on-error debugfs setting per ASIC Oded Gabbay
2022-02-08 19:28 ` [PATCH 3/4] habanalabs: use proper max_power variable for device utilization Oded Gabbay
2022-02-08 19:28 ` [PATCH 4/4] habanalabs: set max power on device init per ASIC Oded Gabbay

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.