All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] habanalabs: add debugfs node for configuring CS timeout
@ 2021-09-12  5:26 Oded Gabbay
  2021-09-12  5:26 ` [PATCH 2/2] habanalabs: create static map of f/w hwmon enums Oded Gabbay
  0 siblings, 1 reply; 2+ messages in thread
From: Oded Gabbay @ 2021-09-12  5:26 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

The command submission timeout is currently determined at driver load
time. As some environments require this timeout to be modified at
runtime, we introduce a new debugfs node that controls the timeout
value without the need to reload the driver.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../ABI/testing/debugfs-driver-habanalabs     |  6 +++
 drivers/misc/habanalabs/common/debugfs.c      | 51 +++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index 284e2dfa61cd..63c46d9d538f 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -226,6 +226,12 @@ Description:    Gets the state dump occurring on a CS timeout or failure.
                 Writing an integer X discards X state dumps, so that the
                 next read would return X+1-st newest state dump.
 
+What:           /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
+Date:           Sep 2021
+KernelVersion:  5.16
+Contact:        obitton@habana.ai
+Description:    Sets the command submission timeout value in seconds.
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
 Date:           Mar 2020
 KernelVersion:  5.6
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index 985f1f3dbd20..1f2a3dc6c4e2 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -1167,6 +1167,45 @@ static ssize_t hl_state_dump_write(struct file *f, const char __user *buf,
 	return count;
 }
 
+static ssize_t hl_timeout_locked_read(struct file *f, char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+	struct hl_device *hdev = entry->hdev;
+	char tmp_buf[200];
+	ssize_t rc;
+
+	if (*ppos)
+		return 0;
+
+	sprintf(tmp_buf, "%d\n",
+		jiffies_to_msecs(hdev->timeout_jiffies) / 1000);
+	rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
+			strlen(tmp_buf) + 1);
+
+	return rc;
+}
+
+static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf,
+				     size_t count, loff_t *ppos)
+{
+	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+	struct hl_device *hdev = entry->hdev;
+	u32 value;
+	ssize_t rc;
+
+	rc = kstrtouint_from_user(buf, count, 10, &value);
+	if (rc)
+		return rc;
+
+	if (value)
+		hdev->timeout_jiffies = msecs_to_jiffies(value * 1000);
+	else
+		hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
+
+	return count;
+}
+
 static const struct file_operations hl_data32b_fops = {
 	.owner = THIS_MODULE,
 	.read = hl_data_read32,
@@ -1240,6 +1279,12 @@ static const struct file_operations hl_state_dump_fops = {
 	.write = hl_state_dump_write
 };
 
+static const struct file_operations hl_timeout_locked_fops = {
+	.owner = THIS_MODULE,
+	.read = hl_timeout_locked_read,
+	.write = hl_timeout_locked_write
+};
+
 static const struct hl_info_list hl_debugfs_list[] = {
 	{"command_buffers", command_buffers_show, NULL},
 	{"command_submission", command_submission_show, NULL},
@@ -1421,6 +1466,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 				dev_entry,
 				&hl_state_dump_fops);
 
+	debugfs_create_file("timeout_locked",
+				0644,
+				dev_entry->root,
+				dev_entry,
+				&hl_timeout_locked_fops);
+
 	for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
 		debugfs_create_file(hl_debugfs_list[i].name,
 					0444,
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH 2/2] habanalabs: create static map of f/w hwmon enums
  2021-09-12  5:26 [PATCH 1/2] habanalabs: add debugfs node for configuring CS timeout Oded Gabbay
@ 2021-09-12  5:26 ` Oded Gabbay
  0 siblings, 0 replies; 2+ messages in thread
From: Oded Gabbay @ 2021-09-12  5:26 UTC (permalink / raw)
  To: linux-kernel; +Cc: Rajaravi Krishna Katta

From: Rajaravi Krishna Katta <rkatta@habana.ai>

Instead of using the Linux kernel HWMON enum definitions when
communicating with the firmware, use proprietary HWMON-based enums,
i.e. map the hwmon.h header enums to the cpucp_if.h-based enums.

This is needed because the HWMON enums do not guarantee backward
compatibility, and therefore changes to them can break compatibility
between a newer driver and older firmware.

The driver checks the CPU_BOOT_DEV_STS0_MAP_HWMON_EN bit to determine
whether the f/w supports the cpucp->hwmon enum mapping, so that older
firmware, where this mapping is not available, remains supported.

Signed-off-by: Rajaravi Krishna Katta <rkatta@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/hwmon.c        | 100 ++++++++++++++++--
 .../habanalabs/include/common/hl_boot_if.h    |   6 ++
 2 files changed, 97 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c
index 6b421d76b311..8f12d99fda09 100644
--- a/drivers/misc/habanalabs/common/hwmon.c
+++ b/drivers/misc/habanalabs/common/hwmon.c
@@ -113,6 +113,9 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
 	int rc;
+	u32 cpucp_attr;
+	bool use_cpucp_enum = (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
+				CPU_BOOT_DEV_STS0_MAP_HWMON_EN) ? true : false;
 
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
@@ -121,65 +124,117 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
 	case hwmon_temp:
 		switch (attr) {
 		case hwmon_temp_input:
+			cpucp_attr = cpucp_temp_input;
+			break;
 		case hwmon_temp_max:
+			cpucp_attr = cpucp_temp_max;
+			break;
 		case hwmon_temp_crit:
+			cpucp_attr = cpucp_temp_crit;
+			break;
 		case hwmon_temp_max_hyst:
+			cpucp_attr = cpucp_temp_max_hyst;
+			break;
 		case hwmon_temp_crit_hyst:
+			cpucp_attr = cpucp_temp_crit_hyst;
+			break;
 		case hwmon_temp_offset:
+			cpucp_attr = cpucp_temp_offset;
+			break;
 		case hwmon_temp_highest:
+			cpucp_attr = cpucp_temp_highest;
 			break;
 		default:
 			return -EINVAL;
 		}
 
-		rc = hl_get_temperature(hdev, channel, attr, val);
+		if (use_cpucp_enum)
+			rc = hl_get_temperature(hdev, channel, cpucp_attr, val);
+		else
+			rc = hl_get_temperature(hdev, channel, attr, val);
 		break;
 	case hwmon_in:
 		switch (attr) {
 		case hwmon_in_input:
+			cpucp_attr = cpucp_in_input;
+			break;
 		case hwmon_in_min:
+			cpucp_attr = cpucp_in_min;
+			break;
 		case hwmon_in_max:
+			cpucp_attr = cpucp_in_max;
+			break;
 		case hwmon_in_highest:
+			cpucp_attr = cpucp_in_highest;
 			break;
 		default:
 			return -EINVAL;
 		}
 
-		rc = hl_get_voltage(hdev, channel, attr, val);
+		if (use_cpucp_enum)
+			rc = hl_get_voltage(hdev, channel, cpucp_attr, val);
+		else
+			rc = hl_get_voltage(hdev, channel, attr, val);
 		break;
 	case hwmon_curr:
 		switch (attr) {
 		case hwmon_curr_input:
+			cpucp_attr = cpucp_curr_input;
+			break;
 		case hwmon_curr_min:
+			cpucp_attr = cpucp_curr_min;
+			break;
 		case hwmon_curr_max:
+			cpucp_attr = cpucp_curr_max;
+			break;
 		case hwmon_curr_highest:
+			cpucp_attr = cpucp_curr_highest;
 			break;
 		default:
 			return -EINVAL;
 		}
 
-		rc = hl_get_current(hdev, channel, attr, val);
+		if (use_cpucp_enum)
+			rc = hl_get_current(hdev, channel, cpucp_attr, val);
+		else
+			rc = hl_get_current(hdev, channel, attr, val);
 		break;
 	case hwmon_fan:
 		switch (attr) {
 		case hwmon_fan_input:
+			cpucp_attr = cpucp_fan_input;
+			break;
 		case hwmon_fan_min:
+			cpucp_attr = cpucp_fan_min;
+			break;
 		case hwmon_fan_max:
+			cpucp_attr = cpucp_fan_max;
 			break;
 		default:
 			return -EINVAL;
 		}
-		rc = hl_get_fan_speed(hdev, channel, attr, val);
+
+		if (use_cpucp_enum)
+			rc = hl_get_fan_speed(hdev, channel, cpucp_attr, val);
+		else
+			rc = hl_get_fan_speed(hdev, channel, attr, val);
 		break;
 	case hwmon_pwm:
 		switch (attr) {
 		case hwmon_pwm_input:
+			cpucp_attr = cpucp_pwm_input;
+			break;
 		case hwmon_pwm_enable:
+			cpucp_attr = cpucp_pwm_enable;
 			break;
 		default:
 			return -EINVAL;
 		}
-		rc = hl_get_pwm_info(hdev, channel, attr, val);
+
+		if (use_cpucp_enum)
+			rc = hl_get_pwm_info(hdev, channel, cpucp_attr, val);
+		else
+			rc = hl_get_pwm_info(hdev, channel, attr, val);
 		break;
 	default:
 		return -EINVAL;
@@ -191,6 +246,9 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
 			u32 attr, int channel, long val)
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
+	u32 cpucp_attr;
+	bool use_cpucp_enum = (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
+				CPU_BOOT_DEV_STS0_MAP_HWMON_EN) ? true : false;
 
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
@@ -199,40 +257,64 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
 	case hwmon_temp:
 		switch (attr) {
 		case hwmon_temp_offset:
+			cpucp_attr = cpucp_temp_offset;
+			break;
 		case hwmon_temp_reset_history:
+			cpucp_attr = cpucp_temp_reset_history;
 			break;
 		default:
 			return -EINVAL;
 		}
-		hl_set_temperature(hdev, channel, attr, val);
+
+		if (use_cpucp_enum)
+			hl_set_temperature(hdev, channel, cpucp_attr, val);
+		else
+			hl_set_temperature(hdev, channel, attr, val);
 		break;
 	case hwmon_pwm:
 		switch (attr) {
 		case hwmon_pwm_input:
+			cpucp_attr = cpucp_pwm_input;
+			break;
 		case hwmon_pwm_enable:
+			cpucp_attr = cpucp_pwm_enable;
 			break;
 		default:
 			return -EINVAL;
 		}
-		hl_set_pwm_info(hdev, channel, attr, val);
+
+		if (use_cpucp_enum)
+			hl_set_pwm_info(hdev, channel, cpucp_attr, val);
+		else
+			hl_set_pwm_info(hdev, channel, attr, val);
 		break;
 	case hwmon_in:
 		switch (attr) {
 		case hwmon_in_reset_history:
+			cpucp_attr = cpucp_in_reset_history;
 			break;
 		default:
 			return -EINVAL;
 		}
-		hl_set_voltage(hdev, channel, attr, val);
+
+		if (use_cpucp_enum)
+			hl_set_voltage(hdev, channel, cpucp_attr, val);
+		else
+			hl_set_voltage(hdev, channel, attr, val);
 		break;
 	case hwmon_curr:
 		switch (attr) {
 		case hwmon_curr_reset_history:
+			cpucp_attr = cpucp_curr_reset_history;
 			break;
 		default:
 			return -EINVAL;
 		}
-		hl_set_current(hdev, channel, attr, val);
+
+		if (use_cpucp_enum)
+			hl_set_current(hdev, channel, cpucp_attr, val);
+		else
+			hl_set_current(hdev, channel, attr, val);
 		break;
 	default:
 		return -EINVAL;
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index 3099653234e4..8837925b5d85 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -252,6 +252,11 @@
  *					where a bit is set if the engine is not idle.
  *					Initialized in: linux
  *
+ * CPU_BOOT_DEV_STS0_MAP_HWMON_EN
+ *					If set, means f/w supports proprietary
+ *					HWMON enum mapping to cpucp enums.
+ *					Initialized in: linux
+ *
  * CPU_BOOT_DEV_STS0_ENABLED		Device status register enabled.
  *					This is a main indication that the
  *					running FW populates the device status
@@ -287,6 +292,7 @@
 #define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN		(1 << 23)
 #define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN		(1 << 24)
 #define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN		(1 << 25)
+#define CPU_BOOT_DEV_STS0_MAP_HWMON_EN			(1 << 26)
 #define CPU_BOOT_DEV_STS0_ENABLED			(1 << 31)
 #define CPU_BOOT_DEV_STS1_ENABLED			(1 << 31)
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-09-12  5:40 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-12  5:26 [PATCH 1/2] habanalabs: add debugfs node for configuring CS timeout Oded Gabbay
2021-09-12  5:26 ` [PATCH 2/2] habanalabs: create static map of f/w hwmon enums Oded Gabbay

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.