* [PATCH 3/5] habanalabs: support temperature offset via sysfs
2020-02-07 8:15 [PATCH 1/5] habanalabs: add debugfs write64/read64 Oded Gabbay
2020-02-07 8:15 ` [PATCH 2/5] habanalabs: ratelimit error prints of IRQs Oded Gabbay
@ 2020-02-07 8:15 ` Oded Gabbay
2020-02-07 8:15 ` [PATCH 4/5] habanalabs: modify the return values of hl_read/write routines Oded Gabbay
2020-02-07 8:15 ` [PATCH 5/5] habanalabs: provide historical maximum of various sensors Oded Gabbay
3 siblings, 0 replies; 6+ messages in thread
From: Oded Gabbay @ 2020-02-07 8:15 UTC (permalink / raw)
To: linux-kernel, oshpigelman, ttayar; +Cc: gregkh, Moti Haimovski
From: Moti Haimovski <mhaimovski@habana.ai>
This commit adds support for offsetting the temperatures reading
by a specified value as defined in
https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface
using the standard sysfs defined for hwmon.
This is required by system administrators to inject errors to test
their monitoring applications in data centers.
Signed-off-by: Moti Haimovski <mhaimovski@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
drivers/misc/habanalabs/habanalabs.h | 2 ++
drivers/misc/habanalabs/hwmon.c | 37 ++++++++++++++++++++++
drivers/misc/habanalabs/include/armcp_if.h | 13 +++++++-
3 files changed, 51 insertions(+), 1 deletion(-)
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 4ef8cf23d099..4de12d3ff836 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -1610,6 +1610,8 @@ int hl_pci_set_dma_mask(struct hl_device *hdev, u8 dma_mask);
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr);
+int hl_set_temperature(struct hl_device *hdev,
+ int sensor_index, u32 attr, long value);
long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr);
long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr);
long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr);
diff --git a/drivers/misc/habanalabs/hwmon.c b/drivers/misc/habanalabs/hwmon.c
index 7be4bace9b4f..70088fdb0a5b 100644
--- a/drivers/misc/habanalabs/hwmon.c
+++ b/drivers/misc/habanalabs/hwmon.c
@@ -125,6 +125,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
case hwmon_temp_crit:
case hwmon_temp_max_hyst:
case hwmon_temp_crit_hyst:
+ case hwmon_temp_offset:
break;
default:
return -EINVAL;
@@ -192,6 +193,15 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
return -ENODEV;
switch (type) {
+ case hwmon_temp:
+ switch (attr) {
+ case hwmon_temp_offset:
+ break;
+ default:
+ return -EINVAL;
+ }
+ hl_set_temperature(hdev, channel, attr, val);
+ break;
case hwmon_pwm:
switch (attr) {
case hwmon_pwm_input:
@@ -220,6 +230,8 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
case hwmon_temp_crit:
case hwmon_temp_crit_hyst:
return 0444;
+ case hwmon_temp_offset:
+ return 0644;
}
break;
case hwmon_in:
@@ -291,6 +303,31 @@ long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr)
return result;
}
+int hl_set_temperature(struct hl_device *hdev,
+ int sensor_index, u32 attr, long value)
+{
+ struct armcp_packet pkt;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.sensor_index = __cpu_to_le16(sensor_index);
+ pkt.type = __cpu_to_le16(attr);
+ pkt.value = __cpu_to_le64(value);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ SENSORS_PKT_TIMEOUT, NULL);
+
+ if (rc)
+ dev_err(hdev->dev,
+ "Failed to set temperature of sensor %d, error %d\n",
+ sensor_index, rc);
+
+ return rc;
+}
+
long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr)
{
struct armcp_packet pkt;
diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h
index e4c6699a1868..014549eaf919 100644
--- a/drivers/misc/habanalabs/include/armcp_if.h
+++ b/drivers/misc/habanalabs/include/armcp_if.h
@@ -189,6 +189,10 @@ enum pq_init_status {
* ArmCP to write to the structure, to prevent data corruption in case of
* mismatched driver/FW versions.
*
+ * ARMCP_PACKET_TEMPERATURE_SET -
+ * Set the value of the offset property of a specified thermal sensor.
+ * The packet's arguments specify the desired sensor and the field to
+ * set.
*/
enum armcp_packet_id {
@@ -214,6 +218,8 @@ enum armcp_packet_id {
ARMCP_PACKET_MAX_POWER_GET, /* sysfs */
ARMCP_PACKET_MAX_POWER_SET, /* sysfs */
ARMCP_PACKET_EEPROM_DATA_GET, /* sysfs */
+ ARMCP_RESERVED,
+ ARMCP_PACKET_TEMPERATURE_SET, /* sysfs */
};
#define ARMCP_PACKET_FENCE_VAL 0xFE8CE7A5
@@ -271,12 +277,17 @@ enum armcp_packet_rc {
armcp_packet_fault
};
+/*
+ * armcp_temp_type should adhere to hwmon_temp_attributes
+ * defined in Linux kernel hwmon.h file
+ */
enum armcp_temp_type {
armcp_temp_input,
armcp_temp_max = 6,
armcp_temp_max_hyst,
armcp_temp_crit,
- armcp_temp_crit_hyst
+ armcp_temp_crit_hyst,
+ armcp_temp_offset = 19
};
enum armcp_in_attributes {
--
2.17.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 4/5] habanalabs: modify the return values of hl_read/write routines
2020-02-07 8:15 [PATCH 1/5] habanalabs: add debugfs write64/read64 Oded Gabbay
2020-02-07 8:15 ` [PATCH 2/5] habanalabs: ratelimit error prints of IRQs Oded Gabbay
2020-02-07 8:15 ` [PATCH 3/5] habanalabs: support temperature offset via sysfs Oded Gabbay
@ 2020-02-07 8:15 ` Oded Gabbay
2020-02-07 8:15 ` [PATCH 5/5] habanalabs: provide historical maximum of various sensors Oded Gabbay
3 siblings, 0 replies; 6+ messages in thread
From: Oded Gabbay @ 2020-02-07 8:15 UTC (permalink / raw)
To: linux-kernel, oshpigelman, ttayar; +Cc: gregkh, Moti Haimovski
From: Moti Haimovski <mhaimovski@habana.ai>
The hl read and write routines implement the hwmon_ops read and write
interface routines respectively.
These routines are expected to return a completion status when called,
which was not the case until this commit.
This commit modifies these routines to return 0 upon success and a
negative error value upon failure.
Signed-off-by: Moti Haimovski <mhaimovski@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
drivers/misc/habanalabs/habanalabs.h | 17 +++++---
drivers/misc/habanalabs/hwmon.c | 63 ++++++++++++++--------------
2 files changed, 43 insertions(+), 37 deletions(-)
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 4de12d3ff836..9472da3ef847 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -1609,13 +1609,18 @@ int hl_pci_set_dma_mask(struct hl_device *hdev, u8 dma_mask);
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
-long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr);
+int hl_get_temperature(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value);
int hl_set_temperature(struct hl_device *hdev,
- int sensor_index, u32 attr, long value);
-long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr);
-long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr);
-long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr);
-long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr);
+ int sensor_index, u32 attr, long value);
+int hl_get_voltage(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value);
+int hl_get_current(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value);
+int hl_get_fan_speed(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value);
+int hl_get_pwm_info(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value);
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
long value);
u64 hl_get_max_power(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/hwmon.c b/drivers/misc/habanalabs/hwmon.c
index 70088fdb0a5b..3539190b1caa 100644
--- a/drivers/misc/habanalabs/hwmon.c
+++ b/drivers/misc/habanalabs/hwmon.c
@@ -113,6 +113,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
u32 attr, int channel, long *val)
{
struct hl_device *hdev = dev_get_drvdata(dev);
+ int rc;
if (hl_device_disabled_or_in_reset(hdev))
return -ENODEV;
@@ -131,7 +132,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
return -EINVAL;
}
- *val = hl_get_temperature(hdev, channel, attr);
+ rc = hl_get_temperature(hdev, channel, attr, val);
break;
case hwmon_in:
switch (attr) {
@@ -143,7 +144,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
return -EINVAL;
}
- *val = hl_get_voltage(hdev, channel, attr);
+ rc = hl_get_voltage(hdev, channel, attr, val);
break;
case hwmon_curr:
switch (attr) {
@@ -155,7 +156,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
return -EINVAL;
}
- *val = hl_get_current(hdev, channel, attr);
+ rc = hl_get_current(hdev, channel, attr, val);
break;
case hwmon_fan:
switch (attr) {
@@ -166,7 +167,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
default:
return -EINVAL;
}
- *val = hl_get_fan_speed(hdev, channel, attr);
+ rc = hl_get_fan_speed(hdev, channel, attr, val);
break;
case hwmon_pwm:
switch (attr) {
@@ -176,12 +177,12 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
default:
return -EINVAL;
}
- *val = hl_get_pwm_info(hdev, channel, attr);
+ rc = hl_get_pwm_info(hdev, channel, attr, val);
break;
default:
return -EINVAL;
}
- return 0;
+ return rc;
}
static int hl_write(struct device *dev, enum hwmon_sensor_types type,
@@ -277,10 +278,10 @@ static const struct hwmon_ops hl_hwmon_ops = {
.write = hl_write
};
-long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_get_temperature(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
- long result;
int rc;
memset(&pkt, 0, sizeof(pkt));
@@ -291,16 +292,16 @@ long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr)
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, &result);
+ SENSORS_PKT_TIMEOUT, value);
if (rc) {
dev_err(hdev->dev,
"Failed to get temperature from sensor %d, error %d\n",
sensor_index, rc);
- result = 0;
+ *value = 0;
}
- return result;
+ return rc;
}
int hl_set_temperature(struct hl_device *hdev,
@@ -328,10 +329,10 @@ int hl_set_temperature(struct hl_device *hdev,
return rc;
}
-long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_get_voltage(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
- long result;
int rc;
memset(&pkt, 0, sizeof(pkt));
@@ -342,22 +343,22 @@ long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr)
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, &result);
+ SENSORS_PKT_TIMEOUT, value);
if (rc) {
dev_err(hdev->dev,
"Failed to get voltage from sensor %d, error %d\n",
sensor_index, rc);
- result = 0;
+ *value = 0;
}
- return result;
+ return rc;
}
-long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_get_current(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
- long result;
int rc;
memset(&pkt, 0, sizeof(pkt));
@@ -368,22 +369,22 @@ long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr)
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, &result);
+ SENSORS_PKT_TIMEOUT, value);
if (rc) {
dev_err(hdev->dev,
"Failed to get current from sensor %d, error %d\n",
sensor_index, rc);
- result = 0;
+ *value = 0;
}
- return result;
+ return rc;
}
-long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_get_fan_speed(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
- long result;
int rc;
memset(&pkt, 0, sizeof(pkt));
@@ -394,22 +395,22 @@ long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr)
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, &result);
+ SENSORS_PKT_TIMEOUT, value);
if (rc) {
dev_err(hdev->dev,
"Failed to get fan speed from sensor %d, error %d\n",
sensor_index, rc);
- result = 0;
+ *value = 0;
}
- return result;
+ return rc;
}
-long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr)
+int hl_get_pwm_info(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
- long result;
int rc;
memset(&pkt, 0, sizeof(pkt));
@@ -420,16 +421,16 @@ long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr)
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, &result);
+ SENSORS_PKT_TIMEOUT, value);
if (rc) {
dev_err(hdev->dev,
"Failed to get pwm info from sensor %d, error %d\n",
sensor_index, rc);
- result = 0;
+ *value = 0;
}
- return result;
+ return rc;
}
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
--
2.17.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 5/5] habanalabs: provide historical maximum of various sensors
2020-02-07 8:15 [PATCH 1/5] habanalabs: add debugfs write64/read64 Oded Gabbay
` (2 preceding siblings ...)
2020-02-07 8:15 ` [PATCH 4/5] habanalabs: modify the return values of hl_read/write routines Oded Gabbay
@ 2020-02-07 8:15 ` Oded Gabbay
3 siblings, 0 replies; 6+ messages in thread
From: Oded Gabbay @ 2020-02-07 8:15 UTC (permalink / raw)
To: linux-kernel, oshpigelman, ttayar; +Cc: gregkh, Christine Gharzuzi
From: Christine Gharzuzi <cgharzuzi@habana.ai>
Add support for hwmon_in_highest, hwmon_temp_highest and hwmon_curr_highest
attributes. These attributes retrieve the historical maximum voltage,
temperature and current that were sampled, respectively.
Signed-off-by: Christine Gharzuzi <cgharzuzi@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
drivers/misc/habanalabs/hwmon.c | 6 ++++++
drivers/misc/habanalabs/include/armcp_if.h | 9 ++++++---
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/drivers/misc/habanalabs/hwmon.c b/drivers/misc/habanalabs/hwmon.c
index 3539190b1caa..a21a26e07c3b 100644
--- a/drivers/misc/habanalabs/hwmon.c
+++ b/drivers/misc/habanalabs/hwmon.c
@@ -127,6 +127,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
case hwmon_temp_max_hyst:
case hwmon_temp_crit_hyst:
case hwmon_temp_offset:
+ case hwmon_temp_highest:
break;
default:
return -EINVAL;
@@ -139,6 +140,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
case hwmon_in_input:
case hwmon_in_min:
case hwmon_in_max:
+ case hwmon_in_highest:
break;
default:
return -EINVAL;
@@ -151,6 +153,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
case hwmon_curr_input:
case hwmon_curr_min:
case hwmon_curr_max:
+ case hwmon_curr_highest:
break;
default:
return -EINVAL;
@@ -230,6 +233,7 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
case hwmon_temp_max_hyst:
case hwmon_temp_crit:
case hwmon_temp_crit_hyst:
+ case hwmon_temp_highest:
return 0444;
case hwmon_temp_offset:
return 0644;
@@ -240,6 +244,7 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
case hwmon_in_input:
case hwmon_in_min:
case hwmon_in_max:
+ case hwmon_in_highest:
return 0444;
}
break;
@@ -248,6 +253,7 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
case hwmon_curr_input:
case hwmon_curr_min:
case hwmon_curr_max:
+ case hwmon_curr_highest:
return 0444;
}
break;
diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h
index 014549eaf919..bdd0a4c3a9cf 100644
--- a/drivers/misc/habanalabs/include/armcp_if.h
+++ b/drivers/misc/habanalabs/include/armcp_if.h
@@ -287,19 +287,22 @@ enum armcp_temp_type {
armcp_temp_max_hyst,
armcp_temp_crit,
armcp_temp_crit_hyst,
- armcp_temp_offset = 19
+ armcp_temp_offset = 19,
+ armcp_temp_highest = 22
};
enum armcp_in_attributes {
armcp_in_input,
armcp_in_min,
- armcp_in_max
+ armcp_in_max,
+ armcp_in_highest = 7
};
enum armcp_curr_attributes {
armcp_curr_input,
armcp_curr_min,
- armcp_curr_max
+ armcp_curr_max,
+ armcp_curr_highest = 7
};
enum armcp_fan_attributes {
--
2.17.1
^ permalink raw reply related [flat|nested] 6+ messages in thread