All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/6] drm/amd/powerplay: support hwmon temperature channel labels
@ 2019-04-18  9:02 Evan Quan
       [not found] ` <20190418090302.8963-1-evan.quan-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 13+ messages in thread
From: Evan Quan @ 2019-04-18  9:02 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Evan Quan

Expose temp[1-3]_label hwmon interfaces. temp2_label and
temp3_label are visible on SOC15 dGPUs only.

Change-Id: I7f1e10c52ec21d272027554cdf6da97103e0be58
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c        | 40 +++++++++++++++++++
 .../gpu/drm/amd/include/kgd_pp_interface.h    |  7 ++++
 2 files changed, 47 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 552127b74f78..c17eb228417e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -120,6 +120,15 @@ static const struct cg_flag_name clocks[] = {
 	{0, NULL},
 };
 
+static const struct hwmon_temp_label {
+	enum PP_HWMON_TEMP channel;
+	const char *label;
+} temp_label[] = {
+	{PP_TEMP_JUNCTION, "junction"},
+	{PP_TEMP_EDGE, "edge"},
+	{PP_TEMP_MEM, "mem"},
+};
+
 void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
 {
 	if (adev->pm.dpm_enabled) {
@@ -1457,6 +1466,20 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev,
 	return snprintf(buf, PAGE_SIZE, "%d\n", temp);
 }
 
+
+static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buf)
+{
+	int channel = to_sensor_dev_attr(attr)->index;
+
+	/* channel indexes temp_label[]; reject anything outside the table */
+	if (channel < 0 || channel >= PP_TEMP_MAX)
+		return -EINVAL;
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label);
+}
+
 static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
 					    struct device_attribute *attr,
 					    char *buf)
@@ -2026,6 +2049,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
  *
  * hwmon interfaces for GPU temperature:
  *
+ * - temp[1-3]_label: temperature channel label
+ *   - temp2_label and temp3_label are supported on SOC15 dGPUs only
+ *
  * - temp1_input: the on die GPU temperature in millidegrees Celsius
  *
  * - temp1_crit: temperature critical max value in millidegrees Celsius
@@ -2081,6 +2107,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION);
+static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE);
+static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM);
 static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0);
 static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0);
 static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
@@ -2107,6 +2136,9 @@ static struct attribute *hwmon_attributes[] = {
 	&sensor_dev_attr_temp1_input.dev_attr.attr,
 	&sensor_dev_attr_temp1_crit.dev_attr.attr,
 	&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
+	&sensor_dev_attr_temp1_label.dev_attr.attr,
+	&sensor_dev_attr_temp2_label.dev_attr.attr,
+	&sensor_dev_attr_temp3_label.dev_attr.attr,
 	&sensor_dev_attr_pwm1.dev_attr.attr,
 	&sensor_dev_attr_pwm1_enable.dev_attr.attr,
 	&sensor_dev_attr_pwm1_min.dev_attr.attr,
@@ -2229,6 +2261,14 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	     attr == &sensor_dev_attr_freq2_label.dev_attr.attr))
 		return 0;
 
+	/* only SOC15 dGPUs support edge and mem temperatures */
+	if (((adev->flags & AMD_IS_APU) ||
+	     adev->asic_type < CHIP_VEGA10) &&
+	    (attr == &sensor_dev_attr_temp2_label.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
+		return 0;
+
+
 	return effective_mode;
 }
 
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 2b579ba9b685..17324c0d503e 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -159,6 +159,13 @@ struct pp_states_info {
 	uint32_t states[16];
 };
 
+enum PP_HWMON_TEMP {
+	PP_TEMP_JUNCTION = 0,	/* channel behind temp1_label */
+	PP_TEMP_EDGE,		/* channel behind temp2_label */
+	PP_TEMP_MEM,		/* channel behind temp3_label */
+	PP_TEMP_MAX		/* count of channels; used as an upper bound */
+};
+
 #define PP_GROUP_MASK        0xF0000000
 #define PP_GROUP_SHIFT       28
 
-- 
2.21.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/6] drm/amd/powerplay: support edge/memory critical limit values
       [not found] ` <20190418090302.8963-1-evan.quan-5C7GfCeVMHo@public.gmane.org>
@ 2019-04-18  9:02   ` Evan Quan
       [not found]     ` <20190418090302.8963-2-evan.quan-5C7GfCeVMHo@public.gmane.org>
  2019-04-18  9:02   ` [PATCH 3/6] drm/amd/powerplay: support temperature emergency max values Evan Quan
                     ` (4 subsequent siblings)
  5 siblings, 1 reply; 13+ messages in thread
From: Evan Quan @ 2019-04-18  9:02 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Evan Quan

These new interfaces (temp2_crit, temp2_crit_hyst, temp3_crit,
temp3_crit_hyst) are supported on SOC15 dGPUs only.

Change-Id: Ia87e3f6ad816b51d6680eb74c8f755d6c2b0a6ae
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h       |  8 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c        | 51 +++++++++++++++++--
 .../drm/amd/powerplay/hwmgr/hardwaremanager.c | 12 ++++-
 .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c    |  6 +++
 .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c    |  7 +++
 .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c    |  7 +++
 .../gpu/drm/amd/powerplay/inc/power_state.h   |  4 ++
 .../gpu/drm/amd/powerplay/inc/pp_thermal.h    |  8 +--
 8 files changed, 95 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index dca35407879d..e1492438ae7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -75,6 +75,14 @@ struct amdgpu_dpm_thermal {
 	int                min_temp;
 	/* high temperature threshold */
 	int                max_temp;
+	/* edge low temperature threshold */
+	int                min_edge_temp;
+	/* edge high temperature critical threshold */
+	int                max_edge_crit_temp;
+	/* memory low temperature threshold */
+	int                min_mem_temp;
+	/* memory high temperature critical threshold */
+	int                max_mem_crit_temp;
 	/* was last interrupt low to high or high to low */
 	bool               high_to_low;
 	/* interrupt source */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index c17eb228417e..3f6b5b5bb0c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -1466,6 +1466,37 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev,
 	return snprintf(buf, PAGE_SIZE, "%d\n", temp);
 }
 
+/* Show temp2_crit (index 0) or temp2_crit_hyst (index 1) for the edge. */
+static ssize_t amdgpu_hwmon_show_edge_temp_thresh(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	int want_hyst = to_sensor_dev_attr(attr)->index;
+	int value;
+
+	/* the hysteresis attribute reports the low threshold */
+	value = want_hyst ? adev->pm.dpm.thermal.min_edge_temp :
+			    adev->pm.dpm.thermal.max_edge_crit_temp;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", value);
+}
+
+/* Show temp3_crit (index 0) or temp3_crit_hyst (index 1) for memory. */
+static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	int want_hyst = to_sensor_dev_attr(attr)->index;
+	int value;
+
+	/* the hysteresis attribute reports the low threshold */
+	value = want_hyst ? adev->pm.dpm.thermal.min_mem_temp :
+			    adev->pm.dpm.thermal.max_mem_crit_temp;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", value);
+}
 
 static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev,
 					     struct device_attribute *attr,
@@ -2054,9 +2085,11 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
  *
  * - temp1_input: the on die GPU temperature in millidegrees Celsius
  *
- * - temp1_crit: temperature critical max value in millidegrees Celsius
+ * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius
+ *   - temp2_crit and temp3_crit are supported on SOC15 dGPUs only
  *
- * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
+ * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
+ *   - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only
  *
  * hwmon interfaces for GPU voltage:
  *
@@ -2107,6 +2140,10 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_edge_temp_thresh, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_edge_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
 static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION);
 static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE);
 static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM);
@@ -2136,6 +2173,10 @@ static struct attribute *hwmon_attributes[] = {
 	&sensor_dev_attr_temp1_input.dev_attr.attr,
 	&sensor_dev_attr_temp1_crit.dev_attr.attr,
 	&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
+	&sensor_dev_attr_temp2_crit.dev_attr.attr,
+	&sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
+	&sensor_dev_attr_temp3_crit.dev_attr.attr,
+	&sensor_dev_attr_temp3_crit_hyst.dev_attr.attr,
 	&sensor_dev_attr_temp1_label.dev_attr.attr,
 	&sensor_dev_attr_temp2_label.dev_attr.attr,
 	&sensor_dev_attr_temp3_label.dev_attr.attr,
@@ -2265,7 +2306,11 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	if (((adev->flags & AMD_IS_APU) ||
 	     adev->asic_type < CHIP_VEGA10) &&
 	    (attr == &sensor_dev_attr_temp2_label.dev_attr.attr ||
-	     attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
+	     attr == &sensor_dev_attr_temp3_label.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr))
 		return 0;
 
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index 70f7f47a2fcf..c8397b20f71a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -225,7 +225,13 @@ int phm_register_irq_handlers(struct pp_hwmgr *hwmgr)
 int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
 {
 	int ret = 0;
-	struct PP_TemperatureRange range = {TEMP_RANGE_MIN, TEMP_RANGE_MAX};
+	struct PP_TemperatureRange range = {
+		TEMP_RANGE_MIN,
+		TEMP_RANGE_MAX,
+		TEMP_RANGE_MIN,
+		TEMP_RANGE_MAX,
+		TEMP_RANGE_MIN,
+		TEMP_RANGE_MAX};
 	struct amdgpu_device *adev = hwmgr->adev;
 
 	if (hwmgr->hwmgr_func->get_thermal_temperature_range)
@@ -239,6 +245,10 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
 
 	adev->pm.dpm.thermal.min_temp = range.min;
 	adev->pm.dpm.thermal.max_temp = range.max;
+	adev->pm.dpm.thermal.min_edge_temp = range.edge_min;
+	adev->pm.dpm.thermal.max_edge_crit_temp = range.edge_crit_max;
+	adev->pm.dpm.thermal.min_mem_temp = range.mem_min;
+	adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max;
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 384c37875cd0..65aa7e70d7d9 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -4854,11 +4854,17 @@ static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
 {
 	struct phm_ppt_v2_information *table_info =
 			(struct phm_ppt_v2_information *)hwmgr->pptable;
+	struct vega10_hwmgr *data = hwmgr->backend;
+	PPTable_t *pp_table = &(data->smc_state_table.pp_table);
 
 	memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
 
 	thermal_data->max = table_info->tdp_table->usSoftwareShutdownTemp *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->edge_crit_max = pp_table->TedgeLimit *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->mem_crit_max = pp_table->ThbmLimit *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index 707cd4b0357f..c0994851e7c7 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -2528,11 +2528,18 @@ static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
 {
 	struct phm_ppt_v3_information *pptable_information =
 		(struct phm_ppt_v3_information *)hwmgr->pptable;
+	struct vega12_hwmgr *data =
+			(struct vega12_hwmgr *)(hwmgr->backend);
+	PPTable_t *pp_table = &(data->smc_state_table.pp_table);
 
 	memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
 
 	thermal_data->max = pptable_information->us_software_shutdown_temp *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->edge_crit_max = pp_table->TedgeLimit *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->mem_crit_max = pp_table->ThbmLimit *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 39a547084e90..5d9aa0f22c86 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -3965,11 +3965,18 @@ static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
 {
 	struct phm_ppt_v3_information *pptable_information =
 		(struct phm_ppt_v3_information *)hwmgr->pptable;
+	struct vega20_hwmgr *data =
+			(struct vega20_hwmgr *)(hwmgr->backend);
+	PPTable_t *pp_table = &(data->smc_state_table.pp_table);
 
 	memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
 
 	thermal_data->max = pptable_information->us_software_shutdown_temp *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->edge_crit_max = pp_table->TedgeLimit *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->mem_crit_max = pp_table->ThbmLimit *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/inc/power_state.h b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
index a99b5cbb113e..c102415ddc98 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/power_state.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
@@ -124,6 +124,10 @@ struct PP_StateSoftwareAlgorithmBlock {
 struct PP_TemperatureRange {
 	int min;
 	int max;
+	int edge_min;
+	int edge_crit_max;
+	int mem_min;
+	int mem_crit_max;
 };
 
 struct PP_StateValidationBlock {
diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
index 201d2b6329ab..75a0a2f8bea2 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
@@ -27,14 +27,14 @@
 
 static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] =
 {
-	{-273150,  99000},
-	{ 120000, 120000},
+	{-273150,  99000, -273150, 99000, -273150, 99000},
+	{ 120000, 120000, 120000, 120000, 120000, 120000},
 };
 
 static const struct PP_TemperatureRange SMU7ThermalPolicy[] =
 {
-	{-273150,  99000},
-	{ 120000, 120000},
+	{-273150,  99000, -273150, 99000, -273150, 99000},
+	{ 120000, 120000, 120000, 120000, 120000, 120000},
 };
 
 #endif
-- 
2.21.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 3/6] drm/amd/powerplay: support temperature emergency max values
       [not found] ` <20190418090302.8963-1-evan.quan-5C7GfCeVMHo@public.gmane.org>
  2019-04-18  9:02   ` [PATCH 2/6] drm/amd/powerplay: support edge/memory critical limit values Evan Quan
@ 2019-04-18  9:02   ` Evan Quan
       [not found]     ` <20190418090302.8963-3-evan.quan-5C7GfCeVMHo@public.gmane.org>
  2019-04-18  9:03   ` [PATCH 4/6] drm/amd/powerplay: support SMU metrics table on Vega12 Evan Quan
                     ` (3 subsequent siblings)
  5 siblings, 1 reply; 13+ messages in thread
From: Evan Quan @ 2019-04-18  9:02 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Evan Quan

These new interfaces (temp1_emergency, temp2_emergency,
temp3_emergency) are supported on SOC15 dGPUs only.

Change-Id: I2552df63f9c8c50294b3940bb2a402217673c2bc
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h       |  6 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c        | 40 ++++++++++++++++++-
 .../drm/amd/powerplay/hwmgr/hardwaremanager.c |  6 +++
 .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c    |  6 +++
 .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c    |  6 +++
 .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c    |  6 +++
 .../gpu/drm/amd/powerplay/inc/power_state.h   |  3 ++
 .../gpu/drm/amd/powerplay/inc/pp_thermal.h    | 12 ++++--
 8 files changed, 80 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index e1492438ae7b..32e2def42f30 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -75,14 +75,20 @@ struct amdgpu_dpm_thermal {
 	int                min_temp;
 	/* high temperature threshold */
 	int                max_temp;
+	/* hotspot max emergency(shutdown) temp */
+	int                max_hotspot_emergency_temp;
 	/* edge low temperature threshold */
 	int                min_edge_temp;
 	/* edge high temperature critical threshold */
 	int                max_edge_crit_temp;
+	/* edge max emergency(shutdown) temp */
+	int                max_edge_emergency_temp;
 	/* memory low temperature threshold */
 	int                min_mem_temp;
 	/* memory high temperature critical threshold */
 	int                max_mem_crit_temp;
+	/* memory max emergency(shutdown) temp */
+	int                max_mem_emergency_temp;
 	/* was last interrupt low to high or high to low */
 	bool               high_to_low;
 	/* interrupt source */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 3f6b5b5bb0c6..be33144e2dca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -1511,6 +1511,32 @@ static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev,
 	return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label);
 }
 
+static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	int channel = to_sensor_dev_attr(attr)->index;
+	int temp;
+
+	/* Unknown channels (negative or >= PP_TEMP_MAX) hit the default case. */
+	switch (channel) {
+	case PP_TEMP_JUNCTION:
+		temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp;
+		break;
+	case PP_TEMP_EDGE:
+		temp = adev->pm.dpm.thermal.max_edge_emergency_temp;
+		break;
+	case PP_TEMP_MEM:
+		temp = adev->pm.dpm.thermal.max_mem_emergency_temp;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", temp);
+}
+
 static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
 					    struct device_attribute *attr,
 					    char *buf)
@@ -2091,6 +2117,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
  * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
  *   - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only
  *
+ * - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius
+ *   - these are supported on SOC15 dGPUs only
+ *
  * hwmon interfaces for GPU voltage:
  *
  * - in0_input: the voltage on the GPU in millivolts
@@ -2140,10 +2169,13 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION);
 static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_edge_temp_thresh, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_edge_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE);
 static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM);
 static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION);
 static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE);
 static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM);
@@ -2180,6 +2212,9 @@ static struct attribute *hwmon_attributes[] = {
 	&sensor_dev_attr_temp1_label.dev_attr.attr,
 	&sensor_dev_attr_temp2_label.dev_attr.attr,
 	&sensor_dev_attr_temp3_label.dev_attr.attr,
+	&sensor_dev_attr_temp1_emergency.dev_attr.attr,
+	&sensor_dev_attr_temp2_emergency.dev_attr.attr,
+	&sensor_dev_attr_temp3_emergency.dev_attr.attr,
 	&sensor_dev_attr_pwm1.dev_attr.attr,
 	&sensor_dev_attr_pwm1_enable.dev_attr.attr,
 	&sensor_dev_attr_pwm1_min.dev_attr.attr,
@@ -2310,7 +2345,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	     attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
 	     attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
 	     attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
-	     attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr))
+	     attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr))
 		return 0;
 
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index c8397b20f71a..12027b194a4a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -228,9 +228,12 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
 	struct PP_TemperatureRange range = {
 		TEMP_RANGE_MIN,
 		TEMP_RANGE_MAX,
+		TEMP_RANGE_MAX,
 		TEMP_RANGE_MIN,
 		TEMP_RANGE_MAX,
+		TEMP_RANGE_MAX,
 		TEMP_RANGE_MIN,
+		TEMP_RANGE_MAX,
 		TEMP_RANGE_MAX};
 	struct amdgpu_device *adev = hwmgr->adev;
 
@@ -245,10 +248,13 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
 
 	adev->pm.dpm.thermal.min_temp = range.min;
 	adev->pm.dpm.thermal.max_temp = range.max;
+	adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max;
 	adev->pm.dpm.thermal.min_edge_temp = range.edge_min;
 	adev->pm.dpm.thermal.max_edge_crit_temp = range.edge_crit_max;
+	adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max;
 	adev->pm.dpm.thermal.min_mem_temp = range.mem_min;
 	adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max;
+	adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max;
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 65aa7e70d7d9..1d78a5ee9523 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -4861,10 +4861,16 @@ static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
 
 	thermal_data->max = table_info->tdp_table->usSoftwareShutdownTemp *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 	thermal_data->edge_crit_max = pp_table->TedgeLimit *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 	thermal_data->mem_crit_max = pp_table->ThbmLimit *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index c0994851e7c7..a283046347c9 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -2536,10 +2536,16 @@ static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
 
 	thermal_data->max = pptable_information->us_software_shutdown_temp *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 	thermal_data->edge_crit_max = pp_table->TedgeLimit *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 	thermal_data->mem_crit_max = pp_table->ThbmLimit *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 5d9aa0f22c86..0c0714862eb8 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -3973,10 +3973,16 @@ static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
 
 	thermal_data->max = pptable_information->us_software_shutdown_temp *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 	thermal_data->edge_crit_max = pp_table->TedgeLimit *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 	thermal_data->mem_crit_max = pp_table->ThbmLimit *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/inc/power_state.h b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
index c102415ddc98..48d6c4b9fa61 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/power_state.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
@@ -124,10 +124,13 @@ struct PP_StateSoftwareAlgorithmBlock {
 struct PP_TemperatureRange {
 	int min;
 	int max;
+	int hotspot_emergency_max;
 	int edge_min;
 	int edge_crit_max;
+	int edge_emergency_max;
 	int mem_min;
 	int mem_crit_max;
+	int mem_emergency_max;
 };
 
 struct PP_StateValidationBlock {
diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
index 75a0a2f8bea2..3e30768f9e1c 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
@@ -27,14 +27,18 @@
 
 static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] =
 {
-	{-273150,  99000, -273150, 99000, -273150, 99000},
-	{ 120000, 120000, 120000, 120000, 120000, 120000},
+	{-273150,  99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
+	{ 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
 };
 
 static const struct PP_TemperatureRange SMU7ThermalPolicy[] =
 {
-	{-273150,  99000, -273150, 99000, -273150, 99000},
-	{ 120000, 120000, 120000, 120000, 120000, 120000},
+	{-273150,  99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
+	{ 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
 };
 
+#define CTF_OFFSET_EDGE			5
+#define CTF_OFFSET_HOTSPOT		5
+#define CTF_OFFSET_HBM			5
+
 #endif
-- 
2.21.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 4/6] drm/amd/powerplay: support SMU metrics table on Vega12
       [not found] ` <20190418090302.8963-1-evan.quan-5C7GfCeVMHo@public.gmane.org>
  2019-04-18  9:02   ` [PATCH 2/6] drm/amd/powerplay: support edge/memory critical limit values Evan Quan
  2019-04-18  9:02   ` [PATCH 3/6] drm/amd/powerplay: support temperature emergency max values Evan Quan
@ 2019-04-18  9:03   ` Evan Quan
       [not found]     ` <20190418090302.8963-4-evan.quan-5C7GfCeVMHo@public.gmane.org>
  2019-04-18  9:03   ` [PATCH 5/6] drm/amd/powerplay: expose current edge and memory temperatures Evan Quan
                     ` (2 subsequent siblings)
  5 siblings, 1 reply; 13+ messages in thread
From: Evan Quan @ 2019-04-18  9:03 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Evan Quan

Add support for fetching the SMU metrics table on Vega12 (cached for
half a second). The table provides some necessary sensor information.

Change-Id: I898371cef06795c5369a14c4dd3fe8717959d81a
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
 .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c    | 21 +++++++++++++++++++
 .../drm/amd/powerplay/hwmgr/vega12_hwmgr.h    |  3 +++
 .../drm/amd/powerplay/smumgr/vega12_smumgr.c  | 21 +++++++++++++++++++
 3 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index a283046347c9..695ac2875540 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -1237,6 +1237,27 @@ static uint32_t vega12_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low)
 	return (mem_clk * 100);
 }
 
+static int vega12_get_metrics_table(struct pp_hwmgr *hwmgr, SmuMetrics_t *metrics_table)
+{
+	struct vega12_hwmgr *data =
+			(struct vega12_hwmgr *)(hwmgr->backend);
+	int ret = 0;
+
+	if (!data->metrics_time || time_after(jiffies, data->metrics_time + HZ / 2)) {
+		ret = smum_smc_table_manager(hwmgr, (uint8_t *)metrics_table,
+				TABLE_SMU_METRICS, true);
+		if (ret) {
+			pr_info("Failed to export SMU metrics table!\n");
+			return ret;
+		}
+		memcpy(&data->metrics_table, metrics_table, sizeof(SmuMetrics_t));
+		data->metrics_time = jiffies;
+	} else
+		memcpy(metrics_table, &data->metrics_table, sizeof(SmuMetrics_t));
+
+	return ret;
+}
+
 static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr, uint32_t *query)
 {
 #if 0
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
index b3e424d28994..73875399666a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
@@ -396,6 +396,9 @@ struct vega12_hwmgr {
 
 	/* ---- Gfxoff ---- */
 	bool                           gfxoff_controlled_by_driver;
+
+	unsigned long                  metrics_time;
+	SmuMetrics_t                   metrics_table;
 };
 
 #define VEGA12_DPM2_NEAR_TDP_DEC                      10
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
index ddb801517667..1eaf0fa28ef7 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
@@ -287,8 +287,26 @@ static int vega12_smu_init(struct pp_hwmgr *hwmgr)
 	priv->smu_tables.entry[TABLE_OVERDRIVE].version = 0x01;
 	priv->smu_tables.entry[TABLE_OVERDRIVE].size = sizeof(OverDriveTable_t);
 
+	/* allocate space for SMU_METRICS table */
+	ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev,
+				      sizeof(SmuMetrics_t),
+				      PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_VRAM,
+				      &priv->smu_tables.entry[TABLE_SMU_METRICS].handle,
+				      &priv->smu_tables.entry[TABLE_SMU_METRICS].mc_addr,
+				      &priv->smu_tables.entry[TABLE_SMU_METRICS].table);
+	if (ret)
+		goto err4;
+
+	priv->smu_tables.entry[TABLE_SMU_METRICS].version = 0x01;
+	priv->smu_tables.entry[TABLE_SMU_METRICS].size = sizeof(SmuMetrics_t);
+
 	return 0;
 
+err4:
+	amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_OVERDRIVE].handle,
+				&priv->smu_tables.entry[TABLE_OVERDRIVE].mc_addr,
+				&priv->smu_tables.entry[TABLE_OVERDRIVE].table);
 err3:
 	amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_AVFS_FUSE_OVERRIDE].handle,
 				&priv->smu_tables.entry[TABLE_AVFS_FUSE_OVERRIDE].mc_addr,
@@ -334,6 +352,9 @@ static int vega12_smu_fini(struct pp_hwmgr *hwmgr)
 		amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_OVERDRIVE].handle,
 				      &priv->smu_tables.entry[TABLE_OVERDRIVE].mc_addr,
 				      &priv->smu_tables.entry[TABLE_OVERDRIVE].table);
+		amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_SMU_METRICS].handle,
+				      &priv->smu_tables.entry[TABLE_SMU_METRICS].mc_addr,
+				      &priv->smu_tables.entry[TABLE_SMU_METRICS].table);
 		kfree(hwmgr->smu_backend);
 		hwmgr->smu_backend = NULL;
 	}
-- 
2.21.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 5/6] drm/amd/powerplay: expose current edge and memory temperatures
       [not found] ` <20190418090302.8963-1-evan.quan-5C7GfCeVMHo@public.gmane.org>
                     ` (2 preceding siblings ...)
  2019-04-18  9:03   ` [PATCH 4/6] drm/amd/powerplay: support SMU metrics table on Vega12 Evan Quan
@ 2019-04-18  9:03   ` Evan Quan
       [not found]     ` <20190418090302.8963-5-evan.quan-5C7GfCeVMHo@public.gmane.org>
  2019-04-18  9:03   ` [PATCH 6/6] drm/amd/powerplay: correct SOC15 hotspot temperature critical max Evan Quan
  2019-04-19 15:08   ` [PATCH 1/6] drm/amd/powerplay: support hwmon temperature channel labels Alex Deucher
  5 siblings, 1 reply; 13+ messages in thread
From: Evan Quan @ 2019-04-18  9:03 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Evan Quan

Two new hwmon interfaces (temp2_input and temp3_input) are added.
They are supported on SOC15 dGPUs only.

Change-Id: I935c512bd38e080fb8b6e3164c5e5294baff4e91
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c        | 45 +++++++++++++++----
 .../gpu/drm/amd/include/kgd_pp_interface.h    |  2 +
 .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c    | 12 +++++
 .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c    | 19 ++++++++
 .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c    | 18 ++++++++
 5 files changed, 88 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index be33144e2dca..1007307845d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -1434,6 +1434,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	struct drm_device *ddev = adev->ddev;
+	int channel = to_sensor_dev_attr(attr)->index;
 	int r, temp, size = sizeof(temp);
 
 	/* Can't get temperature when the card is off */
@@ -1441,11 +1442,32 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
 		return -EINVAL;
 
-	/* get the temperature */
-	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
-				   (void *)&temp, &size);
-	if (r)
-		return r;
+	if (channel >= PP_TEMP_MAX)
+		return -EINVAL;
+
+	switch (channel) {
+	case PP_TEMP_JUNCTION:
+		/* get current junction temperature */
+		r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
+					   (void *)&temp, &size);
+		if (r)
+			return r;
+		break;
+	case PP_TEMP_EDGE:
+		/* get current edge temperature */
+		r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP,
+					   (void *)&temp, &size);
+		if (r)
+			return r;
+		break;
+	case PP_TEMP_MEM:
+		/* get current memory temperature */
+		r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP,
+					   (void *)&temp, &size);
+		if (r)
+			return r;
+		break;
+	}
 
 	return snprintf(buf, PAGE_SIZE, "%d\n", temp);
 }
@@ -2109,7 +2131,8 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
  * - temp[1-3]_label: temperature channel label
  *   - temp2_label and temp3_label are supported on SOC15 dGPUs only
  *
- * - temp1_input: the on die GPU temperature in millidegrees Celsius
+ * - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius
+ *   - temp2_input and temp3_input are supported on SOC15 dGPUs only
  *
  * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius
  *   - temp2_crit and temp3_crit are supported on SOC15 dGPUs only
@@ -2166,13 +2189,15 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
  *
  */
 
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION);
 static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
 static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION);
+static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE);
 static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_edge_temp_thresh, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_edge_temp_thresh, NULL, 1);
 static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE);
+static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM);
 static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
 static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM);
@@ -2205,8 +2230,10 @@ static struct attribute *hwmon_attributes[] = {
 	&sensor_dev_attr_temp1_input.dev_attr.attr,
 	&sensor_dev_attr_temp1_crit.dev_attr.attr,
 	&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
+	&sensor_dev_attr_temp2_input.dev_attr.attr,
 	&sensor_dev_attr_temp2_crit.dev_attr.attr,
 	&sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
+	&sensor_dev_attr_temp3_input.dev_attr.attr,
 	&sensor_dev_attr_temp3_crit.dev_attr.attr,
 	&sensor_dev_attr_temp3_crit_hyst.dev_attr.attr,
 	&sensor_dev_attr_temp1_label.dev_attr.attr,
@@ -2348,7 +2375,9 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	     attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
 	     attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
 	     attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
-	     attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr))
+	     attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp2_input.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp3_input.dev_attr.attr))
 		return 0;
 
 
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 17324c0d503e..19713ffdb03e 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -111,6 +111,8 @@ enum amd_pp_sensors {
 	AMDGPU_PP_SENSOR_GPU_LOAD,
 	AMDGPU_PP_SENSOR_GFX_MCLK,
 	AMDGPU_PP_SENSOR_GPU_TEMP,
+	AMDGPU_PP_SENSOR_EDGE_TEMP,
+	AMDGPU_PP_SENSOR_MEM_TEMP,
 	AMDGPU_PP_SENSOR_VCE_POWER,
 	AMDGPU_PP_SENSOR_UVD_POWER,
 	AMDGPU_PP_SENSOR_GPU_POWER,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 1d78a5ee9523..f4ecbbe854ee 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -3785,6 +3785,18 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 		*((uint32_t *)value) = vega10_thermal_get_temperature(hwmgr);
 		*size = 4;
 		break;
+	case AMDGPU_PP_SENSOR_EDGE_TEMP:
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureEdge);
+		*((uint32_t *)value) = smum_get_argument(hwmgr) *
+			PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		*size = 4;
+		break;
+	case AMDGPU_PP_SENSOR_MEM_TEMP:
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHBM);
+		*((uint32_t *)value) = smum_get_argument(hwmgr) *
+			PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		*size = 4;
+		break;
 	case AMDGPU_PP_SENSOR_UVD_POWER:
 		*((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
 		*size = 4;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index 695ac2875540..86c48cb56f6c 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -1338,6 +1338,7 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 			      void *value, int *size)
 {
 	struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend);
+	SmuMetrics_t metrics_table;
 	int ret = 0;
 
 	switch (idx) {
@@ -1360,6 +1361,24 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 		*((uint32_t *)value) = vega12_thermal_get_temperature(hwmgr);
 		*size = 4;
 		break;
+	case AMDGPU_PP_SENSOR_EDGE_TEMP:
+		ret = vega12_get_metrics_table(hwmgr, &metrics_table);
+		if (ret)
+			return ret;
+
+		*((uint32_t *)value) = metrics_table.TemperatureEdge *
+			PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		*size = 4;
+		break;
+	case AMDGPU_PP_SENSOR_MEM_TEMP:
+		ret = vega12_get_metrics_table(hwmgr, &metrics_table);
+		if (ret)
+			return ret;
+
+		*((uint32_t *)value) = metrics_table.TemperatureHBM *
+			PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		*size = 4;
+		break;
 	case AMDGPU_PP_SENSOR_UVD_POWER:
 		*((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
 		*size = 4;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 0c0714862eb8..72a71a002f0b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -2142,6 +2142,24 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 		*((uint32_t *)value) = vega20_thermal_get_temperature(hwmgr);
 		*size = 4;
 		break;
+	case AMDGPU_PP_SENSOR_EDGE_TEMP:
+		ret = vega20_get_metrics_table(hwmgr, &metrics_table);
+		if (ret)
+			return ret;
+
+		*((uint32_t *)value) = metrics_table.TemperatureEdge *
+			PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		*size = 4;
+		break;
+	case AMDGPU_PP_SENSOR_MEM_TEMP:
+		ret = vega20_get_metrics_table(hwmgr, &metrics_table);
+		if (ret)
+			return ret;
+
+		*((uint32_t *)value) = metrics_table.TemperatureHBM *
+			PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		*size = 4;
+		break;
 	case AMDGPU_PP_SENSOR_UVD_POWER:
 		*((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
 		*size = 4;
-- 
2.21.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 6/6] drm/amd/powerplay: correct SOC15 hotspot temperature critical max
       [not found] ` <20190418090302.8963-1-evan.quan-5C7GfCeVMHo@public.gmane.org>
                     ` (3 preceding siblings ...)
  2019-04-18  9:03   ` [PATCH 5/6] drm/amd/powerplay: expose current edge and memory temperatures Evan Quan
@ 2019-04-18  9:03   ` Evan Quan
       [not found]     ` <20190418090302.8963-6-evan.quan-5C7GfCeVMHo@public.gmane.org>
  2019-04-19 15:08   ` [PATCH 1/6] drm/amd/powerplay: support hwmon temperature channel labels Alex Deucher
  5 siblings, 1 reply; 13+ messages in thread
From: Evan Quan @ 2019-04-18  9:03 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Evan Quan

Correct Vega10, Vega12 and Vega20 hotspot temperature critical max
values.

Change-Id: I77bb77761e8530066ec4f3225f8555cf8f672348
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 2 +-
 drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 2 +-
 drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index f4ecbbe854ee..efd9947eb723 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -4871,7 +4871,7 @@ static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
 
 	memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
 
-	thermal_data->max = table_info->tdp_table->usSoftwareShutdownTemp *
+	thermal_data->max = pp_table->ThotspotLimit *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 	thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index 86c48cb56f6c..ba35118a35b8 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -2574,7 +2574,7 @@ static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
 
 	memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
 
-	thermal_data->max = pptable_information->us_software_shutdown_temp *
+	thermal_data->max = pp_table->ThotspotLimit *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 	thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 72a71a002f0b..8dcd04561e8f 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -3989,7 +3989,7 @@ static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
 
 	memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
 
-	thermal_data->max = pptable_information->us_software_shutdown_temp *
+	thermal_data->max = pp_table->ThotspotLimit *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 	thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
-- 
2.21.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/6] drm/amd/powerplay: support hwmon temperature channel labels
       [not found] ` <20190418090302.8963-1-evan.quan-5C7GfCeVMHo@public.gmane.org>
                     ` (4 preceding siblings ...)
  2019-04-18  9:03   ` [PATCH 6/6] drm/amd/powerplay: correct SOC15 hotspot temperature critical max Evan Quan
@ 2019-04-19 15:08   ` Alex Deucher
       [not found]     ` <CADnq5_PU-E_bgMNq5_ZokpXrKjxxE33uuhNYfqY2ggBOeePkhg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  5 siblings, 1 reply; 13+ messages in thread
From: Alex Deucher @ 2019-04-19 15:08 UTC (permalink / raw)
  To: Evan Quan; +Cc: amd-gfx list

On Thu, Apr 18, 2019 at 5:03 AM Evan Quan <evan.quan@amd.com> wrote:
>
> Expose temp[1-3]_label hwmon interfaces. While temp2_label
> and temp3_label are visible for SOC15 dGPUs only.
>
> Change-Id: I7f1e10c52ec21d272027554cdf6da97103e0be58
> Signed-off-by: Evan Quan <evan.quan@amd.com>

I'd suggest making this one last in the series since otherwise we'll
have labels without temps for a few commits.
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c        | 40 +++++++++++++++++++
>  .../gpu/drm/amd/include/kgd_pp_interface.h    |  7 ++++
>  2 files changed, 47 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> index 552127b74f78..c17eb228417e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> @@ -120,6 +120,15 @@ static const struct cg_flag_name clocks[] = {
>         {0, NULL},
>  };
>
> +static const struct hwmon_temp_label {
> +       enum PP_HWMON_TEMP channel;
> +       const char *label;
> +} temp_label[] = {
> +       {PP_TEMP_JUNCTION, "junction"},
> +       {PP_TEMP_EDGE, "edge"},
> +       {PP_TEMP_MEM, "mem"},
> +};
> +
>  void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
>  {
>         if (adev->pm.dpm_enabled) {
> @@ -1457,6 +1466,20 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev,
>         return snprintf(buf, PAGE_SIZE, "%d\n", temp);
>  }
>
> +
> +static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev,
> +                                            struct device_attribute *attr,
> +                                            char *buf)
> +{
> +       struct amdgpu_device *adev = dev_get_drvdata(dev);
> +       int channel = to_sensor_dev_attr(attr)->index;
> +
> +       if (channel >= PP_TEMP_MAX)
> +               return -EINVAL;
> +
> +       return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label);
> +}
> +
>  static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
>                                             struct device_attribute *attr,
>                                             char *buf)
> @@ -2026,6 +2049,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
>   *
>   * hwmon interfaces for GPU temperature:
>   *
> + * - temp[1-3]_label: temperature channel label
> + *   - temp2_label and temp3_label are supported on SOC15 dGPUs only
> + *
>   * - temp1_input: the on die GPU temperature in millidegrees Celsius
>   *
>   * - temp1_crit: temperature critical max value in millidegrees Celsius
> @@ -2081,6 +2107,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
>  static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
>  static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
>  static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
> +static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION);
> +static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE);
> +static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM);
>  static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0);
>  static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0);
>  static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
> @@ -2107,6 +2136,9 @@ static struct attribute *hwmon_attributes[] = {
>         &sensor_dev_attr_temp1_input.dev_attr.attr,
>         &sensor_dev_attr_temp1_crit.dev_attr.attr,
>         &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
> +       &sensor_dev_attr_temp1_label.dev_attr.attr,
> +       &sensor_dev_attr_temp2_label.dev_attr.attr,
> +       &sensor_dev_attr_temp3_label.dev_attr.attr,
>         &sensor_dev_attr_pwm1.dev_attr.attr,
>         &sensor_dev_attr_pwm1_enable.dev_attr.attr,
>         &sensor_dev_attr_pwm1_min.dev_attr.attr,
> @@ -2229,6 +2261,14 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
>              attr == &sensor_dev_attr_freq2_label.dev_attr.attr))
>                 return 0;
>
> +       /* only SOC15 dGPUs support edge and mem temperatures */
> +       if (((adev->flags & AMD_IS_APU) ||
> +            adev->asic_type < CHIP_VEGA10) &&
> +           (attr == &sensor_dev_attr_temp2_label.dev_attr.attr ||
> +            attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
> +               return 0;
> +
> +
>         return effective_mode;
>  }
>
> diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> index 2b579ba9b685..17324c0d503e 100644
> --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> @@ -159,6 +159,13 @@ struct pp_states_info {
>         uint32_t states[16];
>  };
>
> +enum PP_HWMON_TEMP {
> +       PP_TEMP_JUNCTION = 0,
> +       PP_TEMP_EDGE,
> +       PP_TEMP_MEM,
> +       PP_TEMP_MAX
> +};
> +
>  #define PP_GROUP_MASK        0xF0000000
>  #define PP_GROUP_SHIFT       28
>
> --
> 2.21.0
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/6] drm/amd/powerplay: support edge/memory critical limit values
       [not found]     ` <20190418090302.8963-2-evan.quan-5C7GfCeVMHo@public.gmane.org>
@ 2019-04-19 15:09       ` Alex Deucher
  0 siblings, 0 replies; 13+ messages in thread
From: Alex Deucher @ 2019-04-19 15:09 UTC (permalink / raw)
  To: Evan Quan; +Cc: amd-gfx list

On Thu, Apr 18, 2019 at 5:03 AM Evan Quan <evan.quan@amd.com> wrote:
>
> These new interfaces(temp2_crit, temp2_crit_hyst, temp3_crit,
> temp3_crit_hyst) are supported on SOC15 dGPUs only.
>
> Change-Id: Ia87e3f6ad816b51d6680eb74c8f755d6c2b0a6ae
> Signed-off-by: Evan Quan <evan.quan@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h       |  8 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c        | 51 +++++++++++++++++--
>  .../drm/amd/powerplay/hwmgr/hardwaremanager.c | 12 ++++-
>  .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c    |  6 +++
>  .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c    |  7 +++
>  .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c    |  7 +++
>  .../gpu/drm/amd/powerplay/inc/power_state.h   |  4 ++
>  .../gpu/drm/amd/powerplay/inc/pp_thermal.h    |  8 +--
>  8 files changed, 95 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
> index dca35407879d..e1492438ae7b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
> @@ -75,6 +75,14 @@ struct amdgpu_dpm_thermal {
>         int                min_temp;
>         /* high temperature threshold */
>         int                max_temp;
> +       /* edge low temperature threshold */
> +       int                min_edge_temp;
> +       /* edge high temperature critical threshold */
> +       int                max_edge_crit_temp;
> +       /* memory low temperature threshold */
> +       int                min_mem_temp;
> +       /* memory high temperature critical threshold */
> +       int                max_mem_crit_temp;
>         /* was last interrupt low to high or high to low */
>         bool               high_to_low;
>         /* interrupt source */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> index c17eb228417e..3f6b5b5bb0c6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> @@ -1466,6 +1466,37 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev,
>         return snprintf(buf, PAGE_SIZE, "%d\n", temp);
>  }
>
> +static ssize_t amdgpu_hwmon_show_edge_temp_thresh(struct device *dev,
> +                                            struct device_attribute *attr,
> +                                            char *buf)
> +{
> +       struct amdgpu_device *adev = dev_get_drvdata(dev);
> +       int hyst = to_sensor_dev_attr(attr)->index;
> +       int temp;
> +
> +       if (hyst)
> +               temp = adev->pm.dpm.thermal.min_edge_temp;
> +       else
> +               temp = adev->pm.dpm.thermal.max_edge_crit_temp;
> +
> +       return snprintf(buf, PAGE_SIZE, "%d\n", temp);
> +}
> +
> +static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev,
> +                                            struct device_attribute *attr,
> +                                            char *buf)
> +{
> +       struct amdgpu_device *adev = dev_get_drvdata(dev);
> +       int hyst = to_sensor_dev_attr(attr)->index;
> +       int temp;
> +
> +       if (hyst)
> +               temp = adev->pm.dpm.thermal.min_mem_temp;
> +       else
> +               temp = adev->pm.dpm.thermal.max_mem_crit_temp;
> +
> +       return snprintf(buf, PAGE_SIZE, "%d\n", temp);
> +}
>
>  static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev,
>                                              struct device_attribute *attr,
> @@ -2054,9 +2085,11 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
>   *
>   * - temp1_input: the on die GPU temperature in millidegrees Celsius
>   *
> - * - temp1_crit: temperature critical max value in millidegrees Celsius
> + * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius
> + *   - temp2_crit and temp3_crit are supported on SOC15 dGPUs only
>   *
> - * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
> + * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
> + *   - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only
>   *
>   * hwmon interfaces for GPU voltage:
>   *
> @@ -2107,6 +2140,10 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
>  static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
>  static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
>  static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
> +static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_edge_temp_thresh, NULL, 0);
> +static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_edge_temp_thresh, NULL, 1);
> +static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
> +static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
>  static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION);
>  static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE);
>  static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM);
> @@ -2136,6 +2173,10 @@ static struct attribute *hwmon_attributes[] = {
>         &sensor_dev_attr_temp1_input.dev_attr.attr,
>         &sensor_dev_attr_temp1_crit.dev_attr.attr,
>         &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
> +       &sensor_dev_attr_temp2_crit.dev_attr.attr,
> +       &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
> +       &sensor_dev_attr_temp3_crit.dev_attr.attr,
> +       &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr,
>         &sensor_dev_attr_temp1_label.dev_attr.attr,
>         &sensor_dev_attr_temp2_label.dev_attr.attr,
>         &sensor_dev_attr_temp3_label.dev_attr.attr,
> @@ -2265,7 +2306,11 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
>         if (((adev->flags & AMD_IS_APU) ||
>              adev->asic_type < CHIP_VEGA10) &&
>             (attr == &sensor_dev_attr_temp2_label.dev_attr.attr ||
> -            attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
> +            attr == &sensor_dev_attr_temp3_label.dev_attr.attr ||
> +            attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
> +            attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
> +            attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
> +            attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr))
>                 return 0;
>
>
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
> index 70f7f47a2fcf..c8397b20f71a 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
> @@ -225,7 +225,13 @@ int phm_register_irq_handlers(struct pp_hwmgr *hwmgr)
>  int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
>  {
>         int ret = 0;
> -       struct PP_TemperatureRange range = {TEMP_RANGE_MIN, TEMP_RANGE_MAX};
> +       struct PP_TemperatureRange range = {
> +               TEMP_RANGE_MIN,
> +               TEMP_RANGE_MAX,
> +               TEMP_RANGE_MIN,
> +               TEMP_RANGE_MAX,
> +               TEMP_RANGE_MIN,
> +               TEMP_RANGE_MAX};
>         struct amdgpu_device *adev = hwmgr->adev;
>
>         if (hwmgr->hwmgr_func->get_thermal_temperature_range)
> @@ -239,6 +245,10 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
>
>         adev->pm.dpm.thermal.min_temp = range.min;
>         adev->pm.dpm.thermal.max_temp = range.max;
> +       adev->pm.dpm.thermal.min_edge_temp = range.edge_min;
> +       adev->pm.dpm.thermal.max_edge_crit_temp = range.edge_crit_max;
> +       adev->pm.dpm.thermal.min_mem_temp = range.mem_min;
> +       adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max;
>
>         return ret;
>  }
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> index 384c37875cd0..65aa7e70d7d9 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> @@ -4854,11 +4854,17 @@ static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
>  {
>         struct phm_ppt_v2_information *table_info =
>                         (struct phm_ppt_v2_information *)hwmgr->pptable;
> +       struct vega10_hwmgr *data = hwmgr->backend;
> +       PPTable_t *pp_table = &(data->smc_state_table.pp_table);
>
>         memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
>
>         thermal_data->max = table_info->tdp_table->usSoftwareShutdownTemp *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->edge_crit_max = pp_table->TedgeLimit *
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->mem_crit_max = pp_table->ThbmLimit *
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>
>         return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
> index 707cd4b0357f..c0994851e7c7 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
> @@ -2528,11 +2528,18 @@ static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
>  {
>         struct phm_ppt_v3_information *pptable_information =
>                 (struct phm_ppt_v3_information *)hwmgr->pptable;
> +       struct vega12_hwmgr *data =
> +                       (struct vega12_hwmgr *)(hwmgr->backend);
> +       PPTable_t *pp_table = &(data->smc_state_table.pp_table);
>
>         memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
>
>         thermal_data->max = pptable_information->us_software_shutdown_temp *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->edge_crit_max = pp_table->TedgeLimit *
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->mem_crit_max = pp_table->ThbmLimit *
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>
>         return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> index 39a547084e90..5d9aa0f22c86 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> @@ -3965,11 +3965,18 @@ static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
>  {
>         struct phm_ppt_v3_information *pptable_information =
>                 (struct phm_ppt_v3_information *)hwmgr->pptable;
> +       struct vega20_hwmgr *data =
> +                       (struct vega20_hwmgr *)(hwmgr->backend);
> +       PPTable_t *pp_table = &(data->smc_state_table.pp_table);
>
>         memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
>
>         thermal_data->max = pptable_information->us_software_shutdown_temp *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->edge_crit_max = pp_table->TedgeLimit *
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->mem_crit_max = pp_table->ThbmLimit *
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>
>         return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/powerplay/inc/power_state.h b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
> index a99b5cbb113e..c102415ddc98 100644
> --- a/drivers/gpu/drm/amd/powerplay/inc/power_state.h
> +++ b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
> @@ -124,6 +124,10 @@ struct PP_StateSoftwareAlgorithmBlock {
>  struct PP_TemperatureRange {
>         int min;
>         int max;
> +       int edge_min;
> +       int edge_crit_max;
> +       int mem_min;
> +       int mem_crit_max;
>  };
>
>  struct PP_StateValidationBlock {
> diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
> index 201d2b6329ab..75a0a2f8bea2 100644
> --- a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
> +++ b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
> @@ -27,14 +27,14 @@
>
>  static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] =
>  {
> -       {-273150,  99000},
> -       { 120000, 120000},
> +       {-273150,  99000, -273150, 99000, -273150, 99000},
> +       { 120000, 120000, 120000, 120000, 120000, 120000},
>  };
>
>  static const struct PP_TemperatureRange SMU7ThermalPolicy[] =
>  {
> -       {-273150,  99000},
> -       { 120000, 120000},
> +       {-273150,  99000, -273150, 99000, -273150, 99000},
> +       { 120000, 120000, 120000, 120000, 120000, 120000},
>  };
>
>  #endif
> --
> 2.21.0
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/6] drm/amd/powerplay: support temperature emergency max values
       [not found]     ` <20190418090302.8963-3-evan.quan-5C7GfCeVMHo@public.gmane.org>
@ 2019-04-19 15:10       ` Alex Deucher
  0 siblings, 0 replies; 13+ messages in thread
From: Alex Deucher @ 2019-04-19 15:10 UTC (permalink / raw)
  To: Evan Quan; +Cc: amd-gfx list

On Thu, Apr 18, 2019 at 5:03 AM Evan Quan <evan.quan@amd.com> wrote:
>
> These new interfaces (temp1_emergency, temp2_emergency,
> temp3_emergency) are supported on SOC15 dGPUs only.
>
> Change-Id: I2552df63f9c8c50294b3940bb2a402217673c2bc
> Signed-off-by: Evan Quan <evan.quan@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h       |  6 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c        | 40 ++++++++++++++++++-
>  .../drm/amd/powerplay/hwmgr/hardwaremanager.c |  6 +++
>  .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c    |  6 +++
>  .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c    |  6 +++
>  .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c    |  6 +++
>  .../gpu/drm/amd/powerplay/inc/power_state.h   |  3 ++
>  .../gpu/drm/amd/powerplay/inc/pp_thermal.h    | 12 ++++--
>  8 files changed, 80 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
> index e1492438ae7b..32e2def42f30 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
> @@ -75,14 +75,20 @@ struct amdgpu_dpm_thermal {
>         int                min_temp;
>         /* high temperature threshold */
>         int                max_temp;
> +       /* hotspot max emergency(shutdown) temp */
> +       int                max_hotspot_emergency_temp;
>         /* edge low temperature threshold */
>         int                min_edge_temp;
>         /* edge high temperature critical threshold */
>         int                max_edge_crit_temp;
> +       /* edge max emergency(shutdown) temp */
> +       int                max_edge_emergency_temp;
>         /* memory low temperature threshold */
>         int                min_mem_temp;
>         /* memory high temperature critical threshold */
>         int                max_mem_crit_temp;
> +       /* memory max emergency(shutdown) temp */
> +       int                max_mem_emergency_temp;
>         /* was last interrupt low to high or high to low */
>         bool               high_to_low;
>         /* interrupt source */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> index 3f6b5b5bb0c6..be33144e2dca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> @@ -1511,6 +1511,32 @@ static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev,
>         return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label);
>  }
>
> +static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev,
> +                                            struct device_attribute *attr,
> +                                            char *buf)
> +{
> +       struct amdgpu_device *adev = dev_get_drvdata(dev);
> +       int channel = to_sensor_dev_attr(attr)->index;
> +       int temp;
> +
> +       if (channel >= PP_TEMP_MAX)
> +               return -EINVAL;
> +
> +       switch (channel) {
> +       case PP_TEMP_JUNCTION:
> +               temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp;
> +               break;
> +       case PP_TEMP_EDGE:
> +               temp = adev->pm.dpm.thermal.max_edge_emergency_temp;
> +               break;
> +       case PP_TEMP_MEM:
> +               temp = adev->pm.dpm.thermal.max_mem_emergency_temp;
> +               break;
> +       }
> +
> +       return snprintf(buf, PAGE_SIZE, "%d\n", temp);
> +}
> +
>  static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
>                                             struct device_attribute *attr,
>                                             char *buf)
> @@ -2091,6 +2117,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
>   * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
>   *   - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only
>   *
> + * - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius
> + *   - these are supported on SOC15 dGPUs only
> + *
>   * hwmon interfaces for GPU voltage:
>   *
>   * - in0_input: the voltage on the GPU in millivolts
> @@ -2140,10 +2169,13 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
>  static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
>  static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
>  static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
> +static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION);
>  static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_edge_temp_thresh, NULL, 0);
>  static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_edge_temp_thresh, NULL, 1);
> +static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE);
>  static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
>  static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
> +static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM);
>  static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION);
>  static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE);
>  static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM);
> @@ -2180,6 +2212,9 @@ static struct attribute *hwmon_attributes[] = {
>         &sensor_dev_attr_temp1_label.dev_attr.attr,
>         &sensor_dev_attr_temp2_label.dev_attr.attr,
>         &sensor_dev_attr_temp3_label.dev_attr.attr,
> +       &sensor_dev_attr_temp1_emergency.dev_attr.attr,
> +       &sensor_dev_attr_temp2_emergency.dev_attr.attr,
> +       &sensor_dev_attr_temp3_emergency.dev_attr.attr,
>         &sensor_dev_attr_pwm1.dev_attr.attr,
>         &sensor_dev_attr_pwm1_enable.dev_attr.attr,
>         &sensor_dev_attr_pwm1_min.dev_attr.attr,
> @@ -2310,7 +2345,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
>              attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
>              attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
>              attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
> -            attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr))
> +            attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
> +            attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
> +            attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
> +            attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr))
>                 return 0;
>
>
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
> index c8397b20f71a..12027b194a4a 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
> @@ -228,9 +228,12 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
>         struct PP_TemperatureRange range = {
>                 TEMP_RANGE_MIN,
>                 TEMP_RANGE_MAX,
> +               TEMP_RANGE_MAX,
>                 TEMP_RANGE_MIN,
>                 TEMP_RANGE_MAX,
> +               TEMP_RANGE_MAX,
>                 TEMP_RANGE_MIN,
> +               TEMP_RANGE_MAX,
>                 TEMP_RANGE_MAX};
>         struct amdgpu_device *adev = hwmgr->adev;
>
> @@ -245,10 +248,13 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
>
>         adev->pm.dpm.thermal.min_temp = range.min;
>         adev->pm.dpm.thermal.max_temp = range.max;
> +       adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max;
>         adev->pm.dpm.thermal.min_edge_temp = range.edge_min;
>         adev->pm.dpm.thermal.max_edge_crit_temp = range.edge_crit_max;
> +       adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max;
>         adev->pm.dpm.thermal.min_mem_temp = range.mem_min;
>         adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max;
> +       adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max;
>
>         return ret;
>  }
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> index 65aa7e70d7d9..1d78a5ee9523 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> @@ -4861,10 +4861,16 @@ static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
>
>         thermal_data->max = table_info->tdp_table->usSoftwareShutdownTemp *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>         thermal_data->edge_crit_max = pp_table->TedgeLimit *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>         thermal_data->mem_crit_max = pp_table->ThbmLimit *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>
>         return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
> index c0994851e7c7..a283046347c9 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
> @@ -2536,10 +2536,16 @@ static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
>
>         thermal_data->max = pptable_information->us_software_shutdown_temp *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>         thermal_data->edge_crit_max = pp_table->TedgeLimit *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>         thermal_data->mem_crit_max = pp_table->ThbmLimit *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>
>         return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> index 5d9aa0f22c86..0c0714862eb8 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> @@ -3973,10 +3973,16 @@ static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
>
>         thermal_data->max = pptable_information->us_software_shutdown_temp *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>         thermal_data->edge_crit_max = pp_table->TedgeLimit *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>         thermal_data->mem_crit_max = pp_table->ThbmLimit *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +       thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
> +               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>
>         return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/powerplay/inc/power_state.h b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
> index c102415ddc98..48d6c4b9fa61 100644
> --- a/drivers/gpu/drm/amd/powerplay/inc/power_state.h
> +++ b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
> @@ -124,10 +124,13 @@ struct PP_StateSoftwareAlgorithmBlock {
>  struct PP_TemperatureRange {
>         int min;
>         int max;
> +       int hotspot_emergency_max;
>         int edge_min;
>         int edge_crit_max;
> +       int edge_emergency_max;
>         int mem_min;
>         int mem_crit_max;
> +       int mem_emergency_max;
>  };
>
>  struct PP_StateValidationBlock {
> diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
> index 75a0a2f8bea2..3e30768f9e1c 100644
> --- a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
> +++ b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
> @@ -27,14 +27,18 @@
>
>  static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] =
>  {
> -       {-273150,  99000, -273150, 99000, -273150, 99000},
> -       { 120000, 120000, 120000, 120000, 120000, 120000},
> +       {-273150,  99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
> +       { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
>  };
>
>  static const struct PP_TemperatureRange SMU7ThermalPolicy[] =
>  {
> -       {-273150,  99000, -273150, 99000, -273150, 99000},
> -       { 120000, 120000, 120000, 120000, 120000, 120000},
> +       {-273150,  99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
> +       { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
>  };
>
> +#define CTF_OFFSET_EDGE                        5
> +#define CTF_OFFSET_HOTSPOT             5
> +#define CTF_OFFSET_HBM                 5
> +
>  #endif
> --
> 2.21.0
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/6] drm/amd/powerplay: support SMU metrics table on Vega12
       [not found]     ` <20190418090302.8963-4-evan.quan-5C7GfCeVMHo@public.gmane.org>
@ 2019-04-19 15:11       ` Alex Deucher
  0 siblings, 0 replies; 13+ messages in thread
From: Alex Deucher @ 2019-04-19 15:11 UTC (permalink / raw)
  To: Evan Quan; +Cc: amd-gfx list

On Thu, Apr 18, 2019 at 5:03 AM Evan Quan <evan.quan@amd.com> wrote:
>
> The SMU metrics table should provide some necessary sensor information.
>
> Change-Id: I898371cef06795c5369a14c4dd3fe8717959d81a
> Signed-off-by: Evan Quan <evan.quan@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c    | 21 +++++++++++++++++++
>  .../drm/amd/powerplay/hwmgr/vega12_hwmgr.h    |  3 +++
>  .../drm/amd/powerplay/smumgr/vega12_smumgr.c  | 21 +++++++++++++++++++
>  3 files changed, 45 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
> index a283046347c9..695ac2875540 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
> @@ -1237,6 +1237,27 @@ static uint32_t vega12_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low)
>         return (mem_clk * 100);
>  }
>
> +static int vega12_get_metrics_table(struct pp_hwmgr *hwmgr, SmuMetrics_t *metrics_table)
> +{
> +       struct vega12_hwmgr *data =
> +                       (struct vega12_hwmgr *)(hwmgr->backend);
> +       int ret = 0;
> +
> +       if (!data->metrics_time || time_after(jiffies, data->metrics_time + HZ / 2)) {
> +               ret = smum_smc_table_manager(hwmgr, (uint8_t *)metrics_table,
> +                               TABLE_SMU_METRICS, true);
> +               if (ret) {
> +                       pr_info("Failed to export SMU metrics table!\n");
> +                       return ret;
> +               }
> +               memcpy(&data->metrics_table, metrics_table, sizeof(SmuMetrics_t));
> +               data->metrics_time = jiffies;
> +       } else
> +               memcpy(metrics_table, &data->metrics_table, sizeof(SmuMetrics_t));
> +
> +       return ret;
> +}
> +
>  static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr, uint32_t *query)
>  {
>  #if 0
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
> index b3e424d28994..73875399666a 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
> @@ -396,6 +396,9 @@ struct vega12_hwmgr {
>
>         /* ---- Gfxoff ---- */
>         bool                           gfxoff_controlled_by_driver;
> +
> +       unsigned long                  metrics_time;
> +       SmuMetrics_t                   metrics_table;
>  };
>
>  #define VEGA12_DPM2_NEAR_TDP_DEC                      10
> diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
> index ddb801517667..1eaf0fa28ef7 100644
> --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
> @@ -287,8 +287,26 @@ static int vega12_smu_init(struct pp_hwmgr *hwmgr)
>         priv->smu_tables.entry[TABLE_OVERDRIVE].version = 0x01;
>         priv->smu_tables.entry[TABLE_OVERDRIVE].size = sizeof(OverDriveTable_t);
>
> +       /* allocate space for SMU_METRICS table */
> +       ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev,
> +                                     sizeof(SmuMetrics_t),
> +                                     PAGE_SIZE,
> +                                     AMDGPU_GEM_DOMAIN_VRAM,
> +                                     &priv->smu_tables.entry[TABLE_SMU_METRICS].handle,
> +                                     &priv->smu_tables.entry[TABLE_SMU_METRICS].mc_addr,
> +                                     &priv->smu_tables.entry[TABLE_SMU_METRICS].table);
> +       if (ret)
> +               goto err4;
> +
> +       priv->smu_tables.entry[TABLE_SMU_METRICS].version = 0x01;
> +       priv->smu_tables.entry[TABLE_SMU_METRICS].size = sizeof(SmuMetrics_t);
> +
>         return 0;
>
> +err4:
> +       amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_OVERDRIVE].handle,
> +                               &priv->smu_tables.entry[TABLE_OVERDRIVE].mc_addr,
> +                               &priv->smu_tables.entry[TABLE_OVERDRIVE].table);
>  err3:
>         amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_AVFS_FUSE_OVERRIDE].handle,
>                                 &priv->smu_tables.entry[TABLE_AVFS_FUSE_OVERRIDE].mc_addr,
> @@ -334,6 +352,9 @@ static int vega12_smu_fini(struct pp_hwmgr *hwmgr)
>                 amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_OVERDRIVE].handle,
>                                       &priv->smu_tables.entry[TABLE_OVERDRIVE].mc_addr,
>                                       &priv->smu_tables.entry[TABLE_OVERDRIVE].table);
> +               amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_SMU_METRICS].handle,
> +                                     &priv->smu_tables.entry[TABLE_SMU_METRICS].mc_addr,
> +                                     &priv->smu_tables.entry[TABLE_SMU_METRICS].table);
>                 kfree(hwmgr->smu_backend);
>                 hwmgr->smu_backend = NULL;
>         }
> --
> 2.21.0
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 5/6] drm/amd/powerplay: expose current edge and memory temperatures
       [not found]     ` <20190418090302.8963-5-evan.quan-5C7GfCeVMHo@public.gmane.org>
@ 2019-04-19 15:19       ` Alex Deucher
  0 siblings, 0 replies; 13+ messages in thread
From: Alex Deucher @ 2019-04-19 15:19 UTC (permalink / raw)
  To: Evan Quan; +Cc: amd-gfx list

On Thu, Apr 18, 2019 at 5:03 AM Evan Quan <evan.quan@amd.com> wrote:
>
> Two new hwmon interfaces (temp2_input and temp3_input) are added.
> They are supported on SOC15 dGPUs only.
>
> Change-Id: I935c512bd38e080fb8b6e3164c5e5294baff4e91
> Signed-off-by: Evan Quan <evan.quan@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c        | 45 +++++++++++++++----
>  .../gpu/drm/amd/include/kgd_pp_interface.h    |  2 +
>  .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c    | 12 +++++
>  .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c    | 19 ++++++++
>  .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c    | 18 ++++++++
>  5 files changed, 88 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> index be33144e2dca..1007307845d8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> @@ -1434,6 +1434,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
>  {
>         struct amdgpu_device *adev = dev_get_drvdata(dev);
>         struct drm_device *ddev = adev->ddev;
> +       int channel = to_sensor_dev_attr(attr)->index;
>         int r, temp, size = sizeof(temp);
>
>         /* Can't get temperature when the card is off */
> @@ -1441,11 +1442,32 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
>              (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
>                 return -EINVAL;
>
> -       /* get the temperature */
> -       r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
> -                                  (void *)&temp, &size);
> -       if (r)
> -               return r;
> +       if (channel >= PP_TEMP_MAX)
> +               return -EINVAL;
> +
> +       switch (channel) {
> +       case PP_TEMP_JUNCTION:
> +               /* get current junction temperature */
> +               r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
> +                                          (void *)&temp, &size);
> +               if (r)
> +                       return r;
> +               break;
> +       case PP_TEMP_EDGE:
> +               /* get current edge temperature */
> +               r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP,
> +                                          (void *)&temp, &size);
> +               if (r)
> +                       return r;
> +               break;
> +       case PP_TEMP_MEM:
> +               /* get current memory temperature */
> +               r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP,
> +                                          (void *)&temp, &size);
> +               if (r)
> +                       return r;
> +               break;
> +       }
>
>         return snprintf(buf, PAGE_SIZE, "%d\n", temp);
>  }
> @@ -2109,7 +2131,8 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
>   * - temp[1-3]_label: temperature channel label
>   *   - temp2_label and temp3_label are supported on SOC15 dGPUs only
>   *
> - * - temp1_input: the on die GPU temperature in millidegrees Celsius
> + * - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius
> + *   - temp2_input and temp3_input are supported on SOC15 dGPUs only
>   *
>   * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius
>   *   - temp2_crit and temp3_crit are supported on SOC15 dGPUs only
> @@ -2166,13 +2189,15 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
>   *
>   */
>
> -static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
> +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION);
>  static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
>  static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
>  static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION);
> +static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE);
>  static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_edge_temp_thresh, NULL, 0);
>  static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_edge_temp_thresh, NULL, 1);
>  static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE);
> +static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM);
>  static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
>  static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
>  static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM);
> @@ -2205,8 +2230,10 @@ static struct attribute *hwmon_attributes[] = {
>         &sensor_dev_attr_temp1_input.dev_attr.attr,
>         &sensor_dev_attr_temp1_crit.dev_attr.attr,
>         &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
> +       &sensor_dev_attr_temp2_input.dev_attr.attr,
>         &sensor_dev_attr_temp2_crit.dev_attr.attr,
>         &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
> +       &sensor_dev_attr_temp3_input.dev_attr.attr,
>         &sensor_dev_attr_temp3_crit.dev_attr.attr,
>         &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr,
>         &sensor_dev_attr_temp1_label.dev_attr.attr,
> @@ -2348,7 +2375,9 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
>              attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
>              attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
>              attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
> -            attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr))
> +            attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr ||
> +            attr == &sensor_dev_attr_temp2_input.dev_attr.attr ||
> +            attr == &sensor_dev_attr_temp3_input.dev_attr.attr))
>                 return 0;
>
>
> diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> index 17324c0d503e..19713ffdb03e 100644
> --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> @@ -111,6 +111,8 @@ enum amd_pp_sensors {
>         AMDGPU_PP_SENSOR_GPU_LOAD,
>         AMDGPU_PP_SENSOR_GFX_MCLK,
>         AMDGPU_PP_SENSOR_GPU_TEMP,

Add:
AMDGPU_PP_SENSOR_GPU_JUNCTION_TEMP = AMDGPU_PP_SENSOR_GPU_TEMP,
and use that for clarity.  That said, existing ASICs use
AMDGPU_PP_SENSOR_GPU_TEMP for the edge temperature, so I'd suggest
making
AMDGPU_PP_SENSOR_GPU_EDGE_TEMP = AMDGPU_PP_SENSOR_GPU_TEMP,
and then adding a new entry for JUNCTION.

> +       AMDGPU_PP_SENSOR_EDGE_TEMP,
> +       AMDGPU_PP_SENSOR_MEM_TEMP,
>         AMDGPU_PP_SENSOR_VCE_POWER,
>         AMDGPU_PP_SENSOR_UVD_POWER,
>         AMDGPU_PP_SENSOR_GPU_POWER,
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> index 1d78a5ee9523..f4ecbbe854ee 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> @@ -3785,6 +3785,18 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx,
>                 *((uint32_t *)value) = vega10_thermal_get_temperature(hwmgr);

I think vega10_thermal_get_temperature() returns the edge temperature
on vega10.  Maybe it would be better to switch to
PPSMC_MSG_GetTemperatureHotspot for AMDGPU_PP_SENSOR_GPU_JUNCTION_TEMP
or use vega10_thermal_get_temperature() for EDGE.

>                 *size = 4;
>                 break;
> +       case AMDGPU_PP_SENSOR_EDGE_TEMP:
> +               smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureEdge);
> +               *((uint32_t *)value) = smum_get_argument(hwmgr) *
> +                       PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +               *size = 4;
> +               break;
> +       case AMDGPU_PP_SENSOR_MEM_TEMP:
> +               smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHBM);
> +               *((uint32_t *)value) = smum_get_argument(hwmgr) *
> +                       PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +               *size = 4;
> +               break;
>         case AMDGPU_PP_SENSOR_UVD_POWER:
>                 *((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
>                 *size = 4;
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
> index 695ac2875540..86c48cb56f6c 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
> @@ -1338,6 +1338,7 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
>                               void *value, int *size)
>  {
>         struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend);
> +       SmuMetrics_t metrics_table;
>         int ret = 0;
>
>         switch (idx) {
> @@ -1360,6 +1361,24 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
>                 *((uint32_t *)value) = vega12_thermal_get_temperature(hwmgr);
>                 *size = 4;
>                 break;
> +       case AMDGPU_PP_SENSOR_EDGE_TEMP:
> +               ret = vega12_get_metrics_table(hwmgr, &metrics_table);
> +               if (ret)
> +                       return ret;
> +
> +               *((uint32_t *)value) = metrics_table.TemperatureEdge *
> +                       PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +               *size = 4;
> +               break;
> +       case AMDGPU_PP_SENSOR_MEM_TEMP:
> +               ret = vega12_get_metrics_table(hwmgr, &metrics_table);
> +               if (ret)
> +                       return ret;
> +
> +               *((uint32_t *)value) = metrics_table.TemperatureHBM *
> +                       PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +               *size = 4;
> +               break;
>         case AMDGPU_PP_SENSOR_UVD_POWER:
>                 *((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
>                 *size = 4;
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> index 0c0714862eb8..72a71a002f0b 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> @@ -2142,6 +2142,24 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx,
>                 *((uint32_t *)value) = vega20_thermal_get_temperature(hwmgr);
>                 *size = 4;
>                 break;
> +       case AMDGPU_PP_SENSOR_EDGE_TEMP:
> +               ret = vega20_get_metrics_table(hwmgr, &metrics_table);
> +               if (ret)
> +                       return ret;
> +
> +               *((uint32_t *)value) = metrics_table.TemperatureEdge *
> +                       PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +               *size = 4;
> +               break;
> +       case AMDGPU_PP_SENSOR_MEM_TEMP:
> +               ret = vega20_get_metrics_table(hwmgr, &metrics_table);
> +               if (ret)
> +                       return ret;
> +
> +               *((uint32_t *)value) = metrics_table.TemperatureHBM *
> +                       PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> +               *size = 4;
> +               break;
>         case AMDGPU_PP_SENSOR_UVD_POWER:
>                 *((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
>                 *size = 4;
> --
> 2.21.0
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/6] drm/amd/powerplay: support hwmon temperature channel labels
       [not found]     ` <CADnq5_PU-E_bgMNq5_ZokpXrKjxxE33uuhNYfqY2ggBOeePkhg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2019-04-19 15:20       ` Alex Deucher
  0 siblings, 0 replies; 13+ messages in thread
From: Alex Deucher @ 2019-04-19 15:20 UTC (permalink / raw)
  To: Evan Quan; +Cc: amd-gfx list

On Fri, Apr 19, 2019 at 11:08 AM Alex Deucher <alexdeucher@gmail.com> wrote:
>
> On Thu, Apr 18, 2019 at 5:03 AM Evan Quan <evan.quan@amd.com> wrote:
> >
> > Expose temp[1-3]_label hwmon interfaces. While temp2_label
> > and temp3_label are visible for SOC15 dGPUs only.
> >
> > Change-Id: I7f1e10c52ec21d272027554cdf6da97103e0be58
> > Signed-off-by: Evan Quan <evan.quan@amd.com>
>
> I'd suggest making this one last in the series since otherwise we'll
> have labels without temps for a few commits.
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c        | 40 +++++++++++++++++++
> >  .../gpu/drm/amd/include/kgd_pp_interface.h    |  7 ++++
> >  2 files changed, 47 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> > index 552127b74f78..c17eb228417e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> > @@ -120,6 +120,15 @@ static const struct cg_flag_name clocks[] = {
> >         {0, NULL},
> >  };
> >
> > +static const struct hwmon_temp_label {
> > +       enum PP_HWMON_TEMP channel;
> > +       const char *label;
> > +} temp_label[] = {
> > +       {PP_TEMP_JUNCTION, "junction"},
> > +       {PP_TEMP_EDGE, "edge"},

Actually, switch the order here.  Other than vega20, existing ASICs
expose edge today, so let's make temp1 be edge and then temp2 be
junction and temp3 be memory.

Alex

> > +       {PP_TEMP_MEM, "mem"},
> > +};
> > +
> >  void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
> >  {
> >         if (adev->pm.dpm_enabled) {
> > @@ -1457,6 +1466,20 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev,
> >         return snprintf(buf, PAGE_SIZE, "%d\n", temp);
> >  }
> >
> > +
> > +static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev,
> > +                                            struct device_attribute *attr,
> > +                                            char *buf)
> > +{
> > +       struct amdgpu_device *adev = dev_get_drvdata(dev);
> > +       int channel = to_sensor_dev_attr(attr)->index;
> > +
> > +       if (channel >= PP_TEMP_MAX)
> > +               return -EINVAL;
> > +
> > +       return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label);
> > +}
> > +
> >  static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
> >                                             struct device_attribute *attr,
> >                                             char *buf)
> > @@ -2026,6 +2049,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
> >   *
> >   * hwmon interfaces for GPU temperature:
> >   *
> > + * - temp[1-3]_label: temperature channel label
> > + *   - temp2_label and temp3_label are supported on SOC15 dGPUs only
> > + *
> >   * - temp1_input: the on die GPU temperature in millidegrees Celsius
> >   *
> >   * - temp1_crit: temperature critical max value in millidegrees Celsius
> > @@ -2081,6 +2107,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
> >  static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
> >  static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
> >  static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
> > +static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION);
> > +static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE);
> > +static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM);
> >  static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0);
> >  static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0);
> >  static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
> > @@ -2107,6 +2136,9 @@ static struct attribute *hwmon_attributes[] = {
> >         &sensor_dev_attr_temp1_input.dev_attr.attr,
> >         &sensor_dev_attr_temp1_crit.dev_attr.attr,
> >         &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
> > +       &sensor_dev_attr_temp1_label.dev_attr.attr,
> > +       &sensor_dev_attr_temp2_label.dev_attr.attr,
> > +       &sensor_dev_attr_temp3_label.dev_attr.attr,
> >         &sensor_dev_attr_pwm1.dev_attr.attr,
> >         &sensor_dev_attr_pwm1_enable.dev_attr.attr,
> >         &sensor_dev_attr_pwm1_min.dev_attr.attr,
> > @@ -2229,6 +2261,14 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
> >              attr == &sensor_dev_attr_freq2_label.dev_attr.attr))
> >                 return 0;
> >
> > +       /* only SOC15 dGPUs support edge and mem temperatures */
> > +       if (((adev->flags & AMD_IS_APU) ||
> > +            adev->asic_type < CHIP_VEGA10) &&
> > +           (attr == &sensor_dev_attr_temp2_label.dev_attr.attr ||
> > +            attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
> > +               return 0;
> > +
> > +
> >         return effective_mode;
> >  }
> >
> > diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> > index 2b579ba9b685..17324c0d503e 100644
> > --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> > +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> > @@ -159,6 +159,13 @@ struct pp_states_info {
> >         uint32_t states[16];
> >  };
> >
> > +enum PP_HWMON_TEMP {
> > +       PP_TEMP_JUNCTION = 0,
> > +       PP_TEMP_EDGE,
> > +       PP_TEMP_MEM,
> > +       PP_TEMP_MAX
> > +};
> > +
> >  #define PP_GROUP_MASK        0xF0000000
> >  #define PP_GROUP_SHIFT       28
> >
> > --
> > 2.21.0
> >
> > _______________________________________________
> > amd-gfx mailing list
> > amd-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 6/6] drm/amd/powerplay: correct SOC15 hotspot temperature critical max
       [not found]     ` <20190418090302.8963-6-evan.quan-5C7GfCeVMHo@public.gmane.org>
@ 2019-04-19 15:21       ` Alex Deucher
  0 siblings, 0 replies; 13+ messages in thread
From: Alex Deucher @ 2019-04-19 15:21 UTC (permalink / raw)
  To: Evan Quan; +Cc: amd-gfx list

On Thu, Apr 18, 2019 at 5:03 AM Evan Quan <evan.quan@amd.com> wrote:
>
> Correct Vega10, Vega12 and Vega20 hotspot temperature critical max
> values.
>
> Change-Id: I77bb77761e8530066ec4f3225f8555cf8f672348
> Signed-off-by: Evan Quan <evan.quan@amd.com>

Acked-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 2 +-
>  drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 2 +-
>  drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 2 +-
>  3 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> index f4ecbbe854ee..efd9947eb723 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> @@ -4871,7 +4871,7 @@ static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
>
>         memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
>
> -       thermal_data->max = table_info->tdp_table->usSoftwareShutdownTemp *
> +       thermal_data->max = pp_table->ThotspotLimit *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>         thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
> index 86c48cb56f6c..ba35118a35b8 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
> @@ -2574,7 +2574,7 @@ static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
>
>         memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
>
> -       thermal_data->max = pptable_information->us_software_shutdown_temp *
> +       thermal_data->max = pp_table->ThotspotLimit *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>         thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> index 72a71a002f0b..8dcd04561e8f 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> @@ -3989,7 +3989,7 @@ static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
>
>         memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
>
> -       thermal_data->max = pptable_information->us_software_shutdown_temp *
> +       thermal_data->max = pp_table->ThotspotLimit *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
>         thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
>                 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
> --
> 2.21.0
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2019-04-19 15:21 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-18  9:02 [PATCH 1/6] drm/amd/powerplay: support hwmon temperature channel labels Evan Quan
     [not found] ` <20190418090302.8963-1-evan.quan-5C7GfCeVMHo@public.gmane.org>
2019-04-18  9:02   ` [PATCH 2/6] drm/amd/powerplay: support edge/memory critical limit values Evan Quan
     [not found]     ` <20190418090302.8963-2-evan.quan-5C7GfCeVMHo@public.gmane.org>
2019-04-19 15:09       ` Alex Deucher
2019-04-18  9:02   ` [PATCH 3/6] drm/amd/powerplay: support temperature emergency max values Evan Quan
     [not found]     ` <20190418090302.8963-3-evan.quan-5C7GfCeVMHo@public.gmane.org>
2019-04-19 15:10       ` Alex Deucher
2019-04-18  9:03   ` [PATCH 4/6] drm/amd/powerplay: support SMU metrics table on Vega12 Evan Quan
     [not found]     ` <20190418090302.8963-4-evan.quan-5C7GfCeVMHo@public.gmane.org>
2019-04-19 15:11       ` Alex Deucher
2019-04-18  9:03   ` [PATCH 5/6] drm/amd/powerplay: expose current edge and memory temperatures Evan Quan
     [not found]     ` <20190418090302.8963-5-evan.quan-5C7GfCeVMHo@public.gmane.org>
2019-04-19 15:19       ` Alex Deucher
2019-04-18  9:03   ` [PATCH 6/6] drm/amd/powerplay: correct SOC15 hotspot temperature critical max Evan Quan
     [not found]     ` <20190418090302.8963-6-evan.quan-5C7GfCeVMHo@public.gmane.org>
2019-04-19 15:21       ` Alex Deucher
2019-04-19 15:08   ` [PATCH 1/6] drm/amd/powerplay: support hwmon temperature channel labels Alex Deucher
     [not found]     ` <CADnq5_PU-E_bgMNq5_ZokpXrKjxxE33uuhNYfqY2ggBOeePkhg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2019-04-19 15:20       ` Alex Deucher

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.