All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/5] 0 MHz is not a valid current frequency
@ 2021-10-13  3:10 Luben Tuikov
  2021-10-13  3:10 ` [PATCH 1/5] drm/amd/pm: Slight function rename Luben Tuikov
                   ` (5 more replies)
  0 siblings, 6 replies; 29+ messages in thread
From: Luben Tuikov @ 2021-10-13  3:10 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher, Luben Tuikov

Some ASIC support low-power functionality for the whole ASIC or just
an IP block. When in such low-power mode, some sysfs interfaces would
report a frequency of 0, e.g.,

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 500Mhz 
1: 0Mhz *
2: 2200Mhz 
$_

An operating frequency of 0 MHz doesn't make sense, and this interface
is designed to report only operating clock frequencies, i.e. non-zero,
and possibly the current one.

When in this low-power state, round to the smallest
operating frequency, for this interface, as follows,

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 500Mhz *
1: 2200Mhz 
$_

Luben Tuikov (5):
  drm/amd/pm: Slight function rename
  drm/amd/pm: Rename cur_value to curr_value
  drm/amd/pm: Rename freq_values --> freq_value
  dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
  dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency

 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +++++++++------
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ++++++++++++-------
 2 files changed, 86 insertions(+), 47 deletions(-)

-- 
2.33.1.558.g2bd2f258f4


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 1/5] drm/amd/pm: Slight function rename
  2021-10-13  3:10 [PATCH 0/5] 0 MHz is not a valid current frequency Luben Tuikov
@ 2021-10-13  3:10 ` Luben Tuikov
  2021-10-13 14:50   ` Russell, Kent
  2021-10-18 22:52   ` Paul Menzel
  2021-10-13  3:10 ` [PATCH 2/5] drm/amd/pm: Rename cur_value to curr_value Luben Tuikov
                   ` (4 subsequent siblings)
  5 siblings, 2 replies; 29+ messages in thread
From: Luben Tuikov @ 2021-10-13  3:10 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher, Luben Tuikov

Rename
sienna_cichlid_is_support_fine_grained_dpm() to
sienna_cichlid_support_fine_grained_dpm().

Rename
navi10_is_support_fine_grained_dpm() to
navi10_supports_fine_grained_dpm().

Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c         | 7 ++++---
 drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 7 ++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 71161f6b78fea9..0fe9790f67f5af 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -1231,7 +1231,8 @@ static int navi10_get_current_clk_freq_by_table(struct smu_context *smu,
 					   value);
 }
 
-static bool navi10_is_support_fine_grained_dpm(struct smu_context *smu, enum smu_clk_type clk_type)
+static bool navi10_supports_fine_grained_dpm(struct smu_context *smu,
+					     enum smu_clk_type clk_type)
 {
 	PPTable_t *pptable = smu->smu_table.driver_pptable;
 	DpmDescriptor_t *dpm_desc = NULL;
@@ -1299,7 +1300,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
 		if (ret)
 			return size;
 
-		if (!navi10_is_support_fine_grained_dpm(smu, clk_type)) {
+		if (!navi10_supports_fine_grained_dpm(smu, clk_type)) {
 			for (i = 0; i < count; i++) {
 				ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, i, &value);
 				if (ret)
@@ -1465,7 +1466,7 @@ static int navi10_force_clk_levels(struct smu_context *smu,
 	case SMU_UCLK:
 	case SMU_FCLK:
 		/* There is only 2 levels for fine grained DPM */
-		if (navi10_is_support_fine_grained_dpm(smu, clk_type)) {
+		if (navi10_supports_fine_grained_dpm(smu, clk_type)) {
 			soft_max_level = (soft_max_level >= 1 ? 1 : 0);
 			soft_min_level = (soft_min_level >= 1 ? 1 : 0);
 		}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 15e66e1912de33..3f5721baa5ff50 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -1006,7 +1006,8 @@ static int sienna_cichlid_get_current_clk_freq_by_table(struct smu_context *smu,
 
 }
 
-static bool sienna_cichlid_is_support_fine_grained_dpm(struct smu_context *smu, enum smu_clk_type clk_type)
+static bool sienna_cichlid_supports_fine_grained_dpm(struct smu_context *smu,
+						     enum smu_clk_type clk_type)
 {
 	DpmDescriptor_t *dpm_desc = NULL;
 	DpmDescriptor_t *table_member;
@@ -1084,7 +1085,7 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu,
 		if (ret)
 			goto print_clk_out;
 
-		if (!sienna_cichlid_is_support_fine_grained_dpm(smu, clk_type)) {
+		if (!sienna_cichlid_supports_fine_grained_dpm(smu, clk_type)) {
 			for (i = 0; i < count; i++) {
 				ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, i, &value);
 				if (ret)
@@ -1235,7 +1236,7 @@ static int sienna_cichlid_force_clk_levels(struct smu_context *smu,
 	case SMU_UCLK:
 	case SMU_FCLK:
 		/* There is only 2 levels for fine grained DPM */
-		if (sienna_cichlid_is_support_fine_grained_dpm(smu, clk_type)) {
+		if (sienna_cichlid_supports_fine_grained_dpm(smu, clk_type)) {
 			soft_max_level = (soft_max_level >= 1 ? 1 : 0);
 			soft_min_level = (soft_min_level >= 1 ? 1 : 0);
 		}
-- 
2.33.1.558.g2bd2f258f4


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 2/5] drm/amd/pm: Rename cur_value to curr_value
  2021-10-13  3:10 [PATCH 0/5] 0 MHz is not a valid current frequency Luben Tuikov
  2021-10-13  3:10 ` [PATCH 1/5] drm/amd/pm: Slight function rename Luben Tuikov
@ 2021-10-13  3:10 ` Luben Tuikov
  2021-10-13  3:10 ` [PATCH 3/5] drm/amd/pm: Rename freq_values --> freq_value Luben Tuikov
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 29+ messages in thread
From: Luben Tuikov @ 2021-10-13  3:10 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher, Luben Tuikov

Rename "cur_value", which stands for "cursor
value" to "curr_value", which stands for "current
value".

Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 12 ++++++------
 .../drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 15 ++++++++-------
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 0fe9790f67f5af..f810549df493d5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -1267,7 +1267,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
 {
 	uint16_t *curve_settings;
 	int i, size = 0, ret = 0;
-	uint32_t cur_value = 0, value = 0, count = 0;
+	uint32_t curr_value = 0, value = 0, count = 0;
 	uint32_t freq_values[3] = {0};
 	uint32_t mark_index = 0;
 	struct smu_table_context *table_context = &smu->smu_table;
@@ -1292,7 +1292,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
 	case SMU_VCLK:
 	case SMU_DCLK:
 	case SMU_DCEFCLK:
-		ret = navi10_get_current_clk_freq_by_table(smu, clk_type, &cur_value);
+		ret = navi10_get_current_clk_freq_by_table(smu, clk_type, &curr_value);
 		if (ret)
 			return size;
 
@@ -1307,7 +1307,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
 					return size;
 
 				size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, value,
-						cur_value == value ? "*" : "");
+						curr_value == value ? "*" : "");
 			}
 		} else {
 			ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, &freq_values[0]);
@@ -1317,9 +1317,9 @@ static int navi10_print_clk_levels(struct smu_context *smu,
 			if (ret)
 				return size;
 
-			freq_values[1] = cur_value;
-			mark_index = cur_value == freq_values[0] ? 0 :
-				     cur_value == freq_values[2] ? 2 : 1;
+			freq_values[1] = curr_value;
+			mark_index = curr_value == freq_values[0] ? 0 :
+				     curr_value == freq_values[2] ? 2 : 1;
 			if (mark_index != 1)
 				freq_values[1] = (freq_values[0] + freq_values[2]) / 2;
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 3f5721baa5ff50..3ebded3a99b5f2 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -1052,7 +1052,7 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu,
 	OverDriveTable_t *od_table =
 		(OverDriveTable_t *)table_context->overdrive_table;
 	int i, size = 0, ret = 0;
-	uint32_t cur_value = 0, value = 0, count = 0;
+	uint32_t curr_value = 0, value = 0, count = 0;
 	uint32_t freq_values[3] = {0};
 	uint32_t mark_index = 0;
 	uint32_t gen_speed, lane_width;
@@ -1073,10 +1073,11 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu,
 	case SMU_DCLK:
 	case SMU_DCLK1:
 	case SMU_DCEFCLK:
-		ret = sienna_cichlid_get_current_clk_freq_by_table(smu, clk_type, &cur_value);
+		ret = sienna_cichlid_get_current_clk_freq_by_table(smu, clk_type, &curr_value);
 		if (ret)
 			goto print_clk_out;
 
+
 		/* no need to disable gfxoff when retrieving the current gfxclk */
 		if ((clk_type == SMU_GFXCLK) || (clk_type == SMU_SCLK))
 			amdgpu_gfx_off_ctrl(adev, false);
@@ -1092,7 +1093,7 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu,
 					goto print_clk_out;
 
 				size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, value,
-						cur_value == value ? "*" : "");
+						curr_value == value ? "*" : "");
 			}
 		} else {
 			ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, &freq_values[0]);
@@ -1102,9 +1103,9 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu,
 			if (ret)
 				goto print_clk_out;
 
-			freq_values[1] = cur_value;
-			mark_index = cur_value == freq_values[0] ? 0 :
-				     cur_value == freq_values[2] ? 2 : 1;
+			freq_values[1] = curr_value;
+			mark_index = curr_value == freq_values[0] ? 0 :
+				     curr_value == freq_values[2] ? 2 : 1;
 
 			count = 3;
 			if (mark_index != 1) {
@@ -1114,7 +1115,7 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu,
 
 			for (i = 0; i < count; i++) {
 				size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, freq_values[i],
-						cur_value  == freq_values[i] ? "*" : "");
+						curr_value  == freq_values[i] ? "*" : "");
 			}
 
 		}
-- 
2.33.1.558.g2bd2f258f4


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 3/5] drm/amd/pm: Rename freq_values --> freq_value
  2021-10-13  3:10 [PATCH 0/5] 0 MHz is not a valid current frequency Luben Tuikov
  2021-10-13  3:10 ` [PATCH 1/5] drm/amd/pm: Slight function rename Luben Tuikov
  2021-10-13  3:10 ` [PATCH 2/5] drm/amd/pm: Rename cur_value to curr_value Luben Tuikov
@ 2021-10-13  3:10 ` Luben Tuikov
  2021-10-13  3:10 ` [PATCH 4/5] dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency Luben Tuikov
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 29+ messages in thread
From: Luben Tuikov @ 2021-10-13  3:10 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher, Luben Tuikov

By usage: read freq_values[x] to freq_value[x].

Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
---
 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c    | 16 ++++++++--------
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c    | 18 +++++++++---------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index f810549df493d5..646e9bbf8af42a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -1268,7 +1268,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
 	uint16_t *curve_settings;
 	int i, size = 0, ret = 0;
 	uint32_t curr_value = 0, value = 0, count = 0;
-	uint32_t freq_values[3] = {0};
+	uint32_t freq_value[3] = {0, 0, 0};
 	uint32_t mark_index = 0;
 	struct smu_table_context *table_context = &smu->smu_table;
 	uint32_t gen_speed, lane_width;
@@ -1310,21 +1310,21 @@ static int navi10_print_clk_levels(struct smu_context *smu,
 						curr_value == value ? "*" : "");
 			}
 		} else {
-			ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, &freq_values[0]);
+			ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, &freq_value[0]);
 			if (ret)
 				return size;
-			ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, count - 1, &freq_values[2]);
+			ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, count - 1, &freq_value[2]);
 			if (ret)
 				return size;
 
-			freq_values[1] = curr_value;
-			mark_index = curr_value == freq_values[0] ? 0 :
-				     curr_value == freq_values[2] ? 2 : 1;
+			freq_value[1] = curr_value;
+			mark_index = curr_value == freq_value[0] ? 0 :
+				     curr_value == freq_value[2] ? 2 : 1;
 			if (mark_index != 1)
-				freq_values[1] = (freq_values[0] + freq_values[2]) / 2;
+				freq_value[1] = (freq_value[0] + freq_value[2]) / 2;
 
 			for (i = 0; i < 3; i++) {
-				size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, freq_values[i],
+				size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, freq_value[i],
 						i == mark_index ? "*" : "");
 			}
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 3ebded3a99b5f2..f630d5e928ccfe 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -1053,7 +1053,7 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu,
 		(OverDriveTable_t *)table_context->overdrive_table;
 	int i, size = 0, ret = 0;
 	uint32_t curr_value = 0, value = 0, count = 0;
-	uint32_t freq_values[3] = {0};
+	uint32_t freq_value[3] = {0, 0, 0};
 	uint32_t mark_index = 0;
 	uint32_t gen_speed, lane_width;
 	uint32_t min_value, max_value;
@@ -1096,26 +1096,26 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu,
 						curr_value == value ? "*" : "");
 			}
 		} else {
-			ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, &freq_values[0]);
+			ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, &freq_value[0]);
 			if (ret)
 				goto print_clk_out;
-			ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, count - 1, &freq_values[2]);
+			ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, count - 1, &freq_value[2]);
 			if (ret)
 				goto print_clk_out;
 
-			freq_values[1] = curr_value;
-			mark_index = curr_value == freq_values[0] ? 0 :
-				     curr_value == freq_values[2] ? 2 : 1;
+			freq_value[1] = curr_value;
+			mark_index = curr_value == freq_value[0] ? 0 :
+				     curr_value == freq_value[2] ? 2 : 1;
 
 			count = 3;
 			if (mark_index != 1) {
 				count = 2;
-				freq_values[1] = freq_values[2];
+				freq_value[1] = freq_value[2];
 			}
 
 			for (i = 0; i < count; i++) {
-				size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, freq_values[i],
-						curr_value  == freq_values[i] ? "*" : "");
+				size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, freq_value[i],
+						curr_value  == freq_value[i] ? "*" : "");
 			}
 
 		}
-- 
2.33.1.558.g2bd2f258f4


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 4/5] dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
  2021-10-13  3:10 [PATCH 0/5] 0 MHz is not a valid current frequency Luben Tuikov
                   ` (2 preceding siblings ...)
  2021-10-13  3:10 ` [PATCH 3/5] drm/amd/pm: Rename freq_values --> freq_value Luben Tuikov
@ 2021-10-13  3:10 ` Luben Tuikov
  2021-10-13  3:10 ` [PATCH 5/5] dpm/amd/pm: Navi10: " Luben Tuikov
  2021-10-13  4:14 ` [PATCH 0/5] 0 MHz is not a valid current frequency Lazar, Lijo
  5 siblings, 0 replies; 29+ messages in thread
From: Luben Tuikov @ 2021-10-13  3:10 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher, Luben Tuikov

A current value of a clock frequency of 0, means
that the IP block is in some kind of low power
state. Ignore it and don't report it here. Here we
only report the possible operating (non-zero)
frequencies of the block requested. So, if the
current clock value is 0, then report as the
current clock the minimum operating one, which is
non-zero.

Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
---
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 57 +++++++++++++------
 1 file changed, 39 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index f630d5e928ccfe..00be2b851baf93 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -1054,10 +1054,10 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu,
 	int i, size = 0, ret = 0;
 	uint32_t curr_value = 0, value = 0, count = 0;
 	uint32_t freq_value[3] = {0, 0, 0};
-	uint32_t mark_index = 0;
 	uint32_t gen_speed, lane_width;
 	uint32_t min_value, max_value;
 	uint32_t smu_version;
+	bool     fine_grained;
 
 	smu_cmn_get_sysfs_buf(&buf, &size);
 
@@ -1077,6 +1077,22 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu,
 		if (ret)
 			goto print_clk_out;
 
+		ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0,
+						      &freq_value[0]);
+		if (ret)
+			goto print_clk_out;
+
+		/* A current value of a clock frequency of 0, means
+		 * that the IP block is in some kind of low power
+		 * state. Ignore it and don't report it here. Here we
+		 * only report the possible operating (non-zero)
+		 * frequencies of the block requested. So, if the
+		 * current clock value is 0, then report as the
+		 * current clock the minimum operating one, which is
+		 * non-zero.
+		 */
+		if (curr_value == 0)
+			curr_value = freq_value[0];
 
 		/* no need to disable gfxoff when retrieving the current gfxclk */
 		if ((clk_type == SMU_GFXCLK) || (clk_type == SMU_SCLK))
@@ -1086,38 +1102,43 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu,
 		if (ret)
 			goto print_clk_out;
 
-		if (!sienna_cichlid_supports_fine_grained_dpm(smu, clk_type)) {
-			for (i = 0; i < count; i++) {
+		fine_grained = sienna_cichlid_supports_fine_grained_dpm(smu, clk_type);
+		if (!fine_grained) {
+			/* We already got the 0-th index--print it
+			 * here and continue thereafter.
+			 */
+			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", 0, freq_value[0],
+					      curr_value == freq_value[0] ? "*" : "");
+			for (i = 1; i < count; i++) {
 				ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, i, &value);
 				if (ret)
 					goto print_clk_out;
-
 				size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, value,
 						curr_value == value ? "*" : "");
 			}
 		} else {
-			ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, &freq_value[0]);
-			if (ret)
-				goto print_clk_out;
+			freq_value[1] = curr_value;
 			ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, count - 1, &freq_value[2]);
 			if (ret)
 				goto print_clk_out;
 
-			freq_value[1] = curr_value;
-			mark_index = curr_value == freq_value[0] ? 0 :
-				     curr_value == freq_value[2] ? 2 : 1;
-
-			count = 3;
-			if (mark_index != 1) {
+			if (freq_value[1] == freq_value[0]) {
+				i = 1;
+				count = 3;
+			} else if (freq_value[1] == freq_value[2]) {
+				i = 0;
 				count = 2;
-				freq_value[1] = freq_value[2];
+			} else {
+				i = 0;
+				count = 3;
 			}
 
-			for (i = 0; i < count; i++) {
-				size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, freq_value[i],
-						curr_value  == freq_value[i] ? "*" : "");
+			for ( ; i < count; i++) {
+				size += sysfs_emit_at(buf, size,
+						      "%d: %uMhz %s\n",
+						      i, freq_value[i],
+						      curr_value == freq_value[i] ? "*" : "");
 			}
-
 		}
 		break;
 	case SMU_PCIE:
-- 
2.33.1.558.g2bd2f258f4


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 5/5] dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency
  2021-10-13  3:10 [PATCH 0/5] 0 MHz is not a valid current frequency Luben Tuikov
                   ` (3 preceding siblings ...)
  2021-10-13  3:10 ` [PATCH 4/5] dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency Luben Tuikov
@ 2021-10-13  3:10 ` Luben Tuikov
  2021-10-13  4:14 ` [PATCH 0/5] 0 MHz is not a valid current frequency Lazar, Lijo
  5 siblings, 0 replies; 29+ messages in thread
From: Luben Tuikov @ 2021-10-13  3:10 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher, Luben Tuikov

A current value of a clock frequency of 0, means
that the IP block is in some kind of low power
state. Ignore it and don't report it here. Here we
only report the possible operating (non-zero)
frequencies of the block requested. So, if the
current clock value is 0, then report as the
current clock the minimum operating one, which is
non-zero.

Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
---
 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 45 ++++++++++++-------
 1 file changed, 30 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 646e9bbf8af42a..de1a558dc81047 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -1269,7 +1269,6 @@ static int navi10_print_clk_levels(struct smu_context *smu,
 	int i, size = 0, ret = 0;
 	uint32_t curr_value = 0, value = 0, count = 0;
 	uint32_t freq_value[3] = {0, 0, 0};
-	uint32_t mark_index = 0;
 	struct smu_table_context *table_context = &smu->smu_table;
 	uint32_t gen_speed, lane_width;
 	struct smu_dpm_context *smu_dpm = &smu->smu_dpm;
@@ -1279,6 +1278,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
 		(OverDriveTable_t *)table_context->overdrive_table;
 	struct smu_11_0_overdrive_table *od_settings = smu->od_settings;
 	uint32_t min_value, max_value;
+	bool fine_grained;
 
 	smu_cmn_get_sysfs_buf(&buf, &size);
 
@@ -1296,12 +1296,23 @@ static int navi10_print_clk_levels(struct smu_context *smu,
 		if (ret)
 			return size;
 
+		ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0,
+						      &freq_value[0]);
+		if (ret)
+			return size;
+
+		if (curr_value == 0)
+			curr_value = freq_value[0];
+
 		ret = smu_v11_0_get_dpm_level_count(smu, clk_type, &count);
 		if (ret)
 			return size;
 
-		if (!navi10_supports_fine_grained_dpm(smu, clk_type)) {
-			for (i = 0; i < count; i++) {
+		fine_grained = navi10_supports_fine_grained_dpm(smu, clk_type);
+		if (!fine_grained) {
+			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", 0, freq_value[0],
+					      curr_value == freq_value[0] ? "*" : "");
+			for (i = 1; i < count; i++) {
 				ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, i, &value);
 				if (ret)
 					return size;
@@ -1310,24 +1321,28 @@ static int navi10_print_clk_levels(struct smu_context *smu,
 						curr_value == value ? "*" : "");
 			}
 		} else {
-			ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, &freq_value[0]);
-			if (ret)
-				return size;
+			freq_value[1] = curr_value;
 			ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, count - 1, &freq_value[2]);
 			if (ret)
 				return size;
 
-			freq_value[1] = curr_value;
-			mark_index = curr_value == freq_value[0] ? 0 :
-				     curr_value == freq_value[2] ? 2 : 1;
-			if (mark_index != 1)
-				freq_value[1] = (freq_value[0] + freq_value[2]) / 2;
-
-			for (i = 0; i < 3; i++) {
-				size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, freq_value[i],
-						i == mark_index ? "*" : "");
+			if (freq_value[1] == freq_value[0]) {
+				i = 1;
+				count = 3;
+			} else if (freq_value[1] == freq_value[2]) {
+				i = 0;
+				count = 2;
+			} else {
+				i = 0;
+				count = 3;
 			}
 
+			for ( ; i < count; i++) {
+				size += sysfs_emit_at(buf, size,
+						      "%d: %uMhz %s\n",
+						      i, freq_value[i],
+						      curr_value == freq_value[i] ? "*" : "");
+			}
 		}
 		break;
 	case SMU_PCIE:
-- 
2.33.1.558.g2bd2f258f4


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-13  3:10 [PATCH 0/5] 0 MHz is not a valid current frequency Luben Tuikov
                   ` (4 preceding siblings ...)
  2021-10-13  3:10 ` [PATCH 5/5] dpm/amd/pm: Navi10: " Luben Tuikov
@ 2021-10-13  4:14 ` Lazar, Lijo
  2021-10-13  7:06   ` Quan, Evan
                     ` (2 more replies)
  5 siblings, 3 replies; 29+ messages in thread
From: Lazar, Lijo @ 2021-10-13  4:14 UTC (permalink / raw)
  To: Luben Tuikov, amd-gfx; +Cc: Alex Deucher



On 10/13/2021 8:40 AM, Luben Tuikov wrote:
> Some ASIC support low-power functionality for the whole ASIC or just
> an IP block. When in such low-power mode, some sysfs interfaces would
> report a frequency of 0, e.g.,
> 
> $cat /sys/class/drm/card0/device/pp_dpm_sclk
> 0: 500Mhz
> 1: 0Mhz *
> 2: 2200Mhz
> $_
> 
> An operating frequency of 0 MHz doesn't make sense, and this interface
> is designed to report only operating clock frequencies, i.e. non-zero,
> and possibly the current one.
> 
> When in this low-power state, round to the smallest
> operating frequency, for this interface, as follows,
> 

Would rather avoid this -

1) It is manipulating FW reported value. If at all there is an uncaught 
issue in FW reporting of frequency values, that is masked here.
2) Otherwise, if 0MHz is described as GFX power gated case, this 
provides a convenient interface to check if GFX is power gated.

If seeing a '0' is not pleasing, consider changing to something like
	"NA" - not available (frequency cannot be fetched at the moment).

Thanks,
Lijo

> $cat /sys/class/drm/card0/device/pp_dpm_sclk
> 0: 500Mhz *
> 1: 2200Mhz
> $_
> 
> Luben Tuikov (5):
>    drm/amd/pm: Slight function rename
>    drm/amd/pm: Rename cur_value to curr_value
>    drm/amd/pm: Rename freq_values --> freq_value
>    dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
>    dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency
> 
>   .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +++++++++------
>   .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ++++++++++++-------
>   2 files changed, 86 insertions(+), 47 deletions(-)
> 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-13  4:14 ` [PATCH 0/5] 0 MHz is not a valid current frequency Lazar, Lijo
@ 2021-10-13  7:06   ` Quan, Evan
  2021-10-13 13:58   ` Alex Deucher
  2021-10-13 16:22   ` Luben Tuikov
  2 siblings, 0 replies; 29+ messages in thread
From: Quan, Evan @ 2021-10-13  7:06 UTC (permalink / raw)
  To: Lazar, Lijo, Tuikov, Luben, amd-gfx; +Cc: Deucher, Alexander

[AMD Official Use Only]

I agree with Lijo. 
Reporting a "round to the smallest operating frequency" just makes user more confusing. 
As per designed, the frequency marked with "*" should reflect the current clock frequency.

BR
Evan
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
> Lazar, Lijo
> Sent: Wednesday, October 13, 2021 12:14 PM
> To: Tuikov, Luben <Luben.Tuikov@amd.com>; amd-
> gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
> Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency
> 
> 
> 
> On 10/13/2021 8:40 AM, Luben Tuikov wrote:
> > Some ASIC support low-power functionality for the whole ASIC or just
> > an IP block. When in such low-power mode, some sysfs interfaces would
> > report a frequency of 0, e.g.,
> >
> > $cat /sys/class/drm/card0/device/pp_dpm_sclk
> > 0: 500Mhz
> > 1: 0Mhz *
> > 2: 2200Mhz
> > $_
> >
> > An operating frequency of 0 MHz doesn't make sense, and this interface
> > is designed to report only operating clock frequencies, i.e. non-zero,
> > and possibly the current one.
> >
> > When in this low-power state, round to the smallest operating
> > frequency, for this interface, as follows,
> >
> 
> Would rather avoid this -
> 
> 1) It is manipulating FW reported value. If at all there is an uncaught issue in
> FW reporting of frequency values, that is masked here.
> 2) Otherwise, if 0MHz is described as GFX power gated case, this provides a
> convenient interface to check if GFX is power gated.
> 
> If seeing a '0' is not pleasing, consider changing to something like
> 	"NA" - not available (frequency cannot be fetched at the moment).
> 
> Thanks,
> Lijo
> 
> > $cat /sys/class/drm/card0/device/pp_dpm_sclk
> > 0: 500Mhz *
> > 1: 2200Mhz
> > $_
> >
> > Luben Tuikov (5):
> >    drm/amd/pm: Slight function rename
> >    drm/amd/pm: Rename cur_value to curr_value
> >    drm/amd/pm: Rename freq_values --> freq_value
> >    dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
> >    dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency
> >
> >   .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +++++++++------
> >   .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ++++++++++++-----
> --
> >   2 files changed, 86 insertions(+), 47 deletions(-)
> >

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-13  4:14 ` [PATCH 0/5] 0 MHz is not a valid current frequency Lazar, Lijo
  2021-10-13  7:06   ` Quan, Evan
@ 2021-10-13 13:58   ` Alex Deucher
  2021-10-13 14:17     ` Lazar, Lijo
  2021-10-13 16:22   ` Luben Tuikov
  2 siblings, 1 reply; 29+ messages in thread
From: Alex Deucher @ 2021-10-13 13:58 UTC (permalink / raw)
  To: Lazar, Lijo; +Cc: Luben Tuikov, amd-gfx list, Alex Deucher

On Wed, Oct 13, 2021 at 12:14 AM Lazar, Lijo <lijo.lazar@amd.com> wrote:
>
>
>
> On 10/13/2021 8:40 AM, Luben Tuikov wrote:
> > Some ASIC support low-power functionality for the whole ASIC or just
> > an IP block. When in such low-power mode, some sysfs interfaces would
> > report a frequency of 0, e.g.,
> >
> > $cat /sys/class/drm/card0/device/pp_dpm_sclk
> > 0: 500Mhz
> > 1: 0Mhz *
> > 2: 2200Mhz
> > $_
> >
> > An operating frequency of 0 MHz doesn't make sense, and this interface
> > is designed to report only operating clock frequencies, i.e. non-zero,
> > and possibly the current one.
> >
> > When in this low-power state, round to the smallest
> > operating frequency, for this interface, as follows,
> >
>
> Would rather avoid this -
>
> 1) It is manipulating FW reported value. If at all there is an uncaught
> issue in FW reporting of frequency values, that is masked here.
> 2) Otherwise, if 0MHz is described as GFX power gated case, this
> provides a convenient interface to check if GFX is power gated.
>
> If seeing a '0' is not pleasing, consider changing to something like
>         "NA" - not available (frequency cannot be fetched at the moment).
>

I don't think this interface is really supposed to be the way to get
the current clock, although some use it that way.  It's supposed to
show the DPM states and which are active.  conflating the interface
with other information confuses things in my opinion.  Why is the
current clock less than the minimum clock?  Whether or not an IP is
turned off or in deep sleep or not is independent of DPM states.  When
the IP is active, it will never go below the minimum DPM level.  If we
want to query deep sleep or gfxoff we can use the amdgpu_pm_info
debugfs interface or add some other new interface.

Alex


> Thanks,
> Lijo
>
> > $cat /sys/class/drm/card0/device/pp_dpm_sclk
> > 0: 500Mhz *
> > 1: 2200Mhz
> > $_
> >
> > Luben Tuikov (5):
> >    drm/amd/pm: Slight function rename
> >    drm/amd/pm: Rename cur_value to curr_value
> >    drm/amd/pm: Rename freq_values --> freq_value
> >    dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
> >    dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency
> >
> >   .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +++++++++------
> >   .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ++++++++++++-------
> >   2 files changed, 86 insertions(+), 47 deletions(-)
> >

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-13 13:58   ` Alex Deucher
@ 2021-10-13 14:17     ` Lazar, Lijo
  0 siblings, 0 replies; 29+ messages in thread
From: Lazar, Lijo @ 2021-10-13 14:17 UTC (permalink / raw)
  To: Alex Deucher; +Cc: Luben Tuikov, amd-gfx list, Alex Deucher



On 10/13/2021 7:28 PM, Alex Deucher wrote:
> On Wed, Oct 13, 2021 at 12:14 AM Lazar, Lijo <lijo.lazar@amd.com> wrote:
>>
>>
>>
>> On 10/13/2021 8:40 AM, Luben Tuikov wrote:
>>> Some ASIC support low-power functionality for the whole ASIC or just
>>> an IP block. When in such low-power mode, some sysfs interfaces would
>>> report a frequency of 0, e.g.,
>>>
>>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>>> 0: 500Mhz
>>> 1: 0Mhz *
>>> 2: 2200Mhz
>>> $_
>>>
>>> An operating frequency of 0 MHz doesn't make sense, and this interface
>>> is designed to report only operating clock frequencies, i.e. non-zero,
>>> and possibly the current one.
>>>
>>> When in this low-power state, round to the smallest
>>> operating frequency, for this interface, as follows,
>>>
>>
>> Would rather avoid this -
>>
>> 1) It is manipulating FW reported value. If at all there is an uncaught
>> issue in FW reporting of frequency values, that is masked here.
>> 2) Otherwise, if 0MHz is described as GFX power gated case, this
>> provides a convenient interface to check if GFX is power gated.
>>
>> If seeing a '0' is not pleasing, consider changing to something like
>>          "NA" - not available (frequency cannot be fetched at the moment).
>>
> 
> I don't think this interface is really supposed to be the way to get
> the current clock, although some use it that way.  It's supposed to
> show the DPM states and which are active.  conflating the interface
> with other information confuses things in my opinion.  Why is the
> current clock less than the minimum clock?  Whether or not an IP is
> turned off or in deep sleep or not is independent of DPM states.  When
> the IP is active, it will never go below the minimum DPM level.  If we
> want to query deep sleep or gfxoff we can use the amdgpu_pm_info
> debugfs interface or add some other new interface.
> 

The idea of DPM level is deprecated with fine grained clock which 
provides only min and max. For fine grained clock, we fetch the current 
clock frequency and show it as an artificial DPM level between min/max. 
That itself should have confused users but it is not which means the 
users use the * to fetch the current frequency and not really bothered 
about the DPM level per se.

Also, some ASICs define 'min' as as the least possible freq (that 
happens during a throttle) and not the DPM level 0 min in the 
traditional sense (that is defined as idle frequency which doesn't come 
into min/max levels). It's usually from the idle frequency that GFX gets 
power gated. Showing a * against min in such cases would be confusing 
because that could be misinterpreted as a throttle scenario.

Thanks,
Lijo

> Alex
> 
> 
>> Thanks,
>> Lijo
>>
>>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>>> 0: 500Mhz *
>>> 1: 2200Mhz
>>> $_
>>>
>>> Luben Tuikov (5):
>>>     drm/amd/pm: Slight function rename
>>>     drm/amd/pm: Rename cur_value to curr_value
>>>     drm/amd/pm: Rename freq_values --> freq_value
>>>     dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
>>>     dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency
>>>
>>>    .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +++++++++------
>>>    .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ++++++++++++-------
>>>    2 files changed, 86 insertions(+), 47 deletions(-)
>>>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 1/5] drm/amd/pm: Slight function rename
  2021-10-13  3:10 ` [PATCH 1/5] drm/amd/pm: Slight function rename Luben Tuikov
@ 2021-10-13 14:50   ` Russell, Kent
  2021-10-13 14:53     ` Luben Tuikov
  2021-10-18 22:52   ` Paul Menzel
  1 sibling, 1 reply; 29+ messages in thread
From: Russell, Kent @ 2021-10-13 14:50 UTC (permalink / raw)
  To: Tuikov, Luben, amd-gfx; +Cc: Deucher, Alexander, Tuikov, Luben

[AMD Official Use Only]

Pedantic tiny thing:

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Luben Tuikov
> Sent: Tuesday, October 12, 2021 11:11 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Tuikov, Luben
> <Luben.Tuikov@amd.com>
> Subject: [PATCH 1/5] drm/amd/pm: Slight function rename
> 
> Rename
> sienna_cichlid_is_support_fine_grained_dpm() to
> sienna_cichlid_support_fine_grained_dpm().
^You would want cichlid_supports_fine_grained_dpm . The function is correct below, but anyone grepping git logs would miss it.

 Kent

> 
> Rename
> navi10_is_support_fine_grained_dpm() to
> navi10_supports_fine_grained_dpm().
> 
> Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
> ---
>  drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c         | 7 ++++---
>  drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 7 ++++---
>  2 files changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> index 71161f6b78fea9..0fe9790f67f5af 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> @@ -1231,7 +1231,8 @@ static int navi10_get_current_clk_freq_by_table(struct
> smu_context *smu,
>  					   value);
>  }
> 
> -static bool navi10_is_support_fine_grained_dpm(struct smu_context *smu, enum
> smu_clk_type clk_type)
> +static bool navi10_supports_fine_grained_dpm(struct smu_context *smu,
> +					     enum smu_clk_type clk_type)
>  {
>  	PPTable_t *pptable = smu->smu_table.driver_pptable;
>  	DpmDescriptor_t *dpm_desc = NULL;
> @@ -1299,7 +1300,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
>  		if (ret)
>  			return size;
> 
> -		if (!navi10_is_support_fine_grained_dpm(smu, clk_type)) {
> +		if (!navi10_supports_fine_grained_dpm(smu, clk_type)) {
>  			for (i = 0; i < count; i++) {
>  				ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, i,
> &value);
>  				if (ret)
> @@ -1465,7 +1466,7 @@ static int navi10_force_clk_levels(struct smu_context *smu,
>  	case SMU_UCLK:
>  	case SMU_FCLK:
>  		/* There is only 2 levels for fine grained DPM */
> -		if (navi10_is_support_fine_grained_dpm(smu, clk_type)) {
> +		if (navi10_supports_fine_grained_dpm(smu, clk_type)) {
>  			soft_max_level = (soft_max_level >= 1 ? 1 : 0);
>  			soft_min_level = (soft_min_level >= 1 ? 1 : 0);
>  		}
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
> index 15e66e1912de33..3f5721baa5ff50 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
> @@ -1006,7 +1006,8 @@ static int sienna_cichlid_get_current_clk_freq_by_table(struct
> smu_context *smu,
> 
>  }
> 
> -static bool sienna_cichlid_is_support_fine_grained_dpm(struct smu_context *smu, enum
> smu_clk_type clk_type)
> +static bool sienna_cichlid_supports_fine_grained_dpm(struct smu_context *smu,
> +						     enum smu_clk_type clk_type)
>  {
>  	DpmDescriptor_t *dpm_desc = NULL;
>  	DpmDescriptor_t *table_member;
> @@ -1084,7 +1085,7 @@ static int sienna_cichlid_print_clk_levels(struct smu_context
> *smu,
>  		if (ret)
>  			goto print_clk_out;
> 
> -		if (!sienna_cichlid_is_support_fine_grained_dpm(smu, clk_type)) {
> +		if (!sienna_cichlid_supports_fine_grained_dpm(smu, clk_type)) {
>  			for (i = 0; i < count; i++) {
>  				ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, i,
> &value);
>  				if (ret)
> @@ -1235,7 +1236,7 @@ static int sienna_cichlid_force_clk_levels(struct smu_context
> *smu,
>  	case SMU_UCLK:
>  	case SMU_FCLK:
>  		/* There is only 2 levels for fine grained DPM */
> -		if (sienna_cichlid_is_support_fine_grained_dpm(smu, clk_type)) {
> +		if (sienna_cichlid_supports_fine_grained_dpm(smu, clk_type)) {
>  			soft_max_level = (soft_max_level >= 1 ? 1 : 0);
>  			soft_min_level = (soft_min_level >= 1 ? 1 : 0);
>  		}
> --
> 2.33.1.558.g2bd2f258f4

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/5] drm/amd/pm: Slight function rename
  2021-10-13 14:50   ` Russell, Kent
@ 2021-10-13 14:53     ` Luben Tuikov
  0 siblings, 0 replies; 29+ messages in thread
From: Luben Tuikov @ 2021-10-13 14:53 UTC (permalink / raw)
  To: Russell, Kent, amd-gfx; +Cc: Deucher, Alexander

On 2021-10-13 10:50, Russell, Kent wrote:
> [AMD Official Use Only]
>
> Pedantic tiny thing:
>
>> -----Original Message-----
>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Luben Tuikov
>> Sent: Tuesday, October 12, 2021 11:11 PM
>> To: amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Tuikov, Luben
>> <Luben.Tuikov@amd.com>
>> Subject: [PATCH 1/5] drm/amd/pm: Slight function rename
>>
>> Rename
>> sienna_cichlid_is_support_fine_grained_dpm() to
>> sienna_cichlid_support_fine_grained_dpm().
> ^You would want cichlid_supports_fine_grained_dpm . The function is correct below, but anyone grepping git logs would miss it.

Ah, yes, thank you Kent--I'll fix the description to add the missing 's'.

Regards,
Luben

>
>  Kent
>
>> Rename
>> navi10_is_support_fine_grained_dpm() to
>> navi10_supports_fine_grained_dpm().
>>
>> Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
>> ---
>>  drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c         | 7 ++++---
>>  drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 7 ++++---
>>  2 files changed, 8 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
>> b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
>> index 71161f6b78fea9..0fe9790f67f5af 100644
>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
>> @@ -1231,7 +1231,8 @@ static int navi10_get_current_clk_freq_by_table(struct
>> smu_context *smu,
>>  					   value);
>>  }
>>
>> -static bool navi10_is_support_fine_grained_dpm(struct smu_context *smu, enum
>> smu_clk_type clk_type)
>> +static bool navi10_supports_fine_grained_dpm(struct smu_context *smu,
>> +					     enum smu_clk_type clk_type)
>>  {
>>  	PPTable_t *pptable = smu->smu_table.driver_pptable;
>>  	DpmDescriptor_t *dpm_desc = NULL;
>> @@ -1299,7 +1300,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
>>  		if (ret)
>>  			return size;
>>
>> -		if (!navi10_is_support_fine_grained_dpm(smu, clk_type)) {
>> +		if (!navi10_supports_fine_grained_dpm(smu, clk_type)) {
>>  			for (i = 0; i < count; i++) {
>>  				ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, i,
>> &value);
>>  				if (ret)
>> @@ -1465,7 +1466,7 @@ static int navi10_force_clk_levels(struct smu_context *smu,
>>  	case SMU_UCLK:
>>  	case SMU_FCLK:
>>  		/* There is only 2 levels for fine grained DPM */
>> -		if (navi10_is_support_fine_grained_dpm(smu, clk_type)) {
>> +		if (navi10_supports_fine_grained_dpm(smu, clk_type)) {
>>  			soft_max_level = (soft_max_level >= 1 ? 1 : 0);
>>  			soft_min_level = (soft_min_level >= 1 ? 1 : 0);
>>  		}
>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
>> b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
>> index 15e66e1912de33..3f5721baa5ff50 100644
>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
>> @@ -1006,7 +1006,8 @@ static int sienna_cichlid_get_current_clk_freq_by_table(struct
>> smu_context *smu,
>>
>>  }
>>
>> -static bool sienna_cichlid_is_support_fine_grained_dpm(struct smu_context *smu, enum
>> smu_clk_type clk_type)
>> +static bool sienna_cichlid_supports_fine_grained_dpm(struct smu_context *smu,
>> +						     enum smu_clk_type clk_type)
>>  {
>>  	DpmDescriptor_t *dpm_desc = NULL;
>>  	DpmDescriptor_t *table_member;
>> @@ -1084,7 +1085,7 @@ static int sienna_cichlid_print_clk_levels(struct smu_context
>> *smu,
>>  		if (ret)
>>  			goto print_clk_out;
>>
>> -		if (!sienna_cichlid_is_support_fine_grained_dpm(smu, clk_type)) {
>> +		if (!sienna_cichlid_supports_fine_grained_dpm(smu, clk_type)) {
>>  			for (i = 0; i < count; i++) {
>>  				ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, i,
>> &value);
>>  				if (ret)
>> @@ -1235,7 +1236,7 @@ static int sienna_cichlid_force_clk_levels(struct smu_context
>> *smu,
>>  	case SMU_UCLK:
>>  	case SMU_FCLK:
>>  		/* There is only 2 levels for fine grained DPM */
>> -		if (sienna_cichlid_is_support_fine_grained_dpm(smu, clk_type)) {
>> +		if (sienna_cichlid_supports_fine_grained_dpm(smu, clk_type)) {
>>  			soft_max_level = (soft_max_level >= 1 ? 1 : 0);
>>  			soft_min_level = (soft_min_level >= 1 ? 1 : 0);
>>  		}
>> --
>> 2.33.1.558.g2bd2f258f4


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-13  4:14 ` [PATCH 0/5] 0 MHz is not a valid current frequency Lazar, Lijo
  2021-10-13  7:06   ` Quan, Evan
  2021-10-13 13:58   ` Alex Deucher
@ 2021-10-13 16:22   ` Luben Tuikov
  2021-10-13 17:06     ` Lazar, Lijo
  2 siblings, 1 reply; 29+ messages in thread
From: Luben Tuikov @ 2021-10-13 16:22 UTC (permalink / raw)
  To: Lazar, Lijo, amd-gfx; +Cc: Alex Deucher

On 2021-10-13 00:14, Lazar, Lijo wrote:
>
> On 10/13/2021 8:40 AM, Luben Tuikov wrote:
>> Some ASIC support low-power functionality for the whole ASIC or just
>> an IP block. When in such low-power mode, some sysfs interfaces would
>> report a frequency of 0, e.g.,
>>
>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>> 0: 500Mhz
>> 1: 0Mhz *
>> 2: 2200Mhz
>> $_
>>
>> An operating frequency of 0 MHz doesn't make sense, and this interface
>> is designed to report only operating clock frequencies, i.e. non-zero,
>> and possibly the current one.
>>
>> When in this low-power state, round to the smallest
>> operating frequency, for this interface, as follows,
>>
> Would rather avoid this -
>
> 1) It is manipulating FW reported value. If at all there is an uncaught 
> issue in FW reporting of frequency values, that is masked here.
> 2) Otherwise, if 0MHz is described as GFX power gated case, this 
> provides a convenient interface to check if GFX is power gated.
>
> If seeing a '0' is not pleasing, consider changing to something like
> 	"NA" - not available (frequency cannot be fetched at the moment).

There's a ROCm tool which literally asserts if the values are not ordered in increasing order. Now since 0 < 550, but 0 is listed as the second entry, the tool simply asserts and crashes.

It is not clear what you'd rather see here:

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 550Mhz
1: N/A *
2: 2200MHz
$_

Is this what you want to see? (That'll crash other tools which expect %uMhz.)

Or maybe just a list without default hint, i.e. no asterisk?

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 550Mhz
1: 2200MHz
$_

What should the output be?

We want to avoid showing 0, but still show numbers.

Regards,
Luben

>
> Thanks,
> Lijo
>
>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>> 0: 500Mhz *
>> 1: 2200Mhz
>> $_
>>
>> Luben Tuikov (5):
>>    drm/amd/pm: Slight function rename
>>    drm/amd/pm: Rename cur_value to curr_value
>>    drm/amd/pm: Rename freq_values --> freq_value
>>    dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
>>    dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency
>>
>>   .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +++++++++------
>>   .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ++++++++++++-------
>>   2 files changed, 86 insertions(+), 47 deletions(-)
>>


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-13 16:22   ` Luben Tuikov
@ 2021-10-13 17:06     ` Lazar, Lijo
  2021-10-13 17:19       ` Alex Deucher
  2021-10-15  2:25       ` Quan, Evan
  0 siblings, 2 replies; 29+ messages in thread
From: Lazar, Lijo @ 2021-10-13 17:06 UTC (permalink / raw)
  To: Tuikov, Luben, amd-gfx; +Cc: Deucher, Alexander

[-- Attachment #1: Type: text/plain, Size: 3038 bytes --]

[AMD Official Use Only]

>Or maybe just a list without default hint, i.e. no asterisk?

I think this is also fine meaning we are having trouble in determining the current frequency or DPM level. Evan/Alex? Don't know if this will crash the tools.

Thanks,
Lijo
________________________________
From: Tuikov, Luben <Luben.Tuikov@amd.com>
Sent: Wednesday, October 13, 2021 9:52:09 PM
To: Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency

On 2021-10-13 00:14, Lazar, Lijo wrote:
>
> On 10/13/2021 8:40 AM, Luben Tuikov wrote:
>> Some ASIC support low-power functionality for the whole ASIC or just
>> an IP block. When in such low-power mode, some sysfs interfaces would
>> report a frequency of 0, e.g.,
>>
>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>> 0: 500Mhz
>> 1: 0Mhz *
>> 2: 2200Mhz
>> $_
>>
>> An operating frequency of 0 MHz doesn't make sense, and this interface
>> is designed to report only operating clock frequencies, i.e. non-zero,
>> and possibly the current one.
>>
>> When in this low-power state, round to the smallest
>> operating frequency, for this interface, as follows,
>>
> Would rather avoid this -
>
> 1) It is manipulating FW reported value. If at all there is an uncaught
> issue in FW reporting of frequency values, that is masked here.
> 2) Otherwise, if 0MHz is described as GFX power gated case, this
> provides a convenient interface to check if GFX is power gated.
>
> If seeing a '0' is not pleasing, consider changing to something like
>        "NA" - not available (frequency cannot be fetched at the moment).

There's a ROCm tool which literally asserts if the values are not ordered in increasing order. Now since 0 < 550, but 0 is listed as the second entry, the tool simply asserts and crashes.

It is not clear what you'd rather see here:

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 550Mhz
1: N/A *
2: 2200MHz
$_

Is this what you want to see? (That'll crash other tools which expect %uMhz.)

Or maybe just a list without default hint, i.e. no asterisk?

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 550Mhz
1: 2200MHz
$_

What should the output be?

We want to avoid showing 0, but still show numbers.

Regards,
Luben

>
> Thanks,
> Lijo
>
>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>> 0: 500Mhz *
>> 1: 2200Mhz
>> $_
>>
>> Luben Tuikov (5):
>>    drm/amd/pm: Slight function rename
>>    drm/amd/pm: Rename cur_value to curr_value
>>    drm/amd/pm: Rename freq_values --> freq_value
>>    dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
>>    dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency
>>
>>   .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +++++++++------
>>   .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ++++++++++++-------
>>   2 files changed, 86 insertions(+), 47 deletions(-)
>>


[-- Attachment #2: Type: text/html, Size: 5034 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-13 17:06     ` Lazar, Lijo
@ 2021-10-13 17:19       ` Alex Deucher
  2021-10-15  2:25       ` Quan, Evan
  1 sibling, 0 replies; 29+ messages in thread
From: Alex Deucher @ 2021-10-13 17:19 UTC (permalink / raw)
  To: Lazar, Lijo; +Cc: Tuikov, Luben, amd-gfx, Deucher, Alexander

On Wed, Oct 13, 2021 at 1:06 PM Lazar, Lijo <Lijo.Lazar@amd.com> wrote:
>
> [AMD Official Use Only]
>
>
> >Or maybe just a list without default hint, i.e. no asterisk?
>
> I think this is also fine meaning we are having trouble in determining the current frequency or DPM level. Evan/Alex? Don't know if this will crash the tools.
>

That seems reasonable to me.

Alex

> Thanks,
> Lijo
> ________________________________
> From: Tuikov, Luben <Luben.Tuikov@amd.com>
> Sent: Wednesday, October 13, 2021 9:52:09 PM
> To: Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
> Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency
>
> On 2021-10-13 00:14, Lazar, Lijo wrote:
> >
> > On 10/13/2021 8:40 AM, Luben Tuikov wrote:
> >> Some ASIC support low-power functionality for the whole ASIC or just
> >> an IP block. When in such low-power mode, some sysfs interfaces would
> >> report a frequency of 0, e.g.,
> >>
> >> $cat /sys/class/drm/card0/device/pp_dpm_sclk
> >> 0: 500Mhz
> >> 1: 0Mhz *
> >> 2: 2200Mhz
> >> $_
> >>
> >> An operating frequency of 0 MHz doesn't make sense, and this interface
> >> is designed to report only operating clock frequencies, i.e. non-zero,
> >> and possibly the current one.
> >>
> >> When in this low-power state, round to the smallest
> >> operating frequency, for this interface, as follows,
> >>
> > Would rather avoid this -
> >
> > 1) It is manipulating FW reported value. If at all there is an uncaught
> > issue in FW reporting of frequency values, that is masked here.
> > 2) Otherwise, if 0MHz is described as GFX power gated case, this
> > provides a convenient interface to check if GFX is power gated.
> >
> > If seeing a '0' is not pleasing, consider changing to something like
> >        "NA" - not available (frequency cannot be fetched at the moment).
>
> There's a ROCm tool which literally asserts if the values are not ordered in increasing order. Now since 0 < 550, but 0 is listed as the second entry, the tool simply asserts and crashes.
>
> It is not clear what you'd rather see here:
>
> $cat /sys/class/drm/card0/device/pp_dpm_sclk
> 0: 550Mhz
> 1: N/A *
> 2: 2200MHz
> $_
>
> Is this what you want to see? (That'll crash other tools which expect %uMhz.)
>
> Or maybe just a list without default hint, i.e. no asterisk?
>
> $cat /sys/class/drm/card0/device/pp_dpm_sclk
> 0: 550Mhz
> 1: 2200MHz
> $_
>
> What should the output be?
>
> We want to avoid showing 0, but still show numbers.
>
> Regards,
> Luben
>
> >
> > Thanks,
> > Lijo
> >
> >> $cat /sys/class/drm/card0/device/pp_dpm_sclk
> >> 0: 500Mhz *
> >> 1: 2200Mhz
> >> $_
> >>
> >> Luben Tuikov (5):
> >>    drm/amd/pm: Slight function rename
> >>    drm/amd/pm: Rename cur_value to curr_value
> >>    drm/amd/pm: Rename freq_values --> freq_value
> >>    dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
> >>    dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency
> >>
> >>   .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +++++++++------
> >>   .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ++++++++++++-------
> >>   2 files changed, 86 insertions(+), 47 deletions(-)
> >>
>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-13 17:06     ` Lazar, Lijo
  2021-10-13 17:19       ` Alex Deucher
@ 2021-10-15  2:25       ` Quan, Evan
  2021-10-18 20:19         ` Deucher, Alexander
  1 sibling, 1 reply; 29+ messages in thread
From: Quan, Evan @ 2021-10-15  2:25 UTC (permalink / raw)
  To: Lazar, Lijo, Tuikov, Luben, amd-gfx, Russell, Kent; +Cc: Deucher, Alexander

[-- Attachment #1: Type: text/plain, Size: 3611 bytes --]

[AMD Official Use Only]

+Kent who maintains the Rocm tool

From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Lazar, Lijo
Sent: Thursday, October 14, 2021 1:07 AM
To: Tuikov, Luben <Luben.Tuikov@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency


[AMD Official Use Only]


[AMD Official Use Only]

>Or maybe just a list without default hint, i.e. no asterisk?

I think this is also fine meaning we are having trouble in determining the current frequency or DPM level. Evan/Alex? Don't know if this will crash the tools.

Thanks,
Lijo
________________________________
From: Tuikov, Luben <Luben.Tuikov@amd.com<mailto:Luben.Tuikov@amd.com>>
Sent: Wednesday, October 13, 2021 9:52:09 PM
To: Lazar, Lijo <Lijo.Lazar@amd.com<mailto:Lijo.Lazar@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com<mailto:Alexander.Deucher@amd.com>>
Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency

On 2021-10-13 00:14, Lazar, Lijo wrote:
>
> On 10/13/2021 8:40 AM, Luben Tuikov wrote:
>> Some ASIC support low-power functionality for the whole ASIC or just
>> an IP block. When in such low-power mode, some sysfs interfaces would
>> report a frequency of 0, e.g.,
>>
>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>> 0: 500Mhz
>> 1: 0Mhz *
>> 2: 2200Mhz
>> $_
>>
>> An operating frequency of 0 MHz doesn't make sense, and this interface
>> is designed to report only operating clock frequencies, i.e. non-zero,
>> and possibly the current one.
>>
>> When in this low-power state, round to the smallest
>> operating frequency, for this interface, as follows,
>>
> Would rather avoid this -
>
> 1) It is manipulating FW reported value. If at all there is an uncaught
> issue in FW reporting of frequency values, that is masked here.
> 2) Otherwise, if 0MHz is described as GFX power gated case, this
> provides a convenient interface to check if GFX is power gated.
>
> If seeing a '0' is not pleasing, consider changing to something like
>        "NA" - not available (frequency cannot be fetched at the moment).

There's a ROCm tool which literally asserts if the values are not ordered in increasing order. Now since 0 < 550, but 0 is listed as the second entry, the tool simply asserts and crashes.

It is not clear what you'd rather see here:

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 550Mhz
1: N/A *
2: 2200MHz
$_

Is this what you want to see? (That'll crash other tools which expect %uMhz.)

Or maybe just a list without default hint, i.e. no asterisk?

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 550Mhz
1: 2200MHz
$_

What should the output be?

We want to avoid showing 0, but still show numbers.

Regards,
Luben

>
> Thanks,
> Lijo
>
>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>> 0: 500Mhz *
>> 1: 2200Mhz
>> $_
>>
>> Luben Tuikov (5):
>>    drm/amd/pm: Slight function rename
>>    drm/amd/pm: Rename cur_value to curr_value
>>    drm/amd/pm: Rename freq_values --> freq_value
>>    dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
>>    dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency
>>
>>   .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +++++++++------
>>   .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ++++++++++++-------
>>   2 files changed, 86 insertions(+), 47 deletions(-)
>>

[-- Attachment #2: Type: text/html, Size: 8607 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-15  2:25       ` Quan, Evan
@ 2021-10-18 20:19         ` Deucher, Alexander
  2021-10-18 20:29           ` Luben Tuikov
  0 siblings, 1 reply; 29+ messages in thread
From: Deucher, Alexander @ 2021-10-18 20:19 UTC (permalink / raw)
  To: Quan, Evan, Lazar, Lijo, Tuikov, Luben, amd-gfx, Russell, Kent

[-- Attachment #1: Type: text/plain, Size: 4193 bytes --]

[Public]

We the current behavior (0 for clock) already crashes the tool, so I don't think we can really make things worse.

Alex

________________________________
From: Quan, Evan <Evan.Quan@amd.com>
Sent: Thursday, October 14, 2021 10:25 PM
To: Lazar, Lijo <Lijo.Lazar@amd.com>; Tuikov, Luben <Luben.Tuikov@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; Russell, Kent <Kent.Russell@amd.com>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
Subject: RE: [PATCH 0/5] 0 MHz is not a valid current frequency


[AMD Official Use Only]



+Kent who maintains the Rocm tool



From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Lazar, Lijo
Sent: Thursday, October 14, 2021 1:07 AM
To: Tuikov, Luben <Luben.Tuikov@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency



[AMD Official Use Only]



[AMD Official Use Only]



>Or maybe just a list without default hint, i.e. no asterisk?



I think this is also fine meaning we are having trouble in determining the current frequency or DPM level. Evan/Alex? Don't know if this will crash the tools.



Thanks,
Lijo

________________________________

From: Tuikov, Luben <Luben.Tuikov@amd.com<mailto:Luben.Tuikov@amd.com>>
Sent: Wednesday, October 13, 2021 9:52:09 PM
To: Lazar, Lijo <Lijo.Lazar@amd.com<mailto:Lijo.Lazar@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com<mailto:Alexander.Deucher@amd.com>>
Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency



On 2021-10-13 00:14, Lazar, Lijo wrote:
>
> On 10/13/2021 8:40 AM, Luben Tuikov wrote:
>> Some ASIC support low-power functionality for the whole ASIC or just
>> an IP block. When in such low-power mode, some sysfs interfaces would
>> report a frequency of 0, e.g.,
>>
>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>> 0: 500Mhz
>> 1: 0Mhz *
>> 2: 2200Mhz
>> $_
>>
>> An operating frequency of 0 MHz doesn't make sense, and this interface
>> is designed to report only operating clock frequencies, i.e. non-zero,
>> and possibly the current one.
>>
>> When in this low-power state, round to the smallest
>> operating frequency, for this interface, as follows,
>>
> Would rather avoid this -
>
> 1) It is manipulating FW reported value. If at all there is an uncaught
> issue in FW reporting of frequency values, that is masked here.
> 2) Otherwise, if 0MHz is described as GFX power gated case, this
> provides a convenient interface to check if GFX is power gated.
>
> If seeing a '0' is not pleasing, consider changing to something like
>        "NA" - not available (frequency cannot be fetched at the moment).

There's a ROCm tool which literally asserts if the values are not ordered in increasing order. Now since 0 < 550, but 0 is listed as the second entry, the tool simply asserts and crashes.

It is not clear what you'd rather see here:

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 550Mhz
1: N/A *
2: 2200MHz
$_

Is this what you want to see? (That'll crash other tools which expect %uMhz.)

Or maybe just a list without default hint, i.e. no asterisk?

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 550Mhz
1: 2200MHz
$_

What should the output be?

We want to avoid showing 0, but still show numbers.

Regards,
Luben

>
> Thanks,
> Lijo
>
>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>> 0: 500Mhz *
>> 1: 2200Mhz
>> $_
>>
>> Luben Tuikov (5):
>>    drm/amd/pm: Slight function rename
>>    drm/amd/pm: Rename cur_value to curr_value
>>    drm/amd/pm: Rename freq_values --> freq_value
>>    dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
>>    dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency
>>
>>   .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +++++++++------
>>   .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ++++++++++++-------
>>   2 files changed, 86 insertions(+), 47 deletions(-)
>>

[-- Attachment #2: Type: text/html, Size: 8755 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-18 20:19         ` Deucher, Alexander
@ 2021-10-18 20:29           ` Luben Tuikov
  2021-10-19  1:04             ` Russell, Kent
  0 siblings, 1 reply; 29+ messages in thread
From: Luben Tuikov @ 2021-10-18 20:29 UTC (permalink / raw)
  To: Deucher, Alexander, Quan, Evan, Lazar, Lijo, amd-gfx, Russell, Kent

[-- Attachment #1: Type: text/html, Size: 14721 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/5] drm/amd/pm: Slight function rename
  2021-10-13  3:10 ` [PATCH 1/5] drm/amd/pm: Slight function rename Luben Tuikov
  2021-10-13 14:50   ` Russell, Kent
@ 2021-10-18 22:52   ` Paul Menzel
  2021-10-18 23:02     ` Luben Tuikov
  1 sibling, 1 reply; 29+ messages in thread
From: Paul Menzel @ 2021-10-18 22:52 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: Alex Deucher, amd-gfx

Dear Luben,


Am 13.10.21 um 05:10 schrieb Luben Tuikov:

[…]

A small nit regarding the subject. It’d be great if you made it a 
statement by adding a verb (in imperative mood). git standard messages 
are doing the same with *Merge …* and *Revert …*.


Kind regards,

Paul

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/5] drm/amd/pm: Slight function rename
  2021-10-18 22:52   ` Paul Menzel
@ 2021-10-18 23:02     ` Luben Tuikov
  0 siblings, 0 replies; 29+ messages in thread
From: Luben Tuikov @ 2021-10-18 23:02 UTC (permalink / raw)
  To: Paul Menzel; +Cc: Alex Deucher, amd-gfx

Okay, no problem--I'll add a verb there and resend the patch set. Thanks for letting me know.

Regards,
Luben

On 2021-10-18 18:52, Paul Menzel wrote:
> Dear Luben,
>
>
> Am 13.10.21 um 05:10 schrieb Luben Tuikov:
>
> […]
>
> A small nit regarding the subject. It’d be great if you made it a 
> statement by adding a verb (in imperative mood). git standard messages 
> are doing the same with *Merge …* and *Revert …*.
>
>
> Kind regards,
>
> Paul


^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-18 20:29           ` Luben Tuikov
@ 2021-10-19  1:04             ` Russell, Kent
  2021-10-19  1:06               ` Russell, Kent
  0 siblings, 1 reply; 29+ messages in thread
From: Russell, Kent @ 2021-10-19  1:04 UTC (permalink / raw)
  To: Tuikov, Luben, Deucher, Alexander, Quan, Evan, Lazar, Lijo, amd-gfx
  Cc: Kasiviswanathan, Harish

[-- Attachment #1: Type: text/plain, Size: 5439 bytes --]

[AMD Official Use Only]

+Harish, rocm-smi falls under his purview now.

Kent

From: Tuikov, Luben <Luben.Tuikov@amd.com>
Sent: Monday, October 18, 2021 4:30 PM
To: Deucher, Alexander <Alexander.Deucher@amd.com>; Quan, Evan <Evan.Quan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org; Russell, Kent <Kent.Russell@amd.com>
Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency

I think Kent is already seen these patches as he did comment on 1/5 patch.

The v3 version of the patch, posted last week, removes the asterisk to report the lowest frequency as the current frequency, when the current frequency is 0, i.e. when the block is in low power state. Does the tool rely on the asterisk? If this information is necessary could it not use amdgpu_pm_info?

Regards,
Luben

On 2021-10-18 16:19, Deucher, Alexander wrote:

[Public]

We the current behavior (0 for clock) already crashes the tool, so I don't think we can really make things worse.

Alex

________________________________
From: Quan, Evan <Evan.Quan@amd.com><mailto:Evan.Quan@amd.com>
Sent: Thursday, October 14, 2021 10:25 PM
To: Lazar, Lijo <Lijo.Lazar@amd.com><mailto:Lijo.Lazar@amd.com>; Tuikov, Luben <Luben.Tuikov@amd.com><mailto:Luben.Tuikov@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org><mailto:amd-gfx@lists.freedesktop.org>; Russell, Kent <Kent.Russell@amd.com><mailto:Kent.Russell@amd.com>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com><mailto:Alexander.Deucher@amd.com>
Subject: RE: [PATCH 0/5] 0 MHz is not a valid current frequency


[AMD Official Use Only]



+Kent who maintains the Rocm tool



From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org><mailto:amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Lazar, Lijo
Sent: Thursday, October 14, 2021 1:07 AM
To: Tuikov, Luben <Luben.Tuikov@amd.com><mailto:Luben.Tuikov@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com><mailto:Alexander.Deucher@amd.com>
Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency



[AMD Official Use Only]



[AMD Official Use Only]



>Or maybe just a list without default hint, i.e. no asterisk?



I think this is also fine meaning we are having trouble in determining the current frequency or DPM level. Evan/Alex? Don't know if this will crash the tools.



Thanks,
Lijo

________________________________

From: Tuikov, Luben <Luben.Tuikov@amd.com<mailto:Luben.Tuikov@amd.com>>
Sent: Wednesday, October 13, 2021 9:52:09 PM
To: Lazar, Lijo <Lijo.Lazar@amd.com<mailto:Lijo.Lazar@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com<mailto:Alexander.Deucher@amd.com>>
Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency



On 2021-10-13 00:14, Lazar, Lijo wrote:
>
> On 10/13/2021 8:40 AM, Luben Tuikov wrote:
>> Some ASIC support low-power functionality for the whole ASIC or just
>> an IP block. When in such low-power mode, some sysfs interfaces would
>> report a frequency of 0, e.g.,
>>
>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>> 0: 500Mhz
>> 1: 0Mhz *
>> 2: 2200Mhz
>> $_
>>
>> An operating frequency of 0 MHz doesn't make sense, and this interface
>> is designed to report only operating clock frequencies, i.e. non-zero,
>> and possibly the current one.
>>
>> When in this low-power state, round to the smallest
>> operating frequency, for this interface, as follows,
>>
> Would rather avoid this -
>
> 1) It is manipulating FW reported value. If at all there is an uncaught
> issue in FW reporting of frequency values, that is masked here.
> 2) Otherwise, if 0MHz is described as GFX power gated case, this
> provides a convenient interface to check if GFX is power gated.
>
> If seeing a '0' is not pleasing, consider changing to something like
>        "NA" - not available (frequency cannot be fetched at the moment).

There's a ROCm tool which literally asserts if the values are not ordered in increasing order. Now since 0 < 550, but 0 is listed as the second entry, the tool simply asserts and crashes.

It is not clear what you'd rather see here:

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 550Mhz
1: N/A *
2: 2200MHz
$_

Is this what you want to see? (That'll crash other tools which expect %uMhz.)

Or maybe just a list without default hint, i.e. no asterisk?

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 550Mhz
1: 2200MHz
$_

What should the output be?

We want to avoid showing 0, but still show numbers.

Regards,
Luben

>
> Thanks,
> Lijo
>
>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>> 0: 500Mhz *
>> 1: 2200Mhz
>> $_
>>
>> Luben Tuikov (5):
>>    drm/amd/pm: Slight function rename
>>    drm/amd/pm: Rename cur_value to curr_value
>>    drm/amd/pm: Rename freq_values --> freq_value
>>    dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
>>    dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency
>>
>>   .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +++++++++------
>>   .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ++++++++++++-------
>>   2 files changed, 86 insertions(+), 47 deletions(-)
>>


[-- Attachment #2: Type: text/html, Size: 50653 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-19  1:04             ` Russell, Kent
@ 2021-10-19  1:06               ` Russell, Kent
  2021-10-19  4:27                 ` Luben Tuikov
  0 siblings, 1 reply; 29+ messages in thread
From: Russell, Kent @ 2021-10-19  1:06 UTC (permalink / raw)
  To: Russell, Kent, Tuikov, Luben, Deucher, Alexander, Quan, Evan,
	Lazar, Lijo, amd-gfx
  Cc: Kasiviswanathan, Harish

[-- Attachment #1: Type: text/plain, Size: 6418 bytes --]

[AMD Official Use Only]

The * is required for the rocm-smi's functionality for showing what the current clocks are. We had a bug before where the * was removed, then the SMI died fantastically. Work could be done to try to handle that type of situation, but the SMI has a "show current clocks" and uses the * to determine which one is active

Kent

From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Russell, Kent
Sent: Monday, October 18, 2021 9:05 PM
To: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Quan, Evan <Evan.Quan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>
Subject: RE: [PATCH 0/5] 0 MHz is not a valid current frequency


[AMD Official Use Only]

+Harish, rocm-smi falls under his purview now.

Kent

From: Tuikov, Luben <Luben.Tuikov@amd.com<mailto:Luben.Tuikov@amd.com>>
Sent: Monday, October 18, 2021 4:30 PM
To: Deucher, Alexander <Alexander.Deucher@amd.com<mailto:Alexander.Deucher@amd.com>>; Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>; Lazar, Lijo <Lijo.Lazar@amd.com<mailto:Lijo.Lazar@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>; Russell, Kent <Kent.Russell@amd.com<mailto:Kent.Russell@amd.com>>
Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency

I think Kent is already seen these patches as he did comment on 1/5 patch.

The v3 version of the patch, posted last week, removes the asterisk to report the lowest frequency as the current frequency, when the current frequency is 0, i.e. when the block is in low power state. Does the tool rely on the asterisk? If this information is necessary could it not use amdgpu_pm_info?

Regards,
Luben

On 2021-10-18 16:19, Deucher, Alexander wrote:

[Public]

We the current behavior (0 for clock) already crashes the tool, so I don't think we can really make things worse.

Alex

________________________________
From: Quan, Evan <Evan.Quan@amd.com><mailto:Evan.Quan@amd.com>
Sent: Thursday, October 14, 2021 10:25 PM
To: Lazar, Lijo <Lijo.Lazar@amd.com><mailto:Lijo.Lazar@amd.com>; Tuikov, Luben <Luben.Tuikov@amd.com><mailto:Luben.Tuikov@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org><mailto:amd-gfx@lists.freedesktop.org>; Russell, Kent <Kent.Russell@amd.com><mailto:Kent.Russell@amd.com>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com><mailto:Alexander.Deucher@amd.com>
Subject: RE: [PATCH 0/5] 0 MHz is not a valid current frequency


[AMD Official Use Only]



+Kent who maintains the Rocm tool



From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org><mailto:amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Lazar, Lijo
Sent: Thursday, October 14, 2021 1:07 AM
To: Tuikov, Luben <Luben.Tuikov@amd.com><mailto:Luben.Tuikov@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com><mailto:Alexander.Deucher@amd.com>
Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency



[AMD Official Use Only]



[AMD Official Use Only]



>Or maybe just a list without default hint, i.e. no asterisk?



I think this is also fine meaning we are having trouble in determining the current frequency or DPM level. Evan/Alex? Don't know if this will crash the tools.



Thanks,
Lijo

________________________________

From: Tuikov, Luben <Luben.Tuikov@amd.com<mailto:Luben.Tuikov@amd.com>>
Sent: Wednesday, October 13, 2021 9:52:09 PM
To: Lazar, Lijo <Lijo.Lazar@amd.com<mailto:Lijo.Lazar@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com<mailto:Alexander.Deucher@amd.com>>
Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency



On 2021-10-13 00:14, Lazar, Lijo wrote:
>
> On 10/13/2021 8:40 AM, Luben Tuikov wrote:
>> Some ASIC support low-power functionality for the whole ASIC or just
>> an IP block. When in such low-power mode, some sysfs interfaces would
>> report a frequency of 0, e.g.,
>>
>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>> 0: 500Mhz
>> 1: 0Mhz *
>> 2: 2200Mhz
>> $_
>>
>> An operating frequency of 0 MHz doesn't make sense, and this interface
>> is designed to report only operating clock frequencies, i.e. non-zero,
>> and possibly the current one.
>>
>> When in this low-power state, round to the smallest
>> operating frequency, for this interface, as follows,
>>
> Would rather avoid this -
>
> 1) It is manipulating FW reported value. If at all there is an uncaught
> issue in FW reporting of frequency values, that is masked here.
> 2) Otherwise, if 0MHz is described as GFX power gated case, this
> provides a convenient interface to check if GFX is power gated.
>
> If seeing a '0' is not pleasing, consider changing to something like
>        "NA" - not available (frequency cannot be fetched at the moment).

There's a ROCm tool which literally asserts if the values are not ordered in increasing order. Now since 0 < 550, but 0 is listed as the second entry, the tool simply asserts and crashes.

It is not clear what you'd rather see here:

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 550Mhz
1: N/A *
2: 2200MHz
$_

Is this what you want to see? (That'll crash other tools which expect %uMhz.)

Or maybe just a list without default hint, i.e. no asterisk?

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 550Mhz
1: 2200MHz
$_

What should the output be?

We want to avoid showing 0, but still show numbers.

Regards,
Luben

>
> Thanks,
> Lijo
>
>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>> 0: 500Mhz *
>> 1: 2200Mhz
>> $_
>>
>> Luben Tuikov (5):
>>    drm/amd/pm: Slight function rename
>>    drm/amd/pm: Rename cur_value to curr_value
>>    drm/amd/pm: Rename freq_values --> freq_value
>>    dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
>>    dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency
>>
>>   .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +++++++++------
>>   .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ++++++++++++-------
>>   2 files changed, 86 insertions(+), 47 deletions(-)
>>


[-- Attachment #2: Type: text/html, Size: 53330 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-19  1:06               ` Russell, Kent
@ 2021-10-19  4:27                 ` Luben Tuikov
  2021-10-19 13:25                   ` Russell, Kent
  0 siblings, 1 reply; 29+ messages in thread
From: Luben Tuikov @ 2021-10-19  4:27 UTC (permalink / raw)
  To: Russell, Kent, Deucher, Alexander, Quan, Evan, Lazar, Lijo, amd-gfx
  Cc: Kasiviswanathan, Harish

[-- Attachment #1: Type: text/html, Size: 62131 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-19  4:27                 ` Luben Tuikov
@ 2021-10-19 13:25                   ` Russell, Kent
  2021-10-19 13:54                     ` Luben Tuikov
  0 siblings, 1 reply; 29+ messages in thread
From: Russell, Kent @ 2021-10-19 13:25 UTC (permalink / raw)
  To: Tuikov, Luben, Deucher, Alexander, Quan, Evan, Lazar, Lijo, amd-gfx
  Cc: Kasiviswanathan, Harish

[-- Attachment #1: Type: text/plain, Size: 7537 bytes --]

[AMD Official Use Only]

It was the rocm-smi -c flag. Maybe some work was done to make it more robust, that would be nice. But the -c flag is supposed to show the current frequency for each clock type. -g would do the same, but just for SCLK.

Kent

From: Tuikov, Luben <Luben.Tuikov@amd.com>
Sent: Tuesday, October 19, 2021 12:27 AM
To: Russell, Kent <Kent.Russell@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Quan, Evan <Evan.Quan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>
Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency

Kent,

What is the command which fails?
I can try to duplicate it here.

So far, things I've tried, I cannot make rocm-smi fail. Command arguments?

Regards,
Luben

On 2021-10-18 21:06, Russell, Kent wrote:

[AMD Official Use Only]

The * is required for the rocm-smi's functionality for showing what the current clocks are. We had a bug before where the * was removed, then the SMI died fantastically. Work could be done to try to handle that type of situation, but the SMI has a "show current clocks" and uses the * to determine which one is active

Kent

From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org><mailto:amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Russell, Kent
Sent: Monday, October 18, 2021 9:05 PM
To: Tuikov, Luben <Luben.Tuikov@amd.com><mailto:Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com><mailto:Alexander.Deucher@amd.com>; Quan, Evan <Evan.Quan@amd.com><mailto:Evan.Quan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com><mailto:Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com><mailto:Harish.Kasiviswanathan@amd.com>
Subject: RE: [PATCH 0/5] 0 MHz is not a valid current frequency


[AMD Official Use Only]

+Harish, rocm-smi falls under his purview now.

Kent

From: Tuikov, Luben <Luben.Tuikov@amd.com<mailto:Luben.Tuikov@amd.com>>
Sent: Monday, October 18, 2021 4:30 PM
To: Deucher, Alexander <Alexander.Deucher@amd.com<mailto:Alexander.Deucher@amd.com>>; Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>; Lazar, Lijo <Lijo.Lazar@amd.com<mailto:Lijo.Lazar@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>; Russell, Kent <Kent.Russell@amd.com<mailto:Kent.Russell@amd.com>>
Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency

I think Kent is already seen these patches as he did comment on 1/5 patch.

The v3 version of the patch, posted last week, removes the asterisk to report the lowest frequency as the current frequency, when the current frequency is 0, i.e. when the block is in low power state. Does the tool rely on the asterisk? If this information is necessary could it not use amdgpu_pm_info?

Regards,
Luben

On 2021-10-18 16:19, Deucher, Alexander wrote:

[Public]

We the current behavior (0 for clock) already crashes the tool, so I don't think we can really make things worse.

Alex

________________________________
From: Quan, Evan <Evan.Quan@amd.com><mailto:Evan.Quan@amd.com>
Sent: Thursday, October 14, 2021 10:25 PM
To: Lazar, Lijo <Lijo.Lazar@amd.com><mailto:Lijo.Lazar@amd.com>; Tuikov, Luben <Luben.Tuikov@amd.com><mailto:Luben.Tuikov@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org><mailto:amd-gfx@lists.freedesktop.org>; Russell, Kent <Kent.Russell@amd.com><mailto:Kent.Russell@amd.com>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com><mailto:Alexander.Deucher@amd.com>
Subject: RE: [PATCH 0/5] 0 MHz is not a valid current frequency


[AMD Official Use Only]



+Kent who maintains the Rocm tool



From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org><mailto:amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Lazar, Lijo
Sent: Thursday, October 14, 2021 1:07 AM
To: Tuikov, Luben <Luben.Tuikov@amd.com><mailto:Luben.Tuikov@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com><mailto:Alexander.Deucher@amd.com>
Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency



[AMD Official Use Only]



[AMD Official Use Only]



>Or maybe just a list without default hint, i.e. no asterisk?



I think this is also fine meaning we are having trouble in determining the current frequency or DPM level. Evan/Alex? Don't know if this will crash the tools.



Thanks,
Lijo

________________________________

From: Tuikov, Luben <Luben.Tuikov@amd.com<mailto:Luben.Tuikov@amd.com>>
Sent: Wednesday, October 13, 2021 9:52:09 PM
To: Lazar, Lijo <Lijo.Lazar@amd.com<mailto:Lijo.Lazar@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com<mailto:Alexander.Deucher@amd.com>>
Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency



On 2021-10-13 00:14, Lazar, Lijo wrote:
>
> On 10/13/2021 8:40 AM, Luben Tuikov wrote:
>> Some ASIC support low-power functionality for the whole ASIC or just
>> an IP block. When in such low-power mode, some sysfs interfaces would
>> report a frequency of 0, e.g.,
>>
>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>> 0: 500Mhz
>> 1: 0Mhz *
>> 2: 2200Mhz
>> $_
>>
>> An operating frequency of 0 MHz doesn't make sense, and this interface
>> is designed to report only operating clock frequencies, i.e. non-zero,
>> and possibly the current one.
>>
>> When in this low-power state, round to the smallest
>> operating frequency, for this interface, as follows,
>>
> Would rather avoid this -
>
> 1) It is manipulating FW reported value. If at all there is an uncaught
> issue in FW reporting of frequency values, that is masked here.
> 2) Otherwise, if 0MHz is described as GFX power gated case, this
> provides a convenient interface to check if GFX is power gated.
>
> If seeing a '0' is not pleasing, consider changing to something like
>        "NA" - not available (frequency cannot be fetched at the moment).

There's a ROCm tool which literally asserts if the values are not ordered in increasing order. Now since 0 < 550, but 0 is listed as the second entry, the tool simply asserts and crashes.

It is not clear what you'd rather see here:

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 550Mhz
1: N/A *
2: 2200MHz
$_

Is this what you want to see? (That'll crash other tools which expect %uMhz.)

Or maybe just a list without default hint, i.e. no asterisk?

$cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 550Mhz
1: 2200MHz
$_

What should the output be?

We want to avoid showing 0, but still show numbers.

Regards,
Luben

>
> Thanks,
> Lijo
>
>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>> 0: 500Mhz *
>> 1: 2200Mhz
>> $_
>>
>> Luben Tuikov (5):
>>    drm/amd/pm: Slight function rename
>>    drm/amd/pm: Rename cur_value to curr_value
>>    drm/amd/pm: Rename freq_values --> freq_value
>>    dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
>>    dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency
>>
>>   .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +++++++++------
>>   .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ++++++++++++-------
>>   2 files changed, 86 insertions(+), 47 deletions(-)
>>



[-- Attachment #2: Type: text/html, Size: 56744 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-19 13:25                   ` Russell, Kent
@ 2021-10-19 13:54                     ` Luben Tuikov
  2021-10-26 21:26                       ` Alex Deucher
  0 siblings, 1 reply; 29+ messages in thread
From: Luben Tuikov @ 2021-10-19 13:54 UTC (permalink / raw)
  To: Russell, Kent, Deucher, Alexander, Quan, Evan, amd-gfx
  Cc: Kasiviswanathan, Harish

[-- Attachment #1: Type: text/html, Size: 70895 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-19 13:54                     ` Luben Tuikov
@ 2021-10-26 21:26                       ` Alex Deucher
  2021-10-26 22:00                         ` Luben Tuikov
  0 siblings, 1 reply; 29+ messages in thread
From: Alex Deucher @ 2021-10-26 21:26 UTC (permalink / raw)
  To: Luben Tuikov
  Cc: Russell, Kent, Deucher, Alexander, Quan, Evan, amd-gfx,
	Kasiviswanathan, Harish

On Tue, Oct 19, 2021 at 9:54 AM Luben Tuikov <luben.tuikov@amd.com> wrote:
>
> It again fails with the same message!
> But this time it is different!
> Here's why:
>
> openat(AT_FDCWD, "/sys/class/drm/card0/device/pp_dpm_fclk", O_RDONLY) = 3
> read(3, "0: 571Mhz \n1: 1274Mhz *\n2: 1221M"..., 8191) = 36
> read(3, "", 8191)                       = 0
> close(3)                                = 0
> write(2, "python3: /home/ltuikov/proj/amd/"..., 220python3: /home/ltuikov/proj/amd/rocm_smi_lib/src/rocm_smi.cc:913: rsmi_status_t get_frequencies(amd::smi::DevInfoTypes, uint32_t, rsmi_frequencies_t*, uint32_t*): Assertion `f->frequency[i-1] <= f->frequency[i]' failed.
> ) = 220
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f531f9bc000
> rt_sigprocmask(SIG_UNBLOCK, [ABRT], NULL, 8) = 0
> rt_sigprocmask(SIG_BLOCK, ~[RTMIN RT_1], [], 8) = 0
> getpid()                                = 37861
> gettid()                                = 37861
> tgkill(37861, 37861, SIGABRT)           = 0
> rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
> --- SIGABRT {si_signo=SIGABRT, si_code=SI_TKILL, si_pid=37861, si_uid=1000} ---
> +++ killed by SIGABRT (core dumped) +++
> Aborted (core dumped)
> $cat /sys/class/drm/card0/device/pp_dpm_fclk
> 0: 571Mhz
> 1: 1274Mhz *
> 2: 1221Mhz
> $_
>
> Why is the mid frequency larger than the last?
> Why does get_frequencies() insists that they be ordered when they're not? (Does the tool need fixing or the kernel?)
>
> The current patchset doesn't report 0, and doesn't report any current if 0 would've been reported as current. But anything else is reported as it would've been reported before the patch. And I tested it with vanilla amd-staging-drm-next--same thing.
>

Seems to crash both ways.  I'd rather either:
1. Remove the * when the clock is outside of the min and max ranges
or
2. Clamp the clock to the max or min if it's above or below.

And then fix the tools accordingly.  Those seem like the choices of
least surprise considering the interface is supposed to show the
current and available DPM levels.

Alex


> Regards,
> Luben
>
>
> On 2021-10-19 09:25, Russell, Kent wrote:
>
> [AMD Official Use Only]
>
>
>
> It was the rocm-smi -c flag. Maybe some work was done to make it more robust, that would be nice. But the -c flag is supposed to show the current frequency for each clock type. -g would do the same, but just for SCLK.
>
>
>
> Kent
>
>
>
> From: Tuikov, Luben <Luben.Tuikov@amd.com>
> Sent: Tuesday, October 19, 2021 12:27 AM
> To: Russell, Kent <Kent.Russell@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Quan, Evan <Evan.Quan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>
> Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency
>
>
>
> Kent,
>
> What is the command which fails?
> I can try to duplicate it here.
>
> So far, things I've tried, I cannot make rocm-smi fail. Command arguments?
>
> Regards,
> Luben
>
> On 2021-10-18 21:06, Russell, Kent wrote:
>
> [AMD Official Use Only]
>
>
>
> The * is required for the rocm-smi’s functionality for showing what the current clocks are. We had a bug before where the * was removed, then the SMI died fantastically. Work could be done to try to handle that type of situation, but the SMI has a “show current clocks” and uses the * to determine which one is active
>
>
>
> Kent
>
>
>
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Russell, Kent
> Sent: Monday, October 18, 2021 9:05 PM
> To: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Quan, Evan <Evan.Quan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>
> Subject: RE: [PATCH 0/5] 0 MHz is not a valid current frequency
>
>
>
> [AMD Official Use Only]
>
>
>
> +Harish, rocm-smi falls under his purview now.
>
>
>
> Kent
>
>
>
> From: Tuikov, Luben <Luben.Tuikov@amd.com>
> Sent: Monday, October 18, 2021 4:30 PM
> To: Deucher, Alexander <Alexander.Deucher@amd.com>; Quan, Evan <Evan.Quan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org; Russell, Kent <Kent.Russell@amd.com>
> Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency
>
>
>
> I think Kent is already seen these patches as he did comment on 1/5 patch.
>
> The v3 version of the patch, posted last week, removes the asterisk to report the lowest frequency as the current frequency, when the current frequency is 0, i.e. when the block is in low power state. Does the tool rely on the asterisk? If this information is necessary could it not use amdgpu_pm_info?
>
> Regards,
> Luben
>
> On 2021-10-18 16:19, Deucher, Alexander wrote:
>
> [Public]
>
>
>
> We the current behavior (0 for clock) already crashes the tool, so I don't think we can really make things worse.
>
>
>
> Alex
>
>
>
> ________________________________
>
> From: Quan, Evan <Evan.Quan@amd.com>
> Sent: Thursday, October 14, 2021 10:25 PM
> To: Lazar, Lijo <Lijo.Lazar@amd.com>; Tuikov, Luben <Luben.Tuikov@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; Russell, Kent <Kent.Russell@amd.com>
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
> Subject: RE: [PATCH 0/5] 0 MHz is not a valid current frequency
>
>
>
> [AMD Official Use Only]
>
>
>
> +Kent who maintains the Rocm tool
>
>
>
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Lazar, Lijo
> Sent: Thursday, October 14, 2021 1:07 AM
> To: Tuikov, Luben <Luben.Tuikov@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
> Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency
>
>
>
> [AMD Official Use Only]
>
>
>
> [AMD Official Use Only]
>
>
>
> >Or maybe just a list without default hint, i.e. no asterisk?
>
>
>
> I think this is also fine meaning we are having trouble in determining the current frequency or DPM level. Evan/Alex? Don't know if this will crash the tools.
>
>
>
> Thanks,
> Lijo
>
> ________________________________
>
> From: Tuikov, Luben <Luben.Tuikov@amd.com>
> Sent: Wednesday, October 13, 2021 9:52:09 PM
> To: Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
> Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency
>
>
>
> On 2021-10-13 00:14, Lazar, Lijo wrote:
> >
> > On 10/13/2021 8:40 AM, Luben Tuikov wrote:
> >> Some ASIC support low-power functionality for the whole ASIC or just
> >> an IP block. When in such low-power mode, some sysfs interfaces would
> >> report a frequency of 0, e.g.,
> >>
> >> $cat /sys/class/drm/card0/device/pp_dpm_sclk
> >> 0: 500Mhz
> >> 1: 0Mhz *
> >> 2: 2200Mhz
> >> $_
> >>
> >> An operating frequency of 0 MHz doesn't make sense, and this interface
> >> is designed to report only operating clock frequencies, i.e. non-zero,
> >> and possibly the current one.
> >>
> >> When in this low-power state, round to the smallest
> >> operating frequency, for this interface, as follows,
> >>
> > Would rather avoid this -
> >
> > 1) It is manipulating FW reported value. If at all there is an uncaught
> > issue in FW reporting of frequency values, that is masked here.
> > 2) Otherwise, if 0MHz is described as GFX power gated case, this
> > provides a convenient interface to check if GFX is power gated.
> >
> > If seeing a '0' is not pleasing, consider changing to something like
> >        "NA" - not available (frequency cannot be fetched at the moment).
>
> There's a ROCm tool which literally asserts if the values are not ordered in increasing order. Now since 0 < 550, but 0 is listed as the second entry, the tool simply asserts and crashes.
>
> It is not clear what you'd rather see here:
>
> $cat /sys/class/drm/card0/device/pp_dpm_sclk
> 0: 550Mhz
> 1: N/A *
> 2: 2200MHz
> $_
>
> Is this what you want to see? (That'll crash other tools which expect %uMhz.)
>
> Or maybe just a list without default hint, i.e. no asterisk?
>
> $cat /sys/class/drm/card0/device/pp_dpm_sclk
> 0: 550Mhz
> 1: 2200MHz
> $_
>
> What should the output be?
>
> We want to avoid showing 0, but still show numbers.
>
> Regards,
> Luben
>
> >
> > Thanks,
> > Lijo
> >
> >> $cat /sys/class/drm/card0/device/pp_dpm_sclk
> >> 0: 500Mhz *
> >> 1: 2200Mhz
> >> $_
> >>
> >> Luben Tuikov (5):
> >>    drm/amd/pm: Slight function rename
> >>    drm/amd/pm: Rename cur_value to curr_value
> >>    drm/amd/pm: Rename freq_values --> freq_value
> >>    dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
> >>    dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency
> >>
> >>   .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +++++++++------
> >>   .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ++++++++++++-------
> >>   2 files changed, 86 insertions(+), 47 deletions(-)
> >>
>
>
>
>
>
>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-26 21:26                       ` Alex Deucher
@ 2021-10-26 22:00                         ` Luben Tuikov
  2021-10-27  5:20                           ` Lazar, Lijo
  0 siblings, 1 reply; 29+ messages in thread
From: Luben Tuikov @ 2021-10-26 22:00 UTC (permalink / raw)
  To: Alex Deucher
  Cc: Russell, Kent, Deucher, Alexander, Quan, Evan, amd-gfx,
	Kasiviswanathan, Harish

On 2021-10-26 17:26, Alex Deucher wrote:
> On Tue, Oct 19, 2021 at 9:54 AM Luben Tuikov <luben.tuikov@amd.com> wrote:
>> It again fails with the same message!
>> But this time it is different!
>> Here's why:
>>
>> openat(AT_FDCWD, "/sys/class/drm/card0/device/pp_dpm_fclk", O_RDONLY) = 3
>> read(3, "0: 571Mhz \n1: 1274Mhz *\n2: 1221M"..., 8191) = 36
>> read(3, "", 8191)                       = 0
>> close(3)                                = 0
>> write(2, "python3: /home/ltuikov/proj/amd/"..., 220python3: /home/ltuikov/proj/amd/rocm_smi_lib/src/rocm_smi.cc:913: rsmi_status_t get_frequencies(amd::smi::DevInfoTypes, uint32_t, rsmi_frequencies_t*, uint32_t*): Assertion `f->frequency[i-1] <= f->frequency[i]' failed.
>> ) = 220
>> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f531f9bc000
>> rt_sigprocmask(SIG_UNBLOCK, [ABRT], NULL, 8) = 0
>> rt_sigprocmask(SIG_BLOCK, ~[RTMIN RT_1], [], 8) = 0
>> getpid()                                = 37861
>> gettid()                                = 37861
>> tgkill(37861, 37861, SIGABRT)           = 0
>> rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
>> --- SIGABRT {si_signo=SIGABRT, si_code=SI_TKILL, si_pid=37861, si_uid=1000} ---
>> +++ killed by SIGABRT (core dumped) +++
>> Aborted (core dumped)
>> $cat /sys/class/drm/card0/device/pp_dpm_fclk
>> 0: 571Mhz
>> 1: 1274Mhz *
>> 2: 1221Mhz
>> $_
>>
>> Why is the mid frequency larger than the last?
>> Why does get_frequencies() insists that they be ordered when they're not? (Does the tool need fixing or the kernel?)
>>
>> The current patchset doesn't report 0, and doesn't report any current if 0 would've been reported as current. But anything else is reported as it would've been reported before the patch. And I tested it with vanilla amd-staging-drm-next--same thing.
>>
> Seems to crash both ways.  I'd rather either:
> 1. Remove the * when the clock is outside of the min and max ranges
> or
> 2. Clamp the clock to the max or min if it's above or below.
>
> And then fix the tools accordingly.  Those seem like the choices of
> least surprise considering the interface is supposed to show the
> current and available DPM levels.

So, if we get a case such as the one above, we remove the whole line at 1:, not just the asterisk, right? (for option 1 above).
The rocm-smi lib would fail if they're out of order, so only removing the * char would still confuse the tool.

What does it mean when the current frequency (line 1:) is above the "max" (line 2:) as shown in my output above?

Do we perhaps want to sort them and report current still, and swap lines 1 and 2?

Or should we simply remove the ordering requirement in rocm-smi lib?

Regards,
Luben

>
> Alex
>
>
>> Regards,
>> Luben
>>
>>
>> On 2021-10-19 09:25, Russell, Kent wrote:
>>
>> [AMD Official Use Only]
>>
>>
>>
>> It was the rocm-smi -c flag. Maybe some work was done to make it more robust, that would be nice. But the -c flag is supposed to show the current frequency for each clock type. -g would do the same, but just for SCLK.
>>
>>
>>
>> Kent
>>
>>
>>
>> From: Tuikov, Luben <Luben.Tuikov@amd.com>
>> Sent: Tuesday, October 19, 2021 12:27 AM
>> To: Russell, Kent <Kent.Russell@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Quan, Evan <Evan.Quan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>
>> Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency
>>
>>
>>
>> Kent,
>>
>> What is the command which fails?
>> I can try to duplicate it here.
>>
>> So far, things I've tried, I cannot make rocm-smi fail. Command arguments?
>>
>> Regards,
>> Luben
>>
>> On 2021-10-18 21:06, Russell, Kent wrote:
>>
>> [AMD Official Use Only]
>>
>>
>>
>> The * is required for the rocm-smi’s functionality for showing what the current clocks are. We had a bug before where the * was removed, then the SMI died fantastically. Work could be done to try to handle that type of situation, but the SMI has a “show current clocks” and uses the * to determine which one is active
>>
>>
>>
>> Kent
>>
>>
>>
>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Russell, Kent
>> Sent: Monday, October 18, 2021 9:05 PM
>> To: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Quan, Evan <Evan.Quan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>
>> Subject: RE: [PATCH 0/5] 0 MHz is not a valid current frequency
>>
>>
>>
>> [AMD Official Use Only]
>>
>>
>>
>> +Harish, rocm-smi falls under his purview now.
>>
>>
>>
>> Kent
>>
>>
>>
>> From: Tuikov, Luben <Luben.Tuikov@amd.com>
>> Sent: Monday, October 18, 2021 4:30 PM
>> To: Deucher, Alexander <Alexander.Deucher@amd.com>; Quan, Evan <Evan.Quan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org; Russell, Kent <Kent.Russell@amd.com>
>> Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency
>>
>>
>>
>> I think Kent is already seen these patches as he did comment on 1/5 patch.
>>
>> The v3 version of the patch, posted last week, removes the asterisk to report the lowest frequency as the current frequency, when the current frequency is 0, i.e. when the block is in low power state. Does the tool rely on the asterisk? If this information is necessary could it not use amdgpu_pm_info?
>>
>> Regards,
>> Luben
>>
>> On 2021-10-18 16:19, Deucher, Alexander wrote:
>>
>> [Public]
>>
>>
>>
>> We the current behavior (0 for clock) already crashes the tool, so I don't think we can really make things worse.
>>
>>
>>
>> Alex
>>
>>
>>
>> ________________________________
>>
>> From: Quan, Evan <Evan.Quan@amd.com>
>> Sent: Thursday, October 14, 2021 10:25 PM
>> To: Lazar, Lijo <Lijo.Lazar@amd.com>; Tuikov, Luben <Luben.Tuikov@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; Russell, Kent <Kent.Russell@amd.com>
>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
>> Subject: RE: [PATCH 0/5] 0 MHz is not a valid current frequency
>>
>>
>>
>> [AMD Official Use Only]
>>
>>
>>
>> +Kent who maintains the Rocm tool
>>
>>
>>
>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Lazar, Lijo
>> Sent: Thursday, October 14, 2021 1:07 AM
>> To: Tuikov, Luben <Luben.Tuikov@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
>> Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency
>>
>>
>>
>> [AMD Official Use Only]
>>
>>
>>
>> [AMD Official Use Only]
>>
>>
>>
>>> Or maybe just a list without default hint, i.e. no asterisk?
>>
>>
>> I think this is also fine meaning we are having trouble in determining the current frequency or DPM level. Evan/Alex? Don't know if this will crash the tools.
>>
>>
>>
>> Thanks,
>> Lijo
>>
>> ________________________________
>>
>> From: Tuikov, Luben <Luben.Tuikov@amd.com>
>> Sent: Wednesday, October 13, 2021 9:52:09 PM
>> To: Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
>> Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency
>>
>>
>>
>> On 2021-10-13 00:14, Lazar, Lijo wrote:
>>> On 10/13/2021 8:40 AM, Luben Tuikov wrote:
>>>> Some ASIC support low-power functionality for the whole ASIC or just
>>>> an IP block. When in such low-power mode, some sysfs interfaces would
>>>> report a frequency of 0, e.g.,
>>>>
>>>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>>>> 0: 500Mhz
>>>> 1: 0Mhz *
>>>> 2: 2200Mhz
>>>> $_
>>>>
>>>> An operating frequency of 0 MHz doesn't make sense, and this interface
>>>> is designed to report only operating clock frequencies, i.e. non-zero,
>>>> and possibly the current one.
>>>>
>>>> When in this low-power state, round to the smallest
>>>> operating frequency, for this interface, as follows,
>>>>
>>> Would rather avoid this -
>>>
>>> 1) It is manipulating FW reported value. If at all there is an uncaught
>>> issue in FW reporting of frequency values, that is masked here.
>>> 2) Otherwise, if 0MHz is described as GFX power gated case, this
>>> provides a convenient interface to check if GFX is power gated.
>>>
>>> If seeing a '0' is not pleasing, consider changing to something like
>>>        "NA" - not available (frequency cannot be fetched at the moment).
>> There's a ROCm tool which literally asserts if the values are not ordered in increasing order. Now since 0 < 550, but 0 is listed as the second entry, the tool simply asserts and crashes.
>>
>> It is not clear what you'd rather see here:
>>
>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>> 0: 550Mhz
>> 1: N/A *
>> 2: 2200MHz
>> $_
>>
>> Is this what you want to see? (That'll crash other tools which expect %uMhz.)
>>
>> Or maybe just a list without default hint, i.e. no asterisk?
>>
>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>> 0: 550Mhz
>> 1: 2200MHz
>> $_
>>
>> What should the output be?
>>
>> We want to avoid showing 0, but still show numbers.
>>
>> Regards,
>> Luben
>>
>>> Thanks,
>>> Lijo
>>>
>>>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>>>> 0: 500Mhz *
>>>> 1: 2200Mhz
>>>> $_
>>>>
>>>> Luben Tuikov (5):
>>>>    drm/amd/pm: Slight function rename
>>>>    drm/amd/pm: Rename cur_value to curr_value
>>>>    drm/amd/pm: Rename freq_values --> freq_value
>>>>    dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
>>>>    dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency
>>>>
>>>>   .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +++++++++------
>>>>   .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ++++++++++++-------
>>>>   2 files changed, 86 insertions(+), 47 deletions(-)
>>>>
>>
>>
>>
>>
>>


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-26 22:00                         ` Luben Tuikov
@ 2021-10-27  5:20                           ` Lazar, Lijo
  2021-10-27 15:12                             ` Alex Deucher
  0 siblings, 1 reply; 29+ messages in thread
From: Lazar, Lijo @ 2021-10-27  5:20 UTC (permalink / raw)
  To: Luben Tuikov, Alex Deucher
  Cc: Russell, Kent, Deucher, Alexander, Quan, Evan, amd-gfx,
	Kasiviswanathan, Harish



On 10/27/2021 3:30 AM, Luben Tuikov wrote:
> On 2021-10-26 17:26, Alex Deucher wrote:
>> On Tue, Oct 19, 2021 at 9:54 AM Luben Tuikov <luben.tuikov@amd.com> wrote:
>>> It again fails with the same message!
>>> But this time it is different!
>>> Here's why:
>>>
>>> openat(AT_FDCWD, "/sys/class/drm/card0/device/pp_dpm_fclk", O_RDONLY) = 3
>>> read(3, "0: 571Mhz \n1: 1274Mhz *\n2: 1221M"..., 8191) = 36
>>> read(3, "", 8191)                       = 0
>>> close(3)                                = 0
>>> write(2, "python3: /home/ltuikov/proj/amd/"..., 220python3: /home/ltuikov/proj/amd/rocm_smi_lib/src/rocm_smi.cc:913: rsmi_status_t get_frequencies(amd::smi::DevInfoTypes, uint32_t, rsmi_frequencies_t*, uint32_t*): Assertion `f->frequency[i-1] <= f->frequency[i]' failed.
>>> ) = 220
>>> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f531f9bc000
>>> rt_sigprocmask(SIG_UNBLOCK, [ABRT], NULL, 8) = 0
>>> rt_sigprocmask(SIG_BLOCK, ~[RTMIN RT_1], [], 8) = 0
>>> getpid()                                = 37861
>>> gettid()                                = 37861
>>> tgkill(37861, 37861, SIGABRT)           = 0
>>> rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
>>> --- SIGABRT {si_signo=SIGABRT, si_code=SI_TKILL, si_pid=37861, si_uid=1000} ---
>>> +++ killed by SIGABRT (core dumped) +++
>>> Aborted (core dumped)
>>> $cat /sys/class/drm/card0/device/pp_dpm_fclk
>>> 0: 571Mhz
>>> 1: 1274Mhz *
>>> 2: 1221Mhz
>>> $_
>>>
>>> Why is the mid frequency larger than the last?
>>> Why does get_frequencies() insists that they be ordered when they're not? (Does the tool need fixing or the kernel?)
>>>
>>> The current patchset doesn't report 0, and doesn't report any current if 0 would've been reported as current. But anything else is reported as it would've been reported before the patch. And I tested it with vanilla amd-staging-drm-next--same thing.
>>>
>> Seems to crash both ways.  I'd rather either:
>> 1. Remove the * when the clock is outside of the min and max ranges
>> or
>> 2. Clamp the clock to the max or min if it's above or below.
>>
>> And then fix the tools accordingly.  Those seem like the choices of
>> least surprise considering the interface is supposed to show the
>> current and available DPM levels.
> 
> So, if we get a case such as the one above, we remove the whole line at 1:, not just the asterisk, right? (for option 1 above).
> The rocm-smi lib would fail if they're out of order, so only removing the * char would still confuse the tool.
> 
> What does it mean when the current frequency (line 1:) is above the "max" (line 2:) as shown in my output above?
> 
> Do we perhaps want to sort them and report current still, and swap lines 1 and 2?
> 
> Or should we simply remove the ordering requirement in rocm-smi lib?
> 
These nodes help to find the current state of ASIC. Clamping the values 
will just erase questions like these and possible improvements/fixes.

For ex: if the situation above is a case of OD (this is only example, 
don't know what is really the case above) that goes beyond regular DPM 
min/max levels, then we could add + as improvement.

IMO, the node should reflect the current state of ASIC and masking the 
values shouldn't be done. Other cases could be 0 or FW handshake 
failures where * itself will be missing.

Thanks,
Lijo

> Regards,
> Luben
> 
>>
>> Alex
>>
>>
>>> Regards,
>>> Luben
>>>
>>>
>>> On 2021-10-19 09:25, Russell, Kent wrote:
>>>
>>> [AMD Official Use Only]
>>>
>>>
>>>
>>> It was the rocm-smi -c flag. Maybe some work was done to make it more robust, that would be nice. But the -c flag is supposed to show the current frequency for each clock type. -g would do the same, but just for SCLK.
>>>
>>>
>>>
>>> Kent
>>>
>>>
>>>
>>> From: Tuikov, Luben <Luben.Tuikov@amd.com>
>>> Sent: Tuesday, October 19, 2021 12:27 AM
>>> To: Russell, Kent <Kent.Russell@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Quan, Evan <Evan.Quan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org
>>> Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>
>>> Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency
>>>
>>>
>>>
>>> Kent,
>>>
>>> What is the command which fails?
>>> I can try to duplicate it here.
>>>
>>> So far, things I've tried, I cannot make rocm-smi fail. Command arguments?
>>>
>>> Regards,
>>> Luben
>>>
>>> On 2021-10-18 21:06, Russell, Kent wrote:
>>>
>>> [AMD Official Use Only]
>>>
>>>
>>>
>>> The * is required for the rocm-smi’s functionality for showing what the current clocks are. We had a bug before where the * was removed, then the SMI died fantastically. Work could be done to try to handle that type of situation, but the SMI has a “show current clocks” and uses the * to determine which one is active
>>>
>>>
>>>
>>> Kent
>>>
>>>
>>>
>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Russell, Kent
>>> Sent: Monday, October 18, 2021 9:05 PM
>>> To: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Quan, Evan <Evan.Quan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org
>>> Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>
>>> Subject: RE: [PATCH 0/5] 0 MHz is not a valid current frequency
>>>
>>>
>>>
>>> [AMD Official Use Only]
>>>
>>>
>>>
>>> +Harish, rocm-smi falls under his purview now.
>>>
>>>
>>>
>>> Kent
>>>
>>>
>>>
>>> From: Tuikov, Luben <Luben.Tuikov@amd.com>
>>> Sent: Monday, October 18, 2021 4:30 PM
>>> To: Deucher, Alexander <Alexander.Deucher@amd.com>; Quan, Evan <Evan.Quan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org; Russell, Kent <Kent.Russell@amd.com>
>>> Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency
>>>
>>>
>>>
>>> I think Kent is already seen these patches as he did comment on 1/5 patch.
>>>
>>> The v3 version of the patch, posted last week, removes the asterisk to report the lowest frequency as the current frequency, when the current frequency is 0, i.e. when the block is in low power state. Does the tool rely on the asterisk? If this information is necessary could it not use amdgpu_pm_info?
>>>
>>> Regards,
>>> Luben
>>>
>>> On 2021-10-18 16:19, Deucher, Alexander wrote:
>>>
>>> [Public]
>>>
>>>
>>>
>>> We the current behavior (0 for clock) already crashes the tool, so I don't think we can really make things worse.
>>>
>>>
>>>
>>> Alex
>>>
>>>
>>>
>>> ________________________________
>>>
>>> From: Quan, Evan <Evan.Quan@amd.com>
>>> Sent: Thursday, October 14, 2021 10:25 PM
>>> To: Lazar, Lijo <Lijo.Lazar@amd.com>; Tuikov, Luben <Luben.Tuikov@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; Russell, Kent <Kent.Russell@amd.com>
>>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
>>> Subject: RE: [PATCH 0/5] 0 MHz is not a valid current frequency
>>>
>>>
>>>
>>> [AMD Official Use Only]
>>>
>>>
>>>
>>> +Kent who maintains the Rocm tool
>>>
>>>
>>>
>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Lazar, Lijo
>>> Sent: Thursday, October 14, 2021 1:07 AM
>>> To: Tuikov, Luben <Luben.Tuikov@amd.com>; amd-gfx@lists.freedesktop.org
>>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
>>> Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency
>>>
>>>
>>>
>>> [AMD Official Use Only]
>>>
>>>
>>>
>>> [AMD Official Use Only]
>>>
>>>
>>>
>>>> Or maybe just a list without default hint, i.e. no asterisk?
>>>
>>>
>>> I think this is also fine meaning we are having trouble in determining the current frequency or DPM level. Evan/Alex? Don't know if this will crash the tools.
>>>
>>>
>>>
>>> Thanks,
>>> Lijo
>>>
>>> ________________________________
>>>
>>> From: Tuikov, Luben <Luben.Tuikov@amd.com>
>>> Sent: Wednesday, October 13, 2021 9:52:09 PM
>>> To: Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
>>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
>>> Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency
>>>
>>>
>>>
>>> On 2021-10-13 00:14, Lazar, Lijo wrote:
>>>> On 10/13/2021 8:40 AM, Luben Tuikov wrote:
>>>>> Some ASIC support low-power functionality for the whole ASIC or just
>>>>> an IP block. When in such low-power mode, some sysfs interfaces would
>>>>> report a frequency of 0, e.g.,
>>>>>
>>>>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>>>>> 0: 500Mhz
>>>>> 1: 0Mhz *
>>>>> 2: 2200Mhz
>>>>> $_
>>>>>
>>>>> An operating frequency of 0 MHz doesn't make sense, and this interface
>>>>> is designed to report only operating clock frequencies, i.e. non-zero,
>>>>> and possibly the current one.
>>>>>
>>>>> When in this low-power state, round to the smallest
>>>>> operating frequency, for this interface, as follows,
>>>>>
>>>> Would rather avoid this -
>>>>
>>>> 1) It is manipulating FW reported value. If at all there is an uncaught
>>>> issue in FW reporting of frequency values, that is masked here.
>>>> 2) Otherwise, if 0MHz is described as GFX power gated case, this
>>>> provides a convenient interface to check if GFX is power gated.
>>>>
>>>> If seeing a '0' is not pleasing, consider changing to something like
>>>>         "NA" - not available (frequency cannot be fetched at the moment).
>>> There's a ROCm tool which literally asserts if the values are not ordered in increasing order. Now since 0 < 550, but 0 is listed as the second entry, the tool simply asserts and crashes.
>>>
>>> It is not clear what you'd rather see here:
>>>
>>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>>> 0: 550Mhz
>>> 1: N/A *
>>> 2: 2200MHz
>>> $_
>>>
>>> Is this what you want to see? (That'll crash other tools which expect %uMhz.)
>>>
>>> Or maybe just a list without default hint, i.e. no asterisk?
>>>
>>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>>> 0: 550Mhz
>>> 1: 2200MHz
>>> $_
>>>
>>> What should the output be?
>>>
>>> We want to avoid showing 0, but still show numbers.
>>>
>>> Regards,
>>> Luben
>>>
>>>> Thanks,
>>>> Lijo
>>>>
>>>>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
>>>>> 0: 500Mhz *
>>>>> 1: 2200Mhz
>>>>> $_
>>>>>
>>>>> Luben Tuikov (5):
>>>>>     drm/amd/pm: Slight function rename
>>>>>     drm/amd/pm: Rename cur_value to curr_value
>>>>>     drm/amd/pm: Rename freq_values --> freq_value
>>>>>     dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
>>>>>     dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency
>>>>>
>>>>>    .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +++++++++------
>>>>>    .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ++++++++++++-------
>>>>>    2 files changed, 86 insertions(+), 47 deletions(-)
>>>>>
>>>
>>>
>>>
>>>
>>>
> 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/5] 0 MHz is not a valid current frequency
  2021-10-27  5:20                           ` Lazar, Lijo
@ 2021-10-27 15:12                             ` Alex Deucher
  0 siblings, 0 replies; 29+ messages in thread
From: Alex Deucher @ 2021-10-27 15:12 UTC (permalink / raw)
  To: Lazar, Lijo
  Cc: Luben Tuikov, Russell, Kent, Deucher, Alexander, Quan, Evan,
	amd-gfx, Kasiviswanathan, Harish

On Wed, Oct 27, 2021 at 1:20 AM Lazar, Lijo <lijo.lazar@amd.com> wrote:
>
>
>
> On 10/27/2021 3:30 AM, Luben Tuikov wrote:
> > On 2021-10-26 17:26, Alex Deucher wrote:
> >> On Tue, Oct 19, 2021 at 9:54 AM Luben Tuikov <luben.tuikov@amd.com> wrote:
> >>> It again fails with the same message!
> >>> But this time it is different!
> >>> Here's why:
> >>>
> >>> openat(AT_FDCWD, "/sys/class/drm/card0/device/pp_dpm_fclk", O_RDONLY) = 3
> >>> read(3, "0: 571Mhz \n1: 1274Mhz *\n2: 1221M"..., 8191) = 36
> >>> read(3, "", 8191)                       = 0
> >>> close(3)                                = 0
> >>> write(2, "python3: /home/ltuikov/proj/amd/"..., 220python3: /home/ltuikov/proj/amd/rocm_smi_lib/src/rocm_smi.cc:913: rsmi_status_t get_frequencies(amd::smi::DevInfoTypes, uint32_t, rsmi_frequencies_t*, uint32_t*): Assertion `f->frequency[i-1] <= f->frequency[i]' failed.
> >>> ) = 220
> >>> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f531f9bc000
> >>> rt_sigprocmask(SIG_UNBLOCK, [ABRT], NULL, 8) = 0
> >>> rt_sigprocmask(SIG_BLOCK, ~[RTMIN RT_1], [], 8) = 0
> >>> getpid()                                = 37861
> >>> gettid()                                = 37861
> >>> tgkill(37861, 37861, SIGABRT)           = 0
> >>> rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
> >>> --- SIGABRT {si_signo=SIGABRT, si_code=SI_TKILL, si_pid=37861, si_uid=1000} ---
> >>> +++ killed by SIGABRT (core dumped) +++
> >>> Aborted (core dumped)
> >>> $cat /sys/class/drm/card0/device/pp_dpm_fclk
> >>> 0: 571Mhz
> >>> 1: 1274Mhz *
> >>> 2: 1221Mhz
> >>> $_
> >>>
> >>> Why is the mid frequency larger than the last?
> >>> Why does get_frequencies() insists that they be ordered when they're not? (Does the tool need fixing or the kernel?)
> >>>
> >>> The current patchset doesn't report 0, and doesn't report any current if 0 would've been reported as current. But anything else is reported as it would've been reported before the patch. And I tested it with vanilla amd-staging-drm-next--same thing.
> >>>
> >> Seems to crash both ways.  I'd rather either:
> >> 1. Remove the * when the clock is outside of the min and max ranges
> >> or
> >> 2. Clamp the clock to the max or min if it's above or below.
> >>
> >> And then fix the tools accordingly.  Those seem like the choices of
> >> least surprise considering the interface is supposed to show the
> >> current and available DPM levels.
> >
> > So, if we get a case such as the one above, we remove the whole line at 1:, not just the asterisk, right? (for option 1 above).
> > The rocm-smi lib would fail if they're out of order, so only removing the * char would still confuse the tool.
> >
> > What does it mean when the current frequency (line 1:) is above the "max" (line 2:) as shown in my output above?
> >
> > Do we perhaps want to sort them and report current still, and swap lines 1 and 2?
> >
> > Or should we simply remove the ordering requirement in rocm-smi lib?
> >
> These nodes help to find the current state of ASIC. Clamping the values
> will just erase questions like these and possible improvements/fixes.
>

I don't really think that is the case.  E.g., rocm-smi expects the
values to be ordered.  As was previously discussed, this was
originally added as an interface to see what the current DPM state was
and as a way to mask out specific DPM states.  When discrete DPM
states went away, we end up with just a min and max so a middle step
was added to show the current clock in relation to the min and max.
When the clock is on, DPM is active and the clock should be >= min and
<= max.

> For ex: if the situation above is a case of OD (this is only example,
> don't know what is really the case above) that goes beyond regular DPM
> min/max levels, then we could add + as improvement.

With over- or under-clocking, shouldn't the min/max change instead?

>
> IMO, the node should reflect the current state of ASIC and masking the
> values shouldn't be done. Other cases could be 0 or FW handshake
> failures where * itself will be missing.

I'm fine with dropping the * for cases outside of the min/max, but I
think clamping works as well.  E.g., while SMU reports 0 in some
cases, that could be due to the clock being off or the block being
powergated.  It's still in the lowest state.  If the clock were on, it
would be in the lowest state.  Same thing on the high end.  It's
possible there is a little slop in the clock calculations for
stability.  E.g., the clock may be a few Mhz above the max because SMU
determined that the current max frequency was not stable in
combination with some other clock.  It's still ostensibly in the
highest DPM state.

Alex


>
> Thanks,
> Lijo
>
> > Regards,
> > Luben
> >
> >>
> >> Alex
> >>
> >>
> >>> Regards,
> >>> Luben
> >>>
> >>>
> >>> On 2021-10-19 09:25, Russell, Kent wrote:
> >>>
> >>> [AMD Official Use Only]
> >>>
> >>>
> >>>
> >>> It was the rocm-smi -c flag. Maybe some work was done to make it more robust, that would be nice. But the -c flag is supposed to show the current frequency for each clock type. -g would do the same, but just for SCLK.
> >>>
> >>>
> >>>
> >>> Kent
> >>>
> >>>
> >>>
> >>> From: Tuikov, Luben <Luben.Tuikov@amd.com>
> >>> Sent: Tuesday, October 19, 2021 12:27 AM
> >>> To: Russell, Kent <Kent.Russell@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Quan, Evan <Evan.Quan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org
> >>> Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>
> >>> Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency
> >>>
> >>>
> >>>
> >>> Kent,
> >>>
> >>> What is the command which fails?
> >>> I can try to duplicate it here.
> >>>
> >>> So far, things I've tried, I cannot make rocm-smi fail. Command arguments?
> >>>
> >>> Regards,
> >>> Luben
> >>>
> >>> On 2021-10-18 21:06, Russell, Kent wrote:
> >>>
> >>> [AMD Official Use Only]
> >>>
> >>>
> >>>
> >>> The * is required for the rocm-smi’s functionality for showing what the current clocks are. We had a bug before where the * was removed, then the SMI died fantastically. Work could be done to try to handle that type of situation, but the SMI has a “show current clocks” and uses the * to determine which one is active
> >>>
> >>>
> >>>
> >>> Kent
> >>>
> >>>
> >>>
> >>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Russell, Kent
> >>> Sent: Monday, October 18, 2021 9:05 PM
> >>> To: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Quan, Evan <Evan.Quan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org
> >>> Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>
> >>> Subject: RE: [PATCH 0/5] 0 MHz is not a valid current frequency
> >>>
> >>>
> >>>
> >>> [AMD Official Use Only]
> >>>
> >>>
> >>>
> >>> +Harish, rocm-smi falls under his purview now.
> >>>
> >>>
> >>>
> >>> Kent
> >>>
> >>>
> >>>
> >>> From: Tuikov, Luben <Luben.Tuikov@amd.com>
> >>> Sent: Monday, October 18, 2021 4:30 PM
> >>> To: Deucher, Alexander <Alexander.Deucher@amd.com>; Quan, Evan <Evan.Quan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org; Russell, Kent <Kent.Russell@amd.com>
> >>> Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency
> >>>
> >>>
> >>>
> >>> I think Kent is already seen these patches as he did comment on 1/5 patch.
> >>>
> >>> The v3 version of the patch, posted last week, removes the asterisk to report the lowest frequency as the current frequency, when the current frequency is 0, i.e. when the block is in low power state. Does the tool rely on the asterisk? If this information is necessary could it not use amdgpu_pm_info?
> >>>
> >>> Regards,
> >>> Luben
> >>>
> >>> On 2021-10-18 16:19, Deucher, Alexander wrote:
> >>>
> >>> [Public]
> >>>
> >>>
> >>>
> >>> We the current behavior (0 for clock) already crashes the tool, so I don't think we can really make things worse.
> >>>
> >>>
> >>>
> >>> Alex
> >>>
> >>>
> >>>
> >>> ________________________________
> >>>
> >>> From: Quan, Evan <Evan.Quan@amd.com>
> >>> Sent: Thursday, October 14, 2021 10:25 PM
> >>> To: Lazar, Lijo <Lijo.Lazar@amd.com>; Tuikov, Luben <Luben.Tuikov@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; Russell, Kent <Kent.Russell@amd.com>
> >>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
> >>> Subject: RE: [PATCH 0/5] 0 MHz is not a valid current frequency
> >>>
> >>>
> >>>
> >>> [AMD Official Use Only]
> >>>
> >>>
> >>>
> >>> +Kent who maintains the Rocm tool
> >>>
> >>>
> >>>
> >>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Lazar, Lijo
> >>> Sent: Thursday, October 14, 2021 1:07 AM
> >>> To: Tuikov, Luben <Luben.Tuikov@amd.com>; amd-gfx@lists.freedesktop.org
> >>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
> >>> Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency
> >>>
> >>>
> >>>
> >>> [AMD Official Use Only]
> >>>
> >>>
> >>>
> >>> [AMD Official Use Only]
> >>>
> >>>
> >>>
> >>>> Or maybe just a list without default hint, i.e. no asterisk?
> >>>
> >>>
> >>> I think this is also fine meaning we are having trouble in determining the current frequency or DPM level. Evan/Alex? Don't know if this will crash the tools.
> >>>
> >>>
> >>>
> >>> Thanks,
> >>> Lijo
> >>>
> >>> ________________________________
> >>>
> >>> From: Tuikov, Luben <Luben.Tuikov@amd.com>
> >>> Sent: Wednesday, October 13, 2021 9:52:09 PM
> >>> To: Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> >>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
> >>> Subject: Re: [PATCH 0/5] 0 MHz is not a valid current frequency
> >>>
> >>>
> >>>
> >>> On 2021-10-13 00:14, Lazar, Lijo wrote:
> >>>> On 10/13/2021 8:40 AM, Luben Tuikov wrote:
> >>>>> Some ASIC support low-power functionality for the whole ASIC or just
> >>>>> an IP block. When in such low-power mode, some sysfs interfaces would
> >>>>> report a frequency of 0, e.g.,
> >>>>>
> >>>>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
> >>>>> 0: 500Mhz
> >>>>> 1: 0Mhz *
> >>>>> 2: 2200Mhz
> >>>>> $_
> >>>>>
> >>>>> An operating frequency of 0 MHz doesn't make sense, and this interface
> >>>>> is designed to report only operating clock frequencies, i.e. non-zero,
> >>>>> and possibly the current one.
> >>>>>
> >>>>> When in this low-power state, round to the smallest
> >>>>> operating frequency, for this interface, as follows,
> >>>>>
> >>>> Would rather avoid this -
> >>>>
> >>>> 1) It is manipulating FW reported value. If at all there is an uncaught
> >>>> issue in FW reporting of frequency values, that is masked here.
> >>>> 2) Otherwise, if 0MHz is described as GFX power gated case, this
> >>>> provides a convenient interface to check if GFX is power gated.
> >>>>
> >>>> If seeing a '0' is not pleasing, consider changing to something like
> >>>>         "NA" - not available (frequency cannot be fetched at the moment).
> >>> There's a ROCm tool which literally asserts if the values are not ordered in increasing order. Now since 0 < 550, but 0 is listed as the second entry, the tool simply asserts and crashes.
> >>>
> >>> It is not clear what you'd rather see here:
> >>>
> >>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
> >>> 0: 550Mhz
> >>> 1: N/A *
> >>> 2: 2200MHz
> >>> $_
> >>>
> >>> Is this what you want to see? (That'll crash other tools which expect %uMhz.)
> >>>
> >>> Or maybe just a list without default hint, i.e. no asterisk?
> >>>
> >>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
> >>> 0: 550Mhz
> >>> 1: 2200MHz
> >>> $_
> >>>
> >>> What should the output be?
> >>>
> >>> We want to avoid showing 0, but still show numbers.
> >>>
> >>> Regards,
> >>> Luben
> >>>
> >>>> Thanks,
> >>>> Lijo
> >>>>
> >>>>> $cat /sys/class/drm/card0/device/pp_dpm_sclk
> >>>>> 0: 500Mhz *
> >>>>> 1: 2200Mhz
> >>>>> $_
> >>>>>
> >>>>> Luben Tuikov (5):
> >>>>>     drm/amd/pm: Slight function rename
> >>>>>     drm/amd/pm: Rename cur_value to curr_value
> >>>>>     drm/amd/pm: Rename freq_values --> freq_value
> >>>>>     dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency
> >>>>>     dpm/amd/pm: Navi10: 0 MHz is not a current clock frequency
> >>>>>
> >>>>>    .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 60 +++++++++------
> >>>>>    .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 73 ++++++++++++-------
> >>>>>    2 files changed, 86 insertions(+), 47 deletions(-)
> >>>>>
> >>>
> >>>
> >>>
> >>>
> >>>
> >

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2021-10-27 15:12 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-13  3:10 [PATCH 0/5] 0 MHz is not a valid current frequency Luben Tuikov
2021-10-13  3:10 ` [PATCH 1/5] drm/amd/pm: Slight function rename Luben Tuikov
2021-10-13 14:50   ` Russell, Kent
2021-10-13 14:53     ` Luben Tuikov
2021-10-18 22:52   ` Paul Menzel
2021-10-18 23:02     ` Luben Tuikov
2021-10-13  3:10 ` [PATCH 2/5] drm/amd/pm: Rename cur_value to curr_value Luben Tuikov
2021-10-13  3:10 ` [PATCH 3/5] drm/amd/pm: Rename freq_values --> freq_value Luben Tuikov
2021-10-13  3:10 ` [PATCH 4/5] dpm/amd/pm: Sienna: 0 MHz is not a current clock frequency Luben Tuikov
2021-10-13  3:10 ` [PATCH 5/5] dpm/amd/pm: Navi10: " Luben Tuikov
2021-10-13  4:14 ` [PATCH 0/5] 0 MHz is not a valid current frequency Lazar, Lijo
2021-10-13  7:06   ` Quan, Evan
2021-10-13 13:58   ` Alex Deucher
2021-10-13 14:17     ` Lazar, Lijo
2021-10-13 16:22   ` Luben Tuikov
2021-10-13 17:06     ` Lazar, Lijo
2021-10-13 17:19       ` Alex Deucher
2021-10-15  2:25       ` Quan, Evan
2021-10-18 20:19         ` Deucher, Alexander
2021-10-18 20:29           ` Luben Tuikov
2021-10-19  1:04             ` Russell, Kent
2021-10-19  1:06               ` Russell, Kent
2021-10-19  4:27                 ` Luben Tuikov
2021-10-19 13:25                   ` Russell, Kent
2021-10-19 13:54                     ` Luben Tuikov
2021-10-26 21:26                       ` Alex Deucher
2021-10-26 22:00                         ` Luben Tuikov
2021-10-27  5:20                           ` Lazar, Lijo
2021-10-27 15:12                             ` Alex Deucher

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.