* [PATCH 1/2] drm/amdgpu: fix possible pstate switch race condition
@ 2019-11-05 10:23 ` Quan, Evan
0 siblings, 0 replies; 20+ messages in thread
From: Quan, Evan @ 2019-11-05 10:23 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Strawbridge, Michael, Kim, Jonathan, Quan, Evan
Added lock protection so that the p-state switch will
be guarded to be sequential. Also update the hive
pstate only all device from the hive are in the same
state.
Change-Id: I165a6f44e8aec1e6da56eefa0fc49d36670e56fe
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 34 ++++++++++++++++++++++--
2 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0469cc51a6fb..41cf2abd6209 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1041,6 +1041,9 @@ struct amdgpu_device {
uint64_t unique_id;
uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
+
+ /* device pstate */
+ int pstate;
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 167d9fbd2c4f..de20a9a1c444 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -274,12 +274,18 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
{
int ret = 0;
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+ struct amdgpu_device *tmp_adev;
+ bool update_hive_pstate = true;
if (!hive)
return 0;
- if (hive->pstate == pstate)
+ mutex_lock(&hive->hive_lock);
+
+ if (hive->pstate == pstate) {
+ mutex_unlock(&hive->hive_lock);
return 0;
+ }
dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate);
@@ -290,11 +296,32 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle,
pstate);
- if (ret)
+ if (ret) {
dev_err(adev->dev,
"XGMI: Set pstate failure on device %llx, hive %llx, ret %d",
adev->gmc.xgmi.node_id,
adev->gmc.xgmi.hive_id, ret);
+ goto out;
+ }
+
+ /* Update device pstate */
+ adev->pstate = pstate;
+
+ /*
+ * Update the hive pstate only all devices of the hive
+ * are in the same pstate
+ */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ if (tmp_adev->pstate != adev->pstate) {
+ update_hive_pstate = false;
+ break;
+ }
+ }
+ if (update_hive_pstate)
+ hive->pstate = pstate;
+
+out:
+ mutex_unlock(&hive->hive_lock);
return ret;
}
@@ -369,6 +396,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit;
}
+ /* Set default device pstate */
+ adev->pstate = -1;
+
top_info = &adev->psp.xgmi_context.top_info;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
--
2.23.0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH 1/2] drm/amdgpu: fix possible pstate switch race condition
@ 2019-11-05 10:23 ` Quan, Evan
0 siblings, 0 replies; 20+ messages in thread
From: Quan, Evan @ 2019-11-05 10:23 UTC (permalink / raw)
To: amd-gfx; +Cc: Strawbridge, Michael, Kim, Jonathan, Quan, Evan
Added lock protection so that the p-state switch will
be guarded to be sequential. Also update the hive
pstate only all device from the hive are in the same
state.
Change-Id: I165a6f44e8aec1e6da56eefa0fc49d36670e56fe
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 34 ++++++++++++++++++++++--
2 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0469cc51a6fb..41cf2abd6209 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1041,6 +1041,9 @@ struct amdgpu_device {
uint64_t unique_id;
uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
+
+ /* device pstate */
+ int pstate;
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 167d9fbd2c4f..de20a9a1c444 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -274,12 +274,18 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
{
int ret = 0;
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+ struct amdgpu_device *tmp_adev;
+ bool update_hive_pstate = true;
if (!hive)
return 0;
- if (hive->pstate == pstate)
+ mutex_lock(&hive->hive_lock);
+
+ if (hive->pstate == pstate) {
+ mutex_unlock(&hive->hive_lock);
return 0;
+ }
dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate);
@@ -290,11 +296,32 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle,
pstate);
- if (ret)
+ if (ret) {
dev_err(adev->dev,
"XGMI: Set pstate failure on device %llx, hive %llx, ret %d",
adev->gmc.xgmi.node_id,
adev->gmc.xgmi.hive_id, ret);
+ goto out;
+ }
+
+ /* Update device pstate */
+ adev->pstate = pstate;
+
+ /*
+ * Update the hive pstate only all devices of the hive
+ * are in the same pstate
+ */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ if (tmp_adev->pstate != adev->pstate) {
+ update_hive_pstate = false;
+ break;
+ }
+ }
+ if (update_hive_pstate)
+ hive->pstate = pstate;
+
+out:
+ mutex_unlock(&hive->hive_lock);
return ret;
}
@@ -369,6 +396,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit;
}
+ /* Set default device pstate */
+ adev->pstate = -1;
+
top_info = &adev->psp.xgmi_context.top_info;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
--
2.23.0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
@ 2019-11-05 10:23 ` Quan, Evan
0 siblings, 0 replies; 20+ messages in thread
From: Quan, Evan @ 2019-11-05 10:23 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Strawbridge, Michael, Kim, Jonathan, Quan, Evan
P-state switch should be performed after all devices from the hive
get initialized.
Change-Id: Ifc7cac9ef0cf250447d2a412da35d601e2ac79ec
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 47 ++++++++++++++++------
1 file changed, 35 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e6ce949670e5..2d72d206cead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2090,6 +2090,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
{
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -2115,6 +2116,40 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
+
return 0;
}
@@ -2227,18 +2262,6 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
-
- /*
- * set to low pstate by default
- * This should be performed after all devices from
- * XGMI finish their initializations. Thus it's moved
- * to here.
- * The time delay is 2S. TODO: confirm whether that
- * is enough for all possible XGMI setups.
- */
- r = amdgpu_xgmi_set_pstate(adev, 0);
- if (r)
- DRM_ERROR("pstate setting failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
--
2.23.0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
@ 2019-11-05 10:23 ` Quan, Evan
0 siblings, 0 replies; 20+ messages in thread
From: Quan, Evan @ 2019-11-05 10:23 UTC (permalink / raw)
To: amd-gfx; +Cc: Strawbridge, Michael, Kim, Jonathan, Quan, Evan
P-state switch should be performed after all devices from the hive
get initialized.
Change-Id: Ifc7cac9ef0cf250447d2a412da35d601e2ac79ec
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 47 ++++++++++++++++------
1 file changed, 35 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e6ce949670e5..2d72d206cead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2090,6 +2090,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
{
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -2115,6 +2116,40 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
+
return 0;
}
@@ -2227,18 +2262,6 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
-
- /*
- * set to low pstate by default
- * This should be performed after all devices from
- * XGMI finish their initializations. Thus it's moved
- * to here.
- * The time delay is 2S. TODO: confirm whether that
- * is enough for all possible XGMI setups.
- */
- r = amdgpu_xgmi_set_pstate(adev, 0);
- if (r)
- DRM_ERROR("pstate setting failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
--
2.23.0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* RE: [PATCH 1/2] drm/amdgpu: fix possible pstate switch race condition
@ 2019-11-05 11:34 ` Xu, Feifei
0 siblings, 0 replies; 20+ messages in thread
From: Xu, Feifei @ 2019-11-05 11:34 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Strawbridge, Michael, Quan, Evan, Kim, Jonathan
Series is Reviewed-by: Feifei Xu <Feifei.Xu@amd.com>
-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Quan, Evan
Sent: 2019年11月5日 18:24
To: amd-gfx@lists.freedesktop.org
Cc: Strawbridge, Michael <Michael.Strawbridge@amd.com>; Kim, Jonathan <Jonathan.Kim@amd.com>; Quan, Evan <Evan.Quan@amd.com>
Subject: [PATCH 1/2] drm/amdgpu: fix possible pstate switch race condition
Added lock protection so that the p-state switch will be guarded to be sequential. Also update the hive pstate only all device from the hive are in the same state.
Change-Id: I165a6f44e8aec1e6da56eefa0fc49d36670e56fe
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 34 ++++++++++++++++++++++--
2 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0469cc51a6fb..41cf2abd6209 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1041,6 +1041,9 @@ struct amdgpu_device {
uint64_t unique_id;
uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
+
+ /* device pstate */
+ int pstate;
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 167d9fbd2c4f..de20a9a1c444 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -274,12 +274,18 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) {
int ret = 0;
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+ struct amdgpu_device *tmp_adev;
+ bool update_hive_pstate = true;
if (!hive)
return 0;
- if (hive->pstate == pstate)
+ mutex_lock(&hive->hive_lock);
+
+ if (hive->pstate == pstate) {
+ mutex_unlock(&hive->hive_lock);
return 0;
+ }
dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate);
@@ -290,11 +296,32 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle,
pstate);
- if (ret)
+ if (ret) {
dev_err(adev->dev,
"XGMI: Set pstate failure on device %llx, hive %llx, ret %d",
adev->gmc.xgmi.node_id,
adev->gmc.xgmi.hive_id, ret);
+ goto out;
+ }
+
+ /* Update device pstate */
+ adev->pstate = pstate;
+
+ /*
+ * Update the hive pstate only all devices of the hive
+ * are in the same pstate
+ */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ if (tmp_adev->pstate != adev->pstate) {
+ update_hive_pstate = false;
+ break;
+ }
+ }
+ if (update_hive_pstate)
+ hive->pstate = pstate;
+
+out:
+ mutex_unlock(&hive->hive_lock);
return ret;
}
@@ -369,6 +396,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit;
}
+ /* Set default device pstate */
+ adev->pstate = -1;
+
top_info = &adev->psp.xgmi_context.top_info;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
--
2.23.0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* RE: [PATCH 1/2] drm/amdgpu: fix possible pstate switch race condition
@ 2019-11-05 11:34 ` Xu, Feifei
0 siblings, 0 replies; 20+ messages in thread
From: Xu, Feifei @ 2019-11-05 11:34 UTC (permalink / raw)
To: Quan, Evan, amd-gfx; +Cc: Strawbridge, Michael, Quan, Evan, Kim, Jonathan
Series is Reviewed-by: Feifei Xu <Feifei.Xu@amd.com>
-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Quan, Evan
Sent: 2019年11月5日 18:24
To: amd-gfx@lists.freedesktop.org
Cc: Strawbridge, Michael <Michael.Strawbridge@amd.com>; Kim, Jonathan <Jonathan.Kim@amd.com>; Quan, Evan <Evan.Quan@amd.com>
Subject: [PATCH 1/2] drm/amdgpu: fix possible pstate switch race condition
Added lock protection so that the p-state switch will be guarded to be sequential. Also update the hive pstate only all device from the hive are in the same state.
Change-Id: I165a6f44e8aec1e6da56eefa0fc49d36670e56fe
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 34 ++++++++++++++++++++++--
2 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0469cc51a6fb..41cf2abd6209 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1041,6 +1041,9 @@ struct amdgpu_device {
uint64_t unique_id;
uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
+
+ /* device pstate */
+ int pstate;
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 167d9fbd2c4f..de20a9a1c444 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -274,12 +274,18 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) {
int ret = 0;
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+ struct amdgpu_device *tmp_adev;
+ bool update_hive_pstate = true;
if (!hive)
return 0;
- if (hive->pstate == pstate)
+ mutex_lock(&hive->hive_lock);
+
+ if (hive->pstate == pstate) {
+ mutex_unlock(&hive->hive_lock);
return 0;
+ }
dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate);
@@ -290,11 +296,32 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle,
pstate);
- if (ret)
+ if (ret) {
dev_err(adev->dev,
"XGMI: Set pstate failure on device %llx, hive %llx, ret %d",
adev->gmc.xgmi.node_id,
adev->gmc.xgmi.hive_id, ret);
+ goto out;
+ }
+
+ /* Update device pstate */
+ adev->pstate = pstate;
+
+ /*
+ * Update the hive pstate only all devices of the hive
+ * are in the same pstate
+ */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ if (tmp_adev->pstate != adev->pstate) {
+ update_hive_pstate = false;
+ break;
+ }
+ }
+ if (update_hive_pstate)
+ hive->pstate = pstate;
+
+out:
+ mutex_unlock(&hive->hive_lock);
return ret;
}
@@ -369,6 +396,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit;
}
+ /* Set default device pstate */
+ adev->pstate = -1;
+
top_info = &adev->psp.xgmi_context.top_info;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
--
2.23.0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
@ 2019-11-05 16:32 ` Kim, Jonathan
0 siblings, 0 replies; 20+ messages in thread
From: Kim, Jonathan @ 2019-11-05 16:32 UTC (permalink / raw)
To: Quan, Evan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Strawbridge, Michael
Please see inline.
Jon
-----Original Message-----
From: Quan, Evan <Evan.Quan@amd.com>
Sent: Tuesday, November 5, 2019 5:24 AM
To: amd-gfx@lists.freedesktop.org
Cc: Kim, Jonathan <Jonathan.Kim@amd.com>; Strawbridge, Michael <Michael.Strawbridge@amd.com>; Quan, Evan <Evan.Quan@amd.com>
Subject: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
P-state switch should be performed after all devices from the hive get initialized.
Change-Id: Ifc7cac9ef0cf250447d2a412da35d601e2ac79ec
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 47 ++++++++++++++++------
1 file changed, 35 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e6ce949670e5..2d72d206cead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2090,6 +2090,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) {
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2115,6 +2116,40 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
[JK] This condition will never succeed. mgpu_info.num_dgpu is 0-indexed while num_physical_nodes give total nodes.
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
+
return 0;
}
@@ -2227,18 +2262,6 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
-
- /*
- * set to low pstate by default
- * This should be performed after all devices from
- * XGMI finish their initializations. Thus it's moved
- * to here.
- * The time delay is 2S. TODO: confirm whether that
- * is enough for all possible XGMI setups.
- */
- r = amdgpu_xgmi_set_pstate(adev, 0);
- if (r)
- DRM_ERROR("pstate setting failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
--
2.23.0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
@ 2019-11-05 16:32 ` Kim, Jonathan
0 siblings, 0 replies; 20+ messages in thread
From: Kim, Jonathan @ 2019-11-05 16:32 UTC (permalink / raw)
To: Quan, Evan, amd-gfx; +Cc: Strawbridge, Michael
Please see inline.
Jon
-----Original Message-----
From: Quan, Evan <Evan.Quan@amd.com>
Sent: Tuesday, November 5, 2019 5:24 AM
To: amd-gfx@lists.freedesktop.org
Cc: Kim, Jonathan <Jonathan.Kim@amd.com>; Strawbridge, Michael <Michael.Strawbridge@amd.com>; Quan, Evan <Evan.Quan@amd.com>
Subject: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
P-state switch should be performed after all devices from the hive get initialized.
Change-Id: Ifc7cac9ef0cf250447d2a412da35d601e2ac79ec
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 47 ++++++++++++++++------
1 file changed, 35 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e6ce949670e5..2d72d206cead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2090,6 +2090,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) {
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2115,6 +2116,40 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
[JK] This condition will never succeed. mgpu_info.num_dgpu is 0-indexed while num_physical_nodes give total nodes.
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
+
return 0;
}
@@ -2227,18 +2262,6 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
-
- /*
- * set to low pstate by default
- * This should be performed after all devices from
- * XGMI finish their initializations. Thus it's moved
- * to here.
- * The time delay is 2S. TODO: confirm whether that
- * is enough for all possible XGMI setups.
- */
- r = amdgpu_xgmi_set_pstate(adev, 0);
- if (r)
- DRM_ERROR("pstate setting failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
--
2.23.0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
@ 2019-11-05 16:39 ` Strawbridge, Michael
0 siblings, 0 replies; 20+ messages in thread
From: Strawbridge, Michael @ 2019-11-05 16:39 UTC (permalink / raw)
To: Kim, Jonathan, Quan, Evan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
[-- Attachment #1.1: Type: text/plain, Size: 4632 bytes --]
Hi Jon,
Does that mean we can simply use this instead?
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) {
Thanks,
Michael
________________________________
From: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>
Sent: 05 November 2019 11:32 AM
To: Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org <amd-gfx@lists.freedesktop.org>
Cc: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Please see inline.
Jon
-----Original Message-----
From: Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org>
Sent: Tuesday, November 5, 2019 5:24 AM
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Cc: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>; Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org>
Subject: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
P-state switch should be performed after all devices from the hive get initialized.
Change-Id: Ifc7cac9ef0cf250447d2a412da35d601e2ac79ec
Signed-off-by: Evan Quan <evan.quan-5C7GfCeVMHo@public.gmane.org>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 47 ++++++++++++++++------
1 file changed, 35 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e6ce949670e5..2d72d206cead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2090,6 +2090,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) {
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2115,6 +2116,40 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
[JK] This condition will never succeed. mgpu_info.num_dgpu is 0-indexed while num_physical_nodes give total nodes.
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
+
return 0;
}
@@ -2227,18 +2262,6 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
-
- /*
- * set to low pstate by default
- * This should be performed after all devices from
- * XGMI finish their initializations. Thus it's moved
- * to here.
- * The time delay is 2S. TODO: confirm whether that
- * is enough for all possible XGMI setups.
- */
- r = amdgpu_xgmi_set_pstate(adev, 0);
- if (r)
- DRM_ERROR("pstate setting failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
--
2.23.0
[-- Attachment #1.2: Type: text/html, Size: 11033 bytes --]
[-- Attachment #2: Type: text/plain, Size: 153 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
@ 2019-11-05 16:39 ` Strawbridge, Michael
0 siblings, 0 replies; 20+ messages in thread
From: Strawbridge, Michael @ 2019-11-05 16:39 UTC (permalink / raw)
To: Kim, Jonathan, Quan, Evan, amd-gfx
[-- Attachment #1.1: Type: text/plain, Size: 4408 bytes --]
Hi Jon,
Does that mean we can simply use this instead?
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) {
Thanks,
Michael
________________________________
From: Kim, Jonathan <Jonathan.Kim@amd.com>
Sent: 05 November 2019 11:32 AM
To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Cc: Strawbridge, Michael <Michael.Strawbridge@amd.com>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Please see inline.
Jon
-----Original Message-----
From: Quan, Evan <Evan.Quan@amd.com>
Sent: Tuesday, November 5, 2019 5:24 AM
To: amd-gfx@lists.freedesktop.org
Cc: Kim, Jonathan <Jonathan.Kim@amd.com>; Strawbridge, Michael <Michael.Strawbridge@amd.com>; Quan, Evan <Evan.Quan@amd.com>
Subject: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
P-state switch should be performed after all devices from the hive get initialized.
Change-Id: Ifc7cac9ef0cf250447d2a412da35d601e2ac79ec
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 47 ++++++++++++++++------
1 file changed, 35 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e6ce949670e5..2d72d206cead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2090,6 +2090,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) {
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2115,6 +2116,40 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
[JK] This condition will never succeed. mgpu_info.num_dgpu is 0-indexed while num_physical_nodes give total nodes.
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
+
return 0;
}
@@ -2227,18 +2262,6 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
-
- /*
- * set to low pstate by default
- * This should be performed after all devices from
- * XGMI finish their initializations. Thus it's moved
- * to here.
- * The time delay is 2S. TODO: confirm whether that
- * is enough for all possible XGMI setups.
- */
- r = amdgpu_xgmi_set_pstate(adev, 0);
- if (r)
- DRM_ERROR("pstate setting failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
--
2.23.0
[-- Attachment #1.2: Type: text/html, Size: 10778 bytes --]
[-- Attachment #2: Type: text/plain, Size: 153 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
@ 2019-11-05 16:41 ` Kim, Jonathan
0 siblings, 0 replies; 20+ messages in thread
From: Kim, Jonathan @ 2019-11-05 16:41 UTC (permalink / raw)
To: Strawbridge, Michael, Quan, Evan,
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
[-- Attachment #1.1: Type: text/plain, Size: 5659 bytes --]
Yes that's correct. This should fix the issue.
Jon
From: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>
Sent: Tuesday, November 5, 2019 11:40 AM
To: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Subject: Re: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Hi Jon,
Does that mean we can simply use this instead?
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) {
Thanks,
Michael
________________________________
From: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org<mailto:Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>>
Sent: 05 November 2019 11:32 AM
To: Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> <amd-gfx-PD4FTy7X32mMSPqsTGOZug@public.gmane.orgesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>>
Cc: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Please see inline.
Jon
-----Original Message-----
From: Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>
Sent: Tuesday, November 5, 2019 5:24 AM
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Cc: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org<mailto:Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>>; Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge@amd.com>>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>
Subject: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
P-state switch should be performed after all devices from the hive get initialized.
Change-Id: Ifc7cac9ef0cf250447d2a412da35d601e2ac79ec
Signed-off-by: Evan Quan <evan.quan-5C7GfCeVMHo@public.gmane.org<mailto:evan.quan-5C7GfCeVMHo@public.gmane.org>>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 47 ++++++++++++++++------
1 file changed, 35 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e6ce949670e5..2d72d206cead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2090,6 +2090,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) {
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2115,6 +2116,40 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
[JK] This condition will never succeed. mgpu_info.num_dgpu is 0-indexed while num_physical_nodes give total nodes.
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
+
return 0;
}
@@ -2227,18 +2262,6 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
-
- /*
- * set to low pstate by default
- * This should be performed after all devices from
- * XGMI finish their initializations. Thus it's moved
- * to here.
- * The time delay is 2S. TODO: confirm whether that
- * is enough for all possible XGMI setups.
- */
- r = amdgpu_xgmi_set_pstate(adev, 0);
- if (r)
- DRM_ERROR("pstate setting failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
--
2.23.0
[-- Attachment #1.2: Type: text/html, Size: 14075 bytes --]
[-- Attachment #2: Type: text/plain, Size: 153 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
@ 2019-11-05 16:41 ` Kim, Jonathan
0 siblings, 0 replies; 20+ messages in thread
From: Kim, Jonathan @ 2019-11-05 16:41 UTC (permalink / raw)
To: Strawbridge, Michael, Quan, Evan, amd-gfx
[-- Attachment #1.1: Type: text/plain, Size: 5113 bytes --]
Yes that's correct. This should fix the issue.
Jon
From: Strawbridge, Michael <Michael.Strawbridge@amd.com>
Sent: Tuesday, November 5, 2019 11:40 AM
To: Kim, Jonathan <Jonathan.Kim@amd.com>; Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Hi Jon,
Does that mean we can simply use this instead?
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) {
Thanks,
Michael
________________________________
From: Kim, Jonathan <Jonathan.Kim@amd.com<mailto:Jonathan.Kim@amd.com>>
Sent: 05 November 2019 11:32 AM
To: Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Cc: Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Please see inline.
Jon
-----Original Message-----
From: Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>
Sent: Tuesday, November 5, 2019 5:24 AM
To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Cc: Kim, Jonathan <Jonathan.Kim@amd.com<mailto:Jonathan.Kim@amd.com>>; Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>; Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>
Subject: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
P-state switch should be performed after all devices from the hive get initialized.
Change-Id: Ifc7cac9ef0cf250447d2a412da35d601e2ac79ec
Signed-off-by: Evan Quan <evan.quan@amd.com<mailto:evan.quan@amd.com>>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 47 ++++++++++++++++------
1 file changed, 35 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e6ce949670e5..2d72d206cead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2090,6 +2090,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) {
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2115,6 +2116,40 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
[JK] This condition will never succeed. mgpu_info.num_dgpu is 0-indexed while num_physical_nodes give total nodes.
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
+
return 0;
}
@@ -2227,18 +2262,6 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
-
- /*
- * set to low pstate by default
- * This should be performed after all devices from
- * XGMI finish their initializations. Thus it's moved
- * to here.
- * The time delay is 2S. TODO: confirm whether that
- * is enough for all possible XGMI setups.
- */
- r = amdgpu_xgmi_set_pstate(adev, 0);
- if (r)
- DRM_ERROR("pstate setting failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
--
2.23.0
[-- Attachment #1.2: Type: text/html, Size: 13613 bytes --]
[-- Attachment #2: Type: text/plain, Size: 153 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
@ 2019-11-05 17:07 ` Kim, Jonathan
0 siblings, 0 replies; 20+ messages in thread
From: Kim, Jonathan @ 2019-11-05 17:07 UTC (permalink / raw)
To: Strawbridge, Michael, Quan, Evan,
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
[-- Attachment #1.1: Type: text/plain, Size: 6895 bytes --]
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
Also while num_physical_nodes should be used here instead. It doesn't make sense to have a pre-condition check i.e. (if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) ) against the total number of nodes while the loop checks for the current num gpus from mgpu_info. This will fail to set the last node to low pstate for the same 0-indexing issue once the pre-condition passes.
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
From: Kim, Jonathan
Sent: Tuesday, November 5, 2019 11:42 AM
To: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Yes that's correct. This should fix the issue.
Jon
From: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>>
Sent: Tuesday, November 5, 2019 11:40 AM
To: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org<mailto:Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32mqWrfYKbYh0A@public.gmane.orgktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: Re: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Hi Jon,
Does that mean we can simply use this instead?
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) {
Thanks,
Michael
________________________________
From: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org<mailto:Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>>
Sent: 05 November 2019 11:32 AM
To: Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> <amd-gfx-PD4FTy7X32mMSPqsTGOZug@public.gmane.orgesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>>
Cc: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Please see inline.
Jon
-----Original Message-----
From: Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>
Sent: Tuesday, November 5, 2019 5:24 AM
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Cc: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org<mailto:Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>>; Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge@amd.com>>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>
Subject: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
P-state switch should be performed after all devices from the hive get initialized.
Change-Id: Ifc7cac9ef0cf250447d2a412da35d601e2ac79ec
Signed-off-by: Evan Quan <evan.quan-5C7GfCeVMHo@public.gmane.org<mailto:evan.quan-5C7GfCeVMHo@public.gmane.org>>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 47 ++++++++++++++++------
1 file changed, 35 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e6ce949670e5..2d72d206cead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2090,6 +2090,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) {
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2115,6 +2116,40 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
[JK] This condition will never succeed. mgpu_info.num_dgpu is 0-indexed while num_physical_nodes give total nodes.
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
+
return 0;
}
@@ -2227,18 +2262,6 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
-
- /*
- * set to low pstate by default
- * This should be performed after all devices from
- * XGMI finish their initializations. Thus it's moved
- * to here.
- * The time delay is 2S. TODO: confirm whether that
- * is enough for all possible XGMI setups.
- */
- r = amdgpu_xgmi_set_pstate(adev, 0);
- if (r)
- DRM_ERROR("pstate setting failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
--
2.23.0
[-- Attachment #1.2: Type: text/html, Size: 16899 bytes --]
[-- Attachment #2: Type: text/plain, Size: 153 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
@ 2019-11-05 17:07 ` Kim, Jonathan
0 siblings, 0 replies; 20+ messages in thread
From: Kim, Jonathan @ 2019-11-05 17:07 UTC (permalink / raw)
To: Strawbridge, Michael, Quan, Evan, amd-gfx
[-- Attachment #1.1: Type: text/plain, Size: 6190 bytes --]
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
Also while num_physical_nodes should be used here instead. It doesn't make sense to have a pre-condition check i.e. (if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) ) against the total number of nodes while the loop checks for the current num gpus from mgpu_info. This will fail to set the last node to low pstate for the same 0-indexing issue once the pre-condition passes.
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
From: Kim, Jonathan
Sent: Tuesday, November 5, 2019 11:42 AM
To: Strawbridge, Michael <Michael.Strawbridge@amd.com>; Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Yes that's correct. This should fix the issue.
Jon
From: Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>
Sent: Tuesday, November 5, 2019 11:40 AM
To: Kim, Jonathan <Jonathan.Kim@amd.com<mailto:Jonathan.Kim@amd.com>>; Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Hi Jon,
Does that mean we can simply use this instead?
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) {
Thanks,
Michael
________________________________
From: Kim, Jonathan <Jonathan.Kim@amd.com<mailto:Jonathan.Kim@amd.com>>
Sent: 05 November 2019 11:32 AM
To: Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Cc: Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Please see inline.
Jon
-----Original Message-----
From: Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>
Sent: Tuesday, November 5, 2019 5:24 AM
To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Cc: Kim, Jonathan <Jonathan.Kim@amd.com<mailto:Jonathan.Kim@amd.com>>; Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>; Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>
Subject: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
P-state switch should be performed after all devices from the hive get initialized.
Change-Id: Ifc7cac9ef0cf250447d2a412da35d601e2ac79ec
Signed-off-by: Evan Quan <evan.quan@amd.com<mailto:evan.quan@amd.com>>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 47 ++++++++++++++++------
1 file changed, 35 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e6ce949670e5..2d72d206cead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2090,6 +2090,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) {
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2115,6 +2116,40 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
[JK] This condition will never succeed. mgpu_info.num_dgpu is 0-indexed while num_physical_nodes give total nodes.
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
+
return 0;
}
@@ -2227,18 +2262,6 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
-
- /*
- * set to low pstate by default
- * This should be performed after all devices from
- * XGMI finish their initializations. Thus it's moved
- * to here.
- * The time delay is 2S. TODO: confirm whether that
- * is enough for all possible XGMI setups.
- */
- r = amdgpu_xgmi_set_pstate(adev, 0);
- if (r)
- DRM_ERROR("pstate setting failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
--
2.23.0
[-- Attachment #1.2: Type: text/html, Size: 16185 bytes --]
[-- Attachment #2: Type: text/plain, Size: 153 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
@ 2019-11-06 2:17 ` Kim, Jonathan
0 siblings, 0 replies; 20+ messages in thread
From: Kim, Jonathan @ 2019-11-06 2:17 UTC (permalink / raw)
To: Strawbridge, Michael, Quan, Evan,
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
[-- Attachment #1.1: Type: text/plain, Size: 7773 bytes --]
Taking a look at this again, it's not an indexing issue, it's a placement problem. I don't think this solution will work if we expect to pstate switch all gpus.
>From amdgpu_kms.c, amdgpu_register_gpu_instance comes after amdgpu_device_init. This means we'll never reach mgpu_info.num_gpu == adev->gmc.xgmi.num_physical_nodes until in this code path.
From: Kim, Jonathan
Sent: Tuesday, November 5, 2019 12:07 PM
To: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
Also while num_physical_nodes should be used here instead. It doesn't make sense to have a pre-condition check i.e. (if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) ) against the total number of nodes while the loop checks for the current num gpus from mgpu_info. This will fail to set the last node to low pstate for the same 0-indexing issue once the pre-condition passes.
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
From: Kim, Jonathan
Sent: Tuesday, November 5, 2019 11:42 AM
To: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Yes that's correct. This should fix the issue.
Jon
From: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>>
Sent: Tuesday, November 5, 2019 11:40 AM
To: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org<mailto:Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32mqWrfYKbYh0A@public.gmane.orgktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: Re: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Hi Jon,
Does that mean we can simply use this instead?
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) {
Thanks,
Michael
________________________________
From: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org<mailto:Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>>
Sent: 05 November 2019 11:32 AM
To: Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> <amd-gfx-PD4FTy7X32mMSPqsTGOZug@public.gmane.orgesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>>
Cc: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Please see inline.
Jon
-----Original Message-----
From: Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>
Sent: Tuesday, November 5, 2019 5:24 AM
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Cc: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org<mailto:Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>>; Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge@amd.com>>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>
Subject: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
P-state switch should be performed after all devices from the hive get initialized.
Change-Id: Ifc7cac9ef0cf250447d2a412da35d601e2ac79ec
Signed-off-by: Evan Quan <evan.quan-5C7GfCeVMHo@public.gmane.org<mailto:evan.quan-5C7GfCeVMHo@public.gmane.org>>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 47 ++++++++++++++++------
1 file changed, 35 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e6ce949670e5..2d72d206cead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2090,6 +2090,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) {
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2115,6 +2116,40 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
[JK] This condition will never succeed. mgpu_info.num_dgpu is 0-indexed while num_physical_nodes give total nodes.
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
+
return 0;
}
@@ -2227,18 +2262,6 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
-
- /*
- * set to low pstate by default
- * This should be performed after all devices from
- * XGMI finish their initializations. Thus it's moved
- * to here.
- * The time delay is 2S. TODO: confirm whether that
- * is enough for all possible XGMI setups.
- */
- r = amdgpu_xgmi_set_pstate(adev, 0);
- if (r)
- DRM_ERROR("pstate setting failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
--
2.23.0
[-- Attachment #1.2: Type: text/html, Size: 18338 bytes --]
[-- Attachment #2: Type: text/plain, Size: 153 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
@ 2019-11-06 2:17 ` Kim, Jonathan
0 siblings, 0 replies; 20+ messages in thread
From: Kim, Jonathan @ 2019-11-06 2:17 UTC (permalink / raw)
To: Strawbridge, Michael, Quan, Evan, amd-gfx
[-- Attachment #1.1: Type: text/plain, Size: 6927 bytes --]
Taking a look at this again, it's not an indexing issue, it's a placement problem. I don't think this solution will work if we expect to pstate switch all gpus.
From amdgpu_kms.c, amdgpu_register_gpu_instance comes after amdgpu_device_init. This means we'll never reach mgpu_info.num_gpu == adev->gmc.xgmi.num_physical_nodes until in this code path.
From: Kim, Jonathan
Sent: Tuesday, November 5, 2019 12:07 PM
To: Strawbridge, Michael <Michael.Strawbridge@amd.com>; Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
Also while num_physical_nodes should be used here instead. It doesn't make sense to have a pre-condition check i.e. (if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) ) against the total number of nodes while the loop checks for the current num gpus from mgpu_info. This will fail to set the last node to low pstate for the same 0-indexing issue once the pre-condition passes.
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
From: Kim, Jonathan
Sent: Tuesday, November 5, 2019 11:42 AM
To: Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>; Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Yes that's correct. This should fix the issue.
Jon
From: Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>
Sent: Tuesday, November 5, 2019 11:40 AM
To: Kim, Jonathan <Jonathan.Kim@amd.com<mailto:Jonathan.Kim@amd.com>>; Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Hi Jon,
Does that mean we can simply use this instead?
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) {
Thanks,
Michael
________________________________
From: Kim, Jonathan <Jonathan.Kim@amd.com<mailto:Jonathan.Kim@amd.com>>
Sent: 05 November 2019 11:32 AM
To: Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Cc: Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Please see inline.
Jon
-----Original Message-----
From: Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>
Sent: Tuesday, November 5, 2019 5:24 AM
To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Cc: Kim, Jonathan <Jonathan.Kim@amd.com<mailto:Jonathan.Kim@amd.com>>; Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>; Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>
Subject: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
P-state switch should be performed after all devices from the hive get initialized.
Change-Id: Ifc7cac9ef0cf250447d2a412da35d601e2ac79ec
Signed-off-by: Evan Quan <evan.quan@amd.com<mailto:evan.quan@amd.com>>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 47 ++++++++++++++++------
1 file changed, 35 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e6ce949670e5..2d72d206cead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2090,6 +2090,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) {
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2115,6 +2116,40 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
[JK] This condition will never succeed. mgpu_info.num_dgpu is 0-indexed while num_physical_nodes give total nodes.
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
+
return 0;
}
@@ -2227,18 +2262,6 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
-
- /*
- * set to low pstate by default
- * This should be performed after all devices from
- * XGMI finish their initializations. Thus it's moved
- * to here.
- * The time delay is 2S. TODO: confirm whether that
- * is enough for all possible XGMI setups.
- */
- r = amdgpu_xgmi_set_pstate(adev, 0);
- if (r)
- DRM_ERROR("pstate setting failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
--
2.23.0
[-- Attachment #1.2: Type: text/html, Size: 17502 bytes --]
[-- Attachment #2: Type: text/plain, Size: 153 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
@ 2019-11-06 3:10 ` Quan, Evan
0 siblings, 0 replies; 20+ messages in thread
From: Quan, Evan @ 2019-11-06 3:10 UTC (permalink / raw)
To: Kim, Jonathan, Strawbridge, Michael,
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
[-- Attachment #1.1: Type: text/plain, Size: 8573 bytes --]
Thanks Jon.
Per discussed in another mail thread, this should be applied after the patch below
https://lists.freedesktop.org/archives/amd-gfx/2019-November/042160.html
Sorry for missing this important information.
Regards,
Evan
From: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>
Sent: Wednesday, November 6, 2019 10:18 AM
To: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Taking a look at this again, it's not an indexing issue, it's a placement problem. I don't think this solution will work if we expect to pstate switch all gpus.
>From amdgpu_kms.c, amdgpu_register_gpu_instance comes after amdgpu_device_init. This means we'll never reach mgpu_info.num_gpu == adev->gmc.xgmi.num_physical_nodes until in this code path.
From: Kim, Jonathan
Sent: Tuesday, November 5, 2019 12:07 PM
To: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
Also while num_physical_nodes should be used here instead. It doesn't make sense to have a pre-condition check i.e. (if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) ) against the total number of nodes while the loop checks for the current num gpus from mgpu_info. This will fail to set the last node to low pstate for the same 0-indexing issue once the pre-condition passes.
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
From: Kim, Jonathan
Sent: Tuesday, November 5, 2019 11:42 AM
To: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Yes that's correct. This should fix the issue.
Jon
From: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>>
Sent: Tuesday, November 5, 2019 11:40 AM
To: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org<mailto:Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32mqWrfYKbYh0A@public.gmane.orgktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: Re: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Hi Jon,
Does that mean we can simply use this instead?
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) {
Thanks,
Michael
________________________________
From: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org<mailto:Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>>
Sent: 05 November 2019 11:32 AM
To: Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> <amd-gfx-PD4FTy7X32mMSPqsTGOZug@public.gmane.orgesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>>
Cc: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Please see inline.
Jon
-----Original Message-----
From: Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>
Sent: Tuesday, November 5, 2019 5:24 AM
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Cc: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org<mailto:Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>>; Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge@amd.com>>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>
Subject: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
P-state switch should be performed after all devices from the hive get initialized.
Change-Id: Ifc7cac9ef0cf250447d2a412da35d601e2ac79ec
Signed-off-by: Evan Quan <evan.quan-5C7GfCeVMHo@public.gmane.org<mailto:evan.quan-5C7GfCeVMHo@public.gmane.org>>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 47 ++++++++++++++++------
1 file changed, 35 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e6ce949670e5..2d72d206cead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2090,6 +2090,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) {
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2115,6 +2116,40 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
[JK] This condition will never succeed. mgpu_info.num_dgpu is 0-indexed while num_physical_nodes give total nodes.
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
+
return 0;
}
@@ -2227,18 +2262,6 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
-
- /*
- * set to low pstate by default
- * This should be performed after all devices from
- * XGMI finish their initializations. Thus it's moved
- * to here.
- * The time delay is 2S. TODO: confirm whether that
- * is enough for all possible XGMI setups.
- */
- r = amdgpu_xgmi_set_pstate(adev, 0);
- if (r)
- DRM_ERROR("pstate setting failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
--
2.23.0
[-- Attachment #1.2: Type: text/html, Size: 20083 bytes --]
[-- Attachment #2: Type: text/plain, Size: 153 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
@ 2019-11-06 3:10 ` Quan, Evan
0 siblings, 0 replies; 20+ messages in thread
From: Quan, Evan @ 2019-11-06 3:10 UTC (permalink / raw)
To: Kim, Jonathan, Strawbridge, Michael, amd-gfx
[-- Attachment #1.1: Type: text/plain, Size: 7566 bytes --]
Thanks Jon.
Per discussed in another mail thread, this should be applied after the patch below
https://lists.freedesktop.org/archives/amd-gfx/2019-November/042160.html
Sorry for missing this important information.
Regards,
Evan
From: Kim, Jonathan <Jonathan.Kim@amd.com>
Sent: Wednesday, November 6, 2019 10:18 AM
To: Strawbridge, Michael <Michael.Strawbridge@amd.com>; Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Taking a look at this again, it's not an indexing issue, it's a placement problem. I don't think this solution will work if we expect to pstate switch all gpus.
From amdgpu_kms.c, amdgpu_register_gpu_instance comes after amdgpu_device_init. This means we'll never reach mgpu_info.num_gpu == adev->gmc.xgmi.num_physical_nodes until in this code path.
From: Kim, Jonathan
Sent: Tuesday, November 5, 2019 12:07 PM
To: Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>; Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
Also while num_physical_nodes should be used here instead. It doesn't make sense to have a pre-condition check i.e. (if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) ) against the total number of nodes while the loop checks for the current num gpus from mgpu_info. This will fail to set the last node to low pstate for the same 0-indexing issue once the pre-condition passes.
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
From: Kim, Jonathan
Sent: Tuesday, November 5, 2019 11:42 AM
To: Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>; Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Yes that's correct. This should fix the issue.
Jon
From: Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>
Sent: Tuesday, November 5, 2019 11:40 AM
To: Kim, Jonathan <Jonathan.Kim@amd.com<mailto:Jonathan.Kim@amd.com>>; Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Hi Jon,
Does that mean we can simply use this instead?
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) {
Thanks,
Michael
________________________________
From: Kim, Jonathan <Jonathan.Kim@amd.com<mailto:Jonathan.Kim@amd.com>>
Sent: 05 November 2019 11:32 AM
To: Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Cc: Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Please see inline.
Jon
-----Original Message-----
From: Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>
Sent: Tuesday, November 5, 2019 5:24 AM
To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Cc: Kim, Jonathan <Jonathan.Kim@amd.com<mailto:Jonathan.Kim@amd.com>>; Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>; Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>
Subject: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
P-state switch should be performed after all devices from the hive get initialized.
Change-Id: Ifc7cac9ef0cf250447d2a412da35d601e2ac79ec
Signed-off-by: Evan Quan <evan.quan@amd.com<mailto:evan.quan@amd.com>>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 47 ++++++++++++++++------
1 file changed, 35 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e6ce949670e5..2d72d206cead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2090,6 +2090,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) {
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2115,6 +2116,40 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
[JK] This condition will never succeed. mgpu_info.num_dgpu is 0-indexed while num_physical_nodes give total nodes.
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
+
return 0;
}
@@ -2227,18 +2262,6 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
-
- /*
- * set to low pstate by default
- * This should be performed after all devices from
- * XGMI finish their initializations. Thus it's moved
- * to here.
- * The time delay is 2S. TODO: confirm whether that
- * is enough for all possible XGMI setups.
- */
- r = amdgpu_xgmi_set_pstate(adev, 0);
- if (r)
- DRM_ERROR("pstate setting failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
--
2.23.0
[-- Attachment #1.2: Type: text/html, Size: 19104 bytes --]
[-- Attachment #2: Type: text/plain, Size: 153 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
@ 2019-11-06 3:49 ` Kim, Jonathan
0 siblings, 0 replies; 20+ messages in thread
From: Kim, Jonathan @ 2019-11-06 3:49 UTC (permalink / raw)
To: Quan, Evan, Strawbridge, Michael,
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
[-- Attachment #1.1: Type: text/plain, Size: 9375 bytes --]
No problem. With the incoming patch referenced below, this patch should be ok since it doesn't break the build.
Reviewed-by: Jonathan Kim <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>
From: Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org>
Sent: Tuesday, November 5, 2019 10:11 PM
To: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>; Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Thanks Jon.
Per discussed in another mail thread, this should be applied after the patch below
https://lists.freedesktop.org/archives/amd-gfx/2019-November/042160.html
Sorry for missing this important information.
Regards,
Evan
From: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org<mailto:Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>>
Sent: Wednesday, November 6, 2019 10:18 AM
To: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Taking a look at this again, it's not an indexing issue, it's a placement problem. I don't think this solution will work if we expect to pstate switch all gpus.
>From amdgpu_kms.c, amdgpu_register_gpu_instance comes after amdgpu_device_init. This means we'll never reach mgpu_info.num_gpu == adev->gmc.xgmi.num_physical_nodes until in this code path.
From: Kim, Jonathan
Sent: Tuesday, November 5, 2019 12:07 PM
To: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
Also while num_physical_nodes should be used here instead. It doesn't make sense to have a pre-condition check i.e. (if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) ) against the total number of nodes while the loop checks for the current num gpus from mgpu_info. This will fail to set the last node to low pstate for the same 0-indexing issue once the pre-condition passes.
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
From: Kim, Jonathan
Sent: Tuesday, November 5, 2019 11:42 AM
To: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Yes that's correct. This should fix the issue.
Jon
From: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>>
Sent: Tuesday, November 5, 2019 11:40 AM
To: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org<mailto:Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32mqWrfYKbYh0A@public.gmane.orgktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: Re: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Hi Jon,
Does that mean we can simply use this instead?
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) {
Thanks,
Michael
________________________________
From: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org<mailto:Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>>
Sent: 05 November 2019 11:32 AM
To: Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> <amd-gfx-PD4FTy7X32mMSPqsTGOZug@public.gmane.orgesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>>
Cc: Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org>>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Please see inline.
Jon
-----Original Message-----
From: Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>
Sent: Tuesday, November 5, 2019 5:24 AM
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Cc: Kim, Jonathan <Jonathan.Kim-5C7GfCeVMHo@public.gmane.org<mailto:Jonathan.Kim-5C7GfCeVMHo@public.gmane.org>>; Strawbridge, Michael <Michael.Strawbridge-5C7GfCeVMHo@public.gmane.org<mailto:Michael.Strawbridge@amd.com>>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org<mailto:Evan.Quan-5C7GfCeVMHo@public.gmane.org>>
Subject: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
P-state switch should be performed after all devices from the hive get initialized.
Change-Id: Ifc7cac9ef0cf250447d2a412da35d601e2ac79ec
Signed-off-by: Evan Quan <evan.quan-5C7GfCeVMHo@public.gmane.org<mailto:evan.quan-5C7GfCeVMHo@public.gmane.org>>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 47 ++++++++++++++++------
1 file changed, 35 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e6ce949670e5..2d72d206cead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2090,6 +2090,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) {
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2115,6 +2116,40 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
[JK] This condition will never succeed. mgpu_info.num_dgpu is 0-indexed while num_physical_nodes give total nodes.
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
+
return 0;
}
@@ -2227,18 +2262,6 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
-
- /*
- * set to low pstate by default
- * This should be performed after all devices from
- * XGMI finish their initializations. Thus it's moved
- * to here.
- * The time delay is 2S. TODO: confirm whether that
- * is enough for all possible XGMI setups.
- */
- r = amdgpu_xgmi_set_pstate(adev, 0);
- if (r)
- DRM_ERROR("pstate setting failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
--
2.23.0
[-- Attachment #1.2: Type: text/html, Size: 21392 bytes --]
[-- Attachment #2: Type: text/plain, Size: 153 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
* RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
@ 2019-11-06 3:49 ` Kim, Jonathan
0 siblings, 0 replies; 20+ messages in thread
From: Kim, Jonathan @ 2019-11-06 3:49 UTC (permalink / raw)
To: Quan, Evan, Strawbridge, Michael, amd-gfx
[-- Attachment #1.1: Type: text/plain, Size: 8165 bytes --]
No problem. With the incoming patch referenced below, this patch should be ok since it doesn't break the build.
Reviewed-by: Jonathan Kim <Jonathan.Kim@amd.com>
From: Quan, Evan <Evan.Quan@amd.com>
Sent: Tuesday, November 5, 2019 10:11 PM
To: Kim, Jonathan <Jonathan.Kim@amd.com>; Strawbridge, Michael <Michael.Strawbridge@amd.com>; amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Thanks Jon.
Per discussed in another mail thread, this should be applied after the patch below
https://lists.freedesktop.org/archives/amd-gfx/2019-November/042160.html
Sorry for missing this important information.
Regards,
Evan
From: Kim, Jonathan <Jonathan.Kim@amd.com<mailto:Jonathan.Kim@amd.com>>
Sent: Wednesday, November 6, 2019 10:18 AM
To: Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>; Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Taking a look at this again, it's not an indexing issue, it's a placement problem. I don't think this solution will work if we expect to pstate switch all gpus.
From amdgpu_kms.c, amdgpu_register_gpu_instance comes after amdgpu_device_init. This means we'll never reach mgpu_info.num_gpu == adev->gmc.xgmi.num_physical_nodes until in this code path.
From: Kim, Jonathan
Sent: Tuesday, November 5, 2019 12:07 PM
To: Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>; Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
Also while num_physical_nodes should be used here instead. It doesn't make sense to have a pre-condition check i.e. (if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) ) against the total number of nodes while the loop checks for the current num gpus from mgpu_info. This will fail to set the last node to low pstate for the same 0-indexing issue once the pre-condition passes.
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
From: Kim, Jonathan
Sent: Tuesday, November 5, 2019 11:42 AM
To: Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>; Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Yes that's correct. This should fix the issue.
Jon
From: Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>
Sent: Tuesday, November 5, 2019 11:40 AM
To: Kim, Jonathan <Jonathan.Kim@amd.com<mailto:Jonathan.Kim@amd.com>>; Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Hi Jon,
Does that mean we can simply use this instead?
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes - 1) {
Thanks,
Michael
________________________________
From: Kim, Jonathan <Jonathan.Kim@amd.com<mailto:Jonathan.Kim@amd.com>>
Sent: 05 November 2019 11:32 AM
To: Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Cc: Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>
Subject: RE: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
Please see inline.
Jon
-----Original Message-----
From: Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>
Sent: Tuesday, November 5, 2019 5:24 AM
To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Cc: Kim, Jonathan <Jonathan.Kim@amd.com<mailto:Jonathan.Kim@amd.com>>; Strawbridge, Michael <Michael.Strawbridge@amd.com<mailto:Michael.Strawbridge@amd.com>>; Quan, Evan <Evan.Quan@amd.com<mailto:Evan.Quan@amd.com>>
Subject: [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized
P-state switch should be performed after all devices from the hive get initialized.
Change-Id: Ifc7cac9ef0cf250447d2a412da35d601e2ac79ec
Signed-off-by: Evan Quan <evan.quan@amd.com<mailto:evan.quan@amd.com>>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 47 ++++++++++++++++------
1 file changed, 35 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e6ce949670e5..2d72d206cead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2090,6 +2090,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) {
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2115,6 +2116,40 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
[JK] This condition will never succeed. mgpu_info.num_dgpu is 0-indexed while num_physical_nodes give total nodes.
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
+
return 0;
}
@@ -2227,18 +2262,6 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
-
- /*
- * set to low pstate by default
- * This should be performed after all devices from
- * XGMI finish their initializations. Thus it's moved
- * to here.
- * The time delay is 2S. TODO: confirm whether that
- * is enough for all possible XGMI setups.
- */
- r = amdgpu_xgmi_set_pstate(adev, 0);
- if (r)
- DRM_ERROR("pstate setting failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
--
2.23.0
[-- Attachment #1.2: Type: text/html, Size: 20249 bytes --]
[-- Attachment #2: Type: text/plain, Size: 153 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 20+ messages in thread
end of thread, other threads:[~2019-11-06 3:49 UTC | newest]
Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-05 10:23 [PATCH 1/2] drm/amdgpu: fix possible pstate switch race condition Quan, Evan
2019-11-05 10:23 ` Quan, Evan
[not found] ` <20191105102310.16657-1-evan.quan-5C7GfCeVMHo@public.gmane.org>
2019-11-05 10:23 ` [PATCH 2/2] drm/amdgpu: perform p-state switch after the whole hive initialized Quan, Evan
2019-11-05 10:23 ` Quan, Evan
[not found] ` <20191105102310.16657-2-evan.quan-5C7GfCeVMHo@public.gmane.org>
2019-11-05 16:32 ` Kim, Jonathan
2019-11-05 16:32 ` Kim, Jonathan
[not found] ` <CH2PR12MB3831CEABDD4CBA2FEB8DEFEB857E0-rV3HgkLYx2Wb1l0FfFgDpQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-11-05 16:39 ` Strawbridge, Michael
2019-11-05 16:39 ` Strawbridge, Michael
[not found] ` <DM5PR12MB248696FDB2D0E563221E7CDFE87E0-2J9CzHegvk8hnZ7s9K7NnwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-11-05 16:41 ` Kim, Jonathan
2019-11-05 16:41 ` Kim, Jonathan
[not found] ` <CH2PR12MB3831D16D352FA009002800CC857E0-rV3HgkLYx2Wb1l0FfFgDpQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-11-05 17:07 ` Kim, Jonathan
2019-11-05 17:07 ` Kim, Jonathan
[not found] ` <CH2PR12MB3831868F2A7B4A8BCCBA24F7857E0-rV3HgkLYx2Wb1l0FfFgDpQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-11-06 2:17 ` Kim, Jonathan
2019-11-06 2:17 ` Kim, Jonathan
[not found] ` <CH2PR12MB383150DB7FA13B7E5472A37085790-rV3HgkLYx2Wb1l0FfFgDpQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-11-06 3:10 ` Quan, Evan
2019-11-06 3:10 ` Quan, Evan
[not found] ` <MN2PR12MB3344BF9A70FE63F2D5E4AE63E4790-rweVpJHSKToDMgCC8P//OwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-11-06 3:49 ` Kim, Jonathan
2019-11-06 3:49 ` Kim, Jonathan
2019-11-05 11:34 ` [PATCH 1/2] drm/amdgpu: fix possible pstate switch race condition Xu, Feifei
2019-11-05 11:34 ` Xu, Feifei
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.