All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: fix the ib test hang when gfx is in "idle" state
@ 2018-04-20  9:40 Huang Rui
       [not found] ` <1524217255-25968-1-git-send-email-ray.huang-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Huang Rui @ 2018-04-20  9:40 UTC (permalink / raw)
  To: Alex Deucher, Christian König,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Huang Rui, Shirish S

"aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
The above patch defers the execution of the gfx/compute IB tests. However, by that
time, gfx may already have gone into the idle state. If "idle" gfx receives a command
submission, the system will hang. So we must add an is_gfx_on check at the
start of the IB tests.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Cc: Shirish S <shirish.s@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h               |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c             | 19 ++++++++++++++++++-
 drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 16 ++--------------
 3 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 59df4b7..a0263b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -905,6 +905,7 @@ struct amdgpu_gfx_funcs {
 	void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst);
 	void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst);
 	void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue);
+	bool (*is_gfx_on)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_ngg_buf {
@@ -1855,6 +1856,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
 #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
 #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q))
+#define amdgpu_gfx_is_gfx_on(adev) (adev)->gfx.funcs->is_gfx_on((adev))
 
 /* Common functions */
 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6c2d278..a71d711 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -342,6 +342,18 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
 	return r;
 }
 
+static bool gfx_v9_0_is_gfx_on(struct amdgpu_device *adev)
+{
+	uint32_t reg;
+
+	reg = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
+	if ((reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) ==
+	    (0x2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT))
+		return true;
+
+	return false;
+}
+
 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -353,6 +365,10 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	uint32_t tmp;
 	long r;
 
+	/* confirm gfx is not in "idle" state */
+	if (!amdgpu_gfx_is_gfx_on(adev))
+		return 0;
+
 	r = amdgpu_device_wb_get(adev, &index);
 	if (r) {
 		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
@@ -1085,7 +1101,8 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
 	.read_wave_data = &gfx_v9_0_read_wave_data,
 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
-	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
+	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
+	.is_gfx_on = &gfx_v9_0_is_gfx_on
 };
 
 static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
index 7712eb6..3553fba 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
@@ -254,28 +254,16 @@ static int smu10_power_off_asic(struct pp_hwmgr *hwmgr)
 	return smu10_reset_cc6_data(hwmgr);
 }
 
-static bool smu10_is_gfx_on(struct pp_hwmgr *hwmgr)
-{
-	uint32_t reg;
-	struct amdgpu_device *adev = hwmgr->adev;
-
-	reg = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
-	if ((reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) ==
-	    (0x2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT))
-		return true;
-
-	return false;
-}
-
 static int smu10_disable_gfx_off(struct pp_hwmgr *hwmgr)
 {
 	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	if (smu10_data->gfx_off_controled_by_driver) {
 		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableGfxOff);
 
 		/* confirm gfx is back to "on" state */
-		while (!smu10_is_gfx_on(hwmgr))
+		while (!amdgpu_gfx_is_gfx_on(adev))
 			msleep(1);
 	}
 
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: fix the ib test hang when gfx is in "idle" state
       [not found] ` <1524217255-25968-1-git-send-email-ray.huang-5C7GfCeVMHo@public.gmane.org>
@ 2018-04-20  9:59   ` Christian König
       [not found]     ` <aaaffd0c-96be-29b2-9bd4-df6fa6ea7e8a-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2018-04-20  9:59 UTC (permalink / raw)
  To: Huang Rui, Alex Deucher, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Shirish S

Am 20.04.2018 um 11:40 schrieb Huang Rui:
> "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
> Above patch defers the execution of gfx/compute ib tests. However, at that time,
> the gfx may already go into idle state. If "idle" gfx receives command
> submission, it will get hang in the system. So we must add is_gfx_on checking at
> start of ib tests.

Do I see that right that you just skip the IB test when the GFX block is
already turned off? In that case, that would be a clear NAK.

BTW: How do you detect that we need to turn GFX on again?

Regards,
Christian.

>
> Signed-off-by: Huang Rui <ray.huang@amd.com>
> Cc: Shirish S <shirish.s@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h               |  2 ++
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c             | 19 ++++++++++++++++++-
>   drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 16 ++--------------
>   3 files changed, 22 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 59df4b7..a0263b9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -905,6 +905,7 @@ struct amdgpu_gfx_funcs {
>   	void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst);
>   	void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst);
>   	void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue);
> +	bool (*is_gfx_on)(struct amdgpu_device *adev);
>   };
>   
>   struct amdgpu_ngg_buf {
> @@ -1855,6 +1856,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
>   #define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
>   #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
>   #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q))
> +#define amdgpu_gfx_is_gfx_on(adev) (adev)->gfx.funcs->is_gfx_on((adev))
>   
>   /* Common functions */
>   int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 6c2d278..a71d711 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -342,6 +342,18 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
>   	return r;
>   }
>   
> +static bool gfx_v9_0_is_gfx_on(struct amdgpu_device *adev)
> +{
> +	uint32_t reg;
> +
> +	reg = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
> +	if ((reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) ==
> +	    (0x2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT))
> +		return true;
> +
> +	return false;
> +}
> +
>   static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   {
>   	struct amdgpu_device *adev = ring->adev;
> @@ -353,6 +365,10 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   	uint32_t tmp;
>   	long r;
>   
> +	/* confirm gfx is not in "idle" state */
> +	if (!amdgpu_gfx_is_gfx_on(adev))
> +		return 0;
> +
>   	r = amdgpu_device_wb_get(adev, &index);
>   	if (r) {
>   		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
> @@ -1085,7 +1101,8 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
>   	.read_wave_data = &gfx_v9_0_read_wave_data,
>   	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
>   	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
> -	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
> +	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
> +	.is_gfx_on = &gfx_v9_0_is_gfx_on
>   };
>   
>   static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
> index 7712eb6..3553fba 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
> @@ -254,28 +254,16 @@ static int smu10_power_off_asic(struct pp_hwmgr *hwmgr)
>   	return smu10_reset_cc6_data(hwmgr);
>   }
>   
> -static bool smu10_is_gfx_on(struct pp_hwmgr *hwmgr)
> -{
> -	uint32_t reg;
> -	struct amdgpu_device *adev = hwmgr->adev;
> -
> -	reg = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
> -	if ((reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) ==
> -	    (0x2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT))
> -		return true;
> -
> -	return false;
> -}
> -
>   static int smu10_disable_gfx_off(struct pp_hwmgr *hwmgr)
>   {
>   	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
> +	struct amdgpu_device *adev = hwmgr->adev;
>   
>   	if (smu10_data->gfx_off_controled_by_driver) {
>   		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableGfxOff);
>   
>   		/* confirm gfx is back to "on" state */
> -		while (!smu10_is_gfx_on(hwmgr))
> +		while (!amdgpu_gfx_is_gfx_on(adev))
>   			msleep(1);
>   	}
>   

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: fix the ib test hang when gfx is in "idle" state
       [not found]     ` <aaaffd0c-96be-29b2-9bd4-df6fa6ea7e8a-5C7GfCeVMHo@public.gmane.org>
@ 2018-04-23  9:52       ` Huang Rui
  2018-04-23  9:57         ` Koenig, Christian
  2018-04-23 10:02         ` Huang Rui
  0 siblings, 2 replies; 9+ messages in thread
From: Huang Rui @ 2018-04-23  9:52 UTC (permalink / raw)
  To: Koenig, Christian
  Cc: Deucher, Alexander, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, S, Shirish

On Fri, Apr 20, 2018 at 05:59:16PM +0800, Koenig, Christian wrote:
> Am 20.04.2018 um 11:40 schrieb Huang Rui:
> > "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
> > Above patch defers the execution of gfx/compute ib tests. However, at that time,
> > the gfx may already go into idle state. If "idle" gfx receives command
> > submission, it will get hang in the system. So we must add is_gfx_on checking at
> > start of ib tests.
> 
> Do I see that right that you just skip the IB test when the GFX block is 
> already turned of? In this case that would be a clear NAK.
> 
> BTW: How do you detect that we need to turn GFX on again?

Christian, I see your point. But there is a hang issue if we try to
disable/enable gfxoff with an SMC message at runtime. Actually, I am trying to
find a good sequence to fix it. After that, I can even expose a debugfs
interface to configure that. So I have to skip the test for the moment when
gfx is in "idle".

Thanks,
Ray
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: fix the ib test hang when gfx is in "idle" state
  2018-04-23  9:52       ` Huang Rui
@ 2018-04-23  9:57         ` Koenig, Christian
       [not found]           ` <afda993b-2d50-4181-9378-f3440b5d633c-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
  2018-04-23 10:02         ` Huang Rui
  1 sibling, 1 reply; 9+ messages in thread
From: Koenig, Christian @ 2018-04-23  9:57 UTC (permalink / raw)
  To: Huang, Ray
  Cc: Deucher, Alexander, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, S, Shirish


[-- Attachment #1.1: Type: text/plain, Size: 2445 bytes --]

Hi Ray,

Am 23.04.2018 11:47 schrieb Huang Rui <ray.huang-5C7GfCeVMHo@public.gmane.org>:
On Fri, Apr 20, 2018 at 05:59:16PM +0800, Koenig, Christian wrote:
> Am 20.04.2018 um 11:40 schrieb Huang Rui:
> > "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
> > Above patch defers the execution of gfx/compute ib tests. However, at that time,
> > the gfx may already go into idle state. If "idle" gfx receives command
> > submission, it will get hang in the system. So we must add is_gfx_on checking at
> > start of ib tests.
>
> Do I see that right that you just skip the IB test when the GFX block is
> already turned of? In this case that would be a clear NAK.
>
> BTW: How do you detect that we need to turn GFX on again?

Christian, I know point. But there is a hang issue if we would like try to
disable/enable gfxoff with SMC message at runtime. Actually, I am trying to
find a good sequence to fix it. After that, I can even expose an debugfs
interface to configure that. So I have to skip the test for the moment when
gfx is in "idle".

Working around that issue for the moment is ok, but please note that explicitly in both the commit message and a code comment.

But don't you run into the same problem when the UMD starts to submit commands?

I mean the idea of the IB test is that you "simulate" a userspace command submission and see if it works.

Regards,
Christian.


Thanks,
Ray

On Fri, Apr 20, 2018 at 05:59:16PM +0800, Koenig, Christian wrote:
> Am 20.04.2018 um 11:40 schrieb Huang Rui:
> > "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
> > Above patch defers the execution of gfx/compute ib tests. However, at that time,
> > the gfx may already go into idle state. If "idle" gfx receives command
> > submission, it will get hang in the system. So we must add is_gfx_on checking at
> > start of ib tests.
>
> Do I see that right that you just skip the IB test when the GFX block is
> already turned of? In this case that would be a clear NAK.
>
> BTW: How do you detect that we need to turn GFX on again?

Christian, I know point. But there is a hang issue if we would like try to
disable/enable gfxoff with SMC message at runtime. Actually, I am trying to
find a good sequence to fix it. After that, I can even expose an debugfs
interface to configure that. So I have to skip the test for the moment when
gfx is in "idle".

Thanks,
Ray

[-- Attachment #1.2: Type: text/html, Size: 4150 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: fix the ib test hang when gfx is in "idle" state
  2018-04-23  9:52       ` Huang Rui
  2018-04-23  9:57         ` Koenig, Christian
@ 2018-04-23 10:02         ` Huang Rui
  1 sibling, 0 replies; 9+ messages in thread
From: Huang Rui @ 2018-04-23 10:02 UTC (permalink / raw)
  To: Koenig, Christian
  Cc: Deucher, Alexander, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, S, Shirish

On Mon, Apr 23, 2018 at 05:52:28PM +0800, Huang Rui wrote:
> On Fri, Apr 20, 2018 at 05:59:16PM +0800, Koenig, Christian wrote:
> > Am 20.04.2018 um 11:40 schrieb Huang Rui:
> > > "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
> > > Above patch defers the execution of gfx/compute ib tests. However, at that time,
> > > the gfx may already go into idle state. If "idle" gfx receives command
> > > submission, it will get hang in the system. So we must add is_gfx_on checking at
> > > start of ib tests.
> > 
> > Do I see that right that you just skip the IB test when the GFX block is 
> > already turned of? In this case that would be a clear NAK.
> > 
> > BTW: How do you detect that we need to turn GFX on again?
> 
> Christian, I know point. But there is a hang issue if we would like try to
> disable/enable gfxoff with SMC message at runtime. Actually, I am trying to
> find a good sequence to fix it. After that, I can even expose an debugfs
> interface to configure that. So I have to skip the test for the moment when
> gfx is in "idle".
> 

And in the normal case, the driver won't explicitly turn gfx on/off. The RLC
firmware will handle it.

Thanks,
Ray
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: fix the ib test hang when gfx is in "idle" state
       [not found]           ` <afda993b-2d50-4181-9378-f3440b5d633c-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
@ 2018-04-23 12:13             ` Huang Rui
  2018-04-23 13:07               ` Koenig, Christian
  2018-04-23 14:40               ` Alex Deucher
  0 siblings, 2 replies; 9+ messages in thread
From: Huang Rui @ 2018-04-23 12:13 UTC (permalink / raw)
  To: Koenig, Christian
  Cc: Deucher, Alexander, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, S, Shirish

On Mon, Apr 23, 2018 at 05:57:06PM +0800, Koenig, Christian wrote:
> Hi Ray,
> 
> Am 23.04.2018 11:47 schrieb Huang Rui <ray.huang@amd.com>:
> 
>     On Fri, Apr 20, 2018 at 05:59:16PM +0800, Koenig, Christian wrote:
>     > Am 20.04.2018 um 11:40 schrieb Huang Rui:
>     > > "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
>     > > Above patch defers the execution of gfx/compute ib tests. However, at
>     that time,
>     > > the gfx may already go into idle state. If "idle" gfx receives command
>     > > submission, it will get hang in the system. So we must add is_gfx_on
>     checking at
>     > > start of ib tests.
>     >
>     > Do I see that right that you just skip the IB test when the GFX block is
>     > already turned of? In this case that would be a clear NAK.
>     >
>     > BTW: How do you detect that we need to turn GFX on again?
> 
>     Christian, I know point. But there is a hang issue if we would like try to
>     disable/enable gfxoff with SMC message at runtime. Actually, I am trying to
>     find a good sequence to fix it. After that, I can even expose an debugfs
>     interface to configure that. So I have to skip the test for the moment when
>     gfx is in "idle".
> 
> 
> Working around that issue for the moment is ok, but please note that explicitly
> in both the commit message and a code comment.

OK. Will add it at V2.

> 
> But don't you run into the same problem when the UMD starts to submit commands?

When the UMD starts, the RLC firmware will detect the "draw" command and then
power up gfx. So there is no problem at that time. The main state
machine isn't exposed to the driver side yet.

> 
> I mean the idea of the IB test is that you "simulate" an userspace command
> submission and see if it works.
> 

Yes, agreed. Any idea how to "simulate" the "draw" command? Otherwise, please wait for
my fix for enabling/disabling gfxoff at runtime.

Thanks,
Ray
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: fix the ib test hang when gfx is in "idle" state
  2018-04-23 12:13             ` Huang Rui
@ 2018-04-23 13:07               ` Koenig, Christian
  2018-04-23 14:40               ` Alex Deucher
  1 sibling, 0 replies; 9+ messages in thread
From: Koenig, Christian @ 2018-04-23 13:07 UTC (permalink / raw)
  To: Huang, Ray
  Cc: Deucher, Alexander, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, S, Shirish


[-- Attachment #1.1: Type: text/plain, Size: 2045 bytes --]

Hi Ray,

Am 23.04.2018 14:08 schrieb Huang Rui <ray.huang@amd.com>:
On Mon, Apr 23, 2018 at 05:57:06PM +0800, Koenig, Christian wrote:
> Hi Ray,
>
> Am 23.04.2018 11:47 schrieb Huang Rui <ray.huang-5C7GfCeVMHo@public.gmane.org>:
>
>     On Fri, Apr 20, 2018 at 05:59:16PM +0800, Koenig, Christian wrote:
>     > Am 20.04.2018 um 11:40 schrieb Huang Rui:
>     > > "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
>     > > Above patch defers the execution of gfx/compute ib tests. However, at
>     that time,
>     > > the gfx may already go into idle state. If "idle" gfx receives command
>     > > submission, it will get hang in the system. So we must add is_gfx_on
>     checking at
>     > > start of ib tests.
>     >
>     > Do I see that right that you just skip the IB test when the GFX block is
>     > already turned of? In this case that would be a clear NAK.
>     >
>     > BTW: How do you detect that we need to turn GFX on again?
>
>     Christian, I know point. But there is a hang issue if we would like try to
>     disable/enable gfxoff with SMC message at runtime. Actually, I am trying to
>     find a good sequence to fix it. After that, I can even expose an debugfs
>     interface to configure that. So I have to skip the test for the moment when
>     gfx is in "idle".
>
>
> Working around that issue for the moment is ok, but please note that explicitly
> in both the commit message and a code comment.

OK. Will add it at V2.

>
> But don't you run into the same problem when the UMD starts to submit commands?

When UMD starts, RLC firmware will detect the "draw" command, then it will
power up gfx. So it won't have problem at that time. The mainly state
machine doesn't expose to driver side yet.

>
> I mean the idea of the IB test is that you "simulate" an userspace command
> submission and see if it works.
>

Yes, agree. Any idea to "simulate" the "draw" command? Or please wait for
my fix for enabling/disabling gfxoff at runtime.

Thanks,
Ray

[-- Attachment #1.2: Type: text/html, Size: 3422 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: fix the ib test hang when gfx is in "idle" state
  2018-04-23 12:13             ` Huang Rui
  2018-04-23 13:07               ` Koenig, Christian
@ 2018-04-23 14:40               ` Alex Deucher
       [not found]                 ` <CADnq5_N2ykxhnvfNX0dTXmSHDgKc7LA+QDR7nOFKgWwKp3BgxQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  1 sibling, 1 reply; 9+ messages in thread
From: Alex Deucher @ 2018-04-23 14:40 UTC (permalink / raw)
  To: Huang Rui
  Cc: Deucher, Alexander, Koenig, Christian,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, S, Shirish

On Mon, Apr 23, 2018 at 8:13 AM, Huang Rui <ray.huang@amd.com> wrote:
> On Mon, Apr 23, 2018 at 05:57:06PM +0800, Koenig, Christian wrote:
>> Hi Ray,
>>
>> Am 23.04.2018 11:47 schrieb Huang Rui <ray.huang@amd.com>:
>>
>>     On Fri, Apr 20, 2018 at 05:59:16PM +0800, Koenig, Christian wrote:
>>     > Am 20.04.2018 um 11:40 schrieb Huang Rui:
>>     > > "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
>>     > > Above patch defers the execution of gfx/compute ib tests. However, at
>>     that time,
>>     > > the gfx may already go into idle state. If "idle" gfx receives command
>>     > > submission, it will get hang in the system. So we must add is_gfx_on
>>     checking at
>>     > > start of ib tests.
>>     >
>>     > Do I see that right that you just skip the IB test when the GFX block is
>>     > already turned of? In this case that would be a clear NAK.
>>     >
>>     > BTW: How do you detect that we need to turn GFX on again?
>>
>>     Christian, I know point. But there is a hang issue if we would like try to
>>     disable/enable gfxoff with SMC message at runtime. Actually, I am trying to
>>     find a good sequence to fix it. After that, I can even expose an debugfs
>>     interface to configure that. So I have to skip the test for the moment when
>>     gfx is in "idle".
>>
>>
>> Working around that issue for the moment is ok, but please note that explicitly
>> in both the commit message and a code comment.
>
> OK. Will add it at V2.
>
>>
>> But don't you run into the same problem when the UMD starts to submit commands?
>
> When UMD starts, RLC firmware will detect the "draw" command, then it will
> power up gfx. So it won't have problem at that time. The mainly state
> machine doesn't expose to driver side yet.
>
>>
>> I mean the idea of the IB test is that you "simulate" an userspace command
>> submission and see if it works.
>>
>
> Yes, agree. Any idea to "simulate" the "draw" command? Or please wait for
> my fix for enabling/disabling gfxoff at runtime.


Is there some special formatting in the IB required?  I don't really
see how this will work.  There is likely tons of state before the
actual draw command in the IB.

Alex

>
> Thanks,
> Ray
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: fix the ib test hang when gfx is in "idle" state
       [not found]                 ` <CADnq5_N2ykxhnvfNX0dTXmSHDgKc7LA+QDR7nOFKgWwKp3BgxQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2018-04-24  1:55                   ` Huang Rui
  0 siblings, 0 replies; 9+ messages in thread
From: Huang Rui @ 2018-04-24  1:55 UTC (permalink / raw)
  To: Alex Deucher
  Cc: Deucher, Alexander, Koenig, Christian,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, S, Shirish

On Mon, Apr 23, 2018 at 10:40:20PM +0800, Alex Deucher wrote:
> On Mon, Apr 23, 2018 at 8:13 AM, Huang Rui <ray.huang@amd.com> wrote:
> > On Mon, Apr 23, 2018 at 05:57:06PM +0800, Koenig, Christian wrote:
> >> Hi Ray,
> >>
> >> Am 23.04.2018 11:47 schrieb Huang Rui <ray.huang@amd.com>:
> >>
> >>     On Fri, Apr 20, 2018 at 05:59:16PM +0800, Koenig, Christian wrote:
> >>     > Am 20.04.2018 um 11:40 schrieb Huang Rui:
> >>     > > "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
> >>     > > Above patch defers the execution of gfx/compute ib tests. However, at
> >>     that time,
> >>     > > the gfx may already go into idle state. If "idle" gfx receives command
> >>     > > submission, it will get hang in the system. So we must add is_gfx_on
> >>     checking at
> >>     > > start of ib tests.
> >>     >
> >>     > Do I see that right that you just skip the IB test when the GFX block is
> >>     > already turned of? In this case that would be a clear NAK.
> >>     >
> >>     > BTW: How do you detect that we need to turn GFX on again?
> >>
> >>     Christian, I know point. But there is a hang issue if we would like try to
> >>     disable/enable gfxoff with SMC message at runtime. Actually, I am trying to
> >>     find a good sequence to fix it. After that, I can even expose an debugfs
> >>     interface to configure that. So I have to skip the test for the moment when
> >>     gfx is in "idle".
> >>
> >>
> >> Working around that issue for the moment is ok, but please note that explicitly
> >> in both the commit message and a code comment.
> >
> > OK. Will add it at V2.
> >
> >>
> >> But don't you run into the same problem when the UMD starts to submit commands?
> >
> > When UMD starts, RLC firmware will detect the "draw" command, then it will
> > power up gfx. So it won't have problem at that time. The mainly state
> > machine doesn't expose to driver side yet.
> >
> >>
> >> I mean the idea of the IB test is that you "simulate" an userspace command
> >> submission and see if it works.
> >>
> >
> > Yes, agree. Any idea to "simulate" the "draw" command? Or please wait for
> > my fix for enabling/disabling gfxoff at runtime.
> 
> 
> Is there some special formatting in the IB required?  I don't really
> see how this will work.  There is likely tons of state before the
> actual draw command in the IB.
> 

No, there isn't. The main logic for turning gfx on/off lives almost entirely in the RLC
firmware. From the driver's perspective, we just use an SMC message to
enable/disable the feature.

Thanks,
Ray
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2018-04-24  1:55 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-20  9:40 [PATCH] drm/amdgpu: fix the ib test hang when gfx is in "idle" state Huang Rui
     [not found] ` <1524217255-25968-1-git-send-email-ray.huang-5C7GfCeVMHo@public.gmane.org>
2018-04-20  9:59   ` Christian König
     [not found]     ` <aaaffd0c-96be-29b2-9bd4-df6fa6ea7e8a-5C7GfCeVMHo@public.gmane.org>
2018-04-23  9:52       ` Huang Rui
2018-04-23  9:57         ` Koenig, Christian
     [not found]           ` <afda993b-2d50-4181-9378-f3440b5d633c-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
2018-04-23 12:13             ` Huang Rui
2018-04-23 13:07               ` Koenig, Christian
2018-04-23 14:40               ` Alex Deucher
     [not found]                 ` <CADnq5_N2ykxhnvfNX0dTXmSHDgKc7LA+QDR7nOFKgWwKp3BgxQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2018-04-24  1:55                   ` Huang Rui
2018-04-23 10:02         ` Huang Rui

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.