All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/1] drm/amdgpu: Read clock counter via MMIO to reduce delay (v5)
@ 2021-07-01  9:57 YuBiao Wang
  2021-07-01 10:47 ` Christian König
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: YuBiao Wang @ 2021-07-01  9:57 UTC (permalink / raw)
  To: amd-gfx
  Cc: YuBiao Wang, Andrey Grodzovsky, Jack Xiao, Feifei Xu,
	horace.chen, Kevin Wang, Tuikov Luben, Deucher Alexander,
	Evan Quan, Christian König, Monk Liu, Hawking Zhang

[Why]
GPU timing counters are read via KIQ under sriov, which will introduce
a delay.

[How]
It could be directly read by MMIO.

v2: Add additional check to prevent carryover issue.
v3: Only check for carryover for once to prevent performance issue.
v4: Add comments of the rough frequency where carryover happens.
v5: Remove mutex and gfxoff ctrl unused with current timing registers.

Signed-off-by: YuBiao Wang <YuBiao.Wang@amd.com>
Acked-by: Horace Chen <horace.chen@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index ff7e9f49040e..5f4eae9c9526 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -7609,10 +7609,8 @@ static int gfx_v10_0_soft_reset(void *handle)
 
 static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
 {
-	uint64_t clock;
+	uint64_t clock, clock_lo, clock_hi, hi_check;
 
-	amdgpu_gfx_off_ctrl(adev, false);
-	mutex_lock(&adev->gfx.gpu_clock_mutex);
 	switch (adev->asic_type) {
 	case CHIP_VANGOGH:
 	case CHIP_YELLOW_CARP:
@@ -7620,12 +7618,19 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
 			((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL);
 		break;
 	default:
-		clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) |
-			((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL);
+		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER);
+		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER);
+		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER);
+		/* The GFX clock frequency is 100MHz, which sets 32-bit carry over
+		 * roughly every 42 seconds.
+		 */
+		if (hi_check != clock_hi) {
+			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER);
+			clock_hi = hi_check;
+		}
+		clock = (uint64_t)clock_lo | ((uint64_t)clock_hi << 32ULL);
 		break;
 	}
-	mutex_unlock(&adev->gfx.gpu_clock_mutex);
-	amdgpu_gfx_off_ctrl(adev, true);
 	return clock;
 }
 
-- 
2.25.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/1] drm/amdgpu: Read clock counter via MMIO to reduce delay (v5)
  2021-07-01  9:57 [PATCH 1/1] drm/amdgpu: Read clock counter via MMIO to reduce delay (v5) YuBiao Wang
@ 2021-07-01 10:47 ` Christian König
  2021-07-01 10:50 ` Lazar, Lijo
  2021-07-01 16:39 ` Luben Tuikov
  2 siblings, 0 replies; 4+ messages in thread
From: Christian König @ 2021-07-01 10:47 UTC (permalink / raw)
  To: YuBiao Wang, amd-gfx
  Cc: Andrey Grodzovsky, Jack Xiao, Feifei Xu, horace.chen, Kevin Wang,
	Tuikov Luben, Deucher Alexander, Evan Quan, Christian König,
	Monk Liu, Hawking Zhang



Am 01.07.21 um 11:57 schrieb YuBiao Wang:
> [Why]
> GPU timing counters are read via KIQ under sriov, which will introduce
> a delay.
>
> [How]
> It could be directly read by MMIO.
>
> v2: Add additional check to prevent carryover issue.
> v3: Only check for carryover for once to prevent performance issue.
> v4: Add comments of the rough frequency where carryover happens.
> v5: Remove mutex and gfxoff ctrl unused with current timing registers.
>
> Signed-off-by: YuBiao Wang <YuBiao.Wang@amd.com>
> Acked-by: Horace Chen <horace.chen@amd.com>

The preemption_disable()/_enable() would still be nice to have I think.

Anyway patch is Reviewed-by: Christian König <christian.koenig@amd.com> 
either way.

Regards,
Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 19 ++++++++++++-------
>   1 file changed, 12 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index ff7e9f49040e..5f4eae9c9526 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -7609,10 +7609,8 @@ static int gfx_v10_0_soft_reset(void *handle)
>   
>   static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
>   {
> -	uint64_t clock;
> +	uint64_t clock, clock_lo, clock_hi, hi_check;
>   
> -	amdgpu_gfx_off_ctrl(adev, false);
> -	mutex_lock(&adev->gfx.gpu_clock_mutex);
>   	switch (adev->asic_type) {
>   	case CHIP_VANGOGH:
>   	case CHIP_YELLOW_CARP:
> @@ -7620,12 +7618,19 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
>   			((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL);
>   		break;
>   	default:
> -		clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) |
> -			((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL);
> +		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER);
> +		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER);
> +		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER);
> +		/* The GFX clock frequency is 100MHz, which sets 32-bit carry over
> +		 * roughly every 42 seconds.
> +		 */
> +		if (hi_check != clock_hi) {
> +			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER);
> +			clock_hi = hi_check;
> +		}
> +		clock = (uint64_t)clock_lo | ((uint64_t)clock_hi << 32ULL);
>   		break;
>   	}
> -	mutex_unlock(&adev->gfx.gpu_clock_mutex);
> -	amdgpu_gfx_off_ctrl(adev, true);
>   	return clock;
>   }
>   

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/1] drm/amdgpu: Read clock counter via MMIO to reduce delay (v5)
  2021-07-01  9:57 [PATCH 1/1] drm/amdgpu: Read clock counter via MMIO to reduce delay (v5) YuBiao Wang
  2021-07-01 10:47 ` Christian König
@ 2021-07-01 10:50 ` Lazar, Lijo
  2021-07-01 16:39 ` Luben Tuikov
  2 siblings, 0 replies; 4+ messages in thread
From: Lazar, Lijo @ 2021-07-01 10:50 UTC (permalink / raw)
  To: YuBiao Wang, amd-gfx
  Cc: Andrey Grodzovsky, Jack Xiao, Feifei Xu, horace.chen, Kevin Wang,
	Tuikov Luben, Deucher Alexander, Evan Quan, Christian König,
	Monk Liu, Hawking Zhang

A few comments to take care of before submitting the changes.

Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>

On 7/1/2021 3:27 PM, YuBiao Wang wrote:
> [Why]
> GPU timing counters are read via KIQ under sriov, which will introduce
> a delay.
> 
> [How]
> It could be directly read by MMIO.
> 
> v2: Add additional check to prevent carryover issue.
> v3: Only check for carryover for once to prevent performance issue.
> v4: Add comments of the rough frequency where carryover happens.
> v5: Remove mutex and gfxoff ctrl unused with current timing registers.
> 
> Signed-off-by: YuBiao Wang <YuBiao.Wang@amd.com>
> Acked-by: Horace Chen <horace.chen@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 19 ++++++++++++-------
>   1 file changed, 12 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index ff7e9f49040e..5f4eae9c9526 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -7609,10 +7609,8 @@ static int gfx_v10_0_soft_reset(void *handle)
>   
>   static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
>   {
> -	uint64_t clock;
> +	uint64_t clock, clock_lo, clock_hi, hi_check;
>   
> -	amdgpu_gfx_off_ctrl(adev, false);
> -	mutex_lock(&adev->gfx.gpu_clock_mutex);
>   	switch (adev->asic_type) {
>   	case CHIP_VANGOGH:
>   	case CHIP_YELLOW_CARP:
> @@ -7620,12 +7618,19 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
>   			((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL);
>   		break;
>   	default:
> -		clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) |
> -			((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL);
> +		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER);
> +		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER);
> +		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER);
> +		/* The GFX clock frequency is 100MHz, which sets 32-bit carry over
> +		 * roughly every 42 seconds.
> +		 */

SMUIO TSC is not ticking based on GFX clock frequency, it's running at a 
fixed SMUIO TSC Clock frequency which is 100MHz (second part is right).

Replace GFX Clock with SMUIO TSC Clock.

> +		if (hi_check != clock_hi) {
> +			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER);
> +			clock_hi = hi_check;
> +		}
> +		clock = (uint64_t)clock_lo | ((uint64_t)clock_hi << 32ULL);

These types are already uint64_t and looks cleaner without the cast.

Thanks,
Lijo

>   		break;
>   	}
> -	mutex_unlock(&adev->gfx.gpu_clock_mutex);
> -	amdgpu_gfx_off_ctrl(adev, true);
>   	return clock;
>   }
>   
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/1] drm/amdgpu: Read clock counter via MMIO to reduce delay (v5)
  2021-07-01  9:57 [PATCH 1/1] drm/amdgpu: Read clock counter via MMIO to reduce delay (v5) YuBiao Wang
  2021-07-01 10:47 ` Christian König
  2021-07-01 10:50 ` Lazar, Lijo
@ 2021-07-01 16:39 ` Luben Tuikov
  2 siblings, 0 replies; 4+ messages in thread
From: Luben Tuikov @ 2021-07-01 16:39 UTC (permalink / raw)
  To: YuBiao Wang, amd-gfx
  Cc: Andrey Grodzovsky, Jack Xiao, Feifei Xu, horace.chen, Kevin Wang,
	Deucher Alexander, Evan Quan, Christian König, Monk Liu,
	Hawking Zhang

On 2021-07-01 5:57 a.m., YuBiao Wang wrote:
> [Why]
> GPU timing counters are read via KIQ under sriov, which will introduce
> a delay.
>
> [How]
> It could be directly read by MMIO.
>
> v2: Add additional check to prevent carryover issue.
> v3: Only check for carryover for once to prevent performance issue.
> v4: Add comments of the rough frequency where carryover happens.
> v5: Remove mutex and gfxoff ctrl unused with current timing registers.
>
> Signed-off-by: YuBiao Wang <YuBiao.Wang@amd.com>
> Acked-by: Horace Chen <horace.chen@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 19 ++++++++++++-------
>  1 file changed, 12 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index ff7e9f49040e..5f4eae9c9526 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -7609,10 +7609,8 @@ static int gfx_v10_0_soft_reset(void *handle)
>  
>  static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
>  {
> -	uint64_t clock;
> +	uint64_t clock, clock_lo, clock_hi, hi_check;
>  
> -	amdgpu_gfx_off_ctrl(adev, false);
> -	mutex_lock(&adev->gfx.gpu_clock_mutex);
>  	switch (adev->asic_type) {
>  	case CHIP_VANGOGH:
>  	case CHIP_YELLOW_CARP:
> @@ -7620,12 +7618,19 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
>  			((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL);
>  		break;
>  	default:
> -		clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) |
> -			((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL);
> +		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER);
> +		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER);
> +		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER);
> +		/* The GFX clock frequency is 100MHz, which sets 32-bit carry over
> +		 * roughly every 42 seconds.
> +		 */
> +		if (hi_check != clock_hi) {

Yeah, the comment is so much better now. Good job.

Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>

Regards,
Luben

> +			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER);
> +			clock_hi = hi_check;
> +		}
> +		clock = (uint64_t)clock_lo | ((uint64_t)clock_hi << 32ULL);
>  		break;
>  	}
> -	mutex_unlock(&adev->gfx.gpu_clock_mutex);
> -	amdgpu_gfx_off_ctrl(adev, true);
>  	return clock;
>  }
>  

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-07-01 16:39 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-01  9:57 [PATCH 1/1] drm/amdgpu: Read clock counter via MMIO to reduce delay (v5) YuBiao Wang
2021-07-01 10:47 ` Christian König
2021-07-01 10:50 ` Lazar, Lijo
2021-07-01 16:39 ` Luben Tuikov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.