All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: enable 48-bit IH timestamp counter
@ 2020-11-10 17:55 Alex Sierra
  2020-11-10 17:57 ` Sierra Guiza, Alejandro (Alex)
  0 siblings, 1 reply; 9+ messages in thread
From: Alex Sierra @ 2020-11-10 17:55 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra, Felix.Kuehling, christian.Koenig

By default this timestamp is based on a 32-bit counter.
It is used by amdgpu_gmc_filter_faults to
avoid processing the same interrupt again in retry configuration.
Apparently there's a problem when the timestamp coming from
the IH overflows and is compared against the timestamp coming
from the hash table.
This patch only extends the time until overflow from 10 minutes to
approx. 455 days.

Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 6 ++++++
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index 837769fcb35b..bda916f33805 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -94,6 +94,8 @@ static void navi10_ih_enable_interrupts(struct amdgpu_device *adev)
 
 	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
 	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
+	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+				   RB_GPU_TS_ENABLE, 1);
 	if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
 		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
 			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
@@ -109,6 +111,8 @@ static void navi10_ih_enable_interrupts(struct amdgpu_device *adev)
 		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
 		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
 					   RB_ENABLE, 1);
+		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
+					   RB_GPU_TS_ENABLE, 1);
 		if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
 			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
 						ih_rb_cntl)) {
@@ -125,6 +129,8 @@ static void navi10_ih_enable_interrupts(struct amdgpu_device *adev)
 		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
 		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
 					   RB_ENABLE, 1);
+		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
+					   RB_GPU_TS_ENABLE, 1);
 		if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
 			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
 						ih_rb_cntl)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 407c6093c2ec..35d68bc5d95e 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -50,6 +50,8 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
 
 	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
 	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
+	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+				   RB_GPU_TS_ENABLE, 1);
 	if (amdgpu_sriov_vf(adev)) {
 		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
 			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
@@ -64,6 +66,8 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
 		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
 		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
 					   RB_ENABLE, 1);
+		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
+					   RB_GPU_TS_ENABLE, 1);
 		if (amdgpu_sriov_vf(adev)) {
 			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
 						ih_rb_cntl)) {
@@ -80,6 +84,8 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
 		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
 		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
 					   RB_ENABLE, 1);
+		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
+					   RB_GPU_TS_ENABLE, 1);
 		if (amdgpu_sriov_vf(adev)) {
 			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
 						ih_rb_cntl)) {
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* RE: [PATCH] drm/amdgpu: enable 48-bit IH timestamp counter
  2020-11-10 17:55 [PATCH] drm/amdgpu: enable 48-bit IH timestamp counter Alex Sierra
@ 2020-11-10 17:57 ` Sierra Guiza, Alejandro (Alex)
  2020-11-16 11:31   ` Christian König
  0 siblings, 1 reply; 9+ messages in thread
From: Sierra Guiza, Alejandro (Alex) @ 2020-11-10 17:57 UTC (permalink / raw)
  To: amd-gfx, Koenig,  Christian; +Cc: Kuehling, Felix

[AMD Public Use]

I just added support for vega10_ih too.

Regards,
Alex

> -----Original Message-----
> From: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@amd.com>
> Sent: Tuesday, November 10, 2020 11:55 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Koenig, Christian <Christian.Koenig@amd.com>; Kuehling, Felix
> <Felix.Kuehling@amd.com>; Sierra Guiza, Alejandro (Alex)
> <Alex.Sierra@amd.com>
> Subject: [PATCH] drm/amdgpu: enable 48-bit IH timestamp counter
> 
> By default this timestamp is based on a 32 bit counter.
> This is used by the amdgpu_gmc_filter_faults, to avoid process the same
> interrupt in retry configuration.
> Apparently there's a problem when the timestamp coming from IH overflows
> and compares against timestamp coming from the the hash table.
> This patch only extends the time overflow from 10 minutes to aprx 455 days.
> 
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 6 ++++++
> drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 6 ++++++
>  2 files changed, 12 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> index 837769fcb35b..bda916f33805 100644
> --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> @@ -94,6 +94,8 @@ static void navi10_ih_enable_interrupts(struct
> amdgpu_device *adev)
> 
>  	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
>  	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR,
> 1);
> +	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
> +				   RB_GPU_TS_ENABLE, 1);
>  	if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
>  		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL,
> ih_rb_cntl)) {
>  			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
> @@ -109,6 +111,8 @@ static void navi10_ih_enable_interrupts(struct
> amdgpu_device *adev)
>  		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
> mmIH_RB_CNTL_RING1);
>  		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
>  					   RB_ENABLE, 1);
> +		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
> +					   RB_GPU_TS_ENABLE, 1);
>  		if (amdgpu_sriov_vf(adev) && adev->asic_type <
> CHIP_NAVI10) {
>  			if (psp_reg_program(&adev->psp,
> PSP_REG_IH_RB_CNTL_RING1,
>  						ih_rb_cntl)) {
> @@ -125,6 +129,8 @@ static void navi10_ih_enable_interrupts(struct
> amdgpu_device *adev)
>  		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
> mmIH_RB_CNTL_RING2);
>  		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
>  					   RB_ENABLE, 1);
> +		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
> +					   RB_GPU_TS_ENABLE, 1);
>  		if (amdgpu_sriov_vf(adev) && adev->asic_type <
> CHIP_NAVI10) {
>  			if (psp_reg_program(&adev->psp,
> PSP_REG_IH_RB_CNTL_RING2,
>  						ih_rb_cntl)) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> index 407c6093c2ec..35d68bc5d95e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> @@ -50,6 +50,8 @@ static void vega10_ih_enable_interrupts(struct
> amdgpu_device *adev)
> 
>  	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
>  	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR,
> 1);
> +	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
> +				   RB_GPU_TS_ENABLE, 1);
>  	if (amdgpu_sriov_vf(adev)) {
>  		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL,
> ih_rb_cntl)) {
>  			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
> @@ -64,6 +66,8 @@ static void vega10_ih_enable_interrupts(struct
> amdgpu_device *adev)
>  		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
> mmIH_RB_CNTL_RING1);
>  		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
>  					   RB_ENABLE, 1);
> +		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
> +					   RB_GPU_TS_ENABLE, 1);
>  		if (amdgpu_sriov_vf(adev)) {
>  			if (psp_reg_program(&adev->psp,
> PSP_REG_IH_RB_CNTL_RING1,
>  						ih_rb_cntl)) {
> @@ -80,6 +84,8 @@ static void vega10_ih_enable_interrupts(struct
> amdgpu_device *adev)
>  		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
> mmIH_RB_CNTL_RING2);
>  		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
>  					   RB_ENABLE, 1);
> +		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
> +					   RB_GPU_TS_ENABLE, 1);
>  		if (amdgpu_sriov_vf(adev)) {
>  			if (psp_reg_program(&adev->psp,
> PSP_REG_IH_RB_CNTL_RING2,
>  						ih_rb_cntl)) {
> --
> 2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH] drm/amdgpu: enable 48-bit IH timestamp counter
  2020-11-16 11:31   ` Christian König
@ 2020-11-13 17:11     ` Sierra Guiza, Alejandro (Alex)
  2020-11-13 17:15     ` Felix Kuehling
  1 sibling, 0 replies; 9+ messages in thread
From: Sierra Guiza, Alejandro (Alex) @ 2020-11-13 17:11 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx; +Cc: Kuehling, Felix

[AMD Public Use]

This gives us time for the rest of the enablement we're doing. However, we should fix the fundamental problem in the near future.

Regards,
Alejandro S.

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
> Christian König
> Sent: Monday, November 16, 2020 5:31 AM
> To: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@amd.com>; amd-
> gfx@lists.freedesktop.org; Koenig, Christian <Christian.Koenig@amd.com>
> Cc: Kuehling, Felix <Felix.Kuehling@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: enable 48-bit IH timestamp counter
> 
> Feel free to keep my rb for this, but is 455 days enough in general or should
> we add wrap around handling?
> 
> Christian.
> 
> Am 10.11.20 um 18:57 schrieb Sierra Guiza, Alejandro (Alex):
> > [AMD Public Use]
> >
> > I just added support for vega10_ih too.
> >
> > Regards,
> > Alex
> >
> >> -----Original Message-----
> >> From: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@amd.com>
> >> Sent: Tuesday, November 10, 2020 11:55 AM
> >> To: amd-gfx@lists.freedesktop.org
> >> Cc: Koenig, Christian <Christian.Koenig@amd.com>; Kuehling, Felix
> >> <Felix.Kuehling@amd.com>; Sierra Guiza, Alejandro (Alex)
> >> <Alex.Sierra@amd.com>
> >> Subject: [PATCH] drm/amdgpu: enable 48-bit IH timestamp counter
> >>
> >> By default this timestamp is based on a 32 bit counter.
> >> This is used by the amdgpu_gmc_filter_faults, to avoid process the
> >> same interrupt in retry configuration.
> >> Apparently there's a problem when the timestamp coming from IH
> >> overflows and compares against timestamp coming from the the hash
> table.
> >> This patch only extends the time overflow from 10 minutes to aprx 455
> days.
> >>
> >> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 6 ++++++
> >> drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 6 ++++++
> >>   2 files changed, 12 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> >> b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> >> index 837769fcb35b..bda916f33805 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> >> @@ -94,6 +94,8 @@ static void navi10_ih_enable_interrupts(struct
> >> amdgpu_device *adev)
> >>
> >>   	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
> >>   	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR,
> >> 1);
> >> +	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
> >> +				   RB_GPU_TS_ENABLE, 1);
> >>   	if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
> >>   		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL,
> >> ih_rb_cntl)) {
> >>   			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
> @@ -109,6 +111,8
> >> @@ static void navi10_ih_enable_interrupts(struct
> >> amdgpu_device *adev)
> >>   		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
> mmIH_RB_CNTL_RING1);
> >>   		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
> >>   					   RB_ENABLE, 1);
> >> +		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
> >> +					   RB_GPU_TS_ENABLE, 1);
> >>   		if (amdgpu_sriov_vf(adev) && adev->asic_type <
> >> CHIP_NAVI10) {
> >>   			if (psp_reg_program(&adev->psp,
> >> PSP_REG_IH_RB_CNTL_RING1,
> >>   						ih_rb_cntl)) {
> >> @@ -125,6 +129,8 @@ static void navi10_ih_enable_interrupts(struct
> >> amdgpu_device *adev)
> >>   		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
> mmIH_RB_CNTL_RING2);
> >>   		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
> >>   					   RB_ENABLE, 1);
> >> +		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
> >> +					   RB_GPU_TS_ENABLE, 1);
> >>   		if (amdgpu_sriov_vf(adev) && adev->asic_type <
> >> CHIP_NAVI10) {
> >>   			if (psp_reg_program(&adev->psp,
> >> PSP_REG_IH_RB_CNTL_RING2,
> >>   						ih_rb_cntl)) {
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> >> b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> >> index 407c6093c2ec..35d68bc5d95e 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> >> @@ -50,6 +50,8 @@ static void vega10_ih_enable_interrupts(struct
> >> amdgpu_device *adev)
> >>
> >>   	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
> >>   	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR,
> >> 1);
> >> +	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
> >> +				   RB_GPU_TS_ENABLE, 1);
> >>   	if (amdgpu_sriov_vf(adev)) {
> >>   		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL,
> >> ih_rb_cntl)) {
> >>   			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
> @@ -64,6 +66,8 @@
> >> static void vega10_ih_enable_interrupts(struct
> >> amdgpu_device *adev)
> >>   		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
> mmIH_RB_CNTL_RING1);
> >>   		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
> >>   					   RB_ENABLE, 1);
> >> +		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
> >> +					   RB_GPU_TS_ENABLE, 1);
> >>   		if (amdgpu_sriov_vf(adev)) {
> >>   			if (psp_reg_program(&adev->psp,
> >> PSP_REG_IH_RB_CNTL_RING1,
> >>   						ih_rb_cntl)) {
> >> @@ -80,6 +84,8 @@ static void vega10_ih_enable_interrupts(struct
> >> amdgpu_device *adev)
> >>   		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
> mmIH_RB_CNTL_RING2);
> >>   		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
> >>   					   RB_ENABLE, 1);
> >> +		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
> >> +					   RB_GPU_TS_ENABLE, 1);
> >>   		if (amdgpu_sriov_vf(adev)) {
> >>   			if (psp_reg_program(&adev->psp,
> >> PSP_REG_IH_RB_CNTL_RING2,
> >>   						ih_rb_cntl)) {
> >> --
> >> 2.17.1
> > _______________________________________________
> > amd-gfx mailing list
> > amd-gfx@lists.freedesktop.org
> > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flist
> > s.freedesktop.org%2Fmailman%2Flistinfo%2Famd-
> gfx&amp;data=04%7C01%7Cal
> >
> ex.sierra%40amd.com%7C2fd98389222842a15da008d887e93049%7C3dd8961f
> e4884
> >
> e608e11a82d994e183d%7C0%7C0%7C637408783493249465%7CUnknown%7C
> TWFpbGZsb
> >
> 3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0
> %3D%
> >
> 7C3000&amp;sdata=NyKN4mVAmDfq4zFl5rbuth551l8nYyQh2LQ3icSXojM%3
> D&amp;re
> > served=0
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.
> freedesktop.org%2Fmailman%2Flistinfo%2Famd-
> gfx&amp;data=04%7C01%7Calex.sierra%40amd.com%7C2fd98389222842a15
> da008d887e93049%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C63
> 7408783493249465%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMD
> AiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=
> NyKN4mVAmDfq4zFl5rbuth551l8nYyQh2LQ3icSXojM%3D&amp;reserved=0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: enable 48-bit IH timestamp counter
  2020-11-16 11:31   ` Christian König
  2020-11-13 17:11     ` Sierra Guiza, Alejandro (Alex)
@ 2020-11-13 17:15     ` Felix Kuehling
  2020-11-14  8:26       ` Christian König
  1 sibling, 1 reply; 9+ messages in thread
From: Felix Kuehling @ 2020-11-13 17:15 UTC (permalink / raw)
  To: christian.koenig, Sierra Guiza, Alejandro (Alex), amd-gfx

I'd feel better with wrap-around handling. I think having a system up
for that long is not likely but not impossible. Having a known hard
limit on uptime is probably a bad thing. Imagine someone trying to
reproduce the problem ...

Regards,
  Felix

Am 2020-11-16 um 6:31 a.m. schrieb Christian König:
> Feel free to keep my rb for this, but is 455 days enough in general or
> should we add wrap around handling?
>
> Christian.
>
> Am 10.11.20 um 18:57 schrieb Sierra Guiza, Alejandro (Alex):
>> [AMD Public Use]
>>
>> I just added support for vega10_ih too.
>>
>> Regards,
>> Alex
>>
>>> -----Original Message-----
>>> From: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@amd.com>
>>> Sent: Tuesday, November 10, 2020 11:55 AM
>>> To: amd-gfx@lists.freedesktop.org
>>> Cc: Koenig, Christian <Christian.Koenig@amd.com>; Kuehling, Felix
>>> <Felix.Kuehling@amd.com>; Sierra Guiza, Alejandro (Alex)
>>> <Alex.Sierra@amd.com>
>>> Subject: [PATCH] drm/amdgpu: enable 48-bit IH timestamp counter
>>>
>>> By default this timestamp is based on a 32 bit counter.
>>> This is used by the amdgpu_gmc_filter_faults, to avoid process the same
>>> interrupt in retry configuration.
>>> Apparently there's a problem when the timestamp coming from IH
>>> overflows
>>> and compares against timestamp coming from the the hash table.
>>> This patch only extends the time overflow from 10 minutes to aprx
>>> 455 days.
>>>
>>> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 6 ++++++
>>> drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 6 ++++++
>>>   2 files changed, 12 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
>>> b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
>>> index 837769fcb35b..bda916f33805 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
>>> @@ -94,6 +94,8 @@ static void navi10_ih_enable_interrupts(struct
>>> amdgpu_device *adev)
>>>
>>>       ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
>>>       ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR,
>>> 1);
>>> +    ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
>>> +                   RB_GPU_TS_ENABLE, 1);
>>>       if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
>>>           if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL,
>>> ih_rb_cntl)) {
>>>               DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
>>> @@ -109,6 +111,8 @@ static void navi10_ih_enable_interrupts(struct
>>> amdgpu_device *adev)
>>>           ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
>>> mmIH_RB_CNTL_RING1);
>>>           ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
>>>                          RB_ENABLE, 1);
>>> +        ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
>>> +                       RB_GPU_TS_ENABLE, 1);
>>>           if (amdgpu_sriov_vf(adev) && adev->asic_type <
>>> CHIP_NAVI10) {
>>>               if (psp_reg_program(&adev->psp,
>>> PSP_REG_IH_RB_CNTL_RING1,
>>>                           ih_rb_cntl)) {
>>> @@ -125,6 +129,8 @@ static void navi10_ih_enable_interrupts(struct
>>> amdgpu_device *adev)
>>>           ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
>>> mmIH_RB_CNTL_RING2);
>>>           ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
>>>                          RB_ENABLE, 1);
>>> +        ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
>>> +                       RB_GPU_TS_ENABLE, 1);
>>>           if (amdgpu_sriov_vf(adev) && adev->asic_type <
>>> CHIP_NAVI10) {
>>>               if (psp_reg_program(&adev->psp,
>>> PSP_REG_IH_RB_CNTL_RING2,
>>>                           ih_rb_cntl)) {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
>>> b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
>>> index 407c6093c2ec..35d68bc5d95e 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
>>> @@ -50,6 +50,8 @@ static void vega10_ih_enable_interrupts(struct
>>> amdgpu_device *adev)
>>>
>>>       ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
>>>       ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR,
>>> 1);
>>> +    ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
>>> +                   RB_GPU_TS_ENABLE, 1);
>>>       if (amdgpu_sriov_vf(adev)) {
>>>           if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL,
>>> ih_rb_cntl)) {
>>>               DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
>>> @@ -64,6 +66,8 @@ static void vega10_ih_enable_interrupts(struct
>>> amdgpu_device *adev)
>>>           ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
>>> mmIH_RB_CNTL_RING1);
>>>           ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
>>>                          RB_ENABLE, 1);
>>> +        ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
>>> +                       RB_GPU_TS_ENABLE, 1);
>>>           if (amdgpu_sriov_vf(adev)) {
>>>               if (psp_reg_program(&adev->psp,
>>> PSP_REG_IH_RB_CNTL_RING1,
>>>                           ih_rb_cntl)) {
>>> @@ -80,6 +84,8 @@ static void vega10_ih_enable_interrupts(struct
>>> amdgpu_device *adev)
>>>           ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
>>> mmIH_RB_CNTL_RING2);
>>>           ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
>>>                          RB_ENABLE, 1);
>>> +        ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
>>> +                       RB_GPU_TS_ENABLE, 1);
>>>           if (amdgpu_sriov_vf(adev)) {
>>>               if (psp_reg_program(&adev->psp,
>>> PSP_REG_IH_RB_CNTL_RING2,
>>>                           ih_rb_cntl)) {
>>> -- 
>>> 2.17.1
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7Cfelix.kuehling%40amd.com%7C2227acf915064b27b07c08d887e93027%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408782891525552%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=E482HSkR2W3XrGRFNd5%2FbY1vrR5H7DmoAqwMhDfP%2FM0%3D&amp;reserved=0
>>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7Cfelix.kuehling%40amd.com%7C2227acf915064b27b07c08d887e93027%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408782891535517%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=7oJGQTnBArrurCsXNog0RW6rdzZi3ANZOVOAH8UW7i0%3D&amp;reserved=0
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: enable 48-bit IH timestamp counter
  2020-11-13 17:15     ` Felix Kuehling
@ 2020-11-14  8:26       ` Christian König
  0 siblings, 0 replies; 9+ messages in thread
From: Christian König @ 2020-11-14  8:26 UTC (permalink / raw)
  To: Felix Kuehling, Sierra Guiza, Alejandro (Alex), amd-gfx

Yes, exactly.

Is the timer guaranteed to increment monotonically? I strongly suspect yes,
and then a simple "if (old > new) ++upper_32_bits;" should be sufficient.

Regards,
Christian.

Am 13.11.20 um 18:15 schrieb Felix Kuehling:
> I'd feel better with wrap-around handling. I think having a system up
> for that long is not likely but not impossible. Having a known hard
> limit on uptime is probably a bad thing. Imagine someone trying to
> reproduce the problem ...
>
> Regards,
>    Felix
>
> Am 2020-11-16 um 6:31 a.m. schrieb Christian König:
>> Feel free to keep my rb for this, but is 455 days enough in general or
>> should we add wrap around handling?
>>
>> Christian.
>>
>> Am 10.11.20 um 18:57 schrieb Sierra Guiza, Alejandro (Alex):
>>> [AMD Public Use]
>>>
>>> I just added support for vega10_ih too.
>>>
>>> Regards,
>>> Alex
>>>
>>>> -----Original Message-----
>>>> From: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@amd.com>
>>>> Sent: Tuesday, November 10, 2020 11:55 AM
>>>> To: amd-gfx@lists.freedesktop.org
>>>> Cc: Koenig, Christian <Christian.Koenig@amd.com>; Kuehling, Felix
>>>> <Felix.Kuehling@amd.com>; Sierra Guiza, Alejandro (Alex)
>>>> <Alex.Sierra@amd.com>
>>>> Subject: [PATCH] drm/amdgpu: enable 48-bit IH timestamp counter
>>>>
>>>> By default this timestamp is based on a 32 bit counter.
>>>> This is used by the amdgpu_gmc_filter_faults, to avoid process the same
>>>> interrupt in retry configuration.
>>>> Apparently there's a problem when the timestamp coming from IH
>>>> overflows
>>>> and compares against timestamp coming from the the hash table.
>>>> This patch only extends the time overflow from 10 minutes to aprx
>>>> 455 days.
>>>>
>>>> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 6 ++++++
>>>> drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 6 ++++++
>>>>    2 files changed, 12 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
>>>> b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
>>>> index 837769fcb35b..bda916f33805 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
>>>> @@ -94,6 +94,8 @@ static void navi10_ih_enable_interrupts(struct
>>>> amdgpu_device *adev)
>>>>
>>>>        ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
>>>>        ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR,
>>>> 1);
>>>> +    ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
>>>> +                   RB_GPU_TS_ENABLE, 1);
>>>>        if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
>>>>            if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL,
>>>> ih_rb_cntl)) {
>>>>                DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
>>>> @@ -109,6 +111,8 @@ static void navi10_ih_enable_interrupts(struct
>>>> amdgpu_device *adev)
>>>>            ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
>>>> mmIH_RB_CNTL_RING1);
>>>>            ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
>>>>                           RB_ENABLE, 1);
>>>> +        ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
>>>> +                       RB_GPU_TS_ENABLE, 1);
>>>>            if (amdgpu_sriov_vf(adev) && adev->asic_type <
>>>> CHIP_NAVI10) {
>>>>                if (psp_reg_program(&adev->psp,
>>>> PSP_REG_IH_RB_CNTL_RING1,
>>>>                            ih_rb_cntl)) {
>>>> @@ -125,6 +129,8 @@ static void navi10_ih_enable_interrupts(struct
>>>> amdgpu_device *adev)
>>>>            ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
>>>> mmIH_RB_CNTL_RING2);
>>>>            ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
>>>>                           RB_ENABLE, 1);
>>>> +        ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
>>>> +                       RB_GPU_TS_ENABLE, 1);
>>>>            if (amdgpu_sriov_vf(adev) && adev->asic_type <
>>>> CHIP_NAVI10) {
>>>>                if (psp_reg_program(&adev->psp,
>>>> PSP_REG_IH_RB_CNTL_RING2,
>>>>                            ih_rb_cntl)) {
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
>>>> b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
>>>> index 407c6093c2ec..35d68bc5d95e 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
>>>> @@ -50,6 +50,8 @@ static void vega10_ih_enable_interrupts(struct
>>>> amdgpu_device *adev)
>>>>
>>>>        ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
>>>>        ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR,
>>>> 1);
>>>> +    ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
>>>> +                   RB_GPU_TS_ENABLE, 1);
>>>>        if (amdgpu_sriov_vf(adev)) {
>>>>            if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL,
>>>> ih_rb_cntl)) {
>>>>                DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
>>>> @@ -64,6 +66,8 @@ static void vega10_ih_enable_interrupts(struct
>>>> amdgpu_device *adev)
>>>>            ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
>>>> mmIH_RB_CNTL_RING1);
>>>>            ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
>>>>                           RB_ENABLE, 1);
>>>> +        ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
>>>> +                       RB_GPU_TS_ENABLE, 1);
>>>>            if (amdgpu_sriov_vf(adev)) {
>>>>                if (psp_reg_program(&adev->psp,
>>>> PSP_REG_IH_RB_CNTL_RING1,
>>>>                            ih_rb_cntl)) {
>>>> @@ -80,6 +84,8 @@ static void vega10_ih_enable_interrupts(struct
>>>> amdgpu_device *adev)
>>>>            ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
>>>> mmIH_RB_CNTL_RING2);
>>>>            ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
>>>>                           RB_ENABLE, 1);
>>>> +        ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
>>>> +                       RB_GPU_TS_ENABLE, 1);
>>>>            if (amdgpu_sriov_vf(adev)) {
>>>>                if (psp_reg_program(&adev->psp,
>>>> PSP_REG_IH_RB_CNTL_RING2,
>>>>                            ih_rb_cntl)) {
>>>> -- 
>>>> 2.17.1
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7Cfelix.kuehling%40amd.com%7C2227acf915064b27b07c08d887e93027%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408782891525552%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=E482HSkR2W3XrGRFNd5%2FbY1vrR5H7DmoAqwMhDfP%2FM0%3D&amp;reserved=0
>>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7Cfelix.kuehling%40amd.com%7C2227acf915064b27b07c08d887e93027%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637408782891535517%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=7oJGQTnBArrurCsXNog0RW6rdzZi3ANZOVOAH8UW7i0%3D&amp;reserved=0
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: enable 48-bit IH timestamp counter
  2020-11-10 17:57 ` Sierra Guiza, Alejandro (Alex)
@ 2020-11-16 11:31   ` Christian König
  2020-11-13 17:11     ` Sierra Guiza, Alejandro (Alex)
  2020-11-13 17:15     ` Felix Kuehling
  0 siblings, 2 replies; 9+ messages in thread
From: Christian König @ 2020-11-16 11:31 UTC (permalink / raw)
  To: Sierra Guiza, Alejandro (Alex), amd-gfx, Koenig, Christian
  Cc: Kuehling, Felix

Feel free to keep my rb for this, but is 455 days enough in general or
should we add wrap-around handling?

Christian.

Am 10.11.20 um 18:57 schrieb Sierra Guiza, Alejandro (Alex):
> [AMD Public Use]
>
> I just added support for vega10_ih too.
>
> Regards,
> Alex
>
>> -----Original Message-----
>> From: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@amd.com>
>> Sent: Tuesday, November 10, 2020 11:55 AM
>> To: amd-gfx@lists.freedesktop.org
>> Cc: Koenig, Christian <Christian.Koenig@amd.com>; Kuehling, Felix
>> <Felix.Kuehling@amd.com>; Sierra Guiza, Alejandro (Alex)
>> <Alex.Sierra@amd.com>
>> Subject: [PATCH] drm/amdgpu: enable 48-bit IH timestamp counter
>>
>> By default this timestamp is based on a 32-bit counter.
>> It is used by amdgpu_gmc_filter_faults to avoid processing the same
>> interrupt in retry configuration.
>> Apparently there's a problem when the timestamp coming from the IH
>> overflows and is compared against the timestamp coming from the hash table.
>> This patch only extends the time to overflow from 10 minutes to approx. 455 days.
>>
>> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 6 ++++++
>> drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 6 ++++++
>>   2 files changed, 12 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
>> b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
>> index 837769fcb35b..bda916f33805 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
>> @@ -94,6 +94,8 @@ static void navi10_ih_enable_interrupts(struct
>> amdgpu_device *adev)
>>
>>   	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
>>   	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR,
>> 1);
>> +	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
>> +				   RB_GPU_TS_ENABLE, 1);
>>   	if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
>>   		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL,
>> ih_rb_cntl)) {
>>   			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
>> @@ -109,6 +111,8 @@ static void navi10_ih_enable_interrupts(struct
>> amdgpu_device *adev)
>>   		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
>> mmIH_RB_CNTL_RING1);
>>   		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
>>   					   RB_ENABLE, 1);
>> +		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
>> +					   RB_GPU_TS_ENABLE, 1);
>>   		if (amdgpu_sriov_vf(adev) && adev->asic_type <
>> CHIP_NAVI10) {
>>   			if (psp_reg_program(&adev->psp,
>> PSP_REG_IH_RB_CNTL_RING1,
>>   						ih_rb_cntl)) {
>> @@ -125,6 +129,8 @@ static void navi10_ih_enable_interrupts(struct
>> amdgpu_device *adev)
>>   		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
>> mmIH_RB_CNTL_RING2);
>>   		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
>>   					   RB_ENABLE, 1);
>> +		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
>> +					   RB_GPU_TS_ENABLE, 1);
>>   		if (amdgpu_sriov_vf(adev) && adev->asic_type <
>> CHIP_NAVI10) {
>>   			if (psp_reg_program(&adev->psp,
>> PSP_REG_IH_RB_CNTL_RING2,
>>   						ih_rb_cntl)) {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
>> b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
>> index 407c6093c2ec..35d68bc5d95e 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
>> @@ -50,6 +50,8 @@ static void vega10_ih_enable_interrupts(struct
>> amdgpu_device *adev)
>>
>>   	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
>>   	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR,
>> 1);
>> +	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
>> +				   RB_GPU_TS_ENABLE, 1);
>>   	if (amdgpu_sriov_vf(adev)) {
>>   		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL,
>> ih_rb_cntl)) {
>>   			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
>> @@ -64,6 +66,8 @@ static void vega10_ih_enable_interrupts(struct
>> amdgpu_device *adev)
>>   		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
>> mmIH_RB_CNTL_RING1);
>>   		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
>>   					   RB_ENABLE, 1);
>> +		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
>> +					   RB_GPU_TS_ENABLE, 1);
>>   		if (amdgpu_sriov_vf(adev)) {
>>   			if (psp_reg_program(&adev->psp,
>> PSP_REG_IH_RB_CNTL_RING1,
>>   						ih_rb_cntl)) {
>> @@ -80,6 +84,8 @@ static void vega10_ih_enable_interrupts(struct
>> amdgpu_device *adev)
>>   		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0,
>> mmIH_RB_CNTL_RING2);
>>   		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
>>   					   RB_ENABLE, 1);
>> +		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
>> +					   RB_GPU_TS_ENABLE, 1);
>>   		if (amdgpu_sriov_vf(adev)) {
>>   			if (psp_reg_program(&adev->psp,
>> PSP_REG_IH_RB_CNTL_RING2,
>>   						ih_rb_cntl)) {
>> --
>> 2.17.1
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: enable 48-bit IH timestamp counter
  2020-11-10  3:20 Alex Sierra
  2020-11-10 11:54 ` Christian König
@ 2020-11-10 18:08 ` philip yang
  1 sibling, 0 replies; 9+ messages in thread
From: philip yang @ 2020-11-10 18:08 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx; +Cc: Felix.Kuehling, christian.Koenig

[-- Attachment #1: Type: text/html, Size: 2813 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: enable 48-bit IH timestamp counter
  2020-11-10  3:20 Alex Sierra
@ 2020-11-10 11:54 ` Christian König
  2020-11-10 18:08 ` philip yang
  1 sibling, 0 replies; 9+ messages in thread
From: Christian König @ 2020-11-10 11:54 UTC (permalink / raw)
  To: Alex Sierra, amd-gfx; +Cc: Felix.Kuehling

Am 10.11.20 um 04:20 schrieb Alex Sierra:
> By default this timestamp is based on a 32-bit counter.
> It is used by amdgpu_gmc_filter_faults to avoid
> processing the same interrupt in retry configuration.
> Apparently there's a problem when the timestamp coming from
> the IH overflows and is compared against the timestamp coming
> from the hash table.
> This patch only extends the time to overflow from 10 minutes to
> approx. 455 days.

Good catch, I wasn't aware of that limitation. The IH documentation
suggested that it is a 64-bit value.

> Signed-off-by: Alex Sierra <alex.sierra@amd.com>

In the long term we probably need some wrap around handling, but for now 
Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 6 ++++++
>   1 file changed, 6 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> index 837769fcb35b..bda916f33805 100644
> --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> @@ -94,6 +94,8 @@ static void navi10_ih_enable_interrupts(struct amdgpu_device *adev)
>   
>   	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
>   	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
> +	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
> +				   RB_GPU_TS_ENABLE, 1);
>   	if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
>   		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
>   			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
> @@ -109,6 +111,8 @@ static void navi10_ih_enable_interrupts(struct amdgpu_device *adev)
>   		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
>   		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
>   					   RB_ENABLE, 1);
> +		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
> +					   RB_GPU_TS_ENABLE, 1);
>   		if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
>   			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
>   						ih_rb_cntl)) {
> @@ -125,6 +129,8 @@ static void navi10_ih_enable_interrupts(struct amdgpu_device *adev)
>   		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
>   		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
>   					   RB_ENABLE, 1);
> +		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
> +					   RB_GPU_TS_ENABLE, 1);
>   		if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
>   			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
>   						ih_rb_cntl)) {

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH] drm/amdgpu: enable 48-bit IH timestamp counter
@ 2020-11-10  3:20 Alex Sierra
  2020-11-10 11:54 ` Christian König
  2020-11-10 18:08 ` philip yang
  0 siblings, 2 replies; 9+ messages in thread
From: Alex Sierra @ 2020-11-10  3:20 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Sierra, Felix.Kuehling, christian.Koenig

By default this timestamp is based on a 32-bit counter.
It is used by amdgpu_gmc_filter_faults to avoid
processing the same interrupt in retry configuration.
Apparently there's a problem when the timestamp coming from
the IH overflows and is compared against the timestamp coming
from the hash table.
This patch only extends the time to overflow from 10 minutes to
approx. 455 days.

Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index 837769fcb35b..bda916f33805 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -94,6 +94,8 @@ static void navi10_ih_enable_interrupts(struct amdgpu_device *adev)
 
 	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
 	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
+	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+				   RB_GPU_TS_ENABLE, 1);
 	if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
 		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
 			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
@@ -109,6 +111,8 @@ static void navi10_ih_enable_interrupts(struct amdgpu_device *adev)
 		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
 		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
 					   RB_ENABLE, 1);
+		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
+					   RB_GPU_TS_ENABLE, 1);
 		if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
 			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
 						ih_rb_cntl)) {
@@ -125,6 +129,8 @@ static void navi10_ih_enable_interrupts(struct amdgpu_device *adev)
 		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
 		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
 					   RB_ENABLE, 1);
+		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
+					   RB_GPU_TS_ENABLE, 1);
 		if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) {
 			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
 						ih_rb_cntl)) {
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-11-14  8:26 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-10 17:55 [PATCH] drm/amdgpu: enable 48-bit IH timestamp counter Alex Sierra
2020-11-10 17:57 ` Sierra Guiza, Alejandro (Alex)
2020-11-16 11:31   ` Christian König
2020-11-13 17:11     ` Sierra Guiza, Alejandro (Alex)
2020-11-13 17:15     ` Felix Kuehling
2020-11-14  8:26       ` Christian König
  -- strict thread matches above, loose matches on Subject: below --
2020-11-10  3:20 Alex Sierra
2020-11-10 11:54 ` Christian König
2020-11-10 18:08 ` philip yang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.