* [PATCH v2 0/2] Fix a hang during memory pressure test
@ 2021-09-06  1:12 xinhui pan
  2021-09-06  1:12 ` [PATCH v2 1/2] drm/ttm: Fix a deadlock if the target BO is not idle during swap xinhui pan
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: xinhui pan @ 2021-09-06  1:12 UTC (permalink / raw)
  To: amd-gfx
  Cc: alexander.deucher, christian.koenig, chenli, dri-devel, xinhui pan

A long time ago, someone reported that the system hung during a memory
pressure test. Recently I have been trying to track down and understand
the potential deadlock in the ttm/amdgpu code.

This patchset aims to fix the deadlock during ttm populate.

TTM has a parameter called pages_limit; when the allocated GTT memory
reaches this limit, swapout is triggered. As ttm_bo_swapout does not
return the correct retval, populate can get stuck.
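
Roughly speaking, the populate path keeps swapping BOs out until it gets
back under the limit. A simplified sketch of that loop (not the exact
TTM code):

	/* simplified sketch of the populate-side loop, not the exact TTM code */
	while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit) {
		ret = ttm_global_swapout(ctx, GFP_KERNEL);
		if (ret == 0)		/* nothing left to swap out */
			break;
		if (ret < 0)		/* real error, give up */
			return ret;
		/* ret > 0: pages were reported freed, re-check the limit */
	}

If ttm_bo_swapout reports progress without actually freeing anything,
this loop never gets below the limit and populate spins forever.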

The UVD IB test uses GTT, which might be insufficient, so a GPU recovery
would hang if populate hangs.

I have written a drm test which allocates two GTT BOs, submits gfx copy
commands, and frees these BOs without waiting for the fence. What's more,
these gfx copy commands will cause the gfx ring to hang, so a GPU
recovery is triggered.

Now here is one possible deadlock case.
gpu_recovery
 -> stop drm scheduler
 -> asic reset
   -> ib test
      -> tt populate (uvd ib test)
	->  ttm_bo_swapout (BO A) // this always fails as the fence of
	BO A would not be signaled by the scheduler or HW. Hit deadlock.

The drm test patch is pasted below.
#modprobe ttm pages_limit=65536
#amdgpu_test -s 1 -t 4
---
 tests/amdgpu/basic_tests.c | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c
index dbf02fee..f85ed340 100644
--- a/tests/amdgpu/basic_tests.c
+++ b/tests/amdgpu/basic_tests.c
@@ -65,13 +65,16 @@ static void amdgpu_direct_gma_test(void);
 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
-static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
+static void _amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
 				       unsigned ip_type,
 				       int instance, int pm4_dw, uint32_t *pm4_src,
 				       int res_cnt, amdgpu_bo_handle *resources,
 				       struct amdgpu_cs_ib_info *ib_info,
-				       struct amdgpu_cs_request *ibs_request);
+				       struct amdgpu_cs_request *ibs_request, int sync, int repeat);
  
+#define amdgpu_test_exec_cs_helper(...) \
+	_amdgpu_test_exec_cs_helper(__VA_ARGS__, 1, 1)
+
 CU_TestInfo basic_tests[] = {
 	{ "Query Info Test",  amdgpu_query_info_test },
 	{ "Userptr Test",  amdgpu_userptr_test },
@@ -1341,12 +1344,12 @@ static void amdgpu_command_submission_compute(void)
  * pm4_src, resources, ib_info, and ibs_request
  * submit command stream described in ibs_request and wait for this IB accomplished
  */
-static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
+static void _amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
 				       unsigned ip_type,
 				       int instance, int pm4_dw, uint32_t *pm4_src,
 				       int res_cnt, amdgpu_bo_handle *resources,
 				       struct amdgpu_cs_ib_info *ib_info,
-				       struct amdgpu_cs_request *ibs_request)
+				       struct amdgpu_cs_request *ibs_request, int sync, int repeat)
 {
 	int r;
 	uint32_t expired;
@@ -1395,12 +1398,15 @@ static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
 
 	/* submit CS */
-	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
+	while (repeat--)
+		r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
 	CU_ASSERT_EQUAL(r, 0);
 
 	r = amdgpu_bo_list_destroy(ibs_request->resources);
 	CU_ASSERT_EQUAL(r, 0);
 
+	if (!sync)
+		return;
 	fence_status.ip_type = ip_type;
 	fence_status.ip_instance = 0;
 	fence_status.ring = ibs_request->ring;
@@ -1667,7 +1673,7 @@ static void amdgpu_command_submission_sdma_const_fill(void)
 
 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
 {
-	const int sdma_write_length = 1024;
+	const int sdma_write_length = (255) << 20;
 	const int pm4_dw = 256;
 	amdgpu_context_handle context_handle;
 	amdgpu_bo_handle bo1, bo2;
@@ -1715,8 +1721,6 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
 							    &bo1_va_handle);
 				CU_ASSERT_EQUAL(r, 0);
 
-				/* set bo1 */
-				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
 
 				/* allocate UC bo2 for sDMA use */
 				r = amdgpu_bo_alloc_and_map(device_handle,
@@ -1727,8 +1731,6 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
 							    &bo2_va_handle);
 				CU_ASSERT_EQUAL(r, 0);
 
-				/* clear bo2 */
-				memset((void*)bo2_cpu, 0, sdma_write_length);
 
 				resources[0] = bo1;
 				resources[1] = bo2;
@@ -1785,17 +1787,11 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
 					}
 				}
 
-				amdgpu_test_exec_cs_helper(context_handle,
+				_amdgpu_test_exec_cs_helper(context_handle,
 							   ip_type, ring_id,
 							   i, pm4,
 							   2, resources,
-							   ib_info, ibs_request);
-
-				/* verify if SDMA test result meets with expected */
-				i = 0;
-				while(i < sdma_write_length) {
-					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
-				}
+							   ib_info, ibs_request, 0, 100);
 				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
 							     sdma_write_length);
 				CU_ASSERT_EQUAL(r, 0);
-- 


xinhui pan (2):
  drm/ttm: Fix a deadlock if the target BO is not idle during swap
  drm/amdgpu: Use VRAM domain in UVD IB test

 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 8 ++++++++
 drivers/gpu/drm/ttm/ttm_bo.c            | 6 +++---
 2 files changed, 11 insertions(+), 3 deletions(-)

-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2 1/2] drm/ttm: Fix a deadlock if the target BO is not idle during swap
  2021-09-06  1:12 [PATCH v2 0/2] Fix a hang during memory pressure test xinhui pan
@ 2021-09-06  1:12 ` xinhui pan
  2021-09-06 11:26   ` Christian König
  2021-09-06  1:12 ` [PATCH v2 2/2] drm/amdgpu: Use VRAM domain in UVD IB test xinhui pan
  2021-09-06  9:04 ` [PATCH v2 0/2] Fix a hang during memory pressure test Christian König
  2 siblings, 1 reply; 12+ messages in thread
From: xinhui pan @ 2021-09-06  1:12 UTC (permalink / raw)
  To: amd-gfx
  Cc: alexander.deucher, christian.koenig, chenli, dri-devel, xinhui pan

The ret value might be -EBUSY, in which case the caller will think the
lru lock is still held while it actually is NOT. Return -ENOSPC instead;
otherwise we hit list corruption.

ttm_bo_cleanup_refs might fail too if the BO is not idle. If we return 0,
the caller (ttm_tt_populate -> ttm_global_swapout -> ttm_device_swapout)
will be stuck because we did not actually free any BO memory. This usually
happens when the fence is not signaled for a long time.
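
For reference, here is a simplified sketch of the caller side in
ttm_device_swapout() (not the exact upstream code), showing the
convention this change relies on:

	/*
	 * Sketch of the LRU walk in ttm_device_swapout(), simplified:
	 *   0      -> one BO was swapped out, lru_lock already dropped
	 *   -EBUSY -> this BO could not be handled, lru_lock still held,
	 *             keep walking the list
	 *   other  -> error, lru_lock already dropped, propagate it
	 */
	spin_lock(&bdev->lru_lock);
	list_for_each_entry(bo, &man->lru[j], lru) {
		ret = ttm_bo_swapout(bo, ctx, gfp_flags);
		if (!ret)
			/* num_pages: size of the BO just swapped out,
			 * reported as progress to the populate loop */
			return num_pages;
		if (ret != -EBUSY)
			return ret;
	}
	spin_unlock(&bdev->lru_lock);
	return 0;

Returning -EBUSY from a path that has already dropped the lock makes the
walk continue without the lock (list corruption), and returning 0 without
freeing anything makes the populate loop retry forever. Mapping -EBUSY to
-ENOSPC avoids both.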

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 1fedd0eb67ba..f1367107925b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1159,9 +1159,9 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 	}
 
 	if (bo->deleted) {
-		ttm_bo_cleanup_refs(bo, false, false, locked);
+		ret = ttm_bo_cleanup_refs(bo, false, false, locked);
 		ttm_bo_put(bo);
-		return 0;
+		return ret == -EBUSY ? -ENOSPC : ret;
 	}
 
 	ttm_bo_move_to_pinned(bo);
@@ -1215,7 +1215,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 	if (locked)
 		dma_resv_unlock(bo->base.resv);
 	ttm_bo_put(bo);
-	return ret;
+	return ret == -EBUSY ? -ENOSPC : ret;
 }
 
 void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2 2/2] drm/amdgpu: Use VRAM domain in UVD IB test
  2021-09-06  1:12 [PATCH v2 0/2] Fix a hang during memory pressure test xinhui pan
  2021-09-06  1:12 ` [PATCH v2 1/2] drm/ttm: Fix a deadlock if the target BO is not idle during swap xinhui pan
@ 2021-09-06  1:12 ` xinhui pan
  2021-09-06  9:01   ` Christian König
  2021-09-06  9:04 ` [PATCH v2 0/2] Fix a hang during memory pressure test Christian König
  2 siblings, 1 reply; 12+ messages in thread
From: xinhui pan @ 2021-09-06  1:12 UTC (permalink / raw)
  To: amd-gfx
  Cc: alexander.deucher, christian.koenig, chenli, dri-devel, xinhui pan

Like vce/vcn, the UVD IB test works fine with visible VRAM. However,
commit a11d9ff3ebe0 ("drm/amdgpu: use GTT for
uvd_get_create/destory_msg") says VRAM is not mapped correctly on that
author's platform, which is likely arm64.

So let's change back to using VRAM on the x86_64 platform.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index d451c359606a..e4b75f33ccc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1178,7 +1178,11 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 	int r, i;
 
 	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
+#ifdef CONFIG_X86_64
+				      AMDGPU_GEM_DOMAIN_VRAM,
+#else
 				      AMDGPU_GEM_DOMAIN_GTT,
+#endif
 				      &bo, NULL, (void **)&msg);
 	if (r)
 		return r;
@@ -1210,7 +1214,11 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	int r, i;
 
 	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
+#ifdef CONFIG_X86_64
+				      AMDGPU_GEM_DOMAIN_VRAM,
+#else
 				      AMDGPU_GEM_DOMAIN_GTT,
+#endif
 				      &bo, NULL, (void **)&msg);
 	if (r)
 		return r;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 2/2] drm/amdgpu: Use VRAM domain in UVD IB test
  2021-09-06  1:12 ` [PATCH v2 2/2] drm/amdgpu: Use VRAM domain in UVD IB test xinhui pan
@ 2021-09-06  9:01   ` Christian König
  0 siblings, 0 replies; 12+ messages in thread
From: Christian König @ 2021-09-06  9:01 UTC (permalink / raw)
  To: xinhui pan, amd-gfx; +Cc: alexander.deucher, chenli, dri-devel

On 06.09.21 at 03:12, xinhui pan wrote:
> Like vce/vcn does, visible VRAM is OK for ib test.
> While commit a11d9ff3ebe0 ("drm/amdgpu: use GTT for
> uvd_get_create/destory_msg") says VRAM is not mapped correctly in his
> platform which is likely an arm64.
>
> So lets change back to use VRAM on x86_64 platform.

That's still a rather clear NAK. This issue is not related to ARM at all 
and you are trying to fix a problem which is independent of the platform.

Christian.

>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 8 ++++++++
>   1 file changed, 8 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index d451c359606a..e4b75f33ccc8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -1178,7 +1178,11 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>   	int r, i;
>   
>   	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
> +#ifdef CONFIG_X86_64
> +				      AMDGPU_GEM_DOMAIN_VRAM,
> +#else
>   				      AMDGPU_GEM_DOMAIN_GTT,
> +#endif
>   				      &bo, NULL, (void **)&msg);
>   	if (r)
>   		return r;
> @@ -1210,7 +1214,11 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>   	int r, i;
>   
>   	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
> +#ifdef CONFIG_X86_64
> +				      AMDGPU_GEM_DOMAIN_VRAM,
> +#else
>   				      AMDGPU_GEM_DOMAIN_GTT,
> +#endif
>   				      &bo, NULL, (void **)&msg);
>   	if (r)
>   		return r;


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 0/2] Fix a hang during memory pressure test
  2021-09-06  1:12 [PATCH v2 0/2] Fix a hang during memory pressure test xinhui pan
  2021-09-06  1:12 ` [PATCH v2 1/2] drm/ttm: Fix a deadlock if the target BO is not idle during swap xinhui pan
  2021-09-06  1:12 ` [PATCH v2 2/2] drm/amdgpu: Use VRAM domain in UVD IB test xinhui pan
@ 2021-09-06  9:04 ` Christian König
  2021-09-06 10:16     ` Pan, Xinhui
  2 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2021-09-06  9:04 UTC (permalink / raw)
  To: xinhui pan, amd-gfx
  Cc: alexander.deucher, christian.koenig, chenli, dri-devel



On 06.09.21 at 03:12, xinhui pan wrote:
> A long time ago, someone reports system got hung during memory test.
> In recent days, I am trying to look for or understand the potential
> deadlock in ttm/amdgpu code.
>
> This patchset aims to fix the deadlock during ttm populate.
>
> TTM has a parameter called pages_limit, when allocated GTT memory
> reaches this limit, swapout would be triggered. As ttm_bo_swapout does
> not return the correct retval, populate might get hung.
>
> UVD ib test uses GTT which might be insufficient. So a gpu recovery
> would hung if populate hung.

Ah, now I understand what you are trying to do.

Problem is that won't work either. Allocating VRAM can easily land you 
inside the same deadlock.

We need to avoid the allocation altogether for this to work correctly.

>
> I have made one drm test which alloc two GTT BOs, submit gfx copy
> commands and free these BOs without waiting fence. What's more, these
> gfx copy commands will cause gfx ring hang. So gpu recovery would be
> triggered.

Mhm, that should never be possible. It is perfectly valid for an 
application to terminate without waiting for the GFX submission to be 
completed.

Going to push patch #1 to drm-misc-fixes or drm-misc-next-fixes in a moment.

Thanks,
Christian.

>
> Now here is one possible deadlock case.
> gpu_recovery
>   -> stop drm scheduler
>   -> asic reset
>     -> ib test
>        -> tt populate (uvd ib test)
> 	->  ttm_bo_swapout (BO A) // this always fails as the fence of
> 	BO A would not be signaled by schedluer or HW. Hit deadlock.
>
> I paste the drm test patch below.
> #modprobe ttm pages_limit=65536
> #amdgpu_test -s 1 -t 4
> ---
>   tests/amdgpu/basic_tests.c | 32 ++++++++++++++------------------
>   1 file changed, 14 insertions(+), 18 deletions(-)
>
> diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c
> index dbf02fee..f85ed340 100644
> --- a/tests/amdgpu/basic_tests.c
> +++ b/tests/amdgpu/basic_tests.c
> @@ -65,13 +65,16 @@ static void amdgpu_direct_gma_test(void);
>   static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
>   static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
>   static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
> -static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
> +static void _amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
>   				       unsigned ip_type,
>   				       int instance, int pm4_dw, uint32_t *pm4_src,
>   				       int res_cnt, amdgpu_bo_handle *resources,
>   				       struct amdgpu_cs_ib_info *ib_info,
> -				       struct amdgpu_cs_request *ibs_request);
> +				       struct amdgpu_cs_request *ibs_request, int sync, int repeat);
>    
> +#define amdgpu_test_exec_cs_helper(...) \
> +	_amdgpu_test_exec_cs_helper(__VA_ARGS__, 1, 1)
> +
>   CU_TestInfo basic_tests[] = {
>   	{ "Query Info Test",  amdgpu_query_info_test },
>   	{ "Userptr Test",  amdgpu_userptr_test },
> @@ -1341,12 +1344,12 @@ static void amdgpu_command_submission_compute(void)
>    * pm4_src, resources, ib_info, and ibs_request
>    * submit command stream described in ibs_request and wait for this IB accomplished
>    */
> -static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
> +static void _amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
>   				       unsigned ip_type,
>   				       int instance, int pm4_dw, uint32_t *pm4_src,
>   				       int res_cnt, amdgpu_bo_handle *resources,
>   				       struct amdgpu_cs_ib_info *ib_info,
> -				       struct amdgpu_cs_request *ibs_request)
> +				       struct amdgpu_cs_request *ibs_request, int sync, int repeat)
>   {
>   	int r;
>   	uint32_t expired;
> @@ -1395,12 +1398,15 @@ static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
>   	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
>   
>   	/* submit CS */
> -	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
> +	while (repeat--)
> +		r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
>   	CU_ASSERT_EQUAL(r, 0);
>   
>   	r = amdgpu_bo_list_destroy(ibs_request->resources);
>   	CU_ASSERT_EQUAL(r, 0);
>   
> +	if (!sync)
> +		return;
>   	fence_status.ip_type = ip_type;
>   	fence_status.ip_instance = 0;
>   	fence_status.ring = ibs_request->ring;
> @@ -1667,7 +1673,7 @@ static void amdgpu_command_submission_sdma_const_fill(void)
>   
>   static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
>   {
> -	const int sdma_write_length = 1024;
> +	const int sdma_write_length = (255) << 20;
>   	const int pm4_dw = 256;
>   	amdgpu_context_handle context_handle;
>   	amdgpu_bo_handle bo1, bo2;
> @@ -1715,8 +1721,6 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
>   							    &bo1_va_handle);
>   				CU_ASSERT_EQUAL(r, 0);
>   
> -				/* set bo1 */
> -				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
>   
>   				/* allocate UC bo2 for sDMA use */
>   				r = amdgpu_bo_alloc_and_map(device_handle,
> @@ -1727,8 +1731,6 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
>   							    &bo2_va_handle);
>   				CU_ASSERT_EQUAL(r, 0);
>   
> -				/* clear bo2 */
> -				memset((void*)bo2_cpu, 0, sdma_write_length);
>   
>   				resources[0] = bo1;
>   				resources[1] = bo2;
> @@ -1785,17 +1787,11 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
>   					}
>   				}
>   
> -				amdgpu_test_exec_cs_helper(context_handle,
> +				_amdgpu_test_exec_cs_helper(context_handle,
>   							   ip_type, ring_id,
>   							   i, pm4,
>   							   2, resources,
> -							   ib_info, ibs_request);
> -
> -				/* verify if SDMA test result meets with expected */
> -				i = 0;
> -				while(i < sdma_write_length) {
> -					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
> -				}
> +							   ib_info, ibs_request, 0, 100);
>   				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
>   							     sdma_write_length);
>   				CU_ASSERT_EQUAL(r, 0);


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 0/2] Fix a hang during memory pressure test
  2021-09-06  9:04 ` [PATCH v2 0/2] Fix a hang during memory pressure test Christian König
@ 2021-09-06 10:16     ` Pan, Xinhui
  0 siblings, 0 replies; 12+ messages in thread
From: Pan, Xinhui @ 2021-09-06 10:16 UTC (permalink / raw)
  To: Christian König
  Cc: Pan, Xinhui, Pan, Xinhui, amd-gfx, Deucher, Alexander, Koenig,
	Christian, chenli, dri-devel



> On 2021-09-06 at 17:04, Christian König <ckoenig.leichtzumerken@gmail.com> wrote:
> 
> 
> 
> On 06.09.21 at 03:12, xinhui pan wrote:
>> A long time ago, someone reports system got hung during memory test.
>> In recent days, I am trying to look for or understand the potential
>> deadlock in ttm/amdgpu code.
>> 
>> This patchset aims to fix the deadlock during ttm populate.
>> 
>> TTM has a parameter called pages_limit, when allocated GTT memory
>> reaches this limit, swapout would be triggered. As ttm_bo_swapout does
>> not return the correct retval, populate might get hung.
>> 
>> UVD ib test uses GTT which might be insufficient. So a gpu recovery
>> would hung if populate hung.
> 
> Ah, now I understand what you are trying to do.
> 
> Problem is that won't work either. Allocating VRAM can easily land you inside the same deadlock.
> 
> We need to avoid the allocation altogether for this for work correctly.

Looks like we need to reserve some pages at sw init.

> 
>> 
>> I have made one drm test which alloc two GTT BOs, submit gfx copy
>> commands and free these BOs without waiting fence. What's more, these
>> gfx copy commands will cause gfx ring hang. So gpu recovery would be
>> triggered.
> 
> Mhm, that should never be possible. It is perfectly valid for an application to terminate without waitting for the GFX submission to be completed.

The gfx ring hangs because the command is illegal.
The packet is COMMAND [30:21] | BYTE_COUNT [20:0].
I use 0xFF << 20 to hang the ring on purpose.
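
In other words (the macro names below are only illustrative, the field
layout is as described above):

	/*
	 * Copy-packet dword layout as described above:
	 *   COMMAND    in bits [30:21]
	 *   BYTE_COUNT in bits [20:0]
	 *
	 * 0xFF << 20 == 0x0FF00000: bit 20 is the top bit of BYTE_COUNT
	 * and bits 21..27 spill into the COMMAND field, so the packet is
	 * illegal and the gfx ring hangs on it.
	 */
	#define PKT_COMMAND(x)     (((x) & 0x3ff) << 21)
	#define PKT_BYTE_COUNT(x)  ((x) & 0x1fffff)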

> 
> Going to push patch #1 to drm-misc-fixes or drm-misc-next-fixes in a moment.
> 
> Thanks,
> Christian.
> 
>> 
>> Now here is one possible deadlock case.
>> gpu_recovery
>>  -> stop drm scheduler
>>  -> asic reset
>>    -> ib test
>>       -> tt populate (uvd ib test)
>> 	->  ttm_bo_swapout (BO A) // this always fails as the fence of
>> 	BO A would not be signaled by schedluer or HW. Hit deadlock.
>> 
>> I paste the drm test patch below.
>> #modprobe ttm pages_limit=65536
>> #amdgpu_test -s 1 -t 4
>> ---
>>  tests/amdgpu/basic_tests.c | 32 ++++++++++++++------------------
>>  1 file changed, 14 insertions(+), 18 deletions(-)
>> 
>> diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c
>> index dbf02fee..f85ed340 100644
>> --- a/tests/amdgpu/basic_tests.c
>> +++ b/tests/amdgpu/basic_tests.c
>> @@ -65,13 +65,16 @@ static void amdgpu_direct_gma_test(void);
>>  static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
>>  static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
>>  static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
>> -static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
>> +static void _amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
>>  				       unsigned ip_type,
>>  				       int instance, int pm4_dw, uint32_t *pm4_src,
>>  				       int res_cnt, amdgpu_bo_handle *resources,
>>  				       struct amdgpu_cs_ib_info *ib_info,
>> -				       struct amdgpu_cs_request *ibs_request);
>> +				       struct amdgpu_cs_request *ibs_request, int sync, int repeat);
>>   +#define amdgpu_test_exec_cs_helper(...) \
>> +	_amdgpu_test_exec_cs_helper(__VA_ARGS__, 1, 1)
>> +
>>  CU_TestInfo basic_tests[] = {
>>  	{ "Query Info Test",  amdgpu_query_info_test },
>>  	{ "Userptr Test",  amdgpu_userptr_test },
>> @@ -1341,12 +1344,12 @@ static void amdgpu_command_submission_compute(void)
>>   * pm4_src, resources, ib_info, and ibs_request
>>   * submit command stream described in ibs_request and wait for this IB accomplished
>>   */
>> -static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
>> +static void _amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
>>  				       unsigned ip_type,
>>  				       int instance, int pm4_dw, uint32_t *pm4_src,
>>  				       int res_cnt, amdgpu_bo_handle *resources,
>>  				       struct amdgpu_cs_ib_info *ib_info,
>> -				       struct amdgpu_cs_request *ibs_request)
>> +				       struct amdgpu_cs_request *ibs_request, int sync, int repeat)
>>  {
>>  	int r;
>>  	uint32_t expired;
>> @@ -1395,12 +1398,15 @@ static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
>>  	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
>>    	/* submit CS */
>> -	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
>> +	while (repeat--)
>> +		r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
>>  	CU_ASSERT_EQUAL(r, 0);
>>    	r = amdgpu_bo_list_destroy(ibs_request->resources);
>>  	CU_ASSERT_EQUAL(r, 0);
>>  +	if (!sync)
>> +		return;
>>  	fence_status.ip_type = ip_type;
>>  	fence_status.ip_instance = 0;
>>  	fence_status.ring = ibs_request->ring;
>> @@ -1667,7 +1673,7 @@ static void amdgpu_command_submission_sdma_const_fill(void)
>>    static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
>>  {
>> -	const int sdma_write_length = 1024;
>> +	const int sdma_write_length = (255) << 20;
>>  	const int pm4_dw = 256;
>>  	amdgpu_context_handle context_handle;
>>  	amdgpu_bo_handle bo1, bo2;
>> @@ -1715,8 +1721,6 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
>>  							    &bo1_va_handle);
>>  				CU_ASSERT_EQUAL(r, 0);
>>  -				/* set bo1 */
>> -				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
>>    				/* allocate UC bo2 for sDMA use */
>>  				r = amdgpu_bo_alloc_and_map(device_handle,
>> @@ -1727,8 +1731,6 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
>>  							    &bo2_va_handle);
>>  				CU_ASSERT_EQUAL(r, 0);
>>  -				/* clear bo2 */
>> -				memset((void*)bo2_cpu, 0, sdma_write_length);
>>    				resources[0] = bo1;
>>  				resources[1] = bo2;
>> @@ -1785,17 +1787,11 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
>>  					}
>>  				}
>>  -				amdgpu_test_exec_cs_helper(context_handle,
>> +				_amdgpu_test_exec_cs_helper(context_handle,
>>  							   ip_type, ring_id,
>>  							   i, pm4,
>>  							   2, resources,
>> -							   ib_info, ibs_request);
>> -
>> -				/* verify if SDMA test result meets with expected */
>> -				i = 0;
>> -				while(i < sdma_write_length) {
>> -					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
>> -				}
>> +							   ib_info, ibs_request, 0, 100);
>>  				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
>>  							     sdma_write_length);
>>  				CU_ASSERT_EQUAL(r, 0);
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 0/2] Fix a hang during memory pressure test
  2021-09-06 10:16     ` Pan, Xinhui
@ 2021-09-06 11:04     ` Christian König
  -1 siblings, 0 replies; 12+ messages in thread
From: Christian König @ 2021-09-06 11:04 UTC (permalink / raw)
  To: Pan, Xinhui, Christian König
  Cc: amd-gfx, Deucher, Alexander, chenli, dri-devel

On 06.09.21 at 12:16, Pan, Xinhui wrote:
>> On 2021-09-06 at 17:04, Christian König <ckoenig.leichtzumerken@gmail.com> wrote:
>>
>>
>>
>> On 06.09.21 at 03:12, xinhui pan wrote:
>>> A long time ago, someone reports system got hung during memory test.
>>> In recent days, I am trying to look for or understand the potential
>>> deadlock in ttm/amdgpu code.
>>>
>>> This patchset aims to fix the deadlock during ttm populate.
>>>
>>> TTM has a parameter called pages_limit, when allocated GTT memory
>>> reaches this limit, swapout would be triggered. As ttm_bo_swapout does
>>> not return the correct retval, populate might get hung.
>>>
>>> UVD ib test uses GTT which might be insufficient. So a gpu recovery
>>> would hung if populate hung.
>> Ah, now I understand what you are trying to do.
>>
>> Problem is that won't work either. Allocating VRAM can easily land you inside the same deadlock.
>>
>> We need to avoid the allocation altogether for this for work correctly.
> looks like we need reserve some pages at sw init.

Yeah, something like that should do it.

But keep in mind that you then need a lock or similar when using the 
resource to prevent concurrent use.
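
Something along these lines, just as a rough sketch with made-up names
(pre-allocate a small BO for the IB test messages at sw init and
serialize its use with a mutex):

	/*
	 * Rough sketch only, names are made up: reserve one small BO for
	 * the UVD IB test messages at sw init so that gpu recovery never
	 * has to allocate (and thus never has to swap) under pressure.
	 */
	struct amdgpu_uvd_msg_pool {
		struct mutex      lock;   /* serializes users of the BO */
		struct amdgpu_bo *bo;
		void             *cpu_addr;
	};

	/* called once from sw init */
	static int uvd_msg_pool_init(struct amdgpu_device *adev,
				     struct amdgpu_uvd_msg_pool *pool)
	{
		mutex_init(&pool->lock);
		return amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
					       AMDGPU_GEM_DOMAIN_VRAM,
					       &pool->bo, NULL,
					       &pool->cpu_addr);
	}

	/*
	 * The IB test then takes pool->lock, builds the message in
	 * pool->cpu_addr and drops the lock once the fence has signaled,
	 * so the recovery path never allocates memory.
	 */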

>
>>> I have made one drm test which alloc two GTT BOs, submit gfx copy
>>> commands and free these BOs without waiting fence. What's more, these
>>> gfx copy commands will cause gfx ring hang. So gpu recovery would be
>>> triggered.
>> Mhm, that should never be possible. It is perfectly valid for an application to terminate without waitting for the GFX submission to be completed.
> gfx ring hangs because of the command is illegal.
> the packet is COMMAND [30:21] | BYTE_COUNT [20:0]
> I use 0xFF << 20 to hang the ring on purpose.

Ok that makes more sense.

Thanks,
Christian.

>
>> Going to push patch #1 to drm-misc-fixes or drm-misc-next-fixes in a moment.
>>
>> Thanks,
>> Christian.
>>
>>> Now here is one possible deadlock case.
>>> gpu_recovery
>>>   -> stop drm scheduler
>>>   -> asic reset
>>>     -> ib test
>>>        -> tt populate (uvd ib test)
>>> 	->  ttm_bo_swapout (BO A) // this always fails as the fence of
>>> 	BO A would not be signaled by schedluer or HW. Hit deadlock.
>>>
>>> I paste the drm test patch below.
>>> #modprobe ttm pages_limit=65536
>>> #amdgpu_test -s 1 -t 4
>>> ---
>>>   tests/amdgpu/basic_tests.c | 32 ++++++++++++++------------------
>>>   1 file changed, 14 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c
>>> index dbf02fee..f85ed340 100644
>>> --- a/tests/amdgpu/basic_tests.c
>>> +++ b/tests/amdgpu/basic_tests.c
>>> @@ -65,13 +65,16 @@ static void amdgpu_direct_gma_test(void);
>>>   static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
>>>   static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
>>>   static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
>>> -static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
>>> +static void _amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
>>>   				       unsigned ip_type,
>>>   				       int instance, int pm4_dw, uint32_t *pm4_src,
>>>   				       int res_cnt, amdgpu_bo_handle *resources,
>>>   				       struct amdgpu_cs_ib_info *ib_info,
>>> -				       struct amdgpu_cs_request *ibs_request);
>>> +				       struct amdgpu_cs_request *ibs_request, int sync, int repeat);
>>>    +#define amdgpu_test_exec_cs_helper(...) \
>>> +	_amdgpu_test_exec_cs_helper(__VA_ARGS__, 1, 1)
>>> +
>>>   CU_TestInfo basic_tests[] = {
>>>   	{ "Query Info Test",  amdgpu_query_info_test },
>>>   	{ "Userptr Test",  amdgpu_userptr_test },
>>> @@ -1341,12 +1344,12 @@ static void amdgpu_command_submission_compute(void)
>>>    * pm4_src, resources, ib_info, and ibs_request
>>>    * submit command stream described in ibs_request and wait for this IB accomplished
>>>    */
>>> -static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
>>> +static void _amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
>>>   				       unsigned ip_type,
>>>   				       int instance, int pm4_dw, uint32_t *pm4_src,
>>>   				       int res_cnt, amdgpu_bo_handle *resources,
>>>   				       struct amdgpu_cs_ib_info *ib_info,
>>> -				       struct amdgpu_cs_request *ibs_request)
>>> +				       struct amdgpu_cs_request *ibs_request, int sync, int repeat)
>>>   {
>>>   	int r;
>>>   	uint32_t expired;
>>> @@ -1395,12 +1398,15 @@ static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
>>>   	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
>>>     	/* submit CS */
>>> -	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
>>> +	while (repeat--)
>>> +		r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
>>>   	CU_ASSERT_EQUAL(r, 0);
>>>     	r = amdgpu_bo_list_destroy(ibs_request->resources);
>>>   	CU_ASSERT_EQUAL(r, 0);
>>>   +	if (!sync)
>>> +		return;
>>>   	fence_status.ip_type = ip_type;
>>>   	fence_status.ip_instance = 0;
>>>   	fence_status.ring = ibs_request->ring;
>>> @@ -1667,7 +1673,7 @@ static void amdgpu_command_submission_sdma_const_fill(void)
>>>     static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
>>>   {
>>> -	const int sdma_write_length = 1024;
>>> +	const int sdma_write_length = (255) << 20;
>>>   	const int pm4_dw = 256;
>>>   	amdgpu_context_handle context_handle;
>>>   	amdgpu_bo_handle bo1, bo2;
>>> @@ -1715,8 +1721,6 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
>>>   							    &bo1_va_handle);
>>>   				CU_ASSERT_EQUAL(r, 0);
>>>   -				/* set bo1 */
>>> -				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
>>>     				/* allocate UC bo2 for sDMA use */
>>>   				r = amdgpu_bo_alloc_and_map(device_handle,
>>> @@ -1727,8 +1731,6 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
>>>   							    &bo2_va_handle);
>>>   				CU_ASSERT_EQUAL(r, 0);
>>>   -				/* clear bo2 */
>>> -				memset((void*)bo2_cpu, 0, sdma_write_length);
>>>     				resources[0] = bo1;
>>>   				resources[1] = bo2;
>>> @@ -1785,17 +1787,11 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
>>>   					}
>>>   				}
>>>   -				amdgpu_test_exec_cs_helper(context_handle,
>>> +				_amdgpu_test_exec_cs_helper(context_handle,
>>>   							   ip_type, ring_id,
>>>   							   i, pm4,
>>>   							   2, resources,
>>> -							   ib_info, ibs_request);
>>> -
>>> -				/* verify if SDMA test result meets with expected */
>>> -				i = 0;
>>> -				while(i < sdma_write_length) {
>>> -					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
>>> -				}
>>> +							   ib_info, ibs_request, 0, 100);
>>>   				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
>>>   							     sdma_write_length);
>>>   				CU_ASSERT_EQUAL(r, 0);


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 1/2] drm/ttm: Fix a deadlock if the target BO is not idle during swap
  2021-09-06  1:12 ` [PATCH v2 1/2] drm/ttm: Fix a deadlock if the target BO is not idle during swap xinhui pan
@ 2021-09-06 11:26   ` Christian König
  2021-09-07  1:52     ` Pan, Xinhui
  0 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2021-09-06 11:26 UTC (permalink / raw)
  To: xinhui pan, amd-gfx; +Cc: alexander.deucher, chenli, dri-devel

Which branch is this patch based on? Please rebase it on top of
drm-misc-fixes and resend.

Thanks,
Christian.

On 06.09.21 at 03:12, xinhui pan wrote:
> The ret value might be -EBUSY, caller will think lru lock is still
> locked but actually NOT. So return -ENOSPC instead. Otherwise we hit
> list corruption.
>
> ttm_bo_cleanup_refs might fail too if BO is not idle. If we return 0,
> caller(ttm_tt_populate -> ttm_global_swapout ->ttm_device_swapout) will
> be stuck as we actually did not free any BO memory. This usually happens
> when the fence is not signaled for a long time.
>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> Reviewed-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c | 6 +++---
>   1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 1fedd0eb67ba..f1367107925b 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -1159,9 +1159,9 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>   	}
>   
>   	if (bo->deleted) {
> -		ttm_bo_cleanup_refs(bo, false, false, locked);
> +		ret = ttm_bo_cleanup_refs(bo, false, false, locked);
>   		ttm_bo_put(bo);
> -		return 0;
> +		return ret == -EBUSY ? -ENOSPC : ret;
>   	}
>   
>   	ttm_bo_move_to_pinned(bo);
> @@ -1215,7 +1215,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>   	if (locked)
>   		dma_resv_unlock(bo->base.resv);
>   	ttm_bo_put(bo);
> -	return ret;
> +	return ret == -EBUSY ? -ENOSPC : ret;
>   }
>   
>   void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)


^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH v2 1/2] drm/ttm: Fix a deadlock if the target BO is not idle during swap
  2021-09-06 11:26   ` Christian König
@ 2021-09-07  1:52     ` Pan, Xinhui
  0 siblings, 0 replies; 12+ messages in thread
From: Pan, Xinhui @ 2021-09-07  1:52 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx; +Cc: Deucher, Alexander, chenli, dri-devel


It is based on the internal staging drm-next branch.

-----Original Message-----
From: Koenig, Christian <Christian.Koenig@amd.com>
Sent: 2021-09-06 19:26
To: Pan, Xinhui <Xinhui.Pan@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; chenli@uniontech.com; dri-devel@lists.freedesktop.org
Subject: Re: [PATCH v2 1/2] drm/ttm: Fix a deadlock if the target BO is not idle during swap

Which branch is this patch based on? Please rebase on top drm-misc-fixes and resend.

Thanks,
Christian.

On 06.09.21 at 03:12, xinhui pan wrote:
> The ret value might be -EBUSY, caller will think lru lock is still
> locked but actually NOT. So return -ENOSPC instead. Otherwise we hit
> list corruption.
>
> ttm_bo_cleanup_refs might fail too if BO is not idle. If we return 0,
> caller(ttm_tt_populate -> ttm_global_swapout ->ttm_device_swapout)
> will be stuck as we actually did not free any BO memory. This usually
> happens when the fence is not signaled for a long time.
>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> Reviewed-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c | 6 +++---
>   1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
> b/drivers/gpu/drm/ttm/ttm_bo.c index 1fedd0eb67ba..f1367107925b 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -1159,9 +1159,9 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>       }
>
>       if (bo->deleted) {
> -             ttm_bo_cleanup_refs(bo, false, false, locked);
> +             ret = ttm_bo_cleanup_refs(bo, false, false, locked);
>               ttm_bo_put(bo);
> -             return 0;
> +             return ret == -EBUSY ? -ENOSPC : ret;
>       }
>
>       ttm_bo_move_to_pinned(bo);
> @@ -1215,7 +1215,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>       if (locked)
>               dma_resv_unlock(bo->base.resv);
>       ttm_bo_put(bo);
> -     return ret;
> +     return ret == -EBUSY ? -ENOSPC : ret;
>   }
>
>   void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 2/2] drm/amdgpu: Use VRAM domain in UVD IB test
  2021-09-06  1:10 [PATCH v2 2/2] drm/amdgpu: Use VRAM domain in UVD IB test Pan, Xinhui
@ 2021-09-06  2:55 ` Wang, Kevin(Yang)
  0 siblings, 0 replies; 12+ messages in thread
From: Wang, Kevin(Yang) @ 2021-09-06  2:55 UTC (permalink / raw)
  To: Pan, Xinhui, amd-gfx, dri-devel
  Cc: Koenig, Christian, Deucher, Alexander, chenli




________________________________
From: dri-devel <dri-devel-bounces@lists.freedesktop.org> on behalf of Pan, Xinhui <Xinhui.Pan@amd.com>
Sent: Monday, September 6, 2021 9:10 AM
To: amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; dri-devel@lists.freedesktop.org <dri-devel@lists.freedesktop.org>
Cc: Koenig, Christian <Christian.Koenig@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; chenli@uniontech.com <chenli@uniontech.com>
Subject: [PATCH v2 2/2] drm/amdgpu: Use VRAM domain in UVD IB test


Like vce/vcn does, visible VRAM is OK for ib test.
While commit a11d9ff3ebe0 ("drm/amdgpu: use GTT for
uvd_get_create/destory_msg") says VRAM is not mapped correctly in his
platform which is likely an arm64.

So lets change back to use VRAM on x86_64 platform.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index d451c359606a..e4b75f33ccc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1178,7 +1178,11 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
        int r, i;

        r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
+#ifdef CONFIG_X86_64
[kevin]:
It would be better to use the CONFIG_64BIT macro here to cover more cases;
do you think that is OK?

Best Regards,
Kevin
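
i.e. the hunk would then look roughly like this:

	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
#ifdef CONFIG_64BIT
				      AMDGPU_GEM_DOMAIN_VRAM,
#else
				      AMDGPU_GEM_DOMAIN_GTT,
#endif
				      &bo, NULL, (void **)&msg);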

+                                     AMDGPU_GEM_DOMAIN_VRAM,
+#else
                                      AMDGPU_GEM_DOMAIN_GTT,
+#endif
                                      &bo, NULL, (void **)&msg);
        if (r)
                return r;
@@ -1210,7 +1214,11 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
        int r, i;

        r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
+#ifdef CONFIG_X86_64
+                                     AMDGPU_GEM_DOMAIN_VRAM,
+#else
                                      AMDGPU_GEM_DOMAIN_GTT,
+#endif
                                      &bo, NULL, (void **)&msg);
        if (r)
                return r;
--
2.25.1



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2 2/2] drm/amdgpu: Use VRAM domain in UVD IB test
@ 2021-09-06  1:10 Pan, Xinhui
  2021-09-06  2:55 ` Wang, Kevin(Yang)
  0 siblings, 1 reply; 12+ messages in thread
From: Pan, Xinhui @ 2021-09-06  1:10 UTC (permalink / raw)
  To: amd-gfx, dri-devel; +Cc: Koenig, Christian, Deucher, Alexander, chenli


Like vce/vcn, the UVD IB test works fine with visible VRAM. However,
commit a11d9ff3ebe0 ("drm/amdgpu: use GTT for
uvd_get_create/destory_msg") says VRAM is not mapped correctly on that
author's platform, which is likely arm64.

So let's change back to using VRAM on the x86_64 platform.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index d451c359606a..e4b75f33ccc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1178,7 +1178,11 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
        int r, i;

        r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
+#ifdef CONFIG_X86_64
+                                     AMDGPU_GEM_DOMAIN_VRAM,
+#else
                                      AMDGPU_GEM_DOMAIN_GTT,
+#endif
                                      &bo, NULL, (void **)&msg);
        if (r)
                return r;
@@ -1210,7 +1214,11 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
        int r, i;

        r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
+#ifdef CONFIG_X86_64
+                                     AMDGPU_GEM_DOMAIN_VRAM,
+#else
                                      AMDGPU_GEM_DOMAIN_GTT,
+#endif
                                      &bo, NULL, (void **)&msg);
        if (r)
                return r;
--
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread


Thread overview: 12+ messages
2021-09-06  1:12 [PATCH v2 0/2] Fix a hang during memory pressure test xinhui pan
2021-09-06  1:12 ` [PATCH v2 1/2] drm/ttm: Fix a deadlock if the target BO is not idle during swap xinhui pan
2021-09-06 11:26   ` Christian König
2021-09-07  1:52     ` Pan, Xinhui
2021-09-06  1:12 ` [PATCH v2 2/2] drm/amdgpu: Use VRAM domain in UVD IB test xinhui pan
2021-09-06  9:01   ` Christian König
2021-09-06  9:04 ` [PATCH v2 0/2] Fix a hang during memory pressure test Christian König
2021-09-06 10:16   ` Pan, Xinhui
2021-09-06 11:04     ` Christian König
  -- strict thread matches above, loose matches on Subject: below --
2021-09-06  1:10 [PATCH v2 2/2] drm/amdgpu: Use VRAM domain in UVD IB test Pan, Xinhui
2021-09-06  2:55 ` Wang, Kevin(Yang)
