* [PATCH 1/4] drm/amdgpu: Increase direct IB pool size
From: xinhui pan @ 2021-09-10  0:38 UTC
  To: amd-gfx; +Cc: alexander.deucher, christian.koenig, xinhui pan

The direct IB pool is also used for the extra VCE/VCN IB messages. Increase its
size to AMDGPU_IB_POOL_SIZE.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index c076a6b9a5a2..9274f32c3661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -307,13 +307,9 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
 		return 0;
 
 	for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
-		if (i == AMDGPU_IB_POOL_DIRECT)
-			size = PAGE_SIZE * 6;
-		else
-			size = AMDGPU_IB_POOL_SIZE;
-
 		r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
-					      size, AMDGPU_GPU_PAGE_SIZE,
+					      AMDGPU_IB_POOL_SIZE,
+					      AMDGPU_GPU_PAGE_SIZE,
 					      AMDGPU_GEM_DOMAIN_GTT);
 		if (r)
 			goto error;
-- 
2.25.1


* [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
From: xinhui pan @ 2021-09-10  0:38 UTC
  To: amd-gfx; +Cc: alexander.deucher, christian.koenig, xinhui pan

Move the BO allocation into sw_init.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++++++++++++++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
 4 files changed, 49 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index d451c359606a..e2eaac941d37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 	const char *fw_name;
 	const struct common_firmware_header *hdr;
 	unsigned family_id;
+	struct amdgpu_bo *bo = NULL;
+	void *addr;
 	int i, j, r;
 
 	INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
@@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 		adev->uvd.filp[i] = NULL;
 	}
 
+	r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
+			AMDGPU_GEM_DOMAIN_GTT,
+			&bo, NULL, &addr);
+	if (r)
+		return r;
+
 	/* from uvd v5.0 HW addressing capacity increased to 64 bits */
-	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
+	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) {
 		adev->uvd.address_64_bit = true;
+		amdgpu_bo_kunmap(bo);
+		amdgpu_bo_unpin(bo);
+		r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
+				0, 256 << 20);
+		if (r) {
+			amdgpu_bo_unreserve(bo);
+			amdgpu_bo_unref(&bo);
+			return r;
+		}
+		r = amdgpu_bo_kmap(bo, &addr);
+		if (r) {
+			amdgpu_bo_unpin(bo);
+			amdgpu_bo_unreserve(bo);
+			amdgpu_bo_unref(&bo);
+			return r;
+		}
+	}
+	adev->uvd.ib_bo = bo;
+	amdgpu_bo_unreserve(bo);
 
 	switch (adev->asic_type) {
 	case CHIP_TONGA:
@@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 		for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
 			amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
 	}
+	amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, NULL);
 	release_firmware(adev->uvd.fw);
 
 	return 0;
@@ -1080,23 +1108,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	unsigned offset_idx = 0;
 	unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
 
-	amdgpu_bo_kunmap(bo);
-	amdgpu_bo_unpin(bo);
-
-	if (!ring->adev->uvd.address_64_bit) {
-		struct ttm_operation_ctx ctx = { true, false };
-
-		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
-		amdgpu_uvd_force_into_uvd_segment(bo);
-		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-		if (r)
-			goto err;
-	}
-
 	r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
 				     AMDGPU_IB_POOL_DELAYED, &job);
 	if (r)
-		goto err;
+		return r;
 
 	if (adev->asic_type >= CHIP_VEGA10) {
 		offset_idx = 1 + ring->me;
@@ -1148,8 +1163,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	}
 
 	amdgpu_bo_fence(bo, f, false);
-	amdgpu_bo_unreserve(bo);
-	amdgpu_bo_unref(&bo);
 
 	if (fence)
 		*fence = dma_fence_get(f);
@@ -1159,10 +1172,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 
 err_free:
 	amdgpu_job_free(job);
-
-err:
-	amdgpu_bo_unreserve(bo);
-	amdgpu_bo_unref(&bo);
 	return r;
 }
 
@@ -1173,16 +1182,15 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 			      struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_bo *bo = NULL;
+	struct amdgpu_bo *bo = adev->uvd.ib_bo;
 	uint32_t *msg;
 	int r, i;
 
-	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_GTT,
-				      &bo, NULL, (void **)&msg);
+	r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
 	if (r)
 		return r;
 
+	msg = amdgpu_bo_kptr(bo);
 	/* stitch together an UVD create msg */
 	msg[0] = cpu_to_le32(0x00000de4);
 	msg[1] = cpu_to_le32(0x00000000);
@@ -1198,23 +1206,25 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 	for (i = 11; i < 1024; ++i)
 		msg[i] = cpu_to_le32(0x0);
 
-	return amdgpu_uvd_send_msg(ring, bo, true, fence);
+	r = amdgpu_uvd_send_msg(ring, bo, true, fence);
+
+	amdgpu_bo_unreserve(bo);
+	return r;
 }
 
 int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 			       bool direct, struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_bo *bo = NULL;
+	struct amdgpu_bo *bo = adev->uvd.ib_bo;
 	uint32_t *msg;
 	int r, i;
 
-	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_GTT,
-				      &bo, NULL, (void **)&msg);
+	r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
 	if (r)
 		return r;
 
+	msg = amdgpu_bo_kptr(bo);
 	/* stitch together an UVD destroy msg */
 	msg[0] = cpu_to_le32(0x00000de4);
 	msg[1] = cpu_to_le32(0x00000002);
@@ -1223,7 +1233,10 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	for (i = 4; i < 1024; ++i)
 		msg[i] = cpu_to_le32(0x0);
 
-	return amdgpu_uvd_send_msg(ring, bo, direct, fence);
+	r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
+
+	amdgpu_bo_unreserve(bo);
+	return r;
 }
 
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index edbb8194ee81..76ac9699885d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -68,6 +68,7 @@ struct amdgpu_uvd {
 	/* store image width to adjust nb memory state */
 	unsigned		decode_image_width;
 	uint32_t                keyselect;
+	struct amdgpu_bo	*ib_bo;
 };
 
 int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index bc571833632e..301c0cea7164 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -332,12 +332,10 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
 static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct dma_fence *fence = NULL;
-	struct amdgpu_bo *bo = NULL;
+	struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
 	long r;
 
-	r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &bo, NULL, NULL);
+	r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
 	if (r)
 		return r;
 
@@ -357,9 +355,7 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 
 error:
 	dma_fence_put(fence);
-	amdgpu_bo_unpin(bo);
 	amdgpu_bo_unreserve(bo);
-	amdgpu_bo_unref(&bo);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index b6e82d75561f..efa270288029 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -338,12 +338,10 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
 static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct dma_fence *fence = NULL;
-	struct amdgpu_bo *bo = NULL;
+	struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
 	long r;
 
-	r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &bo, NULL, NULL);
+	r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
 	if (r)
 		return r;
 
@@ -363,9 +361,7 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 
 error:
 	dma_fence_put(fence);
-	amdgpu_bo_unpin(bo);
 	amdgpu_bo_unreserve(bo);
-	amdgpu_bo_unref(&bo);
 	return r;
 }
 
-- 
2.25.1


* [PATCH 3/4] drm/amdgpu: VCE avoid memory allocation during IB test
From: xinhui pan @ 2021-09-10  0:38 UTC
  To: amd-gfx; +Cc: alexander.deucher, christian.koenig, xinhui pan

Allocate the extra message from the direct IB pool.
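
The message is carved out of the tail of the same direct-pool IB that carries
the command stream; roughly (layout sketch only, offsets as used in the hunk
below):

	/*
	 * one allocation of ib_size_dw * 4 + PAGE_SIZE bytes:
	 *   [0, ib_size_dw * 4)      VCE create/destroy command stream
	 *   [ib_size_dw * 4, end)    extra msg area, addressed below via
	 *                            addr = ib->gpu_addr + ib_size_dw * 4
	 */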

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index e9fdf49d69e8..45d98694db18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -82,7 +82,6 @@ MODULE_FIRMWARE(FIRMWARE_VEGA20);
 
 static void amdgpu_vce_idle_work_handler(struct work_struct *work);
 static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-				     struct amdgpu_bo *bo,
 				     struct dma_fence **fence);
 static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 				      bool direct, struct dma_fence **fence);
@@ -441,7 +440,6 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
  * Open up a stream for HW test
  */
 static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-				     struct amdgpu_bo *bo,
 				     struct dma_fence **fence)
 {
 	const unsigned ib_size_dw = 1024;
@@ -451,14 +449,13 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 	uint64_t addr;
 	int i, r;
 
-	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4 + PAGE_SIZE,
 				     AMDGPU_IB_POOL_DIRECT, &job);
 	if (r)
 		return r;
 
 	ib = &job->ibs[0];
-
-	addr = amdgpu_bo_gpu_offset(bo);
+	addr = ib->gpu_addr + ib_size_dw * 4;
 
 	/* stitch together an VCE create msg */
 	ib->length_dw = 0;
@@ -1134,20 +1131,13 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
 int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct dma_fence *fence = NULL;
-	struct amdgpu_bo *bo = NULL;
 	long r;
 
 	/* skip vce ring1/2 ib test for now, since it's not reliable */
 	if (ring != &ring->adev->vce.ring[0])
 		return 0;
 
-	r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &bo, NULL, NULL);
-	if (r)
-		return r;
-
-	r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL);
+	r = amdgpu_vce_get_create_msg(ring, 1, NULL);
 	if (r)
 		goto error;
 
@@ -1163,8 +1153,6 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 
 error:
 	dma_fence_put(fence);
-	amdgpu_bo_unreserve(bo);
-	amdgpu_bo_free_kernel(&bo, NULL, NULL);
 	return r;
 }
 
-- 
2.25.1


* [PATCH 4/4] drm/amdgpu: VCN avoid memory allocation during IB test
From: xinhui pan @ 2021-09-10  0:38 UTC
  To: amd-gfx; +Cc: alexander.deucher, christian.koenig, xinhui pan

Allocate the extra message from the direct IB pool.
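
The tests now keep the message in an on-stack struct amdgpu_ib taken from the
direct pool; a minimal sketch of the lifecycle used in the hunks below:

	struct amdgpu_ib ib;

	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, PAGE_SIZE, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		return r;
	/* write the create/destroy message into ib.ptr, submit, then ... */
	amdgpu_ib_free(adev, &ib, fence);	/* space returns to the pool once the fence signals */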

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 99 +++++++++++--------------
 1 file changed, 45 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 561296a85b43..b60d5f01fdae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -541,15 +541,14 @@ int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring)
 }
 
 static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
-				   struct amdgpu_bo *bo,
-				   struct dma_fence **fence)
+		struct amdgpu_ib *ib_msg,
+		struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct dma_fence *f = NULL;
 	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
-	uint64_t addr;
-	void *msg = NULL;
+	uint64_t addr = ib_msg->gpu_addr;
 	int i, r;
 
 	r = amdgpu_job_alloc_with_ib(adev, 64,
@@ -558,8 +557,6 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
 		goto err;
 
 	ib = &job->ibs[0];
-	addr = amdgpu_bo_gpu_offset(bo);
-	msg = amdgpu_bo_kptr(bo);
 	ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
 	ib->ptr[1] = addr;
 	ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
@@ -576,9 +573,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
 	if (r)
 		goto err_free;
 
-	amdgpu_bo_fence(bo, f, false);
-	amdgpu_bo_unreserve(bo);
-	amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
+	amdgpu_ib_free(adev, ib_msg, f);
 
 	if (fence)
 		*fence = dma_fence_get(f);
@@ -588,27 +583,26 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
 
 err_free:
 	amdgpu_job_free(job);
-
 err:
-	amdgpu_bo_unreserve(bo);
-	amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
+	amdgpu_ib_free(adev, ib_msg, f);
 	return r;
 }
 
 static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-					 struct amdgpu_bo **bo)
+		struct amdgpu_ib *ib)
 {
 	struct amdgpu_device *adev = ring->adev;
 	uint32_t *msg;
 	int r, i;
 
-	*bo = NULL;
-	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      bo, NULL, (void **)&msg);
+	memset(ib, 0, sizeof(*ib));
+	r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
+			AMDGPU_IB_POOL_DIRECT,
+			ib);
 	if (r)
 		return r;
 
+	msg = ib->ptr;
 	msg[0] = cpu_to_le32(0x00000028);
 	msg[1] = cpu_to_le32(0x00000038);
 	msg[2] = cpu_to_le32(0x00000001);
@@ -630,19 +624,20 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
 }
 
 static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-					  struct amdgpu_bo **bo)
+					  struct amdgpu_ib *ib)
 {
 	struct amdgpu_device *adev = ring->adev;
 	uint32_t *msg;
 	int r, i;
 
-	*bo = NULL;
-	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      bo, NULL, (void **)&msg);
+	memset(ib, 0, sizeof(*ib));
+	r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
+			AMDGPU_IB_POOL_DIRECT,
+			ib);
 	if (r)
 		return r;
 
+	msg = ib->ptr;
 	msg[0] = cpu_to_le32(0x00000028);
 	msg[1] = cpu_to_le32(0x00000018);
 	msg[2] = cpu_to_le32(0x00000000);
@@ -658,21 +653,21 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
 int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct dma_fence *fence = NULL;
-	struct amdgpu_bo *bo;
+	struct amdgpu_ib ib;
 	long r;
 
-	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
+	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
 	if (r)
 		goto error;
 
-	r = amdgpu_vcn_dec_send_msg(ring, bo, NULL);
+	r = amdgpu_vcn_dec_send_msg(ring, &ib, NULL);
 	if (r)
 		goto error;
-	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
+	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
 	if (r)
 		goto error;
 
-	r = amdgpu_vcn_dec_send_msg(ring, bo, &fence);
+	r = amdgpu_vcn_dec_send_msg(ring, &ib, &fence);
 	if (r)
 		goto error;
 
@@ -688,8 +683,8 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 }
 
 static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
-				   struct amdgpu_bo *bo,
-				   struct dma_fence **fence)
+		struct amdgpu_ib *ib_msg,
+		struct dma_fence **fence)
 {
 	struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
 	const unsigned int ib_size_dw = 64;
@@ -697,7 +692,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
 	struct dma_fence *f = NULL;
 	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
-	uint64_t addr;
+	uint64_t addr = ib_msg->gpu_addr;
 	int i, r;
 
 	r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4,
@@ -706,7 +701,6 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
 		goto err;
 
 	ib = &job->ibs[0];
-	addr = amdgpu_bo_gpu_offset(bo);
 	ib->length_dw = 0;
 
 	ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
@@ -726,9 +720,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
 	if (r)
 		goto err_free;
 
-	amdgpu_bo_fence(bo, f, false);
-	amdgpu_bo_unreserve(bo);
-	amdgpu_bo_unref(&bo);
+	amdgpu_ib_free(adev, ib_msg, f);
 
 	if (fence)
 		*fence = dma_fence_get(f);
@@ -738,31 +730,29 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
 
 err_free:
 	amdgpu_job_free(job);
-
 err:
-	amdgpu_bo_unreserve(bo);
-	amdgpu_bo_unref(&bo);
+	amdgpu_ib_free(adev, ib_msg, f);
 	return r;
 }
 
 int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct dma_fence *fence = NULL;
-	struct amdgpu_bo *bo;
+	struct amdgpu_ib ib;
 	long r;
 
-	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
+	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
 	if (r)
 		goto error;
 
-	r = amdgpu_vcn_dec_sw_send_msg(ring, bo, NULL);
+	r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, NULL);
 	if (r)
 		goto error;
-	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
+	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
 	if (r)
 		goto error;
 
-	r = amdgpu_vcn_dec_sw_send_msg(ring, bo, &fence);
+	r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, &fence);
 	if (r)
 		goto error;
 
@@ -809,7 +799,7 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
 }
 
 static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-					 struct amdgpu_bo *bo,
+					 struct amdgpu_ib *ib_msg,
 					 struct dma_fence **fence)
 {
 	const unsigned ib_size_dw = 16;
@@ -825,7 +815,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
 		return r;
 
 	ib = &job->ibs[0];
-	addr = amdgpu_bo_gpu_offset(bo);
+	addr = ib_msg->gpu_addr;
 
 	ib->length_dw = 0;
 	ib->ptr[ib->length_dw++] = 0x00000018;
@@ -863,7 +853,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
 }
 
 static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-					  struct amdgpu_bo *bo,
+					  struct amdgpu_ib *ib_msg,
 					  struct dma_fence **fence)
 {
 	const unsigned ib_size_dw = 16;
@@ -879,7 +869,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
 		return r;
 
 	ib = &job->ibs[0];
-	addr = amdgpu_bo_gpu_offset(bo);
+	addr = ib_msg->gpu_addr;
 
 	ib->length_dw = 0;
 	ib->ptr[ib->length_dw++] = 0x00000018;
@@ -918,21 +908,23 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
 
 int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
+	struct amdgpu_device *adev = ring->adev;
 	struct dma_fence *fence = NULL;
-	struct amdgpu_bo *bo = NULL;
+	struct amdgpu_ib ib;
 	long r;
 
-	r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &bo, NULL, NULL);
+	memset(&ib, 0, sizeof(ib));
+	r = amdgpu_ib_get(adev, NULL, 128 << 10,
+			AMDGPU_IB_POOL_DIRECT,
+			&ib);
 	if (r)
 		return r;
 
-	r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL);
+	r = amdgpu_vcn_enc_get_create_msg(ring, 1, &ib, NULL);
 	if (r)
 		goto error;
 
-	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence);
+	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &ib, &fence);
 	if (r)
 		goto error;
 
@@ -943,9 +935,8 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 		r = 0;
 
 error:
+	amdgpu_ib_free(adev, &ib, fence);
 	dma_fence_put(fence);
-	amdgpu_bo_unreserve(bo);
-	amdgpu_bo_free_kernel(&bo, NULL, NULL);
 
 	return r;
 }
-- 
2.25.1


* Re: [PATCH 1/4] drm/amdgpu: Increase direct IB pool size
From: Christian König @ 2021-09-10  6:15 UTC
  To: xinhui pan, amd-gfx; +Cc: alexander.deucher

On 10.09.21 at 02:38, xinhui pan wrote:
> Direct IB pool is used for vce/vcn IB extra msg too. Increase its size
> to AMDGPU_IB_POOL_SIZE.
>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 8 ++------
>   1 file changed, 2 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index c076a6b9a5a2..9274f32c3661 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -307,13 +307,9 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
>   		return 0;
>   
>   	for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
> -		if (i == AMDGPU_IB_POOL_DIRECT)
> -			size = PAGE_SIZE * 6;
> -		else
> -			size = AMDGPU_IB_POOL_SIZE;
> -
>   		r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
> -					      size, AMDGPU_GPU_PAGE_SIZE,
> +					      AMDGPU_IB_POOL_SIZE,
> +					      AMDGPU_GPU_PAGE_SIZE,
>   					      AMDGPU_GEM_DOMAIN_GTT);
>   		if (r)
>   			goto error;


* Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
From: Christian König @ 2021-09-10  6:24 UTC
  To: xinhui pan, amd-gfx; +Cc: alexander.deucher

On 10.09.21 at 02:38, xinhui pan wrote:
> move BO allocation in sw_init.
>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++++++++++++++----------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
>   drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
>   4 files changed, 49 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index d451c359606a..e2eaac941d37 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>   	const char *fw_name;
>   	const struct common_firmware_header *hdr;
>   	unsigned family_id;
> +	struct amdgpu_bo *bo = NULL;
> +	void *addr;
>   	int i, j, r;
>   
>   	INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
> @@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>   		adev->uvd.filp[i] = NULL;
>   	}
>   
> +	r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
> +			AMDGPU_GEM_DOMAIN_GTT,
> +			&bo, NULL, &addr);
> +	if (r)
> +		return r;
> +
>   	/* from uvd v5.0 HW addressing capacity increased to 64 bits */
> -	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
> +	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) {
>   		adev->uvd.address_64_bit = true;
> +		amdgpu_bo_kunmap(bo);
> +		amdgpu_bo_unpin(bo);
> +		r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
> +				0, 256 << 20);

Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
because I want to remove amdgpu_bo_pin_restricted() sooner or later.
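
I.e. something along these lines (rough sketch only, reusing the code this
patch removes from amdgpu_uvd_send_msg()):

	struct ttm_operation_ctx ctx = { true, false };

	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
	amdgpu_uvd_force_into_uvd_segment(bo);
	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (r) {
		/* unreserve/unref and bail out as in the hunk above */
	}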

> +		if (r) {
> +			amdgpu_bo_unreserve(bo);
> +			amdgpu_bo_unref(&bo);
> +			return r;
> +		}
> +		r = amdgpu_bo_kmap(bo, &addr);
> +		if (r) {
> +			amdgpu_bo_unpin(bo);
> +			amdgpu_bo_unreserve(bo);
> +			amdgpu_bo_unref(&bo);
> +			return r;
> +		}
> +	}
> +	adev->uvd.ib_bo = bo;
> +	amdgpu_bo_unreserve(bo);
>   
>   	switch (adev->asic_type) {
>   	case CHIP_TONGA:
> @@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
>   		for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
>   			amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
>   	}
> +	amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, NULL);
>   	release_firmware(adev->uvd.fw);
>   
>   	return 0;
> @@ -1080,23 +1108,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>   	unsigned offset_idx = 0;
>   	unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
>   
> -	amdgpu_bo_kunmap(bo);
> -	amdgpu_bo_unpin(bo);
> -
> -	if (!ring->adev->uvd.address_64_bit) {
> -		struct ttm_operation_ctx ctx = { true, false };
> -
> -		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
> -		amdgpu_uvd_force_into_uvd_segment(bo);
> -		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> -		if (r)
> -			goto err;
> -	}
> -
>   	r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
>   				     AMDGPU_IB_POOL_DELAYED, &job);
>   	if (r)
> -		goto err;
> +		return r;
>   
>   	if (adev->asic_type >= CHIP_VEGA10) {
>   		offset_idx = 1 + ring->me;
> @@ -1148,8 +1163,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>   	}
>   
>   	amdgpu_bo_fence(bo, f, false);
> -	amdgpu_bo_unreserve(bo);
> -	amdgpu_bo_unref(&bo);
>   
>   	if (fence)
>   		*fence = dma_fence_get(f);
> @@ -1159,10 +1172,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>   
>   err_free:
>   	amdgpu_job_free(job);
> -
> -err:
> -	amdgpu_bo_unreserve(bo);
> -	amdgpu_bo_unref(&bo);
>   	return r;
>   }
>   
> @@ -1173,16 +1182,15 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>   			      struct dma_fence **fence)
>   {
>   	struct amdgpu_device *adev = ring->adev;
> -	struct amdgpu_bo *bo = NULL;
> +	struct amdgpu_bo *bo = adev->uvd.ib_bo;
>   	uint32_t *msg;
>   	int r, i;
>   
> -	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
> -				      AMDGPU_GEM_DOMAIN_GTT,
> -				      &bo, NULL, (void **)&msg);
> +	r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>   	if (r)
>   		return r;
>   
> +	msg = amdgpu_bo_kptr(bo);
>   	/* stitch together an UVD create msg */
>   	msg[0] = cpu_to_le32(0x00000de4);
>   	msg[1] = cpu_to_le32(0x00000000);
> @@ -1198,23 +1206,25 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>   	for (i = 11; i < 1024; ++i)
>   		msg[i] = cpu_to_le32(0x0);
>   
> -	return amdgpu_uvd_send_msg(ring, bo, true, fence);
> +	r = amdgpu_uvd_send_msg(ring, bo, true, fence);
> +
> +	amdgpu_bo_unreserve(bo);
> +	return r;
>   }
>   
>   int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>   			       bool direct, struct dma_fence **fence)
>   {
>   	struct amdgpu_device *adev = ring->adev;
> -	struct amdgpu_bo *bo = NULL;
> +	struct amdgpu_bo *bo = adev->uvd.ib_bo;
>   	uint32_t *msg;
>   	int r, i;
>   
> -	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
> -				      AMDGPU_GEM_DOMAIN_GTT,
> -				      &bo, NULL, (void **)&msg);
> +	r = ttm_bo_reserve(&bo->tbo, true, true, NULL);

Please use amdgpu_bo_reserve() here and elsewhere as well, just to be on
the clean side.

Lockdep will sooner or later complain that we reserve a BO in the reset
path, but that is mostly a theoretical problem and existed before this
change as well, so I'm fine with sticking with it for now.

It's just that the trylock might not work because the BO is busy with
indirect submission.
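
E.g. just (sketch only; whether the wait should be interruptible is up to you):

	r = amdgpu_bo_reserve(bo, true);	/* blocking reserve instead of the trylock */
	if (r)
		return r;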

Apart from those two minor issues the patch looks good to me.

Thanks,
Christian.

>   	if (r)
>   		return r;
>   
> +	msg = amdgpu_bo_kptr(bo);
>   	/* stitch together an UVD destroy msg */
>   	msg[0] = cpu_to_le32(0x00000de4);
>   	msg[1] = cpu_to_le32(0x00000002);
> @@ -1223,7 +1233,10 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>   	for (i = 4; i < 1024; ++i)
>   		msg[i] = cpu_to_le32(0x0);
>   
> -	return amdgpu_uvd_send_msg(ring, bo, direct, fence);
> +	r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
> +
> +	amdgpu_bo_unreserve(bo);
> +	return r;
>   }
>   
>   static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
> index edbb8194ee81..76ac9699885d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
> @@ -68,6 +68,7 @@ struct amdgpu_uvd {
>   	/* store image width to adjust nb memory state */
>   	unsigned		decode_image_width;
>   	uint32_t                keyselect;
> +	struct amdgpu_bo	*ib_bo;
>   };
>   
>   int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> index bc571833632e..301c0cea7164 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> @@ -332,12 +332,10 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
>   static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   {
>   	struct dma_fence *fence = NULL;
> -	struct amdgpu_bo *bo = NULL;
> +	struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>   	long r;
>   
> -	r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
> -				      AMDGPU_GEM_DOMAIN_VRAM,
> -				      &bo, NULL, NULL);
> +	r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>   	if (r)
>   		return r;
>   
> @@ -357,9 +355,7 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   
>   error:
>   	dma_fence_put(fence);
> -	amdgpu_bo_unpin(bo);
>   	amdgpu_bo_unreserve(bo);
> -	amdgpu_bo_unref(&bo);
>   	return r;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> index b6e82d75561f..efa270288029 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> @@ -338,12 +338,10 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
>   static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   {
>   	struct dma_fence *fence = NULL;
> -	struct amdgpu_bo *bo = NULL;
> +	struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>   	long r;
>   
> -	r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
> -				      AMDGPU_GEM_DOMAIN_VRAM,
> -				      &bo, NULL, NULL);
> +	r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>   	if (r)
>   		return r;
>   
> @@ -363,9 +361,7 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   
>   error:
>   	dma_fence_put(fence);
> -	amdgpu_bo_unpin(bo);
>   	amdgpu_bo_unreserve(bo);
> -	amdgpu_bo_unref(&bo);
>   	return r;
>   }
>   


* Re: [PATCH 3/4] drm/amdgpu: VCE avoid memory allocation during IB test
From: Christian König @ 2021-09-10  6:29 UTC
  To: xinhui pan, amd-gfx, Liu, Leo; +Cc: alexander.deucher

On 10.09.21 at 02:38, xinhui pan wrote:
> alloc extra msg from direct IB pool.
>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 18 +++---------------
>   1 file changed, 3 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> index e9fdf49d69e8..45d98694db18 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> @@ -82,7 +82,6 @@ MODULE_FIRMWARE(FIRMWARE_VEGA20);
>   
>   static void amdgpu_vce_idle_work_handler(struct work_struct *work);
>   static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
> -				     struct amdgpu_bo *bo,
>   				     struct dma_fence **fence);
>   static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>   				      bool direct, struct dma_fence **fence);
> @@ -441,7 +440,6 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
>    * Open up a stream for HW test
>    */
>   static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
> -				     struct amdgpu_bo *bo,
>   				     struct dma_fence **fence)
>   {
>   	const unsigned ib_size_dw = 1024;
> @@ -451,14 +449,13 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>   	uint64_t addr;
>   	int i, r;
>   
> -	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
> +	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4 + PAGE_SIZE,

Please use AMDGPU_PAGE_SIZE here, since this is not really related to the
CPU page size.

>   				     AMDGPU_IB_POOL_DIRECT, &job);
>   	if (r)
>   		return r;
>   
>   	ib = &job->ibs[0];
> -
> -	addr = amdgpu_bo_gpu_offset(bo);
> +	addr = ib->gpu_addr + ib_size_dw * 4;

I think that address needs stronger alignment.

For UVD that used to be 256 bytes, but I have no idea what VCE requires.
Leo, do you know off hand?
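
For illustration, something like the following, where 256 is only the UVD
figure mentioned above and AMDGPU_GPU_PAGE_SIZE stands in for whichever GPU
page-size constant is meant:

	r = amdgpu_job_alloc_with_ib(ring->adev,
				     ALIGN(ib_size_dw * 4, 256) + AMDGPU_GPU_PAGE_SIZE,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = ib->gpu_addr + ALIGN(ib_size_dw * 4, 256);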

Thanks,
Christian.

>   
>   	/* stitch together an VCE create msg */
>   	ib->length_dw = 0;
> @@ -1134,20 +1131,13 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
>   int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   {
>   	struct dma_fence *fence = NULL;
> -	struct amdgpu_bo *bo = NULL;
>   	long r;
>   
>   	/* skip vce ring1/2 ib test for now, since it's not reliable */
>   	if (ring != &ring->adev->vce.ring[0])
>   		return 0;
>   
> -	r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE,
> -				      AMDGPU_GEM_DOMAIN_VRAM,
> -				      &bo, NULL, NULL);
> -	if (r)
> -		return r;
> -
> -	r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL);
> +	r = amdgpu_vce_get_create_msg(ring, 1, NULL);
>   	if (r)
>   		goto error;
>   
> @@ -1163,8 +1153,6 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   
>   error:
>   	dma_fence_put(fence);
> -	amdgpu_bo_unreserve(bo);
> -	amdgpu_bo_free_kernel(&bo, NULL, NULL);
>   	return r;
>   }
>   


* Re: [PATCH 4/4] drm/amdgpu: VCN avoid memory allocation during IB test
From: Christian König @ 2021-09-10  6:33 UTC
  To: xinhui pan, amd-gfx; +Cc: alexander.deucher



On 10.09.21 at 02:38, xinhui pan wrote:
> alloc extra msg from direct IB pool.
>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 99 +++++++++++--------------
>   1 file changed, 45 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> index 561296a85b43..b60d5f01fdae 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> @@ -541,15 +541,14 @@ int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring)
>   }
>   
>   static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
> -				   struct amdgpu_bo *bo,
> -				   struct dma_fence **fence)
> +		struct amdgpu_ib *ib_msg,
> +		struct dma_fence **fence)

The parameter indentation here and in a few other places doesn't look
correct to me. What editor are you using, BTW?
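
For reference, the usual kernel style lines continuation parameters up with
the opening parenthesis, e.g.:

	static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
					   struct amdgpu_ib *ib_msg,
					   struct dma_fence **fence)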

Apart from that the patch is Reviewed-by: Christian König 
<christian.koenig@amd.com>.

Regards,
Christian.

>   {
>   	struct amdgpu_device *adev = ring->adev;
>   	struct dma_fence *f = NULL;
>   	struct amdgpu_job *job;
>   	struct amdgpu_ib *ib;
> -	uint64_t addr;
> -	void *msg = NULL;
> +	uint64_t addr = ib_msg->gpu_addr;
>   	int i, r;
>   
>   	r = amdgpu_job_alloc_with_ib(adev, 64,
> @@ -558,8 +557,6 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
>   		goto err;
>   
>   	ib = &job->ibs[0];
> -	addr = amdgpu_bo_gpu_offset(bo);
> -	msg = amdgpu_bo_kptr(bo);
>   	ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
>   	ib->ptr[1] = addr;
>   	ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
> @@ -576,9 +573,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
>   	if (r)
>   		goto err_free;
>   
> -	amdgpu_bo_fence(bo, f, false);
> -	amdgpu_bo_unreserve(bo);
> -	amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
> +	amdgpu_ib_free(adev, ib_msg, f);
>   
>   	if (fence)
>   		*fence = dma_fence_get(f);
> @@ -588,27 +583,26 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
>   
>   err_free:
>   	amdgpu_job_free(job);
> -
>   err:
> -	amdgpu_bo_unreserve(bo);
> -	amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
> +	amdgpu_ib_free(adev, ib_msg, f);
>   	return r;
>   }
>   
>   static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
> -					 struct amdgpu_bo **bo)
> +		struct amdgpu_ib *ib)
>   {
>   	struct amdgpu_device *adev = ring->adev;
>   	uint32_t *msg;
>   	int r, i;
>   
> -	*bo = NULL;
> -	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
> -				      AMDGPU_GEM_DOMAIN_VRAM,
> -				      bo, NULL, (void **)&msg);
> +	memset(ib, 0, sizeof(*ib));
> +	r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
> +			AMDGPU_IB_POOL_DIRECT,
> +			ib);
>   	if (r)
>   		return r;
>   
> +	msg = ib->ptr;
>   	msg[0] = cpu_to_le32(0x00000028);
>   	msg[1] = cpu_to_le32(0x00000038);
>   	msg[2] = cpu_to_le32(0x00000001);
> @@ -630,19 +624,20 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
>   }
>   
>   static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
> -					  struct amdgpu_bo **bo)
> +					  struct amdgpu_ib *ib)
>   {
>   	struct amdgpu_device *adev = ring->adev;
>   	uint32_t *msg;
>   	int r, i;
>   
> -	*bo = NULL;
> -	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
> -				      AMDGPU_GEM_DOMAIN_VRAM,
> -				      bo, NULL, (void **)&msg);
> +	memset(ib, 0, sizeof(*ib));
> +	r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
> +			AMDGPU_IB_POOL_DIRECT,
> +			ib);
>   	if (r)
>   		return r;
>   
> +	msg = ib->ptr;
>   	msg[0] = cpu_to_le32(0x00000028);
>   	msg[1] = cpu_to_le32(0x00000018);
>   	msg[2] = cpu_to_le32(0x00000000);
> @@ -658,21 +653,21 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
>   int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   {
>   	struct dma_fence *fence = NULL;
> -	struct amdgpu_bo *bo;
> +	struct amdgpu_ib ib;
>   	long r;
>   
> -	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
> +	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
>   	if (r)
>   		goto error;
>   
> -	r = amdgpu_vcn_dec_send_msg(ring, bo, NULL);
> +	r = amdgpu_vcn_dec_send_msg(ring, &ib, NULL);
>   	if (r)
>   		goto error;
> -	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
> +	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
>   	if (r)
>   		goto error;
>   
> -	r = amdgpu_vcn_dec_send_msg(ring, bo, &fence);
> +	r = amdgpu_vcn_dec_send_msg(ring, &ib, &fence);
>   	if (r)
>   		goto error;
>   
> @@ -688,8 +683,8 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   }
>   
>   static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
> -				   struct amdgpu_bo *bo,
> -				   struct dma_fence **fence)
> +		struct amdgpu_ib *ib_msg,
> +		struct dma_fence **fence)
>   {
>   	struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
>   	const unsigned int ib_size_dw = 64;
> @@ -697,7 +692,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
>   	struct dma_fence *f = NULL;
>   	struct amdgpu_job *job;
>   	struct amdgpu_ib *ib;
> -	uint64_t addr;
> +	uint64_t addr = ib_msg->gpu_addr;
>   	int i, r;
>   
>   	r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4,
> @@ -706,7 +701,6 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
>   		goto err;
>   
>   	ib = &job->ibs[0];
> -	addr = amdgpu_bo_gpu_offset(bo);
>   	ib->length_dw = 0;
>   
>   	ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
> @@ -726,9 +720,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
>   	if (r)
>   		goto err_free;
>   
> -	amdgpu_bo_fence(bo, f, false);
> -	amdgpu_bo_unreserve(bo);
> -	amdgpu_bo_unref(&bo);
> +	amdgpu_ib_free(adev, ib_msg, f);
>   
>   	if (fence)
>   		*fence = dma_fence_get(f);
> @@ -738,31 +730,29 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
>   
>   err_free:
>   	amdgpu_job_free(job);
> -
>   err:
> -	amdgpu_bo_unreserve(bo);
> -	amdgpu_bo_unref(&bo);
> +	amdgpu_ib_free(adev, ib_msg, f);
>   	return r;
>   }
>   
>   int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   {
>   	struct dma_fence *fence = NULL;
> -	struct amdgpu_bo *bo;
> +	struct amdgpu_ib ib;
>   	long r;
>   
> -	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
> +	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
>   	if (r)
>   		goto error;
>   
> -	r = amdgpu_vcn_dec_sw_send_msg(ring, bo, NULL);
> +	r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, NULL);
>   	if (r)
>   		goto error;
> -	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
> +	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
>   	if (r)
>   		goto error;
>   
> -	r = amdgpu_vcn_dec_sw_send_msg(ring, bo, &fence);
> +	r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, &fence);
>   	if (r)
>   		goto error;
>   
> @@ -809,7 +799,7 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
>   }
>   
>   static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
> -					 struct amdgpu_bo *bo,
> +					 struct amdgpu_ib *ib_msg,
>   					 struct dma_fence **fence)
>   {
>   	const unsigned ib_size_dw = 16;
> @@ -825,7 +815,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
>   		return r;
>   
>   	ib = &job->ibs[0];
> -	addr = amdgpu_bo_gpu_offset(bo);
> +	addr = ib_msg->gpu_addr;
>   
>   	ib->length_dw = 0;
>   	ib->ptr[ib->length_dw++] = 0x00000018;
> @@ -863,7 +853,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
>   }
>   
>   static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
> -					  struct amdgpu_bo *bo,
> +					  struct amdgpu_ib *ib_msg,
>   					  struct dma_fence **fence)
>   {
>   	const unsigned ib_size_dw = 16;
> @@ -879,7 +869,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
>   		return r;
>   
>   	ib = &job->ibs[0];
> -	addr = amdgpu_bo_gpu_offset(bo);
> +	addr = ib_msg->gpu_addr;
>   
>   	ib->length_dw = 0;
>   	ib->ptr[ib->length_dw++] = 0x00000018;
> @@ -918,21 +908,23 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
>   
>   int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   {
> +	struct amdgpu_device *adev = ring->adev;
>   	struct dma_fence *fence = NULL;
> -	struct amdgpu_bo *bo = NULL;
> +	struct amdgpu_ib ib;
>   	long r;
>   
> -	r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
> -				      AMDGPU_GEM_DOMAIN_VRAM,
> -				      &bo, NULL, NULL);
> +	memset(&ib, 0, sizeof(ib));
> +	r = amdgpu_ib_get(adev, NULL, 128 << 10,
> +			AMDGPU_IB_POOL_DIRECT,
> +			&ib);
>   	if (r)
>   		return r;
>   
> -	r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL);
> +	r = amdgpu_vcn_enc_get_create_msg(ring, 1, &ib, NULL);
>   	if (r)
>   		goto error;
>   
> -	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence);
> +	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &ib, &fence);
>   	if (r)
>   		goto error;
>   
> @@ -943,9 +935,8 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   		r = 0;
>   
>   error:
> +	amdgpu_ib_free(adev, &ib, fence);
>   	dma_fence_put(fence);
> -	amdgpu_bo_unreserve(bo);
> -	amdgpu_bo_free_kernel(&bo, NULL, NULL);
>   
>   	return r;
>   }


* Re: [PATCH 4/4] drm/amdgpu: VCN avoid memory allocation during IB test
From: Pan, Xinhui @ 2021-09-10  7:53 UTC
  To: Koenig, Christian, amd-gfx; +Cc: Deucher, Alexander

[AMD Official Use Only]

I am using vim with
set tabstop=8
set shiftwidth=8
set softtabstop=8

________________________________________
From: Koenig, Christian <Christian.Koenig@amd.com>
Sent: September 10, 2021 14:33
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: [PATCH 4/4] drm/amdgpu: VCN avoid memory allocation during IB test



On 10.09.21 at 02:38, xinhui pan wrote:
> alloc extra msg from direct IB pool.
>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 99 +++++++++++--------------
>   1 file changed, 45 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> index 561296a85b43..b60d5f01fdae 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> @@ -541,15 +541,14 @@ int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring)
>   }
>
>   static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
> -                                struct amdgpu_bo *bo,
> -                                struct dma_fence **fence)
> +             struct amdgpu_ib *ib_msg,
> +             struct dma_fence **fence)

The parameter indentation here and at a few other places doesn't look
correct to me, what editor are you using BTW?

Apart from that the patch is Reviewed-by: Christian König
<christian.koenig@amd.com>.

Regards,
Christian.

>   {
>       struct amdgpu_device *adev = ring->adev;
>       struct dma_fence *f = NULL;
>       struct amdgpu_job *job;
>       struct amdgpu_ib *ib;
> -     uint64_t addr;
> -     void *msg = NULL;
> +     uint64_t addr = ib_msg->gpu_addr;
>       int i, r;
>
>       r = amdgpu_job_alloc_with_ib(adev, 64,
> @@ -558,8 +557,6 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
>               goto err;
>
>       ib = &job->ibs[0];
> -     addr = amdgpu_bo_gpu_offset(bo);
> -     msg = amdgpu_bo_kptr(bo);
>       ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
>       ib->ptr[1] = addr;
>       ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
> @@ -576,9 +573,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
>       if (r)
>               goto err_free;
>
> -     amdgpu_bo_fence(bo, f, false);
> -     amdgpu_bo_unreserve(bo);
> -     amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
> +     amdgpu_ib_free(adev, ib_msg, f);
>
>       if (fence)
>               *fence = dma_fence_get(f);
> @@ -588,27 +583,26 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
>
>   err_free:
>       amdgpu_job_free(job);
> -
>   err:
> -     amdgpu_bo_unreserve(bo);
> -     amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
> +     amdgpu_ib_free(adev, ib_msg, f);
>       return r;
>   }
>
>   static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
> -                                      struct amdgpu_bo **bo)
> +             struct amdgpu_ib *ib)
>   {
>       struct amdgpu_device *adev = ring->adev;
>       uint32_t *msg;
>       int r, i;
>
> -     *bo = NULL;
> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
> -                                   AMDGPU_GEM_DOMAIN_VRAM,
> -                                   bo, NULL, (void **)&msg);
> +     memset(ib, 0, sizeof(*ib));
> +     r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
> +                     AMDGPU_IB_POOL_DIRECT,
> +                     ib);
>       if (r)
>               return r;
>
> +     msg = ib->ptr;
>       msg[0] = cpu_to_le32(0x00000028);
>       msg[1] = cpu_to_le32(0x00000038);
>       msg[2] = cpu_to_le32(0x00000001);
> @@ -630,19 +624,20 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
>   }
>
>   static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
> -                                       struct amdgpu_bo **bo)
> +                                       struct amdgpu_ib *ib)
>   {
>       struct amdgpu_device *adev = ring->adev;
>       uint32_t *msg;
>       int r, i;
>
> -     *bo = NULL;
> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
> -                                   AMDGPU_GEM_DOMAIN_VRAM,
> -                                   bo, NULL, (void **)&msg);
> +     memset(ib, 0, sizeof(*ib));
> +     r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
> +                     AMDGPU_IB_POOL_DIRECT,
> +                     ib);
>       if (r)
>               return r;
>
> +     msg = ib->ptr;
>       msg[0] = cpu_to_le32(0x00000028);
>       msg[1] = cpu_to_le32(0x00000018);
>       msg[2] = cpu_to_le32(0x00000000);
> @@ -658,21 +653,21 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
>   int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   {
>       struct dma_fence *fence = NULL;
> -     struct amdgpu_bo *bo;
> +     struct amdgpu_ib ib;
>       long r;
>
> -     r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
> +     r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
>       if (r)
>               goto error;
>
> -     r = amdgpu_vcn_dec_send_msg(ring, bo, NULL);
> +     r = amdgpu_vcn_dec_send_msg(ring, &ib, NULL);
>       if (r)
>               goto error;
> -     r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
> +     r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
>       if (r)
>               goto error;
>
> -     r = amdgpu_vcn_dec_send_msg(ring, bo, &fence);
> +     r = amdgpu_vcn_dec_send_msg(ring, &ib, &fence);
>       if (r)
>               goto error;
>
> @@ -688,8 +683,8 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   }
>
>   static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
> -                                struct amdgpu_bo *bo,
> -                                struct dma_fence **fence)
> +             struct amdgpu_ib *ib_msg,
> +             struct dma_fence **fence)
>   {
>       struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
>       const unsigned int ib_size_dw = 64;
> @@ -697,7 +692,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
>       struct dma_fence *f = NULL;
>       struct amdgpu_job *job;
>       struct amdgpu_ib *ib;
> -     uint64_t addr;
> +     uint64_t addr = ib_msg->gpu_addr;
>       int i, r;
>
>       r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4,
> @@ -706,7 +701,6 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
>               goto err;
>
>       ib = &job->ibs[0];
> -     addr = amdgpu_bo_gpu_offset(bo);
>       ib->length_dw = 0;
>
>       ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
> @@ -726,9 +720,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
>       if (r)
>               goto err_free;
>
> -     amdgpu_bo_fence(bo, f, false);
> -     amdgpu_bo_unreserve(bo);
> -     amdgpu_bo_unref(&bo);
> +     amdgpu_ib_free(adev, ib_msg, f);
>
>       if (fence)
>               *fence = dma_fence_get(f);
> @@ -738,31 +730,29 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
>
>   err_free:
>       amdgpu_job_free(job);
> -
>   err:
> -     amdgpu_bo_unreserve(bo);
> -     amdgpu_bo_unref(&bo);
> +     amdgpu_ib_free(adev, ib_msg, f);
>       return r;
>   }
>
>   int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   {
>       struct dma_fence *fence = NULL;
> -     struct amdgpu_bo *bo;
> +     struct amdgpu_ib ib;
>       long r;
>
> -     r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
> +     r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
>       if (r)
>               goto error;
>
> -     r = amdgpu_vcn_dec_sw_send_msg(ring, bo, NULL);
> +     r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, NULL);
>       if (r)
>               goto error;
> -     r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
> +     r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
>       if (r)
>               goto error;
>
> -     r = amdgpu_vcn_dec_sw_send_msg(ring, bo, &fence);
> +     r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, &fence);
>       if (r)
>               goto error;
>
> @@ -809,7 +799,7 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
>   }
>
>   static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
> -                                      struct amdgpu_bo *bo,
> +                                      struct amdgpu_ib *ib_msg,
>                                        struct dma_fence **fence)
>   {
>       const unsigned ib_size_dw = 16;
> @@ -825,7 +815,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
>               return r;
>
>       ib = &job->ibs[0];
> -     addr = amdgpu_bo_gpu_offset(bo);
> +     addr = ib_msg->gpu_addr;
>
>       ib->length_dw = 0;
>       ib->ptr[ib->length_dw++] = 0x00000018;
> @@ -863,7 +853,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
>   }
>
>   static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
> -                                       struct amdgpu_bo *bo,
> +                                       struct amdgpu_ib *ib_msg,
>                                         struct dma_fence **fence)
>   {
>       const unsigned ib_size_dw = 16;
> @@ -879,7 +869,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
>               return r;
>
>       ib = &job->ibs[0];
> -     addr = amdgpu_bo_gpu_offset(bo);
> +     addr = ib_msg->gpu_addr;
>
>       ib->length_dw = 0;
>       ib->ptr[ib->length_dw++] = 0x00000018;
> @@ -918,21 +908,23 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
>
>   int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   {
> +     struct amdgpu_device *adev = ring->adev;
>       struct dma_fence *fence = NULL;
> -     struct amdgpu_bo *bo = NULL;
> +     struct amdgpu_ib ib;
>       long r;
>
> -     r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
> -                                   AMDGPU_GEM_DOMAIN_VRAM,
> -                                   &bo, NULL, NULL);
> +     memset(&ib, 0, sizeof(ib));
> +     r = amdgpu_ib_get(adev, NULL, 128 << 10,
> +                     AMDGPU_IB_POOL_DIRECT,
> +                     &ib);
>       if (r)
>               return r;
>
> -     r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL);
> +     r = amdgpu_vcn_enc_get_create_msg(ring, 1, &ib, NULL);
>       if (r)
>               goto error;
>
> -     r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence);
> +     r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &ib, &fence);
>       if (r)
>               goto error;
>
> @@ -943,9 +935,8 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>               r = 0;
>
>   error:
> +     amdgpu_ib_free(adev, &ib, fence);
>       dma_fence_put(fence);
> -     amdgpu_bo_unreserve(bo);
> -     amdgpu_bo_free_kernel(&bo, NULL, NULL);
>
>       return r;
>   }


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: [PATCH 4/4] drm/amdgpu: VCN avoid memory allocation during IB test
  2021-09-10  7:53     ` Re: " Pan, Xinhui
@ 2021-09-10  7:55       ` Christian König
  0 siblings, 0 replies; 18+ messages in thread
From: Christian König @ 2021-09-10  7:55 UTC (permalink / raw)
  To: Pan, Xinhui, amd-gfx; +Cc: Deucher, Alexander

Try that plugin: https://github.com/vivien/vim-linux-coding-style

I've been using it for years and it's really helpful.
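
As an aside, the continuation-line alignment the kernel coding style (and
this plugin) expects looks roughly like this for one of the prototypes from
the patch; the alignment to the opening parenthesis is purely illustrative:

static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
                                   struct amdgpu_ib *ib_msg,
                                   struct dma_fence **fence)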

Christian.

On 10.09.21 09:53, Pan, Xinhui wrote:
> [AMD Official Use Only]
>
> I am using vim with
> set tabstop=8
> set shiftwidth=8
> set softtabstop=8
>
> ________________________________________
> From: Koenig, Christian <Christian.Koenig@amd.com>
> Sent: 2021-09-10 14:33
> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander
> Subject: Re: [PATCH 4/4] drm/amdgpu: VCN avoid memory allocation during IB test
>
>
>
> On 10.09.21 02:38, xinhui pan wrote:
>> alloc extra msg from direct IB pool.
>>
>> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 99 +++++++++++--------------
>>    1 file changed, 45 insertions(+), 54 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
>> index 561296a85b43..b60d5f01fdae 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
>> @@ -541,15 +541,14 @@ int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring)
>>    }
>>
>>    static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
>> -                                struct amdgpu_bo *bo,
>> -                                struct dma_fence **fence)
>> +             struct amdgpu_ib *ib_msg,
>> +             struct dma_fence **fence)
> The parameter indentation here and at a few other places doesn't look
> correct to me. What editor are you using, BTW?
>
> Apart from that the patch is Reviewed-by: Christian König
> <christian.koenig@amd.com>.
>
> Regards,
> Christian.
>
>>    {
>>        struct amdgpu_device *adev = ring->adev;
>>        struct dma_fence *f = NULL;
>>        struct amdgpu_job *job;
>>        struct amdgpu_ib *ib;
>> -     uint64_t addr;
>> -     void *msg = NULL;
>> +     uint64_t addr = ib_msg->gpu_addr;
>>        int i, r;
>>
>>        r = amdgpu_job_alloc_with_ib(adev, 64,
>> @@ -558,8 +557,6 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
>>                goto err;
>>
>>        ib = &job->ibs[0];
>> -     addr = amdgpu_bo_gpu_offset(bo);
>> -     msg = amdgpu_bo_kptr(bo);
>>        ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
>>        ib->ptr[1] = addr;
>>        ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
>> @@ -576,9 +573,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
>>        if (r)
>>                goto err_free;
>>
>> -     amdgpu_bo_fence(bo, f, false);
>> -     amdgpu_bo_unreserve(bo);
>> -     amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
>> +     amdgpu_ib_free(adev, ib_msg, f);
>>
>>        if (fence)
>>                *fence = dma_fence_get(f);
>> @@ -588,27 +583,26 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
>>
>>    err_free:
>>        amdgpu_job_free(job);
>> -
>>    err:
>> -     amdgpu_bo_unreserve(bo);
>> -     amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
>> +     amdgpu_ib_free(adev, ib_msg, f);
>>        return r;
>>    }
>>
>>    static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>> -                                      struct amdgpu_bo **bo)
>> +             struct amdgpu_ib *ib)
>>    {
>>        struct amdgpu_device *adev = ring->adev;
>>        uint32_t *msg;
>>        int r, i;
>>
>> -     *bo = NULL;
>> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>> -                                   AMDGPU_GEM_DOMAIN_VRAM,
>> -                                   bo, NULL, (void **)&msg);
>> +     memset(ib, 0, sizeof(*ib));
>> +     r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
>> +                     AMDGPU_IB_POOL_DIRECT,
>> +                     ib);
>>        if (r)
>>                return r;
>>
>> +     msg = ib->ptr;
>>        msg[0] = cpu_to_le32(0x00000028);
>>        msg[1] = cpu_to_le32(0x00000038);
>>        msg[2] = cpu_to_le32(0x00000001);
>> @@ -630,19 +624,20 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
>>    }
>>
>>    static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>> -                                       struct amdgpu_bo **bo)
>> +                                       struct amdgpu_ib *ib)
>>    {
>>        struct amdgpu_device *adev = ring->adev;
>>        uint32_t *msg;
>>        int r, i;
>>
>> -     *bo = NULL;
>> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>> -                                   AMDGPU_GEM_DOMAIN_VRAM,
>> -                                   bo, NULL, (void **)&msg);
>> +     memset(ib, 0, sizeof(*ib));
>> +     r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
>> +                     AMDGPU_IB_POOL_DIRECT,
>> +                     ib);
>>        if (r)
>>                return r;
>>
>> +     msg = ib->ptr;
>>        msg[0] = cpu_to_le32(0x00000028);
>>        msg[1] = cpu_to_le32(0x00000018);
>>        msg[2] = cpu_to_le32(0x00000000);
>> @@ -658,21 +653,21 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
>>    int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>    {
>>        struct dma_fence *fence = NULL;
>> -     struct amdgpu_bo *bo;
>> +     struct amdgpu_ib ib;
>>        long r;
>>
>> -     r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
>> +     r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
>>        if (r)
>>                goto error;
>>
>> -     r = amdgpu_vcn_dec_send_msg(ring, bo, NULL);
>> +     r = amdgpu_vcn_dec_send_msg(ring, &ib, NULL);
>>        if (r)
>>                goto error;
>> -     r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
>> +     r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
>>        if (r)
>>                goto error;
>>
>> -     r = amdgpu_vcn_dec_send_msg(ring, bo, &fence);
>> +     r = amdgpu_vcn_dec_send_msg(ring, &ib, &fence);
>>        if (r)
>>                goto error;
>>
>> @@ -688,8 +683,8 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>    }
>>
>>    static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
>> -                                struct amdgpu_bo *bo,
>> -                                struct dma_fence **fence)
>> +             struct amdgpu_ib *ib_msg,
>> +             struct dma_fence **fence)
>>    {
>>        struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
>>        const unsigned int ib_size_dw = 64;
>> @@ -697,7 +692,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
>>        struct dma_fence *f = NULL;
>>        struct amdgpu_job *job;
>>        struct amdgpu_ib *ib;
>> -     uint64_t addr;
>> +     uint64_t addr = ib_msg->gpu_addr;
>>        int i, r;
>>
>>        r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4,
>> @@ -706,7 +701,6 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
>>                goto err;
>>
>>        ib = &job->ibs[0];
>> -     addr = amdgpu_bo_gpu_offset(bo);
>>        ib->length_dw = 0;
>>
>>        ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
>> @@ -726,9 +720,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
>>        if (r)
>>                goto err_free;
>>
>> -     amdgpu_bo_fence(bo, f, false);
>> -     amdgpu_bo_unreserve(bo);
>> -     amdgpu_bo_unref(&bo);
>> +     amdgpu_ib_free(adev, ib_msg, f);
>>
>>        if (fence)
>>                *fence = dma_fence_get(f);
>> @@ -738,31 +730,29 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
>>
>>    err_free:
>>        amdgpu_job_free(job);
>> -
>>    err:
>> -     amdgpu_bo_unreserve(bo);
>> -     amdgpu_bo_unref(&bo);
>> +     amdgpu_ib_free(adev, ib_msg, f);
>>        return r;
>>    }
>>
>>    int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>    {
>>        struct dma_fence *fence = NULL;
>> -     struct amdgpu_bo *bo;
>> +     struct amdgpu_ib ib;
>>        long r;
>>
>> -     r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
>> +     r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
>>        if (r)
>>                goto error;
>>
>> -     r = amdgpu_vcn_dec_sw_send_msg(ring, bo, NULL);
>> +     r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, NULL);
>>        if (r)
>>                goto error;
>> -     r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
>> +     r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
>>        if (r)
>>                goto error;
>>
>> -     r = amdgpu_vcn_dec_sw_send_msg(ring, bo, &fence);
>> +     r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, &fence);
>>        if (r)
>>                goto error;
>>
>> @@ -809,7 +799,7 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
>>    }
>>
>>    static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>> -                                      struct amdgpu_bo *bo,
>> +                                      struct amdgpu_ib *ib_msg,
>>                                         struct dma_fence **fence)
>>    {
>>        const unsigned ib_size_dw = 16;
>> @@ -825,7 +815,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
>>                return r;
>>
>>        ib = &job->ibs[0];
>> -     addr = amdgpu_bo_gpu_offset(bo);
>> +     addr = ib_msg->gpu_addr;
>>
>>        ib->length_dw = 0;
>>        ib->ptr[ib->length_dw++] = 0x00000018;
>> @@ -863,7 +853,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
>>    }
>>
>>    static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>> -                                       struct amdgpu_bo *bo,
>> +                                       struct amdgpu_ib *ib_msg,
>>                                          struct dma_fence **fence)
>>    {
>>        const unsigned ib_size_dw = 16;
>> @@ -879,7 +869,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
>>                return r;
>>
>>        ib = &job->ibs[0];
>> -     addr = amdgpu_bo_gpu_offset(bo);
>> +     addr = ib_msg->gpu_addr;
>>
>>        ib->length_dw = 0;
>>        ib->ptr[ib->length_dw++] = 0x00000018;
>> @@ -918,21 +908,23 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
>>
>>    int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>    {
>> +     struct amdgpu_device *adev = ring->adev;
>>        struct dma_fence *fence = NULL;
>> -     struct amdgpu_bo *bo = NULL;
>> +     struct amdgpu_ib ib;
>>        long r;
>>
>> -     r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>> -                                   AMDGPU_GEM_DOMAIN_VRAM,
>> -                                   &bo, NULL, NULL);
>> +     memset(&ib, 0, sizeof(ib));
>> +     r = amdgpu_ib_get(adev, NULL, 128 << 10,
>> +                     AMDGPU_IB_POOL_DIRECT,
>> +                     &ib);
>>        if (r)
>>                return r;
>>
>> -     r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL);
>> +     r = amdgpu_vcn_enc_get_create_msg(ring, 1, &ib, NULL);
>>        if (r)
>>                goto error;
>>
>> -     r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence);
>> +     r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &ib, &fence);
>>        if (r)
>>                goto error;
>>
>> @@ -943,9 +935,8 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>                r = 0;
>>
>>    error:
>> +     amdgpu_ib_free(adev, &ib, fence);
>>        dma_fence_put(fence);
>> -     amdgpu_bo_unreserve(bo);
>> -     amdgpu_bo_free_kernel(&bo, NULL, NULL);
>>
>>        return r;
>>    }


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
  2021-09-10  6:24   ` Christian König
@ 2021-09-10  8:18     ` Pan, Xinhui
  2021-09-10  8:53       ` Christian König
  0 siblings, 1 reply; 18+ messages in thread
From: Pan, Xinhui @ 2021-09-10  8:18 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx; +Cc: Deucher, Alexander

[AMD Official Use Only]

I am wondering if amdgpu_bo_pin would change the BO's placement in the future.
For now, the new placement is calculated as new = old ∩ new.

________________________________________
From: Koenig, Christian <Christian.Koenig@amd.com>
Sent: 2021-09-10 14:24
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

On 10.09.21 02:38, xinhui pan wrote:
> move BO allocation in sw_init.
>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++++++++++++++----------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
>   drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
>   4 files changed, 49 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index d451c359606a..e2eaac941d37 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>       const char *fw_name;
>       const struct common_firmware_header *hdr;
>       unsigned family_id;
> +     struct amdgpu_bo *bo = NULL;
> +     void *addr;
>       int i, j, r;
>
>       INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
> @@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>               adev->uvd.filp[i] = NULL;
>       }
>
> +     r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
> +                     AMDGPU_GEM_DOMAIN_GTT,
> +                     &bo, NULL, &addr);
> +     if (r)
> +             return r;
> +
>       /* from uvd v5.0 HW addressing capacity increased to 64 bits */
> -     if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
> +     if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) {
>               adev->uvd.address_64_bit = true;
> +             amdgpu_bo_kunmap(bo);
> +             amdgpu_bo_unpin(bo);
> +             r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
> +                             0, 256 << 20);

Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
cause I want to remove amdgpu_bo_pin_restricted() sooner or later.
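
Something like this minimal sketch would do, reusing the placement/validate
sequence the patch removes from amdgpu_uvd_send_msg() further down (this
assumes bo is still reserved at this point in sw_init; the error label is a
placeholder):

        struct ttm_operation_ctx ctx = { true, false };

        amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
        amdgpu_uvd_force_into_uvd_segment(bo);
        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
        if (r)
                goto err_unreserve;     /* hypothetical cleanup label */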

> +             if (r) {
> +                     amdgpu_bo_unreserve(bo);
> +                     amdgpu_bo_unref(&bo);
> +                     return r;
> +             }
> +             r = amdgpu_bo_kmap(bo, &addr);
> +             if (r) {
> +                     amdgpu_bo_unpin(bo);
> +                     amdgpu_bo_unreserve(bo);
> +                     amdgpu_bo_unref(&bo);
> +                     return r;
> +             }
> +     }
> +     adev->uvd.ib_bo = bo;
> +     amdgpu_bo_unreserve(bo);
>
>       switch (adev->asic_type) {
>       case CHIP_TONGA:
> @@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
>               for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
>                       amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
>       }
> +     amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, NULL);
>       release_firmware(adev->uvd.fw);
>
>       return 0;
> @@ -1080,23 +1108,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>       unsigned offset_idx = 0;
>       unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
>
> -     amdgpu_bo_kunmap(bo);
> -     amdgpu_bo_unpin(bo);
> -
> -     if (!ring->adev->uvd.address_64_bit) {
> -             struct ttm_operation_ctx ctx = { true, false };
> -
> -             amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
> -             amdgpu_uvd_force_into_uvd_segment(bo);
> -             r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> -             if (r)
> -                     goto err;
> -     }
> -
>       r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
>                                    AMDGPU_IB_POOL_DELAYED, &job);
>       if (r)
> -             goto err;
> +             return r;
>
>       if (adev->asic_type >= CHIP_VEGA10) {
>               offset_idx = 1 + ring->me;
> @@ -1148,8 +1163,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>       }
>
>       amdgpu_bo_fence(bo, f, false);
> -     amdgpu_bo_unreserve(bo);
> -     amdgpu_bo_unref(&bo);
>
>       if (fence)
>               *fence = dma_fence_get(f);
> @@ -1159,10 +1172,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>
>   err_free:
>       amdgpu_job_free(job);
> -
> -err:
> -     amdgpu_bo_unreserve(bo);
> -     amdgpu_bo_unref(&bo);
>       return r;
>   }
>
> @@ -1173,16 +1182,15 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>                             struct dma_fence **fence)
>   {
>       struct amdgpu_device *adev = ring->adev;
> -     struct amdgpu_bo *bo = NULL;
> +     struct amdgpu_bo *bo = adev->uvd.ib_bo;
>       uint32_t *msg;
>       int r, i;
>
> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
> -                                   AMDGPU_GEM_DOMAIN_GTT,
> -                                   &bo, NULL, (void **)&msg);
> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>       if (r)
>               return r;
>
> +     msg = amdgpu_bo_kptr(bo);
>       /* stitch together an UVD create msg */
>       msg[0] = cpu_to_le32(0x00000de4);
>       msg[1] = cpu_to_le32(0x00000000);
> @@ -1198,23 +1206,25 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>       for (i = 11; i < 1024; ++i)
>               msg[i] = cpu_to_le32(0x0);
>
> -     return amdgpu_uvd_send_msg(ring, bo, true, fence);
> +     r = amdgpu_uvd_send_msg(ring, bo, true, fence);
> +
> +     amdgpu_bo_unreserve(bo);
> +     return r;
>   }
>
>   int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>                              bool direct, struct dma_fence **fence)
>   {
>       struct amdgpu_device *adev = ring->adev;
> -     struct amdgpu_bo *bo = NULL;
> +     struct amdgpu_bo *bo = adev->uvd.ib_bo;
>       uint32_t *msg;
>       int r, i;
>
> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
> -                                   AMDGPU_GEM_DOMAIN_GTT,
> -                                   &bo, NULL, (void **)&msg);
> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);

Please use amdgpu_bo_reserve() here and elsewhere as well just to be on
the clean side.
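
I.e. something like this (sketch only; as far as I can see amdgpu_bo_reserve()
does a blocking reserve rather than a trylock, which also matters for the
point below, and the no_intr flag here is just an example):

-       r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
+       r = amdgpu_bo_reserve(bo, true);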

Lockdep will sooner or later complain that we reserve a BO in the reset
path, but that is mostly a theoretical problem and existed before. So
I'm fine with sticking with that for now cause the problem was there
before as well.

It's just that trylock might not work because the BO is busy with
indirect submission.

Apart from those two minor issues the patch looks good to me.

Thanks,
Christian.

>       if (r)
>               return r;
>
> +     msg = amdgpu_bo_kptr(bo);
>       /* stitch together an UVD destroy msg */
>       msg[0] = cpu_to_le32(0x00000de4);
>       msg[1] = cpu_to_le32(0x00000002);
> @@ -1223,7 +1233,10 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>       for (i = 4; i < 1024; ++i)
>               msg[i] = cpu_to_le32(0x0);
>
> -     return amdgpu_uvd_send_msg(ring, bo, direct, fence);
> +     r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
> +
> +     amdgpu_bo_unreserve(bo);
> +     return r;
>   }
>
>   static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
> index edbb8194ee81..76ac9699885d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
> @@ -68,6 +68,7 @@ struct amdgpu_uvd {
>       /* store image width to adjust nb memory state */
>       unsigned                decode_image_width;
>       uint32_t                keyselect;
> +     struct amdgpu_bo        *ib_bo;
>   };
>
>   int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> index bc571833632e..301c0cea7164 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> @@ -332,12 +332,10 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
>   static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   {
>       struct dma_fence *fence = NULL;
> -     struct amdgpu_bo *bo = NULL;
> +     struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>       long r;
>
> -     r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
> -                                   AMDGPU_GEM_DOMAIN_VRAM,
> -                                   &bo, NULL, NULL);
> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>       if (r)
>               return r;
>
> @@ -357,9 +355,7 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>
>   error:
>       dma_fence_put(fence);
> -     amdgpu_bo_unpin(bo);
>       amdgpu_bo_unreserve(bo);
> -     amdgpu_bo_unref(&bo);
>       return r;
>   }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> index b6e82d75561f..efa270288029 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> @@ -338,12 +338,10 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
>   static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>   {
>       struct dma_fence *fence = NULL;
> -     struct amdgpu_bo *bo = NULL;
> +     struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>       long r;
>
> -     r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
> -                                   AMDGPU_GEM_DOMAIN_VRAM,
> -                                   &bo, NULL, NULL);
> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>       if (r)
>               return r;
>
> @@ -363,9 +361,7 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>
>   error:
>       dma_fence_put(fence);
> -     amdgpu_bo_unpin(bo);
>       amdgpu_bo_unreserve(bo);
> -     amdgpu_bo_unref(&bo);
>       return r;
>   }
>


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
  2021-09-10  8:18     ` Re: " Pan, Xinhui
@ 2021-09-10  8:53       ` Christian König
  2021-09-10  9:42         ` Re: " Pan, Xinhui
  0 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2021-09-10  8:53 UTC (permalink / raw)
  To: Pan, Xinhui, amd-gfx; +Cc: Deucher, Alexander

It should not, unless we are OOM, which should not happen during driver
initialization.

But there is another problem I'm thinking about: Do we still use UVD IB
submissions to forcefully tear down UVD sessions when a process crashes?

If yes, that stuff could easily deadlock with an IB test executed during
GPU reset.

Christian.

On 10.09.21 10:18, Pan, Xinhui wrote:
> [AMD Official Use Only]
>
> I am wondering if amdgpu_bo_pin would change the BO's placement in the future.
> For now, the new placement is calculated as new = old ∩ new.
>
> ________________________________________
> From: Koenig, Christian <Christian.Koenig@amd.com>
> Sent: 2021-09-10 14:24
> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander
> Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>
> On 10.09.21 02:38, xinhui pan wrote:
>> move BO allocation in sw_init.
>>
>> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++++++++++++++----------
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
>>    drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
>>    drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
>>    4 files changed, 49 insertions(+), 43 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> index d451c359606a..e2eaac941d37 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> @@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>        const char *fw_name;
>>        const struct common_firmware_header *hdr;
>>        unsigned family_id;
>> +     struct amdgpu_bo *bo = NULL;
>> +     void *addr;
>>        int i, j, r;
>>
>>        INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
>> @@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>                adev->uvd.filp[i] = NULL;
>>        }
>>
>> +     r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
>> +                     AMDGPU_GEM_DOMAIN_GTT,
>> +                     &bo, NULL, &addr);
>> +     if (r)
>> +             return r;
>> +
>>        /* from uvd v5.0 HW addressing capacity increased to 64 bits */
>> -     if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
>> +     if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) {
>>                adev->uvd.address_64_bit = true;
>> +             amdgpu_bo_kunmap(bo);
>> +             amdgpu_bo_unpin(bo);
>> +             r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
>> +                             0, 256 << 20);
> Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
> cause I want to remove amdgpu_bo_pin_restricted() sooner or later.
>
>> +             if (r) {
>> +                     amdgpu_bo_unreserve(bo);
>> +                     amdgpu_bo_unref(&bo);
>> +                     return r;
>> +             }
>> +             r = amdgpu_bo_kmap(bo, &addr);
>> +             if (r) {
>> +                     amdgpu_bo_unpin(bo);
>> +                     amdgpu_bo_unreserve(bo);
>> +                     amdgpu_bo_unref(&bo);
>> +                     return r;
>> +             }
>> +     }
>> +     adev->uvd.ib_bo = bo;
>> +     amdgpu_bo_unreserve(bo);
>>
>>        switch (adev->asic_type) {
>>        case CHIP_TONGA:
>> @@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
>>                for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
>>                        amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
>>        }
>> +     amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, NULL);
>>        release_firmware(adev->uvd.fw);
>>
>>        return 0;
>> @@ -1080,23 +1108,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>        unsigned offset_idx = 0;
>>        unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
>>
>> -     amdgpu_bo_kunmap(bo);
>> -     amdgpu_bo_unpin(bo);
>> -
>> -     if (!ring->adev->uvd.address_64_bit) {
>> -             struct ttm_operation_ctx ctx = { true, false };
>> -
>> -             amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
>> -             amdgpu_uvd_force_into_uvd_segment(bo);
>> -             r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>> -             if (r)
>> -                     goto err;
>> -     }
>> -
>>        r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
>>                                     AMDGPU_IB_POOL_DELAYED, &job);
>>        if (r)
>> -             goto err;
>> +             return r;
>>
>>        if (adev->asic_type >= CHIP_VEGA10) {
>>                offset_idx = 1 + ring->me;
>> @@ -1148,8 +1163,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>        }
>>
>>        amdgpu_bo_fence(bo, f, false);
>> -     amdgpu_bo_unreserve(bo);
>> -     amdgpu_bo_unref(&bo);
>>
>>        if (fence)
>>                *fence = dma_fence_get(f);
>> @@ -1159,10 +1172,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>
>>    err_free:
>>        amdgpu_job_free(job);
>> -
>> -err:
>> -     amdgpu_bo_unreserve(bo);
>> -     amdgpu_bo_unref(&bo);
>>        return r;
>>    }
>>
>> @@ -1173,16 +1182,15 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>                              struct dma_fence **fence)
>>    {
>>        struct amdgpu_device *adev = ring->adev;
>> -     struct amdgpu_bo *bo = NULL;
>> +     struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>        uint32_t *msg;
>>        int r, i;
>>
>> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>> -                                   AMDGPU_GEM_DOMAIN_GTT,
>> -                                   &bo, NULL, (void **)&msg);
>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>        if (r)
>>                return r;
>>
>> +     msg = amdgpu_bo_kptr(bo);
>>        /* stitch together an UVD create msg */
>>        msg[0] = cpu_to_le32(0x00000de4);
>>        msg[1] = cpu_to_le32(0x00000000);
>> @@ -1198,23 +1206,25 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>        for (i = 11; i < 1024; ++i)
>>                msg[i] = cpu_to_le32(0x0);
>>
>> -     return amdgpu_uvd_send_msg(ring, bo, true, fence);
>> +     r = amdgpu_uvd_send_msg(ring, bo, true, fence);
>> +
>> +     amdgpu_bo_unreserve(bo);
>> +     return r;
>>    }
>>
>>    int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>                               bool direct, struct dma_fence **fence)
>>    {
>>        struct amdgpu_device *adev = ring->adev;
>> -     struct amdgpu_bo *bo = NULL;
>> +     struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>        uint32_t *msg;
>>        int r, i;
>>
>> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>> -                                   AMDGPU_GEM_DOMAIN_GTT,
>> -                                   &bo, NULL, (void **)&msg);
>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
> Please use amdgpu_bo_reserve() here and elsewhere as well just to be on
> the clean side.
>
> Lockdep will sooner or later complain that we reserve a BO in the reset
> path, but that is mostly a theoretical problem and existed before. So
> I'm fine with sticking with that for now cause the problem was there
> before as well.
>
> It's just that trylock might not work because the BO is busy with
> indirect submission.
>
> Apart from those two minor issues the patch looks good to me.
>
> Thanks,
> Christian.
>
>>        if (r)
>>                return r;
>>
>> +     msg = amdgpu_bo_kptr(bo);
>>        /* stitch together an UVD destroy msg */
>>        msg[0] = cpu_to_le32(0x00000de4);
>>        msg[1] = cpu_to_le32(0x00000002);
>> @@ -1223,7 +1233,10 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>        for (i = 4; i < 1024; ++i)
>>                msg[i] = cpu_to_le32(0x0);
>>
>> -     return amdgpu_uvd_send_msg(ring, bo, direct, fence);
>> +     r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
>> +
>> +     amdgpu_bo_unreserve(bo);
>> +     return r;
>>    }
>>
>>    static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>> index edbb8194ee81..76ac9699885d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>> @@ -68,6 +68,7 @@ struct amdgpu_uvd {
>>        /* store image width to adjust nb memory state */
>>        unsigned                decode_image_width;
>>        uint32_t                keyselect;
>> +     struct amdgpu_bo        *ib_bo;
>>    };
>>
>>    int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>> index bc571833632e..301c0cea7164 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>> @@ -332,12 +332,10 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
>>    static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>    {
>>        struct dma_fence *fence = NULL;
>> -     struct amdgpu_bo *bo = NULL;
>> +     struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>        long r;
>>
>> -     r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>> -                                   AMDGPU_GEM_DOMAIN_VRAM,
>> -                                   &bo, NULL, NULL);
>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>        if (r)
>>                return r;
>>
>> @@ -357,9 +355,7 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>
>>    error:
>>        dma_fence_put(fence);
>> -     amdgpu_bo_unpin(bo);
>>        amdgpu_bo_unreserve(bo);
>> -     amdgpu_bo_unref(&bo);
>>        return r;
>>    }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>> index b6e82d75561f..efa270288029 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>> @@ -338,12 +338,10 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
>>    static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>    {
>>        struct dma_fence *fence = NULL;
>> -     struct amdgpu_bo *bo = NULL;
>> +     struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>        long r;
>>
>> -     r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>> -                                   AMDGPU_GEM_DOMAIN_VRAM,
>> -                                   &bo, NULL, NULL);
>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>        if (r)
>>                return r;
>>
>> @@ -363,9 +361,7 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>
>>    error:
>>        dma_fence_put(fence);
>> -     amdgpu_bo_unpin(bo);
>>        amdgpu_bo_unreserve(bo);
>> -     amdgpu_bo_unref(&bo);
>>        return r;
>>    }
>>


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
  2021-09-10  8:53       ` Christian König
@ 2021-09-10  9:42         ` Pan, Xinhui
  2021-09-10 10:02           ` Christian König
  0 siblings, 1 reply; 18+ messages in thread
From: Pan, Xinhui @ 2021-09-10  9:42 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx; +Cc: Deucher, Alexander

[AMD Official Use Only]

Oh, god. The UVD free handler submits a delayed msg which depends on the scheduler while the reservation lock is held.
This is not allowed, as commit c8e42d57859d ("drm/amdgpu: implement more ib pools (v2)") says:
any jobs which schedule IBs without dependence on the gpu scheduler should use the DIRECT pool.

Looks like we should only use the reserved BO for direct IB submission.
For delayed IB submission, we could allocate a new one dynamically.
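
A rough sketch of that split (purely illustrative; assumes it sits at the top
of amdgpu_uvd_get_destroy_msg() where adev, r and the direct flag are already
in scope, and size and error handling are placeholders):

        struct amdgpu_bo *bo;
        void *msg;

        if (direct) {
                /* IB test / reset path: reuse the pre-allocated BO */
                bo = adev->uvd.ib_bo;
                msg = amdgpu_bo_kptr(bo);
        } else {
                /* delayed submission (e.g. handle teardown): allocate a
                 * fresh BO so the pre-allocated one never carries a
                 * scheduler dependency
                 */
                r = amdgpu_bo_create_reserved(adev, PAGE_SIZE, PAGE_SIZE,
                                              AMDGPU_GEM_DOMAIN_GTT,
                                              &bo, NULL, &msg);
                if (r)
                        return r;
        }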
________________________________________
From: Koenig, Christian <Christian.Koenig@amd.com>
Sent: 2021-09-10 16:53
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

It should not, unless we are OOM, which should not happen during driver
initialization.

But there is another problem I'm thinking about: Do we still use UVD IB
submissions to forcefully tear down UVD sessions when a process crashes?

If yes, that stuff could easily deadlock with an IB test executed during
GPU reset.

Christian.

On 10.09.21 10:18, Pan, Xinhui wrote:
> [AMD Official Use Only]
>
> I am wondering if amdgpu_bo_pin would change the BO's placement in the future.
> For now, the new placement is calculated as new = old ∩ new.
>
> ________________________________________
> From: Koenig, Christian <Christian.Koenig@amd.com>
> Sent: 2021-09-10 14:24
> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander
> Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>
> On 10.09.21 02:38, xinhui pan wrote:
>> move BO allocation in sw_init.
>>
>> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++++++++++++++----------
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
>>    drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
>>    drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
>>    4 files changed, 49 insertions(+), 43 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> index d451c359606a..e2eaac941d37 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> @@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>        const char *fw_name;
>>        const struct common_firmware_header *hdr;
>>        unsigned family_id;
>> +     struct amdgpu_bo *bo = NULL;
>> +     void *addr;
>>        int i, j, r;
>>
>>        INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
>> @@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>                adev->uvd.filp[i] = NULL;
>>        }
>>
>> +     r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
>> +                     AMDGPU_GEM_DOMAIN_GTT,
>> +                     &bo, NULL, &addr);
>> +     if (r)
>> +             return r;
>> +
>>        /* from uvd v5.0 HW addressing capacity increased to 64 bits */
>> -     if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
>> +     if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) {
>>                adev->uvd.address_64_bit = true;
>> +             amdgpu_bo_kunmap(bo);
>> +             amdgpu_bo_unpin(bo);
>> +             r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
>> +                             0, 256 << 20);
> Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
> cause I want to remove amdgpu_bo_pin_restricted() sooner or later.
>
>> +             if (r) {
>> +                     amdgpu_bo_unreserve(bo);
>> +                     amdgpu_bo_unref(&bo);
>> +                     return r;
>> +             }
>> +             r = amdgpu_bo_kmap(bo, &addr);
>> +             if (r) {
>> +                     amdgpu_bo_unpin(bo);
>> +                     amdgpu_bo_unreserve(bo);
>> +                     amdgpu_bo_unref(&bo);
>> +                     return r;
>> +             }
>> +     }
>> +     adev->uvd.ib_bo = bo;
>> +     amdgpu_bo_unreserve(bo);
>>
>>        switch (adev->asic_type) {
>>        case CHIP_TONGA:
>> @@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
>>                for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
>>                        amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
>>        }
>> +     amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, NULL);
>>        release_firmware(adev->uvd.fw);
>>
>>        return 0;
>> @@ -1080,23 +1108,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>        unsigned offset_idx = 0;
>>        unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
>>
>> -     amdgpu_bo_kunmap(bo);
>> -     amdgpu_bo_unpin(bo);
>> -
>> -     if (!ring->adev->uvd.address_64_bit) {
>> -             struct ttm_operation_ctx ctx = { true, false };
>> -
>> -             amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
>> -             amdgpu_uvd_force_into_uvd_segment(bo);
>> -             r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>> -             if (r)
>> -                     goto err;
>> -     }
>> -
>>        r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
>>                                     AMDGPU_IB_POOL_DELAYED, &job);
>>        if (r)
>> -             goto err;
>> +             return r;
>>
>>        if (adev->asic_type >= CHIP_VEGA10) {
>>                offset_idx = 1 + ring->me;
>> @@ -1148,8 +1163,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>        }
>>
>>        amdgpu_bo_fence(bo, f, false);
>> -     amdgpu_bo_unreserve(bo);
>> -     amdgpu_bo_unref(&bo);
>>
>>        if (fence)
>>                *fence = dma_fence_get(f);
>> @@ -1159,10 +1172,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>
>>    err_free:
>>        amdgpu_job_free(job);
>> -
>> -err:
>> -     amdgpu_bo_unreserve(bo);
>> -     amdgpu_bo_unref(&bo);
>>        return r;
>>    }
>>
>> @@ -1173,16 +1182,15 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>                              struct dma_fence **fence)
>>    {
>>        struct amdgpu_device *adev = ring->adev;
>> -     struct amdgpu_bo *bo = NULL;
>> +     struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>        uint32_t *msg;
>>        int r, i;
>>
>> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>> -                                   AMDGPU_GEM_DOMAIN_GTT,
>> -                                   &bo, NULL, (void **)&msg);
>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>        if (r)
>>                return r;
>>
>> +     msg = amdgpu_bo_kptr(bo);
>>        /* stitch together an UVD create msg */
>>        msg[0] = cpu_to_le32(0x00000de4);
>>        msg[1] = cpu_to_le32(0x00000000);
>> @@ -1198,23 +1206,25 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>        for (i = 11; i < 1024; ++i)
>>                msg[i] = cpu_to_le32(0x0);
>>
>> -     return amdgpu_uvd_send_msg(ring, bo, true, fence);
>> +     r = amdgpu_uvd_send_msg(ring, bo, true, fence);
>> +
>> +     amdgpu_bo_unreserve(bo);
>> +     return r;
>>    }
>>
>>    int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>                               bool direct, struct dma_fence **fence)
>>    {
>>        struct amdgpu_device *adev = ring->adev;
>> -     struct amdgpu_bo *bo = NULL;
>> +     struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>        uint32_t *msg;
>>        int r, i;
>>
>> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>> -                                   AMDGPU_GEM_DOMAIN_GTT,
>> -                                   &bo, NULL, (void **)&msg);
>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
> Please use amdgpu_bo_reserve() here and elsewhere as well just to be on
> the clean side.
>
> Lockdep will sooner or later complain that we reserve a BO in the reset
> path, but that is mostly a theoretical problem and existed before. So
> I'm fine with sticking with that for now cause the problem was there
> before as well.
>
> It's just that trylock might not work because the BO is busy with
> indirect submission.
>
> Apart from those two minor issues the patch looks good to me.
>
> Thanks,
> Christian.
>
>>        if (r)
>>                return r;
>>
>> +     msg = amdgpu_bo_kptr(bo);
>>        /* stitch together an UVD destroy msg */
>>        msg[0] = cpu_to_le32(0x00000de4);
>>        msg[1] = cpu_to_le32(0x00000002);
>> @@ -1223,7 +1233,10 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>        for (i = 4; i < 1024; ++i)
>>                msg[i] = cpu_to_le32(0x0);
>>
>> -     return amdgpu_uvd_send_msg(ring, bo, direct, fence);
>> +     r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
>> +
>> +     amdgpu_bo_unreserve(bo);
>> +     return r;
>>    }
>>
>>    static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>> index edbb8194ee81..76ac9699885d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>> @@ -68,6 +68,7 @@ struct amdgpu_uvd {
>>        /* store image width to adjust nb memory state */
>>        unsigned                decode_image_width;
>>        uint32_t                keyselect;
>> +     struct amdgpu_bo        *ib_bo;
>>    };
>>
>>    int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>> index bc571833632e..301c0cea7164 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>> @@ -332,12 +332,10 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
>>    static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>    {
>>        struct dma_fence *fence = NULL;
>> -     struct amdgpu_bo *bo = NULL;
>> +     struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>        long r;
>>
>> -     r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>> -                                   AMDGPU_GEM_DOMAIN_VRAM,
>> -                                   &bo, NULL, NULL);
>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>        if (r)
>>                return r;
>>
>> @@ -357,9 +355,7 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>
>>    error:
>>        dma_fence_put(fence);
>> -     amdgpu_bo_unpin(bo);
>>        amdgpu_bo_unreserve(bo);
>> -     amdgpu_bo_unref(&bo);
>>        return r;
>>    }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>> index b6e82d75561f..efa270288029 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>> @@ -338,12 +338,10 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
>>    static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>    {
>>        struct dma_fence *fence = NULL;
>> -     struct amdgpu_bo *bo = NULL;
>> +     struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>        long r;
>>
>> -     r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>> -                                   AMDGPU_GEM_DOMAIN_VRAM,
>> -                                   &bo, NULL, NULL);
>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>        if (r)
>>                return r;
>>
>> @@ -363,9 +361,7 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>
>>    error:
>>        dma_fence_put(fence);
>> -     amdgpu_bo_unpin(bo);
>>        amdgpu_bo_unreserve(bo);
>> -     amdgpu_bo_unref(&bo);
>>        return r;
>>    }
>>


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
  2021-09-10  9:42         ` Re: " Pan, Xinhui
@ 2021-09-10 10:02           ` Christian König
  2021-09-10 10:10             ` Re: " Pan, Xinhui
  0 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2021-09-10 10:02 UTC (permalink / raw)
  To: Pan, Xinhui, amd-gfx; +Cc: Deucher, Alexander

*sigh* yeah, you are probably right. Wouldn't be too simple if this were
easy, would it?

In this case you can also skip taking the reservation lock for the 
pre-allocated BO, since there should absolutely be only one user at a time.

Christian.

On 10.09.21 11:42, Pan, Xinhui wrote:
> [AMD Official Use Only]
>
> Oh, god. The UVD free handler submits a delayed msg which depends on the scheduler while the reservation lock is held.
> This is not allowed, as commit c8e42d57859d ("drm/amdgpu: implement more ib pools (v2)") says:
> any jobs which schedule IBs without dependence on the gpu scheduler should use the DIRECT pool.
>
> Looks like we should only use the reserved BO for direct IB submission.
> For delayed IB submission, we could allocate a new one dynamically.
> ________________________________________
> From: Koenig, Christian <Christian.Koenig@amd.com>
> Sent: 2021-09-10 16:53
> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander
> Subject: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>
> It should not, unless we are OOM, which should not happen during driver
> initialization.
>
> But there is another problem I'm thinking about: Do we still use UVD IB
> submissions to forcefully tear down UVD sessions when a process crashes?
>
> If yes, that stuff could easily deadlock with an IB test executed during
> GPU reset.
>
> Christian.
>
> On 10.09.21 10:18, Pan, Xinhui wrote:
>> [AMD Official Use Only]
>>
>> I am wondering if amdgpu_bo_pin would change the BO's placement in the future.
>> For now, the new placement is calculated as new = old ∩ new.
>>
>> ________________________________________
>> From: Koenig, Christian <Christian.Koenig@amd.com>
>> Sent: 2021-09-10 14:24
>> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander
>> Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>>
>> On 10.09.21 02:38, xinhui pan wrote:
>>> move BO allocation in sw_init.
>>>
>>> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
>>> ---
>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++++++++++++++----------
>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
>>>     drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
>>>     drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
>>>     4 files changed, 49 insertions(+), 43 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> index d451c359606a..e2eaac941d37 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> @@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>>         const char *fw_name;
>>>         const struct common_firmware_header *hdr;
>>>         unsigned family_id;
>>> +     struct amdgpu_bo *bo = NULL;
>>> +     void *addr;
>>>         int i, j, r;
>>>
>>>         INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
>>> @@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>>                 adev->uvd.filp[i] = NULL;
>>>         }
>>>
>>> +     r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
>>> +                     AMDGPU_GEM_DOMAIN_GTT,
>>> +                     &bo, NULL, &addr);
>>> +     if (r)
>>> +             return r;
>>> +
>>>         /* from uvd v5.0 HW addressing capacity increased to 64 bits */
>>> -     if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
>>> +     if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) {
>>>                 adev->uvd.address_64_bit = true;
>>> +             amdgpu_bo_kunmap(bo);
>>> +             amdgpu_bo_unpin(bo);
>>> +             r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
>>> +                             0, 256 << 20);
>> Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
>> cause I want to remove amdgpu_bo_pin_restricted() sooner or later.
>>
>>> +             if (r) {
>>> +                     amdgpu_bo_unreserve(bo);
>>> +                     amdgpu_bo_unref(&bo);
>>> +                     return r;
>>> +             }
>>> +             r = amdgpu_bo_kmap(bo, &addr);
>>> +             if (r) {
>>> +                     amdgpu_bo_unpin(bo);
>>> +                     amdgpu_bo_unreserve(bo);
>>> +                     amdgpu_bo_unref(&bo);
>>> +                     return r;
>>> +             }
>>> +     }
>>> +     adev->uvd.ib_bo = bo;
>>> +     amdgpu_bo_unreserve(bo);
>>>
>>>         switch (adev->asic_type) {
>>>         case CHIP_TONGA:
>>> @@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
>>>                 for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
>>>                         amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
>>>         }
>>> +     amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, NULL);
>>>         release_firmware(adev->uvd.fw);
>>>
>>>         return 0;
>>> @@ -1080,23 +1108,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>         unsigned offset_idx = 0;
>>>         unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
>>>
>>> -     amdgpu_bo_kunmap(bo);
>>> -     amdgpu_bo_unpin(bo);
>>> -
>>> -     if (!ring->adev->uvd.address_64_bit) {
>>> -             struct ttm_operation_ctx ctx = { true, false };
>>> -
>>> -             amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
>>> -             amdgpu_uvd_force_into_uvd_segment(bo);
>>> -             r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>>> -             if (r)
>>> -                     goto err;
>>> -     }
>>> -
>>>         r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
>>>                                      AMDGPU_IB_POOL_DELAYED, &job);
>>>         if (r)
>>> -             goto err;
>>> +             return r;
>>>
>>>         if (adev->asic_type >= CHIP_VEGA10) {
>>>                 offset_idx = 1 + ring->me;
>>> @@ -1148,8 +1163,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>         }
>>>
>>>         amdgpu_bo_fence(bo, f, false);
>>> -     amdgpu_bo_unreserve(bo);
>>> -     amdgpu_bo_unref(&bo);
>>>
>>>         if (fence)
>>>                 *fence = dma_fence_get(f);
>>> @@ -1159,10 +1172,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>
>>>     err_free:
>>>         amdgpu_job_free(job);
>>> -
>>> -err:
>>> -     amdgpu_bo_unreserve(bo);
>>> -     amdgpu_bo_unref(&bo);
>>>         return r;
>>>     }
>>>
>>> @@ -1173,16 +1182,15 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>                               struct dma_fence **fence)
>>>     {
>>>         struct amdgpu_device *adev = ring->adev;
>>> -     struct amdgpu_bo *bo = NULL;
>>> +     struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>>         uint32_t *msg;
>>>         int r, i;
>>>
>>> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>>> -                                   AMDGPU_GEM_DOMAIN_GTT,
>>> -                                   &bo, NULL, (void **)&msg);
>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>         if (r)
>>>                 return r;
>>>
>>> +     msg = amdgpu_bo_kptr(bo);
>>>         /* stitch together an UVD create msg */
>>>         msg[0] = cpu_to_le32(0x00000de4);
>>>         msg[1] = cpu_to_le32(0x00000000);
>>> @@ -1198,23 +1206,25 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>         for (i = 11; i < 1024; ++i)
>>>                 msg[i] = cpu_to_le32(0x0);
>>>
>>> -     return amdgpu_uvd_send_msg(ring, bo, true, fence);
>>> +     r = amdgpu_uvd_send_msg(ring, bo, true, fence);
>>> +
>>> +     amdgpu_bo_unreserve(bo);
>>> +     return r;
>>>     }
>>>
>>>     int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>                                bool direct, struct dma_fence **fence)
>>>     {
>>>         struct amdgpu_device *adev = ring->adev;
>>> -     struct amdgpu_bo *bo = NULL;
>>> +     struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>>         uint32_t *msg;
>>>         int r, i;
>>>
>>> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>>> -                                   AMDGPU_GEM_DOMAIN_GTT,
>>> -                                   &bo, NULL, (void **)&msg);
>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>> Please use amdgpu_bo_reserve() here and elsewhere as well just to be on
>> the clean side.
>>
>> Lockdep will sooner or later complain that we reserve a BO in the reset
>> path, but that is mostly a theoretical problem and existed before. So
>> I'm fine with sticking with that for now cause the problem was there
>> before as well.
>>
>> It's just that trylock might not work because the BO is busy with
>> indirect submission.
>>
>> Apart from those two minor issues the patch looks good to me.
>>
>> Thanks,
>> Christian.
>>
>>>         if (r)
>>>                 return r;
>>>
>>> +     msg = amdgpu_bo_kptr(bo);
>>>         /* stitch together an UVD destroy msg */
>>>         msg[0] = cpu_to_le32(0x00000de4);
>>>         msg[1] = cpu_to_le32(0x00000002);
>>> @@ -1223,7 +1233,10 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>         for (i = 4; i < 1024; ++i)
>>>                 msg[i] = cpu_to_le32(0x0);
>>>
>>> -     return amdgpu_uvd_send_msg(ring, bo, direct, fence);
>>> +     r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
>>> +
>>> +     amdgpu_bo_unreserve(bo);
>>> +     return r;
>>>     }
>>>
>>>     static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>> index edbb8194ee81..76ac9699885d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>> @@ -68,6 +68,7 @@ struct amdgpu_uvd {
>>>         /* store image width to adjust nb memory state */
>>>         unsigned                decode_image_width;
>>>         uint32_t                keyselect;
>>> +     struct amdgpu_bo        *ib_bo;
>>>     };
>>>
>>>     int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> index bc571833632e..301c0cea7164 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> @@ -332,12 +332,10 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
>>>     static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>     {
>>>         struct dma_fence *fence = NULL;
>>> -     struct amdgpu_bo *bo = NULL;
>>> +     struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>>         long r;
>>>
>>> -     r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>>> -                                   AMDGPU_GEM_DOMAIN_VRAM,
>>> -                                   &bo, NULL, NULL);
>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>         if (r)
>>>                 return r;
>>>
>>> @@ -357,9 +355,7 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>
>>>     error:
>>>         dma_fence_put(fence);
>>> -     amdgpu_bo_unpin(bo);
>>>         amdgpu_bo_unreserve(bo);
>>> -     amdgpu_bo_unref(&bo);
>>>         return r;
>>>     }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> index b6e82d75561f..efa270288029 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> @@ -338,12 +338,10 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
>>>     static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>     {
>>>         struct dma_fence *fence = NULL;
>>> -     struct amdgpu_bo *bo = NULL;
>>> +     struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>>         long r;
>>>
>>> -     r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>>> -                                   AMDGPU_GEM_DOMAIN_VRAM,
>>> -                                   &bo, NULL, NULL);
>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>         if (r)
>>>                 return r;
>>>
>>> @@ -363,9 +361,7 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>
>>>     error:
>>>         dma_fence_put(fence);
>>> -     amdgpu_bo_unpin(bo);
>>>         amdgpu_bo_unreserve(bo);
>>> -     amdgpu_bo_unref(&bo);
>>>         return r;
>>>     }
>>>
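
For reference, the amdgpu_bo_reserve() helper suggested above boils down to
something like the following (simplified from memory, so take the exact
signature and the dropped error printing as an assumption, not a verbatim
copy of amdgpu_object.h):

/* Roughly what the suggested helper does: block until the reservation can
 * be taken, instead of the no_wait trylock that the patch open-codes.
 */
static inline int amdgpu_bo_reserve(struct amdgpu_bo *bo, bool no_intr)
{
        return ttm_bo_reserve(&bo->tbo, !no_intr, false, NULL);
}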


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
  2021-09-10 10:02           ` Christian König
@ 2021-09-10 10:10             ` Pan, Xinhui
  2021-09-10 11:10               ` Christian König
  0 siblings, 1 reply; 18+ messages in thread
From: Pan, Xinhui @ 2021-09-10 10:10 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx; +Cc: Deucher, Alexander

[AMD Official Use Only]

We need to take this lock.
The IB test can be triggered through debugfs. These days I usually test it by cat'ing the gpu recovery and amdgpu_test_ib files in debugfs.
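
Something like the following is what I have in mind for the shared BO in
amdgpu_uvd_get_destroy_msg(), just a sketch to show the locking (message
setup and error handling trimmed), not the final patch:

        struct amdgpu_bo *bo = adev->uvd.ib_bo;
        uint32_t *msg;
        int r;

        /* block until the BO can be reserved instead of trylock'ing it */
        r = amdgpu_bo_reserve(bo, true);
        if (r)
                return r;

        msg = amdgpu_bo_kptr(bo);
        /* ... stitch together the UVD destroy msg as before ... */

        r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
        amdgpu_bo_unreserve(bo);
        return r;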


________________________________________
From: Koenig, Christian <Christian.Koenig@amd.com>
Sent: September 10, 2021 18:02
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

*sigh* yeah, you are probably right. It would be too simple if this were
easy, wouldn't it?

In this case you can also skip taking the reservation lock for the
pre-allocated BO, since there should absolutely be only one user at a time.

Christian.

On 10.09.21 at 11:42, Pan, Xinhui wrote:
> [AMD Official Use Only]
>
> oh, god. uvd free handler submit delayed msg which depends on scheduler with reservation lock hold.
> This is not allowed as commit c8e42d57859d "drm/amdgpu: implement more ib pools (v2)" says
> Any jobs which schedule IBs without dependence on gpu scheduler should use DIRECT pool.
>
> Looks like we should only use reserved BO for direct IB submission.
> As for delayed IB submission, we could alloc a new one dynamicly.
> ________________________________________
> From: Koenig, Christian <Christian.Koenig@amd.com>
> Sent: September 10, 2021 16:53
> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander
> Subject: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>
> It should not unless we are OOM which should not happen during driver
> initialization.
>
> But there is another problem I'm thinking about: Do we still use UVD IB
> submissions to forcefully tear down UVD sessions when a process crashes?
>
> If yes that stuff could easily deadlock with an IB test executed during
> GPU reset.
>
> Christian.
>
> On 10.09.21 at 10:18, Pan, Xinhui wrote:
>> [AMD Official Use Only]
>>
>> I am wondering if amdgpu_bo_pin would change BO's placement in the futrue.
>> For now, the new placement is calculated by new = old ∩ new.
>>
>> ________________________________________
>> From: Koenig, Christian <Christian.Koenig@amd.com>
>> Sent: September 10, 2021 14:24
>> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander
>> Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>>
>> On 10.09.21 at 02:38, xinhui pan wrote:
>>> move BO allocation in sw_init.
>>>
>>> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
>>> ---
>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++++++++++++++----------
>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
>>>     drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
>>>     drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
>>>     4 files changed, 49 insertions(+), 43 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> index d451c359606a..e2eaac941d37 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> @@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>>         const char *fw_name;
>>>         const struct common_firmware_header *hdr;
>>>         unsigned family_id;
>>> +     struct amdgpu_bo *bo = NULL;
>>> +     void *addr;
>>>         int i, j, r;
>>>
>>>         INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
>>> @@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>>                 adev->uvd.filp[i] = NULL;
>>>         }
>>>
>>> +     r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
>>> +                     AMDGPU_GEM_DOMAIN_GTT,
>>> +                     &bo, NULL, &addr);
>>> +     if (r)
>>> +             return r;
>>> +
>>>         /* from uvd v5.0 HW addressing capacity increased to 64 bits */
>>> -     if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
>>> +     if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) {
>>>                 adev->uvd.address_64_bit = true;
>>> +             amdgpu_bo_kunmap(bo);
>>> +             amdgpu_bo_unpin(bo);
>>> +             r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
>>> +                             0, 256 << 20);
>> Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
>> cause I want to remove amdgpu_bo_pin_restricted() sooner or later.
>>
>>> +             if (r) {
>>> +                     amdgpu_bo_unreserve(bo);
>>> +                     amdgpu_bo_unref(&bo);
>>> +                     return r;
>>> +             }
>>> +             r = amdgpu_bo_kmap(bo, &addr);
>>> +             if (r) {
>>> +                     amdgpu_bo_unpin(bo);
>>> +                     amdgpu_bo_unreserve(bo);
>>> +                     amdgpu_bo_unref(&bo);
>>> +                     return r;
>>> +             }
>>> +     }
>>> +     adev->uvd.ib_bo = bo;
>>> +     amdgpu_bo_unreserve(bo);
>>>
>>>         switch (adev->asic_type) {
>>>         case CHIP_TONGA:
>>> @@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
>>>                 for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
>>>                         amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
>>>         }
>>> +     amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, NULL);
>>>         release_firmware(adev->uvd.fw);
>>>
>>>         return 0;
>>> @@ -1080,23 +1108,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>         unsigned offset_idx = 0;
>>>         unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
>>>
>>> -     amdgpu_bo_kunmap(bo);
>>> -     amdgpu_bo_unpin(bo);
>>> -
>>> -     if (!ring->adev->uvd.address_64_bit) {
>>> -             struct ttm_operation_ctx ctx = { true, false };
>>> -
>>> -             amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
>>> -             amdgpu_uvd_force_into_uvd_segment(bo);
>>> -             r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>>> -             if (r)
>>> -                     goto err;
>>> -     }
>>> -
>>>         r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
>>>                                      AMDGPU_IB_POOL_DELAYED, &job);
>>>         if (r)
>>> -             goto err;
>>> +             return r;
>>>
>>>         if (adev->asic_type >= CHIP_VEGA10) {
>>>                 offset_idx = 1 + ring->me;
>>> @@ -1148,8 +1163,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>         }
>>>
>>>         amdgpu_bo_fence(bo, f, false);
>>> -     amdgpu_bo_unreserve(bo);
>>> -     amdgpu_bo_unref(&bo);
>>>
>>>         if (fence)
>>>                 *fence = dma_fence_get(f);
>>> @@ -1159,10 +1172,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>
>>>     err_free:
>>>         amdgpu_job_free(job);
>>> -
>>> -err:
>>> -     amdgpu_bo_unreserve(bo);
>>> -     amdgpu_bo_unref(&bo);
>>>         return r;
>>>     }
>>>
>>> @@ -1173,16 +1182,15 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>                               struct dma_fence **fence)
>>>     {
>>>         struct amdgpu_device *adev = ring->adev;
>>> -     struct amdgpu_bo *bo = NULL;
>>> +     struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>>         uint32_t *msg;
>>>         int r, i;
>>>
>>> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>>> -                                   AMDGPU_GEM_DOMAIN_GTT,
>>> -                                   &bo, NULL, (void **)&msg);
>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>         if (r)
>>>                 return r;
>>>
>>> +     msg = amdgpu_bo_kptr(bo);
>>>         /* stitch together an UVD create msg */
>>>         msg[0] = cpu_to_le32(0x00000de4);
>>>         msg[1] = cpu_to_le32(0x00000000);
>>> @@ -1198,23 +1206,25 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>         for (i = 11; i < 1024; ++i)
>>>                 msg[i] = cpu_to_le32(0x0);
>>>
>>> -     return amdgpu_uvd_send_msg(ring, bo, true, fence);
>>> +     r = amdgpu_uvd_send_msg(ring, bo, true, fence);
>>> +
>>> +     amdgpu_bo_unreserve(bo);
>>> +     return r;
>>>     }
>>>
>>>     int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>                                bool direct, struct dma_fence **fence)
>>>     {
>>>         struct amdgpu_device *adev = ring->adev;
>>> -     struct amdgpu_bo *bo = NULL;
>>> +     struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>>         uint32_t *msg;
>>>         int r, i;
>>>
>>> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>>> -                                   AMDGPU_GEM_DOMAIN_GTT,
>>> -                                   &bo, NULL, (void **)&msg);
>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>> Please use amdgpu_bo_reserve() here and elsewhere as well just to be on
>> the clean side.
>>
>> Lockdep will sooner or later complain that we reserve a BO in the reset
>> path, but that is mostly a theoretical problem and existed before. So
>> I'm fine with sticking with that for now cause the problem was there
>> before as well.
>>
>> It's just that trylock might not work because the BO is busy with
>> indirect submission.
>>
>> Apart from those two minor issues the patch looks good to me.
>>
>> Thanks,
>> Christian.
>>
>>>         if (r)
>>>                 return r;
>>>
>>> +     msg = amdgpu_bo_kptr(bo);
>>>         /* stitch together an UVD destroy msg */
>>>         msg[0] = cpu_to_le32(0x00000de4);
>>>         msg[1] = cpu_to_le32(0x00000002);
>>> @@ -1223,7 +1233,10 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>         for (i = 4; i < 1024; ++i)
>>>                 msg[i] = cpu_to_le32(0x0);
>>>
>>> -     return amdgpu_uvd_send_msg(ring, bo, direct, fence);
>>> +     r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
>>> +
>>> +     amdgpu_bo_unreserve(bo);
>>> +     return r;
>>>     }
>>>
>>>     static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>> index edbb8194ee81..76ac9699885d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>> @@ -68,6 +68,7 @@ struct amdgpu_uvd {
>>>         /* store image width to adjust nb memory state */
>>>         unsigned                decode_image_width;
>>>         uint32_t                keyselect;
>>> +     struct amdgpu_bo        *ib_bo;
>>>     };
>>>
>>>     int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> index bc571833632e..301c0cea7164 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> @@ -332,12 +332,10 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
>>>     static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>     {
>>>         struct dma_fence *fence = NULL;
>>> -     struct amdgpu_bo *bo = NULL;
>>> +     struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>>         long r;
>>>
>>> -     r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>>> -                                   AMDGPU_GEM_DOMAIN_VRAM,
>>> -                                   &bo, NULL, NULL);
>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>         if (r)
>>>                 return r;
>>>
>>> @@ -357,9 +355,7 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>
>>>     error:
>>>         dma_fence_put(fence);
>>> -     amdgpu_bo_unpin(bo);
>>>         amdgpu_bo_unreserve(bo);
>>> -     amdgpu_bo_unref(&bo);
>>>         return r;
>>>     }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> index b6e82d75561f..efa270288029 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> @@ -338,12 +338,10 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
>>>     static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>     {
>>>         struct dma_fence *fence = NULL;
>>> -     struct amdgpu_bo *bo = NULL;
>>> +     struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>>         long r;
>>>
>>> -     r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>>> -                                   AMDGPU_GEM_DOMAIN_VRAM,
>>> -                                   &bo, NULL, NULL);
>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>         if (r)
>>>                 return r;
>>>
>>> @@ -363,9 +361,7 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>
>>>     error:
>>>         dma_fence_put(fence);
>>> -     amdgpu_bo_unpin(bo);
>>>         amdgpu_bo_unreserve(bo);
>>> -     amdgpu_bo_unref(&bo);
>>>         return r;
>>>     }
>>>


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
  2021-09-10 10:10             ` Re: " Pan, Xinhui
@ 2021-09-10 11:10               ` Christian König
  2021-09-10 11:48                 ` Re: " Pan, Xinhui
  0 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2021-09-10 11:10 UTC (permalink / raw)
  To: Pan, Xinhui, amd-gfx; +Cc: Deucher, Alexander

Yeah, but that IB test should use indirect submission through the
scheduler as well.

Otherwise you have the IB test and the scheduler both writing to the ring
buffer at the same time, potentially corrupting it.
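
To illustrate the difference (call signatures from memory, so treat this as
a sketch rather than the exact driver code): the direct path writes the ring
from the caller's context, while the delayed path only queues the job on the
entity and lets the scheduler thread emit it later.

/* Sketch of the two UVD submission paths, not the exact code */
static int uvd_submit_sketch(struct amdgpu_device *adev,
                             struct amdgpu_ring *ring,
                             struct amdgpu_job *job, bool direct,
                             struct dma_fence **f)
{
        if (direct)
                /* emits the IB on the ring right here, so it must not race
                 * with a running scheduler thread
                 */
                return amdgpu_job_submit_direct(job, ring, f);

        /* queued on the entity, written to the ring later by the scheduler */
        return amdgpu_job_submit(job, &adev->uvd.entity,
                                 AMDGPU_FENCE_OWNER_UNDEFINED, f);
}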

Christian.

On 10.09.21 at 12:10, Pan, Xinhui wrote:
> [AMD Official Use Only]
>
> we need take this lock.
> IB test can be triggered through debugfs. Recent days I usually test it by cat gpu recovery and amdgpu_test_ib in debugfs.
>
>
> ________________________________________
> From: Koenig, Christian <Christian.Koenig@amd.com>
> Sent: September 10, 2021 18:02
> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander
> Subject: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>
> *sigh* yeah, you are probably right. Wouldn't be to simple if this would
> be easy, doesn't it?
>
> In this case you can also skip taking the reservation lock for the
> pre-allocated BO, since there should absolutely be only one user at a time.
>
> Christian.
>
> On 10.09.21 at 11:42, Pan, Xinhui wrote:
>> [AMD Official Use Only]
>>
>> oh, god. uvd free handler submit delayed msg which depends on scheduler with reservation lock hold.
>> This is not allowed as commit c8e42d57859d "drm/amdgpu: implement more ib pools (v2)" says
>> Any jobs which schedule IBs without dependence on gpu scheduler should use DIRECT pool.
>>
>> Looks like we should only use reserved BO for direct IB submission.
>> As for delayed IB submission, we could alloc a new one dynamicly.
>> ________________________________________
>> From: Koenig, Christian <Christian.Koenig@amd.com>
>> Sent: September 10, 2021 16:53
>> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander
>> Subject: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>>
>> It should not unless we are OOM which should not happen during driver
>> initialization.
>>
>> But there is another problem I'm thinking about: Do we still use UVD IB
>> submissions to forcefully tear down UVD sessions when a process crashes?
>>
>> If yes that stuff could easily deadlock with an IB test executed during
>> GPU reset.
>>
>> Christian.
>>
>> On 10.09.21 at 10:18, Pan, Xinhui wrote:
>>> [AMD Official Use Only]
>>>
>>> I am wondering if amdgpu_bo_pin would change BO's placement in the futrue.
>>> For now, the new placement is calculated by new = old ∩ new.
>>>
>>> ________________________________________
>>> From: Koenig, Christian <Christian.Koenig@amd.com>
>>> Sent: September 10, 2021 14:24
>>> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
>>> Cc: Deucher, Alexander
>>> Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>>>
>>> On 10.09.21 at 02:38, xinhui pan wrote:
>>>> move BO allocation in sw_init.
>>>>
>>>> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
>>>> ---
>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++++++++++++++----------
>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
>>>>      drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
>>>>      drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
>>>>      4 files changed, 49 insertions(+), 43 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>> index d451c359606a..e2eaac941d37 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>> @@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>>>          const char *fw_name;
>>>>          const struct common_firmware_header *hdr;
>>>>          unsigned family_id;
>>>> +     struct amdgpu_bo *bo = NULL;
>>>> +     void *addr;
>>>>          int i, j, r;
>>>>
>>>>          INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
>>>> @@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>>>                  adev->uvd.filp[i] = NULL;
>>>>          }
>>>>
>>>> +     r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
>>>> +                     AMDGPU_GEM_DOMAIN_GTT,
>>>> +                     &bo, NULL, &addr);
>>>> +     if (r)
>>>> +             return r;
>>>> +
>>>>          /* from uvd v5.0 HW addressing capacity increased to 64 bits */
>>>> -     if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
>>>> +     if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) {
>>>>                  adev->uvd.address_64_bit = true;
>>>> +             amdgpu_bo_kunmap(bo);
>>>> +             amdgpu_bo_unpin(bo);
>>>> +             r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
>>>> +                             0, 256 << 20);
>>> Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
>>> cause I want to remove amdgpu_bo_pin_restricted() sooner or later.
>>>
>>>> +             if (r) {
>>>> +                     amdgpu_bo_unreserve(bo);
>>>> +                     amdgpu_bo_unref(&bo);
>>>> +                     return r;
>>>> +             }
>>>> +             r = amdgpu_bo_kmap(bo, &addr);
>>>> +             if (r) {
>>>> +                     amdgpu_bo_unpin(bo);
>>>> +                     amdgpu_bo_unreserve(bo);
>>>> +                     amdgpu_bo_unref(&bo);
>>>> +                     return r;
>>>> +             }
>>>> +     }
>>>> +     adev->uvd.ib_bo = bo;
>>>> +     amdgpu_bo_unreserve(bo);
>>>>
>>>>          switch (adev->asic_type) {
>>>>          case CHIP_TONGA:
>>>> @@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
>>>>                  for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
>>>>                          amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
>>>>          }
>>>> +     amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, NULL);
>>>>          release_firmware(adev->uvd.fw);
>>>>
>>>>          return 0;
>>>> @@ -1080,23 +1108,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>>          unsigned offset_idx = 0;
>>>>          unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
>>>>
>>>> -     amdgpu_bo_kunmap(bo);
>>>> -     amdgpu_bo_unpin(bo);
>>>> -
>>>> -     if (!ring->adev->uvd.address_64_bit) {
>>>> -             struct ttm_operation_ctx ctx = { true, false };
>>>> -
>>>> -             amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
>>>> -             amdgpu_uvd_force_into_uvd_segment(bo);
>>>> -             r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>>>> -             if (r)
>>>> -                     goto err;
>>>> -     }
>>>> -
>>>>          r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
>>>>                                       AMDGPU_IB_POOL_DELAYED, &job);
>>>>          if (r)
>>>> -             goto err;
>>>> +             return r;
>>>>
>>>>          if (adev->asic_type >= CHIP_VEGA10) {
>>>>                  offset_idx = 1 + ring->me;
>>>> @@ -1148,8 +1163,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>>          }
>>>>
>>>>          amdgpu_bo_fence(bo, f, false);
>>>> -     amdgpu_bo_unreserve(bo);
>>>> -     amdgpu_bo_unref(&bo);
>>>>
>>>>          if (fence)
>>>>                  *fence = dma_fence_get(f);
>>>> @@ -1159,10 +1172,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>>
>>>>      err_free:
>>>>          amdgpu_job_free(job);
>>>> -
>>>> -err:
>>>> -     amdgpu_bo_unreserve(bo);
>>>> -     amdgpu_bo_unref(&bo);
>>>>          return r;
>>>>      }
>>>>
>>>> @@ -1173,16 +1182,15 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>>                                struct dma_fence **fence)
>>>>      {
>>>>          struct amdgpu_device *adev = ring->adev;
>>>> -     struct amdgpu_bo *bo = NULL;
>>>> +     struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>>>          uint32_t *msg;
>>>>          int r, i;
>>>>
>>>> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>>>> -                                   AMDGPU_GEM_DOMAIN_GTT,
>>>> -                                   &bo, NULL, (void **)&msg);
>>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>>          if (r)
>>>>                  return r;
>>>>
>>>> +     msg = amdgpu_bo_kptr(bo);
>>>>          /* stitch together an UVD create msg */
>>>>          msg[0] = cpu_to_le32(0x00000de4);
>>>>          msg[1] = cpu_to_le32(0x00000000);
>>>> @@ -1198,23 +1206,25 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>>          for (i = 11; i < 1024; ++i)
>>>>                  msg[i] = cpu_to_le32(0x0);
>>>>
>>>> -     return amdgpu_uvd_send_msg(ring, bo, true, fence);
>>>> +     r = amdgpu_uvd_send_msg(ring, bo, true, fence);
>>>> +
>>>> +     amdgpu_bo_unreserve(bo);
>>>> +     return r;
>>>>      }
>>>>
>>>>      int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>>                                 bool direct, struct dma_fence **fence)
>>>>      {
>>>>          struct amdgpu_device *adev = ring->adev;
>>>> -     struct amdgpu_bo *bo = NULL;
>>>> +     struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>>>          uint32_t *msg;
>>>>          int r, i;
>>>>
>>>> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>>>> -                                   AMDGPU_GEM_DOMAIN_GTT,
>>>> -                                   &bo, NULL, (void **)&msg);
>>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>> Please use amdgpu_bo_reserve() here and elsewhere as well just to be on
>>> the clean side.
>>>
>>> Lockdep will sooner or later complain that we reserve a BO in the reset
>>> path, but that is mostly a theoretical problem and existed before. So
>>> I'm fine with sticking with that for now cause the problem was there
>>> before as well.
>>>
>>> It's just that trylock might not work because the BO is busy with
>>> indirect submission.
>>>
>>> Apart from those two minor issues the patch looks good to me.
>>>
>>> Thanks,
>>> Christian.
>>>
>>>>          if (r)
>>>>                  return r;
>>>>
>>>> +     msg = amdgpu_bo_kptr(bo);
>>>>          /* stitch together an UVD destroy msg */
>>>>          msg[0] = cpu_to_le32(0x00000de4);
>>>>          msg[1] = cpu_to_le32(0x00000002);
>>>> @@ -1223,7 +1233,10 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>>          for (i = 4; i < 1024; ++i)
>>>>                  msg[i] = cpu_to_le32(0x0);
>>>>
>>>> -     return amdgpu_uvd_send_msg(ring, bo, direct, fence);
>>>> +     r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
>>>> +
>>>> +     amdgpu_bo_unreserve(bo);
>>>> +     return r;
>>>>      }
>>>>
>>>>      static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>>> index edbb8194ee81..76ac9699885d 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>>> @@ -68,6 +68,7 @@ struct amdgpu_uvd {
>>>>          /* store image width to adjust nb memory state */
>>>>          unsigned                decode_image_width;
>>>>          uint32_t                keyselect;
>>>> +     struct amdgpu_bo        *ib_bo;
>>>>      };
>>>>
>>>>      int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>>> index bc571833632e..301c0cea7164 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>>> @@ -332,12 +332,10 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
>>>>      static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>>      {
>>>>          struct dma_fence *fence = NULL;
>>>> -     struct amdgpu_bo *bo = NULL;
>>>> +     struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>>>          long r;
>>>>
>>>> -     r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>>>> -                                   AMDGPU_GEM_DOMAIN_VRAM,
>>>> -                                   &bo, NULL, NULL);
>>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>>          if (r)
>>>>                  return r;
>>>>
>>>> @@ -357,9 +355,7 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>>
>>>>      error:
>>>>          dma_fence_put(fence);
>>>> -     amdgpu_bo_unpin(bo);
>>>>          amdgpu_bo_unreserve(bo);
>>>> -     amdgpu_bo_unref(&bo);
>>>>          return r;
>>>>      }
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>>> index b6e82d75561f..efa270288029 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>>> @@ -338,12 +338,10 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
>>>>      static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>>      {
>>>>          struct dma_fence *fence = NULL;
>>>> -     struct amdgpu_bo *bo = NULL;
>>>> +     struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>>>          long r;
>>>>
>>>> -     r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>>>> -                                   AMDGPU_GEM_DOMAIN_VRAM,
>>>> -                                   &bo, NULL, NULL);
>>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>>          if (r)
>>>>                  return r;
>>>>
>>>> @@ -363,9 +361,7 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>>
>>>>      error:
>>>>          dma_fence_put(fence);
>>>> -     amdgpu_bo_unpin(bo);
>>>>          amdgpu_bo_unreserve(bo);
>>>> -     amdgpu_bo_unref(&bo);
>>>>          return r;
>>>>      }
>>>>


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
  2021-09-10 11:10               ` Christian König
@ 2021-09-10 11:48                 ` Pan, Xinhui
  2021-09-10 12:23                   ` Christian König
  0 siblings, 1 reply; 18+ messages in thread
From: Pan, Xinhui @ 2021-09-10 11:48 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx; +Cc: Deucher, Alexander

[AMD Official Use Only]

Looks like amdgpu_debugfs_test_ib_show uses direct IB submission too.
It parks the ring scheduler threads and takes the reset_sem for read to avoid racing with GPU reset.
BUT, amdgpu_debugfs_test_ib_show itself could be called in parallel. We need to fix that.
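
Roughly what that debugfs path does today, reconstructed from memory (so the
details are an approximation of amdgpu_debugfs.c, not a copy): it only takes
reset_sem for read, so two concurrent callers can both get in and run direct
submissions at the same time.

static int debugfs_test_ib_sketch(struct amdgpu_device *adev)
{
        int r, i;

        /* excludes GPU reset, but not another debugfs caller */
        r = down_read_killable(&adev->reset_sem);
        if (r)
                return r;

        /* stop delayed submissions from the scheduler threads */
        for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
                struct amdgpu_ring *ring = adev->rings[i];

                if (ring && ring->sched.thread)
                        kthread_park(ring->sched.thread);
        }

        r = amdgpu_ib_ring_tests(adev); /* direct IB submissions */

        for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
                struct amdgpu_ring *ring = adev->rings[i];

                if (ring && ring->sched.thread)
                        kthread_unpark(ring->sched.thread);
        }

        up_read(&adev->reset_sem);
        return r;
}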

________________________________________
From: Koenig, Christian <Christian.Koenig@amd.com>
Sent: September 10, 2021 19:10
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test

Yeah, but that IB test should use the indirect submission through the
scheduler as well.

Otherwise you have IB test and scheduler writing both to the ring buffer
at the same time and potentially corrupting it.

Christian.

On 10.09.21 at 12:10, Pan, Xinhui wrote:
> [AMD Official Use Only]
>
> we need take this lock.
> IB test can be triggered through debugfs. Recent days I usually test it by cat gpu recovery and amdgpu_test_ib in debugfs.
>
>
> ________________________________________
> From: Koenig, Christian <Christian.Koenig@amd.com>
> Sent: September 10, 2021 18:02
> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander
> Subject: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>
> *sigh* yeah, you are probably right. Wouldn't be to simple if this would
> be easy, doesn't it?
>
> In this case you can also skip taking the reservation lock for the
> pre-allocated BO, since there should absolutely be only one user at a time.
>
> Christian.
>
> On 10.09.21 at 11:42, Pan, Xinhui wrote:
>> [AMD Official Use Only]
>>
>> oh, god. uvd free handler submit delayed msg which depends on scheduler with reservation lock hold.
>> This is not allowed as commit c8e42d57859d "drm/amdgpu: implement more ib pools (v2)" says
>> Any jobs which schedule IBs without dependence on gpu scheduler should use DIRECT pool.
>>
>> Looks like we should only use reserved BO for direct IB submission.
>> As for delayed IB submission, we could alloc a new one dynamicly.
>> ________________________________________
>> From: Koenig, Christian <Christian.Koenig@amd.com>
>> Sent: September 10, 2021 16:53
>> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander
>> Subject: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>>
>> It should not unless we are OOM which should not happen during driver
>> initialization.
>>
>> But there is another problem I'm thinking about: Do we still use UVD IB
>> submissions to forcefully tear down UVD sessions when a process crashes?
>>
>> If yes that stuff could easily deadlock with an IB test executed during
>> GPU reset.
>>
>> Christian.
>>
>> On 10.09.21 at 10:18, Pan, Xinhui wrote:
>>> [AMD Official Use Only]
>>>
>>> I am wondering if amdgpu_bo_pin would change BO's placement in the futrue.
>>> For now, the new placement is calculated by new = old ∩ new.
>>>
>>> ________________________________________
>>> From: Koenig, Christian <Christian.Koenig@amd.com>
>>> Sent: September 10, 2021 14:24
>>> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
>>> Cc: Deucher, Alexander
>>> Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>>>
>>> On 10.09.21 at 02:38, xinhui pan wrote:
>>>> move BO allocation in sw_init.
>>>>
>>>> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
>>>> ---
>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++++++++++++++----------
>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
>>>>      drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
>>>>      drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
>>>>      4 files changed, 49 insertions(+), 43 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>> index d451c359606a..e2eaac941d37 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>> @@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>>>          const char *fw_name;
>>>>          const struct common_firmware_header *hdr;
>>>>          unsigned family_id;
>>>> +     struct amdgpu_bo *bo = NULL;
>>>> +     void *addr;
>>>>          int i, j, r;
>>>>
>>>>          INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
>>>> @@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>>>                  adev->uvd.filp[i] = NULL;
>>>>          }
>>>>
>>>> +     r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
>>>> +                     AMDGPU_GEM_DOMAIN_GTT,
>>>> +                     &bo, NULL, &addr);
>>>> +     if (r)
>>>> +             return r;
>>>> +
>>>>          /* from uvd v5.0 HW addressing capacity increased to 64 bits */
>>>> -     if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
>>>> +     if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) {
>>>>                  adev->uvd.address_64_bit = true;
>>>> +             amdgpu_bo_kunmap(bo);
>>>> +             amdgpu_bo_unpin(bo);
>>>> +             r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
>>>> +                             0, 256 << 20);
>>> Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
>>> cause I want to remove amdgpu_bo_pin_restricted() sooner or later.
>>>
>>>> +             if (r) {
>>>> +                     amdgpu_bo_unreserve(bo);
>>>> +                     amdgpu_bo_unref(&bo);
>>>> +                     return r;
>>>> +             }
>>>> +             r = amdgpu_bo_kmap(bo, &addr);
>>>> +             if (r) {
>>>> +                     amdgpu_bo_unpin(bo);
>>>> +                     amdgpu_bo_unreserve(bo);
>>>> +                     amdgpu_bo_unref(&bo);
>>>> +                     return r;
>>>> +             }
>>>> +     }
>>>> +     adev->uvd.ib_bo = bo;
>>>> +     amdgpu_bo_unreserve(bo);
>>>>
>>>>          switch (adev->asic_type) {
>>>>          case CHIP_TONGA:
>>>> @@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
>>>>                  for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
>>>>                          amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
>>>>          }
>>>> +     amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, NULL);
>>>>          release_firmware(adev->uvd.fw);
>>>>
>>>>          return 0;
>>>> @@ -1080,23 +1108,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>>          unsigned offset_idx = 0;
>>>>          unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
>>>>
>>>> -     amdgpu_bo_kunmap(bo);
>>>> -     amdgpu_bo_unpin(bo);
>>>> -
>>>> -     if (!ring->adev->uvd.address_64_bit) {
>>>> -             struct ttm_operation_ctx ctx = { true, false };
>>>> -
>>>> -             amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
>>>> -             amdgpu_uvd_force_into_uvd_segment(bo);
>>>> -             r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>>>> -             if (r)
>>>> -                     goto err;
>>>> -     }
>>>> -
>>>>          r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
>>>>                                       AMDGPU_IB_POOL_DELAYED, &job);
>>>>          if (r)
>>>> -             goto err;
>>>> +             return r;
>>>>
>>>>          if (adev->asic_type >= CHIP_VEGA10) {
>>>>                  offset_idx = 1 + ring->me;
>>>> @@ -1148,8 +1163,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>>          }
>>>>
>>>>          amdgpu_bo_fence(bo, f, false);
>>>> -     amdgpu_bo_unreserve(bo);
>>>> -     amdgpu_bo_unref(&bo);
>>>>
>>>>          if (fence)
>>>>                  *fence = dma_fence_get(f);
>>>> @@ -1159,10 +1172,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>>
>>>>      err_free:
>>>>          amdgpu_job_free(job);
>>>> -
>>>> -err:
>>>> -     amdgpu_bo_unreserve(bo);
>>>> -     amdgpu_bo_unref(&bo);
>>>>          return r;
>>>>      }
>>>>
>>>> @@ -1173,16 +1182,15 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>>                                struct dma_fence **fence)
>>>>      {
>>>>          struct amdgpu_device *adev = ring->adev;
>>>> -     struct amdgpu_bo *bo = NULL;
>>>> +     struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>>>          uint32_t *msg;
>>>>          int r, i;
>>>>
>>>> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>>>> -                                   AMDGPU_GEM_DOMAIN_GTT,
>>>> -                                   &bo, NULL, (void **)&msg);
>>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>>          if (r)
>>>>                  return r;
>>>>
>>>> +     msg = amdgpu_bo_kptr(bo);
>>>>          /* stitch together an UVD create msg */
>>>>          msg[0] = cpu_to_le32(0x00000de4);
>>>>          msg[1] = cpu_to_le32(0x00000000);
>>>> @@ -1198,23 +1206,25 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>>          for (i = 11; i < 1024; ++i)
>>>>                  msg[i] = cpu_to_le32(0x0);
>>>>
>>>> -     return amdgpu_uvd_send_msg(ring, bo, true, fence);
>>>> +     r = amdgpu_uvd_send_msg(ring, bo, true, fence);
>>>> +
>>>> +     amdgpu_bo_unreserve(bo);
>>>> +     return r;
>>>>      }
>>>>
>>>>      int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>>                                 bool direct, struct dma_fence **fence)
>>>>      {
>>>>          struct amdgpu_device *adev = ring->adev;
>>>> -     struct amdgpu_bo *bo = NULL;
>>>> +     struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>>>          uint32_t *msg;
>>>>          int r, i;
>>>>
>>>> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>>>> -                                   AMDGPU_GEM_DOMAIN_GTT,
>>>> -                                   &bo, NULL, (void **)&msg);
>>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>> Please use amdgpu_bo_reserve() here and elsewhere as well just to be on
>>> the clean side.
>>>
>>> Lockdep will sooner or later complain that we reserve a BO in the reset
>>> path, but that is mostly a theoretical problem and existed before. So
>>> I'm fine with sticking with that for now cause the problem was there
>>> before as well.
>>>
>>> It's just that trylock might not work because the BO is busy with
>>> indirect submission.
>>>
>>> Apart from those two minor issues the patch looks good to me.
>>>
>>> Thanks,
>>> Christian.
>>>
>>>>          if (r)
>>>>                  return r;
>>>>
>>>> +     msg = amdgpu_bo_kptr(bo);
>>>>          /* stitch together an UVD destroy msg */
>>>>          msg[0] = cpu_to_le32(0x00000de4);
>>>>          msg[1] = cpu_to_le32(0x00000002);
>>>> @@ -1223,7 +1233,10 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>>          for (i = 4; i < 1024; ++i)
>>>>                  msg[i] = cpu_to_le32(0x0);
>>>>
>>>> -     return amdgpu_uvd_send_msg(ring, bo, direct, fence);
>>>> +     r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
>>>> +
>>>> +     amdgpu_bo_unreserve(bo);
>>>> +     return r;
>>>>      }
>>>>
>>>>      static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>>> index edbb8194ee81..76ac9699885d 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>>> @@ -68,6 +68,7 @@ struct amdgpu_uvd {
>>>>          /* store image width to adjust nb memory state */
>>>>          unsigned                decode_image_width;
>>>>          uint32_t                keyselect;
>>>> +     struct amdgpu_bo        *ib_bo;
>>>>      };
>>>>
>>>>      int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>>> index bc571833632e..301c0cea7164 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>>> @@ -332,12 +332,10 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
>>>>      static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>>      {
>>>>          struct dma_fence *fence = NULL;
>>>> -     struct amdgpu_bo *bo = NULL;
>>>> +     struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>>>          long r;
>>>>
>>>> -     r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>>>> -                                   AMDGPU_GEM_DOMAIN_VRAM,
>>>> -                                   &bo, NULL, NULL);
>>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>>          if (r)
>>>>                  return r;
>>>>
>>>> @@ -357,9 +355,7 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>>
>>>>      error:
>>>>          dma_fence_put(fence);
>>>> -     amdgpu_bo_unpin(bo);
>>>>          amdgpu_bo_unreserve(bo);
>>>> -     amdgpu_bo_unref(&bo);
>>>>          return r;
>>>>      }
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>>> index b6e82d75561f..efa270288029 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>>> @@ -338,12 +338,10 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
>>>>      static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>>      {
>>>>          struct dma_fence *fence = NULL;
>>>> -     struct amdgpu_bo *bo = NULL;
>>>> +     struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>>>          long r;
>>>>
>>>> -     r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>>>> -                                   AMDGPU_GEM_DOMAIN_VRAM,
>>>> -                                   &bo, NULL, NULL);
>>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>>          if (r)
>>>>                  return r;
>>>>
>>>> @@ -363,9 +361,7 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>>
>>>>      error:
>>>>          dma_fence_put(fence);
>>>> -     amdgpu_bo_unpin(bo);
>>>>          amdgpu_bo_unreserve(bo);
>>>> -     amdgpu_bo_unref(&bo);
>>>>          return r;
>>>>      }
>>>>


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
  2021-09-10 11:48                 ` Re: " Pan, Xinhui
@ 2021-09-10 12:23                   ` Christian König
  0 siblings, 0 replies; 18+ messages in thread
From: Christian König @ 2021-09-10 12:23 UTC (permalink / raw)
  To: Pan, Xinhui, amd-gfx; +Cc: Deucher, Alexander

Yeah, that indeed sounds buggy. Probably easiest to just down_write the
reset_sem.
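
Something along these lines, assuming reset_sem is still the rw_semaphore we
take in that path (sketch only, not a tested patch): a writer excludes both
GPU reset and any other debugfs caller, so the parallel invocations serialize
as well.

        r = down_write_killable(&adev->reset_sem);
        if (r)
                return r;

        /* park the scheduler threads and run the direct IB tests as before */
        r = amdgpu_ib_ring_tests(adev);

        up_write(&adev->reset_sem);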

Christian.

On 10.09.21 at 13:48, Pan, Xinhui wrote:
> [AMD Official Use Only]
>
> looks like amdgpu_debugfs_test_ib_show use direct IB submission too.
> It parks the ring scheduler thread, and down read the reset_sem to avoid race with gpu reset.
> BUT, amdgpu_debugfs_test_ib_show itself could be called in parrel. Need fix it.
>
> ________________________________________
From: Koenig, Christian <Christian.Koenig@amd.com>
Sent: September 10, 2021 19:10
To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>
> Yeah, but that IB test should use the indirect submission through the
> scheduler as well.
>
> Otherwise you have IB test and scheduler writing both to the ring buffer
> at the same time and potentially corrupting it.
>
> Christian.
>
> On 10.09.21 at 12:10, Pan, Xinhui wrote:
>> [AMD Official Use Only]
>>
>> we need take this lock.
>> IB test can be triggered through debugfs. Recent days I usually test it by cat gpu recovery and amdgpu_test_ib in debugfs.
>>
>>
>> ________________________________________
> From: Koenig, Christian <Christian.Koenig@amd.com>
> Sent: September 10, 2021 18:02
> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander
> Subject: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>>
>> *sigh* yeah, you are probably right. Wouldn't be to simple if this would
>> be easy, doesn't it?
>>
>> In this case you can also skip taking the reservation lock for the
>> pre-allocated BO, since there should absolutely be only one user at a time.
>>
>> Christian.
>>
>> On 10.09.21 at 11:42, Pan, Xinhui wrote:
>>> [AMD Official Use Only]
>>>
>>> oh, god. uvd free handler submit delayed msg which depends on scheduler with reservation lock hold.
>>> This is not allowed as commit c8e42d57859d "drm/amdgpu: implement more ib pools (v2)" says
>>> Any jobs which schedule IBs without dependence on gpu scheduler should use DIRECT pool.
>>>
>>> Looks like we should only use reserved BO for direct IB submission.
>>> As for delayed IB submission, we could alloc a new one dynamicly.
>>> ________________________________________
>> From: Koenig, Christian <Christian.Koenig@amd.com>
>> Sent: September 10, 2021 16:53
>> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander
>> Subject: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>>>
>>> It should not unless we are OOM which should not happen during driver
>>> initialization.
>>>
>>> But there is another problem I'm thinking about: Do we still use UVD IB
>>> submissions to forcefully tear down UVD sessions when a process crashes?
>>>
>>> If yes that stuff could easily deadlock with an IB test executed during
>>> GPU reset.
>>>
>>> Christian.
>>>
>>> On 10.09.21 at 10:18, Pan, Xinhui wrote:
>>>> [AMD Official Use Only]
>>>>
>>>> I am wondering if amdgpu_bo_pin would change BO's placement in the futrue.
>>>> For now, the new placement is calculated by new = old ∩ new.
>>>>
>>>> ________________________________________
>>> From: Koenig, Christian <Christian.Koenig@amd.com>
>>> Sent: September 10, 2021 14:24
>>> To: Pan, Xinhui; amd-gfx@lists.freedesktop.org
>>> Cc: Deucher, Alexander
>>> Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>>>>
>>>> On 10.09.21 at 02:38, xinhui pan wrote:
>>>>> move BO allocation in sw_init.
>>>>>
>>>>> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
>>>>> ---
>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++++++++++++++----------
>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  1 +
>>>>>       drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c   |  8 +--
>>>>>       drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c   |  8 +--
>>>>>       4 files changed, 49 insertions(+), 43 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>> index d451c359606a..e2eaac941d37 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>> @@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>>>>           const char *fw_name;
>>>>>           const struct common_firmware_header *hdr;
>>>>>           unsigned family_id;
>>>>> +     struct amdgpu_bo *bo = NULL;
>>>>> +     void *addr;
>>>>>           int i, j, r;
>>>>>
>>>>>           INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
>>>>> @@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>>>>                   adev->uvd.filp[i] = NULL;
>>>>>           }
>>>>>
>>>>> +     r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
>>>>> +                     AMDGPU_GEM_DOMAIN_GTT,
>>>>> +                     &bo, NULL, &addr);
>>>>> +     if (r)
>>>>> +             return r;
>>>>> +
>>>>>           /* from uvd v5.0 HW addressing capacity increased to 64 bits */
>>>>> -     if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
>>>>> +     if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) {
>>>>>                   adev->uvd.address_64_bit = true;
>>>>> +             amdgpu_bo_kunmap(bo);
>>>>> +             amdgpu_bo_unpin(bo);
>>>>> +             r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
>>>>> +                             0, 256 << 20);
>>>> Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
>>>> cause I want to remove amdgpu_bo_pin_restricted() sooner or later.
>>>>
>>>>> +             if (r) {
>>>>> +                     amdgpu_bo_unreserve(bo);
>>>>> +                     amdgpu_bo_unref(&bo);
>>>>> +                     return r;
>>>>> +             }
>>>>> +             r = amdgpu_bo_kmap(bo, &addr);
>>>>> +             if (r) {
>>>>> +                     amdgpu_bo_unpin(bo);
>>>>> +                     amdgpu_bo_unreserve(bo);
>>>>> +                     amdgpu_bo_unref(&bo);
>>>>> +                     return r;
>>>>> +             }
>>>>> +     }
>>>>> +     adev->uvd.ib_bo = bo;
>>>>> +     amdgpu_bo_unreserve(bo);
>>>>>
>>>>>           switch (adev->asic_type) {
>>>>>           case CHIP_TONGA:
>>>>> @@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
>>>>>                   for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
>>>>>                           amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
>>>>>           }
>>>>> +     amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, NULL);
>>>>>           release_firmware(adev->uvd.fw);
>>>>>
>>>>>           return 0;
>>>>> @@ -1080,23 +1108,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>>>           unsigned offset_idx = 0;
>>>>>           unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
>>>>>
>>>>> -     amdgpu_bo_kunmap(bo);
>>>>> -     amdgpu_bo_unpin(bo);
>>>>> -
>>>>> -     if (!ring->adev->uvd.address_64_bit) {
>>>>> -             struct ttm_operation_ctx ctx = { true, false };
>>>>> -
>>>>> -             amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
>>>>> -             amdgpu_uvd_force_into_uvd_segment(bo);
>>>>> -             r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>>>>> -             if (r)
>>>>> -                     goto err;
>>>>> -     }
>>>>> -
>>>>>           r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
>>>>>                                        AMDGPU_IB_POOL_DELAYED, &job);
>>>>>           if (r)
>>>>> -             goto err;
>>>>> +             return r;
>>>>>
>>>>>           if (adev->asic_type >= CHIP_VEGA10) {
>>>>>                   offset_idx = 1 + ring->me;
>>>>> @@ -1148,8 +1163,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>>>           }
>>>>>
>>>>>           amdgpu_bo_fence(bo, f, false);
>>>>> -     amdgpu_bo_unreserve(bo);
>>>>> -     amdgpu_bo_unref(&bo);
>>>>>
>>>>>           if (fence)
>>>>>                   *fence = dma_fence_get(f);
>>>>> @@ -1159,10 +1172,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>>>
>>>>>       err_free:
>>>>>           amdgpu_job_free(job);
>>>>> -
>>>>> -err:
>>>>> -     amdgpu_bo_unreserve(bo);
>>>>> -     amdgpu_bo_unref(&bo);
>>>>>           return r;
>>>>>       }
>>>>>
>>>>> @@ -1173,16 +1182,15 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>>>                                 struct dma_fence **fence)
>>>>>       {
>>>>>           struct amdgpu_device *adev = ring->adev;
>>>>> -     struct amdgpu_bo *bo = NULL;
>>>>> +     struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>>>>           uint32_t *msg;
>>>>>           int r, i;
>>>>>
>>>>> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>>>>> -                                   AMDGPU_GEM_DOMAIN_GTT,
>>>>> -                                   &bo, NULL, (void **)&msg);
>>>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>>>           if (r)
>>>>>                   return r;
>>>>>
>>>>> +     msg = amdgpu_bo_kptr(bo);
>>>>>           /* stitch together an UVD create msg */
>>>>>           msg[0] = cpu_to_le32(0x00000de4);
>>>>>           msg[1] = cpu_to_le32(0x00000000);
>>>>> @@ -1198,23 +1206,25 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>>>           for (i = 11; i < 1024; ++i)
>>>>>                   msg[i] = cpu_to_le32(0x0);
>>>>>
>>>>> -     return amdgpu_uvd_send_msg(ring, bo, true, fence);
>>>>> +     r = amdgpu_uvd_send_msg(ring, bo, true, fence);
>>>>> +
>>>>> +     amdgpu_bo_unreserve(bo);
>>>>> +     return r;
>>>>>       }
>>>>>
>>>>>       int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>>>                                  bool direct, struct dma_fence **fence)
>>>>>       {
>>>>>           struct amdgpu_device *adev = ring->adev;
>>>>> -     struct amdgpu_bo *bo = NULL;
>>>>> +     struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>>>>           uint32_t *msg;
>>>>>           int r, i;
>>>>>
>>>>> -     r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>>>>> -                                   AMDGPU_GEM_DOMAIN_GTT,
>>>>> -                                   &bo, NULL, (void **)&msg);
>>>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>> Please use amdgpu_bo_reserve() here and elsewhere as well just to be on
>>>> the clean side.
>>>>
>>>> Lockdep will sooner or later complain that we reserve a BO in the reset
>>>> path, but that is mostly a theoretical problem and existed before. So
>>>> I'm fine with sticking with that for now cause the problem was there
>>>> before as well.
>>>>
>>>> It's just that trylock might not work because the BO is busy with
>>>> indirect submission.
>>>>
>>>> Apart from those two minor issues the patch looks good to me.
>>>>
>>>> Thanks,
>>>> Christian.
>>>>
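For illustration only, here is a minimal sketch of how the destroy-msg path could
look with that suggestion applied. It reuses the adev->uvd.ib_bo field and the
amdgpu_bo_kptr() lookup introduced by this patch and assumes amdgpu_bo_reserve(bo, true)
as the blocking, non-interruptible reserve wrapper; this is a sketch, not the final patch:

    int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
                                   bool direct, struct dma_fence **fence)
    {
            struct amdgpu_device *adev = ring->adev;
            struct amdgpu_bo *bo = adev->uvd.ib_bo;
            uint32_t *msg;
            int r, i;

            /* Block on the reservation instead of trylock; the shared IB BO
             * may still be busy with an indirect submission. */
            r = amdgpu_bo_reserve(bo, true);
            if (r)
                    return r;

            /* Grab the CPU pointer of the pre-allocated message BO. */
            msg = amdgpu_bo_kptr(bo);

            /* stitch together an UVD destroy msg */
            msg[0] = cpu_to_le32(0x00000de4);
            msg[1] = cpu_to_le32(0x00000002);
            msg[2] = cpu_to_le32(handle);
            msg[3] = cpu_to_le32(0x00000000);
            for (i = 4; i < 1024; ++i)
                    msg[i] = cpu_to_le32(0x0);

            r = amdgpu_uvd_send_msg(ring, bo, direct, fence);

            amdgpu_bo_unreserve(bo);
            return r;
    }

The create-msg and enc ring IB test paths would follow the same
reserve/kmap/unreserve pattern.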
>>>>>           if (r)
>>>>>                   return r;
>>>>>
>>>>> +     msg = amdgpu_bo_kptr(bo);
>>>>>           /* stitch together an UVD destroy msg */
>>>>>           msg[0] = cpu_to_le32(0x00000de4);
>>>>>           msg[1] = cpu_to_le32(0x00000002);
>>>>> @@ -1223,7 +1233,10 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>>>>           for (i = 4; i < 1024; ++i)
>>>>>                   msg[i] = cpu_to_le32(0x0);
>>>>>
>>>>> -     return amdgpu_uvd_send_msg(ring, bo, direct, fence);
>>>>> +     r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
>>>>> +
>>>>> +     amdgpu_bo_unreserve(bo);
>>>>> +     return r;
>>>>>       }
>>>>>
>>>>>       static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>>>> index edbb8194ee81..76ac9699885d 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>>>> @@ -68,6 +68,7 @@ struct amdgpu_uvd {
>>>>>           /* store image width to adjust nb memory state */
>>>>>           unsigned                decode_image_width;
>>>>>           uint32_t                keyselect;
>>>>> +     struct amdgpu_bo        *ib_bo;
>>>>>       };
>>>>>
>>>>>       int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>>>> index bc571833632e..301c0cea7164 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>>>> @@ -332,12 +332,10 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
>>>>>       static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>>>       {
>>>>>           struct dma_fence *fence = NULL;
>>>>> -     struct amdgpu_bo *bo = NULL;
>>>>> +     struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>>>>           long r;
>>>>>
>>>>> -     r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>>>>> -                                   AMDGPU_GEM_DOMAIN_VRAM,
>>>>> -                                   &bo, NULL, NULL);
>>>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>>>           if (r)
>>>>>                   return r;
>>>>>
>>>>> @@ -357,9 +355,7 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>>>
>>>>>       error:
>>>>>           dma_fence_put(fence);
>>>>> -     amdgpu_bo_unpin(bo);
>>>>>           amdgpu_bo_unreserve(bo);
>>>>> -     amdgpu_bo_unref(&bo);
>>>>>           return r;
>>>>>       }
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>>>> index b6e82d75561f..efa270288029 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>>>> @@ -338,12 +338,10 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
>>>>>       static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>>>       {
>>>>>           struct dma_fence *fence = NULL;
>>>>> -     struct amdgpu_bo *bo = NULL;
>>>>> +     struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>>>>           long r;
>>>>>
>>>>> -     r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>>>>> -                                   AMDGPU_GEM_DOMAIN_VRAM,
>>>>> -                                   &bo, NULL, NULL);
>>>>> +     r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>>>>           if (r)
>>>>>                   return r;
>>>>>
>>>>> @@ -363,9 +361,7 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>>>
>>>>>       error:
>>>>>           dma_fence_put(fence);
>>>>> -     amdgpu_bo_unpin(bo);
>>>>>           amdgpu_bo_unreserve(bo);
>>>>> -     amdgpu_bo_unref(&bo);
>>>>>           return r;
>>>>>       }
>>>>>


^ permalink raw reply	[flat|nested] 18+ messages in thread

Thread overview: 18+ messages
2021-09-10  0:38 [PATCH 1/4] drm/amdgpu: Increase direct IB pool size xinhui pan
2021-09-10  0:38 ` [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test xinhui pan
2021-09-10  6:24   ` Christian König
2021-09-10  8:18     ` Re: " Pan, Xinhui
2021-09-10  8:53       ` Christian König
2021-09-10  9:42         ` Re: " Pan, Xinhui
2021-09-10 10:02           ` Christian König
2021-09-10 10:10             ` Re: " Pan, Xinhui
2021-09-10 11:10               ` Christian König
2021-09-10 11:48                 ` Re: " Pan, Xinhui
2021-09-10 12:23                   ` Christian König
2021-09-10  0:38 ` [PATCH 3/4] drm/amdgpu: VCE " xinhui pan
2021-09-10  6:29   ` Christian König
2021-09-10  0:38 ` [PATCH 4/4] drm/amdgpu: VCN " xinhui pan
2021-09-10  6:33   ` Christian König
2021-09-10  7:53     ` Re: " Pan, Xinhui
2021-09-10  7:55       ` Christian König
2021-09-10  6:15 ` [PATCH 1/4] drm/amdgpu: Increase direct IB pool size Christian König
