* [RFC PATCH] drm/amdgpu: alloc uvd msg from IB pool
@ 2021-09-07 12:20 xinhui pan
From: xinhui pan @ 2021-09-07 12:20 UTC (permalink / raw)
  To: amd-gfx; +Cc: christian.koenig, alexander.deucher, xinhui pan

There is a dedicated IB pool for IB tests, so let's use it for UVD messages
too.

For older HW, use a single reserved BO pinned to a specific VRAM range.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 174 +++++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |   2 +
 2 files changed, 113 insertions(+), 63 deletions(-)
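
For context, below is a condensed sketch of the two message-buffer paths this
patch sets up: hardware with 64-bit UVD addressing (v5.0 and later) takes the
message straight from the dedicated direct IB pool, while older hardware keeps
one reserved BO pinned into the low 256MB of VRAM. This is an illustration
only; the _sketch helper names are made up, the amdgpu helpers are the same
ones used in the diff below, and error handling is abbreviated.

/*
 * Sketch only: a condensed view of the two message-buffer paths above.
 * Helper names ending in _sketch are made up for illustration.
 */
#include "amdgpu.h"

/* Older HW (pre UVD 5.0): one reserved BO, pinned into the low 256MB of VRAM. */
static int uvd_msg_bo_init_sketch(struct amdgpu_device *adev)
{
	struct amdgpu_bo *bo = NULL;
	int r;

	r = amdgpu_bo_create_reserved(adev, PAGE_SIZE, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM, &bo, NULL,
				      &adev->uvd.ib_bo_cpu_addr);
	if (r)
		return r;

	/* Drop the default pin and re-pin into the range UVD can address. */
	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unpin(bo);
	r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM, 0, 256 << 20);
	if (r)
		goto err_unreserve;

	r = amdgpu_bo_kmap(bo, &adev->uvd.ib_bo_cpu_addr);
	if (r)
		goto err_unpin;

	adev->uvd.ib_bo = bo;
	amdgpu_bo_unreserve(bo);
	return 0;

err_unpin:
	amdgpu_bo_unpin(bo);
err_unreserve:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
	return r;
}

/* Newer HW: the message buffer comes straight from the dedicated IB pool. */
static int uvd_msg_ib_get_sketch(struct amdgpu_device *adev,
				 struct amdgpu_ib *ib, uint32_t **msg)
{
	int r;

	memset(ib, 0, sizeof(*ib));
	r = amdgpu_ib_get(adev, NULL, PAGE_SIZE, AMDGPU_IB_POOL_DIRECT, ib);
	if (r)
		return r;

	*msg = ib->ptr;	/* CPU pointer used to fill the UVD create/destroy msg */
	return 0;
}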

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index d451c359606a..c70b4321b342 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -299,8 +299,35 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 	}
 
 	/* from uvd v5.0 HW addressing capacity increased to 64 bits */
-	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
+	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) {
 		adev->uvd.address_64_bit = true;
+	} else {
+		struct amdgpu_bo *bo = NULL;
+
+		r = amdgpu_bo_create_reserved(adev, PAGE_SIZE, PAGE_SIZE,
+				AMDGPU_GEM_DOMAIN_VRAM,
+				&bo, NULL, &adev->uvd.ib_bo_cpu_addr);
+		if (r)
+			return r;
+		amdgpu_bo_kunmap(bo);
+		amdgpu_bo_unpin(bo);
+		r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
+				0, 256 << 20);
+		if (r) {
+			amdgpu_bo_unreserve(bo);
+			amdgpu_bo_unref(&bo);
+			return r;
+		}
+		r = amdgpu_bo_kmap(bo, &adev->uvd.ib_bo_cpu_addr);
+		if (r) {
+			amdgpu_bo_unpin(bo);
+			amdgpu_bo_unreserve(bo);
+			amdgpu_bo_unref(&bo);
+			return r;
+		}
+		adev->uvd.ib_bo = bo;
+		amdgpu_bo_unreserve(bo);
+	}
 
 	switch (adev->asic_type) {
 	case CHIP_TONGA:
@@ -337,6 +364,9 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 				      &adev->uvd.inst[j].gpu_addr,
 				      (void **)&adev->uvd.inst[j].cpu_addr);
 
+		amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL,
+				      (void **)&adev->uvd.ib_bo_cpu_addr);
+
 		amdgpu_ring_fini(&adev->uvd.inst[j].ring);
 
 		for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
@@ -1066,7 +1096,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
 	return 0;
 }
 
-static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
+static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, uint64_t addr,
 			       bool direct, struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -1074,29 +1104,15 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
 	uint32_t data[4];
-	uint64_t addr;
 	long r;
 	int i;
 	unsigned offset_idx = 0;
 	unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
 
-	amdgpu_bo_kunmap(bo);
-	amdgpu_bo_unpin(bo);
-
-	if (!ring->adev->uvd.address_64_bit) {
-		struct ttm_operation_ctx ctx = { true, false };
-
-		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
-		amdgpu_uvd_force_into_uvd_segment(bo);
-		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-		if (r)
-			goto err;
-	}
-
 	r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
 				     AMDGPU_IB_POOL_DELAYED, &job);
 	if (r)
-		goto err;
+		return r;
 
 	if (adev->asic_type >= CHIP_VEGA10) {
 		offset_idx = 1 + ring->me;
@@ -1110,7 +1126,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
 
 	ib = &job->ibs[0];
-	addr = amdgpu_bo_gpu_offset(bo);
 	ib->ptr[0] = data[0];
 	ib->ptr[1] = addr;
 	ib->ptr[2] = data[1];
@@ -1123,33 +1138,13 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	}
 	ib->length_dw = 16;
 
-	if (direct) {
-		r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
-					  msecs_to_jiffies(10));
-		if (r == 0)
-			r = -ETIMEDOUT;
-		if (r < 0)
-			goto err_free;
-
+	if (direct)
 		r = amdgpu_job_submit_direct(job, ring, &f);
-		if (r)
-			goto err_free;
-	} else {
-		r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv,
-				     AMDGPU_SYNC_ALWAYS,
-				     AMDGPU_FENCE_OWNER_UNDEFINED);
-		if (r)
-			goto err_free;
-
+	else
 		r = amdgpu_job_submit(job, &adev->uvd.entity,
-				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
-		if (r)
-			goto err_free;
-	}
-
-	amdgpu_bo_fence(bo, f, false);
-	amdgpu_bo_unreserve(bo);
-	amdgpu_bo_unref(&bo);
+				AMDGPU_FENCE_OWNER_UNDEFINED, &f);
+	if (r)
+		goto err_free;
 
 	if (fence)
 		*fence = dma_fence_get(f);
@@ -1159,10 +1154,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 
 err_free:
 	amdgpu_job_free(job);
-
-err:
-	amdgpu_bo_unreserve(bo);
-	amdgpu_bo_unref(&bo);
 	return r;
 }
 
@@ -1173,16 +1164,31 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 			      struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_bo *bo = NULL;
+	struct amdgpu_bo *bo = adev->uvd.ib_bo;
+	struct dma_fence *f = NULL;
+	struct amdgpu_ib ib;
 	uint32_t *msg;
 	int r, i;
 
-	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_GTT,
-				      &bo, NULL, (void **)&msg);
-	if (r)
-		return r;
-
+	if (bo) {
+		r = amdgpu_bo_reserve(bo, true);
+		if (r)
+			return r;
+		r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
+				msecs_to_jiffies(10));
+		if (r == 0)
+			r = -ETIMEDOUT;
+		if (r < 0)
+			goto err;
+		ib.gpu_addr = amdgpu_bo_gpu_offset(bo);
+		msg = adev->uvd.ib_bo_cpu_addr;
+	} else {
+		memset(&ib, 0, sizeof(ib));
+		r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
+				AMDGPU_IB_POOL_DIRECT,
+				&ib);
+		msg = ib.ptr;
+	}
 	/* stitch together an UVD create msg */
 	msg[0] = cpu_to_le32(0x00000de4);
 	msg[1] = cpu_to_le32(0x00000000);
@@ -1198,23 +1204,52 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 	for (i = 11; i < 1024; ++i)
 		msg[i] = cpu_to_le32(0x0);
 
-	return amdgpu_uvd_send_msg(ring, bo, true, fence);
+	r = amdgpu_uvd_send_msg(ring, ib.gpu_addr, true, &f);
+	if (r)
+		goto err;
+	if (bo)
+		amdgpu_bo_fence(bo, f, false);
+	else
+		amdgpu_ib_free(adev, &ib, f);
+	if (fence)
+		*fence = dma_fence_get(f);
+	dma_fence_put(f);
+err:
+	if (bo)
+		amdgpu_bo_unreserve(bo);
+	return r;
 }
 
 int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 			       bool direct, struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_bo *bo = NULL;
+	struct amdgpu_bo *bo = adev->uvd.ib_bo;
+	struct dma_fence *f = NULL;
+	struct amdgpu_ib ib;
 	uint32_t *msg;
 	int r, i;
 
-	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_GTT,
-				      &bo, NULL, (void **)&msg);
-	if (r)
-		return r;
-
+	if (bo) {
+		r = amdgpu_bo_reserve(bo, true);
+		if (r)
+			return r;
+		r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
+				msecs_to_jiffies(10));
+		if (r == 0)
+			r = -ETIMEDOUT;
+		if (r < 0)
+			goto err;
+		ib.gpu_addr = amdgpu_bo_gpu_offset(bo);
+		msg = adev->uvd.ib_bo_cpu_addr;
+	} else {
+		memset(&ib, 0, sizeof(ib));
+		r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
+				direct ?
+				AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED,
+				&ib);
+		msg = ib.ptr;
+	}
 	/* stitch together an UVD destroy msg */
 	msg[0] = cpu_to_le32(0x00000de4);
 	msg[1] = cpu_to_le32(0x00000002);
@@ -1223,7 +1258,20 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	for (i = 4; i < 1024; ++i)
 		msg[i] = cpu_to_le32(0x0);
 
-	return amdgpu_uvd_send_msg(ring, bo, direct, fence);
+	r = amdgpu_uvd_send_msg(ring, ib.gpu_addr, direct, &f);
+	if (r)
+		goto err;
+	if (bo)
+		amdgpu_bo_fence(bo, f, false);
+	else
+		amdgpu_ib_free(adev, &ib, f);
+	if (fence)
+		*fence = dma_fence_get(f);
+	dma_fence_put(f);
+err:
+	if (bo)
+		amdgpu_bo_unreserve(bo);
+	return r;
 }
 
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index edbb8194ee81..3ff49daf558c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -68,6 +68,8 @@ struct amdgpu_uvd {
 	/* store image width to adjust nb memory state */
 	unsigned		decode_image_width;
 	uint32_t                keyselect;
+	struct amdgpu_bo	*ib_bo;
+	void			*ib_bo_cpu_addr;
 };
 
 int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
-- 
2.25.1



* Re: [RFC PATCH] drm/amdgpu: alloc uvd msg from IB pool
  2021-09-07  7:22 xinhui pan
@ 2021-09-07  7:32 ` Christian König
From: Christian König @ 2021-09-07  7:32 UTC (permalink / raw)
  To: xinhui pan, amd-gfx; +Cc: christian.koenig

On 07.09.21 at 09:22, xinhui pan wrote:
> There is a dedicated IB pool for IB tests, so let's use it for UVD messages
> too.
>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 223 +++++++++++++-----------
>   1 file changed, 126 insertions(+), 97 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index d451c359606a..29fbe976a300 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -1066,37 +1066,65 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
>   	return 0;
>   }
>   
> -static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
> -			       bool direct, struct dma_fence **fence)
> +typedef void (*uvd_msg_cb) (struct amdgpu_ring *ring, uint32_t handle,
> +		struct amdgpu_ib *ib, uint32_t *msg, uint64_t msg_gpu_addr);

Please no typedef here; try to avoid the callback altogether if possible.

Apart from that, this doesn't handle older UVD hardware correctly; we
still allocate BOs in VRAM for that as well.

Christian.
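
A minimal sketch of the direction this feedback points in (and roughly what
the revised patch at the top of this thread does): drop the callback and let
one helper branch on uvd.address_64_bit, so older UVD parts still get their
message from a VRAM BO. The ib_bo/ib_bo_cpu_addr fields are the ones that
later revision introduces; the helper name below is made up for illustration,
and fence waiting plus error unwinding are left out.

/* Sketch only: pick the message buffer based on the HW generation. */
#include "amdgpu.h"

static int uvd_get_msg_buffer_sketch(struct amdgpu_device *adev, bool direct,
				     struct amdgpu_ib *ib, uint32_t **msg)
{
	int r;

	if (!adev->uvd.address_64_bit) {
		/* Older UVD: the message must live in the reserved VRAM BO. */
		struct amdgpu_bo *bo = adev->uvd.ib_bo;

		r = amdgpu_bo_reserve(bo, true);
		if (r)
			return r;
		/* Caller unreserves after the submission has been fenced. */
		ib->gpu_addr = amdgpu_bo_gpu_offset(bo);
		*msg = adev->uvd.ib_bo_cpu_addr;
		return 0;
	}

	/* Newer UVD: take the message straight from the IB pool. */
	memset(ib, 0, sizeof(*ib));
	r = amdgpu_ib_get(adev, NULL, PAGE_SIZE,
			  direct ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED,
			  ib);
	if (r)
		return r;

	*msg = ib->ptr;
	return 0;
}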

> +
> +static void amdgpu_uvd_get_create_msg_cb(struct amdgpu_ring *ring, uint32_t handle,
> +		struct amdgpu_ib *ib, uint32_t *msg, uint64_t addr)
>   {
>   	struct amdgpu_device *adev = ring->adev;
> -	struct dma_fence *f = NULL;
> -	struct amdgpu_job *job;
> -	struct amdgpu_ib *ib;
>   	uint32_t data[4];
> -	uint64_t addr;
> -	long r;
> +	unsigned int offset_idx = 0;
> +	unsigned int offset[3] = { UVD_BASE_SI, 0, 0 };
>   	int i;
> -	unsigned offset_idx = 0;
> -	unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
>   
> -	amdgpu_bo_kunmap(bo);
> -	amdgpu_bo_unpin(bo);
> +	if (adev->asic_type >= CHIP_VEGA10) {
> +		offset_idx = 1 + ring->me;
> +		offset[1] = adev->reg_offset[UVD_HWIP][0][1];
> +		offset[2] = adev->reg_offset[UVD_HWIP][1][1];
> +	}
>   
> -	if (!ring->adev->uvd.address_64_bit) {
> -		struct ttm_operation_ctx ctx = { true, false };
> +	data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0);
> +	data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0);
> +	data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
> +	data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
>   
> -		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
> -		amdgpu_uvd_force_into_uvd_segment(bo);
> -		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> -		if (r)
> -			goto err;
> +	ib->ptr[0] = data[0];
> +	ib->ptr[1] = addr;
> +	ib->ptr[2] = data[1];
> +	ib->ptr[3] = addr >> 32;
> +	ib->ptr[4] = data[2];
> +	ib->ptr[5] = 0;
> +	for (i = 6; i < 16; i += 2) {
> +		ib->ptr[i] = data[3];
> +		ib->ptr[i+1] = 0;
>   	}
> +	ib->length_dw = 16;
>   
> -	r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
> -				     AMDGPU_IB_POOL_DELAYED, &job);
> -	if (r)
> -		goto err;
> +	/* stitch together an UVD create msg */
> +	msg[0] = cpu_to_le32(0x00000de4);
> +	msg[1] = cpu_to_le32(0x00000000);
> +	msg[2] = cpu_to_le32(handle);
> +	msg[3] = cpu_to_le32(0x00000000);
> +	msg[4] = cpu_to_le32(0x00000000);
> +	msg[5] = cpu_to_le32(0x00000000);
> +	msg[6] = cpu_to_le32(0x00000000);
> +	msg[7] = cpu_to_le32(0x00000780);
> +	msg[8] = cpu_to_le32(0x00000440);
> +	msg[9] = cpu_to_le32(0x00000000);
> +	msg[10] = cpu_to_le32(0x01b37000);
> +	for (i = 11; i < 1024; ++i)
> +		msg[i] = cpu_to_le32(0x0);
> +}
> +
> +static void amdgpu_uvd_get_destroy_msg_cb(struct amdgpu_ring *ring, uint32_t handle,
> +		struct amdgpu_ib *ib, uint32_t *msg, uint64_t addr)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	uint32_t data[4];
> +	unsigned int offset_idx = 0;
> +	unsigned int offset[3] = { UVD_BASE_SI, 0, 0 };
> +	int i;
>   
>   	if (adev->asic_type >= CHIP_VEGA10) {
>   		offset_idx = 1 + ring->me;
> @@ -1109,8 +1137,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>   	data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
>   	data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
>   
> -	ib = &job->ibs[0];
> -	addr = amdgpu_bo_gpu_offset(bo);
>   	ib->ptr[0] = data[0];
>   	ib->ptr[1] = addr;
>   	ib->ptr[2] = data[1];
> @@ -1123,46 +1149,92 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>   	}
>   	ib->length_dw = 16;
>   
> -	if (direct) {
> +	/* stitch together an UVD destroy msg */
> +	msg[0] = cpu_to_le32(0x00000de4);
> +	msg[1] = cpu_to_le32(0x00000002);
> +	msg[2] = cpu_to_le32(handle);
> +	msg[3] = cpu_to_le32(0x00000000);
> +	for (i = 4; i < 1024; ++i)
> +		msg[i] = cpu_to_le32(0x0);
> +}
> +
> +static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, bool direct,
> +		uvd_msg_cb cb, uint32_t handle, uint32_t ib_size,
> +		uint32_t msg_size, struct dma_fence **fence)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	struct dma_fence *f = NULL;
> +	struct amdgpu_job *job;
> +	struct amdgpu_bo *bo = NULL;
> +	struct amdgpu_ib msg;
> +	int r;
> +
> +	if (!ring->adev->uvd.address_64_bit) {
> +		struct ttm_operation_ctx ctx = { true, false };
> +
> +		r = amdgpu_bo_create_reserved(adev, msg_size, PAGE_SIZE,
> +				AMDGPU_GEM_DOMAIN_VRAM,
> +				&bo, NULL, (void **)&msg.ptr);
> +		if (r)
> +			return r;
> +		amdgpu_bo_kunmap(bo);
> +		amdgpu_bo_unpin(bo);
> +		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
> +		amdgpu_uvd_force_into_uvd_segment(bo);
> +		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> +		if (r)
> +			goto error;
>   		r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
> -					  msecs_to_jiffies(10));
> +				msecs_to_jiffies(10));
>   		if (r == 0)
>   			r = -ETIMEDOUT;
>   		if (r < 0)
> -			goto err_free;
> -
> -		r = amdgpu_job_submit_direct(job, ring, &f);
> -		if (r)
> -			goto err_free;
> +			goto error;
>   	} else {
> -		r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv,
> -				     AMDGPU_SYNC_ALWAYS,
> -				     AMDGPU_FENCE_OWNER_UNDEFINED);
> +		memset(&msg, 0, sizeof(msg));
> +		r = amdgpu_ib_get(adev, NULL, PAGE_ALIGN(msg_size),
> +				direct ?
> +				AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED,
> +				&msg);
>   		if (r)
> -			goto err_free;
> +			goto error;
> +	}
>   
> +	r = amdgpu_job_alloc_with_ib(adev, ib_size,
> +			direct ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED,
> +			&job);
> +	if (r)
> +		goto error;
> +
> +	cb(ring, handle, &(job->ibs[0]), msg.ptr,
> +			bo ? amdgpu_bo_gpu_offset(bo) : msg.gpu_addr);
> +
> +	if (direct)
> +		r = amdgpu_job_submit_direct(job, ring, &f);
> +	else
>   		r = amdgpu_job_submit(job, &adev->uvd.entity,
>   				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
> -		if (r)
> -			goto err_free;
> +	if (r) {
> +		amdgpu_job_free(job);
> +		goto error;
>   	}
>   
> -	amdgpu_bo_fence(bo, f, false);
> -	amdgpu_bo_unreserve(bo);
> -	amdgpu_bo_unref(&bo);
> +	if (bo)
> +		amdgpu_bo_fence(bo, f, false);
> +	else
> +		amdgpu_ib_free(adev, &msg, f);
>   
>   	if (fence)
>   		*fence = dma_fence_get(f);
>   	dma_fence_put(f);
> -
> -	return 0;
> -
> -err_free:
> -	amdgpu_job_free(job);
> -
> -err:
> -	amdgpu_bo_unreserve(bo);
> -	amdgpu_bo_unref(&bo);
> +error:
> +	if (bo) {
> +		amdgpu_bo_unreserve(bo);
> +		amdgpu_bo_unref(&bo);
> +	} else {
> +		if (r)
> +			amdgpu_ib_free(adev, &msg, NULL);
> +	}
>   	return r;
>   }
>   
> @@ -1172,58 +1244,15 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>   int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>   			      struct dma_fence **fence)
>   {
> -	struct amdgpu_device *adev = ring->adev;
> -	struct amdgpu_bo *bo = NULL;
> -	uint32_t *msg;
> -	int r, i;
> -
> -	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
> -				      AMDGPU_GEM_DOMAIN_GTT,
> -				      &bo, NULL, (void **)&msg);
> -	if (r)
> -		return r;
> -
> -	/* stitch together an UVD create msg */
> -	msg[0] = cpu_to_le32(0x00000de4);
> -	msg[1] = cpu_to_le32(0x00000000);
> -	msg[2] = cpu_to_le32(handle);
> -	msg[3] = cpu_to_le32(0x00000000);
> -	msg[4] = cpu_to_le32(0x00000000);
> -	msg[5] = cpu_to_le32(0x00000000);
> -	msg[6] = cpu_to_le32(0x00000000);
> -	msg[7] = cpu_to_le32(0x00000780);
> -	msg[8] = cpu_to_le32(0x00000440);
> -	msg[9] = cpu_to_le32(0x00000000);
> -	msg[10] = cpu_to_le32(0x01b37000);
> -	for (i = 11; i < 1024; ++i)
> -		msg[i] = cpu_to_le32(0x0);
> -
> -	return amdgpu_uvd_send_msg(ring, bo, true, fence);
> +	return amdgpu_uvd_send_msg(ring, true, amdgpu_uvd_get_create_msg_cb,
> +			handle, 64, PAGE_SIZE, fence);
>   }
>   
>   int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
> -			       bool direct, struct dma_fence **fence)
> +			      bool direct, struct dma_fence **fence)
>   {
> -	struct amdgpu_device *adev = ring->adev;
> -	struct amdgpu_bo *bo = NULL;
> -	uint32_t *msg;
> -	int r, i;
> -
> -	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
> -				      AMDGPU_GEM_DOMAIN_GTT,
> -				      &bo, NULL, (void **)&msg);
> -	if (r)
> -		return r;
> -
> -	/* stitch together an UVD destroy msg */
> -	msg[0] = cpu_to_le32(0x00000de4);
> -	msg[1] = cpu_to_le32(0x00000002);
> -	msg[2] = cpu_to_le32(handle);
> -	msg[3] = cpu_to_le32(0x00000000);
> -	for (i = 4; i < 1024; ++i)
> -		msg[i] = cpu_to_le32(0x0);
> -
> -	return amdgpu_uvd_send_msg(ring, bo, direct, fence);
> +	return amdgpu_uvd_send_msg(ring, direct, amdgpu_uvd_get_destroy_msg_cb,
> +			handle, 64, PAGE_SIZE, fence);
>   }
>   
>   static void amdgpu_uvd_idle_work_handler(struct work_struct *work)



* [RFC PATCH] drm/amdgpu: alloc uvd msg from IB pool
@ 2021-09-07  7:28 xinhui pan
From: xinhui pan @ 2021-09-07  7:28 UTC (permalink / raw)
  To: amd-gfx; +Cc: xinhui pan

There is a dedicated IB pool for IB tests, so let's use it for UVD messages
too.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 223 +++++++++++++-----------
 1 file changed, 126 insertions(+), 97 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index d451c359606a..29fbe976a300 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1066,37 +1066,65 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
 	return 0;
 }
 
-static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
-			       bool direct, struct dma_fence **fence)
+typedef void (*uvd_msg_cb) (struct amdgpu_ring *ring, uint32_t handle,
+		struct amdgpu_ib *ib, uint32_t *msg, uint64_t msg_gpu_addr);
+
+static void amdgpu_uvd_get_create_msg_cb(struct amdgpu_ring *ring, uint32_t handle,
+		struct amdgpu_ib *ib, uint32_t *msg, uint64_t addr)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct dma_fence *f = NULL;
-	struct amdgpu_job *job;
-	struct amdgpu_ib *ib;
 	uint32_t data[4];
-	uint64_t addr;
-	long r;
+	unsigned int offset_idx = 0;
+	unsigned int offset[3] = { UVD_BASE_SI, 0, 0 };
 	int i;
-	unsigned offset_idx = 0;
-	unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
 
-	amdgpu_bo_kunmap(bo);
-	amdgpu_bo_unpin(bo);
+	if (adev->asic_type >= CHIP_VEGA10) {
+		offset_idx = 1 + ring->me;
+		offset[1] = adev->reg_offset[UVD_HWIP][0][1];
+		offset[2] = adev->reg_offset[UVD_HWIP][1][1];
+	}
 
-	if (!ring->adev->uvd.address_64_bit) {
-		struct ttm_operation_ctx ctx = { true, false };
+	data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0);
+	data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0);
+	data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
+	data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
 
-		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
-		amdgpu_uvd_force_into_uvd_segment(bo);
-		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-		if (r)
-			goto err;
+	ib->ptr[0] = data[0];
+	ib->ptr[1] = addr;
+	ib->ptr[2] = data[1];
+	ib->ptr[3] = addr >> 32;
+	ib->ptr[4] = data[2];
+	ib->ptr[5] = 0;
+	for (i = 6; i < 16; i += 2) {
+		ib->ptr[i] = data[3];
+		ib->ptr[i+1] = 0;
 	}
+	ib->length_dw = 16;
 
-	r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
-				     AMDGPU_IB_POOL_DELAYED, &job);
-	if (r)
-		goto err;
+	/* stitch together an UVD create msg */
+	msg[0] = cpu_to_le32(0x00000de4);
+	msg[1] = cpu_to_le32(0x00000000);
+	msg[2] = cpu_to_le32(handle);
+	msg[3] = cpu_to_le32(0x00000000);
+	msg[4] = cpu_to_le32(0x00000000);
+	msg[5] = cpu_to_le32(0x00000000);
+	msg[6] = cpu_to_le32(0x00000000);
+	msg[7] = cpu_to_le32(0x00000780);
+	msg[8] = cpu_to_le32(0x00000440);
+	msg[9] = cpu_to_le32(0x00000000);
+	msg[10] = cpu_to_le32(0x01b37000);
+	for (i = 11; i < 1024; ++i)
+		msg[i] = cpu_to_le32(0x0);
+}
+
+static void amdgpu_uvd_get_destroy_msg_cb(struct amdgpu_ring *ring, uint32_t handle,
+		struct amdgpu_ib *ib, uint32_t *msg, uint64_t addr)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t data[4];
+	unsigned int offset_idx = 0;
+	unsigned int offset[3] = { UVD_BASE_SI, 0, 0 };
+	int i;
 
 	if (adev->asic_type >= CHIP_VEGA10) {
 		offset_idx = 1 + ring->me;
@@ -1109,8 +1137,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
 	data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
 
-	ib = &job->ibs[0];
-	addr = amdgpu_bo_gpu_offset(bo);
 	ib->ptr[0] = data[0];
 	ib->ptr[1] = addr;
 	ib->ptr[2] = data[1];
@@ -1123,46 +1149,92 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	}
 	ib->length_dw = 16;
 
-	if (direct) {
+	/* stitch together an UVD destroy msg */
+	msg[0] = cpu_to_le32(0x00000de4);
+	msg[1] = cpu_to_le32(0x00000002);
+	msg[2] = cpu_to_le32(handle);
+	msg[3] = cpu_to_le32(0x00000000);
+	for (i = 4; i < 1024; ++i)
+		msg[i] = cpu_to_le32(0x0);
+}
+
+static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, bool direct,
+		uvd_msg_cb cb, uint32_t handle, uint32_t ib_size,
+		uint32_t msg_size, struct dma_fence **fence)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct dma_fence *f = NULL;
+	struct amdgpu_job *job;
+	struct amdgpu_bo *bo = NULL;
+	struct amdgpu_ib msg;
+	int r;
+
+	if (!ring->adev->uvd.address_64_bit) {
+		struct ttm_operation_ctx ctx = { true, false };
+
+		r = amdgpu_bo_create_reserved(adev, msg_size, PAGE_SIZE,
+				AMDGPU_GEM_DOMAIN_VRAM,
+				&bo, NULL, (void **)&msg.ptr);
+		if (r)
+			return r;
+		amdgpu_bo_kunmap(bo);
+		amdgpu_bo_unpin(bo);
+		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+		amdgpu_uvd_force_into_uvd_segment(bo);
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+		if (r)
+			goto error;
 		r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
-					  msecs_to_jiffies(10));
+				msecs_to_jiffies(10));
 		if (r == 0)
 			r = -ETIMEDOUT;
 		if (r < 0)
-			goto err_free;
-
-		r = amdgpu_job_submit_direct(job, ring, &f);
-		if (r)
-			goto err_free;
+			goto error;
 	} else {
-		r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv,
-				     AMDGPU_SYNC_ALWAYS,
-				     AMDGPU_FENCE_OWNER_UNDEFINED);
+		memset(&msg, 0, sizeof(msg));
+		r = amdgpu_ib_get(adev, NULL, PAGE_ALIGN(msg_size),
+				direct ?
+				AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED,
+				&msg);
 		if (r)
-			goto err_free;
+			goto error;
+	}
 
+	r = amdgpu_job_alloc_with_ib(adev, ib_size,
+			direct ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED,
+			&job);
+	if (r)
+		goto error;
+
+	cb(ring, handle, &(job->ibs[0]), msg.ptr,
+			bo ? amdgpu_bo_gpu_offset(bo) : msg.gpu_addr);
+
+	if (direct)
+		r = amdgpu_job_submit_direct(job, ring, &f);
+	else
 		r = amdgpu_job_submit(job, &adev->uvd.entity,
 				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
-		if (r)
-			goto err_free;
+	if (r) {
+		amdgpu_job_free(job);
+		goto error;
 	}
 
-	amdgpu_bo_fence(bo, f, false);
-	amdgpu_bo_unreserve(bo);
-	amdgpu_bo_unref(&bo);
+	if (bo)
+		amdgpu_bo_fence(bo, f, false);
+	else
+		amdgpu_ib_free(adev, &msg, f);
 
 	if (fence)
 		*fence = dma_fence_get(f);
 	dma_fence_put(f);
-
-	return 0;
-
-err_free:
-	amdgpu_job_free(job);
-
-err:
-	amdgpu_bo_unreserve(bo);
-	amdgpu_bo_unref(&bo);
+error:
+	if (bo) {
+		amdgpu_bo_unreserve(bo);
+		amdgpu_bo_unref(&bo);
+	} else {
+		if (r)
+			amdgpu_ib_free(adev, &msg, NULL);
+	}
 	return r;
 }
 
@@ -1172,58 +1244,15 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 			      struct dma_fence **fence)
 {
-	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_bo *bo = NULL;
-	uint32_t *msg;
-	int r, i;
-
-	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_GTT,
-				      &bo, NULL, (void **)&msg);
-	if (r)
-		return r;
-
-	/* stitch together an UVD create msg */
-	msg[0] = cpu_to_le32(0x00000de4);
-	msg[1] = cpu_to_le32(0x00000000);
-	msg[2] = cpu_to_le32(handle);
-	msg[3] = cpu_to_le32(0x00000000);
-	msg[4] = cpu_to_le32(0x00000000);
-	msg[5] = cpu_to_le32(0x00000000);
-	msg[6] = cpu_to_le32(0x00000000);
-	msg[7] = cpu_to_le32(0x00000780);
-	msg[8] = cpu_to_le32(0x00000440);
-	msg[9] = cpu_to_le32(0x00000000);
-	msg[10] = cpu_to_le32(0x01b37000);
-	for (i = 11; i < 1024; ++i)
-		msg[i] = cpu_to_le32(0x0);
-
-	return amdgpu_uvd_send_msg(ring, bo, true, fence);
+	return amdgpu_uvd_send_msg(ring, true, amdgpu_uvd_get_create_msg_cb,
+			handle, 64, PAGE_SIZE, fence);
 }
 
 int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       bool direct, struct dma_fence **fence)
+			      bool direct, struct dma_fence **fence)
 {
-	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_bo *bo = NULL;
-	uint32_t *msg;
-	int r, i;
-
-	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_GTT,
-				      &bo, NULL, (void **)&msg);
-	if (r)
-		return r;
-
-	/* stitch together an UVD destroy msg */
-	msg[0] = cpu_to_le32(0x00000de4);
-	msg[1] = cpu_to_le32(0x00000002);
-	msg[2] = cpu_to_le32(handle);
-	msg[3] = cpu_to_le32(0x00000000);
-	for (i = 4; i < 1024; ++i)
-		msg[i] = cpu_to_le32(0x0);
-
-	return amdgpu_uvd_send_msg(ring, bo, direct, fence);
+	return amdgpu_uvd_send_msg(ring, direct, amdgpu_uvd_get_destroy_msg_cb,
+			handle, 64, PAGE_SIZE, fence);
 }
 
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
-- 
2.25.1



* [RFC PATCH] drm/amdgpu: alloc uvd msg from IB pool
@ 2021-09-07  7:22 xinhui pan
  2021-09-07  7:32 ` Christian König
From: xinhui pan @ 2021-09-07  7:22 UTC (permalink / raw)
  To: amd-gfx; +Cc: christian.koenig, Xinhui.Pan, xinhui pan

There is a dedicated IB pool for IB tests, so let's use it for UVD messages
too.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 223 +++++++++++++-----------
 1 file changed, 126 insertions(+), 97 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index d451c359606a..29fbe976a300 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1066,37 +1066,65 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
 	return 0;
 }
 
-static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
-			       bool direct, struct dma_fence **fence)
+typedef void (*uvd_msg_cb) (struct amdgpu_ring *ring, uint32_t handle,
+		struct amdgpu_ib *ib, uint32_t *msg, uint64_t msg_gpu_addr);
+
+static void amdgpu_uvd_get_create_msg_cb(struct amdgpu_ring *ring, uint32_t handle,
+		struct amdgpu_ib *ib, uint32_t *msg, uint64_t addr)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct dma_fence *f = NULL;
-	struct amdgpu_job *job;
-	struct amdgpu_ib *ib;
 	uint32_t data[4];
-	uint64_t addr;
-	long r;
+	unsigned int offset_idx = 0;
+	unsigned int offset[3] = { UVD_BASE_SI, 0, 0 };
 	int i;
-	unsigned offset_idx = 0;
-	unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
 
-	amdgpu_bo_kunmap(bo);
-	amdgpu_bo_unpin(bo);
+	if (adev->asic_type >= CHIP_VEGA10) {
+		offset_idx = 1 + ring->me;
+		offset[1] = adev->reg_offset[UVD_HWIP][0][1];
+		offset[2] = adev->reg_offset[UVD_HWIP][1][1];
+	}
 
-	if (!ring->adev->uvd.address_64_bit) {
-		struct ttm_operation_ctx ctx = { true, false };
+	data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0);
+	data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0);
+	data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
+	data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
 
-		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
-		amdgpu_uvd_force_into_uvd_segment(bo);
-		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-		if (r)
-			goto err;
+	ib->ptr[0] = data[0];
+	ib->ptr[1] = addr;
+	ib->ptr[2] = data[1];
+	ib->ptr[3] = addr >> 32;
+	ib->ptr[4] = data[2];
+	ib->ptr[5] = 0;
+	for (i = 6; i < 16; i += 2) {
+		ib->ptr[i] = data[3];
+		ib->ptr[i+1] = 0;
 	}
+	ib->length_dw = 16;
 
-	r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
-				     AMDGPU_IB_POOL_DELAYED, &job);
-	if (r)
-		goto err;
+	/* stitch together an UVD create msg */
+	msg[0] = cpu_to_le32(0x00000de4);
+	msg[1] = cpu_to_le32(0x00000000);
+	msg[2] = cpu_to_le32(handle);
+	msg[3] = cpu_to_le32(0x00000000);
+	msg[4] = cpu_to_le32(0x00000000);
+	msg[5] = cpu_to_le32(0x00000000);
+	msg[6] = cpu_to_le32(0x00000000);
+	msg[7] = cpu_to_le32(0x00000780);
+	msg[8] = cpu_to_le32(0x00000440);
+	msg[9] = cpu_to_le32(0x00000000);
+	msg[10] = cpu_to_le32(0x01b37000);
+	for (i = 11; i < 1024; ++i)
+		msg[i] = cpu_to_le32(0x0);
+}
+
+static void amdgpu_uvd_get_destroy_msg_cb(struct amdgpu_ring *ring, uint32_t handle,
+		struct amdgpu_ib *ib, uint32_t *msg, uint64_t addr)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t data[4];
+	unsigned int offset_idx = 0;
+	unsigned int offset[3] = { UVD_BASE_SI, 0, 0 };
+	int i;
 
 	if (adev->asic_type >= CHIP_VEGA10) {
 		offset_idx = 1 + ring->me;
@@ -1109,8 +1137,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
 	data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
 
-	ib = &job->ibs[0];
-	addr = amdgpu_bo_gpu_offset(bo);
 	ib->ptr[0] = data[0];
 	ib->ptr[1] = addr;
 	ib->ptr[2] = data[1];
@@ -1123,46 +1149,92 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	}
 	ib->length_dw = 16;
 
-	if (direct) {
+	/* stitch together an UVD destroy msg */
+	msg[0] = cpu_to_le32(0x00000de4);
+	msg[1] = cpu_to_le32(0x00000002);
+	msg[2] = cpu_to_le32(handle);
+	msg[3] = cpu_to_le32(0x00000000);
+	for (i = 4; i < 1024; ++i)
+		msg[i] = cpu_to_le32(0x0);
+}
+
+static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, bool direct,
+		uvd_msg_cb cb, uint32_t handle, uint32_t ib_size,
+		uint32_t msg_size, struct dma_fence **fence)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct dma_fence *f = NULL;
+	struct amdgpu_job *job;
+	struct amdgpu_bo *bo = NULL;
+	struct amdgpu_ib msg;
+	int r;
+
+	if (!ring->adev->uvd.address_64_bit) {
+		struct ttm_operation_ctx ctx = { true, false };
+
+		r = amdgpu_bo_create_reserved(adev, msg_size, PAGE_SIZE,
+				AMDGPU_GEM_DOMAIN_VRAM,
+				&bo, NULL, (void **)&msg.ptr);
+		if (r)
+			return r;
+		amdgpu_bo_kunmap(bo);
+		amdgpu_bo_unpin(bo);
+		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+		amdgpu_uvd_force_into_uvd_segment(bo);
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+		if (r)
+			goto error;
 		r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
-					  msecs_to_jiffies(10));
+				msecs_to_jiffies(10));
 		if (r == 0)
 			r = -ETIMEDOUT;
 		if (r < 0)
-			goto err_free;
-
-		r = amdgpu_job_submit_direct(job, ring, &f);
-		if (r)
-			goto err_free;
+			goto error;
 	} else {
-		r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv,
-				     AMDGPU_SYNC_ALWAYS,
-				     AMDGPU_FENCE_OWNER_UNDEFINED);
+		memset(&msg, 0, sizeof(msg));
+		r = amdgpu_ib_get(adev, NULL, PAGE_ALIGN(msg_size),
+				direct ?
+				AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED,
+				&msg);
 		if (r)
-			goto err_free;
+			goto error;
+	}
 
+	r = amdgpu_job_alloc_with_ib(adev, ib_size,
+			direct ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED,
+			&job);
+	if (r)
+		goto error;
+
+	cb(ring, handle, &(job->ibs[0]), msg.ptr,
+			bo ? amdgpu_bo_gpu_offset(bo) : msg.gpu_addr);
+
+	if (direct)
+		r = amdgpu_job_submit_direct(job, ring, &f);
+	else
 		r = amdgpu_job_submit(job, &adev->uvd.entity,
 				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
-		if (r)
-			goto err_free;
+	if (r) {
+		amdgpu_job_free(job);
+		goto error;
 	}
 
-	amdgpu_bo_fence(bo, f, false);
-	amdgpu_bo_unreserve(bo);
-	amdgpu_bo_unref(&bo);
+	if (bo)
+		amdgpu_bo_fence(bo, f, false);
+	else
+		amdgpu_ib_free(adev, &msg, f);
 
 	if (fence)
 		*fence = dma_fence_get(f);
 	dma_fence_put(f);
-
-	return 0;
-
-err_free:
-	amdgpu_job_free(job);
-
-err:
-	amdgpu_bo_unreserve(bo);
-	amdgpu_bo_unref(&bo);
+error:
+	if (bo) {
+		amdgpu_bo_unreserve(bo);
+		amdgpu_bo_unref(&bo);
+	} else {
+		if (r)
+			amdgpu_ib_free(adev, &msg, NULL);
+	}
 	return r;
 }
 
@@ -1172,58 +1244,15 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 			      struct dma_fence **fence)
 {
-	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_bo *bo = NULL;
-	uint32_t *msg;
-	int r, i;
-
-	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_GTT,
-				      &bo, NULL, (void **)&msg);
-	if (r)
-		return r;
-
-	/* stitch together an UVD create msg */
-	msg[0] = cpu_to_le32(0x00000de4);
-	msg[1] = cpu_to_le32(0x00000000);
-	msg[2] = cpu_to_le32(handle);
-	msg[3] = cpu_to_le32(0x00000000);
-	msg[4] = cpu_to_le32(0x00000000);
-	msg[5] = cpu_to_le32(0x00000000);
-	msg[6] = cpu_to_le32(0x00000000);
-	msg[7] = cpu_to_le32(0x00000780);
-	msg[8] = cpu_to_le32(0x00000440);
-	msg[9] = cpu_to_le32(0x00000000);
-	msg[10] = cpu_to_le32(0x01b37000);
-	for (i = 11; i < 1024; ++i)
-		msg[i] = cpu_to_le32(0x0);
-
-	return amdgpu_uvd_send_msg(ring, bo, true, fence);
+	return amdgpu_uvd_send_msg(ring, true, amdgpu_uvd_get_create_msg_cb,
+			handle, 64, PAGE_SIZE, fence);
 }
 
 int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       bool direct, struct dma_fence **fence)
+			      bool direct, struct dma_fence **fence)
 {
-	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_bo *bo = NULL;
-	uint32_t *msg;
-	int r, i;
-
-	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_GTT,
-				      &bo, NULL, (void **)&msg);
-	if (r)
-		return r;
-
-	/* stitch together an UVD destroy msg */
-	msg[0] = cpu_to_le32(0x00000de4);
-	msg[1] = cpu_to_le32(0x00000002);
-	msg[2] = cpu_to_le32(handle);
-	msg[3] = cpu_to_le32(0x00000000);
-	for (i = 4; i < 1024; ++i)
-		msg[i] = cpu_to_le32(0x0);
-
-	return amdgpu_uvd_send_msg(ring, bo, direct, fence);
+	return amdgpu_uvd_send_msg(ring, direct, amdgpu_uvd_get_destroy_msg_cb,
+			handle, 64, PAGE_SIZE, fence);
 }
 
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
-- 
2.25.1



* [RFC PATCH] drm/amdgpu: alloc uvd msg from IB pool
@ 2021-09-07  7:04 xinhui pan
From: xinhui pan @ 2021-09-07  7:04 UTC (permalink / raw)
  To: amd-gfx; +Cc: christian.koenig, xinhui pan

There is a dedicated IB pool for IB tests, so let's use it for UVD messages
too.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 223 +++++++++++++-----------
 1 file changed, 126 insertions(+), 97 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index d451c359606a..29fbe976a300 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1066,37 +1066,65 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
 	return 0;
 }
 
-static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
-			       bool direct, struct dma_fence **fence)
+typedef void (*uvd_msg_cb) (struct amdgpu_ring *ring, uint32_t handle,
+		struct amdgpu_ib *ib, uint32_t *msg, uint64_t msg_gpu_addr);
+
+static void amdgpu_uvd_get_create_msg_cb(struct amdgpu_ring *ring, uint32_t handle,
+		struct amdgpu_ib *ib, uint32_t *msg, uint64_t addr)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct dma_fence *f = NULL;
-	struct amdgpu_job *job;
-	struct amdgpu_ib *ib;
 	uint32_t data[4];
-	uint64_t addr;
-	long r;
+	unsigned int offset_idx = 0;
+	unsigned int offset[3] = { UVD_BASE_SI, 0, 0 };
 	int i;
-	unsigned offset_idx = 0;
-	unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
 
-	amdgpu_bo_kunmap(bo);
-	amdgpu_bo_unpin(bo);
+	if (adev->asic_type >= CHIP_VEGA10) {
+		offset_idx = 1 + ring->me;
+		offset[1] = adev->reg_offset[UVD_HWIP][0][1];
+		offset[2] = adev->reg_offset[UVD_HWIP][1][1];
+	}
 
-	if (!ring->adev->uvd.address_64_bit) {
-		struct ttm_operation_ctx ctx = { true, false };
+	data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0);
+	data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0);
+	data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
+	data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
 
-		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
-		amdgpu_uvd_force_into_uvd_segment(bo);
-		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-		if (r)
-			goto err;
+	ib->ptr[0] = data[0];
+	ib->ptr[1] = addr;
+	ib->ptr[2] = data[1];
+	ib->ptr[3] = addr >> 32;
+	ib->ptr[4] = data[2];
+	ib->ptr[5] = 0;
+	for (i = 6; i < 16; i += 2) {
+		ib->ptr[i] = data[3];
+		ib->ptr[i+1] = 0;
 	}
+	ib->length_dw = 16;
 
-	r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
-				     AMDGPU_IB_POOL_DELAYED, &job);
-	if (r)
-		goto err;
+	/* stitch together an UVD create msg */
+	msg[0] = cpu_to_le32(0x00000de4);
+	msg[1] = cpu_to_le32(0x00000000);
+	msg[2] = cpu_to_le32(handle);
+	msg[3] = cpu_to_le32(0x00000000);
+	msg[4] = cpu_to_le32(0x00000000);
+	msg[5] = cpu_to_le32(0x00000000);
+	msg[6] = cpu_to_le32(0x00000000);
+	msg[7] = cpu_to_le32(0x00000780);
+	msg[8] = cpu_to_le32(0x00000440);
+	msg[9] = cpu_to_le32(0x00000000);
+	msg[10] = cpu_to_le32(0x01b37000);
+	for (i = 11; i < 1024; ++i)
+		msg[i] = cpu_to_le32(0x0);
+}
+
+static void amdgpu_uvd_get_destroy_msg_cb(struct amdgpu_ring *ring, uint32_t handle,
+		struct amdgpu_ib *ib, uint32_t *msg, uint64_t addr)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t data[4];
+	unsigned int offset_idx = 0;
+	unsigned int offset[3] = { UVD_BASE_SI, 0, 0 };
+	int i;
 
 	if (adev->asic_type >= CHIP_VEGA10) {
 		offset_idx = 1 + ring->me;
@@ -1109,8 +1137,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
 	data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
 
-	ib = &job->ibs[0];
-	addr = amdgpu_bo_gpu_offset(bo);
 	ib->ptr[0] = data[0];
 	ib->ptr[1] = addr;
 	ib->ptr[2] = data[1];
@@ -1123,46 +1149,92 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	}
 	ib->length_dw = 16;
 
-	if (direct) {
+	/* stitch together an UVD destroy msg */
+	msg[0] = cpu_to_le32(0x00000de4);
+	msg[1] = cpu_to_le32(0x00000002);
+	msg[2] = cpu_to_le32(handle);
+	msg[3] = cpu_to_le32(0x00000000);
+	for (i = 4; i < 1024; ++i)
+		msg[i] = cpu_to_le32(0x0);
+}
+
+static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, bool direct,
+		uvd_msg_cb cb, uint32_t handle, uint32_t ib_size,
+		uint32_t msg_size, struct dma_fence **fence)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct dma_fence *f = NULL;
+	struct amdgpu_job *job;
+	struct amdgpu_bo *bo = NULL;
+	struct amdgpu_ib msg;
+	int r;
+
+	if (!ring->adev->uvd.address_64_bit) {
+		struct ttm_operation_ctx ctx = { true, false };
+
+		r = amdgpu_bo_create_reserved(adev, msg_size, PAGE_SIZE,
+				AMDGPU_GEM_DOMAIN_VRAM,
+				&bo, NULL, (void **)&msg.ptr);
+		if (r)
+			return r;
+		amdgpu_bo_kunmap(bo);
+		amdgpu_bo_unpin(bo);
+		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+		amdgpu_uvd_force_into_uvd_segment(bo);
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+		if (r)
+			goto error;
 		r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
-					  msecs_to_jiffies(10));
+				msecs_to_jiffies(10));
 		if (r == 0)
 			r = -ETIMEDOUT;
 		if (r < 0)
-			goto err_free;
-
-		r = amdgpu_job_submit_direct(job, ring, &f);
-		if (r)
-			goto err_free;
+			goto error;
 	} else {
-		r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv,
-				     AMDGPU_SYNC_ALWAYS,
-				     AMDGPU_FENCE_OWNER_UNDEFINED);
+		memset(&msg, 0, sizeof(msg));
+		r = amdgpu_ib_get(adev, NULL, PAGE_ALIGN(msg_size),
+				direct ?
+				AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED,
+				&msg);
 		if (r)
-			goto err_free;
+			goto error;
+	}
 
+	r = amdgpu_job_alloc_with_ib(adev, ib_size,
+			direct ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED,
+			&job);
+	if (r)
+		goto error;
+
+	cb(ring, handle, &(job->ibs[0]), msg.ptr,
+			bo ? amdgpu_bo_gpu_offset(bo) : msg.gpu_addr);
+
+	if (direct)
+		r = amdgpu_job_submit_direct(job, ring, &f);
+	else
 		r = amdgpu_job_submit(job, &adev->uvd.entity,
 				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
-		if (r)
-			goto err_free;
+	if (r) {
+		amdgpu_job_free(job);
+		goto error;
 	}
 
-	amdgpu_bo_fence(bo, f, false);
-	amdgpu_bo_unreserve(bo);
-	amdgpu_bo_unref(&bo);
+	if (bo)
+		amdgpu_bo_fence(bo, f, false);
+	else
+		amdgpu_ib_free(adev, &msg, f);
 
 	if (fence)
 		*fence = dma_fence_get(f);
 	dma_fence_put(f);
-
-	return 0;
-
-err_free:
-	amdgpu_job_free(job);
-
-err:
-	amdgpu_bo_unreserve(bo);
-	amdgpu_bo_unref(&bo);
+error:
+	if (bo) {
+		amdgpu_bo_unreserve(bo);
+		amdgpu_bo_unref(&bo);
+	} else {
+		if (r)
+			amdgpu_ib_free(adev, &msg, NULL);
+	}
 	return r;
 }
 
@@ -1172,58 +1244,15 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 			      struct dma_fence **fence)
 {
-	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_bo *bo = NULL;
-	uint32_t *msg;
-	int r, i;
-
-	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_GTT,
-				      &bo, NULL, (void **)&msg);
-	if (r)
-		return r;
-
-	/* stitch together an UVD create msg */
-	msg[0] = cpu_to_le32(0x00000de4);
-	msg[1] = cpu_to_le32(0x00000000);
-	msg[2] = cpu_to_le32(handle);
-	msg[3] = cpu_to_le32(0x00000000);
-	msg[4] = cpu_to_le32(0x00000000);
-	msg[5] = cpu_to_le32(0x00000000);
-	msg[6] = cpu_to_le32(0x00000000);
-	msg[7] = cpu_to_le32(0x00000780);
-	msg[8] = cpu_to_le32(0x00000440);
-	msg[9] = cpu_to_le32(0x00000000);
-	msg[10] = cpu_to_le32(0x01b37000);
-	for (i = 11; i < 1024; ++i)
-		msg[i] = cpu_to_le32(0x0);
-
-	return amdgpu_uvd_send_msg(ring, bo, true, fence);
+	return amdgpu_uvd_send_msg(ring, true, amdgpu_uvd_get_create_msg_cb,
+			handle, 64, PAGE_SIZE, fence);
 }
 
 int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       bool direct, struct dma_fence **fence)
+			      bool direct, struct dma_fence **fence)
 {
-	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_bo *bo = NULL;
-	uint32_t *msg;
-	int r, i;
-
-	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_GTT,
-				      &bo, NULL, (void **)&msg);
-	if (r)
-		return r;
-
-	/* stitch together an UVD destroy msg */
-	msg[0] = cpu_to_le32(0x00000de4);
-	msg[1] = cpu_to_le32(0x00000002);
-	msg[2] = cpu_to_le32(handle);
-	msg[3] = cpu_to_le32(0x00000000);
-	for (i = 4; i < 1024; ++i)
-		msg[i] = cpu_to_le32(0x0);
-
-	return amdgpu_uvd_send_msg(ring, bo, direct, fence);
+	return amdgpu_uvd_send_msg(ring, direct, amdgpu_uvd_get_destroy_msg_cb,
+			handle, 64, PAGE_SIZE, fence);
 }
 
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
-- 
2.25.1


