linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v5] media: mediatek: vcodec: Add to support VP9 inner racing mode
@ 2022-10-25  1:46 Mingjia Zhang
  2022-11-04 10:19 ` Hans Verkuil
  2022-11-07 11:46 ` Yunfei Dong (董云飞)
  0 siblings, 2 replies; 4+ messages in thread
From: Mingjia Zhang @ 2022-10-25  1:46 UTC (permalink / raw)
  To: Yunfei Dong, Alexandre Courbot, Nicolas Dufresne, Hans Verkuil,
	AngeloGioacchino Del Regno, Benjamin Gaignard, Tiffany Lin,
	Andrew-CT Chen, Mauro Carvalho Chehab, Rob Herring,
	Matthias Brugger, Tomasz Figa
  Cc: George Sun, Xiaoyong Lu, Hsin-Yi Wang, Fritz Koenig,
	Daniel Vetter, dri-devel, Irui Wang, Steve Cho, linux-media,
	devicetree, linux-kernel, linux-arm-kernel, linux-mediatek,
	Project_Global_Chrome_Upstream_Group

Enable VP9 inner racing mode
We send lat trans buffer to the core when trigger lat to work, instead of waiting for the lat decode done.
It can be reduce decoder latency.

Signed-off-by: Mingjia Zhang <mingjia.zhang@mediatek.com>
---
Changes from v3:

- CTS/GTS test pass
- Fluster result: Ran 275/303 tests successfully

Changes from v2:

- CTS/GTS test pass
- Fluster result: Ran 240/303 tests successfully

Changes from v1:

- CTS/GTS test pass
---
 .../vcodec/vdec/vdec_vp9_req_lat_if.c         | 85 ++++++++++---------
 1 file changed, 47 insertions(+), 38 deletions(-)

diff --git a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
index 81de876d51267..1b39119c89951 100644
--- a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
+++ b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
@@ -436,6 +436,7 @@ struct vdec_vp9_slice_ref {
  * @frame_ctx:		4 frame context according to VP9 Spec
  * @frame_ctx_helper:	4 frame context according to newest kernel spec
  * @dirty:		state of each frame context
+ * @local_vsi:		local instance vsi information
  * @init_vsi:		vsi used for initialized VP9 instance
  * @vsi:		vsi used for decoding/flush ...
  * @core_vsi:		vsi used for Core stage
@@ -482,6 +483,8 @@ struct vdec_vp9_slice_instance {
 	struct v4l2_vp9_frame_context frame_ctx_helper;
 	unsigned char dirty[4];
 
+	struct vdec_vp9_slice_vsi local_vsi;
+
 	/* MicroP vsi */
 	union {
 		struct vdec_vp9_slice_init_vsi *init_vsi;
@@ -1616,16 +1619,10 @@ static int vdec_vp9_slice_update_single(struct vdec_vp9_slice_instance *instance
 }
 
 static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance *instance,
-				     struct vdec_lat_buf *lat_buf,
-				     struct vdec_vp9_slice_pfc *pfc)
+				     struct vdec_vp9_slice_vsi *vsi)
 {
-	struct vdec_vp9_slice_vsi *vsi;
-
-	vsi = &pfc->vsi;
-	memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));
-
 	mtk_vcodec_debug(instance, "Frame %u LAT CRC 0x%08x %lx %lx\n",
-			 pfc->seq, vsi->state.crc[0],
+			 (instance->seq - 1), vsi->state.crc[0],
 			 (unsigned long)vsi->trans.dma_addr,
 			 (unsigned long)vsi->trans.dma_addr_end);
 
@@ -2090,6 +2087,13 @@ static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
 		return ret;
 	}
 
+	if (IS_VDEC_INNER_RACING(ctx->dev->dec_capability)) {
+		vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
+		memcpy(&instance->local_vsi, vsi, sizeof(*vsi));
+		vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf);
+		vsi = &instance->local_vsi;
+	}
+
 	if (instance->irq) {
 		ret = mtk_vcodec_wait_for_done_ctx(ctx,	MTK_INST_IRQ_RECEIVED,
 						   WAIT_INTR_TIMEOUT_MS, MTK_VDEC_LAT0);
@@ -2102,22 +2106,25 @@ static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
 	}
 
 	vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
-	ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc);
+	ret = vdec_vp9_slice_update_lat(instance, vsi);
 
-	/* LAT trans full, no more UBE or decode timeout */
-	if (ret) {
-		mtk_vcodec_err(instance, "VP9 decode error: %d\n", ret);
-		return ret;
-	}
+	if (!IS_VDEC_INNER_RACING(ctx->dev->dec_capability))
+		/* LAT trans full, no more UBE or decode timeout */
+		if (ret) {
+			mtk_vcodec_err(instance, "frame[%d] decode error: %d\n",
+				       ret, (instance->seq - 1));
+			return ret;
+		}
 
-	mtk_vcodec_debug(instance, "lat dma addr: 0x%lx 0x%lx\n",
-			 (unsigned long)pfc->vsi.trans.dma_addr,
-			 (unsigned long)pfc->vsi.trans.dma_addr_end);
 
-	vdec_msg_queue_update_ube_wptr(&ctx->msg_queue,
-				       vsi->trans.dma_addr_end +
-				       ctx->msg_queue.wdma_addr.dma_addr);
-	vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf);
+	vsi->trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
+	vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, vsi->trans.dma_addr_end);
+	if (!IS_VDEC_INNER_RACING(ctx->dev->dec_capability))
+		vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf);
+
+	mtk_vcodec_debug(instance, "lat trans end addr(0x%lx), ube start addr(0x%lx)\n",
+			 (unsigned long)vsi->trans.dma_addr_end,
+			 (unsigned long)ctx->msg_queue.wdma_addr.dma_addr);
 
 	return 0;
 }
@@ -2139,40 +2146,40 @@ static int vdec_vp9_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
 static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf)
 {
 	struct vdec_vp9_slice_instance *instance;
-	struct vdec_vp9_slice_pfc *pfc;
+	struct vdec_vp9_slice_pfc *pfc = NULL;
 	struct mtk_vcodec_ctx *ctx = NULL;
 	struct vdec_fb *fb = NULL;
 	int ret = -EINVAL;
 
 	if (!lat_buf)
-		goto err;
+		return -EINVAL;
 
 	pfc = lat_buf->private_data;
 	ctx = lat_buf->ctx;
 	if (!pfc || !ctx)
-		goto err;
+		return -EINVAL;
 
 	instance = ctx->drv_handle;
 	if (!instance)
-		goto err;
+		return -EINVAL;
 
 	fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
 	if (!fb) {
 		ret = -EBUSY;
-		goto err;
+		goto vdec_dec_end;
 	}
 
 	ret = vdec_vp9_slice_setup_core(instance, fb, lat_buf, pfc);
 	if (ret) {
 		mtk_vcodec_err(instance, "vdec_vp9_slice_setup_core\n");
-		goto err;
+		goto vdec_dec_end;
 	}
 	vdec_vp9_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi);
 
 	ret = vpu_dec_core(&instance->vpu);
 	if (ret) {
 		mtk_vcodec_err(instance, "vpu_dec_core\n");
-		goto err;
+		goto vdec_dec_end;
 	}
 
 	if (instance->irq) {
@@ -2190,24 +2197,26 @@ static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf)
 	ret = vdec_vp9_slice_update_core(instance, lat_buf, pfc);
 	if (ret) {
 		mtk_vcodec_err(instance, "vdec_vp9_slice_update_core\n");
-		goto err;
+		goto vdec_dec_end;
 	}
 
-	pfc->vsi.trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
 	mtk_vcodec_debug(instance, "core dma_addr_end 0x%lx\n",
 			 (unsigned long)pfc->vsi.trans.dma_addr_end);
-	vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
-	ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req);
-
-	return 0;
 
-err:
-	if (ctx && pfc) {
-		/* always update read pointer */
-		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
+vdec_dec_end:
+	/* always update read pointer */
+	if (IS_VDEC_INNER_RACING(ctx->dev->dec_capability))
+		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue,
+					       pfc->vsi.trans.dma_addr);
+	else
+		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue,
+					       pfc->vsi.trans.dma_addr_end);
 
+	if (ret) {
 		if (fb)
 			ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req);
+	} else {
+		ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req);
 	}
 	return ret;
 }
-- 
2.18.0


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v5] media: mediatek: vcodec: Add to support VP9 inner racing mode
  2022-10-25  1:46 [PATCH v5] media: mediatek: vcodec: Add to support VP9 inner racing mode Mingjia Zhang
@ 2022-11-04 10:19 ` Hans Verkuil
  2022-11-07 11:46 ` Yunfei Dong (董云飞)
  1 sibling, 0 replies; 4+ messages in thread
From: Hans Verkuil @ 2022-11-04 10:19 UTC (permalink / raw)
  To: Mingjia Zhang, Yunfei Dong, Alexandre Courbot, Nicolas Dufresne,
	AngeloGioacchino Del Regno, Benjamin Gaignard, Tiffany Lin,
	Andrew-CT Chen, Mauro Carvalho Chehab, Rob Herring,
	Matthias Brugger, Tomasz Figa
  Cc: George Sun, Xiaoyong Lu, Hsin-Yi Wang, Fritz Koenig,
	Daniel Vetter, dri-devel, Irui Wang, Steve Cho, linux-media,
	devicetree, linux-kernel, linux-arm-kernel, linux-mediatek,
	Project_Global_Chrome_Upstream_Group

Hi Mingjia,

What are the changes since v4? You didn't mentioned that.

Yunfei, can you review this v5? I prefer to have your Acked/Reviewed-by before merging.

Regards,

	Hans

On 25/10/2022 03:46, Mingjia Zhang wrote:
> Enable VP9 inner racing mode
> We send lat trans buffer to the core when trigger lat to work, instead of waiting for the lat decode done.
> It can be reduce decoder latency.
> 
> Signed-off-by: Mingjia Zhang <mingjia.zhang@mediatek.com>
> ---
> Changes from v3:
> 
> - CTS/GTS test pass
> - Fluster result: Ran 275/303 tests successfully
> 
> Changes from v2:
> 
> - CTS/GTS test pass
> - Fluster result: Ran 240/303 tests successfully
> 
> Changes from v1:
> 
> - CTS/GTS test pass
> ---
>  .../vcodec/vdec/vdec_vp9_req_lat_if.c         | 85 ++++++++++---------
>  1 file changed, 47 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
> index 81de876d51267..1b39119c89951 100644
> --- a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
> +++ b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
> @@ -436,6 +436,7 @@ struct vdec_vp9_slice_ref {
>   * @frame_ctx:		4 frame context according to VP9 Spec
>   * @frame_ctx_helper:	4 frame context according to newest kernel spec
>   * @dirty:		state of each frame context
> + * @local_vsi:		local instance vsi information
>   * @init_vsi:		vsi used for initialized VP9 instance
>   * @vsi:		vsi used for decoding/flush ...
>   * @core_vsi:		vsi used for Core stage
> @@ -482,6 +483,8 @@ struct vdec_vp9_slice_instance {
>  	struct v4l2_vp9_frame_context frame_ctx_helper;
>  	unsigned char dirty[4];
>  
> +	struct vdec_vp9_slice_vsi local_vsi;
> +
>  	/* MicroP vsi */
>  	union {
>  		struct vdec_vp9_slice_init_vsi *init_vsi;
> @@ -1616,16 +1619,10 @@ static int vdec_vp9_slice_update_single(struct vdec_vp9_slice_instance *instance
>  }
>  
>  static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance *instance,
> -				     struct vdec_lat_buf *lat_buf,
> -				     struct vdec_vp9_slice_pfc *pfc)
> +				     struct vdec_vp9_slice_vsi *vsi)
>  {
> -	struct vdec_vp9_slice_vsi *vsi;
> -
> -	vsi = &pfc->vsi;
> -	memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));
> -
>  	mtk_vcodec_debug(instance, "Frame %u LAT CRC 0x%08x %lx %lx\n",
> -			 pfc->seq, vsi->state.crc[0],
> +			 (instance->seq - 1), vsi->state.crc[0],
>  			 (unsigned long)vsi->trans.dma_addr,
>  			 (unsigned long)vsi->trans.dma_addr_end);
>  
> @@ -2090,6 +2087,13 @@ static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
>  		return ret;
>  	}
>  
> +	if (IS_VDEC_INNER_RACING(ctx->dev->dec_capability)) {
> +		vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
> +		memcpy(&instance->local_vsi, vsi, sizeof(*vsi));
> +		vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf);
> +		vsi = &instance->local_vsi;
> +	}
> +
>  	if (instance->irq) {
>  		ret = mtk_vcodec_wait_for_done_ctx(ctx,	MTK_INST_IRQ_RECEIVED,
>  						   WAIT_INTR_TIMEOUT_MS, MTK_VDEC_LAT0);
> @@ -2102,22 +2106,25 @@ static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
>  	}
>  
>  	vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
> -	ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc);
> +	ret = vdec_vp9_slice_update_lat(instance, vsi);
>  
> -	/* LAT trans full, no more UBE or decode timeout */
> -	if (ret) {
> -		mtk_vcodec_err(instance, "VP9 decode error: %d\n", ret);
> -		return ret;
> -	}
> +	if (!IS_VDEC_INNER_RACING(ctx->dev->dec_capability))
> +		/* LAT trans full, no more UBE or decode timeout */
> +		if (ret) {
> +			mtk_vcodec_err(instance, "frame[%d] decode error: %d\n",
> +				       ret, (instance->seq - 1));
> +			return ret;
> +		}
>  
> -	mtk_vcodec_debug(instance, "lat dma addr: 0x%lx 0x%lx\n",
> -			 (unsigned long)pfc->vsi.trans.dma_addr,
> -			 (unsigned long)pfc->vsi.trans.dma_addr_end);
>  
> -	vdec_msg_queue_update_ube_wptr(&ctx->msg_queue,
> -				       vsi->trans.dma_addr_end +
> -				       ctx->msg_queue.wdma_addr.dma_addr);
> -	vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf);
> +	vsi->trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
> +	vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, vsi->trans.dma_addr_end);
> +	if (!IS_VDEC_INNER_RACING(ctx->dev->dec_capability))
> +		vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf);
> +
> +	mtk_vcodec_debug(instance, "lat trans end addr(0x%lx), ube start addr(0x%lx)\n",
> +			 (unsigned long)vsi->trans.dma_addr_end,
> +			 (unsigned long)ctx->msg_queue.wdma_addr.dma_addr);
>  
>  	return 0;
>  }
> @@ -2139,40 +2146,40 @@ static int vdec_vp9_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
>  static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf)
>  {
>  	struct vdec_vp9_slice_instance *instance;
> -	struct vdec_vp9_slice_pfc *pfc;
> +	struct vdec_vp9_slice_pfc *pfc = NULL;
>  	struct mtk_vcodec_ctx *ctx = NULL;
>  	struct vdec_fb *fb = NULL;
>  	int ret = -EINVAL;
>  
>  	if (!lat_buf)
> -		goto err;
> +		return -EINVAL;
>  
>  	pfc = lat_buf->private_data;
>  	ctx = lat_buf->ctx;
>  	if (!pfc || !ctx)
> -		goto err;
> +		return -EINVAL;
>  
>  	instance = ctx->drv_handle;
>  	if (!instance)
> -		goto err;
> +		return -EINVAL;
>  
>  	fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
>  	if (!fb) {
>  		ret = -EBUSY;
> -		goto err;
> +		goto vdec_dec_end;
>  	}
>  
>  	ret = vdec_vp9_slice_setup_core(instance, fb, lat_buf, pfc);
>  	if (ret) {
>  		mtk_vcodec_err(instance, "vdec_vp9_slice_setup_core\n");
> -		goto err;
> +		goto vdec_dec_end;
>  	}
>  	vdec_vp9_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi);
>  
>  	ret = vpu_dec_core(&instance->vpu);
>  	if (ret) {
>  		mtk_vcodec_err(instance, "vpu_dec_core\n");
> -		goto err;
> +		goto vdec_dec_end;
>  	}
>  
>  	if (instance->irq) {
> @@ -2190,24 +2197,26 @@ static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf)
>  	ret = vdec_vp9_slice_update_core(instance, lat_buf, pfc);
>  	if (ret) {
>  		mtk_vcodec_err(instance, "vdec_vp9_slice_update_core\n");
> -		goto err;
> +		goto vdec_dec_end;
>  	}
>  
> -	pfc->vsi.trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
>  	mtk_vcodec_debug(instance, "core dma_addr_end 0x%lx\n",
>  			 (unsigned long)pfc->vsi.trans.dma_addr_end);
> -	vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
> -	ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req);
> -
> -	return 0;
>  
> -err:
> -	if (ctx && pfc) {
> -		/* always update read pointer */
> -		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
> +vdec_dec_end:
> +	/* always update read pointer */
> +	if (IS_VDEC_INNER_RACING(ctx->dev->dec_capability))
> +		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue,
> +					       pfc->vsi.trans.dma_addr);
> +	else
> +		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue,
> +					       pfc->vsi.trans.dma_addr_end);
>  
> +	if (ret) {
>  		if (fb)
>  			ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req);
> +	} else {
> +		ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req);
>  	}
>  	return ret;
>  }


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v5] media: mediatek: vcodec: Add to support VP9 inner racing mode
  2022-10-25  1:46 [PATCH v5] media: mediatek: vcodec: Add to support VP9 inner racing mode Mingjia Zhang
  2022-11-04 10:19 ` Hans Verkuil
@ 2022-11-07 11:46 ` Yunfei Dong (董云飞)
  2022-11-07 13:18   ` AngeloGioacchino Del Regno
  1 sibling, 1 reply; 4+ messages in thread
From: Yunfei Dong (董云飞) @ 2022-11-07 11:46 UTC (permalink / raw)
  To: angelogioacchino.delregno, nicolas, robh+dt,
	Tiffany Lin (林慧珊),
	mchehab, Mingjia Zhang (张明佳),
	tfiga, benjamin.gaignard, hverkuil-cisco, matthias.bgg,
	Andrew-CT Chen (陳智迪),
	acourbot
  Cc: Xiaoyong Lu (卢小勇),
	linux-kernel, frkoenig, George Sun (孙林),
	stevecho, linux-media, devicetree, linux-mediatek, daniel,
	dri-devel, Irui Wang (王瑞),
	hsinyi, linux-arm-kernel, Project_Global_Chrome_Upstream_Group

Hi Mingjia,

Thanks for your patch.
Some comments need to get your feedback.
On Tue, 2022-10-25 at 09:46 +0800, Mingjia Zhang wrote:
> Enable VP9 inner racing mode
> We send lat trans buffer to the core when trigger lat to work,
> instead of waiting for the lat decode done.
> It can be reduce decoder latency.
> 
> Signed-off-by: Mingjia Zhang <mingjia.zhang@mediatek.com>
> ---
> Changes from v3:
> 
> - CTS/GTS test pass
> - Fluster result: Ran 275/303 tests successfully
> 
> Changes from v2:
> 
> - CTS/GTS test pass
> - Fluster result: Ran 240/303 tests successfully
> 
> Changes from v1:
> 
> - CTS/GTS test pass
> ---
>  .../vcodec/vdec/vdec_vp9_req_lat_if.c         | 85 ++++++++++-------
> --
>  1 file changed, 47 insertions(+), 38 deletions(-)
> 
> diff --git
> a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
> b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
> index 81de876d51267..1b39119c89951 100644
> ---
> a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
> +++
> b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
> @@ -436,6 +436,7 @@ struct vdec_vp9_slice_ref {
>   * @frame_ctx:		4 frame context according to VP9 Spec
>   * @frame_ctx_helper:	4 frame context according to newest
> kernel spec
>   * @dirty:		state of each frame context
> + * @local_vsi:		local instance vsi information
>   * @init_vsi:		vsi used for initialized VP9 instance
>   * @vsi:		vsi used for decoding/flush ...
>   * @core_vsi:		vsi used for Core stage
> @@ -482,6 +483,8 @@ struct vdec_vp9_slice_instance {
>  	struct v4l2_vp9_frame_context frame_ctx_helper;
>  	unsigned char dirty[4];
>  
> +	struct vdec_vp9_slice_vsi local_vsi;
> +
>  	/* MicroP vsi */
>  	union {
>  		struct vdec_vp9_slice_init_vsi *init_vsi;
> @@ -1616,16 +1619,10 @@ static int
> vdec_vp9_slice_update_single(struct vdec_vp9_slice_instance *instance
>  }
>  
>  static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance
> *instance,
> -				     struct vdec_lat_buf *lat_buf,
> -				     struct vdec_vp9_slice_pfc *pfc)
> +				     struct vdec_vp9_slice_vsi *vsi)
>  {
> -	struct vdec_vp9_slice_vsi *vsi;
> -
> -	vsi = &pfc->vsi;
> -	memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));
> -
>  	mtk_vcodec_debug(instance, "Frame %u LAT CRC 0x%08x %lx %lx\n",
> -			 pfc->seq, vsi->state.crc[0],
> +			 (instance->seq - 1), vsi->state.crc[0],
>  			 (unsigned long)vsi->trans.dma_addr,
>  			 (unsigned long)vsi->trans.dma_addr_end);
>  
> @@ -2090,6 +2087,13 @@ static int vdec_vp9_slice_lat_decode(void
> *h_vdec, struct mtk_vcodec_mem *bs,
>  		return ret;
>  	}
>  
> +	if (IS_VDEC_INNER_RACING(ctx->dev->dec_capability)) {
> +		vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
> +		memcpy(&instance->local_vsi, vsi, sizeof(*vsi));
> +		vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx,
> lat_buf);
> +		vsi = &instance->local_vsi;
> +	}
> +
>  	if (instance->irq) {
>  		ret = mtk_vcodec_wait_for_done_ctx(ctx,	MTK_INST_IRQ_
> RECEIVED,
>  						   WAIT_INTR_TIMEOUT_MS
> , MTK_VDEC_LAT0);
> @@ -2102,22 +2106,25 @@ static int vdec_vp9_slice_lat_decode(void
> *h_vdec, struct mtk_vcodec_mem *bs,
>  	}
>  
>  	vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
> -	ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc);
> +	ret = vdec_vp9_slice_update_lat(instance, vsi);
>  
> -	/* LAT trans full, no more UBE or decode timeout */
> -	if (ret) {
> -		mtk_vcodec_err(instance, "VP9 decode error: %d\n",
> ret);
> -		return ret;
> -	}
> +	if (!IS_VDEC_INNER_RACING(ctx->dev->dec_capability))
> +		/* LAT trans full, no more UBE or decode timeout */
> +		if (ret) {
> +			mtk_vcodec_err(instance, "frame[%d] decode
> error: %d\n",
> +				       ret, (instance->seq - 1));
> +			return ret;
> +		}
>  
If inner racing decode error, how to do? Looks this error condition
only in non inner racing mode.
> -	mtk_vcodec_debug(instance, "lat dma addr: 0x%lx 0x%lx\n",
> -			 (unsigned long)pfc->vsi.trans.dma_addr,
> -			 (unsigned long)pfc->vsi.trans.dma_addr_end);
>  
> -	vdec_msg_queue_update_ube_wptr(&ctx->msg_queue,
> -				       vsi->trans.dma_addr_end +
> -				       ctx-
> >msg_queue.wdma_addr.dma_addr);
> -	vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf);
> +	vsi->trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
> +	vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, vsi-
> >trans.dma_addr_end);
> +	if (!IS_VDEC_INNER_RACING(ctx->dev->dec_capability))
> +		vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx,
> lat_buf);
> +
> +	mtk_vcodec_debug(instance, "lat trans end addr(0x%lx), ube
> start addr(0x%lx)\n",
> +			 (unsigned long)vsi->trans.dma_addr_end,
> +			 (unsigned long)ctx-
> >msg_queue.wdma_addr.dma_addr);
>  
>  	return 0;
>  }
> @@ -2139,40 +2146,40 @@ static int vdec_vp9_slice_decode(void
> *h_vdec, struct mtk_vcodec_mem *bs,
>  static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf)
>  {
>  	struct vdec_vp9_slice_instance *instance;
> -	struct vdec_vp9_slice_pfc *pfc;
> +	struct vdec_vp9_slice_pfc *pfc = NULL;
>  	struct mtk_vcodec_ctx *ctx = NULL;
>  	struct vdec_fb *fb = NULL;
>  	int ret = -EINVAL;
>  
>  	if (!lat_buf)
> -		goto err;
> +		return -EINVAL;
>  
>  	pfc = lat_buf->private_data;
>  	ctx = lat_buf->ctx;
>  	if (!pfc || !ctx)
> -		goto err;
> +		return -EINVAL;
>  
>  	instance = ctx->drv_handle;
>  	if (!instance)
> -		goto err;
> +		return -EINVAL;
>  
>  	fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
>  	if (!fb) {
>  		ret = -EBUSY;
> -		goto err;
> +		goto vdec_dec_end;
>  	}
>  
>  	ret = vdec_vp9_slice_setup_core(instance, fb, lat_buf, pfc);
>  	if (ret) {
>  		mtk_vcodec_err(instance,
> "vdec_vp9_slice_setup_core\n");
> -		goto err;
> +		goto vdec_dec_end;
>  	}
>  	vdec_vp9_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi);
>  
>  	ret = vpu_dec_core(&instance->vpu);
>  	if (ret) {
>  		mtk_vcodec_err(instance, "vpu_dec_core\n");
> -		goto err;
> +		goto vdec_dec_end;
>  	}
>  
>  	if (instance->irq) {
> @@ -2190,24 +2197,26 @@ static int vdec_vp9_slice_core_decode(struct
> vdec_lat_buf *lat_buf)
>  	ret = vdec_vp9_slice_update_core(instance, lat_buf, pfc);
>  	if (ret) {
>  		mtk_vcodec_err(instance,
> "vdec_vp9_slice_update_core\n");
> -		goto err;
> +		goto vdec_dec_end;
>  	}
>  
> -	pfc->vsi.trans.dma_addr_end += ctx-
> >msg_queue.wdma_addr.dma_addr;
>  	mtk_vcodec_debug(instance, "core dma_addr_end 0x%lx\n",
>  			 (unsigned long)pfc->vsi.trans.dma_addr_end);
> -	vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc-
> >vsi.trans.dma_addr_end);
> -	ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf-
> >src_buf_req);
> -
> -	return 0;
>  
> -err:
> -	if (ctx && pfc) {
> -		/* always update read pointer */
> -		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc-
> >vsi.trans.dma_addr_end);
> +vdec_dec_end:
> +	/* always update read pointer */
> +	if (IS_VDEC_INNER_RACING(ctx->dev->dec_capability))
> +		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue,
> +					       pfc-
> >vsi.trans.dma_addr);
> +	else
> +		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue,
> +					       pfc-
> >vsi.trans.dma_addr_end);
>  
> +	if (ret) {
No need {

Best Regards,
Yunfei Dong
>  		if (fb)
>  			ctx->dev->vdec_pdata->cap_to_disp(ctx, 1,
> lat_buf->src_buf_req);
> +	} else {
> +		ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf-
> >src_buf_req);
>  	}
>  	return ret;
>  }
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v5] media: mediatek: vcodec: Add to support VP9 inner racing mode
  2022-11-07 11:46 ` Yunfei Dong (董云飞)
@ 2022-11-07 13:18   ` AngeloGioacchino Del Regno
  0 siblings, 0 replies; 4+ messages in thread
From: AngeloGioacchino Del Regno @ 2022-11-07 13:18 UTC (permalink / raw)
  To: Yunfei Dong (董云飞),
	nicolas, robh+dt, Tiffany Lin (林慧珊),
	mchehab, Mingjia Zhang (张明佳),
	tfiga, benjamin.gaignard, hverkuil-cisco, matthias.bgg,
	Andrew-CT Chen (陳智迪),
	acourbot
  Cc: Xiaoyong Lu (卢小勇),
	linux-kernel, frkoenig, George Sun (孙林),
	stevecho, linux-media, devicetree, linux-mediatek, daniel,
	dri-devel, Irui Wang (王瑞),
	hsinyi, linux-arm-kernel, Project_Global_Chrome_Upstream_Group

Il 07/11/22 12:46, Yunfei Dong (董云飞) ha scritto:
> Hi Mingjia,
> 
> Thanks for your patch.
> Some comments need to get your feedback.
> On Tue, 2022-10-25 at 09:46 +0800, Mingjia Zhang wrote:
>> Enable VP9 inner racing mode
>> We send lat trans buffer to the core when trigger lat to work,
>> instead of waiting for the lat decode done.
>> It can be reduce decoder latency.
>>
>> Signed-off-by: Mingjia Zhang <mingjia.zhang@mediatek.com>
>> ---
>> Changes from v3:
>>
>> - CTS/GTS test pass
>> - Fluster result: Ran 275/303 tests successfully
>>
>> Changes from v2:
>>
>> - CTS/GTS test pass
>> - Fluster result: Ran 240/303 tests successfully
>>
>> Changes from v1:
>>
>> - CTS/GTS test pass
>> ---
>>   .../vcodec/vdec/vdec_vp9_req_lat_if.c         | 85 ++++++++++-------
>> --
>>   1 file changed, 47 insertions(+), 38 deletions(-)
>>
>> diff --git
>> a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
>> b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
>> index 81de876d51267..1b39119c89951 100644
>> ---
>> a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
>> +++
>> b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
>> @@ -436,6 +436,7 @@ struct vdec_vp9_slice_ref {
>>    * @frame_ctx:		4 frame context according to VP9 Spec
>>    * @frame_ctx_helper:	4 frame context according to newest
>> kernel spec
>>    * @dirty:		state of each frame context
>> + * @local_vsi:		local instance vsi information
>>    * @init_vsi:		vsi used for initialized VP9 instance
>>    * @vsi:		vsi used for decoding/flush ...
>>    * @core_vsi:		vsi used for Core stage
>> @@ -482,6 +483,8 @@ struct vdec_vp9_slice_instance {
>>   	struct v4l2_vp9_frame_context frame_ctx_helper;
>>   	unsigned char dirty[4];
>>   
>> +	struct vdec_vp9_slice_vsi local_vsi;
>> +
>>   	/* MicroP vsi */
>>   	union {
>>   		struct vdec_vp9_slice_init_vsi *init_vsi;
>> @@ -1616,16 +1619,10 @@ static int
>> vdec_vp9_slice_update_single(struct vdec_vp9_slice_instance *instance
>>   }
>>   
>>   static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance
>> *instance,
>> -				     struct vdec_lat_buf *lat_buf,
>> -				     struct vdec_vp9_slice_pfc *pfc)
>> +				     struct vdec_vp9_slice_vsi *vsi)
>>   {
>> -	struct vdec_vp9_slice_vsi *vsi;
>> -
>> -	vsi = &pfc->vsi;
>> -	memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));
>> -
>>   	mtk_vcodec_debug(instance, "Frame %u LAT CRC 0x%08x %lx %lx\n",
>> -			 pfc->seq, vsi->state.crc[0],
>> +			 (instance->seq - 1), vsi->state.crc[0],
>>   			 (unsigned long)vsi->trans.dma_addr,
>>   			 (unsigned long)vsi->trans.dma_addr_end);
>>   
>> @@ -2090,6 +2087,13 @@ static int vdec_vp9_slice_lat_decode(void
>> *h_vdec, struct mtk_vcodec_mem *bs,
>>   		return ret;
>>   	}
>>   
>> +	if (IS_VDEC_INNER_RACING(ctx->dev->dec_capability)) {
>> +		vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
>> +		memcpy(&instance->local_vsi, vsi, sizeof(*vsi));
>> +		vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx,
>> lat_buf);
>> +		vsi = &instance->local_vsi;
>> +	}
>> +
>>   	if (instance->irq) {
>>   		ret = mtk_vcodec_wait_for_done_ctx(ctx,	MTK_INST_IRQ_
>> RECEIVED,
>>   						   WAIT_INTR_TIMEOUT_MS
>> , MTK_VDEC_LAT0);
>> @@ -2102,22 +2106,25 @@ static int vdec_vp9_slice_lat_decode(void
>> *h_vdec, struct mtk_vcodec_mem *bs,
>>   	}
>>   
>>   	vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
>> -	ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc);
>> +	ret = vdec_vp9_slice_update_lat(instance, vsi);
>>   
>> -	/* LAT trans full, no more UBE or decode timeout */
>> -	if (ret) {
>> -		mtk_vcodec_err(instance, "VP9 decode error: %d\n",
>> ret);
>> -		return ret;
>> -	}
>> +	if (!IS_VDEC_INNER_RACING(ctx->dev->dec_capability))
>> +		/* LAT trans full, no more UBE or decode timeout */
>> +		if (ret) {
>> +			mtk_vcodec_err(instance, "frame[%d] decode
>> error: %d\n",
>> +				       ret, (instance->seq - 1));
>> +			return ret;
>> +		}
>>   
> If inner racing decode error, how to do? Looks this error condition
> only in non inner racing mode.
>> -	mtk_vcodec_debug(instance, "lat dma addr: 0x%lx 0x%lx\n",
>> -			 (unsigned long)pfc->vsi.trans.dma_addr,
>> -			 (unsigned long)pfc->vsi.trans.dma_addr_end);
>>   
>> -	vdec_msg_queue_update_ube_wptr(&ctx->msg_queue,
>> -				       vsi->trans.dma_addr_end +
>> -				       ctx-
>>> msg_queue.wdma_addr.dma_addr);
>> -	vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf);
>> +	vsi->trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
>> +	vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, vsi-
>>> trans.dma_addr_end);
>> +	if (!IS_VDEC_INNER_RACING(ctx->dev->dec_capability))
>> +		vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx,
>> lat_buf);
>> +
>> +	mtk_vcodec_debug(instance, "lat trans end addr(0x%lx), ube
>> start addr(0x%lx)\n",
>> +			 (unsigned long)vsi->trans.dma_addr_end,
>> +			 (unsigned long)ctx-
>>> msg_queue.wdma_addr.dma_addr);
>>   
>>   	return 0;
>>   }
>> @@ -2139,40 +2146,40 @@ static int vdec_vp9_slice_decode(void
>> *h_vdec, struct mtk_vcodec_mem *bs,
>>   static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf)
>>   {
>>   	struct vdec_vp9_slice_instance *instance;
>> -	struct vdec_vp9_slice_pfc *pfc;
>> +	struct vdec_vp9_slice_pfc *pfc = NULL;
>>   	struct mtk_vcodec_ctx *ctx = NULL;
>>   	struct vdec_fb *fb = NULL;
>>   	int ret = -EINVAL;
>>   
>>   	if (!lat_buf)
>> -		goto err;
>> +		return -EINVAL;
>>   
>>   	pfc = lat_buf->private_data;
>>   	ctx = lat_buf->ctx;
>>   	if (!pfc || !ctx)
>> -		goto err;
>> +		return -EINVAL;
>>   
>>   	instance = ctx->drv_handle;
>>   	if (!instance)
>> -		goto err;
>> +		return -EINVAL;
>>   
>>   	fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
>>   	if (!fb) {
>>   		ret = -EBUSY;
>> -		goto err;
>> +		goto vdec_dec_end;
>>   	}
>>   
>>   	ret = vdec_vp9_slice_setup_core(instance, fb, lat_buf, pfc);
>>   	if (ret) {
>>   		mtk_vcodec_err(instance,
>> "vdec_vp9_slice_setup_core\n");
>> -		goto err;
>> +		goto vdec_dec_end;
>>   	}
>>   	vdec_vp9_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi);
>>   
>>   	ret = vpu_dec_core(&instance->vpu);
>>   	if (ret) {
>>   		mtk_vcodec_err(instance, "vpu_dec_core\n");
>> -		goto err;
>> +		goto vdec_dec_end;
>>   	}
>>   
>>   	if (instance->irq) {
>> @@ -2190,24 +2197,26 @@ static int vdec_vp9_slice_core_decode(struct
>> vdec_lat_buf *lat_buf)
>>   	ret = vdec_vp9_slice_update_core(instance, lat_buf, pfc);
>>   	if (ret) {
>>   		mtk_vcodec_err(instance,
>> "vdec_vp9_slice_update_core\n");
>> -		goto err;
>> +		goto vdec_dec_end;
>>   	}
>>   
>> -	pfc->vsi.trans.dma_addr_end += ctx-
>>> msg_queue.wdma_addr.dma_addr;
>>   	mtk_vcodec_debug(instance, "core dma_addr_end 0x%lx\n",
>>   			 (unsigned long)pfc->vsi.trans.dma_addr_end);
>> -	vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc-
>>> vsi.trans.dma_addr_end);
>> -	ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf-
>>> src_buf_req);
>> -
>> -	return 0;
>>   
>> -err:
>> -	if (ctx && pfc) {
>> -		/* always update read pointer */
>> -		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc-
>>> vsi.trans.dma_addr_end);
>> +vdec_dec_end:
>> +	/* always update read pointer */
>> +	if (IS_VDEC_INNER_RACING(ctx->dev->dec_capability))
>> +		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue,
>> +					       pfc-
>>> vsi.trans.dma_addr);
>> +	else
>> +		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue,
>> +					       pfc-
>>> vsi.trans.dma_addr_end);
>>   
>> +	if (ret) {
> No need {
> 

It's right, there's no *need* to, but in this case I'd keep it for the sake
of readability, as in..

	if (ret) {
		if (fb)
			some;
	} else {
		thing;
	}

looks more readable than

	if (ret)
		if (fb)
			some;
	else
		thing;

... alternatively, we can do something like ...

	if (ret == 0)
		thing;
	else if (fb)
		some;

But then, there's another consideration: vdec_vp9_slice_update_core() never
returns anything else than zero, so I don't see how, in this case, `ret` can
be different than zero.

This means that the "else if (fb)" part would never happen, so, unless you're
planning to introduce a change that may return a failure in
vdec_vp9_slice_update_core(), this branch makes no sense at all.

Of course, in case you plan to do that, you should add the check in that change
or you should add that change in this commit.

Regards,
Angelo

> Best Regards,
> Yunfei Dong
>>   		if (fb)
>>   			ctx->dev->vdec_pdata->cap_to_disp(ctx, 1,
>> lat_buf->src_buf_req);
>> +	} else {
>> +		ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf-
>>> src_buf_req);
>>   	}
>>   	return ret;
>>   }



_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-11-07 13:20 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-25  1:46 [PATCH v5] media: mediatek: vcodec: Add to support VP9 inner racing mode Mingjia Zhang
2022-11-04 10:19 ` Hans Verkuil
2022-11-07 11:46 ` Yunfei Dong (董云飞)
2022-11-07 13:18   ` AngeloGioacchino Del Regno

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).