linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jernej Skrabec <jernej.skrabec@siol.net>
To: paul.kocialkowski@bootlin.com, maxime.ripard@bootlin.com
Cc: wens@csie.org, mchehab@kernel.org, gregkh@linuxfoundation.org,
	linux-media@vger.kernel.org, devel@driverdev.osuosl.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH 7/7] media: cedrus: Improve H264 memory efficiency
Date: Thu, 30 May 2019 23:15:16 +0200	[thread overview]
Message-ID: <20190530211516.1891-8-jernej.skrabec@siol.net> (raw)
In-Reply-To: <20190530211516.1891-1-jernej.skrabec@siol.net>

H264 decoder driver preallocated pretty big worst case mv col buffer
pool. It turns out that pool is most of the time much bigger than it
needs to be.

Solution implemented here is to allocate memory only if capture buffer
is actually used and only as much as it is really necessary.

This is also preparation for 4K video decoding support, which will be
implemented later.

Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
---
 drivers/staging/media/sunxi/cedrus/cedrus.h   |  4 -
 .../staging/media/sunxi/cedrus/cedrus_h264.c  | 81 +++++++------------
 2 files changed, 28 insertions(+), 57 deletions(-)

diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h
index 16c1bdfd243a..fcbbbef65494 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.h
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
@@ -106,10 +106,6 @@ struct cedrus_ctx {
 
 	union {
 		struct {
-			void		*mv_col_buf;
-			dma_addr_t	mv_col_buf_dma;
-			ssize_t		mv_col_buf_field_size;
-			ssize_t		mv_col_buf_size;
 			void		*pic_info_buf;
 			dma_addr_t	pic_info_buf_dma;
 			void		*neighbor_info_buf;
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c
index b2290f98d81a..758fd0049e8f 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c
@@ -54,17 +54,14 @@ static void cedrus_h264_write_sram(struct cedrus_dev *dev,
 		cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, *buffer++);
 }
 
-static dma_addr_t cedrus_h264_mv_col_buf_addr(struct cedrus_ctx *ctx,
-					      unsigned int position,
+static dma_addr_t cedrus_h264_mv_col_buf_addr(struct cedrus_buffer *buf,
 					      unsigned int field)
 {
-	dma_addr_t addr = ctx->codec.h264.mv_col_buf_dma;
-
-	/* Adjust for the position */
-	addr += position * ctx->codec.h264.mv_col_buf_field_size * 2;
+	dma_addr_t addr = buf->extra_buf_dma;
 
 	/* Adjust for the field */
-	addr += field * ctx->codec.h264.mv_col_buf_field_size;
+	if (field)
+		addr += buf->extra_buf_size / 2;
 
 	return addr;
 }
@@ -76,7 +73,6 @@ static void cedrus_fill_ref_pic(struct cedrus_ctx *ctx,
 				struct cedrus_h264_sram_ref_pic *pic)
 {
 	struct vb2_buffer *vbuf = &buf->m2m_buf.vb.vb2_buf;
-	unsigned int position = buf->codec.h264.position;
 
 	pic->top_field_order_cnt = cpu_to_le32(top_field_order_cnt);
 	pic->bottom_field_order_cnt = cpu_to_le32(bottom_field_order_cnt);
@@ -84,10 +80,8 @@ static void cedrus_fill_ref_pic(struct cedrus_ctx *ctx,
 
 	pic->luma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 0));
 	pic->chroma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 1));
-	pic->mv_col_top_ptr =
-		cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 0));
-	pic->mv_col_bot_ptr =
-		cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 1));
+	pic->mv_col_top_ptr = cpu_to_le32(cedrus_h264_mv_col_buf_addr(buf, 0));
+	pic->mv_col_bot_ptr = cpu_to_le32(cedrus_h264_mv_col_buf_addr(buf, 1));
 }
 
 static void cedrus_write_frame_list(struct cedrus_ctx *ctx,
@@ -142,6 +136,28 @@ static void cedrus_write_frame_list(struct cedrus_ctx *ctx,
 	output_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf);
 	output_buf->codec.h264.position = position;
 
+	if (!output_buf->extra_buf_size) {
+		const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
+		unsigned int field_size;
+
+		field_size = DIV_ROUND_UP(ctx->src_fmt.width, 16) *
+			DIV_ROUND_UP(ctx->src_fmt.height, 16) * 16;
+		if (!(sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE))
+			field_size = field_size * 2;
+		if (!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY))
+			field_size = field_size * 2;
+
+		output_buf->extra_buf_size = field_size * 2;
+		output_buf->extra_buf =
+			dma_alloc_coherent(dev->dev,
+					   output_buf->extra_buf_size,
+					   &output_buf->extra_buf_dma,
+					   GFP_KERNEL);
+
+		if (!output_buf->extra_buf)
+			output_buf->extra_buf_size = 0;
+	}
+
 	if (slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC)
 		output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FIELD;
 	else if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
@@ -476,8 +492,6 @@ static void cedrus_h264_setup(struct cedrus_ctx *ctx,
 static int cedrus_h264_start(struct cedrus_ctx *ctx)
 {
 	struct cedrus_dev *dev = ctx->dev;
-	unsigned int field_size;
-	unsigned int mv_col_size;
 	int ret;
 
 	/*
@@ -509,44 +523,8 @@ static int cedrus_h264_start(struct cedrus_ctx *ctx)
 		goto err_pic_buf;
 	}
 
-	field_size = DIV_ROUND_UP(ctx->src_fmt.width, 16) *
-		DIV_ROUND_UP(ctx->src_fmt.height, 16) * 16;
-
-	/*
-	 * FIXME: This is actually conditional to
-	 * V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE not being set, we
-	 * might have to rework this if memory efficiency ever is
-	 * something we need to work on.
-	 */
-	field_size = field_size * 2;
-
-	/*
-	 * FIXME: This is actually conditional to
-	 * V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY not being set, we might
-	 * have to rework this if memory efficiency ever is something
-	 * we need to work on.
-	 */
-	field_size = field_size * 2;
-	ctx->codec.h264.mv_col_buf_field_size = field_size;
-
-	mv_col_size = field_size * 2 * CEDRUS_H264_FRAME_NUM;
-	ctx->codec.h264.mv_col_buf_size = mv_col_size;
-	ctx->codec.h264.mv_col_buf = dma_alloc_coherent(dev->dev,
-							ctx->codec.h264.mv_col_buf_size,
-							&ctx->codec.h264.mv_col_buf_dma,
-							GFP_KERNEL);
-	if (!ctx->codec.h264.mv_col_buf) {
-		ret = -ENOMEM;
-		goto err_neighbor_buf;
-	}
-
 	return 0;
 
-err_neighbor_buf:
-	dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
-			  ctx->codec.h264.neighbor_info_buf,
-			  ctx->codec.h264.neighbor_info_buf_dma);
-
 err_pic_buf:
 	dma_free_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE,
 			  ctx->codec.h264.pic_info_buf,
@@ -558,9 +536,6 @@ static void cedrus_h264_stop(struct cedrus_ctx *ctx)
 {
 	struct cedrus_dev *dev = ctx->dev;
 
-	dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size,
-			  ctx->codec.h264.mv_col_buf,
-			  ctx->codec.h264.mv_col_buf_dma);
 	dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
 			  ctx->codec.h264.neighbor_info_buf,
 			  ctx->codec.h264.neighbor_info_buf_dma);
-- 
2.21.0


  parent reply	other threads:[~2019-05-30 21:15 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-30 21:15 [PATCH 0/7] media: cedrus: Improvements/cleanup Jernej Skrabec
2019-05-30 21:15 ` [PATCH 1/7] media: cedrus: Disable engine after each slice decoding Jernej Skrabec
2019-06-03 11:38   ` Maxime Ripard
2019-08-12 13:28     ` Ezequiel Garcia
2019-05-30 21:15 ` [PATCH 2/7] media: cedrus: Fix H264 default reference index count Jernej Skrabec
2019-06-03 11:46   ` Maxime Ripard
2019-06-03 15:34     ` Jernej Škrabec
2019-05-30 21:15 ` [PATCH 3/7] media: cedrus: Fix decoding for some H264 videos Jernej Skrabec
2019-06-03 11:55   ` Maxime Ripard
2019-06-03 15:37     ` Jernej Škrabec
2019-06-06 12:45       ` Dan Carpenter
2019-05-30 21:15 ` [PATCH 4/7] media: cedrus: Remove dst_bufs from context Jernej Skrabec
2019-06-03 12:13   ` Maxime Ripard
2019-06-05 21:07   ` Paul Kocialkowski
2019-08-12 13:42   ` Ezequiel Garcia
2019-05-30 21:15 ` [PATCH 5/7] media: cedrus: Don't set chroma size for scale & rotation Jernej Skrabec
2019-06-05 21:08   ` Paul Kocialkowski
2019-05-30 21:15 ` [PATCH 6/7] media: cedrus: Add infra for extra buffers connected to capture buffers Jernej Skrabec
2019-06-03 12:18   ` Maxime Ripard
2019-06-03 15:48     ` Jernej Škrabec
2019-06-05 21:10       ` Paul Kocialkowski
2019-06-05 21:52         ` Jernej Škrabec
2019-06-06  8:33       ` Maxime Ripard
2019-05-30 21:15 ` Jernej Skrabec [this message]
2019-06-03 12:23   ` [PATCH 7/7] media: cedrus: Improve H264 memory efficiency Maxime Ripard
2019-06-03 16:37     ` Jernej Škrabec
2019-06-05 21:12       ` Paul Kocialkowski
2019-08-12 12:12 ` [PATCH 0/7] media: cedrus: Improvements/cleanup Hans Verkuil
2019-08-12 12:21   ` Jernej Škrabec
2019-08-12 13:55   ` Maxime Ripard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190530211516.1891-8-jernej.skrabec@siol.net \
    --to=jernej.skrabec@siol.net \
    --cc=devel@driverdev.osuosl.org \
    --cc=gregkh@linuxfoundation.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-media@vger.kernel.org \
    --cc=maxime.ripard@bootlin.com \
    --cc=mchehab@kernel.org \
    --cc=paul.kocialkowski@bootlin.com \
    --cc=wens@csie.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).