All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/7] gpu: ipu-v3: image-convert: fix output seam valid interval
@ 2019-08-14 11:54 Philipp Zabel
  2019-08-14 11:54 ` [PATCH 2/7] gpu: ipu-v3: image-convert: move output seam valid interval calculation into find_best_seam Philipp Zabel
                   ` (5 more replies)
  0 siblings, 6 replies; 8+ messages in thread
From: Philipp Zabel @ 2019-08-14 11:54 UTC (permalink / raw)
  To: dri-devel; +Cc: kernel, Steve Longerbeam

This fixes a failure to determine any seam if the output size is
exactly 1024 multiplied by the number of tiles in a given direction.
In that case an empty interval out_start == out_end is being passed
to find_best_seam, which looks for a seam out_start <= x < out_end.

Also reduce the interval for all but the left column / top row, to
avoid returning position 0 as best fit.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c
index 9d25db6924b3..c9909f1c9ffb 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -723,9 +723,9 @@ static void find_seams(struct ipu_image_convert_ctx *ctx,
 		 */
 
 		/* Start within 1024 pixels of the right edge */
-		out_start = max_t(int, 0, out_right - 1024);
+		out_start = max_t(int, col * out_left_align, out_right - 1024);
 		/* End before having to add more columns to the left */
-		out_end = min_t(unsigned int, out_right, col * 1024);
+		out_end = min_t(unsigned int, out_right, col * 1024 + 1);
 
 		find_best_seam(ctx, out_start, out_end,
 			       in_right, out_right,
@@ -768,9 +768,9 @@ static void find_seams(struct ipu_image_convert_ctx *ctx,
 		unsigned int out_top;
 
 		/* Start within 1024 lines of the bottom edge */
-		out_start = max_t(int, 0, out_bottom - 1024);
+		out_start = max_t(int, row * out_top_align, out_bottom - 1024);
 		/* End before having to add more rows above */
-		out_end = min_t(unsigned int, out_bottom, row * 1024);
+		out_end = min_t(unsigned int, out_bottom, row * 1024 + 1);
 
 		find_best_seam(ctx, out_start, out_end,
 			       in_bottom, out_bottom,
-- 
2.20.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/7] gpu: ipu-v3: image-convert: move output seam valid interval calculation into find_best_seam
  2019-08-14 11:54 [PATCH 1/7] gpu: ipu-v3: image-convert: fix output seam valid interval Philipp Zabel
@ 2019-08-14 11:54 ` Philipp Zabel
  2019-08-14 15:09   ` Philipp Zabel
  2019-08-14 11:54 ` [PATCH 3/7] gpu: ipu-v3: image-convert: limit input seam position to hardware requirements Philipp Zabel
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 8+ messages in thread
From: Philipp Zabel @ 2019-08-14 11:54 UTC (permalink / raw)
  To: dri-devel; +Cc: kernel, Steve Longerbeam

This reduces code duplication and makes it easy to calculate the valid
interval for the input seam position in the same place.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 30 +++++++++++---------------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c
index c9909f1c9ffb..4ac7377cfe5c 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -442,12 +442,10 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
 #define round_closest(x, y) round_down((x) + (y)/2, (y))
 
 /*
- * Find the best aligned seam position in the inverval [out_start, out_end].
+ * Find the best aligned seam position for the given column / row index.
  * Rotation and image offsets are out of scope.
  *
- * @out_start: start of inverval, must be within 1024 pixels / lines
- *             of out_end
- * @out_end: end of interval, smaller than or equal to out_edge
+ * @index: column / row index, used to calculate valid interval
  * @in_edge: input right / bottom edge
  * @out_edge: output right / bottom edge
  * @in_align: input alignment, either horizontal 8-byte line start address
@@ -463,8 +461,7 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
  * @_out_seam: aligned output seam position return value
  */
 static void find_best_seam(struct ipu_image_convert_ctx *ctx,
-			   unsigned int out_start,
-			   unsigned int out_end,
+			   unsigned int index,
 			   unsigned int in_edge,
 			   unsigned int out_edge,
 			   unsigned int in_align,
@@ -482,6 +479,13 @@ static void find_best_seam(struct ipu_image_convert_ctx *ctx,
 	unsigned int out_seam = 0;
 	unsigned int in_seam = 0;
 	unsigned int min_diff = UINT_MAX;
+	unsigned int out_start;
+	unsigned int out_end;
+
+	/* Start within 1024 pixels of the right / bottom edge */
+	out_start = max_t(int, index * out_align, out_edge - 1024);
+	/* End before having to add more columns to the left / rows above */
+	out_end = min_t(unsigned int, out_edge, index * 1024 + 1);
 
 	/*
 	 * Output tiles must start at a multiple of 8 bytes horizontally and
@@ -722,12 +726,7 @@ static void find_seams(struct ipu_image_convert_ctx *ctx,
 		 * horizontally.
 		 */
 
-		/* Start within 1024 pixels of the right edge */
-		out_start = max_t(int, col * out_left_align, out_right - 1024);
-		/* End before having to add more columns to the left */
-		out_end = min_t(unsigned int, out_right, col * 1024 + 1);
-
-		find_best_seam(ctx, out_start, out_end,
+		find_best_seam(ctx, col,
 			       in_right, out_right,
 			       in_left_align, out_left_align,
 			       allow_in_overshoot ? 1 : 8 /* burst length */,
@@ -767,12 +766,7 @@ static void find_seams(struct ipu_image_convert_ctx *ctx,
 		unsigned int in_top;
 		unsigned int out_top;
 
-		/* Start within 1024 lines of the bottom edge */
-		out_start = max_t(int, row * out_top_align, out_bottom - 1024);
-		/* End before having to add more rows above */
-		out_end = min_t(unsigned int, out_bottom, row * 1024 + 1);
-
-		find_best_seam(ctx, out_start, out_end,
+		find_best_seam(ctx, row,
 			       in_bottom, out_bottom,
 			       in_top_align, out_top_align,
 			       1, allow_overshoot ? 1 : out_height_align,
-- 
2.20.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/7] gpu: ipu-v3: image-convert: limit input seam position to hardware requirements
  2019-08-14 11:54 [PATCH 1/7] gpu: ipu-v3: image-convert: fix output seam valid interval Philipp Zabel
  2019-08-14 11:54 ` [PATCH 2/7] gpu: ipu-v3: image-convert: move output seam valid interval calculation into find_best_seam Philipp Zabel
@ 2019-08-14 11:54 ` Philipp Zabel
  2019-08-14 11:54 ` [PATCH 4/7] gpu: ipu-v3: image-convert: fix image downsize coefficients and tiling calculation Philipp Zabel
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 8+ messages in thread
From: Philipp Zabel @ 2019-08-14 11:54 UTC (permalink / raw)
  To: dri-devel; +Cc: kernel, Steve Longerbeam

Limit the input seam position to an interval that guarantees the tile
size does not exceed 1024 pixels after the IC downsizing section and
that space is left for the next tile.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 30 ++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c
index 4ac7377cfe5c..9f631a0c39ea 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -481,12 +481,23 @@ static void find_best_seam(struct ipu_image_convert_ctx *ctx,
 	unsigned int min_diff = UINT_MAX;
 	unsigned int out_start;
 	unsigned int out_end;
+	unsigned int in_start;
+	unsigned int in_end;
 
 	/* Start within 1024 pixels of the right / bottom edge */
 	out_start = max_t(int, index * out_align, out_edge - 1024);
 	/* End before having to add more columns to the left / rows above */
 	out_end = min_t(unsigned int, out_edge, index * 1024 + 1);
 
+	/*
+	 * Limit input seam position to make sure that the downsized input tile
+	 * to the right or bottom does not exceed 1024 pixels.
+	 */
+	in_start = max_t(int, index * in_align,
+			 in_edge - (1024 << downsize_coeff));
+	in_end = min_t(unsigned int, in_edge,
+		       index * (1024 << downsize_coeff) + 1);
+
 	/*
 	 * Output tiles must start at a multiple of 8 bytes horizontally and
 	 * possibly at an even line horizontally depending on the pixel format.
@@ -496,6 +507,7 @@ static void find_best_seam(struct ipu_image_convert_ctx *ctx,
 	for (out_pos = out_start; out_pos < out_end; out_pos += out_align) {
 		unsigned int in_pos;
 		unsigned int in_pos_aligned;
+		unsigned int in_pos_rounded;
 		unsigned int abs_diff;
 
 		/*
@@ -516,9 +528,16 @@ static void find_best_seam(struct ipu_image_convert_ctx *ctx,
 		 * start the input tile at, 19.13 fixed point.
 		 */
 		in_pos_aligned = round_closest(in_pos, 8192U * in_align);
+		/* Convert 19.13 fixed point to integer */
+		in_pos_rounded = in_pos_aligned / 8192U;
+
+		if (in_pos_rounded < in_start)
+			continue;
+		if (in_pos_rounded >= in_end)
+			break;
 
 		if ((in_burst > 1) &&
-		    (in_edge - in_pos_aligned / 8192U) % in_burst)
+		    (in_edge - in_pos_rounded) % in_burst)
 			continue;
 
 		if (in_pos < in_pos_aligned)
@@ -527,19 +546,18 @@ static void find_best_seam(struct ipu_image_convert_ctx *ctx,
 			abs_diff = in_pos - in_pos_aligned;
 
 		if (abs_diff < min_diff) {
-			in_seam = in_pos_aligned;
+			in_seam = in_pos_rounded;
 			out_seam = out_pos;
 			min_diff = abs_diff;
 		}
 	}
 
 	*_out_seam = out_seam;
-	/* Convert 19.13 fixed point to integer seam position */
-	*_in_seam = DIV_ROUND_CLOSEST(in_seam, 8192U);
+	*_in_seam = in_seam;
 
-	dev_dbg(dev, "%s: out_seam %u(%u) in [%u, %u], in_seam %u(%u) diff %u.%03u\n",
+	dev_dbg(dev, "%s: out_seam %u(%u) in [%u, %u], in_seam %u(%u) in [%u, %u] diff %u.%03u\n",
 		__func__, out_seam, out_align, out_start, out_end,
-		*_in_seam, in_align, min_diff / 8192,
+		in_seam, in_align, in_start, in_end, min_diff / 8192,
 		DIV_ROUND_CLOSEST(min_diff % 8192 * 1000, 8192));
 }
 
-- 
2.20.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 4/7] gpu: ipu-v3: image-convert: fix image downsize coefficients and tiling calculation
  2019-08-14 11:54 [PATCH 1/7] gpu: ipu-v3: image-convert: fix output seam valid interval Philipp Zabel
  2019-08-14 11:54 ` [PATCH 2/7] gpu: ipu-v3: image-convert: move output seam valid interval calculation into find_best_seam Philipp Zabel
  2019-08-14 11:54 ` [PATCH 3/7] gpu: ipu-v3: image-convert: limit input seam position to hardware requirements Philipp Zabel
@ 2019-08-14 11:54 ` Philipp Zabel
  2019-08-14 11:54 ` [PATCH 5/7] gpu: ipu-v3: image-convert: bail on invalid tile sizes Philipp Zabel
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 8+ messages in thread
From: Philipp Zabel @ 2019-08-14 11:54 UTC (permalink / raw)
  To: dri-devel; +Cc: kernel, Steve Longerbeam

This patch effectively reverts commit 912bbf7e9ca4 ("gpu: ipu-v3:
image-convert: Fix image downsize coefficients") and replaces it with a
different solution based on the preceding patches.

The previous fix tried to solve the problem of intermediate tile size
between IC downsizing and main processing sections not being limited to
1024 pixels by downsizing the input image to a smaller intermediate size
in the downsizing box filter. This causes unnecessary blurring,
especially for scaling factors close to 1.

Now that the seam position calculation makes sure that the 1024 pixel
intermediate tile size limit is not exceeded, calculate the number of
tiles from the maximum of intermediate size and output size and avoid
unnecessary downsizing.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 49 ++++++++++++++++----------
 1 file changed, 31 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c
index 9f631a0c39ea..ccf11b654b58 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -376,8 +376,11 @@ static inline int num_stripes(int dim)
 
 /*
  * Calculate downsizing coefficients, which are the same for all tiles,
- * and bilinear resizing coefficients, which are used to find the best
- * seam positions.
+ * and initial bilinear resizing coefficients, which are used to find the
+ * best seam positions.
+ * Also determine the number of tiles necessary to guarantee that no tile
+ * is larger than 1024 pixels in either dimension at the output and between
+ * IC downsizing and main processing sections.
  */
 static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
 					  struct ipu_image *in,
@@ -391,6 +394,8 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
 	u32 resized_height = out->rect.height;
 	u32 resize_coeff_h;
 	u32 resize_coeff_v;
+	u32 cols;
+	u32 rows;
 
 	if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
 		resized_width = out->rect.height;
@@ -401,14 +406,12 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
 	if (WARN_ON(resized_width == 0 || resized_height == 0))
 		return -EINVAL;
 
-	while (downsized_width > 1024 ||
-	       downsized_width >= resized_width * 2) {
+	while (downsized_width >= resized_width * 2) {
 		downsized_width >>= 1;
 		downsize_coeff_h++;
 	}
 
-	while (downsized_height > 1024 ||
-	       downsized_height >= resized_height * 2) {
+	while (downsized_height >= resized_height * 2) {
 		downsized_height >>= 1;
 		downsize_coeff_v++;
 	}
@@ -422,10 +425,18 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
 	resize_coeff_h = 8192 * (downsized_width - 1) / (resized_width - 1);
 	resize_coeff_v = 8192 * (downsized_height - 1) / (resized_height - 1);
 
+	/*
+	 * Both the output of the IC downsizing section before being passed to
+	 * the IC main processing section and the final output of the IC main
+	 * processing section must be <= 1024 pixels in both dimensions.
+	 */
+	cols = num_stripes(max_t(u32, downsized_width, resized_width));
+	rows = num_stripes(max_t(u32, downsized_height, resized_height));
+
 	dev_dbg(ctx->chan->priv->ipu->dev,
 		"%s: hscale: >>%u, *8192/%u vscale: >>%u, *8192/%u, %ux%u tiles\n",
 		__func__, downsize_coeff_h, resize_coeff_h, downsize_coeff_v,
-		resize_coeff_v, ctx->in.num_cols, ctx->in.num_rows);
+		resize_coeff_v, cols, rows);
 
 	if (downsize_coeff_h > 2 || downsize_coeff_v  > 2 ||
 	    resize_coeff_h > 0x3fff || resize_coeff_v > 0x3fff)
@@ -435,6 +446,8 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
 	ctx->downsize_coeff_v = downsize_coeff_v;
 	ctx->image_resize_coeff_h = resize_coeff_h;
 	ctx->image_resize_coeff_v = resize_coeff_v;
+	ctx->in.num_cols = cols;
+	ctx->in.num_rows = rows;
 
 	return 0;
 }
@@ -2036,22 +2049,26 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task,
 	ctx->chan = chan;
 	init_completion(&ctx->aborted);
 
+	ctx->rot_mode = rot_mode;
+
+	/* Sets ctx->in.num_rows/cols as well */
+	ret = calc_image_resize_coefficients(ctx, in, out);
+	if (ret)
+		goto out_free;
+
 	s_image = &ctx->in;
 	d_image = &ctx->out;
 
 	/* set tiling and rotation */
-	d_image->num_rows = num_stripes(out->pix.height);
-	d_image->num_cols = num_stripes(out->pix.width);
 	if (ipu_rot_mode_is_irt(rot_mode)) {
-		s_image->num_rows = d_image->num_cols;
-		s_image->num_cols = d_image->num_rows;
+		d_image->num_rows = s_image->num_cols;
+		d_image->num_cols = s_image->num_rows;
 	} else {
-		s_image->num_rows = d_image->num_rows;
-		s_image->num_cols = d_image->num_cols;
+		d_image->num_rows = s_image->num_rows;
+		d_image->num_cols = s_image->num_cols;
 	}
 
 	ctx->num_tiles = d_image->num_cols * d_image->num_rows;
-	ctx->rot_mode = rot_mode;
 
 	ret = fill_image(ctx, s_image, in, IMAGE_CONVERT_IN);
 	if (ret)
@@ -2060,10 +2077,6 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task,
 	if (ret)
 		goto out_free;
 
-	ret = calc_image_resize_coefficients(ctx, in, out);
-	if (ret)
-		goto out_free;
-
 	calc_out_tile_map(ctx);
 
 	find_seams(ctx, s_image, d_image);
-- 
2.20.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 5/7] gpu: ipu-v3: image-convert: bail on invalid tile sizes
  2019-08-14 11:54 [PATCH 1/7] gpu: ipu-v3: image-convert: fix output seam valid interval Philipp Zabel
                   ` (2 preceding siblings ...)
  2019-08-14 11:54 ` [PATCH 4/7] gpu: ipu-v3: image-convert: fix image downsize coefficients and tiling calculation Philipp Zabel
@ 2019-08-14 11:54 ` Philipp Zabel
  2019-08-14 11:54 ` [PATCH 6/7] gpu: ipu-v3: image-convert: move tile burst alignment out of loop Philipp Zabel
  2019-08-14 11:54 ` [PATCH 7/7] gpu: ipu-v3: image-convert: only sample into the next tile if necessary Philipp Zabel
  5 siblings, 0 replies; 8+ messages in thread
From: Philipp Zabel @ 2019-08-14 11:54 UTC (permalink / raw)
  To: dri-devel; +Cc: kernel, Steve Longerbeam

If we managed to create tiles sized 0x0 because of a bug in the seam
calculation, return with an error message instead of letting the driver
run into a division by zero later. Also check for tile sizes that are
larger than supported by the hardware.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 27 +++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c
index ccf11b654b58..ace831c54ed7 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -834,13 +834,21 @@ static void find_seams(struct ipu_image_convert_ctx *ctx,
 		in_bottom, flipped_out_top, out_bottom);
 }
 
-static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx,
-				 struct ipu_image_convert_image *image)
+static int calc_tile_dimensions(struct ipu_image_convert_ctx *ctx,
+				struct ipu_image_convert_image *image)
 {
 	struct ipu_image_convert_chan *chan = ctx->chan;
 	struct ipu_image_convert_priv *priv = chan->priv;
+	unsigned int max_width = 1024;
+	unsigned int max_height = 1024;
 	unsigned int i;
 
+	if (image->type == IMAGE_CONVERT_IN) {
+		/* Up to 4096x4096 input tile size */
+		max_width <<= ctx->downsize_coeff_h;
+		max_height <<= ctx->downsize_coeff_v;
+	}
+
 	for (i = 0; i < ctx->num_tiles; i++) {
 		struct ipu_image_tile *tile;
 		const unsigned int row = i / image->num_cols;
@@ -870,7 +878,17 @@ static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx,
 			image->type == IMAGE_CONVERT_IN ? "Input" : "Output",
 			row, col,
 			tile->width, tile->height, tile->left, tile->top);
+
+		if (!tile->width || tile->width > max_width ||
+		    !tile->height || tile->height > max_height) {
+			dev_err(priv->ipu->dev, "invalid %s tile size: %ux%u\n",
+				image->type == IMAGE_CONVERT_IN ? "input" :
+				"output", tile->width, tile->height);
+			return -EINVAL;
+		}
 	}
+
+	return 0;
 }
 
 /*
@@ -2081,7 +2099,10 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task,
 
 	find_seams(ctx, s_image, d_image);
 
-	calc_tile_dimensions(ctx, s_image);
+	ret = calc_tile_dimensions(ctx, s_image);
+	if (ret)
+		goto out_free;
+
 	ret = calc_tile_offsets(ctx, s_image);
 	if (ret)
 		goto out_free;
-- 
2.20.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 6/7] gpu: ipu-v3: image-convert: move tile burst alignment out of loop
  2019-08-14 11:54 [PATCH 1/7] gpu: ipu-v3: image-convert: fix output seam valid interval Philipp Zabel
                   ` (3 preceding siblings ...)
  2019-08-14 11:54 ` [PATCH 5/7] gpu: ipu-v3: image-convert: bail on invalid tile sizes Philipp Zabel
@ 2019-08-14 11:54 ` Philipp Zabel
  2019-08-14 11:54 ` [PATCH 7/7] gpu: ipu-v3: image-convert: only sample into the next tile if necessary Philipp Zabel
  5 siblings, 0 replies; 8+ messages in thread
From: Philipp Zabel @ 2019-08-14 11:54 UTC (permalink / raw)
  To: dri-devel; +Cc: kernel, Steve Longerbeam

Burst aligned input and output width can be calculated once per column,
instead of repeatedly for each tile in the column. The same goes for
input and output height per row. Also don't round up the same values
repeatedly.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 84 ++++++++++++++------------
 1 file changed, 45 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c
index ace831c54ed7..cc237c1f32f0 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -1119,6 +1119,7 @@ static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx)
 			       !(ctx->rot_mode & IPU_ROT_BIT_HFLIP);
 		u32 resized_width;
 		u32 resize_coeff_h;
+		u32 in_width;
 
 		tile_idx = col;
 		in_tile = &ctx->in.tile[tile_idx];
@@ -1136,33 +1137,35 @@ static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx)
 		dev_dbg(priv->ipu->dev, "%s: column %u hscale: *8192/%u\n",
 			__func__, col, resize_coeff_h);
 
+		/*
+		 * With the horizontal scaling factor known, round up resized
+		 * width (output width or height) to burst size.
+		 */
+		resized_width = round_up(resized_width, 8);
+
+		/*
+		 * Calculate input width from the last accessed input pixel
+		 * given resized width and scaling coefficients. Round up to
+		 * burst size.
+		 */
+		last_output = resized_width - 1;
+		if (closest)
+			last_output++;
+		in_width = round_up(
+			(DIV_ROUND_UP(last_output * resize_coeff_h, 8192) + 1)
+			<< ctx->downsize_coeff_h, 8);
 
 		for (row = 0; row < ctx->in.num_rows; row++) {
 			tile_idx = row * ctx->in.num_cols + col;
 			in_tile = &ctx->in.tile[tile_idx];
 			out_tile = &ctx->out.tile[ctx->out_tile_map[tile_idx]];
 
-			/*
-			 * With the horizontal scaling factor known, round up
-			 * resized width (output width or height) to burst size.
-			 */
 			if (ipu_rot_mode_is_irt(ctx->rot_mode))
-				out_tile->height = round_up(resized_width, 8);
+				out_tile->height = resized_width;
 			else
-				out_tile->width = round_up(resized_width, 8);
-
-			/*
-			 * Calculate input width from the last accessed input
-			 * pixel given resized width and scaling coefficients.
-			 * Round up to burst size.
-			 */
-			last_output = round_up(resized_width, 8) - 1;
-			if (closest)
-				last_output++;
-			in_tile->width = round_up(
-				(DIV_ROUND_UP(last_output * resize_coeff_h,
-					      8192) + 1)
-				<< ctx->downsize_coeff_h, 8);
+				out_tile->width = resized_width;
+
+			in_tile->width = in_width;
 		}
 
 		ctx->resize_coeffs_h[col] = resize_coeff_h;
@@ -1173,6 +1176,7 @@ static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx)
 			       !(ctx->rot_mode & IPU_ROT_BIT_VFLIP);
 		u32 resized_height;
 		u32 resize_coeff_v;
+		u32 in_height;
 
 		tile_idx = row * ctx->in.num_cols;
 		in_tile = &ctx->in.tile[tile_idx];
@@ -1190,33 +1194,35 @@ static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx)
 		dev_dbg(priv->ipu->dev, "%s: row %u vscale: *8192/%u\n",
 			__func__, row, resize_coeff_v);
 
+		/*
+		 * With the vertical scaling factor known, round up resized
+		 * height (output width or height) to IDMAC limitations.
+		 */
+		resized_height = round_up(resized_height, 2);
+
+		/*
+		 * Calculate input width from the last accessed input pixel
+		 * given resized height and scaling coefficients. Align to
+		 * IDMAC restrictions.
+		 */
+		last_output = resized_height - 1;
+		if (closest)
+			last_output++;
+		in_height = round_up(
+			(DIV_ROUND_UP(last_output * resize_coeff_v, 8192) + 1)
+			<< ctx->downsize_coeff_v, 2);
+
 		for (col = 0; col < ctx->in.num_cols; col++) {
 			tile_idx = row * ctx->in.num_cols + col;
 			in_tile = &ctx->in.tile[tile_idx];
 			out_tile = &ctx->out.tile[ctx->out_tile_map[tile_idx]];
 
-			/*
-			 * With the vertical scaling factor known, round up
-			 * resized height (output width or height) to IDMAC
-			 * limitations.
-			 */
 			if (ipu_rot_mode_is_irt(ctx->rot_mode))
-				out_tile->width = round_up(resized_height, 2);
+				out_tile->width = resized_height;
 			else
-				out_tile->height = round_up(resized_height, 2);
-
-			/*
-			 * Calculate input width from the last accessed input
-			 * pixel given resized height and scaling coefficients.
-			 * Align to IDMAC restrictions.
-			 */
-			last_output = round_up(resized_height, 2) - 1;
-			if (closest)
-				last_output++;
-			in_tile->height = round_up(
-				(DIV_ROUND_UP(last_output * resize_coeff_v,
-					      8192) + 1)
-				<< ctx->downsize_coeff_v, 2);
+				out_tile->height = resized_height;
+
+			in_tile->height = in_height;
 		}
 
 		ctx->resize_coeffs_v[row] = resize_coeff_v;
-- 
2.20.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 7/7] gpu: ipu-v3: image-convert: only sample into the next tile if necessary
  2019-08-14 11:54 [PATCH 1/7] gpu: ipu-v3: image-convert: fix output seam valid interval Philipp Zabel
                   ` (4 preceding siblings ...)
  2019-08-14 11:54 ` [PATCH 6/7] gpu: ipu-v3: image-convert: move tile burst alignment out of loop Philipp Zabel
@ 2019-08-14 11:54 ` Philipp Zabel
  5 siblings, 0 replies; 8+ messages in thread
From: Philipp Zabel @ 2019-08-14 11:54 UTC (permalink / raw)
  To: dri-devel; +Cc: kernel, Steve Longerbeam

The first pixel of the next tile is only sampled by the hardware if the
fractional input position corresponding to the last written output pixel
is not an integer position.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c
index cc237c1f32f0..06d658c8ca3b 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -1149,7 +1149,7 @@ static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx)
 		 * burst size.
 		 */
 		last_output = resized_width - 1;
-		if (closest)
+		if (closest && ((last_output * resize_coeff_h) % 8192))
 			last_output++;
 		in_width = round_up(
 			(DIV_ROUND_UP(last_output * resize_coeff_h, 8192) + 1)
@@ -1206,7 +1206,7 @@ static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx)
 		 * IDMAC restrictions.
 		 */
 		last_output = resized_height - 1;
-		if (closest)
+		if (closest && ((last_output * resize_coeff_v) % 8192))
 			last_output++;
 		in_height = round_up(
 			(DIV_ROUND_UP(last_output * resize_coeff_v, 8192) + 1)
-- 
2.20.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/7] gpu: ipu-v3: image-convert: move output seam valid interval calculation into find_best_seam
  2019-08-14 11:54 ` [PATCH 2/7] gpu: ipu-v3: image-convert: move output seam valid interval calculation into find_best_seam Philipp Zabel
@ 2019-08-14 15:09   ` Philipp Zabel
  0 siblings, 0 replies; 8+ messages in thread
From: Philipp Zabel @ 2019-08-14 15:09 UTC (permalink / raw)
  To: dri-devel; +Cc: kernel, Steve Longerbeam

On Wed, 2019-08-14 at 13:54 +0200, Philipp Zabel wrote:
> This reduces code duplication and allows to easily calculate the valid
> interval for the input seam position in the same place.
> 
> Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
> ---
>  drivers/gpu/ipu-v3/ipu-image-convert.c | 30 +++++++++++---------------
>  1 file changed, 12 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c
> index c9909f1c9ffb..4ac7377cfe5c 100644
> --- a/drivers/gpu/ipu-v3/ipu-image-convert.c
> +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
[...]
> @@ -722,12 +726,7 @@ static void find_seams(struct ipu_image_convert_ctx *ctx,
>  		 * horizontally.
>  		 */
>  
> -		/* Start within 1024 pixels of the right edge */
> -		out_start = max_t(int, col * out_left_align, out_right - 1024);
> -		/* End before having to add more columns to the left */
> -		out_end = min_t(unsigned int, out_right, col * 1024 + 1);
> -
> -		find_best_seam(ctx, out_start, out_end,
> +		find_best_seam(ctx, col,
>  			       in_right, out_right,
>  			       in_left_align, out_left_align,
>  			       allow_in_overshoot ? 1 : 8 /* burst length */,
> @@ -767,12 +766,7 @@ static void find_seams(struct ipu_image_convert_ctx *ctx,
>  		unsigned int in_top;
>  		unsigned int out_top;
>  
> -		/* Start within 1024 lines of the bottom edge */
> -		out_start = max_t(int, row * out_top_align, out_bottom - 1024);
> -		/* End before having to add more rows above */
> -		out_end = min_t(unsigned int, out_bottom, row * 1024 + 1);
> -
> -		find_best_seam(ctx, out_start, out_end,
> +		find_best_seam(ctx, row,
>  			       in_bottom, out_bottom,
>  			       in_top_align, out_top_align,
>  			       1, allow_overshoot ? 1 : out_height_align,

----------8<----------
From ccb75184975c6418a368762c2466441600f94a09 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Wed, 14 Aug 2019 17:05:59 +0200
Subject: [PATCH] fixup! gpu: ipu-v3: image-convert: move output seam valid
 interval calculation into find_best_seam

---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c
index f499509d72f2..eeca50d9a1ee 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -753,8 +753,6 @@ static void find_seams(struct ipu_image_convert_ctx *ctx,
 					  !(ctx->rot_mode & IPU_ROT_BIT_HFLIP);
 		bool allow_out_overshoot = (col < in->num_cols - 1) &&
 					   !(ctx->rot_mode & IPU_ROT_BIT_HFLIP);
-		unsigned int out_start;
-		unsigned int out_end;
 		unsigned int in_left;
 		unsigned int out_left;
 
@@ -798,8 +796,6 @@ static void find_seams(struct ipu_image_convert_ctx *ctx,
 
 	for (row = in->num_rows - 1; row > 0; row--) {
 		bool allow_overshoot = row < in->num_rows - 1;
-		unsigned int out_start;
-		unsigned int out_end;
 		unsigned int in_top;
 		unsigned int out_top;
 
-- 
2.11.0
---------->8----------
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2019-08-14 15:09 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-14 11:54 [PATCH 1/7] gpu: ipu-v3: image-convert: fix output seam valid interval Philipp Zabel
2019-08-14 11:54 ` [PATCH 2/7] gpu: ipu-v3: image-convert: move output seam valid interval calculation into find_best_seam Philipp Zabel
2019-08-14 15:09   ` Philipp Zabel
2019-08-14 11:54 ` [PATCH 3/7] gpu: ipu-v3: image-convert: limit input seam position to hardware requirements Philipp Zabel
2019-08-14 11:54 ` [PATCH 4/7] gpu: ipu-v3: image-convert: fix image downsize coefficients and tiling calculation Philipp Zabel
2019-08-14 11:54 ` [PATCH 5/7] gpu: ipu-v3: image-convert: bail on invalid tile sizes Philipp Zabel
2019-08-14 11:54 ` [PATCH 6/7] gpu: ipu-v3: image-convert: move tile burst alignment out of loop Philipp Zabel
2019-08-14 11:54 ` [PATCH 7/7] gpu: ipu-v3: image-convert: only sample into the next tile if necessary Philipp Zabel

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.