All of lore.kernel.org
 help / color / mirror / Atom feed
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
To: Ville Syrjala <ville.syrjala@linux.intel.com>,
	igt-dev@lists.freedesktop.org
Subject: Re: [igt-dev] [PATCH i-g-t 4/8] lib: Use igt_matrix for ycbcr<->rgb conversion
Date: Wed, 6 Jun 2018 12:11:41 +0200	[thread overview]
Message-ID: <c1baab10-5634-7703-ca91-f6ddce79f8ae@linux.intel.com> (raw)
In-Reply-To: <20180523183110.1975-4-ville.syrjala@linux.intel.com>

Op 23-05-18 om 20:31 schreef Ville Syrjala:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
>
> Probably horribly inefficient (not that the original code tried to be
> particularly efficient), but at least this is now pretty generic so
> it'll be super easy to add other color encodings and whatnot.
Hey,

Conversion routines taken out (0s converting):
real    0m22,732s

Unpatched kms_nv12 run:
real    0m27,044s (4.3s converting)

Patched kms_nv12 run:
real    0m37,778s (15s converting)

It's not just making things worse, it's making it 3.5x slower. :/

> v2: Rebase
> v3: Deal with the new color_encoding/range enums
> v4: Fix the code to actually work, and do things in 2x2 blocks
>     Keep the chroma siting comment and pimp it up a bit
>
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>  lib/igt_fb.c | 228 +++++++++++++++++++++++++++++++++++++++++------------------
>  1 file changed, 159 insertions(+), 69 deletions(-)
>
> diff --git a/lib/igt_fb.c b/lib/igt_fb.c
> index 9edee74b57c3..ceaaea70f64b 100644
> --- a/lib/igt_fb.c
> +++ b/lib/igt_fb.c
> @@ -31,8 +31,10 @@
>  
>  #include "drmtest.h"
>  #include "igt_aux.h"
> +#include "igt_color_encoding.h"
>  #include "igt_fb.h"
>  #include "igt_kms.h"
> +#include "igt_matrix.h"
>  #include "igt_x86.h"
>  #include "ioctl_wrappers.h"
>  #include "intel_batchbuffer.h"
> @@ -1356,6 +1358,21 @@ static uint8_t clamprgb(float val)
>  	return clamp(val, 0.0f, 255.0f);
>  }
>  
> +static void read_rgb(struct igt_vec4 *rgb, const uint8_t *rgb24)
> +{
> +	rgb->d[0] = rgb24[2];
> +	rgb->d[1] = rgb24[1];
> +	rgb->d[2] = rgb24[0];
> +	rgb->d[3] = 1.0f;
> +}
> +
> +static void write_rgb(uint8_t *rgb24, const struct igt_vec4 *rgb)
> +{
> +	rgb24[2] = clamprgb(rgb->d[0]);
> +	rgb24[1] = clamprgb(rgb->d[1]);
> +	rgb24[0] = clamprgb(rgb->d[2]);
> +}
> +
>  static void convert_nv12_to_rgb24(struct igt_fb *fb, struct fb_convert_blit_upload *blit)
>  {
>  	int i, j;
> @@ -1363,6 +1380,8 @@ static void convert_nv12_to_rgb24(struct igt_fb *fb, struct fb_convert_blit_uplo
>  	uint8_t *rgb24 = blit->rgb24.map;
>  	unsigned rgb24_stride = blit->rgb24.stride, planar_stride = blit->linear.stride;
>  	uint8_t *buf = malloc(blit->linear.size);
> +	struct igt_mat4 m = igt_ycbcr_to_rgb_matrix(IGT_COLOR_YCBCR_BT601,
> +						    IGT_COLOR_YCBCR_LIMITED_RANGE);
>  
>  	/*
>  	 * Reading from the BO is awfully slow because of lack of read caching,
> @@ -1373,30 +1392,49 @@ static void convert_nv12_to_rgb24(struct igt_fb *fb, struct fb_convert_blit_uplo
>  	y = &buf[blit->linear.offsets[0]];
>  	uv = &buf[blit->linear.offsets[1]];
>  
> -	/* Convert from limited color range BT.601 */
>  	for (i = 0; i < fb->height / 2; i++) {
> -		for (j = 0; j < fb->width; j++) {
> -			float r_, g_, b_, y0, y1, cb, cr;
> -			/* Convert 1x2 pixel blocks */
> -
> -			y0 = 1.164f * (y[j] - 16.f);
> -			y1 = 1.164f * (y[j + planar_stride] - 16.f);
> +		for (j = 0; j < fb->width / 2; j++) {
> +			/* Convert 2x2 pixel blocks */
> +			struct igt_vec4 yuv[4];
> +			struct igt_vec4 rgb[4];
> +
> +			yuv[0].d[0] = y[j * 2 + 0];
> +			yuv[1].d[0] = y[j * 2 + 1];
> +			yuv[2].d[0] = y[j * 2 + 0 + planar_stride];
> +			yuv[3].d[0] = y[j * 2 + 1 + planar_stride];
> +
> +			yuv[0].d[1] = yuv[1].d[1] = yuv[2].d[1] = yuv[3].d[1] = uv[j * 2 + 0];
> +			yuv[0].d[2] = yuv[1].d[2] = yuv[2].d[2] = yuv[3].d[2] = uv[j * 2 + 1];
> +			yuv[0].d[3] = yuv[1].d[3] = yuv[2].d[3] = yuv[3].d[3] = 1.0f;
> +
> +			rgb[0] = igt_matrix_transform(&m, &yuv[0]);
> +			rgb[1] = igt_matrix_transform(&m, &yuv[1]);
> +			rgb[2] = igt_matrix_transform(&m, &yuv[2]);
> +			rgb[3] = igt_matrix_transform(&m, &yuv[3]);
> +
> +			write_rgb(&rgb24[j * 8 + 0], &rgb[0]);
> +			write_rgb(&rgb24[j * 8 + 4], &rgb[1]);
> +			write_rgb(&rgb24[j * 8 + 0 + rgb24_stride], &rgb[2]);
> +			write_rgb(&rgb24[j * 8 + 4 + rgb24_stride], &rgb[3]);
> +		}
>  
> -			cb = uv[j & ~1] - 128.f;
> -			cr = uv[j | 1] - 128.f;
> +		if (fb->width & 1) {
> +			/* Convert 1x2 pixel block */
> +			struct igt_vec4 yuv[2];
> +			struct igt_vec4 rgb[2];
>  
> -			r_ =  0.000f * cb +  1.596f * cr;
> -			g_ = -0.392f * cb + -0.813f * cr;
> -			b_ =  2.017f * cb +  0.000f * cr;
> +			yuv[0].d[0] = y[j * 2 + 0];
> +			yuv[1].d[0] = y[j * 2 + 0 + planar_stride];
>  
> -			rgb24[j * 4 + 2] = clamprgb(y0 + r_);
> -			rgb24[j * 4 + 2 + rgb24_stride] = clamprgb(y1 + r_);
> +			yuv[0].d[1] = yuv[1].d[1] = uv[j * 2 + 0];
> +			yuv[0].d[2] = yuv[1].d[2] = uv[j * 2 + 1];
> +			yuv[0].d[3] = yuv[1].d[3] = 1.0f;
>  
> -			rgb24[j * 4 + 1] = clamprgb(y0 + g_);
> -			rgb24[j * 4 + 1 + rgb24_stride] = clamprgb(y1 + g_);
> +			rgb[0] = igt_matrix_transform(&m, &yuv[0]);
> +			rgb[1] = igt_matrix_transform(&m, &yuv[1]);
>  
> -			rgb24[j * 4] = clamprgb(y0 + b_);
> -			rgb24[j * 4 + rgb24_stride] = clamprgb(y1 + b_);
> +			write_rgb(&rgb24[j * 8 + 0], &rgb[0]);
> +			write_rgb(&rgb24[j * 8 + 0 + rgb24_stride], &rgb[1]);
>  		}
>  
>  		rgb24 += 2 * rgb24_stride;
> @@ -1406,21 +1444,37 @@ static void convert_nv12_to_rgb24(struct igt_fb *fb, struct fb_convert_blit_uplo
>  
>  	if (fb->height & 1) {
>  		/* Convert last row */
> -		for (j = 0; j < fb->width; j++) {
> -			float r_, g_, b_, y0, cb, cr;
> +		for (j = 0; j < fb->width / 2; j++) {
> +			/* Convert 2x1 pixel blocks */
> +			struct igt_vec4 yuv[2];
> +			struct igt_vec4 rgb[2];
> +
> +			yuv[0].d[0] = y[j * 2 + 0];
> +			yuv[1].d[0] = y[j * 2 + 1];
> +			yuv[0].d[1] = yuv[1].d[1] = uv[j * 2 + 0];
> +			yuv[0].d[2] = yuv[1].d[2] = uv[j * 2 + 1];
> +			yuv[0].d[3] = yuv[1].d[3] = 1.0f;
> +
> +			rgb[0] = igt_matrix_transform(&m, &yuv[0]);
> +			rgb[1] = igt_matrix_transform(&m, &yuv[1]);
> +
> +			write_rgb(&rgb24[j * 8 + 0], &rgb[0]);
> +			write_rgb(&rgb24[j * 8 + 4], &rgb[0]);
> +		}
> +
> +		if (fb->width & 1) {
>  			/* Convert single pixel */
> +			struct igt_vec4 yuv;
> +			struct igt_vec4 rgb;
>  
> -			cb = uv[j & ~1] - 128.f;
> -			cr = uv[j | 1] - 128.f;
> +			yuv.d[0] = y[j * 2 + 0];
> +			yuv.d[1] = uv[j * 2 + 0];
> +			yuv.d[2] = uv[j * 2 + 1];
> +			yuv.d[3] = 1.0f;
>  
> -			y0 = 1.164f * (y[j] - 16.f);
> -			r_ =  0.000f * cb +  1.596f * cr;
> -			g_ = -0.392f * cb + -0.813f * cr;
> -			b_ =  2.017f * cb +  0.000f * cr;
> +			rgb = igt_matrix_transform(&m, &yuv);
>  
> -			rgb24[j * 4 + 2] = clamprgb(y0 + r_);
> -			rgb24[j * 4 + 1] = clamprgb(y0 + g_);
> -			rgb24[j * 4] = clamprgb(y0 + b_);
> +			write_rgb(&rgb24[j * 8 + 0], &rgb);
>  		}
>  	}
>  
> @@ -1435,65 +1489,101 @@ static void convert_rgb24_to_nv12(struct igt_fb *fb, struct fb_convert_blit_uplo
>  	const uint8_t *rgb24 = blit->rgb24.map;
>  	unsigned rgb24_stride = blit->rgb24.stride;
>  	unsigned planar_stride = blit->linear.stride;
> +	struct igt_mat4 m = igt_rgb_to_ycbcr_matrix(IGT_COLOR_YCBCR_BT601,
> +						    IGT_COLOR_YCBCR_LIMITED_RANGE);
>  
>  	igt_assert_f(fb->drm_format == DRM_FORMAT_NV12,
>  		     "Conversion not implemented for !NV12 planar formats\n");
>  
> -	for (i = 0; i < fb->plane_height[0]; i++) {
> -		/* Use limited color range BT.601 */
> -
> -		for (j = 0; j < fb->plane_width[0]; j++) {
> -			float yf = 0.257f * rgb24[j * 4 + 2] +
> -				   0.504f * rgb24[j * 4 + 1] +
> -				   0.098f * rgb24[j * 4] + 16;
> +	for (i = 0; i < fb->height / 2; i++) {
> +		for (j = 0; j < fb->width / 2; j++) {
> +			/* Convert 2x2 pixel blocks */
> +			struct igt_vec4 rgb[4];
> +			struct igt_vec4 yuv[4];
> +
> +			read_rgb(&rgb[0], &rgb24[j * 8 + 0]);
> +			read_rgb(&rgb[1], &rgb24[j * 8 + 4]);
> +			read_rgb(&rgb[2], &rgb24[j * 8 + 0 + rgb24_stride]);
> +			read_rgb(&rgb[3], &rgb24[j * 8 + 4 + rgb24_stride]);
> +
> +			yuv[0] = igt_matrix_transform(&m, &rgb[0]);
> +			yuv[1] = igt_matrix_transform(&m, &rgb[1]);
> +			yuv[2] = igt_matrix_transform(&m, &rgb[2]);
> +			yuv[3] = igt_matrix_transform(&m, &rgb[3]);
> +
> +			y[j * 2 + 0] = yuv[0].d[0];
> +			y[j * 2 + 1] = yuv[1].d[0];
> +			y[j * 2 + 0 + planar_stride] = yuv[2].d[0];
> +			y[j * 2 + 1 + planar_stride] = yuv[3].d[0];
>  
> -			y[j] = (uint8_t)yf;
> +			/*
> +			 * We assume the MPEG2 chroma siting convention, where
> +			 * pixel center for Cb'Cr' is between the left top and
> +			 * bottom pixel in a 2x2 block, so take the average.
> +			 */
> +			uv[j * 2 + 0] = (yuv[0].d[1] + yuv[2].d[1]) / 2.0f;
> +			uv[j * 2 + 1] = (yuv[0].d[2] + yuv[2].d[2]) / 2.0f;
>  		}
>  
> -		rgb24 += rgb24_stride;
> -		y += planar_stride;
> -	}
> +		if (fb->width & 1) {
> +			/* Convert 1x2 pixel block */
> +			struct igt_vec4 rgb[2];
> +			struct igt_vec4 yuv[2];
>  
> -	rgb24 = blit->rgb24.map;
> +			read_rgb(&rgb[0], &rgb24[j * 8 + 0]);
> +			read_rgb(&rgb[2], &rgb24[j * 8 + 0 + rgb24_stride]);
> +
> +			yuv[0] = igt_matrix_transform(&m, &rgb[0]);
> +			yuv[1] = igt_matrix_transform(&m, &rgb[1]);
> +
> +			y[j * 2 + 0] = yuv[0].d[0];
> +			y[j * 2 + 0 + planar_stride] = yuv[1].d[0];
>  
> -	for (i = 0; i < fb->height / 2; i++) {
> -		for (j = 0; j < fb->plane_width[1]; j++) {
>  			/*
> -			 * Pixel center for Cb'Cr' is between the left top and
> +			 * We assume the MPEG2 chroma siting convention, where
> +			 * pixel center for Cb'Cr' is between the left top and
>  			 * bottom pixel in a 2x2 block, so take the average.
>  			 */
> -			float uf = -0.148f/2 * rgb24[j * 8 + 2] +
> -				   -0.148f/2 * rgb24[j * 8 + 2 + rgb24_stride] +
> -				   -0.291f/2 * rgb24[j * 8 + 1] +
> -				   -0.291f/2 * rgb24[j * 8 + 1 + rgb24_stride] +
> -				    0.439f/2 * rgb24[j * 8] +
> -				    0.439f/2 * rgb24[j * 8 + rgb24_stride] + 128;
> -			float vf =  0.439f/2 * rgb24[j * 8 + 2] +
> -				    0.439f/2 * rgb24[j * 8 + 2 + rgb24_stride] +
> -				   -0.368f/2 * rgb24[j * 8 + 1] +
> -				   -0.368f/2 * rgb24[j * 8 + 1 + rgb24_stride] +
> -				   -0.071f/2 * rgb24[j * 8] +
> -				   -0.071f/2 * rgb24[j * 8 + rgb24_stride] + 128;
> -			uv[j * 2] = (uint8_t)uf;
> -			uv[j * 2 + 1] = (uint8_t)vf;
> +			uv[j * 2 + 0] = (yuv[0].d[1] + yuv[1].d[1]) / 2.0f;
> +			uv[j * 2 + 1] = (yuv[0].d[2] + yuv[1].d[2]) / 2.0f;
>  		}
>  
>  		rgb24 += 2 * rgb24_stride;
> +		y += 2 * planar_stride;
>  		uv += planar_stride;
>  	}
>  
>  	/* Last row cannot be interpolated between 2 pixels, take the single value */
> -	if (i < fb->plane_height[1]) {
> -		for (j = 0; j < fb->plane_width[1]; j++) {
> -			float uf = -0.148f * rgb24[j * 8 + 2] +
> -				   -0.291f * rgb24[j * 8 + 1] +
> -				    0.439f * rgb24[j * 8] + 128;
> -			float vf =  0.439f * rgb24[j * 8 + 2] +
> -				   -0.368f * rgb24[j * 8 + 1] +
> -				   -0.071f * rgb24[j * 8] + 128;
> -
> -			uv[j * 2] = (uint8_t)uf;
> -			uv[j * 2 + 1] = (uint8_t)vf;
> +	if (fb->height & 1) {
> +		for (j = 0; j < fb->width / 2; j++) {
> +			/* Convert 2x1 pixel blocks */
> +			struct igt_vec4 rgb[2];
> +			struct igt_vec4 yuv[2];
> +
> +			read_rgb(&rgb[0], &rgb24[j * 8 + 0]);
> +			read_rgb(&rgb[1], &rgb24[j * 8 + 4]);
> +
> +			yuv[0] = igt_matrix_transform(&m, &rgb[0]);
> +			yuv[1] = igt_matrix_transform(&m, &rgb[1]);
> +
> +			y[j * 2 + 0] = yuv[0].d[0];
> +			y[j * 2 + 1] = yuv[1].d[0];
> +			uv[j * 2 + 0] = yuv[0].d[1];
> +			uv[j * 2 + 1] = yuv[0].d[2];
> +		}
> +
> +		if (fb->width & 1) {
> +			/* Convert single pixel */
> +			struct igt_vec4 rgb;
> +			struct igt_vec4 yuv;
> +
> +			read_rgb(&rgb, &rgb24[j * 8 + 0]);
> +
> +			yuv = igt_matrix_transform(&m, &rgb);
> +
> +			y[j * 2 + 0] = yuv.d[0];
> +			uv[j * 2 + 0] = yuv.d[1];
> +			uv[j * 2 + 1] = yuv.d[2];
>  		}
>  	}
>  }

We could get rid of the height & 1 and width & 1 checks, we only allow an alignment with a multiple of 4 in i915, and I think restricting it to 2x2 in igt won't lose us much...

One thing we should definitely do is making igt_matrix_transform inline, and enable vectorization somehow. The current code is horribly unoptimized. :(

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

  reply	other threads:[~2018-06-06 10:11 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-23 18:31 [igt-dev] [PATCH i-g-t 1/8] lib: Clear packed YUV formats to black Ville Syrjala
2018-05-23 18:31 ` [igt-dev] [PATCH i-g-t 2/8] lib: Don't use dumb buffers for YCbCr Ville Syrjala
2018-06-05  9:51   ` Maarten Lankhorst
2018-05-23 18:31 ` [igt-dev] [PATCH i-g-t 3/8] lib: Clean up format_desc Ville Syrjala
2018-05-23 18:31 ` [igt-dev] [PATCH i-g-t 4/8] lib: Use igt_matrix for ycbcr<->rgb conversion Ville Syrjala
2018-06-06 10:11   ` Maarten Lankhorst [this message]
2018-05-23 18:31 ` [igt-dev] [PATCH i-g-t 5/8] lib: Add support for rendering into packed YCbCr framebuffers Ville Syrjala
2018-05-23 18:31 ` [igt-dev] [PATCH i-g-t 6/8] lib/fb: Add color_encoding/color_range to igt_fb Ville Syrjala
2018-05-23 18:31 ` [igt-dev] [PATCH i-g-t 7/8] lib/kms: Respect fb color_encoding/color_range Ville Syrjala
2018-05-23 18:31 ` [igt-dev] [PATCH i-g-t 8/8] tests/kms_plane: crc check plane pixel formats Ville Syrjala
2018-05-24 10:48   ` [igt-dev] [PATCH i-g-t v2 " Ville Syrjala
2018-06-07 15:18     ` Maarten Lankhorst
2018-05-23 19:09 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/8] lib: Clear packed YUV formats to black Patchwork
2018-05-23 23:27 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
2018-05-24 17:15 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/8] lib: Clear packed YUV formats to black (rev2) Patchwork
2018-05-24 23:05 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
2018-06-05  9:42 ` [igt-dev] [PATCH i-g-t 1/8] lib: Clear packed YUV formats to black Maarten Lankhorst
2018-06-05 10:13   ` Ville Syrjälä
2018-06-05 10:33     ` Maarten Lankhorst

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c1baab10-5634-7703-ca91-f6ddce79f8ae@linux.intel.com \
    --to=maarten.lankhorst@linux.intel.com \
    --cc=igt-dev@lists.freedesktop.org \
    --cc=ville.syrjala@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.