All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/udl: optimize udl_compress_hline16
@ 2015-01-28 18:15 Haixia Shi
  2015-01-28 18:15 ` [PATCH 2/2] drm/udl: fix excessive prefetch_range Haixia Shi
  2015-01-28 21:12 ` [PATCH 1/2] drm/udl: optimize udl_compress_hline16 Chris Wilson
  0 siblings, 2 replies; 13+ messages in thread
From: Haixia Shi @ 2015-01-28 18:15 UTC (permalink / raw)
  To: dri-devel; +Cc: Haixia Shi

The run-length encoding algorithm should compare 16-bit encoded pixel
values instead of comparing raw pixel values. This allows pixels
with similar but different colors to be encoded as repeat pixels, and
thus potentially saves USB bandwidth.

Signed-off-by: Haixia Shi <hshi@chromium.org>
Reviewed-by: Daniel Kurtz <djkurtz@chromium.org>
Tested-by: Haixia Shi <hshi@chromium.org>
---
 drivers/gpu/drm/udl/udl_transfer.c | 41 +++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c
index f343db7..eadddf9 100644
--- a/drivers/gpu/drm/udl/udl_transfer.c
+++ b/drivers/gpu/drm/udl/udl_transfer.c
@@ -82,12 +82,14 @@ static inline u16 pixel32_to_be16(const uint32_t pixel)
 		((pixel >> 8) & 0xf800));
 }
 
-static bool pixel_repeats(const void *pixel, const uint32_t repeat, int bpp)
+static inline u16 get_pixel_val16(const uint8_t *pixel, int bpp)
 {
+	u16 pixel_val16 = 0;
 	if (bpp == 2)
-		return *(const uint16_t *)pixel == repeat;
-	else
-		return *(const uint32_t *)pixel == repeat;
+		pixel_val16 = *(uint16_t *)pixel;
+	else if (bpp == 4)
+		pixel_val16 = pixel32_to_be16p(pixel);
+	return pixel_val16;
 }
 
 /*
@@ -134,6 +136,7 @@ static void udl_compress_hline16(
 		uint8_t *cmd_pixels_count_byte = NULL;
 		const u8 *raw_pixel_start = NULL;
 		const u8 *cmd_pixel_start, *cmd_pixel_end = NULL;
+		uint16_t pixel_val16;
 
 		prefetchw((void *) cmd); /* pull in one cache line at least */
 
@@ -154,33 +157,29 @@ static void udl_compress_hline16(
 			    (int)(cmd_buffer_end - cmd) / 2))) * bpp;
 
 		prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp);
+		pixel_val16 = get_pixel_val16(pixel, bpp);
 
 		while (pixel < cmd_pixel_end) {
-			const u8 *const start = pixel;
-			u32 repeating_pixel;
-
-			if (bpp == 2) {
-				repeating_pixel = *(uint16_t *)pixel;
-				*(uint16_t *)cmd = cpu_to_be16(repeating_pixel);
-			} else {
-				repeating_pixel = *(uint32_t *)pixel;
-				*(uint16_t *)cmd = cpu_to_be16(pixel32_to_be16(repeating_pixel));
-			}
+			const u8 * const repeating_pixel = pixel;
+			const uint16_t repeating_pixel_val16 = pixel_val16;
+
+			*(uint16_t *)cmd = cpu_to_be16(pixel_val16);
 
 			cmd += 2;
 			pixel += bpp;
 
-			if (unlikely((pixel < cmd_pixel_end) &&
-				     (pixel_repeats(pixel, repeating_pixel, bpp)))) {
+			while (pixel < cmd_pixel_end) {
+				pixel_val16 = get_pixel_val16(pixel, bpp);
+				if (pixel_val16 != repeating_pixel_val16)
+					break;
+				pixel += bpp;
+			}
+
+			if (unlikely(pixel > repeating_pixel + bpp)) {
 				/* go back and fill in raw pixel count */
 				*raw_pixels_count_byte = (((start -
 						raw_pixel_start) / bpp) + 1) & 0xFF;
 
-				while ((pixel < cmd_pixel_end) &&
-				       (pixel_repeats(pixel, repeating_pixel, bpp))) {
-					pixel += bpp;
-				}
-
 				/* immediately after raw data is repeat byte */
 				*cmd++ = (((pixel - start) / bpp) - 1) & 0xFF;
 
-- 
2.2.0.rc0.207.ga3a616c

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/2] drm/udl: fix excessive prefetch_range
  2015-01-28 18:15 [PATCH 1/2] drm/udl: optimize udl_compress_hline16 Haixia Shi
@ 2015-01-28 18:15 ` Haixia Shi
  2015-01-28 20:56   ` Chris Wilson
  2015-01-28 21:12 ` [PATCH 1/2] drm/udl: optimize udl_compress_hline16 Chris Wilson
  1 sibling, 1 reply; 13+ messages in thread
From: Haixia Shi @ 2015-01-28 18:15 UTC (permalink / raw)
  To: dri-devel; +Cc: Haixia Shi

The prefetch_range amount is already in number of bytes. Multiplying again by
bpp is unnecessary.

Signed-off-by: Haixia Shi <hshi@chromium.org>
Reviewed-by: Daniel Kurtz <djkurtz@chromium.org>
Tested-by: Haixia Shi <hshi@chromium.org>
---
 drivers/gpu/drm/udl/udl_transfer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c
index eadddf9..91e4ae2 100644
--- a/drivers/gpu/drm/udl/udl_transfer.c
+++ b/drivers/gpu/drm/udl/udl_transfer.c
@@ -156,7 +156,7 @@ static void udl_compress_hline16(
 			min((int)(pixel_end - pixel) / bpp,
 			    (int)(cmd_buffer_end - cmd) / 2))) * bpp;
 
-		prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp);
+		prefetch_range((void *) pixel, cmd_pixel_end - pixel);
 		pixel_val16 = get_pixel_val16(pixel, bpp);
 
 		while (pixel < cmd_pixel_end) {
-- 
2.2.0.rc0.207.ga3a616c

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/2] drm/udl: fix excessive prefetch_range
  2015-01-28 18:15 ` [PATCH 2/2] drm/udl: fix excessive prefetch_range Haixia Shi
@ 2015-01-28 20:56   ` Chris Wilson
  0 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2015-01-28 20:56 UTC (permalink / raw)
  To: Haixia Shi; +Cc: dri-devel

On Wed, Jan 28, 2015 at 10:15:30AM -0800, Haixia Shi wrote:
> The prefetch_range amount is already in number of bytes. Multiplying again by
> bpp is unnecessary.
> 
> Signed-off-by: Haixia Shi <hshi@chromium.org>
> Reviewed-by: Daniel Kurtz <djkurtz@chromium.org>
> Tested-by: Haixia Shi <hshi@chromium.org>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] drm/udl: optimize udl_compress_hline16
  2015-01-28 18:15 [PATCH 1/2] drm/udl: optimize udl_compress_hline16 Haixia Shi
  2015-01-28 18:15 ` [PATCH 2/2] drm/udl: fix excessive prefetch_range Haixia Shi
@ 2015-01-28 21:12 ` Chris Wilson
  2015-01-28 21:41   ` Haixia Shi
  2015-01-28 21:50   ` [PATCH 1/2] drm/udl: optimize udl_compress_hline16 Haixia Shi
  1 sibling, 2 replies; 13+ messages in thread
From: Chris Wilson @ 2015-01-28 21:12 UTC (permalink / raw)
  To: Haixia Shi; +Cc: dri-devel

On Wed, Jan 28, 2015 at 10:15:29AM -0800, Haixia Shi wrote:
> The run-length encoding algorithm should compare 16-bit encoded pixel
> values instead of comparing raw pixel values. It allows pixels
> with similar but different colors to be encoded as repeat pixels, and
> thus potentially save USB bandwidth.
> 
> Signed-off-by: Haixia Shi <hshi@chromium.org>
> Reviewed-by: Daniel Kurtz <djkurtz@chromium.org>
> Tested-by: Haixia Shi <hshi@chromium.org>

This is not based on upstream code; it is similar, but it won't apply.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 1/2] drm/udl: optimize udl_compress_hline16
  2015-01-28 21:12 ` [PATCH 1/2] drm/udl: optimize udl_compress_hline16 Chris Wilson
@ 2015-01-28 21:41   ` Haixia Shi
  2015-01-28 21:41     ` [PATCH 2/2] drm/udl: fix excessive prefetch_range Haixia Shi
  2015-01-30  3:45     ` [PATCH 1/2] drm/udl: optimize udl_compress_hline16 Dave Airlie
  2015-01-28 21:50   ` [PATCH 1/2] drm/udl: optimize udl_compress_hline16 Haixia Shi
  1 sibling, 2 replies; 13+ messages in thread
From: Haixia Shi @ 2015-01-28 21:41 UTC (permalink / raw)
  To: dri-devel; +Cc: Haixia Shi

The run-length encoding algorithm should compare 16-bit encoded pixel
values instead of comparing raw pixel values. This allows pixels
with similar but different colors to be encoded as repeat pixels, and
thus potentially saves USB bandwidth.

Signed-off-by: Haixia Shi <hshi@chromium.org>
Reviewed-by: Daniel Kurtz <djkurtz@chromium.org>
Tested-by: Haixia Shi <hshi@chromium.org>
---
 drivers/gpu/drm/udl/udl_transfer.c | 41 +++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c
index f343db7..eadddf9 100644
--- a/drivers/gpu/drm/udl/udl_transfer.c
+++ b/drivers/gpu/drm/udl/udl_transfer.c
@@ -82,12 +82,14 @@ static inline u16 pixel32_to_be16(const uint32_t pixel)
 		((pixel >> 8) & 0xf800));
 }
 
-static bool pixel_repeats(const void *pixel, const uint32_t repeat, int bpp)
+static inline u16 get_pixel_val16(const uint8_t *pixel, int bpp)
 {
+	u16 pixel_val16 = 0;
 	if (bpp == 2)
-		return *(const uint16_t *)pixel == repeat;
-	else
-		return *(const uint32_t *)pixel == repeat;
+		pixel_val16 = *(uint16_t *)pixel;
+	else if (bpp == 4)
+		pixel_val16 = pixel32_to_be16p(pixel);
+	return pixel_val16;
 }
 
 /*
@@ -134,6 +136,7 @@ static void udl_compress_hline16(
 		uint8_t *cmd_pixels_count_byte = NULL;
 		const u8 *raw_pixel_start = NULL;
 		const u8 *cmd_pixel_start, *cmd_pixel_end = NULL;
+		uint16_t pixel_val16;
 
 		prefetchw((void *) cmd); /* pull in one cache line at least */
 
@@ -154,33 +157,29 @@ static void udl_compress_hline16(
 			    (int)(cmd_buffer_end - cmd) / 2))) * bpp;
 
 		prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp);
+		pixel_val16 = get_pixel_val16(pixel, bpp);
 
 		while (pixel < cmd_pixel_end) {
-			const u8 *const start = pixel;
-			u32 repeating_pixel;
-
-			if (bpp == 2) {
-				repeating_pixel = *(uint16_t *)pixel;
-				*(uint16_t *)cmd = cpu_to_be16(repeating_pixel);
-			} else {
-				repeating_pixel = *(uint32_t *)pixel;
-				*(uint16_t *)cmd = cpu_to_be16(pixel32_to_be16(repeating_pixel));
-			}
+			const u8 * const repeating_pixel = pixel;
+			const uint16_t repeating_pixel_val16 = pixel_val16;
+
+			*(uint16_t *)cmd = cpu_to_be16(pixel_val16);
 
 			cmd += 2;
 			pixel += bpp;
 
-			if (unlikely((pixel < cmd_pixel_end) &&
-				     (pixel_repeats(pixel, repeating_pixel, bpp)))) {
+			while (pixel < cmd_pixel_end) {
+				pixel_val16 = get_pixel_val16(pixel, bpp);
+				if (pixel_val16 != repeating_pixel_val16)
+					break;
+				pixel += bpp;
+			}
+
+			if (unlikely(pixel > repeating_pixel + bpp)) {
 				/* go back and fill in raw pixel count */
 				*raw_pixels_count_byte = (((start -
 						raw_pixel_start) / bpp) + 1) & 0xFF;
 
-				while ((pixel < cmd_pixel_end) &&
-				       (pixel_repeats(pixel, repeating_pixel, bpp))) {
-					pixel += bpp;
-				}
-
 				/* immediately after raw data is repeat byte */
 				*cmd++ = (((pixel - start) / bpp) - 1) & 0xFF;
 
-- 
2.2.0.rc0.207.ga3a616c

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/2] drm/udl: fix excessive prefetch_range
  2015-01-28 21:41   ` Haixia Shi
@ 2015-01-28 21:41     ` Haixia Shi
  2015-01-30  3:45     ` [PATCH 1/2] drm/udl: optimize udl_compress_hline16 Dave Airlie
  1 sibling, 0 replies; 13+ messages in thread
From: Haixia Shi @ 2015-01-28 21:41 UTC (permalink / raw)
  To: dri-devel; +Cc: Haixia Shi

The prefetch_range amount is already in number of bytes. Multiplying again by
bpp is unnecessary.

Signed-off-by: Haixia Shi <hshi@chromium.org>
Reviewed-by: Daniel Kurtz <djkurtz@chromium.org>
Tested-by: Haixia Shi <hshi@chromium.org>
---
 drivers/gpu/drm/udl/udl_transfer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c
index eadddf9..91e4ae2 100644
--- a/drivers/gpu/drm/udl/udl_transfer.c
+++ b/drivers/gpu/drm/udl/udl_transfer.c
@@ -156,7 +156,7 @@ static void udl_compress_hline16(
 			min((int)(pixel_end - pixel) / bpp,
 			    (int)(cmd_buffer_end - cmd) / 2))) * bpp;
 
-		prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp);
+		prefetch_range((void *) pixel, cmd_pixel_end - pixel);
 		pixel_val16 = get_pixel_val16(pixel, bpp);
 
 		while (pixel < cmd_pixel_end) {
-- 
2.2.0.rc0.207.ga3a616c

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] drm/udl: optimize udl_compress_hline16
  2015-01-28 21:12 ` [PATCH 1/2] drm/udl: optimize udl_compress_hline16 Chris Wilson
  2015-01-28 21:41   ` Haixia Shi
@ 2015-01-28 21:50   ` Haixia Shi
  1 sibling, 0 replies; 13+ messages in thread
From: Haixia Shi @ 2015-01-28 21:50 UTC (permalink / raw)
  To: Chris Wilson, Haixia Shi, dri-devel

Sorry about that; I have just re-sent the patches based on upstream code.

On Wed, Jan 28, 2015 at 1:12 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Wed, Jan 28, 2015 at 10:15:29AM -0800, Haixia Shi wrote:
>> The run-length encoding algorithm should compare 16-bit encoded pixel
>> values instead of comparing raw pixel values. It allows pixels
>> with similar but different colors to be encoded as repeat pixels, and
>> thus potentially save USB bandwidth.
>>
>> Signed-off-by: Haixia Shi <hshi@chromium.org>
>> Reviewed-by: Daniel Kurtz <djkurtz@chromium.org>
>> Tested-by: Haixia Shi <hshi@chromium.org>
>
> This is not based on upstream code, similar but it won't apply.
> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] drm/udl: optimize udl_compress_hline16
  2015-01-28 21:41   ` Haixia Shi
  2015-01-28 21:41     ` [PATCH 2/2] drm/udl: fix excessive prefetch_range Haixia Shi
@ 2015-01-30  3:45     ` Dave Airlie
  2015-01-30 18:20       ` Haixia Shi
  1 sibling, 1 reply; 13+ messages in thread
From: Dave Airlie @ 2015-01-30  3:45 UTC (permalink / raw)
  To: Haixia Shi; +Cc: dri-devel

On 29 January 2015 at 07:41, Haixia Shi <hshi@chromium.org> wrote:
> The run-length encoding algorithm should compare 16-bit encoded pixel
> values instead of comparing raw pixel values. It allows pixels
> with similar but different colors to be encoded as repeat pixels, and
> thus potentially save USB bandwidth.

This fails to build here. Are we missing some precursor patches?

 CC [M]  drivers/gpu/drm/udl/udl_transfer.o
/home/airlied/devel/kernel/drm-next/drivers/gpu/drm/udl/udl_transfer.c:
In function ‘get_pixel_val16’:
/home/airlied/devel/kernel/drm-next/drivers/gpu/drm/udl/udl_transfer.c:91:3:
error: implicit declaration of function ‘pixel32_to_be16p’
[-Werror=implicit-function-declaration]
   pixel_val16 = pixel32_to_be16p(pixel);
   ^
/home/airlied/devel/kernel/drm-next/drivers/gpu/drm/udl/udl_transfer.c:
In function ‘udl_compress_hline16’:
/home/airlied/devel/kernel/drm-next/drivers/gpu/drm/udl/udl_transfer.c:180:33:
error: ‘start’ undeclared (first use in this function)
     *raw_pixels_count_byte = (((start -
                                 ^
/home/airlied/devel/kernel/drm-next/drivers/gpu/drm/udl/udl_transfer.c:180:33:
note: each undeclared identifier is reported only once for each
function it appears in
cc1: some warnings being treated as errors

Dave.
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] drm/udl: optimize udl_compress_hline16
  2015-01-30  3:45     ` [PATCH 1/2] drm/udl: optimize udl_compress_hline16 Dave Airlie
@ 2015-01-30 18:20       ` Haixia Shi
  2015-01-30 18:49         ` Haixia Shi
  2015-01-30 18:51         ` [PATCH 1/2] drm/udl: optimize udl_compress_hline16 (v2) Haixia Shi
  0 siblings, 2 replies; 13+ messages in thread
From: Haixia Shi @ 2015-01-30 18:20 UTC (permalink / raw)
  To: Dave Airlie; +Cc: dri-devel

Dave

Sorry, it seems that my patch was rebased on top of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

I'm going to re-send the patch on top of the drm-next branch of your
tree (git://people.freedesktop.org/~airlied/linux).

On Thu, Jan 29, 2015 at 7:45 PM, Dave Airlie <airlied@gmail.com> wrote:
> On 29 January 2015 at 07:41, Haixia Shi <hshi@chromium.org> wrote:
>> The run-length encoding algorithm should compare 16-bit encoded pixel
>> values instead of comparing raw pixel values. It allows pixels
>> with similar but different colors to be encoded as repeat pixels, and
>> thus potentially save USB bandwidth.
>
> This fails to build here, are we missing some precursor patches?
>
>  CC [M]  drivers/gpu/drm/udl/udl_transfer.o
> /home/airlied/devel/kernel/drm-next/drivers/gpu/drm/udl/udl_transfer.c:
> In function ‘get_pixel_val16’:
> /home/airlied/devel/kernel/drm-next/drivers/gpu/drm/udl/udl_transfer.c:91:3:
> error: implicit declaration of function ‘pixel32_to_be16p’
> [-Werror=implicit-function-declaration]
>    pixel_val16 = pixel32_to_be16p(pixel);
>    ^
> /home/airlied/devel/kernel/drm-next/drivers/gpu/drm/udl/udl_transfer.c:
> In function ‘udl_compress_hline16’:
> /home/airlied/devel/kernel/drm-next/drivers/gpu/drm/udl/udl_transfer.c:180:33:
> error: ‘start’ undeclared (first use in this function)
>      *raw_pixels_count_byte = (((start -
>                                  ^
> /home/airlied/devel/kernel/drm-next/drivers/gpu/drm/udl/udl_transfer.c:180:33:
> note: each undeclared identifier is reported only once for each
> function it appears in
> cc1: some warnings being treated as errors
>
> Dave.
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 1/2] drm/udl: optimize udl_compress_hline16
  2015-01-30 18:20       ` Haixia Shi
@ 2015-01-30 18:49         ` Haixia Shi
  2015-01-30 18:49           ` [PATCH 2/2] drm/udl: fix excessive prefetch_range Haixia Shi
  2015-01-30 18:51         ` [PATCH 1/2] drm/udl: optimize udl_compress_hline16 (v2) Haixia Shi
  1 sibling, 1 reply; 13+ messages in thread
From: Haixia Shi @ 2015-01-30 18:49 UTC (permalink / raw)
  To: dri-devel; +Cc: Haixia Shi

The run-length encoding algorithm should compare 16-bit encoded pixel
values instead of comparing raw pixel values. This allows pixels
with similar but different colors to be encoded as repeat pixels, and
thus potentially saves USB bandwidth.

Signed-off-by: Haixia Shi <hshi@chromium.org>
Reviewed-by: Daniel Kurtz <djkurtz@chromium.org>
Tested-by: Haixia Shi <hshi@chromium.org>
---
 drivers/gpu/drm/udl/udl_transfer.c | 39 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c
index f343db7..917dcb9 100644
--- a/drivers/gpu/drm/udl/udl_transfer.c
+++ b/drivers/gpu/drm/udl/udl_transfer.c
@@ -82,12 +82,14 @@ static inline u16 pixel32_to_be16(const uint32_t pixel)
 		((pixel >> 8) & 0xf800));
 }
 
-static bool pixel_repeats(const void *pixel, const uint32_t repeat, int bpp)
+static inline u16 get_pixel_val16(const uint8_t *pixel, int bpp)
 {
+	u16 pixel_val16 = 0;
 	if (bpp == 2)
-		return *(const uint16_t *)pixel == repeat;
-	else
-		return *(const uint32_t *)pixel == repeat;
+		pixel_val16 = *(const uint16_t *)pixel;
+	else if (bpp == 4)
+		pixel_val16 = pixel32_to_be16(*(const uint32_t *)pixel);
+	return pixel_val16;
 }
 
 /*
@@ -134,6 +136,7 @@ static void udl_compress_hline16(
 		uint8_t *cmd_pixels_count_byte = NULL;
 		const u8 *raw_pixel_start = NULL;
 		const u8 *cmd_pixel_start, *cmd_pixel_end = NULL;
+		uint16_t pixel_val16;
 
 		prefetchw((void *) cmd); /* pull in one cache line at least */
 
@@ -154,33 +157,29 @@ static void udl_compress_hline16(
 			    (int)(cmd_buffer_end - cmd) / 2))) * bpp;
 
 		prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp);
+		pixel_val16 = get_pixel_val16(pixel, bpp);
 
 		while (pixel < cmd_pixel_end) {
 			const u8 *const start = pixel;
-			u32 repeating_pixel;
-
-			if (bpp == 2) {
-				repeating_pixel = *(uint16_t *)pixel;
-				*(uint16_t *)cmd = cpu_to_be16(repeating_pixel);
-			} else {
-				repeating_pixel = *(uint32_t *)pixel;
-				*(uint16_t *)cmd = cpu_to_be16(pixel32_to_be16(repeating_pixel));
-			}
+			const uint16_t repeating_pixel_val16 = pixel_val16;
+
+			*(uint16_t *)cmd = cpu_to_be16(pixel_val16);
 
 			cmd += 2;
 			pixel += bpp;
 
-			if (unlikely((pixel < cmd_pixel_end) &&
-				     (pixel_repeats(pixel, repeating_pixel, bpp)))) {
+			while (pixel < cmd_pixel_end) {
+				pixel_val16 = get_pixel_val16(pixel, bpp);
+				if (pixel_val16 != repeating_pixel_val16)
+					break;
+				pixel += bpp;
+			}
+
+			if (unlikely(pixel > start + bpp)) {
 				/* go back and fill in raw pixel count */
 				*raw_pixels_count_byte = (((start -
 						raw_pixel_start) / bpp) + 1) & 0xFF;
 
-				while ((pixel < cmd_pixel_end) &&
-				       (pixel_repeats(pixel, repeating_pixel, bpp))) {
-					pixel += bpp;
-				}
-
 				/* immediately after raw data is repeat byte */
 				*cmd++ = (((pixel - start) / bpp) - 1) & 0xFF;
 
-- 
2.2.0.rc0.207.ga3a616c

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/2] drm/udl: fix excessive prefetch_range
  2015-01-30 18:49         ` Haixia Shi
@ 2015-01-30 18:49           ` Haixia Shi
  0 siblings, 0 replies; 13+ messages in thread
From: Haixia Shi @ 2015-01-30 18:49 UTC (permalink / raw)
  To: dri-devel; +Cc: Haixia Shi

The prefetch_range amount is already in number of bytes. Multiplying again by
bpp is unnecessary.

Signed-off-by: Haixia Shi <hshi@chromium.org>
Reviewed-by: Daniel Kurtz <djkurtz@chromium.org>
Tested-by: Haixia Shi <hshi@chromium.org>
---
 drivers/gpu/drm/udl/udl_transfer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c
index 917dcb9..45f459f 100644
--- a/drivers/gpu/drm/udl/udl_transfer.c
+++ b/drivers/gpu/drm/udl/udl_transfer.c
@@ -156,7 +156,7 @@ static void udl_compress_hline16(
 			min((int)(pixel_end - pixel) / bpp,
 			    (int)(cmd_buffer_end - cmd) / 2))) * bpp;
 
-		prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp);
+		prefetch_range((void *) pixel, cmd_pixel_end - pixel);
 		pixel_val16 = get_pixel_val16(pixel, bpp);
 
 		while (pixel < cmd_pixel_end) {
-- 
2.2.0.rc0.207.ga3a616c

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 1/2] drm/udl: optimize udl_compress_hline16 (v2)
  2015-01-30 18:20       ` Haixia Shi
  2015-01-30 18:49         ` Haixia Shi
@ 2015-01-30 18:51         ` Haixia Shi
  2015-01-30 18:51           ` [PATCH 2/2] drm/udl: fix excessive prefetch_range (v2) Haixia Shi
  1 sibling, 1 reply; 13+ messages in thread
From: Haixia Shi @ 2015-01-30 18:51 UTC (permalink / raw)
  To: dri-devel; +Cc: Haixia Shi

The run-length encoding algorithm should compare 16-bit encoded pixel
values instead of comparing raw pixel values. This allows pixels
with similar but different colors to be encoded as repeat pixels, and
thus potentially saves USB bandwidth.

Signed-off-by: Haixia Shi <hshi@chromium.org>
Reviewed-by: Daniel Kurtz <djkurtz@chromium.org>
Tested-by: Haixia Shi <hshi@chromium.org>
---
 drivers/gpu/drm/udl/udl_transfer.c | 39 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c
index f343db7..917dcb9 100644
--- a/drivers/gpu/drm/udl/udl_transfer.c
+++ b/drivers/gpu/drm/udl/udl_transfer.c
@@ -82,12 +82,14 @@ static inline u16 pixel32_to_be16(const uint32_t pixel)
 		((pixel >> 8) & 0xf800));
 }
 
-static bool pixel_repeats(const void *pixel, const uint32_t repeat, int bpp)
+static inline u16 get_pixel_val16(const uint8_t *pixel, int bpp)
 {
+	u16 pixel_val16 = 0;
 	if (bpp == 2)
-		return *(const uint16_t *)pixel == repeat;
-	else
-		return *(const uint32_t *)pixel == repeat;
+		pixel_val16 = *(const uint16_t *)pixel;
+	else if (bpp == 4)
+		pixel_val16 = pixel32_to_be16(*(const uint32_t *)pixel);
+	return pixel_val16;
 }
 
 /*
@@ -134,6 +136,7 @@ static void udl_compress_hline16(
 		uint8_t *cmd_pixels_count_byte = NULL;
 		const u8 *raw_pixel_start = NULL;
 		const u8 *cmd_pixel_start, *cmd_pixel_end = NULL;
+		uint16_t pixel_val16;
 
 		prefetchw((void *) cmd); /* pull in one cache line at least */
 
@@ -154,33 +157,29 @@ static void udl_compress_hline16(
 			    (int)(cmd_buffer_end - cmd) / 2))) * bpp;
 
 		prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp);
+		pixel_val16 = get_pixel_val16(pixel, bpp);
 
 		while (pixel < cmd_pixel_end) {
 			const u8 *const start = pixel;
-			u32 repeating_pixel;
-
-			if (bpp == 2) {
-				repeating_pixel = *(uint16_t *)pixel;
-				*(uint16_t *)cmd = cpu_to_be16(repeating_pixel);
-			} else {
-				repeating_pixel = *(uint32_t *)pixel;
-				*(uint16_t *)cmd = cpu_to_be16(pixel32_to_be16(repeating_pixel));
-			}
+			const uint16_t repeating_pixel_val16 = pixel_val16;
+
+			*(uint16_t *)cmd = cpu_to_be16(pixel_val16);
 
 			cmd += 2;
 			pixel += bpp;
 
-			if (unlikely((pixel < cmd_pixel_end) &&
-				     (pixel_repeats(pixel, repeating_pixel, bpp)))) {
+			while (pixel < cmd_pixel_end) {
+				pixel_val16 = get_pixel_val16(pixel, bpp);
+				if (pixel_val16 != repeating_pixel_val16)
+					break;
+				pixel += bpp;
+			}
+
+			if (unlikely(pixel > start + bpp)) {
 				/* go back and fill in raw pixel count */
 				*raw_pixels_count_byte = (((start -
 						raw_pixel_start) / bpp) + 1) & 0xFF;
 
-				while ((pixel < cmd_pixel_end) &&
-				       (pixel_repeats(pixel, repeating_pixel, bpp))) {
-					pixel += bpp;
-				}
-
 				/* immediately after raw data is repeat byte */
 				*cmd++ = (((pixel - start) / bpp) - 1) & 0xFF;
 
-- 
2.2.0.rc0.207.ga3a616c

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/2] drm/udl: fix excessive prefetch_range (v2)
  2015-01-30 18:51         ` [PATCH 1/2] drm/udl: optimize udl_compress_hline16 (v2) Haixia Shi
@ 2015-01-30 18:51           ` Haixia Shi
  0 siblings, 0 replies; 13+ messages in thread
From: Haixia Shi @ 2015-01-30 18:51 UTC (permalink / raw)
  To: dri-devel; +Cc: Haixia Shi

The prefetch_range amount is already in number of bytes. Multiplying again by
bpp is unnecessary.

Signed-off-by: Haixia Shi <hshi@chromium.org>
Reviewed-by: Daniel Kurtz <djkurtz@chromium.org>
Tested-by: Haixia Shi <hshi@chromium.org>
---
 drivers/gpu/drm/udl/udl_transfer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c
index 917dcb9..45f459f 100644
--- a/drivers/gpu/drm/udl/udl_transfer.c
+++ b/drivers/gpu/drm/udl/udl_transfer.c
@@ -156,7 +156,7 @@ static void udl_compress_hline16(
 			min((int)(pixel_end - pixel) / bpp,
 			    (int)(cmd_buffer_end - cmd) / 2))) * bpp;
 
-		prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp);
+		prefetch_range((void *) pixel, cmd_pixel_end - pixel);
 		pixel_val16 = get_pixel_val16(pixel, bpp);
 
 		while (pixel < cmd_pixel_end) {
-- 
2.2.0.rc0.207.ga3a616c

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2015-01-30 18:51 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-28 18:15 [PATCH 1/2] drm/udl: optimize udl_compress_hline16 Haixia Shi
2015-01-28 18:15 ` [PATCH 2/2] drm/udl: fix excessive prefetch_range Haixia Shi
2015-01-28 20:56   ` Chris Wilson
2015-01-28 21:12 ` [PATCH 1/2] drm/udl: optimize udl_compress_hline16 Chris Wilson
2015-01-28 21:41   ` Haixia Shi
2015-01-28 21:41     ` [PATCH 2/2] drm/udl: fix excessive prefetch_range Haixia Shi
2015-01-30  3:45     ` [PATCH 1/2] drm/udl: optimize udl_compress_hline16 Dave Airlie
2015-01-30 18:20       ` Haixia Shi
2015-01-30 18:49         ` Haixia Shi
2015-01-30 18:49           ` [PATCH 2/2] drm/udl: fix excessive prefetch_range Haixia Shi
2015-01-30 18:51         ` [PATCH 1/2] drm/udl: optimize udl_compress_hline16 (v2) Haixia Shi
2015-01-30 18:51           ` [PATCH 2/2] drm/udl: fix excessive prefetch_range (v2) Haixia Shi
2015-01-28 21:50   ` [PATCH 1/2] drm/udl: optimize udl_compress_hline16 Haixia Shi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.