All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] fbdev: Fix image blitting for arbitrary image widths
@ 2022-03-13 19:29 ` Thomas Zimmermann
  0 siblings, 0 replies; 15+ messages in thread
From: Thomas Zimmermann @ 2022-03-13 19:29 UTC (permalink / raw)
  To: daniel, deller, m.szyprowski, geert, javierm, sam
  Cc: linux-fbdev, dri-devel, Thomas Zimmermann

Recent optimization of the fbdev image-blitting helpers broke the code for
image widths that are not multiples of 8. Both sys and cfb are
affected. Fix this problem by handling the trailing pixels on each line
separately.

Tested with simpledrm and the 7x14 font.

Thomas Zimmermann (2):
  fbdev: Fix sys_imageblit() for arbitrary image widths
  fbdev: Fix cfb_imageblit() for arbitrary image widths

 drivers/video/fbdev/core/cfbimgblt.c | 28 +++++++++++++++++++++++----
 drivers/video/fbdev/core/sysimgblt.c | 29 ++++++++++++++++++++++++----
 2 files changed, 49 insertions(+), 8 deletions(-)

-- 
2.35.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 0/2] fbdev: Fix image blitting for arbitrary image widths
@ 2022-03-13 19:29 ` Thomas Zimmermann
  0 siblings, 0 replies; 15+ messages in thread
From: Thomas Zimmermann @ 2022-03-13 19:29 UTC (permalink / raw)
  To: daniel, deller, m.szyprowski, geert, javierm, sam
  Cc: linux-fbdev, Thomas Zimmermann, dri-devel

Recent optimization of the fbdev image-blitting helpers broke the code for
image widths that are not multiples of 8. Both sys and cfb are
affected. Fix this problem by handling the trailing pixels on each line
separately.

Tested with simpledrm and the 7x14 font.

Thomas Zimmermann (2):
  fbdev: Fix sys_imageblit() for arbitrary image widths
  fbdev: Fix cfb_imageblit() for arbitrary image widths

 drivers/video/fbdev/core/cfbimgblt.c | 28 +++++++++++++++++++++++----
 drivers/video/fbdev/core/sysimgblt.c | 29 ++++++++++++++++++++++++----
 2 files changed, 49 insertions(+), 8 deletions(-)

-- 
2.35.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/2] fbdev: Fix sys_imageblit() for arbitrary image widths
  2022-03-13 19:29 ` Thomas Zimmermann
@ 2022-03-13 19:29   ` Thomas Zimmermann
  -1 siblings, 0 replies; 15+ messages in thread
From: Thomas Zimmermann @ 2022-03-13 19:29 UTC (permalink / raw)
  To: daniel, deller, m.szyprowski, geert, javierm, sam
  Cc: linux-fbdev, dri-devel, Thomas Zimmermann

Commit 6f29e04938bf ("fbdev: Improve performance of sys_imageblit()")
broke sys_imageblit() for image widths that are not aligned to 8-bit
boundaries. Fix this by handling the trailing pixels on each line
separately. The performance improvements in the original commit do not
regress by this change.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Fixes: 6f29e04938bf ("fbdev: Improve performance of sys_imageblit()")
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Javier Martinez Canillas <javierm@redhat.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
---
 drivers/video/fbdev/core/sysimgblt.c | 29 ++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/video/fbdev/core/sysimgblt.c b/drivers/video/fbdev/core/sysimgblt.c
index 722c327a381b..335e92b813fc 100644
--- a/drivers/video/fbdev/core/sysimgblt.c
+++ b/drivers/video/fbdev/core/sysimgblt.c
@@ -188,7 +188,7 @@ static void fast_imageblit(const struct fb_image *image, struct fb_info *p,
 {
 	u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
 	u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
-	u32 bit_mask, eorx;
+	u32 bit_mask, eorx, shift;
 	const char *s = image->data, *src;
 	u32 *dst;
 	const u32 *tab;
@@ -229,17 +229,23 @@ static void fast_imageblit(const struct fb_image *image, struct fb_info *p,
 
 	for (i = image->height; i--; ) {
 		dst = dst1;
+		shift = 8;
 		src = s;
 
+		/*
+		 * Manually unroll the per-line copying loop for better
+		 * performance. This works until we processed the last
+		 * completely filled source byte (inclusive).
+		 */
 		switch (ppw) {
 		case 4: /* 8 bpp */
-			for (j = k; j; j -= 2, ++src) {
+			for (j = k; j >= 2; j -= 2, ++src) {
 				*dst++ = colortab[(*src >> 4) & bit_mask];
 				*dst++ = colortab[(*src >> 0) & bit_mask];
 			}
 			break;
 		case 2: /* 16 bpp */
-			for (j = k; j; j -= 4, ++src) {
+			for (j = k; j >= 4; j -= 4, ++src) {
 				*dst++ = colortab[(*src >> 6) & bit_mask];
 				*dst++ = colortab[(*src >> 4) & bit_mask];
 				*dst++ = colortab[(*src >> 2) & bit_mask];
@@ -247,7 +253,7 @@ static void fast_imageblit(const struct fb_image *image, struct fb_info *p,
 			}
 			break;
 		case 1: /* 32 bpp */
-			for (j = k; j; j -= 8, ++src) {
+			for (j = k; j >= 8; j -= 8, ++src) {
 				*dst++ = colortab[(*src >> 7) & bit_mask];
 				*dst++ = colortab[(*src >> 6) & bit_mask];
 				*dst++ = colortab[(*src >> 5) & bit_mask];
@@ -259,6 +265,21 @@ static void fast_imageblit(const struct fb_image *image, struct fb_info *p,
 			}
 			break;
 		}
+
+		/*
+		 * For image widths that are not a multiple of 8, there
+		 * are trailing pixels left on the current line. Print
+		 * them as well.
+		 */
+		for (; j--; ) {
+			shift -= ppw;
+			*dst++ = colortab[(*src >> shift) & bit_mask];
+			if (!shift) {
+				shift = 8;
+				++src;
+			}
+		}
+
 		dst1 += p->fix.line_length;
 		s += spitch;
 	}
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 1/2] fbdev: Fix sys_imageblit() for arbitrary image widths
@ 2022-03-13 19:29   ` Thomas Zimmermann
  0 siblings, 0 replies; 15+ messages in thread
From: Thomas Zimmermann @ 2022-03-13 19:29 UTC (permalink / raw)
  To: daniel, deller, m.szyprowski, geert, javierm, sam
  Cc: linux-fbdev, Thomas Zimmermann, dri-devel

Commit 6f29e04938bf ("fbdev: Improve performance of sys_imageblit()")
broke sys_imageblit() for image widths that are not aligned to 8-bit
boundaries. Fix this by handling the trailing pixels on each line
separately. The performance improvements in the original commit do not
regress by this change.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Fixes: 6f29e04938bf ("fbdev: Improve performance of sys_imageblit()")
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Javier Martinez Canillas <javierm@redhat.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
---
 drivers/video/fbdev/core/sysimgblt.c | 29 ++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/video/fbdev/core/sysimgblt.c b/drivers/video/fbdev/core/sysimgblt.c
index 722c327a381b..335e92b813fc 100644
--- a/drivers/video/fbdev/core/sysimgblt.c
+++ b/drivers/video/fbdev/core/sysimgblt.c
@@ -188,7 +188,7 @@ static void fast_imageblit(const struct fb_image *image, struct fb_info *p,
 {
 	u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
 	u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
-	u32 bit_mask, eorx;
+	u32 bit_mask, eorx, shift;
 	const char *s = image->data, *src;
 	u32 *dst;
 	const u32 *tab;
@@ -229,17 +229,23 @@ static void fast_imageblit(const struct fb_image *image, struct fb_info *p,
 
 	for (i = image->height; i--; ) {
 		dst = dst1;
+		shift = 8;
 		src = s;
 
+		/*
+		 * Manually unroll the per-line copying loop for better
+		 * performance. This works until we processed the last
+		 * completely filled source byte (inclusive).
+		 */
 		switch (ppw) {
 		case 4: /* 8 bpp */
-			for (j = k; j; j -= 2, ++src) {
+			for (j = k; j >= 2; j -= 2, ++src) {
 				*dst++ = colortab[(*src >> 4) & bit_mask];
 				*dst++ = colortab[(*src >> 0) & bit_mask];
 			}
 			break;
 		case 2: /* 16 bpp */
-			for (j = k; j; j -= 4, ++src) {
+			for (j = k; j >= 4; j -= 4, ++src) {
 				*dst++ = colortab[(*src >> 6) & bit_mask];
 				*dst++ = colortab[(*src >> 4) & bit_mask];
 				*dst++ = colortab[(*src >> 2) & bit_mask];
@@ -247,7 +253,7 @@ static void fast_imageblit(const struct fb_image *image, struct fb_info *p,
 			}
 			break;
 		case 1: /* 32 bpp */
-			for (j = k; j; j -= 8, ++src) {
+			for (j = k; j >= 8; j -= 8, ++src) {
 				*dst++ = colortab[(*src >> 7) & bit_mask];
 				*dst++ = colortab[(*src >> 6) & bit_mask];
 				*dst++ = colortab[(*src >> 5) & bit_mask];
@@ -259,6 +265,21 @@ static void fast_imageblit(const struct fb_image *image, struct fb_info *p,
 			}
 			break;
 		}
+
+		/*
+		 * For image widths that are not a multiple of 8, there
+		 * are trailing pixels left on the current line. Print
+		 * them as well.
+		 */
+		for (; j--; ) {
+			shift -= ppw;
+			*dst++ = colortab[(*src >> shift) & bit_mask];
+			if (!shift) {
+				shift = 8;
+				++src;
+			}
+		}
+
 		dst1 += p->fix.line_length;
 		s += spitch;
 	}
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/2] fbdev: Fix cfb_imageblit() for arbitrary image widths
  2022-03-13 19:29 ` Thomas Zimmermann
@ 2022-03-13 19:29   ` Thomas Zimmermann
  -1 siblings, 0 replies; 15+ messages in thread
From: Thomas Zimmermann @ 2022-03-13 19:29 UTC (permalink / raw)
  To: daniel, deller, m.szyprowski, geert, javierm, sam
  Cc: linux-fbdev, dri-devel, Thomas Zimmermann

Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
broke cfb_imageblit() for image widths that are not aligned to 8-bit
boundaries. Fix this by handling the trailing pixels on each line
separately. The performance improvements in the original commit do not
regress by this change.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Javier Martinez Canillas <javierm@redhat.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
---
 drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c
index 7361cfabdd85..9ebda4e0dc7a 100644
--- a/drivers/video/fbdev/core/cfbimgblt.c
+++ b/drivers/video/fbdev/core/cfbimgblt.c
@@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
 {
 	u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
 	u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
-	u32 bit_mask, eorx;
+	u32 bit_mask, eorx, shift;
 	const char *s = image->data, *src;
 	u32 __iomem *dst;
 	const u32 *tab = NULL;
@@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
 
 	for (i = image->height; i--; ) {
 		dst = (u32 __iomem *)dst1;
+		shift = 8;
 		src = s;
 
+		/*
+		 * Manually unroll the per-line copying loop for better
+		 * performance. This works until we processed the last
+		 * completely filled source byte (inclusive).
+		 */
 		switch (ppw) {
 		case 4: /* 8 bpp */
-			for (j = k; j; j -= 2, ++src) {
+			for (j = k; j >= 2; j -= 2, ++src) {
 				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
 			}
 			break;
 		case 2: /* 16 bpp */
-			for (j = k; j; j -= 4, ++src) {
+			for (j = k; j >= 4; j -= 4, ++src) {
 				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
@@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
 			}
 			break;
 		case 1: /* 32 bpp */
-			for (j = k; j; j -= 8, ++src) {
+			for (j = k; j >= 8; j -= 8, ++src) {
 				FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
@@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
 			break;
 		}
 
+		/*
+		 * For image widths that are not a multiple of 8, there
+		 * are trailing pixels left on the current line. Print
+		 * them as well.
+		 */
+		for (; j--; ) {
+			shift -= ppw;
+			FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
+			if (!shift) {
+				shift = 8;
+				++src;
+			}
+		}
+
 		dst1 += p->fix.line_length;
 		s += spitch;
 	}
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/2] fbdev: Fix cfb_imageblit() for arbitrary image widths
@ 2022-03-13 19:29   ` Thomas Zimmermann
  0 siblings, 0 replies; 15+ messages in thread
From: Thomas Zimmermann @ 2022-03-13 19:29 UTC (permalink / raw)
  To: daniel, deller, m.szyprowski, geert, javierm, sam
  Cc: linux-fbdev, Thomas Zimmermann, dri-devel

Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
broke cfb_imageblit() for image widths that are not aligned to 8-bit
boundaries. Fix this by handling the trailing pixels on each line
separately. The performance improvements in the original commit do not
regress by this change.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Javier Martinez Canillas <javierm@redhat.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
---
 drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c
index 7361cfabdd85..9ebda4e0dc7a 100644
--- a/drivers/video/fbdev/core/cfbimgblt.c
+++ b/drivers/video/fbdev/core/cfbimgblt.c
@@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
 {
 	u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
 	u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
-	u32 bit_mask, eorx;
+	u32 bit_mask, eorx, shift;
 	const char *s = image->data, *src;
 	u32 __iomem *dst;
 	const u32 *tab = NULL;
@@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
 
 	for (i = image->height; i--; ) {
 		dst = (u32 __iomem *)dst1;
+		shift = 8;
 		src = s;
 
+		/*
+		 * Manually unroll the per-line copying loop for better
+		 * performance. This works until we processed the last
+		 * completely filled source byte (inclusive).
+		 */
 		switch (ppw) {
 		case 4: /* 8 bpp */
-			for (j = k; j; j -= 2, ++src) {
+			for (j = k; j >= 2; j -= 2, ++src) {
 				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
 			}
 			break;
 		case 2: /* 16 bpp */
-			for (j = k; j; j -= 4, ++src) {
+			for (j = k; j >= 4; j -= 4, ++src) {
 				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
@@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
 			}
 			break;
 		case 1: /* 32 bpp */
-			for (j = k; j; j -= 8, ++src) {
+			for (j = k; j >= 8; j -= 8, ++src) {
 				FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
@@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
 			break;
 		}
 
+		/*
+		 * For image widths that are not a multiple of 8, there
+		 * are trailing pixels left on the current line. Print
+		 * them as well.
+		 */
+		for (; j--; ) {
+			shift -= ppw;
+			FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
+			if (!shift) {
+				shift = 8;
+				++src;
+			}
+		}
+
 		dst1 += p->fix.line_length;
 		s += spitch;
 	}
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] fbdev: Fix cfb_imageblit() for arbitrary image widths
  2022-03-13 19:29   ` Thomas Zimmermann
  (?)
@ 2022-03-14  8:41   ` Marek Szyprowski
  -1 siblings, 0 replies; 15+ messages in thread
From: Marek Szyprowski @ 2022-03-14  8:41 UTC (permalink / raw)
  To: Thomas Zimmermann, daniel, deller, geert, javierm, sam
  Cc: linux-fbdev, dri-devel

On 13.03.2022 20:29, Thomas Zimmermann wrote:
> Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> broke cfb_imageblit() for image widths that are not aligned to 8-bit
> boundaries. Fix this by handling the trailing pixels on each line
> separately. The performance improvements in the original commit do not
> regress by this change.
>
> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Cc: Thomas Zimmermann <tzimmermann@suse.de>
> Cc: Javier Martinez Canillas <javierm@redhat.com>
> Cc: Sam Ravnborg <sam@ravnborg.org>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
> ---
>   drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++----
>   1 file changed, 24 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c
> index 7361cfabdd85..9ebda4e0dc7a 100644
> --- a/drivers/video/fbdev/core/cfbimgblt.c
> +++ b/drivers/video/fbdev/core/cfbimgblt.c
> @@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>   {
>   	u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
>   	u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
> -	u32 bit_mask, eorx;
> +	u32 bit_mask, eorx, shift;
>   	const char *s = image->data, *src;
>   	u32 __iomem *dst;
>   	const u32 *tab = NULL;
> @@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>   
>   	for (i = image->height; i--; ) {
>   		dst = (u32 __iomem *)dst1;
> +		shift = 8;
>   		src = s;
>   
> +		/*
> +		 * Manually unroll the per-line copying loop for better
> +		 * performance. This works until we processed the last
> +		 * completely filled source byte (inclusive).
> +		 */
>   		switch (ppw) {
>   		case 4: /* 8 bpp */
> -			for (j = k; j; j -= 2, ++src) {
> +			for (j = k; j >= 2; j -= 2, ++src) {
>   				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
>   				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
>   			}
>   			break;
>   		case 2: /* 16 bpp */
> -			for (j = k; j; j -= 4, ++src) {
> +			for (j = k; j >= 4; j -= 4, ++src) {
>   				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
>   				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
>   				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
> @@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>   			}
>   			break;
>   		case 1: /* 32 bpp */
> -			for (j = k; j; j -= 8, ++src) {
> +			for (j = k; j >= 8; j -= 8, ++src) {
>   				FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++);
>   				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
>   				FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
> @@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>   			break;
>   		}
>   
> +		/*
> +		 * For image widths that are not a multiple of 8, there
> +		 * are trailing pixels left on the current line. Print
> +		 * them as well.
> +		 */
> +		for (; j--; ) {
> +			shift -= ppw;
> +			FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
> +			if (!shift) {
> +				shift = 8;
> +				++src;
> +			}
> +		}
> +
>   		dst1 += p->fix.line_length;
>   		s += spitch;
>   	}

Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] fbdev: Fix sys_imageblit() for arbitrary image widths
  2022-03-13 19:29   ` Thomas Zimmermann
@ 2022-03-14 20:56     ` Geert Uytterhoeven
  -1 siblings, 0 replies; 15+ messages in thread
From: Geert Uytterhoeven @ 2022-03-14 20:56 UTC (permalink / raw)
  To: Thomas Zimmermann
  Cc: Daniel Vetter, Helge Deller, Marek Szyprowski,
	Javier Martinez Canillas, Sam Ravnborg,
	Linux Fbdev development list, DRI Development

Hi Thomas,

On Sun, Mar 13, 2022 at 8:29 PM Thomas Zimmermann <tzimmermann@suse.de> wrote:
> Commit 6f29e04938bf ("fbdev: Improve performance of sys_imageblit()")
> broke sys_imageblit() for image width that are not aligned to 8-bit
> boundaries. Fix this by handling the trailing pixels on each line
> separately. The performance improvements in the original commit do not
> regress by this change.
>
> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>

Thanks for fixing! This was very valuable for hammering the bugs
out of ssd130xdrm and the xrgb888-to-mono conversion...

Tested-by: Geert Uytterhoeven <geert@linux-m68k.org>

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] fbdev: Fix sys_imageblit() for arbitrary image widths
@ 2022-03-14 20:56     ` Geert Uytterhoeven
  0 siblings, 0 replies; 15+ messages in thread
From: Geert Uytterhoeven @ 2022-03-14 20:56 UTC (permalink / raw)
  To: Thomas Zimmermann
  Cc: Linux Fbdev development list, Helge Deller,
	Javier Martinez Canillas, DRI Development, Sam Ravnborg,
	Marek Szyprowski

Hi Thomas,

On Sun, Mar 13, 2022 at 8:29 PM Thomas Zimmermann <tzimmermann@suse.de> wrote:
> Commit 6f29e04938bf ("fbdev: Improve performance of sys_imageblit()")
> broke sys_imageblit() for image width that are not aligned to 8-bit
> boundaries. Fix this by handling the trailing pixels on each line
> separately. The performance improvements in the original commit do not
> regress by this change.
>
> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>

Thanks for fixing! This was very valuable for hammering the bugs
out of ssd130xdrm and the xrgb888-to-mono conversion...

Tested-by: Geert Uytterhoeven <geert@linux-m68k.org>

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] fbdev: Fix cfb_imageblit() for arbitrary image widths
  2022-03-13 19:29   ` Thomas Zimmermann
@ 2022-03-17 10:54     ` Daniel Vetter
  -1 siblings, 0 replies; 15+ messages in thread
From: Daniel Vetter @ 2022-03-17 10:54 UTC (permalink / raw)
  To: Thomas Zimmermann
  Cc: daniel, deller, m.szyprowski, geert, javierm, sam, linux-fbdev,
	dri-devel

On Sun, Mar 13, 2022 at 08:29:52PM +0100, Thomas Zimmermann wrote:
> Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> broke cfb_imageblit() for image widths that are not aligned to 8-bit
> boundaries. Fix this by handling the trailing pixels on each line
> separately. The performance improvements in the original commit do not
> regress by this change.
> 
> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Cc: Thomas Zimmermann <tzimmermann@suse.de>
> Cc: Javier Martinez Canillas <javierm@redhat.com>
> Cc: Sam Ravnborg <sam@ravnborg.org>

On both patches:

Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> ---
>  drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++----
>  1 file changed, 24 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c
> index 7361cfabdd85..9ebda4e0dc7a 100644
> --- a/drivers/video/fbdev/core/cfbimgblt.c
> +++ b/drivers/video/fbdev/core/cfbimgblt.c
> @@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  {
>  	u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
>  	u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
> -	u32 bit_mask, eorx;
> +	u32 bit_mask, eorx, shift;
>  	const char *s = image->data, *src;
>  	u32 __iomem *dst;
>  	const u32 *tab = NULL;
> @@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  
>  	for (i = image->height; i--; ) {
>  		dst = (u32 __iomem *)dst1;
> +		shift = 8;
>  		src = s;
>  
> +		/*
> +		 * Manually unroll the per-line copying loop for better
> +		 * performance. This works until we processed the last
> +		 * completely filled source byte (inclusive).
> +		 */
>  		switch (ppw) {
>  		case 4: /* 8 bpp */
> -			for (j = k; j; j -= 2, ++src) {
> +			for (j = k; j >= 2; j -= 2, ++src) {
>  				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
>  			}
>  			break;
>  		case 2: /* 16 bpp */
> -			for (j = k; j; j -= 4, ++src) {
> +			for (j = k; j >= 4; j -= 4, ++src) {
>  				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
> @@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  			}
>  			break;
>  		case 1: /* 32 bpp */
> -			for (j = k; j; j -= 8, ++src) {
> +			for (j = k; j >= 8; j -= 8, ++src) {
>  				FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
> @@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  			break;
>  		}
>  
> +		/*
> +		 * For image widths that are not a multiple of 8, there
> +		 * are trailing pixels left on the current line. Print
> +		 * them as well.
> +		 */
> +		for (; j--; ) {
> +			shift -= ppw;
> +			FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
> +			if (!shift) {
> +				shift = 8;
> +				++src;
> +			}
> +		}
> +
>  		dst1 += p->fix.line_length;
>  		s += spitch;
>  	}
> -- 
> 2.35.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] fbdev: Fix cfb_imageblit() for arbitrary image widths
@ 2022-03-17 10:54     ` Daniel Vetter
  0 siblings, 0 replies; 15+ messages in thread
From: Daniel Vetter @ 2022-03-17 10:54 UTC (permalink / raw)
  To: Thomas Zimmermann
  Cc: linux-fbdev, deller, javierm, dri-devel, geert, sam, m.szyprowski

On Sun, Mar 13, 2022 at 08:29:52PM +0100, Thomas Zimmermann wrote:
> Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> broke cfb_imageblit() for image widths that are not aligned to 8-bit
> boundaries. Fix this by handling the trailing pixels on each line
> separately. The performance improvements in the original commit do not
> regress by this change.
> 
> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Cc: Thomas Zimmermann <tzimmermann@suse.de>
> Cc: Javier Martinez Canillas <javierm@redhat.com>
> Cc: Sam Ravnborg <sam@ravnborg.org>

On both patches:

Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> ---
>  drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++----
>  1 file changed, 24 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c
> index 7361cfabdd85..9ebda4e0dc7a 100644
> --- a/drivers/video/fbdev/core/cfbimgblt.c
> +++ b/drivers/video/fbdev/core/cfbimgblt.c
> @@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  {
>  	u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
>  	u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
> -	u32 bit_mask, eorx;
> +	u32 bit_mask, eorx, shift;
>  	const char *s = image->data, *src;
>  	u32 __iomem *dst;
>  	const u32 *tab = NULL;
> @@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  
>  	for (i = image->height; i--; ) {
>  		dst = (u32 __iomem *)dst1;
> +		shift = 8;
>  		src = s;
>  
> +		/*
> +		 * Manually unroll the per-line copying loop for better
> +		 * performance. This works until we processed the last
> +		 * completely filled source byte (inclusive).
> +		 */
>  		switch (ppw) {
>  		case 4: /* 8 bpp */
> -			for (j = k; j; j -= 2, ++src) {
> +			for (j = k; j >= 2; j -= 2, ++src) {
>  				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
>  			}
>  			break;
>  		case 2: /* 16 bpp */
> -			for (j = k; j; j -= 4, ++src) {
> +			for (j = k; j >= 4; j -= 4, ++src) {
>  				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
> @@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  			}
>  			break;
>  		case 1: /* 32 bpp */
> -			for (j = k; j; j -= 8, ++src) {
> +			for (j = k; j >= 8; j -= 8, ++src) {
>  				FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
> @@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  			break;
>  		}
>  
> +		/*
> +		 * For image widths that are not a multiple of 8, there
> +		 * are trailing pixels left on the current line. Print
> +		 * them as well.
> +		 */
> +		for (; j--; ) {
> +			shift -= ppw;
> +			FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
> +			if (!shift) {
> +				shift = 8;
> +				++src;
> +			}
> +		}
> +
>  		dst1 += p->fix.line_length;
>  		s += spitch;
>  	}
> -- 
> 2.35.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] fbdev: Fix sys_imageblit() for arbitrary image widths
  2022-03-13 19:29   ` Thomas Zimmermann
  (?)
  (?)
@ 2022-03-17 11:20   ` Javier Martinez Canillas
  -1 siblings, 0 replies; 15+ messages in thread
From: Javier Martinez Canillas @ 2022-03-17 11:20 UTC (permalink / raw)
  To: Thomas Zimmermann, daniel, deller, m.szyprowski, geert, sam
  Cc: linux-fbdev, dri-devel

Hello Thomas,

On 3/13/22 20:29, Thomas Zimmermann wrote:
> Commit 6f29e04938bf ("fbdev: Improve performance of sys_imageblit()")
> broke sys_imageblit() for image width that are not aligned to 8-bit
> boundaries. Fix this by handling the trailing pixels on each line
> separately. The performance improvements in the original commit do not
> regress by this change.
> 
> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> Fixes: 6f29e04938bf ("fbdev: Improve performance of sys_imageblit()")
> Cc: Thomas Zimmermann <tzimmermann@suse.de>
> Cc: Javier Martinez Canillas <javierm@redhat.com>
> Cc: Sam Ravnborg <sam@ravnborg.org>
> ---

Looks good to me. Also, Marek and Geert mentioned that it fixes the issue
they were seeing.

Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>

-- 
Best regards,

Javier Martinez Canillas
Linux Engineering
Red Hat


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] fbdev: Fix cfb_imageblit() for arbitrary image widths
  2022-03-13 19:29   ` Thomas Zimmermann
                     ` (2 preceding siblings ...)
  (?)
@ 2022-03-17 11:22   ` Javier Martinez Canillas
  -1 siblings, 0 replies; 15+ messages in thread
From: Javier Martinez Canillas @ 2022-03-17 11:22 UTC (permalink / raw)
  To: Thomas Zimmermann, daniel, deller, m.szyprowski, geert, sam
  Cc: linux-fbdev, dri-devel

On 3/13/22 20:29, Thomas Zimmermann wrote:
> Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> broke cfb_imageblit() for image widths that are not aligned to 8-bit
> boundaries. Fix this by handling the trailing pixels on each line
> separately. The performance improvements in the original commit do not
> regress by this change.
> 
> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Cc: Thomas Zimmermann <tzimmermann@suse.de>
> Cc: Javier Martinez Canillas <javierm@redhat.com>
> Cc: Sam Ravnborg <sam@ravnborg.org>
> ---

Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>

-- 
Best regards,

Javier Martinez Canillas
Linux Engineering
Red Hat


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [2/2] fbdev: Fix cfb_imageblit() for arbitrary image widths
  2022-03-13 19:29   ` Thomas Zimmermann
@ 2022-03-24 21:15     ` Guenter Roeck
  -1 siblings, 0 replies; 15+ messages in thread
From: Guenter Roeck @ 2022-03-24 21:15 UTC (permalink / raw)
  To: Thomas Zimmermann
  Cc: daniel, deller, m.szyprowski, geert, javierm, sam, linux-fbdev,
	dri-devel

On Sun, Mar 13, 2022 at 08:29:52PM +0100, Thomas Zimmermann wrote:
> Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> broke cfb_imageblit() for image widths that are not aligned to 8-bit
> boundaries. Fix this by handling the trailing pixels on each line
> separately. The performance improvements in the original commit do not
> regress by this change.
> 
> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Cc: Thomas Zimmermann <tzimmermann@suse.de>
> Cc: Javier Martinez Canillas <javierm@redhat.com>
> Cc: Sam Ravnborg <sam@ravnborg.org>
> Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>

Tested-by: Guenter Roeck <linux@roeck-us.net>

> ---
>  drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++----
>  1 file changed, 24 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c
> index 7361cfabdd85..9ebda4e0dc7a 100644
> --- a/drivers/video/fbdev/core/cfbimgblt.c
> +++ b/drivers/video/fbdev/core/cfbimgblt.c
> @@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  {
>  	u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
>  	u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
> -	u32 bit_mask, eorx;
> +	u32 bit_mask, eorx, shift;
>  	const char *s = image->data, *src;
>  	u32 __iomem *dst;
>  	const u32 *tab = NULL;
> @@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  
>  	for (i = image->height; i--; ) {
>  		dst = (u32 __iomem *)dst1;
> +		shift = 8;
>  		src = s;
>  
> +		/*
> +		 * Manually unroll the per-line copying loop for better
> +		 * performance. This works until we processed the last
> +		 * completely filled source byte (inclusive).
> +		 */
>  		switch (ppw) {
>  		case 4: /* 8 bpp */
> -			for (j = k; j; j -= 2, ++src) {
> +			for (j = k; j >= 2; j -= 2, ++src) {
>  				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
>  			}
>  			break;
>  		case 2: /* 16 bpp */
> -			for (j = k; j; j -= 4, ++src) {
> +			for (j = k; j >= 4; j -= 4, ++src) {
>  				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
> @@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  			}
>  			break;
>  		case 1: /* 32 bpp */
> -			for (j = k; j; j -= 8, ++src) {
> +			for (j = k; j >= 8; j -= 8, ++src) {
>  				FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
> @@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  			break;
>  		}
>  
> +		/*
> +		 * For image widths that are not a multiple of 8, there
> +		 * are trailing pixels left on the current line. Print
> +		 * them as well.
> +		 */
> +		for (; j--; ) {
> +			shift -= ppw;
> +			FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
> +			if (!shift) {
> +				shift = 8;
> +				++src;
> +			}
> +		}
> +
>  		dst1 += p->fix.line_length;
>  		s += spitch;
>  	}

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [2/2] fbdev: Fix cfb_imageblit() for arbitrary image widths
@ 2022-03-24 21:15     ` Guenter Roeck
  0 siblings, 0 replies; 15+ messages in thread
From: Guenter Roeck @ 2022-03-24 21:15 UTC (permalink / raw)
  To: Thomas Zimmermann
  Cc: linux-fbdev, deller, javierm, dri-devel, geert, sam, m.szyprowski

On Sun, Mar 13, 2022 at 08:29:52PM +0100, Thomas Zimmermann wrote:
> Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> broke cfb_imageblit() for image widths that are not aligned to 8-bit
> boundaries. Fix this by handling the trailing pixels on each line
> separately. The performance improvements in the original commit do not
> regress by this change.
> 
> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Cc: Thomas Zimmermann <tzimmermann@suse.de>
> Cc: Javier Martinez Canillas <javierm@redhat.com>
> Cc: Sam Ravnborg <sam@ravnborg.org>
> Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>

Tested-by: Guenter Roeck <linux@roeck-us.net>

> ---
>  drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++----
>  1 file changed, 24 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c
> index 7361cfabdd85..9ebda4e0dc7a 100644
> --- a/drivers/video/fbdev/core/cfbimgblt.c
> +++ b/drivers/video/fbdev/core/cfbimgblt.c
> @@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  {
>  	u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
>  	u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
> -	u32 bit_mask, eorx;
> +	u32 bit_mask, eorx, shift;
>  	const char *s = image->data, *src;
>  	u32 __iomem *dst;
>  	const u32 *tab = NULL;
> @@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  
>  	for (i = image->height; i--; ) {
>  		dst = (u32 __iomem *)dst1;
> +		shift = 8;
>  		src = s;
>  
> +		/*
> +		 * Manually unroll the per-line copying loop for better
> +		 * performance. This works until we processed the last
> +		 * completely filled source byte (inclusive).
> +		 */
>  		switch (ppw) {
>  		case 4: /* 8 bpp */
> -			for (j = k; j; j -= 2, ++src) {
> +			for (j = k; j >= 2; j -= 2, ++src) {
>  				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
>  			}
>  			break;
>  		case 2: /* 16 bpp */
> -			for (j = k; j; j -= 4, ++src) {
> +			for (j = k; j >= 4; j -= 4, ++src) {
>  				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
> @@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  			}
>  			break;
>  		case 1: /* 32 bpp */
> -			for (j = k; j; j -= 8, ++src) {
> +			for (j = k; j >= 8; j -= 8, ++src) {
>  				FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
> @@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  			break;
>  		}
>  
> +		/*
> +		 * For image widths that are not a multiple of 8, there
> +		 * are trailing pixels left on the current line. Print
> +		 * them as well.
> +		 */
> +		for (; j--; ) {
> +			shift -= ppw;
> +			FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
> +			if (!shift) {
> +				shift = 8;
> +				++src;
> +			}
> +		}
> +
>  		dst1 += p->fix.line_length;
>  		s += spitch;
>  	}

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2022-03-24 22:00 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-13 19:29 [PATCH 0/2] fbdev: Fix image blitting for arbitrary image widths Thomas Zimmermann
2022-03-13 19:29 ` Thomas Zimmermann
2022-03-13 19:29 ` [PATCH 1/2] fbdev: Fix sys_imageblit() " Thomas Zimmermann
2022-03-13 19:29   ` Thomas Zimmermann
2022-03-14 20:56   ` Geert Uytterhoeven
2022-03-14 20:56     ` Geert Uytterhoeven
2022-03-17 11:20   ` Javier Martinez Canillas
2022-03-13 19:29 ` [PATCH 2/2] fbdev: Fix cfb_imageblit() " Thomas Zimmermann
2022-03-13 19:29   ` Thomas Zimmermann
2022-03-14  8:41   ` Marek Szyprowski
2022-03-17 10:54   ` Daniel Vetter
2022-03-17 10:54     ` Daniel Vetter
2022-03-17 11:22   ` Javier Martinez Canillas
2022-03-24 21:15   ` [2/2] " Guenter Roeck
2022-03-24 21:15     ` Guenter Roeck

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.