All of lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH 2/2] fbdev: Fix cfb_imageblit() for arbitrary image widths
@ 2022-03-14  3:10 kernel test robot
  0 siblings, 0 replies; 8+ messages in thread
From: kernel test robot @ 2022-03-14  3:10 UTC (permalink / raw)
  To: kbuild

[-- Attachment #1: Type: text/plain, Size: 14423 bytes --]

CC: kbuild-all(a)lists.01.org
BCC: lkp(a)intel.com
In-Reply-To: <20220313192952.12058-3-tzimmermann@suse.de>
References: <20220313192952.12058-3-tzimmermann@suse.de>
TO: Thomas Zimmermann <tzimmermann@suse.de>
TO: daniel(a)ffwll.ch
TO: deller(a)gmx.de
TO: m.szyprowski(a)samsung.com
TO: geert(a)linux-m68k.org
TO: javierm(a)redhat.com
TO: sam(a)ravnborg.org
CC: linux-fbdev(a)vger.kernel.org
CC: dri-devel(a)lists.freedesktop.org
CC: Thomas Zimmermann <tzimmermann@suse.de>

Hi Thomas,

I love your patch! Perhaps something to improve:

[auto build test WARNING on next-20220310]
[cannot apply to linus/master v5.17-rc7 v5.17-rc6 v5.17-rc5 v5.17-rc8]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Thomas-Zimmermann/fbdev-Fix-image-blitting-for-arbitrary-image-widths/20220314-033209
base:    71941773e143369a73c9c4a3b62fbb60736a1182
:::::: branch date: 8 hours ago
:::::: commit date: 8 hours ago
config: i386-randconfig-m021 (https://download.01.org/0day-ci/archive/20220314/202203141119.bZSAwA1k-lkp(a)intel.com/config)
compiler: gcc-9 (Ubuntu 9.4.0-1ubuntu1~20.04) 9.4.0

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>

smatch warnings:
drivers/video/fbdev/core/cfbimgblt.c:304 fast_imageblit() error: uninitialized symbol 'j'.

vim +/j +304 drivers/video/fbdev/core/cfbimgblt.c

^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  206  
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  207  /*
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  208   * fast_imageblit - optimized monochrome color expansion
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  209   *
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  210   * Only if:  bits_per_pixel == 8, 16, or 32
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  211   *           image->width is divisible by pixel/dword (ppw);
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  212   *           fix->line_legth is divisible by 4;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  213   *           beginning and end of a scanline is dword aligned
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  214   */
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  215  static inline void fast_imageblit(const struct fb_image *image, struct fb_info *p,
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  216  				  u8 __iomem *dst1, u32 fgcolor,
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  217  				  u32 bgcolor)
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  218  {
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  219  	u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  220  	u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  221  	u32 bit_mask, eorx, shift;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  222  	const char *s = image->data, *src;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  223  	u32 __iomem *dst;
d95159cf1b12e8 drivers/video/cfbimgblt.c            Helge Deller      2006-12-08  224  	const u32 *tab = NULL;
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  225  	size_t tablen;
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  226  	u32 colortab[16];
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  227  	int i, j, k;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  228  
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  229  	switch (bpp) {
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  230  	case 8:
e4c690e061b909 drivers/video/cfbimgblt.c            Anton Vorontsov   2008-04-28  231  		tab = fb_be_math(p) ? cfb_tab8_be : cfb_tab8_le;
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  232  		tablen = 16;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  233  		break;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  234  	case 16:
e4c690e061b909 drivers/video/cfbimgblt.c            Anton Vorontsov   2008-04-28  235  		tab = fb_be_math(p) ? cfb_tab16_be : cfb_tab16_le;
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  236  		tablen = 4;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  237  		break;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  238  	case 32:
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  239  		tab = cfb_tab32;
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  240  		tablen = 2;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  241  		break;
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  242  	default:
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  243  		return;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  244  	}
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  245  
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  246  	for (i = ppw-1; i--; ) {
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  247  		fgx <<= bpp;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  248  		bgx <<= bpp;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  249  		fgx |= fgcolor;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  250  		bgx |= bgcolor;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  251  	}
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  252  
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  253  	bit_mask = (1 << ppw) - 1;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  254  	eorx = fgx ^ bgx;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  255  	k = image->width/ppw;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  256  
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  257  	for (i = 0; i < tablen; ++i)
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  258  		colortab[i] = (tab[i] & eorx) ^ bgx;
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  259  
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  260  	for (i = image->height; i--; ) {
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  261  		dst = (u32 __iomem *)dst1;
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  262  		shift = 8;
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  263  		src = s;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  264  
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  265  		/*
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  266  		 * Manually unroll the per-line copying loop for better
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  267  		 * performance. This works until we processed the last
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  268  		 * completely filled source byte (inclusive).
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  269  		 */
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  270  		switch (ppw) {
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  271  		case 4: /* 8 bpp */
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  272  			for (j = k; j >= 2; j -= 2, ++src) {
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  273  				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  274  				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  275  			}
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  276  			break;
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  277  		case 2: /* 16 bpp */
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  278  			for (j = k; j >= 4; j -= 4, ++src) {
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  279  				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  280  				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  281  				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  282  				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  283  			}
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  284  			break;
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  285  		case 1: /* 32 bpp */
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  286  			for (j = k; j >= 8; j -= 8, ++src) {
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  287  				FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++);
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  288  				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  289  				FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  290  				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  291  				FB_WRITEL(colortab[(*src >> 3) & bit_mask], dst++);
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  292  				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  293  				FB_WRITEL(colortab[(*src >> 1) & bit_mask], dst++);
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  294  				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  295  			}
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  296  			break;
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  297  		}
0d03011894d232 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  298  
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  299  		/*
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  300  		 * For image widths that are not a multiple of 8, there
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  301  		 * are trailing pixels left on the current line. Print
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  302  		 * them as well.
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  303  		 */
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13 @304  		for (; j--; ) {
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  305  			shift -= ppw;
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  306  			FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  307  			if (!shift) {
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  308  				shift = 8;
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  309  				++src;
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  310  			}
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  311  		}
9410e2f8731c22 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  312  
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  313  		dst1 += p->fix.line_length;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  314  		s += spitch;
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  315  	}
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  316  }
^1da177e4c3f41 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  317  

---
0-DAY CI Kernel Test Service
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] fbdev: Fix cfb_imageblit() for arbitrary image widths
  2022-03-13 19:29   ` Thomas Zimmermann
                     ` (2 preceding siblings ...)
  (?)
@ 2022-03-17 11:22   ` Javier Martinez Canillas
  -1 siblings, 0 replies; 8+ messages in thread
From: Javier Martinez Canillas @ 2022-03-17 11:22 UTC (permalink / raw)
  To: Thomas Zimmermann, daniel, deller, m.szyprowski, geert, sam
  Cc: linux-fbdev, dri-devel

On 3/13/22 20:29, Thomas Zimmermann wrote:
> Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> broke cfb_imageblit() for image widths that are not aligned to 8-bit
> boundaries. Fix this by handling the trailing pixels on each line
> separately. The performance improvements in the original commit do not
> regress by this change.
> 
> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Cc: Thomas Zimmermann <tzimmermann@suse.de>
> Cc: Javier Martinez Canillas <javierm@redhat.com>
> Cc: Sam Ravnborg <sam@ravnborg.org>
> ---

Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>

-- 
Best regards,

Javier Martinez Canillas
Linux Engineering
Red Hat


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] fbdev: Fix cfb_imageblit() for arbitrary image widths
  2022-03-13 19:29   ` Thomas Zimmermann
@ 2022-03-17 10:54     ` Daniel Vetter
  -1 siblings, 0 replies; 8+ messages in thread
From: Daniel Vetter @ 2022-03-17 10:54 UTC (permalink / raw)
  To: Thomas Zimmermann
  Cc: daniel, deller, m.szyprowski, geert, javierm, sam, linux-fbdev,
	dri-devel

On Sun, Mar 13, 2022 at 08:29:52PM +0100, Thomas Zimmermann wrote:
> Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> broke cfb_imageblit() for image widths that are not aligned to 8-bit
> boundaries. Fix this by handling the trailing pixels on each line
> separately. The performance improvements in the original commit do not
> regress by this change.
> 
> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Cc: Thomas Zimmermann <tzimmermann@suse.de>
> Cc: Javier Martinez Canillas <javierm@redhat.com>
> Cc: Sam Ravnborg <sam@ravnborg.org>

On both patches:

Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> ---
>  drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++----
>  1 file changed, 24 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c
> index 7361cfabdd85..9ebda4e0dc7a 100644
> --- a/drivers/video/fbdev/core/cfbimgblt.c
> +++ b/drivers/video/fbdev/core/cfbimgblt.c
> @@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  {
>  	u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
>  	u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
> -	u32 bit_mask, eorx;
> +	u32 bit_mask, eorx, shift;
>  	const char *s = image->data, *src;
>  	u32 __iomem *dst;
>  	const u32 *tab = NULL;
> @@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  
>  	for (i = image->height; i--; ) {
>  		dst = (u32 __iomem *)dst1;
> +		shift = 8;
>  		src = s;
>  
> +		/*
> +		 * Manually unroll the per-line copying loop for better
> +		 * performance. This works until we processed the last
> +		 * completely filled source byte (inclusive).
> +		 */
>  		switch (ppw) {
>  		case 4: /* 8 bpp */
> -			for (j = k; j; j -= 2, ++src) {
> +			for (j = k; j >= 2; j -= 2, ++src) {
>  				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
>  			}
>  			break;
>  		case 2: /* 16 bpp */
> -			for (j = k; j; j -= 4, ++src) {
> +			for (j = k; j >= 4; j -= 4, ++src) {
>  				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
> @@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  			}
>  			break;
>  		case 1: /* 32 bpp */
> -			for (j = k; j; j -= 8, ++src) {
> +			for (j = k; j >= 8; j -= 8, ++src) {
>  				FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
> @@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  			break;
>  		}
>  
> +		/*
> +		 * For image widths that are not a multiple of 8, there
> +		 * are trailing pixels left on the current line. Print
> +		 * them as well.
> +		 */
> +		for (; j--; ) {
> +			shift -= ppw;
> +			FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
> +			if (!shift) {
> +				shift = 8;
> +				++src;
> +			}
> +		}
> +
>  		dst1 += p->fix.line_length;
>  		s += spitch;
>  	}
> -- 
> 2.35.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] fbdev: Fix cfb_imageblit() for arbitrary image widths
@ 2022-03-17 10:54     ` Daniel Vetter
  0 siblings, 0 replies; 8+ messages in thread
From: Daniel Vetter @ 2022-03-17 10:54 UTC (permalink / raw)
  To: Thomas Zimmermann
  Cc: linux-fbdev, deller, javierm, dri-devel, geert, sam, m.szyprowski

On Sun, Mar 13, 2022 at 08:29:52PM +0100, Thomas Zimmermann wrote:
> Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> broke cfb_imageblit() for image widths that are not aligned to 8-bit
> boundaries. Fix this by handling the trailing pixels on each line
> separately. The performance improvements in the original commit do not
> regress by this change.
> 
> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Cc: Thomas Zimmermann <tzimmermann@suse.de>
> Cc: Javier Martinez Canillas <javierm@redhat.com>
> Cc: Sam Ravnborg <sam@ravnborg.org>

On both patches:

Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> ---
>  drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++----
>  1 file changed, 24 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c
> index 7361cfabdd85..9ebda4e0dc7a 100644
> --- a/drivers/video/fbdev/core/cfbimgblt.c
> +++ b/drivers/video/fbdev/core/cfbimgblt.c
> @@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  {
>  	u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
>  	u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
> -	u32 bit_mask, eorx;
> +	u32 bit_mask, eorx, shift;
>  	const char *s = image->data, *src;
>  	u32 __iomem *dst;
>  	const u32 *tab = NULL;
> @@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  
>  	for (i = image->height; i--; ) {
>  		dst = (u32 __iomem *)dst1;
> +		shift = 8;
>  		src = s;
>  
> +		/*
> +		 * Manually unroll the per-line copying loop for better
> +		 * performance. This works until we processed the last
> +		 * completely filled source byte (inclusive).
> +		 */
>  		switch (ppw) {
>  		case 4: /* 8 bpp */
> -			for (j = k; j; j -= 2, ++src) {
> +			for (j = k; j >= 2; j -= 2, ++src) {
>  				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
>  			}
>  			break;
>  		case 2: /* 16 bpp */
> -			for (j = k; j; j -= 4, ++src) {
> +			for (j = k; j >= 4; j -= 4, ++src) {
>  				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
> @@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  			}
>  			break;
>  		case 1: /* 32 bpp */
> -			for (j = k; j; j -= 8, ++src) {
> +			for (j = k; j >= 8; j -= 8, ++src) {
>  				FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
>  				FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
> @@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>  			break;
>  		}
>  
> +		/*
> +		 * For image widths that are not a multiple of 8, there
> +		 * are trailing pixels left on the current line. Print
> +		 * them as well.
> +		 */
> +		for (; j--; ) {
> +			shift -= ppw;
> +			FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
> +			if (!shift) {
> +				shift = 8;
> +				++src;
> +			}
> +		}
> +
>  		dst1 += p->fix.line_length;
>  		s += spitch;
>  	}
> -- 
> 2.35.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] fbdev: Fix cfb_imageblit() for arbitrary image widths
@ 2022-03-15  6:03 kernel test robot
  0 siblings, 0 replies; 8+ messages in thread
From: kernel test robot @ 2022-03-15  6:03 UTC (permalink / raw)
  To: kbuild

[-- Attachment #1: Type: text/plain, Size: 30099 bytes --]

CC: llvm(a)lists.linux.dev
CC: kbuild-all(a)lists.01.org
BCC: lkp(a)intel.com
In-Reply-To: <20220313192952.12058-3-tzimmermann@suse.de>
References: <20220313192952.12058-3-tzimmermann@suse.de>
TO: Thomas Zimmermann <tzimmermann@suse.de>
TO: daniel(a)ffwll.ch
TO: deller(a)gmx.de
TO: m.szyprowski(a)samsung.com
TO: geert(a)linux-m68k.org
TO: javierm(a)redhat.com
TO: sam(a)ravnborg.org
CC: linux-fbdev(a)vger.kernel.org
CC: dri-devel(a)lists.freedesktop.org
CC: Thomas Zimmermann <tzimmermann@suse.de>

Hi Thomas,

I love your patch! Perhaps something to improve:

[auto build test WARNING on next-20220310]
[cannot apply to linus/master v5.17-rc7 v5.17-rc6 v5.17-rc5 v5.17-rc8]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Thomas-Zimmermann/fbdev-Fix-image-blitting-for-arbitrary-image-widths/20220314-033209
base:    71941773e143369a73c9c4a3b62fbb60736a1182
:::::: branch date: 34 hours ago
:::::: commit date: 34 hours ago
config: x86_64-randconfig-c007 (https://download.01.org/0day-ci/archive/20220315/202203151313.gyWGP0AL-lkp(a)intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project 0467eb2cb7654c15ae366967ef35093c5724c416)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/9410e2f8731c2247eb12bf3b2c46d169917bb9b4
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Thomas-Zimmermann/fbdev-Fix-image-blitting-for-arbitrary-image-widths/20220314-033209
        git checkout 9410e2f8731c2247eb12bf3b2c46d169917bb9b4
        # save the config file to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 clang-analyzer 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


clang-analyzer warnings: (new ones prefixed by >>)
               ^
   include/linux/rcupdate.h:877:41: note: expanded from macro '__is_kvfree_rcu_offset'
   #define __is_kvfree_rcu_offset(offset) ((offset) < 4096)
                                           ^~~~~~~~~~~~~~~
   kernel/rcu/tree.c:3129:2: note: Taking false branch
           if (__is_kvfree_rcu_offset((unsigned long)func))
           ^
   kernel/rcu/tree.c:3140:6: note: Assuming the condition is false
           if (unlikely(rcu_rdp_is_offloaded(rdp))) {
               ^
   include/linux/compiler.h:78:40: note: expanded from macro 'unlikely'
   # define unlikely(x)    __builtin_expect(!!(x), 0)
                                             ^~~~
   kernel/rcu/tree.c:3140:2: note: Taking true branch
           if (unlikely(rcu_rdp_is_offloaded(rdp))) {
           ^
   kernel/rcu/tree.c:3141:3: note: 2nd function call argument is an uninitialized value
                   __call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
                   ^                         ~~~~~~~~~~~
   Suppressed 51 warnings (51 in non-user code).
   Use -header-filter=.* to display errors from all non-system headers. Use -system-headers to display errors from system headers as well.
   21 warnings generated.
   drivers/acpi/acpica/utnonansi.c:135:2: warning: Call to function 'strcpy' is insecure as it does not provide bounding of the memory buffer. Replace unbounded copy functions with analogous functions that support length arguments such as 'strlcpy'. CWE-119 [clang-analyzer-security.insecureAPI.strcpy]
           strcpy(dest, source);
           ^~~~~~
   drivers/acpi/acpica/utnonansi.c:135:2: note: Call to function 'strcpy' is insecure as it does not provide bounding of the memory buffer. Replace unbounded copy functions with analogous functions that support length arguments such as 'strlcpy'. CWE-119
           strcpy(dest, source);
           ^~~~~~
   drivers/acpi/acpica/utnonansi.c:146:2: warning: Call to function 'strcat' is insecure as it does not provide bounding of the memory buffer. Replace unbounded copy functions with analogous functions that support length arguments such as 'strlcat'. CWE-119 [clang-analyzer-security.insecureAPI.strcpy]
           strcat(dest, source);
           ^~~~~~
   drivers/acpi/acpica/utnonansi.c:146:2: note: Call to function 'strcat' is insecure as it does not provide bounding of the memory buffer. Replace unbounded copy functions with analogous functions that support length arguments such as 'strlcat'. CWE-119
           strcat(dest, source);
           ^~~~~~
   drivers/acpi/acpica/utnonansi.c:163:2: warning: Call to function 'strncat' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'strncat_s' in case of C11 [clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling]
           strncat(dest, source, max_transfer_length);
           ^~~~~~~
   drivers/acpi/acpica/utnonansi.c:163:2: note: Call to function 'strncat' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'strncat_s' in case of C11
           strncat(dest, source, max_transfer_length);
           ^~~~~~~
   drivers/acpi/acpica/utnonansi.c:171:2: warning: Call to function 'strncpy' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'strncpy_s' in case of C11 [clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling]
           strncpy(dest, source, dest_size);
           ^~~~~~~
   drivers/acpi/acpica/utnonansi.c:171:2: note: Call to function 'strncpy' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'strncpy_s' in case of C11
           strncpy(dest, source, dest_size);
           ^~~~~~~
   Suppressed 17 warnings (17 in non-user code).
   Use -header-filter=.* to display errors from all non-system headers. Use -system-headers to display errors from system headers as well.
   17 warnings generated.
   Suppressed 17 warnings (17 in non-user code).
   Use -header-filter=.* to display errors from all non-system headers. Use -system-headers to display errors from system headers as well.
   48 warnings generated.
   drivers/video/fbdev/core/fbcvt.c:233:3: warning: Value stored to 'off' is never read [clang-analyzer-deadcode.DeadStores]
                   off += scnprintf(buf + off, size - off,
                   ^      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/video/fbdev/core/fbcvt.c:233:3: note: Value stored to 'off' is never read
                   off += scnprintf(buf + off, size - off,
                   ^      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/video/fbdev/core/fbcvt.c:252:4: warning: Value stored to 'off' is never read [clang-analyzer-deadcode.DeadStores]
                           off += scnprintf(buf + off, size - off, "-R");
                           ^      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/video/fbdev/core/fbcvt.c:252:4: note: Value stored to 'off' is never read
                           off += scnprintf(buf + off, size - off, "-R");
                           ^      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/video/fbdev/core/fbcvt.c:298:2: warning: Call to function 'memset' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'memset_s' in case of C11 [clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling]
           memset(&cvt, 0, sizeof(cvt));
           ^
   include/linux/fortify-string.h:272:25: note: expanded from macro 'memset'
   #define memset(p, c, s) __fortify_memset_chk(p, c, s,                   \
                           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/fortify-string.h:265:2: note: expanded from macro '__fortify_memset_chk'
           __underlying_memset(p, c, __fortify_size);                      \
           ^~~~~~~~~~~~~~~~~~~
   include/linux/fortify-string.h:47:29: note: expanded from macro '__underlying_memset'
   #define __underlying_memset     __builtin_memset
                                   ^~~~~~~~~~~~~~~~
   drivers/video/fbdev/core/fbcvt.c:298:2: note: Call to function 'memset' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'memset_s' in case of C11
           memset(&cvt, 0, sizeof(cvt));
           ^
   include/linux/fortify-string.h:272:25: note: expanded from macro 'memset'
   #define memset(p, c, s) __fortify_memset_chk(p, c, s,                   \
                           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/fortify-string.h:265:2: note: expanded from macro '__fortify_memset_chk'
           __underlying_memset(p, c, __fortify_size);                      \
           ^~~~~~~~~~~~~~~~~~~
   include/linux/fortify-string.h:47:29: note: expanded from macro '__underlying_memset'
   #define __underlying_memset     __builtin_memset
                                   ^~~~~~~~~~~~~~~~
   Suppressed 45 warnings (45 in non-user code).
   Use -header-filter=.* to display errors from all non-system headers. Use -system-headers to display errors from system headers as well.
   45 warnings generated.
   Suppressed 45 warnings (45 in non-user code).
   Use -header-filter=.* to display errors from all non-system headers. Use -system-headers to display errors from system headers as well.
   45 warnings generated.
   Suppressed 45 warnings (45 in non-user code).
   Use -header-filter=.* to display errors from all non-system headers. Use -system-headers to display errors from system headers as well.
   45 warnings generated.
   Suppressed 45 warnings (45 in non-user code).
   Use -header-filter=.* to display errors from all non-system headers. Use -system-headers to display errors from system headers as well.
   46 warnings generated.
>> drivers/video/fbdev/core/cfbimgblt.c:304:10: warning: The expression is an uninitialized value. The computed value will also be garbage [clang-analyzer-core.uninitialized.Assign]
                   for (; j--; ) {
                          ^
   drivers/video/fbdev/core/cfbimgblt.c:326:6: note: Assuming field 'state' is equal to FBINFO_STATE_RUNNING
           if (p->state != FBINFO_STATE_RUNNING)
               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/video/fbdev/core/cfbimgblt.c:326:2: note: Taking false branch
           if (p->state != FBINFO_STATE_RUNNING)
           ^
   drivers/video/fbdev/core/cfbimgblt.c:337:6: note: Assuming field 'fb_sync' is null
           if (p->fbops->fb_sync)
               ^~~~~~~~~~~~~~~~~
   drivers/video/fbdev/core/cfbimgblt.c:337:2: note: Taking false branch
           if (p->fbops->fb_sync)
           ^
   drivers/video/fbdev/core/cfbimgblt.c:340:6: note: Assuming field 'depth' is equal to 1
           if (image->depth == 1) {
               ^~~~~~~~~~~~~~~~~
   drivers/video/fbdev/core/cfbimgblt.c:340:2: note: Taking true branch
           if (image->depth == 1) {
           ^
   drivers/video/fbdev/core/cfbimgblt.c:341:7: note: Assuming field 'visual' is not equal to FB_VISUAL_TRUECOLOR
                   if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
                       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/video/fbdev/core/cfbimgblt.c:341:7: note: Left side of '||' is false
   drivers/video/fbdev/core/cfbimgblt.c:342:7: note: Assuming field 'visual' is not equal to FB_VISUAL_DIRECTCOLOR
                       p->fix.visual == FB_VISUAL_DIRECTCOLOR) {
                       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/video/fbdev/core/cfbimgblt.c:341:3: note: Taking false branch
                   if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
                   ^
   drivers/video/fbdev/core/cfbimgblt.c:350:7: note: Assuming the condition is true
                   if (32 % bpp == 0 && !start_index && !pitch_index &&
                       ^~~~~~~~~~~~~
   drivers/video/fbdev/core/cfbimgblt.c:350:7: note: Left side of '&&' is true
   drivers/video/fbdev/core/cfbimgblt.c:350:24: note: Assuming 'start_index' is 0
                   if (32 % bpp == 0 && !start_index && !pitch_index &&
                                        ^~~~~~~~~~~~
   drivers/video/fbdev/core/cfbimgblt.c:350:7: note: Left side of '&&' is true
                   if (32 % bpp == 0 && !start_index && !pitch_index &&
                       ^
   drivers/video/fbdev/core/cfbimgblt.c:350:40: note: Assuming 'pitch_index' is 0
                   if (32 % bpp == 0 && !start_index && !pitch_index &&
                                                        ^~~~~~~~~~~~
   drivers/video/fbdev/core/cfbimgblt.c:350:7: note: Left side of '&&' is true
                   if (32 % bpp == 0 && !start_index && !pitch_index &&
                       ^
   drivers/video/fbdev/core/cfbimgblt.c:351:8: note: Assuming the condition is true
                       ((width & (32/bpp-1)) == 0) &&
                        ^~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/video/fbdev/core/cfbimgblt.c:350:7: note: Left side of '&&' is true
                   if (32 % bpp == 0 && !start_index && !pitch_index &&
                       ^
   drivers/video/fbdev/core/cfbimgblt.c:352:7: note: Assuming 'bpp' is >= 8
                       bpp >= 8 && bpp <= 32)
                       ^~~~~~~~
   drivers/video/fbdev/core/cfbimgblt.c:350:7: note: Left side of '&&' is true
                   if (32 % bpp == 0 && !start_index && !pitch_index &&
                       ^
   drivers/video/fbdev/core/cfbimgblt.c:352:19: note: Assuming 'bpp' is <= 32
                       bpp >= 8 && bpp <= 32)
                                   ^~~~~~~~~
   drivers/video/fbdev/core/cfbimgblt.c:350:3: note: Taking true branch
                   if (32 % bpp == 0 && !start_index && !pitch_index &&
                   ^
   drivers/video/fbdev/core/cfbimgblt.c:353:4: note: Calling 'fast_imageblit'
                           fast_imageblit(image, p, dst1, fgcolor, bgcolor);
                           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/video/fbdev/core/cfbimgblt.c:227:9: note: 'j' declared without an initial value
           int i, j, k;
                  ^
   drivers/video/fbdev/core/cfbimgblt.c:229:2: note: Control jumps to 'case 32:' @line 238
           switch (bpp) {
           ^
   drivers/video/fbdev/core/cfbimgblt.c:241:3: note:  Execution continues on line 246
                   break;
                   ^
   drivers/video/fbdev/core/cfbimgblt.c:246:2: note: Loop condition is false. Execution continues on line 253
           for (i = ppw-1; i--; ) {
           ^
   drivers/video/fbdev/core/cfbimgblt.c:257:2: note: Loop condition is true.  Entering loop body
           for (i = 0; i < tablen; ++i)
           ^
   drivers/video/fbdev/core/cfbimgblt.c:257:2: note: Loop condition is true.  Entering loop body
   drivers/video/fbdev/core/cfbimgblt.c:257:2: note: Loop condition is false. Execution continues on line 260
   drivers/video/fbdev/core/cfbimgblt.c:260:2: note: Loop condition is true.  Entering loop body
           for (i = image->height; i--; ) {
           ^
   drivers/video/fbdev/core/cfbimgblt.c:270:3: note: 'Default' branch taken. Execution continues on line 304
                   switch (ppw) {
                   ^
   drivers/video/fbdev/core/cfbimgblt.c:304:10: note: The expression is an uninitialized value. The computed value will also be garbage
                   for (; j--; ) {
                          ^
   Suppressed 45 warnings (45 in non-user code).
   Use -header-filter=.* to display errors from all non-system headers. Use -system-headers to display errors from system headers as well.
   17 warnings generated.
   Suppressed 17 warnings (17 in non-user code).
   Use -header-filter=.* to display errors from all non-system headers. Use -system-headers to display errors from system headers as well.
   19 warnings generated.
   drivers/acpi/acpica/tbprint.c:70:2: warning: Call to function 'memcpy' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'memcpy_s' in case of C11 [clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling]

vim +304 drivers/video/fbdev/core/cfbimgblt.c

^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  206  
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  207  /*
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  208   * fast_imageblit - optimized monochrome color expansion
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  209   *
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  210   * Only if:  bits_per_pixel == 8, 16, or 32
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  211   *           image->width is divisible by pixel/dword (ppw);
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  212   *           fix->line_legth is divisible by 4;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  213   *           beginning and end of a scanline is dword aligned
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  214   */
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  215  static inline void fast_imageblit(const struct fb_image *image, struct fb_info *p,
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  216  				  u8 __iomem *dst1, u32 fgcolor,
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  217  				  u32 bgcolor)
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  218  {
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  219  	u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  220  	u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  221  	u32 bit_mask, eorx, shift;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  222  	const char *s = image->data, *src;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  223  	u32 __iomem *dst;
d95159cf1b12e8e drivers/video/cfbimgblt.c            Helge Deller      2006-12-08  224  	const u32 *tab = NULL;
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  225  	size_t tablen;
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  226  	u32 colortab[16];
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  227  	int i, j, k;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  228  
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  229  	switch (bpp) {
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  230  	case 8:
e4c690e061b9091 drivers/video/cfbimgblt.c            Anton Vorontsov   2008-04-28  231  		tab = fb_be_math(p) ? cfb_tab8_be : cfb_tab8_le;
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  232  		tablen = 16;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  233  		break;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  234  	case 16:
e4c690e061b9091 drivers/video/cfbimgblt.c            Anton Vorontsov   2008-04-28  235  		tab = fb_be_math(p) ? cfb_tab16_be : cfb_tab16_le;
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  236  		tablen = 4;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  237  		break;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  238  	case 32:
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  239  		tab = cfb_tab32;
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  240  		tablen = 2;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  241  		break;
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  242  	default:
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  243  		return;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  244  	}
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  245  
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  246  	for (i = ppw-1; i--; ) {
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  247  		fgx <<= bpp;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  248  		bgx <<= bpp;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  249  		fgx |= fgcolor;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  250  		bgx |= bgcolor;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  251  	}
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  252  
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  253  	bit_mask = (1 << ppw) - 1;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  254  	eorx = fgx ^ bgx;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  255  	k = image->width/ppw;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  256  
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  257  	for (i = 0; i < tablen; ++i)
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  258  		colortab[i] = (tab[i] & eorx) ^ bgx;
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  259  
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  260  	for (i = image->height; i--; ) {
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  261  		dst = (u32 __iomem *)dst1;
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  262  		shift = 8;
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  263  		src = s;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  264  
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  265  		/*
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  266  		 * Manually unroll the per-line copying loop for better
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  267  		 * performance. This works until we processed the last
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  268  		 * completely filled source byte (inclusive).
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  269  		 */
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  270  		switch (ppw) {
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  271  		case 4: /* 8 bpp */
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  272  			for (j = k; j >= 2; j -= 2, ++src) {
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  273  				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  274  				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  275  			}
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  276  			break;
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  277  		case 2: /* 16 bpp */
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  278  			for (j = k; j >= 4; j -= 4, ++src) {
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  279  				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  280  				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  281  				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  282  				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  283  			}
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  284  			break;
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  285  		case 1: /* 32 bpp */
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  286  			for (j = k; j >= 8; j -= 8, ++src) {
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  287  				FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++);
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  288  				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  289  				FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  290  				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  291  				FB_WRITEL(colortab[(*src >> 3) & bit_mask], dst++);
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  292  				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  293  				FB_WRITEL(colortab[(*src >> 1) & bit_mask], dst++);
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  294  				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  295  			}
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  296  			break;
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  297  		}
0d03011894d2324 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-02-23  298  
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  299  		/*
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  300  		 * For image widths that are not a multiple of 8, there
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  301  		 * are trailing pixels left on the current line. Print
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  302  		 * them as well.
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  303  		 */
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13 @304  		for (; j--; ) {
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  305  			shift -= ppw;
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  306  			FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  307  			if (!shift) {
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  308  				shift = 8;
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  309  				++src;
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  310  			}
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  311  		}
9410e2f8731c224 drivers/video/fbdev/core/cfbimgblt.c Thomas Zimmermann 2022-03-13  312  
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  313  		dst1 += p->fix.line_length;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  314  		s += spitch;
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  315  	}
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  316  }
^1da177e4c3f415 drivers/video/cfbimgblt.c            Linus Torvalds    2005-04-16  317  

---
0-DAY CI Kernel Test Service
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] fbdev: Fix cfb_imageblit() for arbitrary image widths
  2022-03-13 19:29   ` Thomas Zimmermann
  (?)
@ 2022-03-14  8:41   ` Marek Szyprowski
  -1 siblings, 0 replies; 8+ messages in thread
From: Marek Szyprowski @ 2022-03-14  8:41 UTC (permalink / raw)
  To: Thomas Zimmermann, daniel, deller, geert, javierm, sam
  Cc: linux-fbdev, dri-devel

On 13.03.2022 20:29, Thomas Zimmermann wrote:
> Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> broke cfb_imageblit() for image widths that are not aligned to 8-bit
> boundaries. Fix this by handling the trailing pixels on each line
> separately. The performance improvements in the original commit do not
> regress by this change.
>
> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
> Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Cc: Thomas Zimmermann <tzimmermann@suse.de>
> Cc: Javier Martinez Canillas <javierm@redhat.com>
> Cc: Sam Ravnborg <sam@ravnborg.org>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
> ---
>   drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++----
>   1 file changed, 24 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c
> index 7361cfabdd85..9ebda4e0dc7a 100644
> --- a/drivers/video/fbdev/core/cfbimgblt.c
> +++ b/drivers/video/fbdev/core/cfbimgblt.c
> @@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>   {
>   	u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
>   	u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
> -	u32 bit_mask, eorx;
> +	u32 bit_mask, eorx, shift;
>   	const char *s = image->data, *src;
>   	u32 __iomem *dst;
>   	const u32 *tab = NULL;
> @@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>   
>   	for (i = image->height; i--; ) {
>   		dst = (u32 __iomem *)dst1;
> +		shift = 8;
>   		src = s;
>   
> +		/*
> +		 * Manually unroll the per-line copying loop for better
> +		 * performance. This works until we processed the last
> +		 * completely filled source byte (inclusive).
> +		 */
>   		switch (ppw) {
>   		case 4: /* 8 bpp */
> -			for (j = k; j; j -= 2, ++src) {
> +			for (j = k; j >= 2; j -= 2, ++src) {
>   				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
>   				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
>   			}
>   			break;
>   		case 2: /* 16 bpp */
> -			for (j = k; j; j -= 4, ++src) {
> +			for (j = k; j >= 4; j -= 4, ++src) {
>   				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
>   				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
>   				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
> @@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>   			}
>   			break;
>   		case 1: /* 32 bpp */
> -			for (j = k; j; j -= 8, ++src) {
> +			for (j = k; j >= 8; j -= 8, ++src) {
>   				FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++);
>   				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
>   				FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
> @@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
>   			break;
>   		}
>   
> +		/*
> +		 * For image widths that are not a multiple of 8, there
> +		 * are trailing pixels left on the current line. Print
> +		 * them as well.
> +		 */
> +		for (; j--; ) {
> +			shift -= ppw;
> +			FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
> +			if (!shift) {
> +				shift = 8;
> +				++src;
> +			}
> +		}
> +
>   		dst1 += p->fix.line_length;
>   		s += spitch;
>   	}

Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/2] fbdev: Fix cfb_imageblit() for arbitrary image widths
  2022-03-13 19:29 [PATCH 0/2] fbdev: Fix image blitting " Thomas Zimmermann
@ 2022-03-13 19:29   ` Thomas Zimmermann
  0 siblings, 0 replies; 8+ messages in thread
From: Thomas Zimmermann @ 2022-03-13 19:29 UTC (permalink / raw)
  To: daniel, deller, m.szyprowski, geert, javierm, sam
  Cc: linux-fbdev, dri-devel, Thomas Zimmermann

Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
broke cfb_imageblit() for image widths that are not aligned to 8-bit
boundaries. Fix this by handling the trailing pixels on each line
separately. The performance improvements in the original commit do not
regress by this change.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Javier Martinez Canillas <javierm@redhat.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
---
 drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c
index 7361cfabdd85..9ebda4e0dc7a 100644
--- a/drivers/video/fbdev/core/cfbimgblt.c
+++ b/drivers/video/fbdev/core/cfbimgblt.c
@@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
 {
 	u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
 	u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
-	u32 bit_mask, eorx;
+	u32 bit_mask, eorx, shift;
 	const char *s = image->data, *src;
 	u32 __iomem *dst;
 	const u32 *tab = NULL;
@@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
 
 	for (i = image->height; i--; ) {
 		dst = (u32 __iomem *)dst1;
+		shift = 8;
 		src = s;
 
+		/*
+		 * Manually unroll the per-line copying loop for better
+		 * performance. This works until we processed the last
+		 * completely filled source byte (inclusive).
+		 */
 		switch (ppw) {
 		case 4: /* 8 bpp */
-			for (j = k; j; j -= 2, ++src) {
+			for (j = k; j >= 2; j -= 2, ++src) {
 				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
 			}
 			break;
 		case 2: /* 16 bpp */
-			for (j = k; j; j -= 4, ++src) {
+			for (j = k; j >= 4; j -= 4, ++src) {
 				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
@@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
 			}
 			break;
 		case 1: /* 32 bpp */
-			for (j = k; j; j -= 8, ++src) {
+			for (j = k; j >= 8; j -= 8, ++src) {
 				FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
@@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
 			break;
 		}
 
+		/*
+		 * For image widths that are not a multiple of 8, there
+		 * are trailing pixels left on the current line. Print
+		 * them as well.
+		 */
+		for (; j--; ) {
+			shift -= ppw;
+			FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
+			if (!shift) {
+				shift = 8;
+				++src;
+			}
+		}
+
 		dst1 += p->fix.line_length;
 		s += spitch;
 	}
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] fbdev: Fix cfb_imageblit() for arbitrary image widths
@ 2022-03-13 19:29   ` Thomas Zimmermann
  0 siblings, 0 replies; 8+ messages in thread
From: Thomas Zimmermann @ 2022-03-13 19:29 UTC (permalink / raw)
  To: daniel, deller, m.szyprowski, geert, javierm, sam
  Cc: linux-fbdev, Thomas Zimmermann, dri-devel

Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
broke cfb_imageblit() for image widths that are not aligned to 8-bit
boundaries. Fix this by handling the trailing pixels on each line
separately. The performance improvements in the original commit do not
regress by this change.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Javier Martinez Canillas <javierm@redhat.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
---
 drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c
index 7361cfabdd85..9ebda4e0dc7a 100644
--- a/drivers/video/fbdev/core/cfbimgblt.c
+++ b/drivers/video/fbdev/core/cfbimgblt.c
@@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
 {
 	u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
 	u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
-	u32 bit_mask, eorx;
+	u32 bit_mask, eorx, shift;
 	const char *s = image->data, *src;
 	u32 __iomem *dst;
 	const u32 *tab = NULL;
@@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
 
 	for (i = image->height; i--; ) {
 		dst = (u32 __iomem *)dst1;
+		shift = 8;
 		src = s;
 
+		/*
+		 * Manually unroll the per-line copying loop for better
+		 * performance. This works until we processed the last
+		 * completely filled source byte (inclusive).
+		 */
 		switch (ppw) {
 		case 4: /* 8 bpp */
-			for (j = k; j; j -= 2, ++src) {
+			for (j = k; j >= 2; j -= 2, ++src) {
 				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
 			}
 			break;
 		case 2: /* 16 bpp */
-			for (j = k; j; j -= 4, ++src) {
+			for (j = k; j >= 4; j -= 4, ++src) {
 				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
@@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
 			}
 			break;
 		case 1: /* 32 bpp */
-			for (j = k; j; j -= 8, ++src) {
+			for (j = k; j >= 8; j -= 8, ++src) {
 				FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
 				FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
@@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
 			break;
 		}
 
+		/*
+		 * For image widths that are not a multiple of 8, there
+		 * are trailing pixels left on the current line. Print
+		 * them as well.
+		 */
+		for (; j--; ) {
+			shift -= ppw;
+			FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
+			if (!shift) {
+				shift = 8;
+				++src;
+			}
+		}
+
 		dst1 += p->fix.line_length;
 		s += spitch;
 	}
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-03-17 11:22 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-14  3:10 [PATCH 2/2] fbdev: Fix cfb_imageblit() for arbitrary image widths kernel test robot
  -- strict thread matches above, loose matches on Subject: below --
2022-03-15  6:03 kernel test robot
2022-03-13 19:29 [PATCH 0/2] fbdev: Fix image blitting " Thomas Zimmermann
2022-03-13 19:29 ` [PATCH 2/2] fbdev: Fix cfb_imageblit() " Thomas Zimmermann
2022-03-13 19:29   ` Thomas Zimmermann
2022-03-14  8:41   ` Marek Szyprowski
2022-03-17 10:54   ` Daniel Vetter
2022-03-17 10:54     ` Daniel Vetter
2022-03-17 11:22   ` Javier Martinez Canillas

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.