All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Optimised 1bit blitters
@ 2009-08-21 15:33 Vladimir 'phcoder' Serbinenko
  2009-08-23 10:48 ` Robert Millan
  0 siblings, 1 reply; 7+ messages in thread
From: Vladimir 'phcoder' Serbinenko @ 2009-08-21 15:33 UTC (permalink / raw)
  To: The development of GRUB 2

[-- Attachment #1: Type: text/plain, Size: 280 bytes --]

Hello. Glyphs are 1-bit bitmaps, and when they are blitted the generic,
slow blitter is currently used. This patch adds optimised blitters,
which makes text rendering faster.

-- 
Regards
Vladimir 'phcoder' Serbinenko

Personal git repository: http://repo.or.cz/w/grub2/phcoder.git

[-- Attachment #2: 1bit.diff --]
[-- Type: text/plain, Size: 22900 bytes --]

diff --git a/ChangeLog b/ChangeLog
index b76fe38..7d86044 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,28 @@
+2009-08-21  Vladimir Serbinenko  <phcoder@gmail.com>
+
+	1-bit optimised blitters.
+
+	* include/grub/fbblit.h (grub_video_fbblit_replace_32bit_1bit): New
+	prototype.
+	(grub_video_fbblit_replace_24bit_1bit): Likewise.
+	(grub_video_fbblit_replace_16bit_1bit): Likewise.
+	(grub_video_fbblit_replace_8bit_1bit): Likewise.
+	(grub_video_fbblit_blend_XXXA8888_1bit): Likewise.
+	(grub_video_fbblit_blend_XXX888_1bit): Likewise.
+	(grub_video_fbblit_blend_XXX565_1bit): Likewise.
+	* video/fb/fbblit.c (grub_video_fbblit_replace_32bit_1bit): New
+	function.
+	(grub_video_fbblit_replace_24bit_1bit): Likewise.
+	(grub_video_fbblit_replace_16bit_1bit): Likewise.
+	(grub_video_fbblit_replace_8bit_1bit): Likewise.
+	(grub_video_fbblit_blend_XXXA8888_1bit): Likewise.
+	(grub_video_fbblit_blend_XXX888_1bit): Likewise.
+	(grub_video_fbblit_blend_XXX565_1bit): Likewise.
+	* video/fb/video_fb.c (common_blitter): Use 1-bit optimised blitters
+	when possible.
+	* video/video.c (grub_video_get_blit_format): Return 
+	GRUB_VIDEO_BLIT_FORMAT_1BIT_PACKED if bpp = 1.
+
 2009-08-17  Michal Suchanek  <hramrach@centrum.cz>
 
 	VBE cleanup.
diff --git a/include/grub/fbblit.h b/include/grub/fbblit.h
index 664f508..af97dfb 100644
--- a/include/grub/fbblit.h
+++ b/include/grub/fbblit.h
@@ -131,4 +131,52 @@ grub_video_fbblit_blend_index_RGBA8888 (struct grub_video_fbblit_info *dst,
 					int width, int height,
 					int offset_x, int offset_y);
 
+void
+grub_video_fbblit_replace_32bit_1bit (struct grub_video_fbblit_info *dst,
+				      struct grub_video_fbblit_info *src,
+				      int x, int y,
+				      int width, int height,
+				      int offset_x, int offset_y);
+
+void
+grub_video_fbblit_replace_24bit_1bit (struct grub_video_fbblit_info *dst,
+				      struct grub_video_fbblit_info *src,
+				      int x, int y,
+				      int width, int height,
+				      int offset_x, int offset_y);
+
+void
+grub_video_fbblit_replace_16bit_1bit (struct grub_video_fbblit_info *dst,
+				      struct grub_video_fbblit_info *src,
+				      int x, int y,
+				      int width, int height,
+				      int offset_x, int offset_y);
+
+void
+grub_video_fbblit_replace_8bit_1bit (struct grub_video_fbblit_info *dst,
+				     struct grub_video_fbblit_info *src,
+				     int x, int y,
+				     int width, int height,
+				     int offset_x, int offset_y);
+
+void
+grub_video_fbblit_blend_XXXA8888_1bit (struct grub_video_fbblit_info *dst,
+				       struct grub_video_fbblit_info *src,
+				       int x, int y,
+				       int width, int height,
+				       int offset_x, int offset_y);
+
+void
+grub_video_fbblit_blend_XXX888_1bit (struct grub_video_fbblit_info *dst,
+				       struct grub_video_fbblit_info *src,
+				       int x, int y,
+				       int width, int height,
+				       int offset_x, int offset_y);
+
+void
+grub_video_fbblit_blend_XXX565_1bit (struct grub_video_fbblit_info *dst,
+				     struct grub_video_fbblit_info *src,
+				     int x, int y,
+				     int width, int height,
+				     int offset_x, int offset_y);
 #endif /* ! GRUB_FBBLIT_HEADER */
diff --git a/video/fb/fbblit.c b/video/fb/fbblit.c
index 5b613bc..fe32bae 100644
--- a/video/fb/fbblit.c
+++ b/video/fb/fbblit.c
@@ -90,6 +90,302 @@ grub_video_fbblit_replace_directN (struct grub_video_fbblit_info *dst,
     }
 }
 
+/* Optimized replacing blitter for 1-bit to 32bit: set source bits are drawn in the source's fg color, clear bits in its bg color.  */
+void
+grub_video_fbblit_replace_32bit_1bit (struct grub_video_fbblit_info *dst,
+				      struct grub_video_fbblit_info *src,
+				      int x, int y,
+				      int width, int height,
+				      int offset_x, int offset_y)
+{
+  int i;
+  int j;
+  grub_uint8_t *srcptr;
+  grub_uint8_t *dstptr;
+  grub_uint8_t srcmask;  /* One-bit mask picking the current pixel out of *srcptr.  */
+  unsigned int dstrowskip;
+  unsigned int srcrowskipbyte, srcrowskipbit;
+  grub_uint32_t fgcolor, bgcolor;  /* Mapped once up front, not per pixel.  */
+  int bit_index;
+
+  /* Calculate how far to advance from the end of one blitted line to the
+     start of the next: bytes for dst, whole bytes plus spare bits for src.  */
+  dstrowskip = dst->mode_info->pitch - dst->mode_info->bytes_per_pixel * width;
+  srcrowskipbyte = (src->mode_info->width - width) >> 3;
+  srcrowskipbit = (src->mode_info->width - width) & 7;
+
+  bit_index = offset_y * src->mode_info->width + offset_x;  /* Source rows are addressed width bits apart, with no per-row padding.  */
+  srcptr = (grub_uint8_t *) src->data + (bit_index >> 3);
+  srcmask = 1 << (~bit_index & 7);  /* Pixels are stored most significant bit first.  */
+  dstptr = (grub_uint8_t *) get_data_ptr (dst, x, y);
+
+  fgcolor = grub_video_fb_map_rgba (src->mode_info->fg_red,
+				    src->mode_info->fg_green,
+				    src->mode_info->fg_blue,
+				    src->mode_info->fg_alpha);
+
+  bgcolor = grub_video_fb_map_rgba (src->mode_info->bg_red,
+				    src->mode_info->bg_green,
+				    src->mode_info->bg_blue,
+				    src->mode_info->bg_alpha);
+
+  for (j = 0; j < height; j++)
+    {
+      for (i = 0; i < width; i++)
+        {
+	  if (*srcptr & srcmask)
+	    *(grub_uint32_t *) dstptr = fgcolor;
+	  else
+	    *(grub_uint32_t *) dstptr = bgcolor;
+	  srcmask >>= 1;  /* Step to the next source pixel.  */
+	  if (!srcmask)
+	    {
+	      srcptr++;
+	      srcmask = 0x80;  /* Mask ran off the byte: restart at the top bit of the next one.  */
+	    }
+
+	  dstptr += 4;
+        }
+
+      srcptr += srcrowskipbyte;
+      if (srcmask >> srcrowskipbit)  /* Do the spare bits still fit in the current byte?  */
+	srcmask >>= srcrowskipbit;
+      else
+	{
+	  srcptr++;
+	  srcmask <<= 8 - srcrowskipbit;  /* Same skip, expressed as a wrap into the next byte.  */
+	}
+      dstptr += dstrowskip;
+    }
+}
+
+
+/* Optimized replacing blitter for 1-bit to 24-bit: set source bits are drawn in the source's fg color, clear bits in its bg color.  */
+void
+grub_video_fbblit_replace_24bit_1bit (struct grub_video_fbblit_info *dst,
+				      struct grub_video_fbblit_info *src,
+				      int x, int y,
+				      int width, int height,
+				      int offset_x, int offset_y)
+{
+  int i;
+  int j;
+  grub_uint8_t *srcptr;
+  grub_uint8_t *dstptr;
+  grub_uint8_t srcmask;  /* One-bit mask picking the current pixel out of *srcptr.  */
+  unsigned int dstrowskip;
+  unsigned int srcrowskipbyte, srcrowskipbit;
+  grub_uint32_t fgcolor, bgcolor;  /* Mapped once up front, not per pixel.  */
+  int bit_index;
+
+  /* Calculate how far to advance from the end of one blitted line to the
+     start of the next: bytes for dst, whole bytes plus spare bits for src.  */
+  dstrowskip = dst->mode_info->pitch - dst->mode_info->bytes_per_pixel * width;
+  srcrowskipbyte = (src->mode_info->width - width) >> 3;
+  srcrowskipbit = (src->mode_info->width - width) & 7;
+
+  bit_index = offset_y * src->mode_info->width + offset_x;  /* Source rows are addressed width bits apart, with no per-row padding.  */
+  srcptr = (grub_uint8_t *) src->data + (bit_index >> 3);
+  srcmask = 1 << (~bit_index & 7);  /* Pixels are stored most significant bit first.  */
+  dstptr = (grub_uint8_t *) get_data_ptr (dst, x, y);
+
+  fgcolor = grub_video_fb_map_rgba (src->mode_info->fg_red,
+				    src->mode_info->fg_green,
+				    src->mode_info->fg_blue,
+				    src->mode_info->fg_alpha);
+
+  bgcolor = grub_video_fb_map_rgba (src->mode_info->bg_red,
+				    src->mode_info->bg_green,
+				    src->mode_info->bg_blue,
+				    src->mode_info->bg_alpha);
+
+  for (j = 0; j < height; j++)
+    {
+      for (i = 0; i < width - 1; i++)  /* All but the last pixel of the row.  */
+        {
+	  if (*srcptr & srcmask)
+	    *(grub_uint32_t *) dstptr = fgcolor;  /* 4-byte store into a 3-byte pixel; dstptr advances by 3, so the next pixel rewrites the extra byte.  */
+	  else
+	    *(grub_uint32_t *) dstptr = bgcolor;
+	  srcmask >>= 1;  /* Step to the next source pixel.  */
+	  if (!srcmask)
+	    {
+	      srcptr++;
+	      srcmask = 0x80;  /* Mask ran off the byte: restart at the top bit of the next one.  */
+	    }
+
+	  dstptr += 3;
+        }
+
+      if (*srcptr & srcmask)  /* Last pixel of the row.  NOTE(review): this tail also runs when width is 0 -- confirm callers never pass that.  */
+	{
+	  *dstptr++ = fgcolor & 0xff;  /* Written bytewise, exactly 3 bytes.  NOTE(review): low byte first matches the 32-bit stores above only on little-endian -- confirm.  */
+	  *dstptr++ = (fgcolor & 0xff00) >> 8;
+	  *dstptr++ = (fgcolor & 0xff0000) >> 16;
+	}
+      else
+	{
+	  *dstptr++ = bgcolor & 0xff;
+	  *dstptr++ = (bgcolor & 0xff00) >> 8;
+	  *dstptr++ = (bgcolor & 0xff0000) >> 16;
+	}
+      srcmask >>= 1;
+      if (!srcmask)
+	{
+	  srcptr++;
+	  srcmask = 0x80;
+	}
+
+      srcptr += srcrowskipbyte;
+      if (srcmask >> srcrowskipbit)  /* Do the spare bits still fit in the current byte?  */
+	srcmask >>= srcrowskipbit;
+      else
+	{
+	  srcptr++;
+	  srcmask <<= 8 - srcrowskipbit;  /* Same skip, expressed as a wrap into the next byte.  */
+	}
+      dstptr += dstrowskip;
+    }
+}
+
+/* Optimized replacing blitter for 1-bit to 16-bit: set source bits are drawn in the source's fg color, clear bits in its bg color.  */
+void
+grub_video_fbblit_replace_16bit_1bit (struct grub_video_fbblit_info *dst,
+				      struct grub_video_fbblit_info *src,
+				      int x, int y,
+				      int width, int height,
+				      int offset_x, int offset_y)
+{
+  int i;
+  int j;
+  grub_uint8_t *srcptr;
+  grub_uint8_t *dstptr;
+  grub_uint8_t srcmask;  /* One-bit mask picking the current pixel out of *srcptr.  */
+  unsigned int dstrowskip;
+  unsigned int srcrowskipbyte, srcrowskipbit;
+  grub_uint16_t fgcolor, bgcolor;  /* Mapped once up front, not per pixel; the mapped value is narrowed to 16 bits.  */
+  int bit_index;
+
+  /* Calculate how far to advance from the end of one blitted line to the
+     start of the next: bytes for dst, whole bytes plus spare bits for src.  */
+  dstrowskip = dst->mode_info->pitch - dst->mode_info->bytes_per_pixel * width;
+  srcrowskipbyte = (src->mode_info->width - width) >> 3;
+  srcrowskipbit = (src->mode_info->width - width) & 7;
+
+  bit_index = offset_y * src->mode_info->width + offset_x;  /* Source rows are addressed width bits apart, with no per-row padding.  */
+  srcptr = (grub_uint8_t *) src->data + (bit_index >> 3);
+  srcmask = 1 << (~bit_index & 7);  /* Pixels are stored most significant bit first.  */
+  dstptr = (grub_uint8_t *) get_data_ptr (dst, x, y);
+
+  fgcolor = grub_video_fb_map_rgba (src->mode_info->fg_red,
+				    src->mode_info->fg_green,
+				    src->mode_info->fg_blue,
+				    src->mode_info->fg_alpha);
+
+  bgcolor = grub_video_fb_map_rgba (src->mode_info->bg_red,
+				    src->mode_info->bg_green,
+				    src->mode_info->bg_blue,
+				    src->mode_info->bg_alpha);
+
+  for (j = 0; j < height; j++)
+    {
+      for (i = 0; i < width; i++)
+        {
+	  if (*srcptr & srcmask)
+	    *(grub_uint16_t *) dstptr = fgcolor;
+	  else
+	    *(grub_uint16_t *) dstptr = bgcolor;
+	  srcmask >>= 1;  /* Step to the next source pixel.  */
+	  if (!srcmask)
+	    {
+	      srcptr++;
+	      srcmask = 0x80;  /* Mask ran off the byte: restart at the top bit of the next one.  */
+	    }
+
+	  dstptr += 2;
+        }
+
+      srcptr += srcrowskipbyte;
+      if (srcmask >> srcrowskipbit)  /* Do the spare bits still fit in the current byte?  */
+	srcmask >>= srcrowskipbit;
+      else
+	{
+	  srcptr++;
+	  srcmask <<= 8 - srcrowskipbit;  /* Same skip, expressed as a wrap into the next byte.  */
+	}
+      dstptr += dstrowskip;
+    }
+}
+
+/* Optimized replacing blitter for 1-bit to 8-bit: set source bits are drawn in the source's fg color, clear bits in its bg color.  */
+void
+grub_video_fbblit_replace_8bit_1bit (struct grub_video_fbblit_info *dst,
+				      struct grub_video_fbblit_info *src,
+				      int x, int y,
+				      int width, int height,
+				      int offset_x, int offset_y)
+{
+  int i;
+  int j;
+  grub_uint8_t *srcptr;
+  grub_uint8_t *dstptr;
+  grub_uint8_t srcmask;  /* One-bit mask picking the current pixel out of *srcptr.  */
+  unsigned int dstrowskip;
+  unsigned int srcrowskipbyte, srcrowskipbit;
+  grub_uint8_t fgcolor, bgcolor;  /* Mapped once up front, not per pixel; the mapped value is narrowed to 8 bits.  */
+  int bit_index;
+
+  /* Calculate how far to advance from the end of one blitted line to the
+     start of the next: bytes for dst, whole bytes plus spare bits for src.  */
+  dstrowskip = dst->mode_info->pitch - dst->mode_info->bytes_per_pixel * width;
+  srcrowskipbyte = (src->mode_info->width - width) >> 3;
+  srcrowskipbit = (src->mode_info->width - width) & 7;
+
+  bit_index = offset_y * src->mode_info->width + offset_x;  /* Source rows are addressed width bits apart, with no per-row padding.  */
+  srcptr = (grub_uint8_t *) src->data + (bit_index >> 3);
+  srcmask = 1 << (~bit_index & 7);  /* Pixels are stored most significant bit first.  */
+  dstptr = (grub_uint8_t *) get_data_ptr (dst, x, y);
+
+  fgcolor = grub_video_fb_map_rgba (src->mode_info->fg_red,
+				    src->mode_info->fg_green,
+				    src->mode_info->fg_blue,
+				    src->mode_info->fg_alpha);
+
+  bgcolor = grub_video_fb_map_rgba (src->mode_info->bg_red,
+				    src->mode_info->bg_green,
+				    src->mode_info->bg_blue,
+				    src->mode_info->bg_alpha);
+
+  for (j = 0; j < height; j++)
+    {
+      for (i = 0; i < width; i++)
+        {
+	  if (*srcptr & srcmask)
+	    *(grub_uint8_t *) dstptr = fgcolor;
+	  else
+	    *(grub_uint8_t *) dstptr = bgcolor;
+	  srcmask >>= 1;  /* Step to the next source pixel.  */
+	  if (!srcmask)
+	    {
+	      srcptr++;
+	      srcmask = 0x80;  /* Mask ran off the byte: restart at the top bit of the next one.  */
+	    }
+
+	  dstptr++;
+        }
+
+      srcptr += srcrowskipbyte;
+      if (srcmask >> srcrowskipbit)  /* Do the spare bits still fit in the current byte?  */
+	srcmask >>= srcrowskipbit;
+      else
+	{
+	  srcptr++;
+	  srcmask <<= 8 - srcrowskipbit;  /* Same skip, expressed as a wrap into the next byte.  */
+	}
+      dstptr += dstrowskip;
+    }
+}
+
 /* Optimized replacing blitter for RGBX8888 to BGRX8888.  */
 void
 grub_video_fbblit_replace_BGRX8888_RGBX8888 (struct grub_video_fbblit_info *dst,
@@ -826,3 +1122,294 @@ grub_video_fbblit_blend_index_RGBA8888 (struct grub_video_fbblit_info *dst,
         }
     }
 }
+
+/* Optimized blending blitter for 1-bit to XXXA8888: each source bit selects the source's fg or bg color, which is alpha-blended onto dst.  */
+void
+grub_video_fbblit_blend_XXXA8888_1bit (struct grub_video_fbblit_info *dst,
+				       struct grub_video_fbblit_info *src,
+				       int x, int y,
+				       int width, int height,
+				       int offset_x, int offset_y)
+{
+  int i;
+  int j;
+  grub_uint8_t *srcptr;
+  grub_uint8_t *dstptr;
+  grub_uint8_t srcmask;  /* One-bit mask picking the current pixel out of *srcptr.  */
+  unsigned int dstrowskip;
+  unsigned int srcrowskipbyte, srcrowskipbit;
+  grub_uint32_t fgcolor, bgcolor;  /* Mapped once up front, not per pixel.  */
+  int bit_index;
+
+  /* Calculate how far to advance from the end of one blitted line to the
+     start of the next: bytes for dst, whole bytes plus spare bits for src.  */
+  dstrowskip = dst->mode_info->pitch - dst->mode_info->bytes_per_pixel * width;
+  srcrowskipbyte = (src->mode_info->width - width) >> 3;
+  srcrowskipbit = (src->mode_info->width - width) & 7;
+
+  bit_index = offset_y * src->mode_info->width + offset_x;  /* Source rows are addressed width bits apart, with no per-row padding.  */
+  srcptr = (grub_uint8_t *) src->data + (bit_index >> 3);
+  srcmask = 1 << (~bit_index & 7);  /* Pixels are stored most significant bit first.  */
+  dstptr = (grub_uint8_t *) get_data_ptr (dst, x, y);
+
+  fgcolor = grub_video_fb_map_rgba (src->mode_info->fg_red,
+				    src->mode_info->fg_green,
+				    src->mode_info->fg_blue,
+				    src->mode_info->fg_alpha);
+
+  bgcolor = grub_video_fb_map_rgba (src->mode_info->bg_red,
+				    src->mode_info->bg_green,
+				    src->mode_info->bg_blue,
+				    src->mode_info->bg_alpha);
+
+  for (j = 0; j < height; j++)
+    {
+      for (i = 0; i < width; i++)
+        {
+	  grub_uint32_t color;
+	  grub_uint8_t a;
+
+	  if (*srcptr & srcmask)
+	    color = fgcolor;
+	  else
+	    color = bgcolor;
+	  a = (color >> 24) & 0xff;  /* Alpha is taken from the top byte of the mapped color.  */
+
+	  if (a == 255)  /* Opaque: plain overwrite.  A zero alpha leaves dst untouched.  */
+	    *(grub_uint32_t *) dstptr = color;
+	  else if (a != 0)
+	    {
+	      grub_uint8_t s1 = (color >> 0) & 0xFF;
+	      grub_uint8_t s2 = (color >> 8) & 0xFF;
+	      grub_uint8_t s3 = (color >> 16) & 0xFF;
+
+	      grub_uint8_t d1 = (*(grub_uint32_t *) dstptr >> 0) & 0xFF;
+	      grub_uint8_t d2 = (*(grub_uint32_t *) dstptr >> 8) & 0xFF;
+	      grub_uint8_t d3 = (*(grub_uint32_t *) dstptr >> 16) & 0xFF;
+
+	      d1 = (d1 * (255 - a) + s1 * a) / 255;  /* Per-channel weighted average of dst and src.  */
+	      d2 = (d2 * (255 - a) + s2 * a) / 255;
+	      d3 = (d3 * (255 - a) + s3 * a) / 255;
+
+	      *(grub_uint32_t *) dstptr = ((grub_uint32_t) a << 24) | (d3 << 16)
+		| (d2 << 8) | d1;  /* Widen a first: shifting it as a promoted int overflows for a >= 128.  */
+	    }
+
+	  srcmask >>= 1;  /* Step to the next source pixel.  */
+	  if (!srcmask)
+	    {
+	      srcptr++;
+	      srcmask = 0x80;  /* Mask ran off the byte: restart at the top bit of the next one.  */
+	    }
+
+	  dstptr += 4;
+        }
+
+      srcptr += srcrowskipbyte;
+      if (srcmask >> srcrowskipbit)  /* Do the spare bits still fit in the current byte?  */
+	srcmask >>= srcrowskipbit;
+      else
+	{
+	  srcptr++;
+	  srcmask <<= 8 - srcrowskipbit;  /* Same skip, expressed as a wrap into the next byte.  */
+	}
+      dstptr += dstrowskip;
+    }
+}
+
+/* Optimized blending blitter for 1-bit to XXX888: each source bit selects the source's fg or bg color, which is alpha-blended onto dst.  */
+void
+grub_video_fbblit_blend_XXX888_1bit (struct grub_video_fbblit_info *dst,
+				     struct grub_video_fbblit_info *src,
+				     int x, int y,
+				     int width, int height,
+				     int offset_x, int offset_y)
+{
+  int i;
+  int j;
+  grub_uint8_t *srcptr;
+  grub_uint8_t *dstptr;
+  grub_uint8_t srcmask;  /* One-bit mask picking the current pixel out of *srcptr.  */
+  unsigned int dstrowskip;
+  unsigned int srcrowskipbyte, srcrowskipbit;
+  grub_uint32_t fgcolor, bgcolor;  /* Mapped once up front, not per pixel.  */
+  int bit_index;
+
+  /* Calculate how far to advance from the end of one blitted line to the
+     start of the next: bytes for dst, whole bytes plus spare bits for src.  */
+  dstrowskip = dst->mode_info->pitch - dst->mode_info->bytes_per_pixel * width;
+  srcrowskipbyte = (src->mode_info->width - width) >> 3;
+  srcrowskipbit = (src->mode_info->width - width) & 7;
+
+  bit_index = offset_y * src->mode_info->width + offset_x;  /* Source rows are addressed width bits apart, with no per-row padding.  */
+  srcptr = (grub_uint8_t *) src->data + (bit_index >> 3);
+  srcmask = 1 << (~bit_index & 7);  /* Pixels are stored most significant bit first.  */
+  dstptr = (grub_uint8_t *) get_data_ptr (dst, x, y);
+
+  fgcolor = grub_video_fb_map_rgba (src->mode_info->fg_red,
+				    src->mode_info->fg_green,
+				    src->mode_info->fg_blue,
+				    src->mode_info->fg_alpha);
+
+  bgcolor = grub_video_fb_map_rgba (src->mode_info->bg_red,
+				    src->mode_info->bg_green,
+				    src->mode_info->bg_blue,
+				    src->mode_info->bg_alpha);
+
+  for (j = 0; j < height; j++)
+    {
+      for (i = 0; i < width; i++)
+        {
+	  grub_uint32_t color;
+	  grub_uint8_t a;
+	  if (*srcptr & srcmask)
+	    {
+	      color = fgcolor;
+	      a = src->mode_info->fg_alpha;  /* Alpha comes from the source mode info, not from the mapped color.  */
+	    }
+	  else
+	    {
+	      color = bgcolor;
+	      a = src->mode_info->bg_alpha;
+	    }
+
+	  if (a == 255)  /* Opaque: plain overwrite.  A zero alpha leaves dst untouched.  */
+	    {
+	      ((grub_uint8_t *) dstptr)[0] = color & 0xff;
+	      ((grub_uint8_t *) dstptr)[1] = (color & 0xff00) >> 8;
+	      ((grub_uint8_t *) dstptr)[2] = (color & 0xff0000) >> 16;
+	    }
+	  else if (a != 0)
+	    {
+	      grub_uint8_t s1 = (color >> 0) & 0xFF;
+	      grub_uint8_t s2 = (color >> 8) & 0xFF;
+	      grub_uint8_t s3 = (color >> 16) & 0xFF;
+
+	      grub_uint8_t d1 = dstptr[0];  /* Read the pixel bytewise: a 32-bit load would touch a fourth byte beyond it.  */
+	      grub_uint8_t d2 = dstptr[1];
+	      grub_uint8_t d3 = dstptr[2];
+
+	      ((grub_uint8_t *) dstptr)[0] = (d1 * (255 - a) + s1 * a) / 255;  /* Per-channel weighted average of dst and src.  */
+	      ((grub_uint8_t *) dstptr)[1] = (d2 * (255 - a) + s2 * a) / 255;
+	      ((grub_uint8_t *) dstptr)[2] = (d3 * (255 - a) + s3 * a) / 255;
+	    }
+
+	  srcmask >>= 1;  /* Step to the next source pixel.  */
+	  if (!srcmask)
+	    {
+	      srcptr++;
+	      srcmask = 0x80;  /* Mask ran off the byte: restart at the top bit of the next one.  */
+	    }
+
+	  dstptr += 3;
+        }
+
+      srcptr += srcrowskipbyte;
+      if (srcmask >> srcrowskipbit)  /* Do the spare bits still fit in the current byte?  */
+	srcmask >>= srcrowskipbit;
+      else
+	{
+	  srcptr++;
+	  srcmask <<= 8 - srcrowskipbit;  /* Same skip, expressed as a wrap into the next byte.  */
+	}
+      dstptr += dstrowskip;
+    }
+}
+
+/* Optimized blending blitter for 1-bit to XXX565: each source bit selects the source's fg or bg color, which is alpha-blended onto dst.  */
+void
+grub_video_fbblit_blend_XXX565_1bit (struct grub_video_fbblit_info *dst,
+				     struct grub_video_fbblit_info *src,
+				     int x, int y,
+				     int width, int height,
+				     int offset_x, int offset_y)
+{
+  int i;
+  int j;
+  grub_uint8_t *srcptr;
+  grub_uint8_t *dstptr;
+  grub_uint8_t srcmask;  /* One-bit mask picking the current pixel out of *srcptr.  */
+  unsigned int dstrowskip;
+  unsigned int srcrowskipbyte, srcrowskipbit;
+  grub_uint16_t fgcolor, bgcolor;  /* Mapped once up front, not per pixel; the mapped value is narrowed to 16 bits.  */
+  int bit_index;
+
+  /* Calculate how far to advance from the end of one blitted line to the
+     start of the next: bytes for dst, whole bytes plus spare bits for src.  */
+  dstrowskip = dst->mode_info->pitch - dst->mode_info->bytes_per_pixel * width;
+  srcrowskipbyte = (src->mode_info->width - width) >> 3;
+  srcrowskipbit = (src->mode_info->width - width) & 7;
+
+  bit_index = offset_y * src->mode_info->width + offset_x;  /* Source rows are addressed width bits apart, with no per-row padding.  */
+  srcptr = (grub_uint8_t *) src->data + (bit_index >> 3);
+  srcmask = 1 << (~bit_index & 7);  /* Pixels are stored most significant bit first.  */
+  dstptr = (grub_uint8_t *) get_data_ptr (dst, x, y);
+
+  fgcolor = grub_video_fb_map_rgba (src->mode_info->fg_red,
+				    src->mode_info->fg_green,
+				    src->mode_info->fg_blue,
+				    src->mode_info->fg_alpha);
+
+  bgcolor = grub_video_fb_map_rgba (src->mode_info->bg_red,
+				    src->mode_info->bg_green,
+				    src->mode_info->bg_blue,
+				    src->mode_info->bg_alpha);
+
+  for (j = 0; j < height; j++)
+    {
+      for (i = 0; i < width; i++)
+        {
+	  grub_uint32_t color;
+	  grub_uint8_t a;
+	  if (*srcptr & srcmask)
+	    {
+	      color = fgcolor;
+	      a = src->mode_info->fg_alpha;  /* Alpha comes from the source mode info, not from the mapped color.  */
+	    }
+	  else
+	    {
+	      color = bgcolor;
+	      a = src->mode_info->bg_alpha;
+	    }
+
+	  if (a == 255)  /* Opaque: plain overwrite.  A zero alpha leaves dst untouched.  */
+	    *(grub_uint16_t *) dstptr = color;
+	  else if (a != 0)
+	    {
+	      grub_uint8_t s1 = (color >> 0) & 0x1F;  /* Split into 5-, 6- and 5-bit fields at bit offsets 0, 5 and 11.  */
+	      grub_uint8_t s2 = (color >> 5) & 0x3F;
+	      grub_uint8_t s3 = (color >> 11) & 0x1F;
+
+	      grub_uint8_t d1 = (*(grub_uint16_t *) dstptr >> 0) & 0x1F;
+	      grub_uint8_t d2 = (*(grub_uint16_t *) dstptr >> 5) & 0x3F;
+	      grub_uint8_t d3 = (*(grub_uint16_t *) dstptr >> 11) & 0x1F;
+
+	      d1 = (d1 * (255 - a) + s1 * a) / 255;  /* Per-field weighted average of dst and src.  */
+	      d2 = (d2 * (255 - a) + s2 * a) / 255;
+	      d3 = (d3 * (255 - a) + s3 * a) / 255;
+
+	      *(grub_uint16_t *) dstptr = (d1 & 0x1f) | ((d2 & 0x3f) << 5)
+		| ((d3 & 0x1f) << 11);
+	    }
+
+	  srcmask >>= 1;  /* Step to the next source pixel.  */
+	  if (!srcmask)
+	    {
+	      srcptr++;
+	      srcmask = 0x80;  /* Mask ran off the byte: restart at the top bit of the next one.  */
+	    }
+
+	  dstptr += 2;
+        }
+
+      srcptr += srcrowskipbyte;
+      if (srcmask >> srcrowskipbit)  /* Do the spare bits still fit in the current byte?  */
+	srcmask >>= srcrowskipbit;
+      else
+	{
+	  srcptr++;
+	  srcmask <<= 8 - srcrowskipbit;  /* Same skip, expressed as a wrap into the next byte.  */
+	}
+      dstptr += dstrowskip;
+    }
+}
diff --git a/video/fb/video_fb.c b/video/fb/video_fb.c
index a35dd7a..5f2917d 100644
--- a/video/fb/video_fb.c
+++ b/video/fb/video_fb.c
@@ -587,6 +587,37 @@ common_blitter (struct grub_video_fbblit_info *target,
 	      return;
 	    }
 	}
+      else if (source->mode_info->blit_format == GRUB_VIDEO_BLIT_FORMAT_1BIT_PACKED)
+	{
+	  if (target->mode_info->bpp == 32)
+	    {
+	      grub_video_fbblit_replace_32bit_1bit (target, source,
+						    x, y, width, height,
+						    offset_x, offset_y);
+	      return;
+	    }
+	  else if (target->mode_info->bpp == 24)
+	    {
+	      grub_video_fbblit_replace_24bit_1bit (target, source,
+						    x, y, width, height,
+						    offset_x, offset_y);
+	      return;
+	    }
+	  else if (target->mode_info->bpp == 16)
+	    {
+	      grub_video_fbblit_replace_16bit_1bit (target, source,
+						    x, y, width, height,
+						    offset_x, offset_y);
+	      return;
+	    }
+	  else if (target->mode_info->bpp == 8)
+	    {
+	      grub_video_fbblit_replace_8bit_1bit (target, source,
+						   x, y, width, height,
+						   offset_x, offset_y);
+	      return;
+	    }
+	}
 
       /* No optimized replace operator found, use default (slow) blitter.  */
       grub_video_fbblit_replace (target, source, x, y, width, height,
@@ -674,6 +705,41 @@ common_blitter (struct grub_video_fbblit_info *target,
 	      return;
 	    }
 	}
+      else if (source->mode_info->blit_format == GRUB_VIDEO_BLIT_FORMAT_1BIT_PACKED)
+	{
+	  if (target->mode_info->blit_format
+	      == GRUB_VIDEO_BLIT_FORMAT_BGRA_8888
+	      || target->mode_info->blit_format
+	      == GRUB_VIDEO_BLIT_FORMAT_RGBA_8888)
+	    {
+	      grub_video_fbblit_blend_XXXA8888_1bit (target, source,
+						     x, y, width, height,
+						     offset_x, offset_y);
+	      return;
+	    }
+	  else if (target->mode_info->blit_format
+		   == GRUB_VIDEO_BLIT_FORMAT_BGR_888
+		   || target->mode_info->blit_format
+		   == GRUB_VIDEO_BLIT_FORMAT_RGB_888)
+	    {
+	      grub_video_fbblit_blend_XXX888_1bit (target, source,
+						   x, y, width, height,
+						   offset_x, offset_y);
+	      return;
+	    }
+	  else if (target->mode_info->blit_format
+		   == GRUB_VIDEO_BLIT_FORMAT_BGR_565
+		   || target->mode_info->blit_format
+		   == GRUB_VIDEO_BLIT_FORMAT_RGB_565)
+	    {
+	      grub_video_fbblit_blend_XXX565_1bit (target, source,
+						   x, y, width, height,
+						   offset_x, offset_y);
+	      return;
+	    }
+
+	}
+
 
       /* No optimized blend operation found, use default (slow) blitter.  */
       grub_video_fbblit_blend (target, source, x, y, width, height,
diff --git a/video/video.c b/video/video.c
index 36ebfd1..c1d66bd 100644
--- a/video/video.c
+++ b/video/video.c
@@ -181,6 +181,8 @@ grub_video_get_blit_format (struct grub_video_mode_info *mode_info)
 	  return GRUB_VIDEO_BLIT_FORMAT_RGB_565;
 	}
     }
+  else if (mode_info->bpp == 1)
+    return GRUB_VIDEO_BLIT_FORMAT_1BIT_PACKED;
 
   /* Backup route.  Unknown format.  */
 

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] Optimised 1bit blitters
  2009-08-21 15:33 [PATCH] Optimised 1bit blitters Vladimir 'phcoder' Serbinenko
@ 2009-08-23 10:48 ` Robert Millan
  2009-08-23 11:05   ` Vladimir 'phcoder' Serbinenko
  2009-08-25 14:06   ` Michal Suchanek
  0 siblings, 2 replies; 7+ messages in thread
From: Robert Millan @ 2009-08-23 10:48 UTC (permalink / raw)
  To: The development of GRUB 2

On Fri, Aug 21, 2009 at 05:33:30PM +0200, Vladimir 'phcoder' Serbinenko wrote:
> +  for (j = 0; j < height; j++)
> +    {
> +      for (i = 0; i < width; i++)
> +        {

It's a bit odd, but GCC doesn't seem to optimize those in a single loop.  Could
you use "i = 0; i < height * width; i++" instead?  (for this and the other
similar instances)

I can't comment much on the rest of this patch, as my understanding of
graphics is limited.  But please wait a few days before commit, hopefully
someone else will review.

-- 
Robert Millan

  The DRM opt-in fallacy: "Your data belongs to us. We will decide when (and
  how) you may access your data; but nobody's threatening your freedom: we
  still allow you to remove your data and not access it at all."



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Optimised 1bit blitters
  2009-08-23 10:48 ` Robert Millan
@ 2009-08-23 11:05   ` Vladimir 'phcoder' Serbinenko
  2009-08-23 23:07     ` Robert Millan
  2009-08-25 14:06   ` Michal Suchanek
  1 sibling, 1 reply; 7+ messages in thread
From: Vladimir 'phcoder' Serbinenko @ 2009-08-23 11:05 UTC (permalink / raw)
  To: The development of GRUB 2

On Sun, Aug 23, 2009 at 12:48 PM, Robert Millan<rmh@aybabtu.com> wrote:
> On Fri, Aug 21, 2009 at 05:33:30PM +0200, Vladimir 'phcoder' Serbinenko wrote:
>> +  for (j = 0; j < height; j++)
>> +    {
>> +      for (i = 0; i < width; i++)
>> +        {
>
> It's a bit odd, but GCC doesn't seem to optimize those in a single loop.  Could
> you use "i = 0; i < height * width; i++" instead?  (for this and the other
> similar instances)
There are some operations to do after completing inner loop.


-- 
Regards
Vladimir 'phcoder' Serbinenko

Personal git repository: http://repo.or.cz/w/grub2/phcoder.git



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Optimised 1bit blitters
  2009-08-23 11:05   ` Vladimir 'phcoder' Serbinenko
@ 2009-08-23 23:07     ` Robert Millan
  0 siblings, 0 replies; 7+ messages in thread
From: Robert Millan @ 2009-08-23 23:07 UTC (permalink / raw)
  To: The development of GRUB 2

On Sun, Aug 23, 2009 at 01:05:45PM +0200, Vladimir 'phcoder' Serbinenko wrote:
> On Sun, Aug 23, 2009 at 12:48 PM, Robert Millan<rmh@aybabtu.com> wrote:
> > On Fri, Aug 21, 2009 at 05:33:30PM +0200, Vladimir 'phcoder' Serbinenko wrote:
> >> +  for (j = 0; j < height; j++)
> >> +    {
> >> +      for (i = 0; i < width; i++)
> >> +        {
> >
> > It's a bit odd, but GCC doesn't seem to optimize those in a single loop.  Could
> > you use "i = 0; i < height * width; i++" instead?  (for this and the other
> > similar instances)
> There are some operations to do after completing inner loop.

Oh, right.  Sorry I misread it.  In that case, I have no objection but please
let it rest a few days to see if someone else will review it.

-- 
Robert Millan

  The DRM opt-in fallacy: "Your data belongs to us. We will decide when (and
  how) you may access your data; but nobody's threatening your freedom: we
  still allow you to remove your data and not access it at all."



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Optimised 1bit blitters
  2009-08-23 10:48 ` Robert Millan
  2009-08-23 11:05   ` Vladimir 'phcoder' Serbinenko
@ 2009-08-25 14:06   ` Michal Suchanek
  2009-08-25 14:41     ` Vladimir 'phcoder' Serbinenko
  1 sibling, 1 reply; 7+ messages in thread
From: Michal Suchanek @ 2009-08-25 14:06 UTC (permalink / raw)
  To: The development of GRUB 2

2009/8/23 Robert Millan <rmh@aybabtu.com>:
> On Fri, Aug 21, 2009 at 05:33:30PM +0200, Vladimir 'phcoder' Serbinenko wrote:
>> +  for (j = 0; j < height; j++)
>> +    {
>> +      for (i = 0; i < width; i++)
>> +        {
>
> It's a bit odd, but GCC doesn't seem to optimize those in a single loop.  Could
> you use "i = 0; i < height * width; i++" instead?  (for this and the other
> similar instances)
>
> I can't comment much on the rest of this patch, as my understanding of
> graphics is limited.  But please wait a few days before commit, hopefully
> someone else will review.

Well, this is not rocket science. You cache a function call which
would be done on every iteration otherwise. This is not feasible with
other bitmap types (except perhaps 8bit index->8bit index) because
they use many more colours.

How well tested is this? There are quite a few blitters and some may
not be ever used in current code.

The comment above the blend functions should probably not say they are
replace blitters.

Thanks

Michal



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Optimised 1bit blitters
  2009-08-25 14:06   ` Michal Suchanek
@ 2009-08-25 14:41     ` Vladimir 'phcoder' Serbinenko
  2009-08-28 13:54       ` Vladimir 'phcoder' Serbinenko
  0 siblings, 1 reply; 7+ messages in thread
From: Vladimir 'phcoder' Serbinenko @ 2009-08-25 14:41 UTC (permalink / raw)
  To: The development of GRUB 2

[-- Attachment #1: Type: text/plain, Size: 1646 bytes --]

On Tue, Aug 25, 2009 at 4:06 PM, Michal Suchanek<hramrach@centrum.cz> wrote:
> 2009/8/23 Robert Millan <rmh@aybabtu.com>:
>> On Fri, Aug 21, 2009 at 05:33:30PM +0200, Vladimir 'phcoder' Serbinenko wrote:
>>> +  for (j = 0; j < height; j++)
>>> +    {
>>> +      for (i = 0; i < width; i++)
>>> +        {
>>
>> It's a bit odd, but GCC doesn't seem to optimize those in a single loop.  Could
>> you use "i = 0; i < height * width; i++" instead?  (for this and the other
>> similar instances)
>>
>> I can't comment much on the rest of this patch, as my understanding of
>> graphics is limited.  But please wait a few days before commit, hopefully
>> someone else will review.
>
> Well, this is not rocket science. You cache a function call which
> would be done on every iteration otherwise. This is not feasible with
> other bitmap types (except perhaps 8bit index->8bit index) because
> they use many more colours.
It's possible with RGB(A) because color transformation is formula-based.
>
> How well tested is this? There are quite a few blitters and some may
> not be ever used in current code.
I modified videotest to test every blitting function. This part isn't
included in the patch because it's dirty.
>
> The comment above the blend functions should probably not say they are
> replace blitters.
>
Thanks.
> Thanks
>
> Michal
>
>
> _______________________________________________
> Grub-devel mailing list
> Grub-devel@gnu.org
> http://lists.gnu.org/mailman/listinfo/grub-devel
>



-- 
Regards
Vladimir 'phcoder' Serbinenko

Personal git repository: http://repo.or.cz/w/grub2/phcoder.git

[-- Attachment #2: 1bit.diff --]
[-- Type: text/plain, Size: 23018 bytes --]

diff --git a/ChangeLog b/ChangeLog
index d30cfd1..df4cff3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,28 @@
+2009-08-21  Vladimir Serbinenko  <phcoder@gmail.com>
+
+	1-bit optimised blitters.
+
+	* include/grub/fbblit.h (grub_video_fbblit_replace_32bit_1bit): New
+	prototype.
+	(grub_video_fbblit_replace_24bit_1bit): Likewise.
+	(grub_video_fbblit_replace_16bit_1bit): Likewise.
+	(grub_video_fbblit_replace_8bit_1bit): Likewise.
+	(grub_video_fbblit_blend_XXXA8888_1bit): Likewise.
+	(grub_video_fbblit_blend_XXX888_1bit): Likewise.
+	(grub_video_fbblit_blend_XXX565_1bit): Likewise.
+	* video/fb/fbblit.c (grub_video_fbblit_replace_32bit_1bit): New
+	function.
+	(grub_video_fbblit_replace_24bit_1bit): Likewise.
+	(grub_video_fbblit_replace_16bit_1bit): Likewise.
+	(grub_video_fbblit_replace_8bit_1bit): Likewise.
+	(grub_video_fbblit_blend_XXXA8888_1bit): Likewise.
+	(grub_video_fbblit_blend_XXX888_1bit): Likewise.
+	(grub_video_fbblit_blend_XXX565_1bit): Likewise.
+	* video/fb/video_fb.c (common_blitter): Use 1-bit optimised blitters
+	when possible.
+	* video/video.c (grub_video_get_blit_format): Return
+	GRUB_VIDEO_BLIT_FORMAT_1BIT_PACKED if bpp = 1.
+
 2009-08-25  Vladimir Serbinenko  <phcoder@gmail.com>
 
 	Fix breakage in grub-setup.
diff --git a/include/grub/fbblit.h b/include/grub/fbblit.h
index 664f508..af97dfb 100644
--- a/include/grub/fbblit.h
+++ b/include/grub/fbblit.h
@@ -131,4 +131,52 @@ grub_video_fbblit_blend_index_RGBA8888 (struct grub_video_fbblit_info *dst,
 					int width, int height,
 					int offset_x, int offset_y);
 
+void
+grub_video_fbblit_replace_32bit_1bit (struct grub_video_fbblit_info *dst,
+				      struct grub_video_fbblit_info *src,
+				      int x, int y,
+				      int width, int height,
+				      int offset_x, int offset_y);
+
+void
+grub_video_fbblit_replace_24bit_1bit (struct grub_video_fbblit_info *dst,
+				      struct grub_video_fbblit_info *src,
+				      int x, int y,
+				      int width, int height,
+				      int offset_x, int offset_y);
+
+void
+grub_video_fbblit_replace_16bit_1bit (struct grub_video_fbblit_info *dst,
+				      struct grub_video_fbblit_info *src,
+				      int x, int y,
+				      int width, int height,
+				      int offset_x, int offset_y);
+
+void
+grub_video_fbblit_replace_8bit_1bit (struct grub_video_fbblit_info *dst,
+				     struct grub_video_fbblit_info *src,
+				     int x, int y,
+				     int width, int height,
+				     int offset_x, int offset_y);
+
+void
+grub_video_fbblit_blend_XXXA8888_1bit (struct grub_video_fbblit_info *dst,
+				       struct grub_video_fbblit_info *src,
+				       int x, int y,
+				       int width, int height,
+				       int offset_x, int offset_y);
+
+void
+grub_video_fbblit_blend_XXX888_1bit (struct grub_video_fbblit_info *dst,
+				       struct grub_video_fbblit_info *src,
+				       int x, int y,
+				       int width, int height,
+				       int offset_x, int offset_y);
+
+void
+grub_video_fbblit_blend_XXX565_1bit (struct grub_video_fbblit_info *dst,
+				     struct grub_video_fbblit_info *src,
+				     int x, int y,
+				     int width, int height,
+				     int offset_x, int offset_y);
 #endif /* ! GRUB_FBBLIT_HEADER */
diff --git a/video/fb/fbblit.c b/video/fb/fbblit.c
index 5b613bc..a0f44d2 100644
--- a/video/fb/fbblit.c
+++ b/video/fb/fbblit.c
@@ -90,6 +90,302 @@ grub_video_fbblit_replace_directN (struct grub_video_fbblit_info *dst,
     }
 }
 
+/* Optimized replacing blitter for 1-bit to 32-bit.  */
+void
+grub_video_fbblit_replace_32bit_1bit (struct grub_video_fbblit_info *dst,
+				      struct grub_video_fbblit_info *src,
+				      int x, int y,
+				      int width, int height,
+				      int offset_x, int offset_y)
+{
+  int i;
+  int j;
+  grub_uint8_t *srcptr;
+  grub_uint8_t *dstptr;
+  grub_uint8_t srcmask;
+  unsigned int dstrowskip;
+  unsigned int srcrowskipbyte, srcrowskipbit;
+  grub_uint32_t fgcolor, bgcolor;
+  int bit_index;
+
+  /* Calculate the number of bytes to advance from the end of one line
+     to the beginning of the next line.  */
+  dstrowskip = dst->mode_info->pitch - dst->mode_info->bytes_per_pixel * width;
+  srcrowskipbyte = (src->mode_info->width - width) >> 3;
+  srcrowskipbit = (src->mode_info->width - width) & 7;
+
+  bit_index = offset_y * src->mode_info->width + offset_x;
+  srcptr = (grub_uint8_t *) src->data + (bit_index >> 3);
+  srcmask = 1 << (~bit_index & 7);
+  dstptr = (grub_uint8_t *) grub_video_fb_get_video_ptr (dst, x, y);
+
+  fgcolor = grub_video_fb_map_rgba (src->mode_info->fg_red,
+				    src->mode_info->fg_green,
+				    src->mode_info->fg_blue,
+				    src->mode_info->fg_alpha);
+
+  bgcolor = grub_video_fb_map_rgba (src->mode_info->bg_red,
+				    src->mode_info->bg_green,
+				    src->mode_info->bg_blue,
+				    src->mode_info->bg_alpha);
+
+  for (j = 0; j < height; j++)
+    {
+      for (i = 0; i < width; i++)
+        {
+	  if (*srcptr & srcmask)
+	    *(grub_uint32_t *) dstptr = fgcolor;
+	  else
+	    *(grub_uint32_t *) dstptr = bgcolor;
+	  srcmask >>= 1;
+	  if (!srcmask)
+	    {
+	      srcptr++;
+	      srcmask = 0x80;
+	    }
+
+	  dstptr += 4;
+        }
+
+      srcptr += srcrowskipbyte;
+      if (srcmask >> srcrowskipbit)
+	srcmask >>= srcrowskipbit;
+      else
+	{
+	  srcptr++;
+	  srcmask <<= 8 - srcrowskipbit;
+	}
+      dstptr += dstrowskip;
+    }
+}
+
+
+/* Optimized replacing blitter for 1-bit to 24-bit.  */
+void
+grub_video_fbblit_replace_24bit_1bit (struct grub_video_fbblit_info *dst,
+				      struct grub_video_fbblit_info *src,
+				      int x, int y,
+				      int width, int height,
+				      int offset_x, int offset_y)
+{
+  int i;
+  int j;
+  grub_uint8_t *srcptr;
+  grub_uint8_t *dstptr;
+  grub_uint8_t srcmask;
+  unsigned int dstrowskip;
+  unsigned int srcrowskipbyte, srcrowskipbit;
+  grub_uint32_t fgcolor, bgcolor;
+  int bit_index;
+
+  /* Calculate the number of bytes to advance from the end of one line
+     to the beginning of the next line.  */
+  dstrowskip = dst->mode_info->pitch - dst->mode_info->bytes_per_pixel * width;
+  srcrowskipbyte = (src->mode_info->width - width) >> 3;
+  srcrowskipbit = (src->mode_info->width - width) & 7;
+
+  bit_index = offset_y * src->mode_info->width + offset_x;
+  srcptr = (grub_uint8_t *) src->data + (bit_index >> 3);
+  srcmask = 1 << (~bit_index & 7);
+  dstptr = (grub_uint8_t *) grub_video_fb_get_video_ptr (dst, x, y);
+
+  fgcolor = grub_video_fb_map_rgba (src->mode_info->fg_red,
+				    src->mode_info->fg_green,
+				    src->mode_info->fg_blue,
+				    src->mode_info->fg_alpha);
+
+  bgcolor = grub_video_fb_map_rgba (src->mode_info->bg_red,
+				    src->mode_info->bg_green,
+				    src->mode_info->bg_blue,
+				    src->mode_info->bg_alpha);
+
+  for (j = 0; j < height; j++)
+    {
+      for (i = 0; i < width - 1; i++)
+        {
+	  if (*srcptr & srcmask)
+	    *(grub_uint32_t *) dstptr = fgcolor;
+	  else
+	    *(grub_uint32_t *) dstptr = bgcolor;
+	  srcmask >>= 1;
+	  if (!srcmask)
+	    {
+	      srcptr++;
+	      srcmask = 0x80;
+	    }
+
+	  dstptr += 3;
+        }
+
+      if (*srcptr & srcmask)
+	{
+	  *dstptr++ = fgcolor & 0xff;
+	  *dstptr++ = (fgcolor & 0xff00) >> 8;
+	  *dstptr++ = (fgcolor & 0xff0000) >> 16;
+	}
+      else
+	{
+	  *dstptr++ = bgcolor & 0xff;
+	  *dstptr++ = (bgcolor & 0xff00) >> 8;
+	  *dstptr++ = (bgcolor & 0xff0000) >> 16;
+	}
+      srcmask >>= 1;
+      if (!srcmask)
+	{
+	  srcptr++;
+	  srcmask = 0x80;
+	}
+
+      srcptr += srcrowskipbyte;
+      if (srcmask >> srcrowskipbit)
+	srcmask >>= srcrowskipbit;
+      else
+	{
+	  srcptr++;
+	  srcmask <<= 8 - srcrowskipbit;
+	}
+      dstptr += dstrowskip;
+    }
+}
+
+/* Optimized replacing blitter for 1-bit to 16-bit.  */
+void
+grub_video_fbblit_replace_16bit_1bit (struct grub_video_fbblit_info *dst,
+				      struct grub_video_fbblit_info *src,
+				      int x, int y,
+				      int width, int height,
+				      int offset_x, int offset_y)
+{
+  int i;
+  int j;
+  grub_uint8_t *srcptr;
+  grub_uint8_t *dstptr;
+  grub_uint8_t srcmask;
+  unsigned int dstrowskip;
+  unsigned int srcrowskipbyte, srcrowskipbit;
+  grub_uint16_t fgcolor, bgcolor;
+  int bit_index;
+
+  /* Calculate the number of bytes to advance from the end of one line
+     to the beginning of the next line.  */
+  dstrowskip = dst->mode_info->pitch - dst->mode_info->bytes_per_pixel * width;
+  srcrowskipbyte = (src->mode_info->width - width) >> 3;
+  srcrowskipbit = (src->mode_info->width - width) & 7;
+
+  bit_index = offset_y * src->mode_info->width + offset_x;
+  srcptr = (grub_uint8_t *) src->data + (bit_index >> 3);
+  srcmask = 1 << (~bit_index & 7);
+  dstptr = (grub_uint8_t *) grub_video_fb_get_video_ptr (dst, x, y);
+
+  fgcolor = grub_video_fb_map_rgba (src->mode_info->fg_red,
+				    src->mode_info->fg_green,
+				    src->mode_info->fg_blue,
+				    src->mode_info->fg_alpha);
+
+  bgcolor = grub_video_fb_map_rgba (src->mode_info->bg_red,
+				    src->mode_info->bg_green,
+				    src->mode_info->bg_blue,
+				    src->mode_info->bg_alpha);
+
+  for (j = 0; j < height; j++)
+    {
+      for (i = 0; i < width; i++)
+        {
+	  if (*srcptr & srcmask)
+	    *(grub_uint16_t *) dstptr = fgcolor;
+	  else
+	    *(grub_uint16_t *) dstptr = bgcolor;
+	  srcmask >>= 1;
+	  if (!srcmask)
+	    {
+	      srcptr++;
+	      srcmask = 0x80;
+	    }
+
+	  dstptr += 2;
+        }
+
+      srcptr += srcrowskipbyte;
+      if (srcmask >> srcrowskipbit)
+	srcmask >>= srcrowskipbit;
+      else
+	{
+	  srcptr++;
+	  srcmask <<= 8 - srcrowskipbit;
+	}
+      dstptr += dstrowskip;
+    }
+}
+
+/* Optimized replacing blitter for 1-bit to 8-bit.  */
+void
+grub_video_fbblit_replace_8bit_1bit (struct grub_video_fbblit_info *dst,
+				      struct grub_video_fbblit_info *src,
+				      int x, int y,
+				      int width, int height,
+				      int offset_x, int offset_y)
+{
+  int i;
+  int j;
+  grub_uint8_t *srcptr;
+  grub_uint8_t *dstptr;
+  grub_uint8_t srcmask;
+  unsigned int dstrowskip;
+  unsigned int srcrowskipbyte, srcrowskipbit;
+  grub_uint8_t fgcolor, bgcolor;
+  int bit_index;
+
+  /* Calculate the number of bytes to advance from the end of one line
+     to the beginning of the next line.  */
+  dstrowskip = dst->mode_info->pitch - dst->mode_info->bytes_per_pixel * width;
+  srcrowskipbyte = (src->mode_info->width - width) >> 3;
+  srcrowskipbit = (src->mode_info->width - width) & 7;
+
+  bit_index = offset_y * src->mode_info->width + offset_x;
+  srcptr = (grub_uint8_t *) src->data + (bit_index >> 3);
+  srcmask = 1 << (~bit_index & 7);
+  dstptr = (grub_uint8_t *) grub_video_fb_get_video_ptr (dst, x, y);
+
+  fgcolor = grub_video_fb_map_rgba (src->mode_info->fg_red,
+				    src->mode_info->fg_green,
+				    src->mode_info->fg_blue,
+				    src->mode_info->fg_alpha);
+
+  bgcolor = grub_video_fb_map_rgba (src->mode_info->bg_red,
+				    src->mode_info->bg_green,
+				    src->mode_info->bg_blue,
+				    src->mode_info->bg_alpha);
+
+  for (j = 0; j < height; j++)
+    {
+      for (i = 0; i < width; i++)
+        {
+	  if (*srcptr & srcmask)
+	    *(grub_uint8_t *) dstptr = fgcolor;
+	  else
+	    *(grub_uint8_t *) dstptr = bgcolor;
+	  srcmask >>= 1;
+	  if (!srcmask)
+	    {
+	      srcptr++;
+	      srcmask = 0x80;
+	    }
+
+	  dstptr++;
+        }
+
+      srcptr += srcrowskipbyte;
+      if (srcmask >> srcrowskipbit)
+	srcmask >>= srcrowskipbit;
+      else
+	{
+	  srcptr++;
+	  srcmask <<= 8 - srcrowskipbit;
+	}
+      dstptr += dstrowskip;
+    }
+}
+
 /* Optimized replacing blitter for RGBX8888 to BGRX8888.  */
 void
 grub_video_fbblit_replace_BGRX8888_RGBX8888 (struct grub_video_fbblit_info *dst,
@@ -826,3 +1122,294 @@ grub_video_fbblit_blend_index_RGBA8888 (struct grub_video_fbblit_info *dst,
         }
     }
 }
+
+/* Optimized blending blitter for 1-bit to XXXA8888.  */
+void
+grub_video_fbblit_blend_XXXA8888_1bit (struct grub_video_fbblit_info *dst,
+				       struct grub_video_fbblit_info *src,
+				       int x, int y,
+				       int width, int height,
+				       int offset_x, int offset_y)
+{
+  int i;
+  int j;
+  grub_uint8_t *srcptr;
+  grub_uint8_t *dstptr;
+  grub_uint8_t srcmask;
+  unsigned int dstrowskip;
+  unsigned int srcrowskipbyte, srcrowskipbit;
+  grub_uint32_t fgcolor, bgcolor;
+  int bit_index;
+
+  /* Calculate the number of bytes to advance from the end of one line
+     to the beginning of the next line.  */
+  dstrowskip = dst->mode_info->pitch - dst->mode_info->bytes_per_pixel * width;
+  srcrowskipbyte = (src->mode_info->width - width) >> 3;
+  srcrowskipbit = (src->mode_info->width - width) & 7;
+
+  bit_index = offset_y * src->mode_info->width + offset_x;
+  srcptr = (grub_uint8_t *) src->data + (bit_index >> 3);
+  srcmask = 1 << (~bit_index & 7);
+  dstptr = (grub_uint8_t *) grub_video_fb_get_video_ptr (dst, x, y);
+
+  fgcolor = grub_video_fb_map_rgba (src->mode_info->fg_red,
+				    src->mode_info->fg_green,
+				    src->mode_info->fg_blue,
+				    src->mode_info->fg_alpha);
+
+  bgcolor = grub_video_fb_map_rgba (src->mode_info->bg_red,
+				    src->mode_info->bg_green,
+				    src->mode_info->bg_blue,
+				    src->mode_info->bg_alpha);
+
+  for (j = 0; j < height; j++)
+    {
+      for (i = 0; i < width; i++)
+        {
+	  grub_uint32_t color;
+	  grub_uint8_t a;
+
+	  if (*srcptr & srcmask)
+	    color = fgcolor;
+	  else
+	    color = bgcolor;
+	  a = (color >> 24) & 0xff;
+
+	  if (a == 255)
+	    *(grub_uint32_t *) dstptr = color;
+	  else if (a != 0)
+	    {
+	      grub_uint8_t s1 = (color >> 0) & 0xFF;
+	      grub_uint8_t s2 = (color >> 8) & 0xFF;
+	      grub_uint8_t s3 = (color >> 16) & 0xFF;
+
+	      grub_uint8_t d1 = (*(grub_uint32_t *) dstptr >> 0) & 0xFF;
+	      grub_uint8_t d2 = (*(grub_uint32_t *) dstptr >> 8) & 0xFF;
+	      grub_uint8_t d3 = (*(grub_uint32_t *) dstptr >> 16) & 0xFF;
+
+	      d1 = (d1 * (255 - a) + s1 * a) / 255;
+	      d2 = (d2 * (255 - a) + s2 * a) / 255;
+	      d3 = (d3 * (255 - a) + s3 * a) / 255;
+
+	      *(grub_uint32_t *) dstptr = (a << 24) | (d3 << 16) | (d2 << 8)
+		| d1;
+	    }
+
+	  srcmask >>= 1;
+	  if (!srcmask)
+	    {
+	      srcptr++;
+	      srcmask = 0x80;
+	    }
+
+	  dstptr += 4;
+        }
+
+      srcptr += srcrowskipbyte;
+      if (srcmask >> srcrowskipbit)
+	srcmask >>= srcrowskipbit;
+      else
+	{
+	  srcptr++;
+	  srcmask <<= 8 - srcrowskipbit;
+	}
+      dstptr += dstrowskip;
+    }
+}
+
+/* Optimized blending blitter for 1-bit to XXX888.  */
+void
+grub_video_fbblit_blend_XXX888_1bit (struct grub_video_fbblit_info *dst,
+				     struct grub_video_fbblit_info *src,
+				     int x, int y,
+				     int width, int height,
+				     int offset_x, int offset_y)
+{
+  int i;
+  int j;
+  grub_uint8_t *srcptr;
+  grub_uint8_t *dstptr;
+  grub_uint8_t srcmask;
+  unsigned int dstrowskip;
+  unsigned int srcrowskipbyte, srcrowskipbit;
+  grub_uint32_t fgcolor, bgcolor;
+  int bit_index;
+
+  /* Calculate the number of bytes to advance from the end of one line
+     to the beginning of the next line.  */
+  dstrowskip = dst->mode_info->pitch - dst->mode_info->bytes_per_pixel * width;
+  srcrowskipbyte = (src->mode_info->width - width) >> 3;
+  srcrowskipbit = (src->mode_info->width - width) & 7;
+
+  bit_index = offset_y * src->mode_info->width + offset_x;
+  srcptr = (grub_uint8_t *) src->data + (bit_index >> 3);
+  srcmask = 1 << (~bit_index & 7);
+  dstptr = (grub_uint8_t *) grub_video_fb_get_video_ptr (dst, x, y);
+
+  fgcolor = grub_video_fb_map_rgba (src->mode_info->fg_red,
+				    src->mode_info->fg_green,
+				    src->mode_info->fg_blue,
+				    src->mode_info->fg_alpha);
+
+  bgcolor = grub_video_fb_map_rgba (src->mode_info->bg_red,
+				    src->mode_info->bg_green,
+				    src->mode_info->bg_blue,
+				    src->mode_info->bg_alpha);
+
+  for (j = 0; j < height; j++)
+    {
+      for (i = 0; i < width; i++)
+        {
+	  grub_uint32_t color;
+	  grub_uint8_t a;
+	  if (*srcptr & srcmask)
+	    {
+	      color = fgcolor;
+	      a = src->mode_info->fg_alpha;
+	    }
+	  else
+	    {
+	      color = bgcolor;
+	      a = src->mode_info->bg_alpha;
+	    }
+
+	  if (a == 255)
+	    {
+	      ((grub_uint8_t *) dstptr)[0] = color & 0xff;
+	      ((grub_uint8_t *) dstptr)[1] = (color & 0xff00) >> 8;
+	      ((grub_uint8_t *) dstptr)[2] = (color & 0xff0000) >> 16;
+	    }
+	  else if (a != 0)
+	    {
+	      grub_uint8_t s1 = (color >> 0) & 0xFF;
+	      grub_uint8_t s2 = (color >> 8) & 0xFF;
+	      grub_uint8_t s3 = (color >> 16) & 0xFF;
+
+	      grub_uint8_t d1 = (*(grub_uint32_t *) dstptr >> 0) & 0xFF;
+	      grub_uint8_t d2 = (*(grub_uint32_t *) dstptr >> 8) & 0xFF;
+	      grub_uint8_t d3 = (*(grub_uint32_t *) dstptr >> 16) & 0xFF;
+
+	      ((grub_uint8_t *) dstptr)[0] = (d1 * (255 - a) + s1 * a) / 255;
+	      ((grub_uint8_t *) dstptr)[1] = (d2 * (255 - a) + s2 * a) / 255;
+	      ((grub_uint8_t *) dstptr)[2] = (d3 * (255 - a) + s3 * a) / 255;
+	    }
+
+	  srcmask >>= 1;
+	  if (!srcmask)
+	    {
+	      srcptr++;
+	      srcmask = 0x80;
+	    }
+
+	  dstptr += 3;
+        }
+
+      srcptr += srcrowskipbyte;
+      if (srcmask >> srcrowskipbit)
+	srcmask >>= srcrowskipbit;
+      else
+	{
+	  srcptr++;
+	  srcmask <<= 8 - srcrowskipbit;
+	}
+      dstptr += dstrowskip;
+    }
+}
+
+/* Optimized blending blitter for 1-bit to XXX565.  */
+void
+grub_video_fbblit_blend_XXX565_1bit (struct grub_video_fbblit_info *dst,
+				     struct grub_video_fbblit_info *src,
+				     int x, int y,
+				     int width, int height,
+				     int offset_x, int offset_y)
+{
+  int i;
+  int j;
+  grub_uint8_t *srcptr;
+  grub_uint8_t *dstptr;
+  grub_uint8_t srcmask;
+  unsigned int dstrowskip;
+  unsigned int srcrowskipbyte, srcrowskipbit;
+  grub_uint16_t fgcolor, bgcolor;
+  int bit_index;
+
+  /* Calculate the number of bytes to advance from the end of one line
+     to the beginning of the next line.  */
+  dstrowskip = dst->mode_info->pitch - dst->mode_info->bytes_per_pixel * width;
+  srcrowskipbyte = (src->mode_info->width - width) >> 3;
+  srcrowskipbit = (src->mode_info->width - width) & 7;
+
+  bit_index = offset_y * src->mode_info->width + offset_x;
+  srcptr = (grub_uint8_t *) src->data + (bit_index >> 3);
+  srcmask = 1 << (~bit_index & 7);
+  dstptr = (grub_uint8_t *) grub_video_fb_get_video_ptr (dst, x, y);
+
+  fgcolor = grub_video_fb_map_rgba (src->mode_info->fg_red,
+				    src->mode_info->fg_green,
+				    src->mode_info->fg_blue,
+				    src->mode_info->fg_alpha);
+
+  bgcolor = grub_video_fb_map_rgba (src->mode_info->bg_red,
+				    src->mode_info->bg_green,
+				    src->mode_info->bg_blue,
+				    src->mode_info->bg_alpha);
+
+  for (j = 0; j < height; j++)
+    {
+      for (i = 0; i < width; i++)
+        {
+	  grub_uint32_t color;
+	  grub_uint8_t a;
+	  if (*srcptr & srcmask)
+	    {
+	      color = fgcolor;
+	      a = src->mode_info->fg_alpha;
+	    }
+	  else
+	    {
+	      color = bgcolor;
+	      a = src->mode_info->bg_alpha;
+	    }
+
+	  if (a == 255)
+	    *(grub_uint16_t *) dstptr = color;
+	  else if (a != 0)
+	    {
+	      grub_uint8_t s1 = (color >> 0) & 0x1F;
+	      grub_uint8_t s2 = (color >> 5) & 0x3F;
+	      grub_uint8_t s3 = (color >> 11) & 0x1F;
+
+	      grub_uint8_t d1 = (*(grub_uint16_t *) dstptr >> 0) & 0x1F;
+	      grub_uint8_t d2 = (*(grub_uint16_t *) dstptr >> 5) & 0x3F;
+	      grub_uint8_t d3 = (*(grub_uint16_t *) dstptr >> 11) & 0x1F;
+
+	      d1 = (d1 * (255 - a) + s1 * a) / 255;
+	      d2 = (d2 * (255 - a) + s2 * a) / 255;
+	      d3 = (d3 * (255 - a) + s3 * a) / 255;
+
+	      *(grub_uint16_t *) dstptr = (d1 & 0x1f) | ((d2 & 0x3f) << 5)
+		| ((d3 & 0x1f) << 11);
+	    }
+
+	  srcmask >>= 1;
+	  if (!srcmask)
+	    {
+	      srcptr++;
+	      srcmask = 0x80;
+	    }
+
+	  dstptr += 2;
+        }
+
+      srcptr += srcrowskipbyte;
+      if (srcmask >> srcrowskipbit)
+	srcmask >>= srcrowskipbit;
+      else
+	{
+	  srcptr++;
+	  srcmask <<= 8 - srcrowskipbit;
+	}
+      dstptr += dstrowskip;
+    }
+}
diff --git a/video/fb/video_fb.c b/video/fb/video_fb.c
index a35dd7a..5f2917d 100644
--- a/video/fb/video_fb.c
+++ b/video/fb/video_fb.c
@@ -587,6 +587,37 @@ common_blitter (struct grub_video_fbblit_info *target,
 	      return;
 	    }
 	}
+      else if (source->mode_info->blit_format == GRUB_VIDEO_BLIT_FORMAT_1BIT_PACKED)
+	{
+	  if (target->mode_info->bpp == 32)
+	    {
+	      grub_video_fbblit_replace_32bit_1bit (target, source,
+						    x, y, width, height,
+						    offset_x, offset_y);
+	      return;
+	    }
+	  else if (target->mode_info->bpp == 24)
+	    {
+	      grub_video_fbblit_replace_24bit_1bit (target, source,
+						    x, y, width, height,
+						    offset_x, offset_y);
+	      return;
+	    }
+	  else if (target->mode_info->bpp == 16)
+	    {
+	      grub_video_fbblit_replace_16bit_1bit (target, source,
+						    x, y, width, height,
+						    offset_x, offset_y);
+	      return;
+	    }
+	  else if (target->mode_info->bpp == 8)
+	    {
+	      grub_video_fbblit_replace_8bit_1bit (target, source,
+						   x, y, width, height,
+						   offset_x, offset_y);
+	      return;
+	    }
+	}
 
       /* No optimized replace operator found, use default (slow) blitter.  */
       grub_video_fbblit_replace (target, source, x, y, width, height,
@@ -674,6 +705,41 @@ common_blitter (struct grub_video_fbblit_info *target,
 	      return;
 	    }
 	}
+      else if (source->mode_info->blit_format == GRUB_VIDEO_BLIT_FORMAT_1BIT_PACKED)
+	{
+	  if (target->mode_info->blit_format
+	      == GRUB_VIDEO_BLIT_FORMAT_BGRA_8888
+	      || target->mode_info->blit_format
+	      == GRUB_VIDEO_BLIT_FORMAT_RGBA_8888)
+	    {
+	      grub_video_fbblit_blend_XXXA8888_1bit (target, source,
+						     x, y, width, height,
+						     offset_x, offset_y);
+	      return;
+	    }
+	  else if (target->mode_info->blit_format
+		   == GRUB_VIDEO_BLIT_FORMAT_BGR_888
+		   || target->mode_info->blit_format
+		   == GRUB_VIDEO_BLIT_FORMAT_RGB_888)
+	    {
+	      grub_video_fbblit_blend_XXX888_1bit (target, source,
+						   x, y, width, height,
+						   offset_x, offset_y);
+	      return;
+	    }
+	  else if (target->mode_info->blit_format
+		   == GRUB_VIDEO_BLIT_FORMAT_BGR_565
+		   || target->mode_info->blit_format
+		   == GRUB_VIDEO_BLIT_FORMAT_RGB_565)
+	    {
+	      grub_video_fbblit_blend_XXX565_1bit (target, source,
+						   x, y, width, height,
+						   offset_x, offset_y);
+	      return;
+	    }
+
+	}
+
 
       /* No optimized blend operation found, use default (slow) blitter.  */
       grub_video_fbblit_blend (target, source, x, y, width, height,
diff --git a/video/video.c b/video/video.c
index 36ebfd1..c1d66bd 100644
--- a/video/video.c
+++ b/video/video.c
@@ -181,6 +181,8 @@ grub_video_get_blit_format (struct grub_video_mode_info *mode_info)
 	  return GRUB_VIDEO_BLIT_FORMAT_RGB_565;
 	}
     }
+  else if (mode_info->bpp == 1)
+    return GRUB_VIDEO_BLIT_FORMAT_1BIT_PACKED;
 
   /* Backup route.  Unknown format.  */
 

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] Optimised 1bit blitters
  2009-08-25 14:41     ` Vladimir 'phcoder' Serbinenko
@ 2009-08-28 13:54       ` Vladimir 'phcoder' Serbinenko
  0 siblings, 0 replies; 7+ messages in thread
From: Vladimir 'phcoder' Serbinenko @ 2009-08-28 13:54 UTC (permalink / raw)
  To: The development of GRUB 2

committed

On Tue, Aug 25, 2009 at 4:41 PM, Vladimir 'phcoder'
Serbinenko<phcoder@gmail.com> wrote:
> On Tue, Aug 25, 2009 at 4:06 PM, Michal Suchanek<hramrach@centrum.cz> wrote:
>> 2009/8/23 Robert Millan <rmh@aybabtu.com>:
>>> On Fri, Aug 21, 2009 at 05:33:30PM +0200, Vladimir 'phcoder' Serbinenko wrote:
>>>> +  for (j = 0; j < height; j++)
>>>> +    {
>>>> +      for (i = 0; i < width; i++)
>>>> +        {
>>>
>>> It's a bit odd, but GCC doesn't seem to optimize those in a single loop.  Could
>>> you use "i = 0; i < height * width; i++" instead?  (for this and the other
>>> similar instances)
>>>
>>> I can't comment much on the rest of this patch, as my understanding of
>>> graphics is limited.  But please wait a few days before commit, hopefully
>>> someone else will review.
>>
>> Well, this is not rocket science. You cache a function call which
>> would be done on every iteration otherwise. This is not feasible with
>> other bitmap types (except perhaps 8bit index->8bit index) because
>> they use many more colours.
> It's possible with RGB(A) because color transformation is formula-based.
>>
>> How well tested is this? There are quite a few blitters and some may
>> not be ever used in current code.
> I modified videotest to test every blitting function. This part isn't
> included in the patch because it's dirty.
>>
>> The comment above the blend functions should probably not say they are
>> replace blitters.
>>
> Thanks.
>> Thanks
>>
>> Michal
>>
>>
>> _______________________________________________
>> Grub-devel mailing list
>> Grub-devel@gnu.org
>> http://lists.gnu.org/mailman/listinfo/grub-devel
>>
>
>
>
> --
> Regards
> Vladimir 'phcoder' Serbinenko
>
> Personal git repository: http://repo.or.cz/w/grub2/phcoder.git
>



-- 
Regards
Vladimir 'phcoder' Serbinenko

Personal git repository: http://repo.or.cz/w/grub2/phcoder.git



^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2009-08-28 13:54 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-08-21 15:33 [PATCH] Optimised 1bit blitters Vladimir 'phcoder' Serbinenko
2009-08-23 10:48 ` Robert Millan
2009-08-23 11:05   ` Vladimir 'phcoder' Serbinenko
2009-08-23 23:07     ` Robert Millan
2009-08-25 14:06   ` Michal Suchanek
2009-08-25 14:41     ` Vladimir 'phcoder' Serbinenko
2009-08-28 13:54       ` Vladimir 'phcoder' Serbinenko

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.