All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andre Przywara <andre.przywara@arm.com>
To: u-boot@lists.denx.de
Subject: [U-Boot] [PATCH 6/8] video/console: Convert UTF-8 codes to CP437 code points
Date: Sat, 23 Mar 2019 01:30:00 +0000	[thread overview]
Message-ID: <20190323013002.27117-7-andre.przywara@arm.com> (raw)
In-Reply-To: <20190323013002.27117-1-andre.przywara@arm.com>

The character set used by U-Boot's built-in fonts is the old "code
page 437" (from the original IBM PC).
However, people would probably expect UTF-8 on a terminal these days; the
UEFI code definitely does.

Provide a conversion routine to convert a UTF-8 byte stream into a CP437
character code. This uses a combination of arrays and switch/case
statements to provide an efficient way of translating the large Unicode
character range to the 8 bits used for CP437.

This fixes the UEFI display on the DM_VIDEO console, which was garbled for
any non-ASCII characters, for instance for the block graphic characters
used by Grub to display the menu.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 drivers/video/Makefile            |   1 +
 drivers/video/utf8_cp437.c        | 170 ++++++++++++++++++++++++++++++++++++++
 drivers/video/vidconsole-uclass.c |   8 +-
 include/video_console.h           |   9 ++
 4 files changed, 186 insertions(+), 2 deletions(-)
 create mode 100644 drivers/video/utf8_cp437.c

diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 671f037c35..8decf407bb 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_CONSOLE_TRUETYPE) += console_truetype.o fonts/
 obj-$(CONFIG_DISPLAY) += display-uclass.o
 obj-$(CONFIG_DM_VIDEO) += backlight-uclass.o
 obj-$(CONFIG_DM_VIDEO) += panel-uclass.o simple_panel.o
+obj-$(CONFIG_DM_VIDEO) += utf8_cp437.o
 obj-$(CONFIG_DM_VIDEO) += video-uclass.o vidconsole-uclass.o
 obj-$(CONFIG_DM_VIDEO) += video_bmp.o
 endif
diff --git a/drivers/video/utf8_cp437.c b/drivers/video/utf8_cp437.c
new file mode 100644
index 0000000000..983da39406
--- /dev/null
+++ b/drivers/video/utf8_cp437.c
@@ -0,0 +1,170 @@
+/*
+ * Convert UTF-8 bytes into a code page 437 character.
+ * Based on the table in the Code_page_437 Wikipedia page.
+ */
+
+#include <stdint.h>
+
+static uint8_t code_points_00a0[] = {	/* CP437 codes for U+00A0..U+00FF */
+	255, 173, 155, 156,   7, 157,   7,  21,	/* U+00A0..U+00A7 */
+	  7,   7, 166, 174, 170,   7,   7,   7,	/* U+00A8..U+00AF */
+	248, 241, 253,   7,   7, 230,  20, 250,	/* U+00B0..U+00B7 */
+	  7,   7, 167, 175, 172, 171,   7, 168,	/* U+00B8..U+00BF */
+	  7,   7,   7,   7, 142, 143, 146, 128,	/* U+00C0..U+00C7 */
+	  7, 144,   7,   7,   7,   7,   7,   7,	/* U+00C8..U+00CF */
+	  7, 165,   7,   7,   7,   7, 153,   7,	/* U+00D0..U+00D7 */
+	  7,   7,   7,   7, 154,   7,   7, 225,	/* U+00D8..U+00DF */
+	133, 160, 131,   7, 132, 134, 145, 135,	/* U+00E0..U+00E7 */
+	138, 130, 136, 137, 141, 161, 140, 139,	/* U+00E8..U+00EF */
+	  7, 164, 149, 162, 147,   7, 148, 246,	/* U+00F0..U+00F7 */
+	  7, 151, 163, 150, 129,   7,   7, 152,	/* U+00F8..U+00FF */
+};	/* indexed by (code - 0xa0); NOTE(review): 7 looks like the fill-in for characters CP437 lacks - confirm */
+
+static uint8_t code_points_2550[] = {	/* CP437 codes for U+2550..U+256C */
+	205, 186, 213, 214, 201, 184, 183, 187,	/* U+2550..U+2557 */
+	212, 211, 200, 190, 189, 188, 198, 199,	/* U+2558..U+255F */
+	204, 181, 182, 185, 209, 210, 203, 207,	/* U+2560..U+2567 */
+	208, 202, 216, 215, 206			/* U+2568..U+256C */
+};	/* indexed by (code - 0x2550); 29 entries, matching the range check in convert_uc16_to_cp437() */
+
+/* CP437 code for a sparse code point below U+0800, or 0 if there is none. */
+static uint8_t utf8_convert_11bit(uint16_t code)
+{
+	switch (code) {
+	case 0x0192: return 159;	/* Latin small f with hook */
+	case 0x0393: return 226;	/* Greek capital gamma */
+	case 0x0398: return 233;	/* Greek capital theta */
+	case 0x03A3: return 228;	/* Greek capital sigma */
+	case 0x03A6: return 232;	/* Greek capital phi */
+	case 0x03A9: return 234;	/* Greek capital omega */
+	case 0x03B1: return 224;	/* Greek small alpha */
+	case 0x03B4: return 235;	/* Greek small delta */
+	case 0x03B5: return 238;	/* Greek small epsilon */
+	case 0x03C0: return 227;	/* Greek small pi */
+	case 0x03C3: return 229;	/* Greek small sigma */
+	case 0x03C4: return 231;	/* Greek small tau */
+	case 0x03C6: return 237;	/* Greek small phi */
+	default:     return 0;		/* no CP437 equivalent listed */
+	}
+}
+
+static uint8_t utf8_convert_2xxx(uint16_t code)
+{
+	switch (code) {	/* sparse map; the caller sends every code >= U+0800 outside U+2550..U+256C here */
+	case 0x2022: return 7;		/* punctuation, superscript and currency symbols */
+	case 0x203C: return 19;
+	case 0x207F: return 252;
+	case 0x20A7: return 158;
+	case 0x2190: return 27;		/* arrows */
+	case 0x2191: return 24;
+	case 0x2192: return 26;
+	case 0x2193: return 25;
+	case 0x2194: return 29;
+	case 0x2195: return 18;
+	case 0x21A8: return 23;
+	case 0x2219: return 249;	/* mathematical operators */
+	case 0x221A: return 251;
+	case 0x221E: return 236;
+	case 0x221F: return 28;
+	case 0x2229: return 239;
+	case 0x2248: return 247;
+	case 0x2261: return 240;
+	case 0x2264: return 243;
+	case 0x2265: return 242;
+	case 0x2310: return 169;	/* misc. technical; NOTE(review): U+2302 (CP437 127) has no case - intentional? */
+	case 0x2320: return 244;
+	case 0x2321: return 245;
+	case 0x2500: return 196;	/* single-line box drawing */
+	case 0x2502: return 179;
+	case 0x250C: return 218;
+	case 0x2510: return 191;
+	case 0x2514: return 192;
+	case 0x2518: return 217;
+	case 0x251C: return 195;
+	case 0x2524: return 180;
+	case 0x252C: return 194;
+	case 0x2534: return 193;
+	case 0x253C: return 197;
+	case 0x2580: return 223;	/* block elements and shades */
+	case 0x2584: return 220;
+	case 0x2588: return 219;
+	case 0x258C: return 221;
+	case 0x2590: return 222;
+	case 0x2591: return 176;
+	case 0x2592: return 177;
+	case 0x2593: return 178;
+	case 0x25A0: return 254;	/* geometric shapes */
+	case 0x25AC: return 22;
+	case 0x25B2: return 30;
+	case 0x25BA: return 16;
+	case 0x25BC: return 31;
+	case 0x25C4: return 17;
+	case 0x25CB: return 9;
+	case 0x25D8: return 8;
+	case 0x25D9: return 10;
+	case 0x263A: return 1;		/* miscellaneous symbols */
+	case 0x263B: return 2;
+	case 0x263C: return 15;
+	case 0x2640: return 12;
+	case 0x2642: return 11;
+	case 0x2660: return 6;
+	case 0x2663: return 5;
+	case 0x2665: return 3;
+	case 0x2666: return 4;
+	case 0x266A: return 13;
+	case 0x266B: return 14;
+	}
+	/* no case matched: return 0, which vidconsole_put_char() skips without drawing */
+	return 0;
+}
+
+/* Map a 16-bit code point to CP437; 0 only if a helper switch has no entry. */
+uint8_t convert_uc16_to_cp437(uint16_t code)
+{
+	if (code <= 0x9f)		/* ASCII, DEL, 0x80..0x9f: unchanged */
+		return code;
+	if (code <= 0xff)		/* U+00A0..U+00FF: direct table */
+		return code_points_00a0[code - 0xa0];
+	if (code <= 0x7ff)		/* rest of the 11-bit range */
+		return utf8_convert_11bit(code);
+	/* Only U+2550..U+256C is table driven; the rest goes to the switch. */
+	if (code < 0x2550 || code > 0x256c)
+		return utf8_convert_2xxx(code);
+
+	return code_points_2550[code - 0x2550];
+}
+
+/*
+ * Feed one UTF-8 byte to the decoder; *esc carries the state across calls
+ * (bits 31..24: continuation bytes still expected, bits 23..0: code point
+ * bits so far; 0 when idle). Returns the CP437 code of a finished
+ * character, or 0 while a sequence is pending or nothing maps.
+ */
+uint8_t convert_utf8_to_cp437(uint8_t c, uint32_t *esc)
+{
+	uint32_t pending = *esc >> 24;
+	uint32_t ucp = *esc & 0xffffff;
+
+	if (c < 0x80) {
+		*esc = 0;		/* ASCII aborts an unfinished sequence */
+		return c == 127 ? 8 : c;	/* DEL is shown as glyph 8 */
+	}
+	if ((c & 0xc0) == 0x80) {	/* continuation byte */
+		if (!pending)		/* stray: no lead byte seen, drop it */
+			return 0;
+		pending--;
+		ucp |= (uint32_t)(c & 0x3f) << (pending * 6);
+		*esc = pending ? (pending << 24) | ucp : 0;
+		/* the converter takes 16 bits; don't let larger values alias */
+		return (pending || ucp > 0xffff) ? 0 : convert_uc16_to_cp437(ucp);
+	}
+	/* lead byte: note how many bytes follow and stash its payload bits */
+	if ((c & 0xe0) == 0xc0)		/* two bytes sequence */
+		*esc = (1U << 24) | ((uint32_t)(c & 0x1f) << 6);
+	else if ((c & 0xf0) == 0xe0)	/* three bytes sequence */
+		*esc = (2U << 24) | ((uint32_t)(c & 0x0f) << 12);
+	else				/* four bytes sequence */
+		*esc = (3U << 24) | ((uint32_t)(c & 0x07) << 18);
+
+	return 0;
+}
diff --git a/drivers/video/vidconsole-uclass.c b/drivers/video/vidconsole-uclass.c
index e16567029a..275c6c05c8 100644
--- a/drivers/video/vidconsole-uclass.c
+++ b/drivers/video/vidconsole-uclass.c
@@ -457,7 +457,7 @@ error:
 	priv->escape = 0;
 }
 
-/* Put that actual character on the screen (using the CP437 code page). */
+/* Put that actual character on the screen (using the font native code page). */
 static int vidconsole_output_glyph(struct udevice *dev, char ch)
 {
 	struct vidconsole_priv *priv = dev_get_uclass_priv(dev);
@@ -486,6 +486,7 @@ static int vidconsole_output_glyph(struct udevice *dev, char ch)
 int vidconsole_put_char(struct udevice *dev, char ch)
 {
 	struct vidconsole_priv *priv = dev_get_uclass_priv(dev);
+	uint8_t glyph_idx;
 	int ret;
 
 	if (priv->escape) {
@@ -520,7 +521,10 @@ int vidconsole_put_char(struct udevice *dev, char ch)
 		priv->last_ch = 0;
 		break;
 	default:
-		ret = vidconsole_output_glyph(dev, ch);
+		glyph_idx = convert_utf8_to_cp437(ch, &priv->ucs);
+		if (glyph_idx == 0)	/* UTF-8 continuation */
+			return 0;
+		ret = vidconsole_output_glyph(dev, glyph_idx);
 		if (ret < 0)
 			return ret;
 		break;
diff --git a/include/video_console.h b/include/video_console.h
index 52a41ac200..07e5fd0226 100644
--- a/include/video_console.h
+++ b/include/video_console.h
@@ -81,6 +81,7 @@ struct vidconsole_priv {
 	int escape_len;
 	int row_saved;
 	int col_saved;
+	u32 ucs;
 	char escape_buf[32];
 };
 
@@ -240,6 +241,14 @@ void vidconsole_position_cursor(struct udevice *dev, unsigned col,
  */
 u32 vid_console_color(struct video_priv *priv, unsigned int idx);
 
+/*
+ * Convert a UTF-8 byte into the corresponding character in the CP437
+ * code page. Returns 0 if that byte is part of a multi-byte sequence that
+ * is not yet complete; *esc holds the state of that sequence. Repeatedly
+ * feed in more bytes until the return value is not 0 anymore.
+ */
+uint8_t convert_utf8_to_cp437(uint8_t c, uint32_t *esc);
+
 #endif
 
 #endif
-- 
2.14.5

  parent reply	other threads:[~2019-03-23  1:30 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-03-23  1:29 [U-Boot] [PATCH 0/8] video/console: Fix various DM_VIDEO console issues Andre Przywara
2019-03-23  1:29 ` [U-Boot] [PATCH 1/8] video/console: Fix DM_VIDEO font glyph array indexing Andre Przywara
2019-03-30 21:18   ` Simon Glass
2019-04-09 21:03   ` Anatolij Gustschin
2019-03-23  1:29 ` [U-Boot] [PATCH 2/8] video/console: Implement reverse video ANSI sequence for DM_VIDEO Andre Przywara
2019-03-30 21:18   ` Simon Glass
2019-04-09 21:04   ` Anatolij Gustschin
2019-04-14 13:05   ` [U-Boot] [PATCH v2 " Anatolij Gustschin
2019-04-17 13:36     ` Anatolij Gustschin
2019-03-23  1:29 ` [U-Boot] [PATCH 3/8] video/console: Implement relative cursor movement ANSI handling Andre Przywara
2019-03-30 21:18   ` Simon Glass
2019-04-09 21:05   ` Anatolij Gustschin
2019-04-11 12:09     ` Anatolij Gustschin
2019-04-13 21:40       ` André Przywara
2019-04-14 12:54         ` Anatolij Gustschin
2019-04-14 21:49           ` André Przywara
2019-03-23  1:29 ` [U-Boot] [PATCH 4/8] video/console: Implement ANSI clear line command Andre Przywara
2019-03-30 21:18   ` Simon Glass
2019-04-09 21:05   ` Anatolij Gustschin
2019-03-23  1:29 ` [U-Boot] [PATCH 5/8] video/console: Factor out actual character output Andre Przywara
2019-03-30 21:18   ` Simon Glass
2019-04-09 21:06   ` Anatolij Gustschin
2019-03-23  1:30 ` Andre Przywara [this message]
2019-03-30 21:18   ` [U-Boot] [PATCH 6/8] video/console: Convert UTF-8 codes to CP437 code points Simon Glass
2019-03-31 18:28     ` Alexander Graf
2019-03-31 23:54       ` André Przywara
2019-03-23  1:30 ` [U-Boot] [PATCH 7/8] usb: kbd: Properly translate up/down arrow keys Andre Przywara
2019-03-30 21:18   ` Simon Glass
2019-04-01  0:04     ` André Przywara
2019-04-01  1:58       ` Simon Glass
2019-04-09 21:06   ` Anatolij Gustschin
2019-03-23  1:30 ` [U-Boot] [PATCH 8/8] sunxi: allow boards to de-select SYS_WHITE_ON_BLACK font scheme Andre Przywara
2019-03-30 21:18   ` Simon Glass
2019-04-09 21:07   ` Anatolij Gustschin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190323013002.27117-7-andre.przywara@arm.com \
    --to=andre.przywara@arm.com \
    --cc=u-boot@lists.denx.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.