* [PATCH][2.5] UTF-8 support in console
@ 2003-05-31 14:10 Chris Heath
2003-05-31 14:21 ` Christoph Hellwig
0 siblings, 1 reply; 8+ messages in thread
From: Chris Heath @ 2003-05-31 14:10 UTC (permalink / raw)
To: linux-kernel
[-- Attachment #1: Type: text/plain, Size: 496 bytes --]
Although the keyboard (keyboard.c) and terminal (vt.c) both have "UTF-8
modes", there are still a couple of shortcomings at the kernel level.
* compose tables use 8-bit characters,
* selection doesn't copy/paste UTF-8.
(OK, so there are many more, but these two are the ones that make
it inconvenient to even use extended Latin characters.)
Here is a patch against 2.5.70 that fixes selection. It uses the
existing Unicode font information (ushorts) to create an inverse mapping.
Chris
[-- Attachment #2: selection.patch --]
[-- Type: application/octet-stream, Size: 6480 bytes --]
--- a/include/linux/consolemap.h 2003-05-04 19:53:56.000000000 -0400
+++ b/include/linux/consolemap.h 2003-05-26 15:17:45.000000000 -0400
@@ -10,6 +10,6 @@
struct vc_data;
-extern unsigned char inverse_translate(struct vc_data *conp, int glyph);
+extern u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode);
extern unsigned short *set_translate(int m,int currcons);
extern int conv_uni_to_pc(struct vc_data *conp, long ucs);
--- a/drivers/char/consolemap.c 2003-05-04 19:53:36.000000000 -0400
+++ b/drivers/char/consolemap.c 2003-05-26 15:01:58.000000000 -0400
@@ -178,6 +178,7 @@
unsigned long refcount;
unsigned long sum;
unsigned char *inverse_translations[4];
+ u16 *inverse_trans_unicode;
int readonly;
};
@@ -208,6 +209,39 @@
}
}
+static void set_inverse_trans_unicode(struct vc_data *conp, struct uni_pagedir *p)
+{
+ int i, j, k, glyph;
+ u16 **p1, *p2;
+ u16 *q;
+
+ if (!p) return;
+ q = p->inverse_trans_unicode;
+
+ if (!q) {
+ q = p->inverse_trans_unicode = (u16 *)
+ kmalloc(MAX_GLYPH * sizeof(u16), GFP_KERNEL);
+ if (!q) return;
+ }
+ memset(q, 0, MAX_GLYPH * sizeof(u16));
+
+ for (i = 0; i < 32; i++) {
+ p1 = p->uni_pgdir[i];
+ if (!p1)
+ continue;
+ for (j = 0; j < 32; j++) {
+ p2 = p1[j];
+ if (!p2)
+ continue;
+ for (k = 0; k < 64; k++) {
+ glyph = p2[k];
+ if (glyph >= 0 && glyph < MAX_GLYPH && q[glyph] < 32)
+ q[glyph] = (i << 11) + (j << 6) + k;
+ }
+ }
+ }
+}
+
unsigned short *set_translate(int m,int currcons)
{
inv_translate[currcons] = m;
@@ -218,19 +252,27 @@
* Inverse translation is impossible for several reasons:
* 1. The font<->character maps are not 1-1.
* 2. The text may have been written while a different translation map
- * was active, or using Unicode.
+ * was active.
* Still, it is now possible to a certain extent to cut and paste non-ASCII.
*/
-unsigned char inverse_translate(struct vc_data *conp, int glyph)
+u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode)
{
struct uni_pagedir *p;
if (glyph < 0 || glyph >= MAX_GLYPH)
return 0;
- else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc) ||
- !p->inverse_translations[inv_translate[conp->vc_num]])
+ else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc))
return glyph;
- else
- return p->inverse_translations[inv_translate[conp->vc_num]][glyph];
+ else if (use_unicode) {
+ if (!p->inverse_trans_unicode)
+ return glyph;
+ else
+ return p->inverse_trans_unicode[glyph];
+ } else {
+ if (!p->inverse_translations[inv_translate[conp->vc_num]])
+ return glyph;
+ else
+ return p->inverse_translations[inv_translate[conp->vc_num]][glyph];
+ }
}
static void update_user_maps(void)
@@ -362,6 +404,10 @@
kfree(p->inverse_translations[i]);
p->inverse_translations[i] = NULL;
}
+ if (p->inverse_trans_unicode) {
+ kfree(p->inverse_trans_unicode);
+ p->inverse_trans_unicode = NULL;
+ }
}
void con_free_unimap(int con)
@@ -522,6 +568,7 @@
for (i = 0; i <= 3; i++)
set_inverse_transl(conp, p, i); /* Update all inverse translations */
+ set_inverse_trans_unicode(conp, p);
return err;
}
@@ -574,6 +621,7 @@
for (i = 0; i <= 3; i++)
set_inverse_transl(conp, p, i); /* Update all inverse translations */
+ set_inverse_trans_unicode(conp, p);
dflt = p;
return err;
}
--- a/drivers/char/selection.c 2003-05-04 19:53:42.000000000 -0400
+++ b/drivers/char/selection.c 2003-05-26 15:26:07.000000000 -0400
@@ -20,6 +20,7 @@
#include <asm/uaccess.h>
+#include <linux/kbd_kern.h>
#include <linux/vt_kern.h>
#include <linux/consolemap.h>
#include <linux/selection.h>
@@ -36,6 +37,7 @@
/* Variables for selection control. */
/* Use a dynamic buffer, instead of static (Dec 1994) */
int sel_cons; /* must not be disallocated */
+static int use_unicode;
static volatile int sel_start = -1; /* cleared by clear_selection */
static int sel_end;
static int sel_buffer_lth;
@@ -56,10 +58,10 @@
complement_pos(sel_cons, where);
}
-static unsigned char
+static u16
sel_pos(int n)
{
- return inverse_translate(vc_cons[sel_cons].d, screen_glyph(sel_cons, n));
+ return inverse_translate(vc_cons[sel_cons].d, screen_glyph(sel_cons, n), use_unicode);
}
/* remove the current selection highlight, if any,
@@ -88,8 +90,8 @@
0xFF7FFFFF /* latin-1 accented letters, not division sign */
};
-static inline int inword(const unsigned char c) {
- return ( inwordLut[c>>5] >> (c & 0x1F) ) & 1;
+static inline int inword(const u16 c) {
+ return c > 0xff || (( inwordLut[c>>5] >> (c & 0x1F) ) & 1);
}
/* set inwordLut contents. Invoked by ioctl(). */
@@ -110,13 +112,36 @@
return (v > u) ? u : v;
}
+/* stores the char in UTF8 and returns the number of bytes used (1-3) */
+int store_utf8(u16 c, char *p)
+{
+ if (c < 0x80) {
+ /* 0******* */
+ p[0] = c;
+ return 1;
+ } else if (c < 0x800) {
+ /* 110***** 10****** */
+ p[0] = 0xc0 | (c >> 6);
+ p[1] = 0x80 | (c & 0x3f);
+ return 2;
+ } else {
+ /* 1110**** 10****** 10****** */
+ p[0] = 0xe0 | (c >> 12);
+ p[1] = 0x80 | ((c >> 6) & 0x3f);
+ p[2] = 0x80 | (c & 0x3f);
+ return 3;
+ }
+}
+
/* set the current selection. Invoked by ioctl() or by kernel code. */
int set_selection(const unsigned long arg, struct tty_struct *tty, int user)
{
int sel_mode, new_sel_start, new_sel_end, spc;
char *bp, *obp;
- int i, ps, pe;
+ int i, ps, pe, multiplier;
+ u16 c;
unsigned int currcons = fg_console;
+ struct kbd_struct *kbd = kbd_table + fg_console;
unblank_screen();
poke_blanked_console();
@@ -170,6 +195,7 @@
clear_selection();
sel_cons = fg_console;
}
+ use_unicode = kbd && kbd->kbdmode == VC_UNICODE;
switch (sel_mode)
{
@@ -252,7 +278,8 @@
sel_end = new_sel_end;
/* Allocate a new buffer before freeing the old one ... */
- bp = kmalloc((sel_end-sel_start)/2+1, GFP_KERNEL);
+ multiplier = use_unicode ? 3 : 1; /* chars can take up to 3 bytes */
+ bp = kmalloc((sel_end-sel_start)/2*multiplier+1, GFP_KERNEL);
if (!bp) {
printk(KERN_WARNING "selection: kmalloc() failed\n");
clear_selection();
@@ -264,8 +291,12 @@
obp = bp;
for (i = sel_start; i <= sel_end; i += 2) {
- *bp = sel_pos(i);
- if (!isspace(*bp++))
+ c = sel_pos(i);
+ if (use_unicode)
+ bp += store_utf8(c, bp);
+ else
+ *bp++ = c;
+ if (!isspace(c))
obp = bp;
if (! ((i + 2) % video_size_row)) {
/* strip trailing blanks from line and add newline,
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH][2.5] UTF-8 support in console
2003-05-31 14:10 [PATCH][2.5] UTF-8 support in console Chris Heath
@ 2003-05-31 14:21 ` Christoph Hellwig
2003-05-31 14:43 ` coding style (was Re: [PATCH][2.5] UTF-8 support in console) Larry McVoy
2003-05-31 19:37 ` [PATCH][2.5] UTF-8 support in console Chris Heath
0 siblings, 2 replies; 8+ messages in thread
From: Christoph Hellwig @ 2003-05-31 14:21 UTC (permalink / raw)
To: Chris Heath; +Cc: linux-kernel
+static void set_inverse_trans_unicode(struct vc_data *conp, struct uni_pagedir *p)
Please linewrap after 80 chars.
+{
+ int i, j, k, glyph;
+ u16 **p1, *p2;
+ u16 *q;
+
+ if (!p) return;
Please split this into two lines. Can p ever be null?
+ q = p->inverse_trans_unicode;
+
+ if (!q) {
Kill the blank line above.
+ q = p->inverse_trans_unicode = (u16 *)
+ kmalloc(MAX_GLYPH * sizeof(u16), GFP_KERNEL);
The cast is not needed. And btw, where is q freed?
+ if (!q) return;
Two lines again.
^ permalink raw reply [flat|nested] 8+ messages in thread
* coding style (was Re: [PATCH][2.5] UTF-8 support in console)
2003-05-31 14:21 ` Christoph Hellwig
@ 2003-05-31 14:43 ` Larry McVoy
2003-05-31 15:01 ` Dave Jones
2003-05-31 19:37 ` [PATCH][2.5] UTF-8 support in console Chris Heath
1 sibling, 1 reply; 8+ messages in thread
From: Larry McVoy @ 2003-05-31 14:43 UTC (permalink / raw)
To: Christoph Hellwig, Chris Heath, linux-kernel
> Please linewrap after 80 chars.
Amen to that.
> + if (!q) {
>
> Kill the blank line above.
>
> + if (!q) return;
>
> Two lines again.
A couple of comments: in the BK source tree, we've diverged from the Linux
coding style a bit (maybe a lot, Linus has read the source, ask him).
One thing is
unless (p) {
....
}
instead of
if (!p) {
....
}
It's just a
#define unless(x) if (!(x))
but it makes some code read quite a bit easier. I'm a stickler for not using
2 lines where one will do, i.e.,
FILE *f;
...
unless (f = fopen(file, "r")) {
error handling;
return (-1);
}
You hiccup the first time you see it, then you can read it, then you
start using it. Yeah, I know, I'm using the value of an assignment in
a conditional, trust me, it works fine.
One other one is the
if (!q) return;
Christoph said two lines, we don't do it that way. The coding style we use is
a) one line is fine for a single statement.
b) in all other cases there are curly braces
unless (q) return; /* OK */
unless (q) { /* also OK */
return;
}
unless (q)
return; /* not OK, no "}" */
The point of this style is twofold: save a line when the thing you are
doing is a single statement, and make it easier for your eyes (or my
tired old eyes) to run over the code. If you see indentation you know
it is a block and there will be a closing } without exception.
It keeps the line counts about 10% smaller or so in our source base.
If you are looking for bragging rights about how big your stuff is that
might be bad but I like it because I can read more code in a window.
--
---
Larry McVoy lm at bitmover.com http://www.bitmover.com/lm
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: coding style (was Re: [PATCH][2.5] UTF-8 support in console)
2003-05-31 14:43 ` coding style (was Re: [PATCH][2.5] UTF-8 support in console) Larry McVoy
@ 2003-05-31 15:01 ` Dave Jones
2003-05-31 15:39 ` Larry McVoy
0 siblings, 1 reply; 8+ messages in thread
From: Dave Jones @ 2003-05-31 15:01 UTC (permalink / raw)
To: Larry McVoy, Christoph Hellwig, Chris Heath, linux-kernel
On Sat, May 31, 2003 at 07:43:23AM -0700, Larry McVoy wrote:
> One other one is the
>
> if (!q) return;
>
> Christoph said two lines, we don't do it that way. The coding style we use is
> a) one line is fine for a single statement.
> b) in all other cases there are curly braces
Saving a line over readability is utterly bogus.
Just look at some of the crap we have in devfs..
if (fs_info->devfsd_task == NULL) return (TRUE);
if (devfsd_queue_empty (fs_info) && fs_info->devfsd_sleeping) return TRUE;
if ( is_devfsd_or_child (fs_info) ) return (FALSE);
set_current_state (TASK_UNINTERRUPTIBLE);
add_wait_queue (&fs_info->revalidate_wait_queue, &wait);
if (!devfsd_queue_empty (fs_info) || !fs_info->devfsd_sleeping)
if (fs_info->devfsd_task) schedule ();
remove_wait_queue (&fs_info->revalidate_wait_queue, &wait);
__set_current_state (TASK_RUNNING);
return (TRUE);
*horror* to my eyes at least.
Parts of the DRI code use similar uglies. Whitespace is a *good* thing.
If you want more lines of code per screen, get a larger xterm, change a
font, whatever, but don't decrease code readability for something so bogus.
Dave
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: coding style (was Re: [PATCH][2.5] UTF-8 support in console)
2003-05-31 15:01 ` Dave Jones
@ 2003-05-31 15:39 ` Larry McVoy
2003-05-31 17:14 ` Steven Cole
0 siblings, 1 reply; 8+ messages in thread
From: Larry McVoy @ 2003-05-31 15:39 UTC (permalink / raw)
To: Dave Jones, Christoph Hellwig, Chris Heath, linux-kernel
On Sat, May 31, 2003 at 04:01:50PM +0100, Dave Jones wrote:
> Saving a line over readability is utterly bogus.
I agree 100%. If you have anything more complex than
if (error) return (error);
I want it to look like
if ((expr) || (expr2) || (expr3)) {
return (error);
}
> Just look at some of the crap we have in devfs..
No kidding, look at the nested if, that's insane.
> if (fs_info->devfsd_task == NULL) return (TRUE);
> if (devfsd_queue_empty (fs_info) && fs_info->devfsd_sleeping) return TRUE;
> if ( is_devfsd_or_child (fs_info) ) return (FALSE);
> set_current_state (TASK_UNINTERRUPTIBLE);
> add_wait_queue (&fs_info->revalidate_wait_queue, &wait);
> if (!devfsd_queue_empty (fs_info) || !fs_info->devfsd_sleeping)
> if (fs_info->devfsd_task) schedule ();
> remove_wait_queue (&fs_info->revalidate_wait_queue, &wait);
> __set_current_state (TASK_RUNNING);
> return (TRUE);
I took a pass at this, I think this is better (note the use of 1/2 tabs
as "continuation" lines, that's a Sun thing and it works pretty well):
if ((fs_info->devfsd_task == NULL) ||
(devfsd_queue_empty(fs_info) && fs_info->devfsd_sleeping)) {
return (TRUE);
}
if (is_devfsd_or_child(fs_info)) return (FALSE);
set_current_state (TASK_UNINTERRUPTIBLE);
add_wait_queue (&fs_info->revalidate_wait_queue, &wait);
if ((!devfsd_queue_empty (fs_info) || !fs_info->devfsd_sleeping) &&
fs_info->devfsd_task) {
schedule();
}
remove_wait_queue(&fs_info->revalidate_wait_queue, &wait);
__set_current_state(TASK_RUNNING);
return (TRUE);
--
---
Larry McVoy lm at bitmover.com http://www.bitmover.com/lm
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: coding style (was Re: [PATCH][2.5] UTF-8 support in console)
2003-05-31 15:39 ` Larry McVoy
@ 2003-05-31 17:14 ` Steven Cole
2003-05-31 17:56 ` viro
0 siblings, 1 reply; 8+ messages in thread
From: Steven Cole @ 2003-05-31 17:14 UTC (permalink / raw)
To: Larry McVoy; +Cc: Dave Jones, Christoph Hellwig, Chris Heath, linux-kernel
On Sat, 2003-05-31 at 09:39, Larry McVoy wrote:
> On Sat, May 31, 2003 at 04:01:50PM +0100, Dave Jones wrote:
> > Saving a line over readability is utterly bogus.
>
> I agree 100%. If you have anything more complex than
>
> if (error) return (error);
>
> I want it to look like
>
> if ((expr) || (expr2) || (expr3)) {
> return (error);
> }
>
This may just be pedantic minutiae, but aren't those parentheses around
"error" unnecessary?
Here is a proposal for coding style: Only use parentheses in the return
statement when needed.
return -ETOSENDERADDRESSUNKNOWN; /* this is OK */
return (value & ZORRO_MASK); /* so is this */
return (-ENOTENOUGHCOFFEE); /* bogus parenthesis */
Steven
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: coding style (was Re: [PATCH][2.5] UTF-8 support in console)
2003-05-31 17:14 ` Steven Cole
@ 2003-05-31 17:56 ` viro
0 siblings, 0 replies; 8+ messages in thread
From: viro @ 2003-05-31 17:56 UTC (permalink / raw)
To: Steven Cole
Cc: Larry McVoy, Dave Jones, Christoph Hellwig, Chris Heath, linux-kernel
On Sat, May 31, 2003 at 11:14:08AM -0600, Steven Cole wrote:
> statement when needed.
>
> return -ETOSENDERADDRESSUNKNOWN; /* this is OK */
> return (value & ZORRO_MASK); /* so is this */
Like hell it is. Parentheses are _not_ needed here - production is
<statement> -> return <expression> ;
The only messy '('-related case in C grammar is sizeof as unary operation
vs. sizeof ( <type> ) (lovely way to torture parsers and students on exam:
sizeof (int)*p). Everything else is pretty straightforward...
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH][2.5] UTF-8 support in console
2003-05-31 14:21 ` Christoph Hellwig
2003-05-31 14:43 ` coding style (was Re: [PATCH][2.5] UTF-8 support in console) Larry McVoy
@ 2003-05-31 19:37 ` Chris Heath
1 sibling, 0 replies; 8+ messages in thread
From: Chris Heath @ 2003-05-31 19:37 UTC (permalink / raw)
To: Christoph Hellwig, Chris Heath, linux-kernel
[-- Attachment #1: Type: text/plain, Size: 414 bytes --]
Here's the patch again, with suggested changes.
> + if (!p) return;
>
> Please split this into two lines. Can p ever be null?
p could never be null, as it turns out. So I removed this line. :-)
> + q = p->inverse_trans_unicode = (u16 *)
> + kmalloc(MAX_GLYPH * sizeof(u16), GFP_KERNEL);
>
> The cast is not needed. And btw, where is q freed?
It is freed. Grep for kfree.*inverse_trans_unicode.
Chris
[-- Attachment #2: selection.patch --]
[-- Type: application/octet-stream, Size: 6480 bytes --]
--- a/include/linux/consolemap.h 2003-05-04 19:53:56.000000000 -0400
+++ b/include/linux/consolemap.h 2003-05-26 15:17:45.000000000 -0400
@@ -10,6 +10,6 @@
struct vc_data;
-extern unsigned char inverse_translate(struct vc_data *conp, int glyph);
+extern u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode);
extern unsigned short *set_translate(int m,int currcons);
extern int conv_uni_to_pc(struct vc_data *conp, long ucs);
--- a/drivers/char/consolemap.c 2003-05-04 19:53:36.000000000 -0400
+++ b/drivers/char/consolemap.c 2003-05-31 13:57:53.000000000 -0400
@@ -178,6 +178,7 @@
unsigned long refcount;
unsigned long sum;
unsigned char *inverse_translations[4];
+ u16 *inverse_trans_unicode;
int readonly;
};
@@ -208,6 +209,40 @@
}
}
+static void set_inverse_trans_unicode(struct vc_data *conp,
+ struct uni_pagedir *p)
+{
+ int i, j, k, glyph;
+ u16 **p1, *p2;
+ u16 *q;
+
+ q = p->inverse_trans_unicode;
+ if (!q) {
+ q = p->inverse_trans_unicode =
+ kmalloc(MAX_GLYPH * sizeof(u16), GFP_KERNEL);
+ if (!q)
+ return;
+ }
+ memset(q, 0, MAX_GLYPH * sizeof(u16));
+
+ for (i = 0; i < 32; i++) {
+ p1 = p->uni_pgdir[i];
+ if (!p1)
+ continue;
+ for (j = 0; j < 32; j++) {
+ p2 = p1[j];
+ if (!p2)
+ continue;
+ for (k = 0; k < 64; k++) {
+ glyph = p2[k];
+ if (glyph >= 0 && glyph < MAX_GLYPH
+ && q[glyph] < 32)
+ q[glyph] = (i << 11) + (j << 6) + k;
+ }
+ }
+ }
+}
+
unsigned short *set_translate(int m,int currcons)
{
inv_translate[currcons] = m;
@@ -218,19 +253,29 @@
* Inverse translation is impossible for several reasons:
* 1. The font<->character maps are not 1-1.
* 2. The text may have been written while a different translation map
- * was active, or using Unicode.
+ * was active.
* Still, it is now possible to a certain extent to cut and paste non-ASCII.
*/
-unsigned char inverse_translate(struct vc_data *conp, int glyph)
+u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode)
{
struct uni_pagedir *p;
+ int m;
if (glyph < 0 || glyph >= MAX_GLYPH)
return 0;
- else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc) ||
- !p->inverse_translations[inv_translate[conp->vc_num]])
+ else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc))
return glyph;
- else
- return p->inverse_translations[inv_translate[conp->vc_num]][glyph];
+ else if (use_unicode) {
+ if (!p->inverse_trans_unicode)
+ return glyph;
+ else
+ return p->inverse_trans_unicode[glyph];
+ } else {
+ m = inv_translate[conp->vc_num];
+ if (!p->inverse_translations[m])
+ return glyph;
+ else
+ return p->inverse_translations[m][glyph];
+ }
}
static void update_user_maps(void)
@@ -362,6 +407,10 @@
kfree(p->inverse_translations[i]);
p->inverse_translations[i] = NULL;
}
+ if (p->inverse_trans_unicode) {
+ kfree(p->inverse_trans_unicode);
+ p->inverse_trans_unicode = NULL;
+ }
}
void con_free_unimap(int con)
@@ -522,6 +571,7 @@
for (i = 0; i <= 3; i++)
set_inverse_transl(conp, p, i); /* Update all inverse translations */
+ set_inverse_trans_unicode(conp, p);
return err;
}
@@ -574,6 +624,7 @@
for (i = 0; i <= 3; i++)
set_inverse_transl(conp, p, i); /* Update all inverse translations */
+ set_inverse_trans_unicode(conp, p);
dflt = p;
return err;
}
--- a/drivers/char/selection.c 2003-05-04 19:53:42.000000000 -0400
+++ b/drivers/char/selection.c 2003-05-31 13:54:17.000000000 -0400
@@ -20,6 +20,7 @@
#include <asm/uaccess.h>
+#include <linux/kbd_kern.h>
#include <linux/vt_kern.h>
#include <linux/consolemap.h>
#include <linux/selection.h>
@@ -36,6 +37,7 @@
/* Variables for selection control. */
/* Use a dynamic buffer, instead of static (Dec 1994) */
int sel_cons; /* must not be disallocated */
+static int use_unicode;
static volatile int sel_start = -1; /* cleared by clear_selection */
static int sel_end;
static int sel_buffer_lth;
@@ -56,10 +58,11 @@
complement_pos(sel_cons, where);
}
-static unsigned char
+static u16
sel_pos(int n)
{
- return inverse_translate(vc_cons[sel_cons].d, screen_glyph(sel_cons, n));
+ return inverse_translate(vc_cons[sel_cons].d, screen_glyph(sel_cons, n),
+ use_unicode);
}
/* remove the current selection highlight, if any,
@@ -88,8 +91,8 @@
0xFF7FFFFF /* latin-1 accented letters, not division sign */
};
-static inline int inword(const unsigned char c) {
- return ( inwordLut[c>>5] >> (c & 0x1F) ) & 1;
+static inline int inword(const u16 c) {
+ return c > 0xff || (( inwordLut[c>>5] >> (c & 0x1F) ) & 1);
}
/* set inwordLut contents. Invoked by ioctl(). */
@@ -110,13 +113,36 @@
return (v > u) ? u : v;
}
+/* stores the char in UTF8 and returns the number of bytes used (1-3) */
+int store_utf8(u16 c, char *p)
+{
+ if (c < 0x80) {
+ /* 0******* */
+ p[0] = c;
+ return 1;
+ } else if (c < 0x800) {
+ /* 110***** 10****** */
+ p[0] = 0xc0 | (c >> 6);
+ p[1] = 0x80 | (c & 0x3f);
+ return 2;
+ } else {
+ /* 1110**** 10****** 10****** */
+ p[0] = 0xe0 | (c >> 12);
+ p[1] = 0x80 | ((c >> 6) & 0x3f);
+ p[2] = 0x80 | (c & 0x3f);
+ return 3;
+ }
+}
+
/* set the current selection. Invoked by ioctl() or by kernel code. */
int set_selection(const unsigned long arg, struct tty_struct *tty, int user)
{
int sel_mode, new_sel_start, new_sel_end, spc;
char *bp, *obp;
- int i, ps, pe;
+ int i, ps, pe, multiplier;
+ u16 c;
unsigned int currcons = fg_console;
+ struct kbd_struct *kbd = kbd_table + fg_console;
unblank_screen();
poke_blanked_console();
@@ -170,6 +196,7 @@
clear_selection();
sel_cons = fg_console;
}
+ use_unicode = kbd && kbd->kbdmode == VC_UNICODE;
switch (sel_mode)
{
@@ -252,7 +279,8 @@
sel_end = new_sel_end;
/* Allocate a new buffer before freeing the old one ... */
- bp = kmalloc((sel_end-sel_start)/2+1, GFP_KERNEL);
+ multiplier = use_unicode ? 3 : 1; /* chars can take up to 3 bytes */
+ bp = kmalloc((sel_end-sel_start)/2*multiplier+1, GFP_KERNEL);
if (!bp) {
printk(KERN_WARNING "selection: kmalloc() failed\n");
clear_selection();
@@ -264,8 +292,12 @@
obp = bp;
for (i = sel_start; i <= sel_end; i += 2) {
- *bp = sel_pos(i);
- if (!isspace(*bp++))
+ c = sel_pos(i);
+ if (use_unicode)
+ bp += store_utf8(c, bp);
+ else
+ *bp++ = c;
+ if (!isspace(c))
obp = bp;
if (! ((i + 2) % video_size_row)) {
/* strip trailing blanks from line and add newline,
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2003-05-31 19:24 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-05-31 14:10 [PATCH][2.5] UTF-8 support in console Chris Heath
2003-05-31 14:21 ` Christoph Hellwig
2003-05-31 14:43 ` coding style (was Re: [PATCH][2.5] UTF-8 support in console) Larry McVoy
2003-05-31 15:01 ` Dave Jones
2003-05-31 15:39 ` Larry McVoy
2003-05-31 17:14 ` Steven Cole
2003-05-31 17:56 ` viro
2003-05-31 19:37 ` [PATCH][2.5] UTF-8 support in console Chris Heath
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).