linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH][2.5] UTF-8 support in console
@ 2003-05-31 14:10 Chris Heath
  2003-05-31 14:21 ` Christoph Hellwig
  0 siblings, 1 reply; 11+ messages in thread
From: Chris Heath @ 2003-05-31 14:10 UTC (permalink / raw)
  To: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 496 bytes --]

Although the keyboard (keyboard.c) and terminal (vt.c) both have "UTF-8
modes", there are still a couple of short-comings at the kernel level.

  * compose tables use 8-bit characters,
  * selection doesn't copy/paste UTF-8.

(OK, so there are many more, but these two are the ones that make
it inconvenient to even use extended Latin characters.)

Here is a patch against 2.5.70 that fixes selection.  It uses the
existing Unicode font information (ushorts) to create an inverse mapping.

Chris

[-- Attachment #2: selection.patch --]
[-- Type: application/octet-stream, Size: 6480 bytes --]

--- a/include/linux/consolemap.h	2003-05-04 19:53:56.000000000 -0400
+++ b/include/linux/consolemap.h	2003-05-26 15:17:45.000000000 -0400
@@ -10,6 +10,6 @@
 
 struct vc_data;
 
-extern unsigned char inverse_translate(struct vc_data *conp, int glyph);
+extern u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode);
 extern unsigned short *set_translate(int m,int currcons);
 extern int conv_uni_to_pc(struct vc_data *conp, long ucs);
--- a/drivers/char/consolemap.c	2003-05-04 19:53:36.000000000 -0400
+++ b/drivers/char/consolemap.c	2003-05-26 15:01:58.000000000 -0400
@@ -178,6 +178,7 @@
 	unsigned long	refcount;
 	unsigned long	sum;
 	unsigned char	*inverse_translations[4];
+	u16		*inverse_trans_unicode;
 	int		readonly;
 };
 
@@ -208,6 +209,39 @@
 	}
 }
 
+static void set_inverse_trans_unicode(struct vc_data *conp, struct uni_pagedir *p)
+{
+	int i, j, k, glyph;
+	u16 **p1, *p2;
+	u16 *q;
+	
+	if (!p) return;
+	q = p->inverse_trans_unicode;
+
+	if (!q) {
+		q = p->inverse_trans_unicode = (u16 *) 
+			kmalloc(MAX_GLYPH * sizeof(u16), GFP_KERNEL);
+		if (!q) return;
+	}
+	memset(q, 0, MAX_GLYPH * sizeof(u16));
+
+	for (i = 0; i < 32; i++) {
+		p1 = p->uni_pgdir[i];
+		if (!p1)
+			continue;
+		for (j = 0; j < 32; j++) {
+			p2 = p1[j];
+			if (!p2)
+				continue;
+			for (k = 0; k < 64; k++) {
+				glyph = p2[k];
+				if (glyph >= 0 && glyph < MAX_GLYPH && q[glyph] < 32)
+		  			q[glyph] = (i << 11) + (j << 6) + k;
+			}
+		}
+	}
+}
+
 unsigned short *set_translate(int m,int currcons)
 {
 	inv_translate[currcons] = m;
@@ -218,19 +252,27 @@
  * Inverse translation is impossible for several reasons:
  * 1. The font<->character maps are not 1-1.
  * 2. The text may have been written while a different translation map
- *    was active, or using Unicode.
+ *    was active.
  * Still, it is now possible to a certain extent to cut and paste non-ASCII.
  */
-unsigned char inverse_translate(struct vc_data *conp, int glyph)
+u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode)
 {
 	struct uni_pagedir *p;
 	if (glyph < 0 || glyph >= MAX_GLYPH)
 		return 0;
-	else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc) ||
-		 !p->inverse_translations[inv_translate[conp->vc_num]])
+	else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc))
 		return glyph;
-	else
-		return p->inverse_translations[inv_translate[conp->vc_num]][glyph];
+	else if (use_unicode) {
+		if (!p->inverse_trans_unicode)
+			return glyph;
+		else
+			return p->inverse_trans_unicode[glyph];
+	} else {
+		if (!p->inverse_translations[inv_translate[conp->vc_num]])
+			return glyph;
+		else
+			return p->inverse_translations[inv_translate[conp->vc_num]][glyph];
+	}
 }
 
 static void update_user_maps(void)
@@ -362,6 +404,10 @@
 			kfree(p->inverse_translations[i]);
 			p->inverse_translations[i] = NULL;
 		}
+	if (p->inverse_trans_unicode) {
+		kfree(p->inverse_trans_unicode);
+		p->inverse_trans_unicode = NULL;
+	}
 }
 
 void con_free_unimap(int con)
@@ -522,6 +568,7 @@
 
 	for (i = 0; i <= 3; i++)
 		set_inverse_transl(conp, p, i); /* Update all inverse translations */
+	set_inverse_trans_unicode(conp, p);
   
 	return err;
 }
@@ -574,6 +621,7 @@
 
 	for (i = 0; i <= 3; i++)
 		set_inverse_transl(conp, p, i);	/* Update all inverse translations */
+	set_inverse_trans_unicode(conp, p);
 	dflt = p;
 	return err;
 }
--- a/drivers/char/selection.c	2003-05-04 19:53:42.000000000 -0400
+++ b/drivers/char/selection.c	2003-05-26 15:26:07.000000000 -0400
@@ -20,6 +20,7 @@
 
 #include <asm/uaccess.h>
 
+#include <linux/kbd_kern.h>
 #include <linux/vt_kern.h>
 #include <linux/consolemap.h>
 #include <linux/selection.h>
@@ -36,6 +37,7 @@
 /* Variables for selection control. */
 /* Use a dynamic buffer, instead of static (Dec 1994) */
        int sel_cons;		/* must not be disallocated */
+static int use_unicode;
 static volatile int sel_start = -1; 	/* cleared by clear_selection */
 static int sel_end;
 static int sel_buffer_lth;
@@ -56,10 +58,10 @@
 	complement_pos(sel_cons, where);
 }
 
-static unsigned char
+static u16
 sel_pos(int n)
 {
-	return inverse_translate(vc_cons[sel_cons].d, screen_glyph(sel_cons, n));
+	return inverse_translate(vc_cons[sel_cons].d, screen_glyph(sel_cons, n), use_unicode);
 }
 
 /* remove the current selection highlight, if any,
@@ -88,8 +90,8 @@
   0xFF7FFFFF  /* latin-1 accented letters, not division sign */
 };
 
-static inline int inword(const unsigned char c) {
-	return ( inwordLut[c>>5] >> (c & 0x1F) ) & 1;
+static inline int inword(const u16 c) {
+	return c > 0xff || (( inwordLut[c>>5] >> (c & 0x1F) ) & 1);
 }
 
 /* set inwordLut contents. Invoked by ioctl(). */
@@ -110,13 +112,36 @@
 	return (v > u) ? u : v;
 }
 
+/* stores the char in UTF8 and returns the number of bytes used (1-3) */
+int store_utf8(u16 c, char *p) 
+{
+	if (c < 0x80) {
+		/*  0******* */
+		p[0] = c;
+		return 1;
+	} else if (c < 0x800) {
+		/* 110***** 10****** */
+		p[0] = 0xc0 | (c >> 6);
+		p[1] = 0x80 | (c & 0x3f);
+		return 2;
+    	} else {
+		/* 1110**** 10****** 10****** */
+		p[0] = 0xe0 | (c >> 12);
+		p[1] = 0x80 | ((c >> 6) & 0x3f);
+		p[2] = 0x80 | (c & 0x3f);
+		return 3;
+    	}
+}
+
 /* set the current selection. Invoked by ioctl() or by kernel code. */
 int set_selection(const unsigned long arg, struct tty_struct *tty, int user)
 {
 	int sel_mode, new_sel_start, new_sel_end, spc;
 	char *bp, *obp;
-	int i, ps, pe;
+	int i, ps, pe, multiplier;
+	u16 c;
 	unsigned int currcons = fg_console;
+	struct kbd_struct *kbd = kbd_table + fg_console;
 
 	unblank_screen();
 	poke_blanked_console();
@@ -170,6 +195,7 @@
 		clear_selection();
 		sel_cons = fg_console;
 	}
+	use_unicode = kbd && kbd->kbdmode == VC_UNICODE;
 
 	switch (sel_mode)
 	{
@@ -252,7 +278,8 @@
 	sel_end = new_sel_end;
 
 	/* Allocate a new buffer before freeing the old one ... */
-	bp = kmalloc((sel_end-sel_start)/2+1, GFP_KERNEL);
+	multiplier = use_unicode ? 3 : 1;  /* chars can take up to 3 bytes */
+	bp = kmalloc((sel_end-sel_start)/2*multiplier+1, GFP_KERNEL);
 	if (!bp) {
 		printk(KERN_WARNING "selection: kmalloc() failed\n");
 		clear_selection();
@@ -264,8 +291,12 @@
 
 	obp = bp;
 	for (i = sel_start; i <= sel_end; i += 2) {
-		*bp = sel_pos(i);
-		if (!isspace(*bp++))
+		c = sel_pos(i);
+		if (use_unicode)
+			bp += store_utf8(c, bp);
+		else
+			*bp++ = c;
+		if (!isspace(c))
 			obp = bp;
 		if (! ((i + 2) % video_size_row)) {
 			/* strip trailing blanks from line and add newline,

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][2.5] UTF-8 support in console
  2003-05-31 14:10 [PATCH][2.5] UTF-8 support in console Chris Heath
@ 2003-05-31 14:21 ` Christoph Hellwig
  2003-05-31 14:43   ` coding style (was Re: [PATCH][2.5] UTF-8 support in console) Larry McVoy
  2003-05-31 19:37   ` [PATCH][2.5] UTF-8 support in console Chris Heath
  0 siblings, 2 replies; 11+ messages in thread
From: Christoph Hellwig @ 2003-05-31 14:21 UTC (permalink / raw)
  To: Chris Heath; +Cc: linux-kernel


+static void set_inverse_trans_unicode(struct vc_data *conp, struct uni_pagedir *p)

Please linewrap after 80 chars.

+{
+	int i, j, k, glyph;
+	u16 **p1, *p2;
+	u16 *q;
+	
+	if (!p) return;

Please split this into two lines. Can p ever be null?

+	q = p->inverse_trans_unicode;
+
+	if (!q) {

Kill the blank line above.

+		q = p->inverse_trans_unicode = (u16 *) 
+			kmalloc(MAX_GLYPH * sizeof(u16), GFP_KERNEL);

The cast is not needed.  And btw, where is q freed?

+		if (!q) return;

Two lines again.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* coding style (was Re: [PATCH][2.5] UTF-8 support in console)
  2003-05-31 14:21 ` Christoph Hellwig
@ 2003-05-31 14:43   ` Larry McVoy
  2003-05-31 15:01     ` Dave Jones
  2003-05-31 19:37   ` [PATCH][2.5] UTF-8 support in console Chris Heath
  1 sibling, 1 reply; 11+ messages in thread
From: Larry McVoy @ 2003-05-31 14:43 UTC (permalink / raw)
  To: Christoph Hellwig, Chris Heath, linux-kernel

> Please linewrap after 80 chars.

Amen to that.

> +	if (!q) {
> 
> Kill the blank line above.
> 
> +		if (!q) return;
> 
> Two lines again.

A couple of comments: in the BK source tree, we've diverged from the Linux
coding style a bit (maybe a lot, Linus has read the source, ask him).

One thing is 

	unless (p) {
		....
	}
instead of 
	if (!p) {
		....
	}

It's just a
#define unless(x) if (!(x)) 
but it makes some code read quite a bit easier.  I'm a stickler for not using
2 lines where one will do, i.e.,

	FILE	*f;

	...
	unless (f = fopen(file, "r")) {
		error handling;
		return (-1);
	}

You hiccup the first time you see it, then you can read it, then you
start using it.  Yeah, I know, I'm using the value of an assignment in
a conditional, trust me, it works fine.

One other one is the 

	if (!q) return;

Christoph said two lines, we don't do it that way.  The coding style we use is
a) one line is fine for a single statement.
b) in all other cases there are curly braces

	unless (q) return;	/* OK */
	unless (q) {		/* also OK */
		return;
	}
	unless (q)
		return;		/* not OK, no "}" */


The point of this style is twofold: save a line when the thing you are
doing is a single statement, and make it easier for your eyes (or my 
tired old eyes) to run over the code.  If you see indentation you know
it is a block and there will be a closing } without exception.

It keeps the line counts about 10% smaller or so in our source base.
If you are looking for bragging rights about how big your stuff is that
might be bad but I like it because I can read more code in a window.
-- 
---
Larry McVoy              lm at bitmover.com          http://www.bitmover.com/lm

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: coding style (was Re: [PATCH][2.5] UTF-8 support in console)
  2003-05-31 14:43   ` coding style (was Re: [PATCH][2.5] UTF-8 support in console) Larry McVoy
@ 2003-05-31 15:01     ` Dave Jones
  2003-05-31 15:39       ` Larry McVoy
  0 siblings, 1 reply; 11+ messages in thread
From: Dave Jones @ 2003-05-31 15:01 UTC (permalink / raw)
  To: Larry McVoy, Christoph Hellwig, Chris Heath, linux-kernel

On Sat, May 31, 2003 at 07:43:23AM -0700, Larry McVoy wrote:

 > One other one is the 
 > 
 > 	if (!q) return;
 > 
 > Christoph said two lines, we don't do it that way.  The coding style we use is
 > a) one line is fine for a single statement.
 > b) in all other cases there are curly braces

Saving a line over readability is utterly bogus.
Just look at some of the crap we have in devfs..

    if (fs_info->devfsd_task == NULL) return (TRUE);
    if (devfsd_queue_empty (fs_info) && fs_info->devfsd_sleeping) return TRUE;
    if ( is_devfsd_or_child (fs_info) ) return (FALSE);
    set_current_state (TASK_UNINTERRUPTIBLE);
    add_wait_queue (&fs_info->revalidate_wait_queue, &wait);
    if (!devfsd_queue_empty (fs_info) || !fs_info->devfsd_sleeping)
        if (fs_info->devfsd_task) schedule ();
    remove_wait_queue (&fs_info->revalidate_wait_queue, &wait);
    __set_current_state (TASK_RUNNING);
    return (TRUE);

*horror* to my eyes at least.

Parts of the DRI code use similar uglies.  Whitespace is a *good* thing.
If you want more lines of code per screen, get a larger xterm, change a
font, whatever, but don't decrease code readability for something so bogus.

		Dave


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: coding style (was Re: [PATCH][2.5] UTF-8 support in console)
  2003-05-31 15:01     ` Dave Jones
@ 2003-05-31 15:39       ` Larry McVoy
  2003-05-31 17:14         ` Steven Cole
  0 siblings, 1 reply; 11+ messages in thread
From: Larry McVoy @ 2003-05-31 15:39 UTC (permalink / raw)
  To: Dave Jones, Christoph Hellwig, Chris Heath, linux-kernel

On Sat, May 31, 2003 at 04:01:50PM +0100, Dave Jones wrote:
> Saving a line over readability is utterly bogus.

I agree 100%.  If you have anything more complex than

	if (error) return (error);

I want it to look like
	
	if ((expr) || (expr2) || (expr3)) {
		return (error);
	}

> Just look at some of the crap we have in devfs..

No kidding, look at the nested if, that's insane.

>     if (fs_info->devfsd_task == NULL) return (TRUE);
>     if (devfsd_queue_empty (fs_info) && fs_info->devfsd_sleeping) return TRUE;
>     if ( is_devfsd_or_child (fs_info) ) return (FALSE);
>     set_current_state (TASK_UNINTERRUPTIBLE);
>     add_wait_queue (&fs_info->revalidate_wait_queue, &wait);
>     if (!devfsd_queue_empty (fs_info) || !fs_info->devfsd_sleeping)
>         if (fs_info->devfsd_task) schedule ();
>     remove_wait_queue (&fs_info->revalidate_wait_queue, &wait);
>     __set_current_state (TASK_RUNNING);
>     return (TRUE);

I took a pass at this, I think this is better (note the use of 1/2 tabs
as "continuation" lines, that's a Sun thing and it works pretty well):

	if ((fs_info->devfsd_task == NULL) ||
	    (devfsd_queue_empty(fs_info) && fs_info->devfsd_sleeping)) {
		return (TRUE);
	}
	if (is_devfsd_or_child(fs_info)) return (FALSE);
	set_current_state (TASK_UNINTERRUPTIBLE);
	add_wait_queue (&fs_info->revalidate_wait_queue, &wait);
	if ((!devfsd_queue_empty (fs_info) || !fs_info->devfsd_sleeping) &&
	    fs_info->devfsd_task) {
	    	schedule();
	}
	remove_wait_queue(&fs_info->revalidate_wait_queue, &wait);
	__set_current_state(TASK_RUNNING);
	return (TRUE);

-- 
---
Larry McVoy              lm at bitmover.com          http://www.bitmover.com/lm

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: coding style (was Re: [PATCH][2.5] UTF-8 support in console)
  2003-05-31 15:39       ` Larry McVoy
@ 2003-05-31 17:14         ` Steven Cole
  2003-05-31 17:56           ` viro
  0 siblings, 1 reply; 11+ messages in thread
From: Steven Cole @ 2003-05-31 17:14 UTC (permalink / raw)
  To: Larry McVoy; +Cc: Dave Jones, Christoph Hellwig, Chris Heath, linux-kernel

On Sat, 2003-05-31 at 09:39, Larry McVoy wrote:
> On Sat, May 31, 2003 at 04:01:50PM +0100, Dave Jones wrote:
> > Saving a line over readability is utterly bogus.
> 
> I agree 100%.  If you have anything more complex than
> 
> 	if (error) return (error);
> 
> I want it to look like
> 	
> 	if ((expr) || (expr2) || (expr3)) {
> 		return (error);
> 	}
> 
This may just be pedantic minutiae, but aren't those parentheses around
"error" unnecessary?

Here is a proposal for coding style: Only use parentheses in the return
statement when needed.

return -ETOSENDERADDRESSUNKNOWN;	/* this is OK */
return (value & ZORRO_MASK);		/* so is this */
return (-ENOTENOUGHCOFFEE);		/* bogus parenthesis */ 

Steven


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: coding style (was Re: [PATCH][2.5] UTF-8 support in console)
  2003-05-31 17:14         ` Steven Cole
@ 2003-05-31 17:56           ` viro
  0 siblings, 0 replies; 11+ messages in thread
From: viro @ 2003-05-31 17:56 UTC (permalink / raw)
  To: Steven Cole
  Cc: Larry McVoy, Dave Jones, Christoph Hellwig, Chris Heath, linux-kernel

On Sat, May 31, 2003 at 11:14:08AM -0600, Steven Cole wrote:

> statement when needed.
> 
> return -ETOSENDERADDRESSUNKNOWN;	/* this is OK */
> return (value & ZORRO_MASK);		/* so is this */

Like hell it is.  Parentheses are _not_ needed here - production is
<statement> -> return <expression> ;

The only messy '('-related case in C grammar is sizeof as unary operation
vs. sizeof ( <type> ) (lovely way to torture parsers and students on exam:
sizeof (int)*p).  Everything else is pretty straightforward...

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][2.5] UTF-8 support in console
  2003-05-31 14:21 ` Christoph Hellwig
  2003-05-31 14:43   ` coding style (was Re: [PATCH][2.5] UTF-8 support in console) Larry McVoy
@ 2003-05-31 19:37   ` Chris Heath
  1 sibling, 0 replies; 11+ messages in thread
From: Chris Heath @ 2003-05-31 19:37 UTC (permalink / raw)
  To: Christoph Hellwig, Chris Heath, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 414 bytes --]

Here's the patch again, with suggested changes.


> +	if (!p) return;
> 
> Please split this into two lines. Can p ever be null?

p could never be null, as it turns out. So I removed this line. :-)


> +		q = p->inverse_trans_unicode = (u16 *) 
> +			kmalloc(MAX_GLYPH * sizeof(u16), GFP_KERNEL);
> 
> The cast is not needed.  And btw, where is q freed?

It is freed. Grep for kfree.*inverse_trans_unicode.

Chris

[-- Attachment #2: selection.patch --]
[-- Type: application/octet-stream, Size: 6480 bytes --]

--- a/include/linux/consolemap.h	2003-05-04 19:53:56.000000000 -0400
+++ b/include/linux/consolemap.h	2003-05-26 15:17:45.000000000 -0400
@@ -10,6 +10,6 @@
 
 struct vc_data;
 
-extern unsigned char inverse_translate(struct vc_data *conp, int glyph);
+extern u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode);
 extern unsigned short *set_translate(int m,int currcons);
 extern int conv_uni_to_pc(struct vc_data *conp, long ucs);
--- a/drivers/char/consolemap.c	2003-05-04 19:53:36.000000000 -0400
+++ b/drivers/char/consolemap.c	2003-05-31 13:57:53.000000000 -0400
@@ -178,6 +178,7 @@
 	unsigned long	refcount;
 	unsigned long	sum;
 	unsigned char	*inverse_translations[4];
+	u16		*inverse_trans_unicode;
 	int		readonly;
 };
 
@@ -208,6 +209,40 @@
 	}
 }
 
+static void set_inverse_trans_unicode(struct vc_data *conp, 
+				      struct uni_pagedir *p)
+{
+	int i, j, k, glyph;
+	u16 **p1, *p2;
+	u16 *q;
+	
+	q = p->inverse_trans_unicode;
+	if (!q) {
+		q = p->inverse_trans_unicode =
+			kmalloc(MAX_GLYPH * sizeof(u16), GFP_KERNEL);
+		if (!q)
+			return;
+	}
+	memset(q, 0, MAX_GLYPH * sizeof(u16));
+
+	for (i = 0; i < 32; i++) {
+		p1 = p->uni_pgdir[i];
+		if (!p1)
+			continue;
+		for (j = 0; j < 32; j++) {
+			p2 = p1[j];
+			if (!p2)
+				continue;
+			for (k = 0; k < 64; k++) {
+				glyph = p2[k];
+				if (glyph >= 0 && glyph < MAX_GLYPH 
+					       && q[glyph] < 32)
+		  			q[glyph] = (i << 11) + (j << 6) + k;
+			}
+		}
+	}
+}
+
 unsigned short *set_translate(int m,int currcons)
 {
 	inv_translate[currcons] = m;
@@ -218,19 +253,29 @@
  * Inverse translation is impossible for several reasons:
  * 1. The font<->character maps are not 1-1.
  * 2. The text may have been written while a different translation map
- *    was active, or using Unicode.
+ *    was active.
  * Still, it is now possible to a certain extent to cut and paste non-ASCII.
  */
-unsigned char inverse_translate(struct vc_data *conp, int glyph)
+u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode)
 {
 	struct uni_pagedir *p;
+	int m;
 	if (glyph < 0 || glyph >= MAX_GLYPH)
 		return 0;
-	else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc) ||
-		 !p->inverse_translations[inv_translate[conp->vc_num]])
+	else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc))
 		return glyph;
-	else
-		return p->inverse_translations[inv_translate[conp->vc_num]][glyph];
+	else if (use_unicode) {
+		if (!p->inverse_trans_unicode)
+			return glyph;
+		else
+			return p->inverse_trans_unicode[glyph];
+	} else {
+		m = inv_translate[conp->vc_num];
+		if (!p->inverse_translations[m])
+			return glyph;
+		else
+			return p->inverse_translations[m][glyph];
+	}
 }
 
 static void update_user_maps(void)
@@ -362,6 +407,10 @@
 			kfree(p->inverse_translations[i]);
 			p->inverse_translations[i] = NULL;
 		}
+	if (p->inverse_trans_unicode) {
+		kfree(p->inverse_trans_unicode);
+		p->inverse_trans_unicode = NULL;
+	}
 }
 
 void con_free_unimap(int con)
@@ -522,6 +571,7 @@
 
 	for (i = 0; i <= 3; i++)
 		set_inverse_transl(conp, p, i); /* Update all inverse translations */
+	set_inverse_trans_unicode(conp, p);
   
 	return err;
 }
@@ -574,6 +624,7 @@
 
 	for (i = 0; i <= 3; i++)
 		set_inverse_transl(conp, p, i);	/* Update all inverse translations */
+	set_inverse_trans_unicode(conp, p);
 	dflt = p;
 	return err;
 }
--- a/drivers/char/selection.c	2003-05-04 19:53:42.000000000 -0400
+++ b/drivers/char/selection.c	2003-05-31 13:54:17.000000000 -0400
@@ -20,6 +20,7 @@
 
 #include <asm/uaccess.h>
 
+#include <linux/kbd_kern.h>
 #include <linux/vt_kern.h>
 #include <linux/consolemap.h>
 #include <linux/selection.h>
@@ -36,6 +37,7 @@
 /* Variables for selection control. */
 /* Use a dynamic buffer, instead of static (Dec 1994) */
        int sel_cons;		/* must not be disallocated */
+static int use_unicode;
 static volatile int sel_start = -1; 	/* cleared by clear_selection */
 static int sel_end;
 static int sel_buffer_lth;
@@ -56,10 +58,11 @@
 	complement_pos(sel_cons, where);
 }
 
-static unsigned char
+static u16
 sel_pos(int n)
 {
-	return inverse_translate(vc_cons[sel_cons].d, screen_glyph(sel_cons, n));
+	return inverse_translate(vc_cons[sel_cons].d, screen_glyph(sel_cons, n),
+				 use_unicode);
 }
 
 /* remove the current selection highlight, if any,
@@ -88,8 +91,8 @@
   0xFF7FFFFF  /* latin-1 accented letters, not division sign */
 };
 
-static inline int inword(const unsigned char c) {
-	return ( inwordLut[c>>5] >> (c & 0x1F) ) & 1;
+static inline int inword(const u16 c) {
+	return c > 0xff || (( inwordLut[c>>5] >> (c & 0x1F) ) & 1);
 }
 
 /* set inwordLut contents. Invoked by ioctl(). */
@@ -110,13 +113,36 @@
 	return (v > u) ? u : v;
 }
 
+/* stores the char in UTF8 and returns the number of bytes used (1-3) */
+int store_utf8(u16 c, char *p) 
+{
+	if (c < 0x80) {
+		/*  0******* */
+		p[0] = c;
+		return 1;
+	} else if (c < 0x800) {
+		/* 110***** 10****** */
+		p[0] = 0xc0 | (c >> 6);
+		p[1] = 0x80 | (c & 0x3f);
+		return 2;
+    	} else {
+		/* 1110**** 10****** 10****** */
+		p[0] = 0xe0 | (c >> 12);
+		p[1] = 0x80 | ((c >> 6) & 0x3f);
+		p[2] = 0x80 | (c & 0x3f);
+		return 3;
+    	}
+}
+
 /* set the current selection. Invoked by ioctl() or by kernel code. */
 int set_selection(const unsigned long arg, struct tty_struct *tty, int user)
 {
 	int sel_mode, new_sel_start, new_sel_end, spc;
 	char *bp, *obp;
-	int i, ps, pe;
+	int i, ps, pe, multiplier;
+	u16 c;
 	unsigned int currcons = fg_console;
+	struct kbd_struct *kbd = kbd_table + fg_console;
 
 	unblank_screen();
 	poke_blanked_console();
@@ -170,6 +196,7 @@
 		clear_selection();
 		sel_cons = fg_console;
 	}
+	use_unicode = kbd && kbd->kbdmode == VC_UNICODE;
 
 	switch (sel_mode)
 	{
@@ -252,7 +279,8 @@
 	sel_end = new_sel_end;
 
 	/* Allocate a new buffer before freeing the old one ... */
-	bp = kmalloc((sel_end-sel_start)/2+1, GFP_KERNEL);
+	multiplier = use_unicode ? 3 : 1;  /* chars can take up to 3 bytes */
+	bp = kmalloc((sel_end-sel_start)/2*multiplier+1, GFP_KERNEL);
 	if (!bp) {
 		printk(KERN_WARNING "selection: kmalloc() failed\n");
 		clear_selection();
@@ -264,8 +292,12 @@
 
 	obp = bp;
 	for (i = sel_start; i <= sel_end; i += 2) {
-		*bp = sel_pos(i);
-		if (!isspace(*bp++))
+		c = sel_pos(i);
+		if (use_unicode)
+			bp += store_utf8(c, bp);
+		else
+			*bp++ = c;
+		if (!isspace(c))
 			obp = bp;
 		if (! ((i + 2) % video_size_row)) {
 			/* strip trailing blanks from line and add newline,

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: coding style (was Re: [PATCH][2.5] UTF-8 support in console)
  2003-06-01  4:41 ` Matt Mackall
@ 2003-06-01  5:18   ` Randy.Dunlap
  0 siblings, 0 replies; 11+ messages in thread
From: Randy.Dunlap @ 2003-06-01  5:18 UTC (permalink / raw)
  To: mpm; +Cc: john, linux-kernel

> On Sat, May 31, 2003 at 05:06:21PM +0100, john@grabjohn.com wrote:
>
>> Having said that, I hate code that is indented. Am I the only person who
>> mentally counts, "in, in, in, out", etc, when reading code?
>
> Yes.

I agree.  That's a "last resort" type of thing.

~Randy




^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: coding style (was Re: [PATCH][2.5] UTF-8 support in console)
  2003-05-31 16:06 coding style (was Re: [PATCH][2.5] UTF-8 support in console) john
@ 2003-06-01  4:41 ` Matt Mackall
  2003-06-01  5:18   ` Randy.Dunlap
  0 siblings, 1 reply; 11+ messages in thread
From: Matt Mackall @ 2003-06-01  4:41 UTC (permalink / raw)
  To: john; +Cc: linux-kernel

On Sat, May 31, 2003 at 05:06:21PM +0100, john@grabjohn.com wrote:

> Having said that, I hate code that is indented. Am I the only person
> who mentally counts, "in, in, in, out", etc, when reading code?

Yes.
-- 
Matt Mackall : http://www.selenic.com : of or relating to the moon

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: coding style (was Re: [PATCH][2.5] UTF-8 support in console)
@ 2003-05-31 16:06 john
  2003-06-01  4:41 ` Matt Mackall
  0 siblings, 1 reply; 11+ messages in thread
From: john @ 2003-05-31 16:06 UTC (permalink / raw)
  To: chris, davej, hch, linux-kernel, lm

> > Saving a line over readability is utterly bogus.

> I agree 100%.  If you have anything more complex than
>
>         if (error) return (error);
>
> I want it to look like
>
>         if ((expr) || (expr2) || (expr3)) {
>                 return (error);
>         }

Ergh, personally I hate reading code like that.

Having said that, I hate code that is indented.  Am I the only person who mentally
counts, "in, in, in, out", etc, when reading code?  Artificial indenting really
throws off my concentration, because the 'prompting' it provides does nothing to
help me, but the ragged left margin is irritating, and makes moving code around
awkward, because you have to correct the indenting to match the current, (actual),
nesting level of the code, (because indenting is a completely artificial concept).

I'm not trying to say my coding style is right, and yours is wrong, I'm just curious
 :-).

John.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2003-06-01  5:05 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-05-31 14:10 [PATCH][2.5] UTF-8 support in console Chris Heath
2003-05-31 14:21 ` Christoph Hellwig
2003-05-31 14:43   ` coding style (was Re: [PATCH][2.5] UTF-8 support in console) Larry McVoy
2003-05-31 15:01     ` Dave Jones
2003-05-31 15:39       ` Larry McVoy
2003-05-31 17:14         ` Steven Cole
2003-05-31 17:56           ` viro
2003-05-31 19:37   ` [PATCH][2.5] UTF-8 support in console Chris Heath
2003-05-31 16:06 coding style (was Re: [PATCH][2.5] UTF-8 support in console) john
2003-06-01  4:41 ` Matt Mackall
2003-06-01  5:18   ` Randy.Dunlap

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).