All of lore.kernel.org
 help / color / mirror / Atom feed
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
To: Robin Rosenberg <robin.rosenberg@dewire.com>
Cc: git@vger.kernel.org
Subject: Re: [RFC 1/8] UTF helpers
Date: Wed, 13 May 2009 02:20:32 +0200 (CEST)	[thread overview]
Message-ID: <alpine.DEB.1.00.0905130215260.27348@pacific.mpi-cbg.de> (raw)
In-Reply-To: <1242168631-30753-2-git-send-email-robin.rosenberg@dewire.com>

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: TEXT/PLAIN; charset=X-UNKNOWN, Size: 6323 bytes --]

Hi,

On Wed, 13 May 2009, Robin Rosenberg wrote:

> ---

No SOB (i.e. the Signed-off-by: line is missing).

> diff --git a/git.c b/git.c
> index 6475847..bd4e726 100644
> --- a/git.c
> +++ b/git.c
> @@ -272,6 +272,15 @@ static void handle_internal_command(int argc, const char **argv, char **envp)
>  	};
>  	int i;
>  
> +#ifdef DEBUG
> +	if (debug()) {
> +		fprintf(stderr,"GIT-");
> +		for (i = 1; i<argc; ++i)
> +			fprintf(stderr,"%s",argv[i]);
> +		fprintf(stderr,"\n");
> +	}
> +#endif
> +

What does that have to do with UTF support?

> diff --git a/t/test-lib.sh b/t/test-lib.sh
> index 07cb706..e8aefd8 100755
> --- a/t/test-lib.sh
> +++  b/t/test-lib.sh
> @@ -4,11 +4,9 @@
>  #
>  
>  # For repeatability, reset the environment to known value.
> -LANG=C
> -LC_ALL=C
>  PAGER=cat
>  TZ=UTC
> -export LANG LC_ALL PAGER TZ
> +export PAGER TZ
>  EDITOR=:
>  VISUAL=:
>  unset AUTHOR_DATE

Likewise.

> diff --git a/test-utf.c b/test-utf.c
> new file mode 100644
> index 0000000..133eea0
> --- /dev/null
> +++ b/test-utf.c
> @@ -0,0 +1,61 @@
> +#include <stdio.h>
> +#include <time.h>
> +#include <assert.h>
> +
> +#include "cache.h"
> +#include "utf.h"
> +
> +int main(int argc, char **argv)
> +{
> +	int i;
> +
> +#if 0
> +	for (i = 1; i < argc; i++) {
> +		char result1[100];
> +		char result2[100];
> +
> +		utfcpy(result1, argv[i], strlen(argv[i])+1);
> +		localcpy(result2, result1, strlen(result1)+1);
> +
> +		printf("%s -> %s -> %s\n", argv[i], result1, result2);
> +	}
> +	return 0;
> +#endif
> +
> +#define test(name) case __LINE__: current_name=name; n++; printf("Testing case #%d: %s\n", n, current_name);
> +#define end_test break;
> +#define begin_suite() char *current_name=0; int n=1; for (i=0; i<1000; ++i) { switch(i) { 
> +#define concats(a,b) #a #b
> +
> +#undef strcmp
> +#define assertStringEquals(a,b) assert(#a #b && strcmp(a,b)==0)
> +#define assertIntEquals(a,b) assert(#a #b && (a)==(b))
> +
> +#define end_suite() }}
> +
> +	begin_suite();
> +
> +	test("utfcpy") {
> +	  char result[100];
> +	  utfcpy(result,"Ändrad",7);
> +	  assertStringEquals(result,"\303\204ndrad");
> +	} end_test;
> +
> +	test("utflen") {
> +	  int result=utflen("Ändrad",7);
> +	  assertIntEquals(result,8);
> +	} end_test;
> +
> +	test("localcpy") {
> +	  char result[100];
> +	  localcpy(result,"\303\204ndrad",8);
> +	  assertStringEquals(result,"Ändrad");
> +	} end_test;
> +
> +	test("locallen") {
> +	  int result=locallen("\303\204ndrad",8);
> +	  assertIntEquals(result,7);
> +	} end_test;
> +
> +	end_suite();
> +}

Shouldn't the test-utf binary perform _actions_ (i.e. 
transformations) rather than checks?

> diff --git a/utf.c b/utf.c
> new file mode 100644
> index 0000000..eb430b2
> --- /dev/null
> +++ b/utf.c
> @@ -0,0 +1,207 @@
> +#undef UTF8INTERNAL
> +
> +#include <langinfo.h>
> +#include <iconv.h>
> +#include "cache.h"
> +#include <locale.h>
> +#include <stdarg.h>
> +
> +static iconv_t local_to_utf8 = (iconv_t)-1;
> +static iconv_t utf8_to_local = (iconv_t)-1;
> +static iconv_t utf8_to_utf8 = (iconv_t)-1;
> +static int same = 0;
> +
> +#if TEST
> +#define die printf
> +#endif

This is ugly.

> +
> +static void	initlocale()
> +{
> +#ifndef NO_ICONV
> +	if (!same && local_to_utf8 == (iconv_t)-1) {
> +		setlocale(LC_CTYPE, "");
> +		char *local_encoding = nl_langinfo(CODESET);
> +#ifdef DEBUG
> +		if (debug()) fprintf(stderr,"encoding=%s\n", local_encoding);
> +#endif

This is ugly.

> +		if (strcmp(local_encoding,"UTF-8") == 0) {
> +			same = 1;
> +			return;
> +		}
> +		local_to_utf8 = iconv_open("UTF-8",  local_encoding);
> +		if (local_to_utf8 == (iconv_t)-1) {
> +			die("cannot setup locale conversion from %s: %s", local_encoding, strerror(errno));
> +		}
> +#ifdef DEBUG
> +		if (debug()) fprintf(stderr,"utf8_to_local = iconv_open(%s,UTF-8)\n",local_encoding);
> +#endif

This is ugly.

> +		utf8_to_local = iconv_open(local_encoding,  "UTF-8");
> +		if (utf8_to_local == (iconv_t)-1) {
> +			die("cannot setup locale conversion from %s: %s", local_encoding, strerror(errno));
> +		}
> +
> +		utf8_to_utf8 = iconv_open("UTF-8","UTF-8");
> +		if (utf8_to_utf8 == (iconv_t)-1) {
> +			die("cannot setup locale conversion from UTF-8 to UTF-8: %s",strerror(errno));
> +		}
> +	}
> +#endif
> +}
> +
> +int maybe_utf8(const char *local, size_t len)
> +{
> +  char *self = xcalloc(1,len+1);
> +  char *selfp = self;
> +  size_t outlen = len+1;
> +  int ret = iconv(utf8_to_utf8, (char**)&local, &len, &selfp, &outlen);
> +  free(self);
> +  P(("maybelocal: %0.*s %s\n", len, local, ret!=-1 ? "yes" : "no"));
> +  return ret != -1;
> +}
> +
> +size_t utflen(const char *local, size_t locallen)
> +{
> +#ifndef NO_ICONV
> +	initlocale();
> +	if (same) {
> +		return locallen;
> +	}
> +	if (maybe_utf8(local, locallen))
> +		return locallen;
> +
> +	size_t outlen=locallen*6;
> +	char *outbuf=xcalloc(outlen,1);
> +	char *out=outbuf;
> +	iconv(local_to_utf8, NULL, NULL, NULL, NULL);
> +	const char *vlocal = local;
> +	size_t vlocallen = locallen;
> +	if (iconv(local_to_utf8,  (char**)&vlocal,  &vlocallen,  &out,  &outlen) == -1) {
> +#if TEST
> +		perror("failed");
> +#endif
> +		free(outbuf);
> +		return locallen;
> +	}
> +	*out = 0;
> +	free(outbuf);
> +	return locallen*6 - outlen;
> +#else
> +	return locallen;
> +#endif
> +}
> +
> +/* Copy and transform */
> +void utfcpy(char *to_utf, char *from_local, size_t localsize)
> +{
> +#ifdef DEBUG
> +	char *a=to_utf,*b=from_local;
> +#endif
> +#ifndef NO_ICONV
> +	initlocale();
> +	if (same) {
> +		memcpy(to_utf, from_local, localsize);
> +		return;
> +	}
> +	if (maybe_utf8(from_local, localsize)) {
> +		memcpy(to_utf, from_local, localsize);
> +		return;
> +	}
> +
> +	size_t outlen=localsize*6;
> +	iconv(local_to_utf8, NULL, NULL, NULL, NULL);
> +	char *vfrom_local = from_local;
> +	char *vto_utf = to_utf;
> +	size_t vlocalsize = localsize;
> +	if (iconv(local_to_utf8,  &vfrom_local,  &vlocalsize,  &vto_utf,  &outlen) == -1) {
> +		fprintf(stderr,"Failed to convert %0.*s to UTF\n", localsize, from_local);
> +		memcpy(to_utf,  from_local,  localsize);
> +	}
> +#else
> +	memcpy(to_utf, from_local, localsize);
> +#endif
> +#ifdef DEBUG
> +	if (debug()) fprintf(stderr,"%0.*s ->UTF %0.*s\n", localsize, b, localsize*6 - outlen, a);
> +#endif
> +}

Okay, I'll stop here.  You might want to clean up your patch series before 
resending.

Ciao,
Dscho

  parent reply	other threads:[~2009-05-13  0:20 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-05-12 22:50 [RFC 0/8] Antique UTF-8 filename support Robin Rosenberg
2009-05-12 22:50 ` [RFC 1/8] UTF helpers Robin Rosenberg
2009-05-12 22:50   ` [RFC 2/8] Messages in locale Robin Rosenberg
2009-05-12 22:50     ` [RFC 3/8] Extend tests to cover locale wrt to commit messages Robin Rosenberg
2009-05-12 22:50       ` [RFC 4/8] UTF file names Robin Rosenberg
     [not found]         ` <1242168631-30753-6-git-send-email-robin.rosenberg@dewire.com>
2009-05-12 22:50           ` [RFC 6/8] test of utf_locallinks Robin Rosenberg
2009-05-12 22:50             ` [RFC 7/8] Convert symlink dest in diff Robin Rosenberg
2009-05-12 22:50               ` [RFC 8/8] UTF-8 in non-SHA1-objects Robin Rosenberg
2009-05-13  0:20   ` Johannes Schindelin [this message]
2009-05-13  5:24     ` [RFC 1/8] UTF helpers Robin Rosenberg
2009-05-13  9:24       ` Esko Luontola
2009-05-13 10:02         ` Andreas Ericsson
2009-05-13 10:21           ` Esko Luontola
2009-05-13 11:44             ` Alex Riesen
2009-05-13 18:48         ` Junio C Hamano
2009-05-13 19:31           ` Esko Luontola
2009-05-13 20:10             ` Junio C Hamano
2009-05-13 10:14       ` Johannes Schindelin
2009-05-14  4:38       ` Junio C Hamano
2009-05-14 13:57         ` Jay Soffian

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=alpine.DEB.1.00.0905130215260.27348@pacific.mpi-cbg.de \
    --to=johannes.schindelin@gmx.de \
    --cc=git@vger.kernel.org \
    --cc=robin.rosenberg@dewire.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.