Hi, On Wed, 13 May 2009, Robin Rosenberg wrote: > --- No SOB (Signed-off-by line). > diff --git a/git.c b/git.c > index 6475847..bd4e726 100644 > --- a/git.c > +++ b/git.c > @@ -272,6 +272,15 @@ static void handle_internal_command(int argc, const char **argv, char **envp) > }; > int i; > > +#ifdef DEBUG > + if (debug()) { > + fprintf(stderr,"GIT-"); > + for (i = 1; i < argc; i++) > + fprintf(stderr,"%s",argv[i]); > + fprintf(stderr,"\n"); > + } > +#endif > + What does that have to do with UTF support? > diff --git a/t/test-lib.sh b/t/test-lib.sh > index 07cb706..e8aefd8 100755 > --- a/t/test-lib.sh > +++ b/t/test-lib.sh > @@ -4,11 +4,9 @@ > # > > # For repeatability, reset the environment to known value. > -LANG=C > -LC_ALL=C > PAGER=cat > TZ=UTC > -export LANG LC_ALL PAGER TZ > +export PAGER TZ > EDITOR=: > VISUAL=: > unset AUTHOR_DATE Likewise. > diff --git a/test-utf.c b/test-utf.c > new file mode 100644 > index 0000000..133eea0 > --- /dev/null > +++ b/test-utf.c > @@ -0,0 +1,61 @@ > +#include > +#include > +#include > + > +#include "cache.h" > +#include "utf.h" > + > +int main(int argc, char **argv) > +{ > + int i; > + > +#if 0 > + for (i = 1; i < argc; i++) { > + char result1[100]; > + char result2[100]; > + > + utfcpy(result1, argv[i], strlen(argv[i])+1); > + localcpy(result2, result1, strlen(result1)+1); > + > + printf("%s -> %s -> %s\n", argv[i], result1, result2); > + } > + return 0; > +#endif > + > +#define test(name) case __LINE__: current_name=name; n++; printf("Testing case #%d: %s\n", n, current_name); > +#define end_test break; > +#define begin_suite() char *current_name=0; int n=1; for (i=0; i<1000; ++i) { switch(i) { > +#define concats(a,b) #a #b > + > +#undef strcmp > +#define assertStringEquals(a,b) assert(#a #b && strcmp(a,b)==0) > +#define assertIntEquals(a,b) assert(#a #b && (a)==(b)) > + > +#define end_suite() }} > + > + begin_suite(); > + > + test("utfcpy") { > + char result[100]; > + utfcpy(result,"Ändrad",7); > + assertStringEquals(result,"\303\204ndrad"); > + } end_test; > 
+ > + test("utflen") { > + int result=utflen("Ändrad",7); > + assertIntEquals(result,8); > + } end_test; > + > + test("localcpy") { > + char result[100]; > + localcpy(result,"\303\204ndrad",8); > + assertStringEquals(result,"Ändrad"); > + } end_test; > + > + test("locallen") { > + int result=locallen("\303\204ndrad",8); > + assertIntEquals(result,7); > + } end_test; > + > + end_suite(); > +} Should the test-utf binary not rather perform _actions_ (i.e. transformations) instead of checks? > diff --git a/utf.c b/utf.c > new file mode 100644 > index 0000000..eb430b2 > --- /dev/null > +++ b/utf.c > @@ -0,0 +1,207 @@ > +#undef UTF8INTERNAL > + > +#include > +#include > +#include "cache.h" > +#include > +#include > + > +static iconv_t local_to_utf8 = (iconv_t)-1; > +static iconv_t utf8_to_local = (iconv_t)-1; > +static iconv_t utf8_to_utf8 = (iconv_t)-1; > +static int same = 0; > + > +#if TEST > +#define die printf > +#endif This is ugly. > + > +static void initlocale() > +{ > +#ifndef NO_ICONV > + if (!same && local_to_utf8 == (iconv_t)-1) { > + setlocale(LC_CTYPE, ""); > + char *local_encoding = nl_langinfo(CODESET); > +#ifdef DEBUG > + if (debug()) fprintf(stderr,"encoding=%s\n", local_encoding); > +#endif This is ugly. > + if (strcmp(local_encoding,"UTF-8") == 0) { > + same = 1; > + return; > + } > + local_to_utf8 = iconv_open("UTF-8", local_encoding); > + if (local_to_utf8 == (iconv_t)-1) { > + die("cannot setup locale conversion from %s: %s", local_encoding, strerror(errno)); > + } > +#ifdef DEBUG > + if (debug()) fprintf(stderr,"utf8_to_local = iconv_open(%s,UTF-8)\n",local_encoding); > +#endif This is ugly. 
> + utf8_to_local = iconv_open(local_encoding, "UTF-8"); > + if (utf8_to_local == (iconv_t)-1) { > + die("cannot setup locale conversion from %s: %s", local_encoding, strerror(errno)); > + } > + > + utf8_to_utf8 = iconv_open("UTF-8","UTF-8"); > + if (utf8_to_utf8 == (iconv_t)-1) { > + die("cannot setup locale conversion from UTF-8 to UTF-8: %s",strerror(errno)); > + } > + } > +#endif > +} > + > +int maybe_utf8(const char *local, size_t len) > +{ > + char *self = xcalloc(1,len+1); > + char *selfp = self; > + size_t outlen = len+1; > + int ret = iconv(utf8_to_utf8, (char**)&local, &len, &selfp, &outlen); > + free(self); > + P(("maybelocal: %0.*s %s\n", len, local, ret!=-1 ? "yes" : "no")); > + return ret != -1; > +} > + > +size_t utflen(const char *local, size_t locallen) > +{ > +#ifndef NO_ICONV > + initlocale(); > + if (same) { > + return locallen; > + } > + if (maybe_utf8(local, locallen)) > + return locallen; > + > + size_t outlen=locallen*6; > + char *outbuf=xcalloc(outlen,1); > + char *out=outbuf; > + iconv(local_to_utf8, NULL, NULL, NULL, NULL); > + const char *vlocal = local; > + size_t vlocallen = locallen; > + if (iconv(local_to_utf8, (char**)&vlocal, &vlocallen, &out, &outlen) == -1) { > +#if TEST > + perror("failed"); > +#endif > + free(outbuf); > + return locallen; > + } > + *out = 0; > + free(outbuf); > + return locallen*6 - outlen; > +#else > + return locallen; > +#endif > +} > + > +/* Copy and transform */ > +void utfcpy(char *to_utf, char *from_local, size_t localsize) > +{ > +#ifdef DEBUG > + char *a=to_utf,*b=from_local; > +#endif > +#ifndef NO_ICONV > + initlocale(); > + if (same) { > + memcpy(to_utf, from_local, localsize); > + return; > + } > + if (maybe_utf8(from_local, localsize)) { > + memcpy(to_utf, from_local, localsize); > + return; > + } > + > + size_t outlen=localsize*6; > + iconv(local_to_utf8, NULL, NULL, NULL, NULL); > + char *vfrom_local = from_local; > + char *vto_utf = to_utf; > + size_t vlocalsize = localsize; > + if 
(iconv(local_to_utf8, &vfrom_local, &vlocalsize, &vto_utf, &outlen) == -1) { > + fprintf(stderr,"Failed to convert %0.*s to UTF\n", localsize, from_local); > + memcpy(to_utf, from_local, localsize); > + } > +#else > + memcpy(to_utf, from_local, localsize); > +#endif > +#ifdef DEBUG > + if (debug()) fprintf(stderr,"%0.*s ->UTF %0.*s\n", localsize, b, localsize*6 - outlen, a); > +#endif > +} Okay, I'll stop here. You might want to clean up your patch series before resending. Ciao, Dscho