All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] http: Add Accept-Language header if possible
@ 2014-07-08 15:54 Yi EungJun
  2014-07-08 21:52 ` Eric Sunshine
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Yi EungJun @ 2014-07-08 15:54 UTC (permalink / raw)
  To: git; +Cc: Yi EungJun

From: Yi EungJun <eungjun.yi@navercorp.com>

Add an Accept-Language header which indicates the user's preferred
languages defined by 'LANGUAGE' environment variable if the variable is
not empty.

Example:
  LANGUAGE= -> ""
  LANGUAGE=ko -> "Accept-Language: ko; q=1.000, *; q=0.001"
  LANGUAGE=ko:en -> "Accept-Language: ko; q=1.000, en; q=0.999, *; q=0.001"

This gives git servers a chance to display remote error messages in
the user's preferred language.
---
 http.c                     | 43 +++++++++++++++++++++++++++++++++++++++++++
 t/t5550-http-fetch-dumb.sh | 10 ++++++++++
 2 files changed, 53 insertions(+)

diff --git a/http.c b/http.c
index 3a28b21..c345616 100644
--- a/http.c
+++ b/http.c
@@ -983,6 +983,47 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type,
 		strbuf_addstr(charset, "ISO-8859-1");
 }
 
+/*
+ * Add an Accept-Language header which indicates user's preferred languages
+ * defined by 'LANGUAGE' environment variable if the variable is not empty.
+ *
+ * Example:
+ *   LANGUAGE= -> ""
+ *   LANGUAGE=ko -> "Accept-Language: ko; q=1.000, *; q=0.001"
+ *   LANGUAGE=ko:en -> "Accept-Language: ko; q=1.000, en; q=0.999, *; q=0.001"
+ */
+static void add_accept_language(struct strbuf *buf)
+{
+	const char *p1, *p2;
+	float q = 1.000;
+
+	p1 = getenv("LANGUAGE");
+
+	if (p1 != NULL && p1[0] != '\0') {
+		strbuf_reset(buf);
+		strbuf_addstr(buf, "Accept-Language: ");
+		for (p2 = p1; q > 0.001; p2++) {
+			if ((*p2 == ':' || *p2 == '\0') && p1 != p2) {
+				if (q < 1.0) {
+					strbuf_addstr(buf, ", ");
+				}
+				strbuf_add(buf, p1, p2 - p1);
+				strbuf_addf(buf, "; q=%.3f", q);
+				q -= 0.001;
+				p1 = p2 + 1;
+
+				if (*p2 == '\0') {
+					break;
+				}
+			}
+		}
+		if (q < 1.0) {
+			strbuf_addstr(buf, ", ");
+		}
+		strbuf_addstr(buf, "*; q=0.001\r\n");
+	}
+}
+
 /* http_request() targets */
 #define HTTP_REQUEST_STRBUF	0
 #define HTTP_REQUEST_FILE	1
@@ -1020,6 +1061,8 @@ static int http_request(const char *url,
 					 fwrite_buffer);
 	}
 
+	add_accept_language(&buf);
+
 	strbuf_addstr(&buf, "Pragma:");
 	if (options && options->no_cache)
 		strbuf_addstr(&buf, " no-cache");
diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
index ac71418..ea15158 100755
--- a/t/t5550-http-fetch-dumb.sh
+++ b/t/t5550-http-fetch-dumb.sh
@@ -196,5 +196,15 @@ test_expect_success 'reencoding is robust to whitespace oddities' '
 	grep "this is the error message" stderr
 '
 
+test_expect_success 'git client sends Accept-Language' '
+	GIT_CURL_VERBOSE=1 LANGUAGE=ko:en git clone "$HTTPD_URL/accept/language" 2>actual
+	grep "^Accept-Language: ko; q=1.000, en; q=0.999, \*; q=0.001" actual
+'
+
+test_expect_success 'git client does not send Accept-Language' '
+	GIT_CURL_VERBOSE=1 LANGUAGE= git clone "$HTTPD_URL/accept/language" 2>actual
+	test_must_fail grep "^Accept-Language:" actual
+'
+
 stop_httpd
 test_done
-- 
2.0.1.473.gafdefd9.dirty

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] http: Add Accept-Language header if possible
  2014-07-08 15:54 [PATCH] http: Add Accept-Language header if possible Yi EungJun
@ 2014-07-08 21:52 ` Eric Sunshine
       [not found]   ` <CAFT+Tg-6fR9OJ93TT7ww3x=zYHY=Dh5u-7owgQMBK5o_JKLEaA@mail.gmail.com>
  2014-07-09  5:10 ` Jeff King
  2014-07-09 10:40 ` Peter Krefting
  2 siblings, 1 reply; 16+ messages in thread
From: Eric Sunshine @ 2014-07-08 21:52 UTC (permalink / raw)
  To: Yi EungJun; +Cc: Git List, Yi EungJun

On Tue, Jul 8, 2014 at 11:54 AM, Yi EungJun <semtlenori@gmail.com> wrote:
> From: Yi EungJun <eungjun.yi@navercorp.com>
>
> Add an Accept-Language header which indicates the user's preferred
> languages defined by 'LANGUAGE' environment variable if the variable is
> not empty.
>
> Example:
>   LANGUAGE= -> ""
>   LANGUAGE=ko -> "Accept-Language: ko; q=1.000, *; q=0.001"
>   LANGUAGE=ko:en -> "Accept-Language: ko; q=1.000, en; q=0.999, *; q=0.001"
>
> This gives git servers a chance to display remote error messages in
> the user's preferred language.
> ---
>  http.c                     | 43 +++++++++++++++++++++++++++++++++++++++++++
>  t/t5550-http-fetch-dumb.sh | 10 ++++++++++
>  2 files changed, 53 insertions(+)
>
> diff --git a/http.c b/http.c
> index 3a28b21..c345616 100644
> --- a/http.c
> +++ b/http.c
> @@ -983,6 +983,47 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type,
>                 strbuf_addstr(charset, "ISO-8859-1");
>  }
>
> +/*
> + * Add an Accept-Language header which indicates user's preferred languages
> + * defined by 'LANGUAGE' environment variable if the variable is not empty.
> + *
> + * Example:
> + *   LANGUAGE= -> ""
> + *   LANGUAGE=ko -> "Accept-Language: ko; q=1.000, *; q=0.001"
> + *   LANGUAGE=ko:en -> "Accept-Language: ko; q=1.000, en; q=0.999, *; q=0.001"
> + */
> +static void add_accept_language(struct strbuf *buf)
> +{
> +       const char *p1, *p2;
> +       float q = 1.000;
> +
> +       p1 = getenv("LANGUAGE");
> +
> +       if (p1 != NULL && p1[0] != '\0') {
> +               strbuf_reset(buf);

It seems wrong to clear 'buf' in a function named add_accept_language().

> +               strbuf_addstr(buf, "Accept-Language: ");
> +               for (p2 = p1; q > 0.001; p2++) {
> +                       if ((*p2 == ':' || *p2 == '\0') && p1 != p2) {
> +                               if (q < 1.0) {
> +                                       strbuf_addstr(buf, ", ");
> +                               }
> +                               strbuf_add(buf, p1, p2 - p1);
> +                               strbuf_addf(buf, "; q=%.3f", q);
> +                               q -= 0.001;
> +                               p1 = p2 + 1;
> +
> +                               if (*p2 == '\0') {
> +                                       break;
> +                               }
> +                       }
> +               }
> +               if (q < 1.0) {
> +                       strbuf_addstr(buf, ", ");
> +               }
> +               strbuf_addstr(buf, "*; q=0.001\r\n");

Manually adding "\r\n" is contraindicated. Headers passed to
curl_easy_setopt(c, CURLOPT_HTTPHEADER, headers) must not have "\r\n",
since curl will add terminators itself [1].

[1]: http://curl.haxx.se/libcurl/c/CURLOPT_HTTPHEADER.html

> +       }
> +}
> +
>  /* http_request() targets */
>  #define HTTP_REQUEST_STRBUF    0
>  #define HTTP_REQUEST_FILE      1
> @@ -1020,6 +1061,8 @@ static int http_request(const char *url,
>                                          fwrite_buffer);
>         }
>
> +       add_accept_language(&buf);

This is inconsistent with how other headers are handled by this
function. The existing idiom is:

    strbuf_add(&buf, ...); /* construct header */
    headers = curl_slist_apend(headers, buf.buf);
    strbuf_reset(&buf);

> +
>         strbuf_addstr(&buf, "Pragma:");
>         if (options && options->no_cache)
>                 strbuf_addstr(&buf, " no-cache");
> diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
> index ac71418..ea15158 100755
> --- a/t/t5550-http-fetch-dumb.sh
> +++ b/t/t5550-http-fetch-dumb.sh
> @@ -196,5 +196,15 @@ test_expect_success 'reencoding is robust to whitespace oddities' '
>         grep "this is the error message" stderr
>  '
>
> +test_expect_success 'git client sends Accept-Language' '
> +       GIT_CURL_VERBOSE=1 LANGUAGE=ko:en git clone "$HTTPD_URL/accept/language" 2>actual

Broken &&-chain.

> +       grep "^Accept-Language: ko; q=1.000, en; q=0.999, \*; q=0.001" actual

Do you want to \-escape the periods? (Or maybe use 'grep -F'?)

> +'
> +
> +test_expect_success 'git client does not send Accept-Language' '
> +       GIT_CURL_VERBOSE=1 LANGUAGE= git clone "$HTTPD_URL/accept/language" 2>actual

Broken &&-chain.

> +       test_must_fail grep "^Accept-Language:" actual
> +'
> +
>  stop_httpd
>  test_done
> --
> 2.0.1.473.gafdefd9.dirty

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] http: Add Accept-Language header if possible
  2014-07-08 15:54 [PATCH] http: Add Accept-Language header if possible Yi EungJun
  2014-07-08 21:52 ` Eric Sunshine
@ 2014-07-09  5:10 ` Jeff King
  2014-07-09  5:46   ` Yi, EungJun
  2014-07-09 10:40 ` Peter Krefting
  2 siblings, 1 reply; 16+ messages in thread
From: Jeff King @ 2014-07-09  5:10 UTC (permalink / raw)
  To: Yi EungJun; +Cc: git, Yi EungJun

On Wed, Jul 09, 2014 at 12:54:06AM +0900, Yi EungJun wrote:

> From: Yi EungJun <eungjun.yi@navercorp.com>
> 
> Add an Accept-Language header which indicates the user's preferred
> languages defined by 'LANGUAGE' environment variable if the variable is
> not empty.
> 
> Example:
>   LANGUAGE= -> ""
>   LANGUAGE=ko -> "Accept-Language: ko; q=1.000, *; q=0.001"
>   LANGUAGE=ko:en -> "Accept-Language: ko; q=1.000, en; q=0.999, *; q=0.001"
> 
> This gives git servers a chance to display remote error messages in
> the user's preferred language.

Should this also take into account other language-related variables? I'd
think $LC_ALL, $LC_MESSAGES, and $LANG would affect it, too.  Are
colon-separated values a standard in $LANGUAGE? I have never seen them,
but I admit I am not very knowledgeable about localization issues.

Also, we do we need to do more parsing? My $LANG is set to en_US.UTF-8.
The encoding part is presumably uninteresting to the remote server.  I
also wonder if there are support functions in libc or as part of gettext
that can help us get these values.

-Peff

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] http: Add Accept-Language header if possible
  2014-07-09  5:10 ` Jeff King
@ 2014-07-09  5:46   ` Yi, EungJun
  2014-07-09  6:18     ` Jeff King
  0 siblings, 1 reply; 16+ messages in thread
From: Yi, EungJun @ 2014-07-09  5:46 UTC (permalink / raw)
  To: Jeff King; +Cc: git

2014-07-09 14:10 GMT+09:00 Jeff King <peff@peff.net>:
> On Wed, Jul 09, 2014 at 12:54:06AM +0900, Yi EungJun wrote:
>
>> From: Yi EungJun <eungjun.yi@navercorp.com>
>>
>> Add an Accept-Language header which indicates the user's preferred
>> languages defined by 'LANGUAGE' environment variable if the variable is
>> not empty.
>>
>> Example:
>>   LANGUAGE= -> ""
>>   LANGUAGE=ko -> "Accept-Language: ko; q=1.000, *; q=0.001"
>>   LANGUAGE=ko:en -> "Accept-Language: ko; q=1.000, en; q=0.999, *; q=0.001"
>>
>> This gives git servers a chance to display remote error messages in
>> the user's preferred language.
>
> Should this also take into account other language-related variables? I'd
> think $LC_ALL, $LC_MESSAGES, and $LANG would affect it, too.  Are
> colon-separated values a standard in $LANGUAGE? I have never seen them,
> but I admit I am not very knowledgeable about localization issues.
>
> Also, we do we need to do more parsing? My $LANG is set to en_US.UTF-8.
> The encoding part is presumably uninteresting to the remote server.  I
> also wonder if there are support functions in libc or as part of gettext
> that can help us get these values.
>
> -Peff

I agree with you. In fact, I tried to get user's preferred language in
the same way as gettext. It has guess_category_value() to do that and
the function is good enough because it considers $LANGUAGE, $LC_ALL,
$LANG, and also system-dependent preferences. But the function does
not seem a public API and I don't know how can I use the function in
Git. So I chose to use $LANGUAGE only.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] http: Add Accept-Language header if possible
  2014-07-09  5:46   ` Yi, EungJun
@ 2014-07-09  6:18     ` Jeff King
  2014-07-09 10:46       ` Peter Krefting
  0 siblings, 1 reply; 16+ messages in thread
From: Jeff King @ 2014-07-09  6:18 UTC (permalink / raw)
  To: Yi, EungJun; +Cc: git

On Wed, Jul 09, 2014 at 02:46:35PM +0900, Yi, EungJun wrote:

> I agree with you. In fact, I tried to get user's preferred language in
> the same way as gettext. It has guess_category_value() to do that and
> the function is good enough because it considers $LANGUAGE, $LC_ALL,
> $LANG, and also system-dependent preferences. But the function does
> not seem a public API and I don't know how can I use the function in
> Git. So I chose to use $LANGUAGE only.

I did some digging, and I think the public API is setlocale with a NULL
parameter, like:

  printf("%s\n", setlocale(LC_MESSAGES, NULL));

That still will end up like "en_US.UTF-8", though; I couldn't find any
standard functions for parsing that. It seems like it would be pretty
straightforward to do so, though.

>From my brief reading of rfc2616, that should probably become "en-us",
and any time we add "x-y", we may want to add "x" as a fallback (that is
certainly true for English; I don't know about other languages with
dialects).

-Peff

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] http: Add Accept-Language header if possible
  2014-07-08 15:54 [PATCH] http: Add Accept-Language header if possible Yi EungJun
  2014-07-08 21:52 ` Eric Sunshine
  2014-07-09  5:10 ` Jeff King
@ 2014-07-09 10:40 ` Peter Krefting
  2014-07-11  9:05   ` Yi, EungJun
  2 siblings, 1 reply; 16+ messages in thread
From: Peter Krefting @ 2014-07-09 10:40 UTC (permalink / raw)
  To: Yi EungJun; +Cc: git, Yi EungJun

Yi EungJun:

> Example:
>  LANGUAGE= -> ""
>  LANGUAGE=ko -> "Accept-Language: ko; q=1.000, *; q=0.001"
>  LANGUAGE=ko:en -> "Accept-Language: ko; q=1.000, en; q=0.999, *; q=0.001"

Avoid adding "q=1.000". It is redundant (the default for any 
unqualified language names is 1.0, and additionally there has 
historically been some buggy servers that failed if it was included.

> +	p1 = getenv("LANGUAGE");

You need a fallback mechanism here to parse all the possible language 
variables. I would use the first one I find of these:

  1. LANGUAGE
  2. LC_ALL
  3. LC_MESSAGES
  4. LANG

Only "LANGUAGE" holds a colon-separated list, but the same code can 
parse all of them, just yielding a single entry for the others.

> +				strbuf_add(buf, p1, p2 - p1);

The tokens are on the form language_COUNTRY.encoding@identifier, whereas 
Accept-Language wants language-COUNTRY, so you need to a) replace "_" 
with "-", and b) chop off anything following a "." or "@".

> +				strbuf_addf(buf, "; q=%.3f", q);
> +				q -= 0.001;

Three decimals seems a bit overkill, but some experimentation might be 
necessary.

> +		strbuf_addstr(buf, "*; q=0.001\r\n");

You should probably also add an explicit "en" here, if none was 
already included. I've seen some servers break horribly if "en" isn't 
included.



For reference, I have my LANGUAGE variable set to 
"sv_SE.utf8:sv:nb_NO.utf8:nb:da_DK.utf8:da:nn_NO.utf8:nn:en_GB.utf8:en_US.utf8:en"

-- 
\\// Peter - http://www.softwolves.pp.se/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] http: Add Accept-Language header if possible
  2014-07-09  6:18     ` Jeff King
@ 2014-07-09 10:46       ` Peter Krefting
  2014-07-10 20:10         ` Jeff King
  0 siblings, 1 reply; 16+ messages in thread
From: Peter Krefting @ 2014-07-09 10:46 UTC (permalink / raw)
  To: Jeff King; +Cc: Yi, EungJun, git

Jeff King:

> I did some digging, and I think the public API is setlocale with a 
> NULL parameter, like:
>
>  printf("%s\n", setlocale(LC_MESSAGES, NULL));
>
> That still will end up like "en_US.UTF-8", though;

And it only yields the highest-priority language, I think.

> I couldn't find any standard functions for parsing that. It seems 
> like it would be pretty straightforward to do so, though.

RFC 5646 is the current specification on language tags, btw.

>  From my brief reading of rfc2616, that should probably become 
> "en-us", and any time we add "x-y", we may want to add "x" as a 
> fallback (that is certainly true for English; I don't know about 
> other languages with dialects).

Yes, adding the generic fallback is necessary, as "en-US" on the server 
matches a client's "en", but not vice versa. So if you request "en-US" 
and "de" and the server only has "en-GB" and "de", you'd get the 
"de" version.

Debian's website has a nice writeup on the subject: 
http://www.debian.org/intro/cn#howtoset

-- 
\\// Peter - http://www.softwolves.pp.se/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] http: Add Accept-Language header if possible
  2014-07-09 10:46       ` Peter Krefting
@ 2014-07-10 20:10         ` Jeff King
  2014-07-11  9:43           ` Yi, EungJun
  2014-07-12 10:11           ` Peter Krefting
  0 siblings, 2 replies; 16+ messages in thread
From: Jeff King @ 2014-07-10 20:10 UTC (permalink / raw)
  To: Peter Krefting; +Cc: Yi, EungJun, git

On Wed, Jul 09, 2014 at 11:46:14AM +0100, Peter Krefting wrote:

> Jeff King:
> 
> >I did some digging, and I think the public API is setlocale with a NULL
> >parameter, like:
> >
> > printf("%s\n", setlocale(LC_MESSAGES, NULL));
> >
> >That still will end up like "en_US.UTF-8", though;
> 
> And it only yields the highest-priority language, I think.

I wasn't clear on whether POSIX locale variables actually supported
multiple languages with priorities. I have never seen that, though the
original commit message indicated that LANGUAGE=x:y was a thing (I
wasn't sure if that was a made-up thing, or something that libc actually
supported).

> Debian's website has a nice writeup on the subject:
> http://www.debian.org/intro/cn#howtoset

That seems to be about language settings in browsers, which are a much
richer set of preferences than POSIX locales (I think).

It would not be wrong to have that level of configuration for git's http
requests, but I do not know if it is worth the effort. Mapping the
user's gettext locale into an accept-language header seems like a
straightforward way to communicate to the other side what the client is
using to show errors (so that errors coming from the server can match).

If that is the case, though, I wonder if we should actually be adding it
as a git-protocol header so that all transports can benefit (i.e., we
could be localizing human-readable error messages in upload-pack,
receive-pack, etc).

-Peff

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] http: Add Accept-Language header if possible
  2014-07-09 10:40 ` Peter Krefting
@ 2014-07-11  9:05   ` Yi, EungJun
  0 siblings, 0 replies; 16+ messages in thread
From: Yi, EungJun @ 2014-07-11  9:05 UTC (permalink / raw)
  To: Peter Krefting; +Cc: git, Yi EungJun

2014-07-09 19:40 GMT+09:00 Peter Krefting <peter@softwolves.pp.se>:
> Yi EungJun:
>
>
>> Example:
>>  LANGUAGE= -> ""
>>  LANGUAGE=ko -> "Accept-Language: ko; q=1.000, *; q=0.001"
>>  LANGUAGE=ko:en -> "Accept-Language: ko; q=1.000, en; q=0.999, *; q=0.001"
>
>
> Avoid adding "q=1.000". It is redundant (the default for any unqualified
> language names is 1.0, and additionally there has historically been some
> buggy servers that failed if it was included.

Ok, I'll fix it.

>
>
>> +       p1 = getenv("LANGUAGE");
>
>
> You need a fallback mechanism here to parse all the possible language
> variables. I would use the first one I find of these:
>
>  1. LANGUAGE
>  2. LC_ALL
>  3. LC_MESSAGES
>  4. LANG
>
> Only "LANGUAGE" holds a colon-separated list, but the same code can parse
> all of them, just yielding a single entry for the others.

I'll use setlocale(LC_MESSAGES, NULL) as well as getenv("LANGUAGE").

>
>
>> +                               strbuf_add(buf, p1, p2 - p1);
>
>
> The tokens are on the form language_COUNTRY.encoding@identifier, whereas
> Accept-Language wants language-COUNTRY, so you need to a) replace "_" with
> "-", and b) chop off anything following a "." or "@".
>
>
>> +                               strbuf_addf(buf, "; q=%.3f", q);
>> +                               q -= 0.001;
>
>
> Three decimals seems a bit overkill, but some experimentation might be
> necessary.

I'll use three decimals only if there are 100 or more preferred languages.

>
>
>> +               strbuf_addstr(buf, "*; q=0.001\r\n");
>
>
> You should probably also add an explicit "en" here, if none was already
> included. I've seen some servers break horribly if "en" isn't included.

I'll send Accept-Language only if there is at least one preferred
language. Is it enough?

Thanks for your review.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] http: Add Accept-Language header if possible
  2014-07-10 20:10         ` Jeff King
@ 2014-07-11  9:43           ` Yi, EungJun
  2014-07-12 10:11           ` Peter Krefting
  1 sibling, 0 replies; 16+ messages in thread
From: Yi, EungJun @ 2014-07-11  9:43 UTC (permalink / raw)
  To: Jeff King; +Cc: Peter Krefting, git

2014-07-11 5:10 GMT+09:00 Jeff King <peff@peff.net>:
> On Wed, Jul 09, 2014 at 11:46:14AM +0100, Peter Krefting wrote:
>
>> Jeff King:
>>
>> >I did some digging, and I think the public API is setlocale with a NULL
>> >parameter, like:
>> >
>> > printf("%s\n", setlocale(LC_MESSAGES, NULL));
>> >
>> >That still will end up like "en_US.UTF-8", though;
>>
>> And it only yields the highest-priority language, I think.
>
> I wasn't clear on whether POSIX locale variables actually supported
> multiple languages with priorities. I have never seen that, though the
> original commit message indicated that LANGUAGE=x:y was a thing (I
> wasn't sure if that was a made-up thing, or something that libc actually
> supported).
>
>> Debian's website has a nice writeup on the subject:
>> http://www.debian.org/intro/cn#howtoset
>
> That seems to be about language settings in browsers, which are a much
> richer set of preferences than POSIX locales (I think).
>
> It would not be wrong to have that level of configuration for git's http
> requests, but I do not know if it is worth the effort. Mapping the
> user's gettext locale into an accept-language header seems like a
> straightforward way to communicate to the other side what the client is
> using to show errors (so that errors coming from the server can match).

Thanks for you advice. I'll write a path to use both of
setlocale(LC_MESSAGES, NULL) and getenv("LANGUAGE") to get the user's
preferred language. setlocale(LC_MESSAGES, NULL) is quite nice way
because it takes LC_ALL, LC_MESSAGES and LANG into account, but not
LANGUAGE. I think we should take also LANGUAGE into account as gettext
does. [1]

[1]: http://www.gnu.org/software/gettext/manual/gettext.html#Locale-Environment-Variables

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] http: Add Accept-Language header if possible
       [not found]     ` <CAPig+cQ05pzU9uVBqS8tBHvB8_3qAtgsPYz1sGhpa0W1CVymqA@mail.gmail.com>
@ 2014-07-11 16:35       ` Yi, EungJun
  0 siblings, 0 replies; 16+ messages in thread
From: Yi, EungJun @ 2014-07-11 16:35 UTC (permalink / raw)
  To: Eric Sunshine, git

2014-07-12 1:24 GMT+09:00 Eric Sunshine <sunshine@sunshineco.com>:
> On Fri, Jul 11, 2014 at 5:22 AM, Yi, EungJun <semtlenori@gmail.com> wrote:
>> 2014-07-09 6:52 GMT+09:00 Eric Sunshine <sunshine@sunshineco.com>:
>>>> +       grep "^Accept-Language: ko; q=1.000, en; q=0.999, \*; q=0.001" actual
>>>
>>> Do you want to \-escape the periods? (Or maybe use 'grep -F'?)
>>
>> I just want to match '*' character. I tried 'grep -F' but it does not help.
>
> I meant that the periods in your grep pattern are matching any
> character. If you want to be very strict, so that they match only
> period, then you should \-escape them.

Oops, I misunderstood you. You are right. I'll \- escape them.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] http: Add Accept-Language header if possible
  2014-07-10 20:10         ` Jeff King
  2014-07-11  9:43           ` Yi, EungJun
@ 2014-07-12 10:11           ` Peter Krefting
  1 sibling, 0 replies; 16+ messages in thread
From: Peter Krefting @ 2014-07-12 10:11 UTC (permalink / raw)
  To: Jeff King; +Cc: Yi, EungJun, git

Jeff King:

> If that is the case, though, I wonder if we should actually be adding it
> as a git-protocol header so that all transports can benefit (i.e., we
> could be localizing human-readable error messages in upload-pack,
> receive-pack, etc).

That would be very nice, thre is a lot of language mixing going on at 
the moment.

-- 
\\// Peter - http://www.softwolves.pp.se/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] http: Add Accept-Language header if possible
  2015-01-28  6:15       ` Junio C Hamano
@ 2015-01-28 11:59         ` Yi, EungJun
  0 siblings, 0 replies; 16+ messages in thread
From: Yi, EungJun @ 2015-01-28 11:59 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Git List, Yi EungJun, Jeff King, Peter Krefting, Michael Blume,
	Torsten Bögershausen

I agree that a list of char* is enough for language_tags.

Thanks for your review and patch. I'll apply your patch and send v9.

On Wed, Jan 28, 2015 at 3:15 PM, Junio C Hamano <gitster@pobox.com> wrote:
> On Tue, Jan 27, 2015 at 3:34 PM, Junio C Hamano <gitster@pobox.com> wrote:
>> Yi EungJun <semtlenori@gmail.com> writes:
>>
>>> +
>>> +             sprintf(q_format, ";q=0.%%0%dd", decimal_places);
>>> +
>>> +             strbuf_addstr(buf, "Accept-Language: ");
>>> +
>>> +             for(i = 0; i < num_langs; i++) {
>>> +                     if (i > 0)
>>> +                             strbuf_addstr(buf, ", ");
>>> +
>>> +                     strbuf_addstr(buf, strbuf_detach(&language_tags[i], NULL));
>>
>> This is not wrong per-se, but it looks somewhat convoluted to me.
>> ...
>
> Actually, this is wrong, isn't it?
>
> strbuf_detach() removes the language_tags[i].buf from the strbuf,
> and the caller now owns that piece of memory. Then strbuf_addstr()
> appends a copy of that string to buf, and the piece of memory
> that was originally held by language_tags[i].buf is now lost forever.
>
> This is leaking.
>
>>> +     /* free language tags */
>>> +     for(i = 0; i < num_langs; i++) {
>>> +             strbuf_release(&language_tags[i]);
>>> +     }
>
> ... because this loop does not free memory for earlier parts of language_tags[].
>
>> I am wondering if using strbuf for each of the language_tags[] is
>> even necessary.  How about doing it this way instead?
>
> And I think my counter-proposal does not leak (as it does not us strbuf for
> language_tags[] anymore).
>
>>
>>  http.c | 22 +++++++++-------------
>>  1 file changed, 9 insertions(+), 13 deletions(-)
>>
>> diff --git a/http.c b/http.c
>> index 6111c6a..db591b3 100644
>> --- a/http.c
>> +++ b/http.c
>> @@ -1027,7 +1027,7 @@ static void write_accept_language(struct strbuf *buf)
>>         const int MAX_DECIMAL_PLACES = 3;
>>         const int MAX_LANGUAGE_TAGS = 1000;
>>         const int MAX_ACCEPT_LANGUAGE_HEADER_SIZE = 4000;
>> -       struct strbuf *language_tags = NULL;
>> +       char **language_tags = NULL;
>>         int num_langs = 0;
>>         const char *s = get_preferred_languages();
>>         int i;
>> @@ -1053,9 +1053,7 @@ static void write_accept_language(struct strbuf *buf)
>>                 if (tag.len) {
>>                         num_langs++;
>>                         REALLOC_ARRAY(language_tags, num_langs);
>> -                       strbuf_init(&language_tags[num_langs - 1], 0);
>> -                       strbuf_swap(&tag, &language_tags[num_langs - 1]);
>> -
>> +                       language_tags[num_langs - 1] = strbuf_detach(&tag, NULL);
>>                         if (num_langs >= MAX_LANGUAGE_TAGS - 1) /* -1 for '*' */
>>                                 break;
>>                 }
>> @@ -1070,13 +1068,12 @@ static void write_accept_language(struct strbuf *buf)
>>
>>                 /* add '*' */
>>                 REALLOC_ARRAY(language_tags, num_langs + 1);
>> -               strbuf_init(&language_tags[num_langs], 0);
>> -               strbuf_addstr(&language_tags[num_langs++], "*");
>> +               language_tags[num_langs++] = "*"; /* it's OK; this won't be freed */
>>
>>                 /* compute decimal_places */
>>                 for (max_q = 1, decimal_places = 0;
>> -                               max_q < num_langs && decimal_places <= MAX_DECIMAL_PLACES;
>> -                               decimal_places++, max_q *= 10)
>> +                    max_q < num_langs && decimal_places <= MAX_DECIMAL_PLACES;
>> +                    decimal_places++, max_q *= 10)
>>                         ;
>>
>>                 sprintf(q_format, ";q=0.%%0%dd", decimal_places);
>> @@ -1087,7 +1084,7 @@ static void write_accept_language(struct strbuf *buf)
>>                         if (i > 0)
>>                                 strbuf_addstr(buf, ", ");
>>
>> -                       strbuf_addstr(buf, strbuf_detach(&language_tags[i], NULL));
>> +                       strbuf_addstr(buf, language_tags[i]);
>>
>>                         if (i > 0)
>>                                 strbuf_addf(buf, q_format, max_q - i);
>> @@ -1101,10 +1098,9 @@ static void write_accept_language(struct strbuf *buf)
>>                 }
>>         }
>>
>> -       /* free language tags */
>> -       for(i = 0; i < num_langs; i++) {
>> -               strbuf_release(&language_tags[i]);
>> -       }
>> +       /* free language tags -- last one is a static '*' */
>> +       for(i = 0; i < num_langs - 1; i++)
>> +               free(language_tags[i]);
>>         free(language_tags);
>>  }
>>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] http: Add Accept-Language header if possible
  2015-01-27 23:34     ` Junio C Hamano
@ 2015-01-28  6:15       ` Junio C Hamano
  2015-01-28 11:59         ` Yi, EungJun
  0 siblings, 1 reply; 16+ messages in thread
From: Junio C Hamano @ 2015-01-28  6:15 UTC (permalink / raw)
  To: Yi EungJun
  Cc: Git List, Yi EungJun, Jeff King, Peter Krefting, Michael Blume,
	Torsten Bögershausen

On Tue, Jan 27, 2015 at 3:34 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Yi EungJun <semtlenori@gmail.com> writes:
>
>> +
>> +             sprintf(q_format, ";q=0.%%0%dd", decimal_places);
>> +
>> +             strbuf_addstr(buf, "Accept-Language: ");
>> +
>> +             for(i = 0; i < num_langs; i++) {
>> +                     if (i > 0)
>> +                             strbuf_addstr(buf, ", ");
>> +
>> +                     strbuf_addstr(buf, strbuf_detach(&language_tags[i], NULL));
>
> This is not wrong per-se, but it looks somewhat convoluted to me.
> ...

Actually, this is wrong, isn't it?

strbuf_detach() removes the language_tags[i].buf from the strbuf,
and the caller now owns that piece of memory. Then strbuf_addstr()
appends a copy of that string to buf, and the piece of memory
that was originally held by language_tags[i].buf is now lost forever.

This is leaking.

>> +     /* free language tags */
>> +     for(i = 0; i < num_langs; i++) {
>> +             strbuf_release(&language_tags[i]);
>> +     }

... because this loop does not free memory for earlier parts of language_tags[].

> I am wondering if using strbuf for each of the language_tags[] is
> even necessary.  How about doing it this way instead?

And I think my counter-proposal does not leak (as it does not us strbuf for
language_tags[] anymore).

>
>  http.c | 22 +++++++++-------------
>  1 file changed, 9 insertions(+), 13 deletions(-)
>
> diff --git a/http.c b/http.c
> index 6111c6a..db591b3 100644
> --- a/http.c
> +++ b/http.c
> @@ -1027,7 +1027,7 @@ static void write_accept_language(struct strbuf *buf)
>         const int MAX_DECIMAL_PLACES = 3;
>         const int MAX_LANGUAGE_TAGS = 1000;
>         const int MAX_ACCEPT_LANGUAGE_HEADER_SIZE = 4000;
> -       struct strbuf *language_tags = NULL;
> +       char **language_tags = NULL;
>         int num_langs = 0;
>         const char *s = get_preferred_languages();
>         int i;
> @@ -1053,9 +1053,7 @@ static void write_accept_language(struct strbuf *buf)
>                 if (tag.len) {
>                         num_langs++;
>                         REALLOC_ARRAY(language_tags, num_langs);
> -                       strbuf_init(&language_tags[num_langs - 1], 0);
> -                       strbuf_swap(&tag, &language_tags[num_langs - 1]);
> -
> +                       language_tags[num_langs - 1] = strbuf_detach(&tag, NULL);
>                         if (num_langs >= MAX_LANGUAGE_TAGS - 1) /* -1 for '*' */
>                                 break;
>                 }
> @@ -1070,13 +1068,12 @@ static void write_accept_language(struct strbuf *buf)
>
>                 /* add '*' */
>                 REALLOC_ARRAY(language_tags, num_langs + 1);
> -               strbuf_init(&language_tags[num_langs], 0);
> -               strbuf_addstr(&language_tags[num_langs++], "*");
> +               language_tags[num_langs++] = "*"; /* it's OK; this won't be freed */
>
>                 /* compute decimal_places */
>                 for (max_q = 1, decimal_places = 0;
> -                               max_q < num_langs && decimal_places <= MAX_DECIMAL_PLACES;
> -                               decimal_places++, max_q *= 10)
> +                    max_q < num_langs && decimal_places <= MAX_DECIMAL_PLACES;
> +                    decimal_places++, max_q *= 10)
>                         ;
>
>                 sprintf(q_format, ";q=0.%%0%dd", decimal_places);
> @@ -1087,7 +1084,7 @@ static void write_accept_language(struct strbuf *buf)
>                         if (i > 0)
>                                 strbuf_addstr(buf, ", ");
>
> -                       strbuf_addstr(buf, strbuf_detach(&language_tags[i], NULL));
> +                       strbuf_addstr(buf, language_tags[i]);
>
>                         if (i > 0)
>                                 strbuf_addf(buf, q_format, max_q - i);
> @@ -1101,10 +1098,9 @@ static void write_accept_language(struct strbuf *buf)
>                 }
>         }
>
> -       /* free language tags */
> -       for(i = 0; i < num_langs; i++) {
> -               strbuf_release(&language_tags[i]);
> -       }
> +       /* free language tags -- last one is a static '*' */
> +       for(i = 0; i < num_langs - 1; i++)
> +               free(language_tags[i]);
>         free(language_tags);
>  }
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] http: Add Accept-Language header if possible
  2015-01-27 15:51   ` [PATCH] " Yi EungJun
@ 2015-01-27 23:34     ` Junio C Hamano
  2015-01-28  6:15       ` Junio C Hamano
  0 siblings, 1 reply; 16+ messages in thread
From: Junio C Hamano @ 2015-01-27 23:34 UTC (permalink / raw)
  To: Yi EungJun
  Cc: Git List, Yi EungJun, Jeff King, Peter Krefting, Michael Blume,
	Torsten Bögershausen

Yi EungJun <semtlenori@gmail.com> writes:

> +static void write_accept_language(struct strbuf *buf)
> +{
> +	/*
> +	 * MAX_DECIMAL_PLACES must not be larger than 3. If it is larger than
> +	 * that, q-value will be smaller than 0.001, the minimum q-value the
> +	 * HTTP specification allows. See
> +	 * http://tools.ietf.org/html/rfc7231#section-5.3.1 for q-value.
> +	 */
> +	const int MAX_DECIMAL_PLACES = 3;
> +	const int MAX_LANGUAGE_TAGS = 1000;
> +	const int MAX_ACCEPT_LANGUAGE_HEADER_SIZE = 4000;
> +	struct strbuf *language_tags = NULL;
> +	int num_langs = 0;
> +	const char *s = get_preferred_languages();
> +	int i;
> +	struct strbuf tag = STRBUF_INIT;
> +
> +	/* Don't add Accept-Language header if no language is preferred. */
> +	if (!s)
> +		return;
> +
> +	/*
> +	 * Split the colon-separated string of preferred languages into
> +	 * language_tags array.
> +	 */
> +	do {
> +		/* collect language tag */
> +		for (; *s && (isalnum(*s) || *s == '_'); s++)
> +			strbuf_addch(&tag, *s == '_' ? '-' : *s);
> +
> +		/* skip .codeset, @modifier and any other unnecessary parts */
> +		while (*s && *s != ':')
> +			s++;
> +
> +		if (tag.len) {
> +			num_langs++;
> +			REALLOC_ARRAY(language_tags, num_langs);
> +			strbuf_init(&language_tags[num_langs - 1], 0);
> +			strbuf_swap(&tag, &language_tags[num_langs - 1]);
> +
> +			if (num_langs >= MAX_LANGUAGE_TAGS - 1) /* -1 for '*' */
> +				break;
> +		}
> +	} while (*s++);

The structure of this function is much easier to understand than any
of the previous rounds.  You collect up to the max you are going to
support, and then you format up to the max you are going to send.

Very straight-forward and simple.

> +	/* write Accept-Language header into buf */
> +	if (num_langs >= 1) {

micronit: should be OK to just say "if (num_langs)".

> +		int last_buf_len = 0;
> +		int max_q;
> +		int decimal_places;
> +		char q_format[32];
> +
> +		/* add '*' */
> +		REALLOC_ARRAY(language_tags, num_langs + 1);
> +		strbuf_init(&language_tags[num_langs], 0);
> +		strbuf_addstr(&language_tags[num_langs++], "*");
> +
> +		/* compute decimal_places */
> +		for (max_q = 1, decimal_places = 0;
> +				max_q < num_langs && decimal_places <= MAX_DECIMAL_PLACES;
> +				decimal_places++, max_q *= 10)
> +			;

micronit: the second and the third line are indented too deeply and
made me wonder if this has an overlong first line (i.e. the set-up
part to enter the for-loop) split into multiple lines.

> +
> +		sprintf(q_format, ";q=0.%%0%dd", decimal_places);
> +
> +		strbuf_addstr(buf, "Accept-Language: ");
> +
> +		for(i = 0; i < num_langs; i++) {
> +			if (i > 0)
> +				strbuf_addstr(buf, ", ");
> +
> +			strbuf_addstr(buf, strbuf_detach(&language_tags[i], NULL));

This is not wrong per-se, but it looks somewhat convoluted to me.

You can just peek language_tags[i].buf here without detaching, as
you will free the strbufs after you exit the loop anyway.  Your
version is not wrong and it does not result in double-freeing,
because detach clears the strbuf, but at the same time, makes the
responsibility to free language_tags[] strbuf split into here (for
elements up to the ones that are used to fill buf) and the cleanup
loop (for elements that are not used in this loop).

> +			if (i > 0)
> +				strbuf_addf(buf, q_format, max_q - i);
> +
> +			if (buf->len > MAX_ACCEPT_LANGUAGE_HEADER_SIZE) {
> +				strbuf_remove(buf, last_buf_len, buf->len - last_buf_len);
> +				break;
> +			}
> +
> +			last_buf_len = buf->len;
> +		}
> +	}
> +
> +	/* free language tags */
> +	for(i = 0; i < num_langs; i++) {
> +		strbuf_release(&language_tags[i]);
> +	}
> +	free(language_tags);
> +}

I am wondering if using strbuf for each of the language_tags[] is
even necessary.  How about doing it this way instead?

 http.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/http.c b/http.c
index 6111c6a..db591b3 100644
--- a/http.c
+++ b/http.c
@@ -1027,7 +1027,7 @@ static void write_accept_language(struct strbuf *buf)
 	const int MAX_DECIMAL_PLACES = 3;
 	const int MAX_LANGUAGE_TAGS = 1000;
 	const int MAX_ACCEPT_LANGUAGE_HEADER_SIZE = 4000;
-	struct strbuf *language_tags = NULL;
+	char **language_tags = NULL;
 	int num_langs = 0;
 	const char *s = get_preferred_languages();
 	int i;
@@ -1053,9 +1053,7 @@ static void write_accept_language(struct strbuf *buf)
 		if (tag.len) {
 			num_langs++;
 			REALLOC_ARRAY(language_tags, num_langs);
-			strbuf_init(&language_tags[num_langs - 1], 0);
-			strbuf_swap(&tag, &language_tags[num_langs - 1]);
-
+			language_tags[num_langs - 1] = strbuf_detach(&tag, NULL);
 			if (num_langs >= MAX_LANGUAGE_TAGS - 1) /* -1 for '*' */
 				break;
 		}
@@ -1070,13 +1068,12 @@ static void write_accept_language(struct strbuf *buf)
 
 		/* add '*' */
 		REALLOC_ARRAY(language_tags, num_langs + 1);
-		strbuf_init(&language_tags[num_langs], 0);
-		strbuf_addstr(&language_tags[num_langs++], "*");
+		language_tags[num_langs++] = "*"; /* it's OK; this won't be freed */
 
 		/* compute decimal_places */
 		for (max_q = 1, decimal_places = 0;
-				max_q < num_langs && decimal_places <= MAX_DECIMAL_PLACES;
-				decimal_places++, max_q *= 10)
+		     max_q < num_langs && decimal_places <= MAX_DECIMAL_PLACES;
+		     decimal_places++, max_q *= 10)
 			;
 
 		sprintf(q_format, ";q=0.%%0%dd", decimal_places);
@@ -1087,7 +1084,7 @@ static void write_accept_language(struct strbuf *buf)
 			if (i > 0)
 				strbuf_addstr(buf, ", ");
 
-			strbuf_addstr(buf, strbuf_detach(&language_tags[i], NULL));
+			strbuf_addstr(buf, language_tags[i]);
 
 			if (i > 0)
 				strbuf_addf(buf, q_format, max_q - i);
@@ -1101,10 +1098,9 @@ static void write_accept_language(struct strbuf *buf)
 		}
 	}
 
-	/* free language tags */
-	for(i = 0; i < num_langs; i++) {
-		strbuf_release(&language_tags[i]);
-	}
+	/* free language tags -- last one is a static '*' */
+	for(i = 0; i < num_langs - 1; i++)
+		free(language_tags[i]);
 	free(language_tags);
 }
 

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH] http: Add Accept-Language header if possible
  2015-01-27 15:51 ` [PATCH v8 0/1] " Yi EungJun
@ 2015-01-27 15:51   ` Yi EungJun
  2015-01-27 23:34     ` Junio C Hamano
  0 siblings, 1 reply; 16+ messages in thread
From: Yi EungJun @ 2015-01-27 15:51 UTC (permalink / raw)
  To: Git List
  Cc: Yi EungJun, Junio C Hamano, Jeff King, Peter Krefting,
	Michael Blume, Torsten Bögershausen

From: Yi EungJun <eungjun.yi@navercorp.com>

Add an Accept-Language header which indicates the user's preferred
languages defined by $LANGUAGE, $LC_ALL, $LC_MESSAGES and $LANG.

Examples:
  LANGUAGE= -> ""
  LANGUAGE=ko:en -> "Accept-Language: ko, en;q=0.9, *;q=0.1"
  LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *;q=0.1"
  LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *;q=0.1"

This gives git servers a chance to display remote error messages in
the user's preferred language.

Limit the number of languages to 1,000 because q-value must not be
smaller than 0.001, and limit the length of Accept-Language header to
4,000 bytes for some HTTP servers which cannot accept such long header.

Signed-off-by: Yi EungJun <eungjun.yi@navercorp.com>
---
 http.c                     | 151 +++++++++++++++++++++++++++++++++++++++++++++
 remote-curl.c              |   2 +
 t/t5550-http-fetch-dumb.sh |  42 +++++++++++++
 3 files changed, 195 insertions(+)

diff --git a/http.c b/http.c
index 040f362..6111c6a 100644
--- a/http.c
+++ b/http.c
@@ -68,6 +68,8 @@ static struct curl_slist *no_pragma_header;
 
 static struct active_request_slot *active_queue_head;
 
+static char *cached_accept_language;
+
 size_t fread_buffer(char *ptr, size_t eltsize, size_t nmemb, void *buffer_)
 {
 	size_t size = eltsize * nmemb;
@@ -515,6 +517,9 @@ void http_cleanup(void)
 		cert_auth.password = NULL;
 	}
 	ssl_cert_password_required = 0;
+
+	free(cached_accept_language);
+	cached_accept_language = NULL;
 }
 
 struct active_request_slot *get_active_slot(void)
@@ -986,6 +991,146 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type,
 		strbuf_addstr(charset, "ISO-8859-1");
 }
 
+/*
+ * Guess the user's preferred languages from the value in LANGUAGE environment
+ * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined.
+ *
+ * The result can be a colon-separated list like "ko:ja:en".
+ */
+static const char *get_preferred_languages(void)
+{
+	const char *retval;
+
+	retval = getenv("LANGUAGE");
+	if (retval && *retval)
+		return retval;
+
+#ifndef NO_GETTEXT
+	retval = setlocale(LC_MESSAGES, NULL);
+	if (retval && *retval &&
+		strcmp(retval, "C") &&
+		strcmp(retval, "POSIX"))
+		return retval;
+#endif
+
+	return NULL;
+}
+
+static void write_accept_language(struct strbuf *buf)
+{
+	/*
+	 * MAX_DECIMAL_PLACES must not be larger than 3. If it is larger than
+	 * that, q-value will be smaller than 0.001, the minimum q-value the
+	 * HTTP specification allows. See
+	 * http://tools.ietf.org/html/rfc7231#section-5.3.1 for q-value.
+	 */
+	const int MAX_DECIMAL_PLACES = 3;
+	const int MAX_LANGUAGE_TAGS = 1000;
+	const int MAX_ACCEPT_LANGUAGE_HEADER_SIZE = 4000;
+	struct strbuf *language_tags = NULL;
+	int num_langs = 0;
+	const char *s = get_preferred_languages();
+	int i;
+	struct strbuf tag = STRBUF_INIT;
+
+	/* Don't add Accept-Language header if no language is preferred. */
+	if (!s)
+		return;
+
+	/*
+	 * Split the colon-separated string of preferred languages into
+	 * language_tags array.
+	 */
+	do {
+		/* collect language tag */
+		for (; *s && (isalnum(*s) || *s == '_'); s++)
+			strbuf_addch(&tag, *s == '_' ? '-' : *s);
+
+		/* skip .codeset, @modifier and any other unnecessary parts */
+		while (*s && *s != ':')
+			s++;
+
+		if (tag.len) {
+			num_langs++;
+			REALLOC_ARRAY(language_tags, num_langs);
+			strbuf_init(&language_tags[num_langs - 1], 0);
+			strbuf_swap(&tag, &language_tags[num_langs - 1]);
+
+			if (num_langs >= MAX_LANGUAGE_TAGS - 1) /* -1 for '*' */
+				break;
+		}
+	} while (*s++);
+
+	/* write Accept-Language header into buf */
+	if (num_langs >= 1) {
+		int last_buf_len = 0;
+		int max_q;
+		int decimal_places;
+		char q_format[32];
+
+		/* add '*' */
+		REALLOC_ARRAY(language_tags, num_langs + 1);
+		strbuf_init(&language_tags[num_langs], 0);
+		strbuf_addstr(&language_tags[num_langs++], "*");
+
+		/* compute decimal_places */
+		for (max_q = 1, decimal_places = 0;
+				max_q < num_langs && decimal_places <= MAX_DECIMAL_PLACES;
+				decimal_places++, max_q *= 10)
+			;
+
+		sprintf(q_format, ";q=0.%%0%dd", decimal_places);
+
+		strbuf_addstr(buf, "Accept-Language: ");
+
+		for(i = 0; i < num_langs; i++) {
+			if (i > 0)
+				strbuf_addstr(buf, ", ");
+
+			strbuf_addstr(buf, strbuf_detach(&language_tags[i], NULL));
+
+			if (i > 0)
+				strbuf_addf(buf, q_format, max_q - i);
+
+			if (buf->len > MAX_ACCEPT_LANGUAGE_HEADER_SIZE) {
+				strbuf_remove(buf, last_buf_len, buf->len - last_buf_len);
+				break;
+			}
+
+			last_buf_len = buf->len;
+		}
+	}
+
+	/* free language tags */
+	for(i = 0; i < num_langs; i++) {
+		strbuf_release(&language_tags[i]);
+	}
+	free(language_tags);
+}
+
+/*
+ * Get an Accept-Language header which indicates user's preferred languages.
+ *
+ * Examples:
+ *   LANGUAGE= -> ""
+ *   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
+ *   LANGUAGE=ko_KR.UTF-8:sr@latin -> "Accept-Language: ko-KR, sr; q=0.9, *; q=0.1"
+ *   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
+ *   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
+ *   LANGUAGE= LANG=C -> ""
+ */
+static const char *get_accept_language(void)
+{
+	if (!cached_accept_language) {
+		struct strbuf buf = STRBUF_INIT;
+		write_accept_language(&buf);
+		if (buf.len > 0)
+			cached_accept_language = strbuf_detach(&buf, NULL);
+	}
+
+	return cached_accept_language;
+}
+
 /* http_request() targets */
 #define HTTP_REQUEST_STRBUF	0
 #define HTTP_REQUEST_FILE	1
@@ -998,6 +1143,7 @@ static int http_request(const char *url,
 	struct slot_results results;
 	struct curl_slist *headers = NULL;
 	struct strbuf buf = STRBUF_INIT;
+	const char *accept_language;
 	int ret;
 
 	slot = get_active_slot();
@@ -1023,6 +1169,11 @@ static int http_request(const char *url,
 					 fwrite_buffer);
 	}
 
+	accept_language = get_accept_language();
+
+	if (accept_language)
+		headers = curl_slist_append(headers, accept_language);
+
 	strbuf_addstr(&buf, "Pragma:");
 	if (options && options->no_cache)
 		strbuf_addstr(&buf, " no-cache");
diff --git a/remote-curl.c b/remote-curl.c
index dd63bc2..04989e5 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -962,6 +962,8 @@ int main(int argc, const char **argv)
 	struct strbuf buf = STRBUF_INIT;
 	int nongit;
 
+	git_setup_gettext();
+
 	git_extract_argv0_path(argv[0]);
 	setup_git_directory_gently(&nongit);
 	if (argc < 2) {
diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
index ac71418..e1e2938 100755
--- a/t/t5550-http-fetch-dumb.sh
+++ b/t/t5550-http-fetch-dumb.sh
@@ -196,5 +196,47 @@ test_expect_success 'reencoding is robust to whitespace oddities' '
 	grep "this is the error message" stderr
 '
 
+check_language () {
+	case "$2" in
+	'')
+		>expect
+		;;
+	?*)
+		echo "Accept-Language: $1" >expect
+		;;
+	esac &&
+	GIT_CURL_VERBOSE=1 \
+	LANGUAGE=$2 \
+	git ls-remote "$HTTPD_URL/dumb/repo.git" >output 2>&1 &&
+	tr -d '\015' <output |
+	sort -u |
+	sed -ne '/^Accept-Language:/ p' >actual &&
+	test_cmp expect actual
+}
+
+test_expect_success 'git client sends Accept-Language based on LANGUAGE' '
+	check_language "ko-KR, *;q=0.9" ko_KR.UTF-8'
+
+test_expect_success 'git client sends Accept-Language correctly with unordinary LANGUAGE' '
+	check_language "ko-KR, *;q=0.9" "ko_KR:" &&
+	check_language "ko-KR, en-US;q=0.9, *;q=0.8" "ko_KR::en_US" &&
+	check_language "ko-KR, *;q=0.9" ":::ko_KR" &&
+	check_language "ko-KR, en-US;q=0.9, *;q=0.8" "ko_KR!!:en_US" &&
+	check_language "ko-KR, ja-JP;q=0.9, *;q=0.8" "ko_KR en_US:ja_JP"'
+
+test_expect_success 'git client sends Accept-Language with many preferred languages' '
+	check_language "ko-KR, en-US;q=0.9, fr-CA;q=0.8, de;q=0.7, sr;q=0.6, \
+ja;q=0.5, zh;q=0.4, sv;q=0.3, pt;q=0.2, *;q=0.1" \
+		ko_KR.EUC-KR:en_US.UTF-8:fr_CA:de.UTF-8@euro:sr@latin:ja:zh:sv:pt &&
+	check_language "ko-KR, en-US;q=0.99, fr-CA;q=0.98, de;q=0.97, sr;q=0.96, \
+ja;q=0.95, zh;q=0.94, sv;q=0.93, pt;q=0.92, nb;q=0.91, *;q=0.90" \
+		ko_KR.EUC-KR:en_US.UTF-8:fr_CA:de.UTF-8@euro:sr@latin:ja:zh:sv:pt:nb
+'
+
+test_expect_success 'git client does not send an empty Accept-Language' '
+	GIT_CURL_VERBOSE=1 LANGUAGE= git ls-remote "$HTTPD_URL/dumb/repo.git" 2>stderr &&
+	! grep "^Accept-Language:" stderr
+'
+
 stop_httpd
 test_done
-- 
2.3.0.rc1.32.ga3df1c7

^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2015-01-29  1:48 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-07-08 15:54 [PATCH] http: Add Accept-Language header if possible Yi EungJun
2014-07-08 21:52 ` Eric Sunshine
     [not found]   ` <CAFT+Tg-6fR9OJ93TT7ww3x=zYHY=Dh5u-7owgQMBK5o_JKLEaA@mail.gmail.com>
     [not found]     ` <CAPig+cQ05pzU9uVBqS8tBHvB8_3qAtgsPYz1sGhpa0W1CVymqA@mail.gmail.com>
2014-07-11 16:35       ` Yi, EungJun
2014-07-09  5:10 ` Jeff King
2014-07-09  5:46   ` Yi, EungJun
2014-07-09  6:18     ` Jeff King
2014-07-09 10:46       ` Peter Krefting
2014-07-10 20:10         ` Jeff King
2014-07-11  9:43           ` Yi, EungJun
2014-07-12 10:11           ` Peter Krefting
2014-07-09 10:40 ` Peter Krefting
2014-07-11  9:05   ` Yi, EungJun
2015-01-22  7:54 [PATCH v7 1/1] " Junio C Hamano
2015-01-27 15:51 ` [PATCH v8 0/1] " Yi EungJun
2015-01-27 15:51   ` [PATCH] " Yi EungJun
2015-01-27 23:34     ` Junio C Hamano
2015-01-28  6:15       ` Junio C Hamano
2015-01-28 11:59         ` Yi, EungJun

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.