All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Verify Content-Type from smart HTTP servers
@ 2013-01-31 22:09 Junio C Hamano
  2013-02-01  8:52 ` Jeff King
  2013-02-06 10:24 ` Michael Schubert
  0 siblings, 2 replies; 13+ messages in thread
From: Junio C Hamano @ 2013-01-31 22:09 UTC (permalink / raw)
  To: Shawn Pearce; +Cc: git

Before parsing a suspected smart-HTTP response verify the returned
Content-Type matches the standard. This protects a client from
attempting to process a payload that smells like a smart-HTTP
server response.

JGit has been doing this check on all responses since the dawn of
time. I mistakenly failed to include it in git-core when smart HTTP
was introduced. At the time I didn't know how to get the Content-Type
from libcurl. I punted, meant to circle back and fix this, and just
plain forgot about it.

Signed-off-by: Shawn Pearce <spearce@spearce.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 * The original was picked up by majordomo taboo rules; resending
   after minor style fix.

   Was there a report of an attempted attack by malicious server or
   something that triggered this, or is this just a "common sense
   thing to do" in general?

 http-push.c                      |  4 ++--
 http.c                           | 31 ++++++++++++++++++++++---------
 http.h                           |  2 +-
 remote-curl.c                    | 15 +++++++++++----
 t/lib-httpd.sh                   |  1 +
 t/lib-httpd/apache.conf          |  4 ++++
 t/lib-httpd/broken-smart-http.sh | 11 +++++++++++
 t/t5551-http-fetch.sh            |  6 ++++++
 8 files changed, 58 insertions(+), 16 deletions(-)
 create mode 100755 t/lib-httpd/broken-smart-http.sh

diff --git a/http-push.c b/http-push.c
index 8701c12..ba45b7b 100644
--- a/http-push.c
+++ b/http-push.c
@@ -1560,7 +1560,7 @@ static int remote_exists(const char *path)
 
 	sprintf(url, "%s%s", repo->url, path);
 
-	switch (http_get_strbuf(url, NULL, 0)) {
+	switch (http_get_strbuf(url, NULL, NULL, 0)) {
 	case HTTP_OK:
 		ret = 1;
 		break;
@@ -1584,7 +1584,7 @@ static void fetch_symref(const char *path, char **symref, unsigned char *sha1)
 	url = xmalloc(strlen(repo->url) + strlen(path) + 1);
 	sprintf(url, "%s%s", repo->url, path);
 
-	if (http_get_strbuf(url, &buffer, 0) != HTTP_OK)
+	if (http_get_strbuf(url, NULL, &buffer, 0) != HTTP_OK)
 		die("Couldn't get %s for remote symref\n%s", url,
 		    curl_errorstr);
 	free(url);
diff --git a/http.c b/http.c
index 44f3525..d868d8b 100644
--- a/http.c
+++ b/http.c
@@ -788,7 +788,8 @@ int handle_curl_result(struct slot_results *results)
 #define HTTP_REQUEST_STRBUF	0
 #define HTTP_REQUEST_FILE	1
 
-static int http_request(const char *url, void *result, int target, int options)
+static int http_request(const char *url, struct strbuf *type,
+			void *result, int target, int options)
 {
 	struct active_request_slot *slot;
 	struct slot_results results;
@@ -838,24 +839,36 @@ static int http_request(const char *url, void *result, int target, int options)
 		ret = HTTP_START_FAILED;
 	}
 
+	if (type) {
+		char *t;
+		curl_easy_getinfo(slot->curl, CURLINFO_CONTENT_TYPE, &t);
+		if (t)
+			strbuf_addstr(type, t);
+	}
+
 	curl_slist_free_all(headers);
 	strbuf_release(&buf);
 
 	return ret;
 }
 
-static int http_request_reauth(const char *url, void *result, int target,
+static int http_request_reauth(const char *url,
+			       struct strbuf *type,
+			       void *result, int target,
 			       int options)
 {
-	int ret = http_request(url, result, target, options);
+	int ret = http_request(url, type, result, target, options);
 	if (ret != HTTP_REAUTH)
 		return ret;
-	return http_request(url, result, target, options);
+	return http_request(url, type, result, target, options);
 }
 
-int http_get_strbuf(const char *url, struct strbuf *result, int options)
+int http_get_strbuf(const char *url,
+		    struct strbuf *type,
+		    struct strbuf *result, int options)
 {
-	return http_request_reauth(url, result, HTTP_REQUEST_STRBUF, options);
+	return http_request_reauth(url, type, result,
+				   HTTP_REQUEST_STRBUF, options);
 }
 
 /*
@@ -878,7 +891,7 @@ static int http_get_file(const char *url, const char *filename, int options)
 		goto cleanup;
 	}
 
-	ret = http_request_reauth(url, result, HTTP_REQUEST_FILE, options);
+	ret = http_request_reauth(url, NULL, result, HTTP_REQUEST_FILE, options);
 	fclose(result);
 
 	if ((ret == HTTP_OK) && move_temp_to_file(tmpfile.buf, filename))
@@ -904,7 +917,7 @@ int http_fetch_ref(const char *base, struct ref *ref)
 	int ret = -1;
 
 	url = quote_ref_url(base, ref->name);
-	if (http_get_strbuf(url, &buffer, HTTP_NO_CACHE) == HTTP_OK) {
+	if (http_get_strbuf(url, NULL, &buffer, HTTP_NO_CACHE) == HTTP_OK) {
 		strbuf_rtrim(&buffer);
 		if (buffer.len == 40)
 			ret = get_sha1_hex(buffer.buf, ref->old_sha1);
@@ -997,7 +1010,7 @@ int http_get_info_packs(const char *base_url, struct packed_git **packs_head)
 	strbuf_addstr(&buf, "objects/info/packs");
 	url = strbuf_detach(&buf, NULL);
 
-	ret = http_get_strbuf(url, &buf, HTTP_NO_CACHE);
+	ret = http_get_strbuf(url, NULL, &buf, HTTP_NO_CACHE);
 	if (ret != HTTP_OK)
 		goto cleanup;
 
diff --git a/http.h b/http.h
index 0a80d30..25d1931 100644
--- a/http.h
+++ b/http.h
@@ -132,7 +132,7 @@ extern char *get_remote_object_url(const char *url, const char *hex,
  *
  * If the result pointer is NULL, a HTTP HEAD request is made instead of GET.
  */
-int http_get_strbuf(const char *url, struct strbuf *result, int options);
+int http_get_strbuf(const char *url, struct strbuf *content_type, struct strbuf *result, int options);
 
 /*
  * Prints an error message using error() containing url and curl_errorstr,
diff --git a/remote-curl.c b/remote-curl.c
index 9a8b123..e6f3b63 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -92,6 +92,8 @@ static void free_discovery(struct discovery *d)
 
 static struct discovery* discover_refs(const char *service)
 {
+	struct strbuf exp = STRBUF_INIT;
+	struct strbuf type = STRBUF_INIT;
 	struct strbuf buffer = STRBUF_INIT;
 	struct discovery *last = last_discovery;
 	char *refs_url;
@@ -113,7 +115,7 @@ static struct discovery* discover_refs(const char *service)
 	}
 	refs_url = strbuf_detach(&buffer, NULL);
 
-	http_ret = http_get_strbuf(refs_url, &buffer, HTTP_NO_CACHE);
+	http_ret = http_get_strbuf(refs_url, &type, &buffer, HTTP_NO_CACHE);
 	switch (http_ret) {
 	case HTTP_OK:
 		break;
@@ -133,16 +135,19 @@ static struct discovery* discover_refs(const char *service)
 	last->buf = last->buf_alloc;
 
 	if (maybe_smart && 5 <= last->len && last->buf[4] == '#') {
-		/* smart HTTP response; validate that the service
+		/*
+		 * smart HTTP response; validate that the service
 		 * pkt-line matches our request.
 		 */
-		struct strbuf exp = STRBUF_INIT;
-
+		strbuf_addf(&exp, "application/x-%s-advertisement", service);
+		if (strbuf_cmp(&exp, &type))
+			die("invalid content-type %s", type.buf);
 		if (packet_get_line(&buffer, &last->buf, &last->len) <= 0)
 			die("%s has invalid packet header", refs_url);
 		if (buffer.len && buffer.buf[buffer.len - 1] == '\n')
 			strbuf_setlen(&buffer, buffer.len - 1);
 
+		strbuf_reset(&exp);
 		strbuf_addf(&exp, "# service=%s", service);
 		if (strbuf_cmp(&exp, &buffer))
 			die("invalid server response; got '%s'", buffer.buf);
@@ -160,6 +165,8 @@ static struct discovery* discover_refs(const char *service)
 	}
 
 	free(refs_url);
+	strbuf_release(&exp);
+	strbuf_release(&type);
 	strbuf_release(&buffer);
 	last_discovery = last;
 	return last;
diff --git a/t/lib-httpd.sh b/t/lib-httpd.sh
index 02f442b..895b925 100644
--- a/t/lib-httpd.sh
+++ b/t/lib-httpd.sh
@@ -80,6 +80,7 @@ fi
 prepare_httpd() {
 	mkdir -p "$HTTPD_DOCUMENT_ROOT_PATH"
 	cp "$TEST_PATH"/passwd "$HTTPD_ROOT_PATH"
+	cp "$TEST_PATH"/broken-smart-http.sh "$HTTPD_ROOT_PATH"
 
 	ln -s "$LIB_HTTPD_MODULE_PATH" "$HTTPD_ROOT_PATH/modules"
 
diff --git a/t/lib-httpd/apache.conf b/t/lib-httpd/apache.conf
index fe76e84..938b4cf 100644
--- a/t/lib-httpd/apache.conf
+++ b/t/lib-httpd/apache.conf
@@ -62,9 +62,13 @@ Alias /auth/dumb/ www/auth/dumb/
 	SetEnv GIT_COMMITTER_EMAIL custom@example.com
 </LocationMatch>
 ScriptAliasMatch /smart_*[^/]*/(.*) ${GIT_EXEC_PATH}/git-http-backend/$1
+ScriptAlias /broken_smart/ broken-smart-http.sh/
 <Directory ${GIT_EXEC_PATH}>
 	Options FollowSymlinks
 </Directory>
+<Files broken-smart-http.sh>
+	Options ExecCGI
+</Files>
 <Files ${GIT_EXEC_PATH}/git-http-backend>
 	Options ExecCGI
 </Files>
diff --git a/t/lib-httpd/broken-smart-http.sh b/t/lib-httpd/broken-smart-http.sh
new file mode 100755
index 0000000..f7ebfff
--- /dev/null
+++ b/t/lib-httpd/broken-smart-http.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+printf "Content-Type: text/%s\n" "html"
+echo
+printf "%s\n" "001e# service=git-upload-pack"
+printf "%s"   "0000"
+printf "%s%c%s%s\n" \
+	"00a58681d9f286a48b08f37b3a095330da16689e3693 HEAD" \
+	0 \
+	" include-tag multi_ack_detailed multi_ack ofs-delta" \
+	" side-band side-band-64k thin-pack no-progress shallow no-done "
+printf "%s"   "0000"
diff --git a/t/t5551-http-fetch.sh b/t/t5551-http-fetch.sh
index c5cd2e3..cb95b95 100755
--- a/t/t5551-http-fetch.sh
+++ b/t/t5551-http-fetch.sh
@@ -157,6 +157,12 @@ test_expect_success 'GIT_SMART_HTTP can disable smart http' '
 	 test_must_fail git fetch)
 '
 
+test_expect_success 'invalid Content-Type rejected' '
+	echo "fatal: invalid content-type text/html" >expect
+	test_must_fail git clone $HTTPD_URL/broken_smart/repo.git 2>actual
+	test_cmp expect actual
+'
+
 test -n "$GIT_TEST_LONG" && test_set_prereq EXPENSIVE
 
 test_expect_success EXPENSIVE 'create 50,000 tags in the repo' '
-- 
1.8.1.2.605.gb99210a

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] Verify Content-Type from smart HTTP servers
  2013-01-31 22:09 [PATCH] Verify Content-Type from smart HTTP servers Junio C Hamano
@ 2013-02-01  8:52 ` Jeff King
  2013-02-01 18:09   ` Junio C Hamano
  2013-02-06 10:24 ` Michael Schubert
  1 sibling, 1 reply; 13+ messages in thread
From: Jeff King @ 2013-02-01  8:52 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Shawn Pearce, git

On Thu, Jan 31, 2013 at 02:09:40PM -0800, Junio C Hamano wrote:

> Before parsing a suspected smart-HTTP response verify the returned
> Content-Type matches the standard. This protects a client from
> attempting to process a payload that smells like a smart-HTTP
> server response.
> 
> JGit has been doing this check on all responses since the dawn of
> time. I mistakenly failed to include it in git-core when smart HTTP
> was introduced. At the time I didn't know how to get the Content-Type
> from libcurl. I punted, meant to circle back and fix this, and just
> plain forgot about it.
> 
> Signed-off-by: Shawn Pearce <spearce@spearce.org>
> Signed-off-by: Junio C Hamano <gitster@pobox.com>

Should this be "From:" Shawn? The tone of the message and the S-O-B
order makes it look like it.

> @@ -133,16 +135,19 @@ static struct discovery* discover_refs(const char *service)
>  	last->buf = last->buf_alloc;
>  
>  	if (maybe_smart && 5 <= last->len && last->buf[4] == '#') {
> -		/* smart HTTP response; validate that the service
> +		/*
> +		 * smart HTTP response; validate that the service
>  		 * pkt-line matches our request.
>  		 */
> -		struct strbuf exp = STRBUF_INIT;
> -
> +		strbuf_addf(&exp, "application/x-%s-advertisement", service);
> +		if (strbuf_cmp(&exp, &type))
> +			die("invalid content-type %s", type.buf);

Hmm. I wondered if it is possible for a non-smart server to send us down
this code path, which would now complain of the bogus content-type.
Something like an info/refs file with:

  # 1
  # the comment above is meaningless, but puts a "#" at position 4.

But I note that we would already die in the next line:

>  		if (packet_get_line(&buffer, &last->buf, &last->len) <= 0)
>  			die("%s has invalid packet header", refs_url);

so I do not think the patch makes anything worse. However, should we
take this opportunity to make the "did we get a smart response" test
more robust? That is, should we actually be checking the content-type
in the outer conditional, and going down the smart code-path if it is
application/x-%s-advertisement, and otherwise treating the result as
dumb?

It's probably not a big deal, as the false positive example above is
quite specific and unlikely, but it just seems cleaner to me.

As a side note, should we (can we) care about the content-type for dumb
http? It should probably be text/plain or application/octet-stream, but
I would not be surprised if we get a variety of random junk in the real
world, though.

-Peff

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Verify Content-Type from smart HTTP servers
  2013-02-01  8:52 ` Jeff King
@ 2013-02-01 18:09   ` Junio C Hamano
  2013-02-01 18:58     ` Jeff King
  0 siblings, 1 reply; 13+ messages in thread
From: Junio C Hamano @ 2013-02-01 18:09 UTC (permalink / raw)
  To: Jeff King; +Cc: Shawn Pearce, git

Jeff King <peff@peff.net> writes:

> Should this be "From:" Shawn? The tone of the message and the S-O-B
> order makes it look like it.

Yes. I should have left that line when edited the format-patch
output in my MUA to say I was resending something that vger rejected
and people did not see after tweaking the patch to slip their taboo
list.

>> @@ -133,16 +135,19 @@ static struct discovery* discover_refs(const char *service)
>>  	last->buf = last->buf_alloc;
>>  
>>  	if (maybe_smart && 5 <= last->len && last->buf[4] == '#') {
>> -		/* smart HTTP response; validate that the service
>> +		/*
>> +		 * smart HTTP response; validate that the service
>>  		 * pkt-line matches our request.
>>  		 */
>> -		struct strbuf exp = STRBUF_INIT;
>> -
>> +		strbuf_addf(&exp, "application/x-%s-advertisement", service);
>> +		if (strbuf_cmp(&exp, &type))
>> +			die("invalid content-type %s", type.buf);
>
> Hmm. I wondered if it is possible for a non-smart server to send us down
> this code path, which would now complain of the bogus content-type.
> Something like an info/refs file with:
>
>   # 1
>   # the comment above is meaningless, but puts a "#" at position 4.
>
> But I note that we would already die in the next line:
>
>>  		if (packet_get_line(&buffer, &last->buf, &last->len) <= 0)
>>  			die("%s has invalid packet header", refs_url);
>
> so I do not think the patch makes anything worse. However, should we
> take this opportunity to make the "did we get a smart response" test
> more robust? That is, should we actually be checking the content-type
> in the outer conditional, and going down the smart code-path if it is
> application/x-%s-advertisement, and otherwise treating the result as
> dumb?

Does the outer caller that switches between dumb and smart actually
know what service type it is requesting (I am not familiar with the
callchain involved)?  Even if it doesn't, it may still make sense.

> As a side note, should we (can we) care about the content-type for dumb
> http? It should probably be text/plain or application/octet-stream, but
> I would not be surprised if we get a variety of random junk in the real
> world, though.

The design objective of dumb http protocol was to allow working with
any dumb bit transfer thing, so I'd prefer to keep it lenient and
allow application/x-git-loose-object-file and somesuch.

Thanks.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Verify Content-Type from smart HTTP servers
  2013-02-01 18:09   ` Junio C Hamano
@ 2013-02-01 18:58     ` Jeff King
  2013-02-04  7:17       ` Junio C Hamano
  0 siblings, 1 reply; 13+ messages in thread
From: Jeff King @ 2013-02-01 18:58 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Shawn Pearce, git

On Fri, Feb 01, 2013 at 10:09:26AM -0800, Junio C Hamano wrote:

> > so I do not think the patch makes anything worse. However, should we
> > take this opportunity to make the "did we get a smart response" test
> > more robust? That is, should we actually be checking the content-type
> > in the outer conditional, and going down the smart code-path if it is
> > application/x-%s-advertisement, and otherwise treating the result as
> > dumb?
> 
> Does the outer caller that switches between dumb and smart actually
> know what service type it is requesting (I am not familiar with the
> callchain involved)?  Even if it doesn't, it may still make sense.

I was specifically thinking of this (on top of your patch):

diff --git a/remote-curl.c b/remote-curl.c
index e6f3b63..63680a8 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -134,14 +134,12 @@ static struct discovery* discover_refs(const char *service)
 	last->buf_alloc = strbuf_detach(&buffer, &last->len);
 	last->buf = last->buf_alloc;
 
-	if (maybe_smart && 5 <= last->len && last->buf[4] == '#') {
+	strbuf_addf(&exp, "application/x-%s-advertisement", service);
+	if (maybe_smart && !strbuf_cmp(&exp, &type)) {
 		/*
 		 * smart HTTP response; validate that the service
 		 * pkt-line matches our request.
 		 */
-		strbuf_addf(&exp, "application/x-%s-advertisement", service);
-		if (strbuf_cmp(&exp, &type))
-			die("invalid content-type %s", type.buf);
 		if (packet_get_line(&buffer, &last->buf, &last->len) <= 0)
 			die("%s has invalid packet header", refs_url);
 		if (buffer.len && buffer.buf[buffer.len - 1] == '\n')

To just follow the dumb path if we don't get the content-type we expect.
We may want to keep the '#' format check in addition (packet_get_line
will check it and die, anyway, but we may want to drop back to
considering it dumb, just to protect against a badly configured dumb
server which uses our mime type, but I do not think it likely).

> > As a side note, should we (can we) care about the content-type for dumb
> > http? It should probably be text/plain or application/octet-stream, but
> > I would not be surprised if we get a variety of random junk in the real
> > world, though.
> 
> The design objective of dumb http protocol was to allow working with
> any dumb bit transfer thing, so I'd prefer to keep it lenient and
> allow application/x-git-loose-object-file and somesuch.

Yeah, I do not think it really buys us anything in practice, and we have
no way of knowing what kind of crap is in the wild. Not worth it.

-Peff

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] Verify Content-Type from smart HTTP servers
  2013-02-01 18:58     ` Jeff King
@ 2013-02-04  7:17       ` Junio C Hamano
  2013-02-04  8:38         ` Jeff King
  0 siblings, 1 reply; 13+ messages in thread
From: Junio C Hamano @ 2013-02-04  7:17 UTC (permalink / raw)
  To: Jeff King; +Cc: Shawn Pearce, git

Jeff King <peff@peff.net> writes:

> I was specifically thinking of this (on top of your patch):
>
> diff --git a/remote-curl.c b/remote-curl.c
> index e6f3b63..63680a8 100644
> --- a/remote-curl.c
> +++ b/remote-curl.c
> @@ -134,14 +134,12 @@ static struct discovery* discover_refs(const char *service)
>  	last->buf_alloc = strbuf_detach(&buffer, &last->len);
>  	last->buf = last->buf_alloc;
>  
> -	if (maybe_smart && 5 <= last->len && last->buf[4] == '#') {
> +	strbuf_addf(&exp, "application/x-%s-advertisement", service);
> +	if (maybe_smart && !strbuf_cmp(&exp, &type)) {
>  		/*
>  		 * smart HTTP response; validate that the service
>  		 * pkt-line matches our request.
>  		 */
> -		strbuf_addf(&exp, "application/x-%s-advertisement", service);
> -		if (strbuf_cmp(&exp, &type))
> -			die("invalid content-type %s", type.buf);
>  		if (packet_get_line(&buffer, &last->buf, &last->len) <= 0)
>  			die("%s has invalid packet header", refs_url);
>  		if (buffer.len && buffer.buf[buffer.len - 1] == '\n')
>
> To just follow the dumb path if we don't get the content-type we expect.
> We may want to keep the '#' format check in addition (packet_get_line
> will check it and die, anyway, but we may want to drop back to
> considering it dumb, just to protect against a badly configured dumb
> server which uses our mime type, but I do not think it likely).

Yeah, but it doesn't cost anything to check, so let's do so.

Does this look good to both of you (relative to Shawn's patch)?

 remote-curl.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/remote-curl.c b/remote-curl.c
index e6f3b63..933c69a 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -134,14 +134,14 @@ static struct discovery* discover_refs(const char *service)
 	last->buf_alloc = strbuf_detach(&buffer, &last->len);
 	last->buf = last->buf_alloc;
 
-	if (maybe_smart && 5 <= last->len && last->buf[4] == '#') {
+	strbuf_addf(&exp, "application/x-%s-advertisement", service);
+	if (maybe_smart &&
+	    (5 <= last->len && last->buf[4] == '#') &&
+	    !strbuf_cmp(&exp, &type)) {
 		/*
 		 * smart HTTP response; validate that the service
 		 * pkt-line matches our request.
 		 */
-		strbuf_addf(&exp, "application/x-%s-advertisement", service);
-		if (strbuf_cmp(&exp, &type))
-			die("invalid content-type %s", type.buf);
 		if (packet_get_line(&buffer, &last->buf, &last->len) <= 0)
 			die("%s has invalid packet header", refs_url);
 		if (buffer.len && buffer.buf[buffer.len - 1] == '\n')

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] Verify Content-Type from smart HTTP servers
  2013-02-04  7:17       ` Junio C Hamano
@ 2013-02-04  8:38         ` Jeff King
  2013-02-04 23:49           ` Shawn Pearce
  0 siblings, 1 reply; 13+ messages in thread
From: Jeff King @ 2013-02-04  8:38 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Shawn Pearce, git

On Sun, Feb 03, 2013 at 11:17:33PM -0800, Junio C Hamano wrote:

> Does this look good to both of you (relative to Shawn's patch)?
> 
>  remote-curl.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/remote-curl.c b/remote-curl.c
> index e6f3b63..933c69a 100644
> --- a/remote-curl.c
> +++ b/remote-curl.c
> @@ -134,14 +134,14 @@ static struct discovery* discover_refs(const char *service)
>  	last->buf_alloc = strbuf_detach(&buffer, &last->len);
>  	last->buf = last->buf_alloc;
>  
> -	if (maybe_smart && 5 <= last->len && last->buf[4] == '#') {
> +	strbuf_addf(&exp, "application/x-%s-advertisement", service);
> +	if (maybe_smart &&
> +	    (5 <= last->len && last->buf[4] == '#') &&
> +	    !strbuf_cmp(&exp, &type)) {
>  		/*
>  		 * smart HTTP response; validate that the service
>  		 * pkt-line matches our request.
>  		 */
> -		strbuf_addf(&exp, "application/x-%s-advertisement", service);
> -		if (strbuf_cmp(&exp, &type))
> -			die("invalid content-type %s", type.buf);
>  		if (packet_get_line(&buffer, &last->buf, &last->len) <= 0)
>  			die("%s has invalid packet header", refs_url);
>  		if (buffer.len && buffer.buf[buffer.len - 1] == '\n')

Yeah, I think that's fine. Thanks.

-Peff

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Verify Content-Type from smart HTTP servers
  2013-02-04  8:38         ` Jeff King
@ 2013-02-04 23:49           ` Shawn Pearce
  2013-02-05  0:21             ` Junio C Hamano
  0 siblings, 1 reply; 13+ messages in thread
From: Shawn Pearce @ 2013-02-04 23:49 UTC (permalink / raw)
  To: Jeff King; +Cc: Junio C Hamano, git

On Mon, Feb 4, 2013 at 12:38 AM, Jeff King <peff@peff.net> wrote:
> On Sun, Feb 03, 2013 at 11:17:33PM -0800, Junio C Hamano wrote:
>
> > Does this look good to both of you (relative to Shawn's patch)?
> >
> >  remote-curl.c | 8 ++++----
> >  1 file changed, 4 insertions(+), 4 deletions(-)
> >
> > diff --git a/remote-curl.c b/remote-curl.c
> > index e6f3b63..933c69a 100644
> > --- a/remote-curl.c
> > +++ b/remote-curl.c
> > @@ -134,14 +134,14 @@ static struct discovery* discover_refs(const char *service)
> >       last->buf_alloc = strbuf_detach(&buffer, &last->len);
> >       last->buf = last->buf_alloc;
> >
> > -     if (maybe_smart && 5 <= last->len && last->buf[4] == '#') {
> > +     strbuf_addf(&exp, "application/x-%s-advertisement", service);
> > +     if (maybe_smart &&
> > +         (5 <= last->len && last->buf[4] == '#') &&
> > +         !strbuf_cmp(&exp, &type)) {
> >               /*
> >                * smart HTTP response; validate that the service
> >                * pkt-line matches our request.
> >                */
> > -             strbuf_addf(&exp, "application/x-%s-advertisement", service);
> > -             if (strbuf_cmp(&exp, &type))
> > -                     die("invalid content-type %s", type.buf);
> >               if (packet_get_line(&buffer, &last->buf, &last->len) <= 0)
> >                       die("%s has invalid packet header", refs_url);
> >               if (buffer.len && buffer.buf[buffer.len - 1] == '\n')
>
> Yeah, I think that's fine. Thanks.

Looks fine to me too, but I think the test won't work now. :-)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Verify Content-Type from smart HTTP servers
  2013-02-04 23:49           ` Shawn Pearce
@ 2013-02-05  0:21             ` Junio C Hamano
  0 siblings, 0 replies; 13+ messages in thread
From: Junio C Hamano @ 2013-02-05  0:21 UTC (permalink / raw)
  To: Shawn Pearce; +Cc: Jeff King, git

Shawn Pearce <spearce@spearce.org> writes:

> Looks fine to me too, but I think the test won't work now. :-)

Heh, that's amusing ;-)

 t/t5551-http-fetch.sh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/t/t5551-http-fetch.sh b/t/t5551-http-fetch.sh
index cb95b95..47eb769 100755
--- a/t/t5551-http-fetch.sh
+++ b/t/t5551-http-fetch.sh
@@ -158,9 +158,8 @@ test_expect_success 'GIT_SMART_HTTP can disable smart http' '
 '
 
 test_expect_success 'invalid Content-Type rejected' '
-	echo "fatal: invalid content-type text/html" >expect
 	test_must_fail git clone $HTTPD_URL/broken_smart/repo.git 2>actual
-	test_cmp expect actual
+	grep "not valid:" actual
 '
 
 test -n "$GIT_TEST_LONG" && test_set_prereq EXPENSIVE

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] Verify Content-Type from smart HTTP servers
  2013-01-31 22:09 [PATCH] Verify Content-Type from smart HTTP servers Junio C Hamano
  2013-02-01  8:52 ` Jeff King
@ 2013-02-06 10:24 ` Michael Schubert
  2013-02-06 10:39   ` Jeff King
  1 sibling, 1 reply; 13+ messages in thread
From: Michael Schubert @ 2013-02-06 10:24 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Shawn Pearce, git, Jeff King

On 01/31/2013 11:09 PM, Junio C Hamano wrote:

>  
> -static int http_request_reauth(const char *url, void *result, int target,
> +static int http_request_reauth(const char *url,
> +			       struct strbuf *type,
> +			       void *result, int target,
>  			       int options)
>  {
> -	int ret = http_request(url, result, target, options);
> +	int ret = http_request(url, type, result, target, options);
>  	if (ret != HTTP_REAUTH)
>  		return ret;
> -	return http_request(url, result, target, options);
> +	return http_request(url, type, result, target, options);
>  }

This needs something like

diff --git a/http.c b/http.c
index d868d8b..da43be3 100644
--- a/http.c
+++ b/http.c
@@ -860,6 +860,8 @@ static int http_request_reauth(const char *url,
        int ret = http_request(url, type, result, target, options);
        if (ret != HTTP_REAUTH)
                return ret;
+       if (type)
+               strbuf_reset(type);
        return http_request(url, type, result, target, options);
 }

on top. Otherwise we get

"text/plainapplication/x-git-receive-pack-advertisement"

when doing HTTP auth.

Thanks.

> -int http_get_strbuf(const char *url, struct strbuf *result, int options)
> +int http_get_strbuf(const char *url,
> +		    struct strbuf *type,
> +		    struct strbuf *result, int options)
>  {
> -	return http_request_reauth(url, result, HTTP_REQUEST_STRBUF, options);
> +	return http_request_reauth(url, type, result,
> +				   HTTP_REQUEST_STRBUF, options);
>  }
>  
>  /*
> @@ -878,7 +891,7 @@ static int http_get_file(const char *url, const char *filename, int options)
>  		goto cleanup;
>  	}
>  
> -	ret = http_request_reauth(url, result, HTTP_REQUEST_FILE, options);
> +	ret = http_request_reauth(url, NULL, result, HTTP_REQUEST_FILE, options);
>  	fclose(result);
>  
>  	if ((ret == HTTP_OK) && move_temp_to_file(tmpfile.buf, filename))
> @@ -904,7 +917,7 @@ int http_fetch_ref(const char *base, struct ref *ref)
>  	int ret = -1;
>  
>  	url = quote_ref_url(base, ref->name);
> -	if (http_get_strbuf(url, &buffer, HTTP_NO_CACHE) == HTTP_OK) {
> +	if (http_get_strbuf(url, NULL, &buffer, HTTP_NO_CACHE) == HTTP_OK) {
>  		strbuf_rtrim(&buffer);
>  		if (buffer.len == 40)
>  			ret = get_sha1_hex(buffer.buf, ref->old_sha1);
> @@ -997,7 +1010,7 @@ int http_get_info_packs(const char *base_url, struct packed_git **packs_head)
>  	strbuf_addstr(&buf, "objects/info/packs");
>  	url = strbuf_detach(&buf, NULL);
>  
> -	ret = http_get_strbuf(url, &buf, HTTP_NO_CACHE);
> +	ret = http_get_strbuf(url, NULL, &buf, HTTP_NO_CACHE);
>  	if (ret != HTTP_OK)
>  		goto cleanup;
>  
> diff --git a/http.h b/http.h
> index 0a80d30..25d1931 100644
> --- a/http.h
> +++ b/http.h
> @@ -132,7 +132,7 @@ extern char *get_remote_object_url(const char *url, const char *hex,
>   *
>   * If the result pointer is NULL, a HTTP HEAD request is made instead of GET.
>   */
> -int http_get_strbuf(const char *url, struct strbuf *result, int options);
> +int http_get_strbuf(const char *url, struct strbuf *content_type, struct strbuf *result, int options);
>  
>  /*
>   * Prints an error message using error() containing url and curl_errorstr,
> diff --git a/remote-curl.c b/remote-curl.c
> index 9a8b123..e6f3b63 100644
> --- a/remote-curl.c
> +++ b/remote-curl.c
> @@ -92,6 +92,8 @@ static void free_discovery(struct discovery *d)
>  
>  static struct discovery* discover_refs(const char *service)
>  {
> +	struct strbuf exp = STRBUF_INIT;
> +	struct strbuf type = STRBUF_INIT;
>  	struct strbuf buffer = STRBUF_INIT;
>  	struct discovery *last = last_discovery;
>  	char *refs_url;
> @@ -113,7 +115,7 @@ static struct discovery* discover_refs(const char *service)
>  	}
>  	refs_url = strbuf_detach(&buffer, NULL);
>  
> -	http_ret = http_get_strbuf(refs_url, &buffer, HTTP_NO_CACHE);
> +	http_ret = http_get_strbuf(refs_url, &type, &buffer, HTTP_NO_CACHE);
>  	switch (http_ret) {
>  	case HTTP_OK:
>  		break;
> @@ -133,16 +135,19 @@ static struct discovery* discover_refs(const char *service)
>  	last->buf = last->buf_alloc;
>  
>  	if (maybe_smart && 5 <= last->len && last->buf[4] == '#') {
> -		/* smart HTTP response; validate that the service
> +		/*
> +		 * smart HTTP response; validate that the service
>  		 * pkt-line matches our request.
>  		 */
> -		struct strbuf exp = STRBUF_INIT;
> -
> +		strbuf_addf(&exp, "application/x-%s-advertisement", service);
> +		if (strbuf_cmp(&exp, &type))
> +			die("invalid content-type %s", type.buf);
>  		if (packet_get_line(&buffer, &last->buf, &last->len) <= 0)
>  			die("%s has invalid packet header", refs_url);
>  		if (buffer.len && buffer.buf[buffer.len - 1] == '\n')
>  			strbuf_setlen(&buffer, buffer.len - 1);
>  
> +		strbuf_reset(&exp);
>  		strbuf_addf(&exp, "# service=%s", service);
>  		if (strbuf_cmp(&exp, &buffer))
>  			die("invalid server response; got '%s'", buffer.buf);
> @@ -160,6 +165,8 @@ static struct discovery* discover_refs(const char *service)
>  	}
>  
>  	free(refs_url);
> +	strbuf_release(&exp);
> +	strbuf_release(&type);
>  	strbuf_release(&buffer);
>  	last_discovery = last;
>  	return last;

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] Verify Content-Type from smart HTTP servers
  2013-02-06 10:24 ` Michael Schubert
@ 2013-02-06 10:39   ` Jeff King
  2013-02-06 15:56     ` Junio C Hamano
  0 siblings, 1 reply; 13+ messages in thread
From: Jeff King @ 2013-02-06 10:39 UTC (permalink / raw)
  To: Michael Schubert; +Cc: Junio C Hamano, Shawn Pearce, git

On Wed, Feb 06, 2013 at 11:24:41AM +0100, Michael Schubert wrote:

> On 01/31/2013 11:09 PM, Junio C Hamano wrote:
> 
> >  
> > -static int http_request_reauth(const char *url, void *result, int target,
> > +static int http_request_reauth(const char *url,
> > +			       struct strbuf *type,
> > +			       void *result, int target,
> >  			       int options)
> >  {
> > -	int ret = http_request(url, result, target, options);
> > +	int ret = http_request(url, type, result, target, options);
> >  	if (ret != HTTP_REAUTH)
> >  		return ret;
> > -	return http_request(url, result, target, options);
> > +	return http_request(url, type, result, target, options);
> >  }
> 
> This needs something like
> 
> diff --git a/http.c b/http.c
> index d868d8b..da43be3 100644
> --- a/http.c
> +++ b/http.c
> @@ -860,6 +860,8 @@ static int http_request_reauth(const char *url,
>         int ret = http_request(url, type, result, target, options);
>         if (ret != HTTP_REAUTH)
>                 return ret;
> +       if (type)
> +               strbuf_reset(type);
>         return http_request(url, type, result, target, options);
>  }
> 
> on top. Otherwise we get
> 
> "text/plainapplication/x-git-receive-pack-advertisement"
> 
> when doing HTTP auth.

Good catch. It probably makes sense to put it in http_request, so that
we also protect against any existing cruft from the callers of
http_get_*, like:

-- >8 --
Subject: [PATCH] http_request: reset "type" strbuf before adding

Callers may pass us a strbuf which we use to record the
content-type of the response. However, we simply appended to
it rather than overwriting its contents, meaning that cruft
in the strbuf gave us a bogus type. E.g., the multiple
requests triggered by http_request could yield a type like
"text/plainapplication/x-git-receive-pack-advertisement".

Reported-by: Michael Schubert <mschub@elegosoft.com>
Signed-off-by: Jeff King <peff@peff.net>
---
Is it worth having a strbuf_set* family of functions to match the
strbuf_add*? We seem to have these sorts of errors with strbuf from time
to time, and I wonder if that would make it easier (and more readable)
to do the right thing.

 http.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/http.c b/http.c
index d868d8b..d9d1aad 100644
--- a/http.c
+++ b/http.c
@@ -841,6 +841,7 @@ static int http_request(const char *url, struct strbuf *type,
 
 	if (type) {
 		char *t;
+		strbuf_reset(type);
 		curl_easy_getinfo(slot->curl, CURLINFO_CONTENT_TYPE, &t);
 		if (t)
 			strbuf_addstr(type, t);
-- 
1.8.1.2.11.g1a2f572

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] Verify Content-Type from smart HTTP servers
  2013-02-06 10:39   ` Jeff King
@ 2013-02-06 15:56     ` Junio C Hamano
  2013-02-06 22:47       ` Jeff King
  0 siblings, 1 reply; 13+ messages in thread
From: Junio C Hamano @ 2013-02-06 15:56 UTC (permalink / raw)
  To: Jeff King; +Cc: Michael Schubert, Shawn Pearce, git

Jeff King <peff@peff.net> writes:

> Is it worth having a strbuf_set* family of functions to match the
> strbuf_add*? We seem to have these sorts of errors with strbuf from time
> to time, and I wonder if that would make it easier (and more readable)
> to do the right thing.

Possibly.

The callsite below may be a poor example, though; you would need the
_reset() even if you change the _addstr() we can see in the context
to _setstr() to make sure later strbuf_*(type) will start from a
clean slate when !t anyway, no?

>
>  http.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/http.c b/http.c
> index d868d8b..d9d1aad 100644
> --- a/http.c
> +++ b/http.c
> @@ -841,6 +841,7 @@ static int http_request(const char *url, struct strbuf *type,
>  
>  	if (type) {
>  		char *t;
> +		strbuf_reset(type);
>  		curl_easy_getinfo(slot->curl, CURLINFO_CONTENT_TYPE, &t);
>  		if (t)
>  			strbuf_addstr(type, t);

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Verify Content-Type from smart HTTP servers
  2013-02-06 15:56     ` Junio C Hamano
@ 2013-02-06 22:47       ` Jeff King
  0 siblings, 0 replies; 13+ messages in thread
From: Jeff King @ 2013-02-06 22:47 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Michael Schubert, Shawn Pearce, git

On Wed, Feb 06, 2013 at 07:56:08AM -0800, Junio C Hamano wrote:

> Jeff King <peff@peff.net> writes:
> 
> > Is it worth having a strbuf_set* family of functions to match the
> > strbuf_add*? We seem to have these sorts of errors with strbuf from time
> > to time, and I wonder if that would make it easier (and more readable)
> > to do the right thing.
> 
> Possibly.
> 
> The callsite below may be a poor example, though; you would need the
> _reset() even if you change the _addstr() we can see in the context
> to _setstr() to make sure later strbuf_*(type) will start from a
> clean slate when !t anyway, no?

Ah, true. Let's not worry about it, then.

-Peff

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Verify Content-Type from smart HTTP servers
       [not found] ` <7vd2wlf1zf.fsf@alter.siamese.dyndns.org>
@ 2013-01-31 22:36   ` Shawn Pearce
  0 siblings, 0 replies; 13+ messages in thread
From: Shawn Pearce @ 2013-01-31 22:36 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Thu, Jan 31, 2013 at 1:59 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Shawn Pearce <spearce@spearce.org> writes:
>
>> Before parsing a suspected smart-HTTP response verify the returned
>> Content-Type matches the standard. This protects a client from
>> attempting to process a payload that smells like a smart-HTTP
>> server response.
>>
>> JGit has been doing this check on all responses since the dawn of
>> time. I mistakenly failed to include it in git-core when smart HTTP
>> was introduced. At the time I didn't know how to get the Content-Type
>> from libcurl. I punted, meant to circle back and fix this, and just
>> plain forgot about it.
>>
>> Signed-off-by: Shawn Pearce <spearce@spearce.org>
>> ---
>
> Sounds sensible.  Was there a report of attack attempts by malicious
> servers or something, or is it just a general "common sense" thing?

Common-sense cleanup.

I had a report a while ago about JGit not working with the Git servers
at Codeplex. This failure was caused by their HTTP servers returning
an invalid Content-Type, making JGit refuse to continue parsing. This
has since been fixed, I verified this morning that Codeplex is
returning the correct Content-Type.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2013-02-06 22:47 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-01-31 22:09 [PATCH] Verify Content-Type from smart HTTP servers Junio C Hamano
2013-02-01  8:52 ` Jeff King
2013-02-01 18:09   ` Junio C Hamano
2013-02-01 18:58     ` Jeff King
2013-02-04  7:17       ` Junio C Hamano
2013-02-04  8:38         ` Jeff King
2013-02-04 23:49           ` Shawn Pearce
2013-02-05  0:21             ` Junio C Hamano
2013-02-06 10:24 ` Michael Schubert
2013-02-06 10:39   ` Jeff King
2013-02-06 15:56     ` Junio C Hamano
2013-02-06 22:47       ` Jeff King
     [not found] <1359666943-13316-1-git-send-email-spearce@spearce.org>
     [not found] ` <7vd2wlf1zf.fsf@alter.siamese.dyndns.org>
2013-01-31 22:36   ` Shawn Pearce

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.