All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/3 v2] Use an external program to implement fetching with curl
@ 2009-07-28  6:08 Daniel Barkalow
  2009-07-28 13:19 ` Johannes Schindelin
  0 siblings, 1 reply; 6+ messages in thread
From: Daniel Barkalow @ 2009-07-28  6:08 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Linus Torvalds

Use the transport shim mechanism to fetch by http (and ftp, etc).

Signed-off-by: Daniel Barkalow <barkalow@iabervon.org>
---
 Makefile    |    5 ++
 shim-curl.c |  132 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 transport.c |  136 +----------------------------------------------------------
 3 files changed, 138 insertions(+), 135 deletions(-)
 create mode 100644 shim-curl.c

diff --git a/Makefile b/Makefile
index 01efc73..d3dd2ed 100644
--- a/Makefile
+++ b/Makefile
@@ -980,6 +980,7 @@ else
 		CURL_LIBCURL = -lcurl
 	endif
 	BUILTIN_OBJS += builtin-http-fetch.o
+	PROGRAMS += git-shim-curl$X
 	EXTLIBS += $(CURL_LIBCURL)
 	LIB_OBJS += http.o http-walker.o
 	curl_check := $(shell (echo 070908; curl-config --vernum) | sort -r | sed -ne 2p)
@@ -1490,6 +1491,10 @@ git-http-push$X: revision.o http.o http-push.o $(GITLIBS)
 	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
 		$(LIBS) $(CURL_LIBCURL) $(EXPAT_LIBEXPAT)
 
+git-shim-curl$X: shim-curl.o http.o http-walker.o $(GITLIBS)
+	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
+		$(LIBS) $(CURL_LIBCURL) $(EXPAT_LIBEXPAT)
+
 $(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
 $(patsubst git-%$X,%.o,$(PROGRAMS)) git.o: $(LIB_H) $(wildcard */*.h)
 builtin-revert.o wt-status.o: wt-status.h
diff --git a/shim-curl.c b/shim-curl.c
new file mode 100644
index 0000000..bde041b
--- /dev/null
+++ b/shim-curl.c
@@ -0,0 +1,132 @@
+#include "cache.h"
+#include "remote.h"
+#include "strbuf.h"
+#include "walker.h"
+#include "http.h"
+
+static struct ref *get_refs(struct walker *walker, const char *url)
+{
+	int http_ret;
+	char *refs_url;
+	struct strbuf buffer = STRBUF_INIT;
+	char *data, *start, *mid;
+	int i = 0;
+	char *ref_name;
+	struct ref *refs = NULL;
+	struct ref *ref = NULL;
+	struct ref *last_ref = NULL;
+
+	refs_url = xmalloc(strlen(url) + 11);
+	sprintf(refs_url, "%s/info/refs", url);
+
+	http_ret = http_get_strbuf(refs_url, &buffer, HTTP_NO_CACHE);
+	switch (http_ret) {
+	case HTTP_OK:
+		break;
+	case HTTP_MISSING_TARGET:
+		die("%s not found: did you run git update-server-info on the"
+		    " server?", refs_url);
+	default:
+		http_error(refs_url, http_ret);
+		die("HTTP request failed");
+	}
+	free(refs_url);
+
+	data = buffer.buf;
+	start = NULL;
+	mid = data;
+	while (i < buffer.len) {
+		if (!start) {
+			start = &data[i];
+		}
+		if (data[i] == '\t')
+			mid = &data[i];
+		if (data[i] == '\n') {
+			data[i] = 0;
+			ref_name = mid + 1;
+			ref = xmalloc(sizeof(struct ref) +
+				      strlen(ref_name) + 1);
+			memset(ref, 0, sizeof(struct ref));
+			strcpy(ref->name, ref_name);
+			get_sha1_hex(start, ref->old_sha1);
+			if (!refs)
+				refs = ref;
+			if (last_ref)
+				last_ref->next = ref;
+			last_ref = ref;
+			start = NULL;
+		}
+		i++;
+	}
+	strbuf_release(&buffer);
+
+	ref = alloc_ref("HEAD");
+	if (!walker->fetch_ref(walker, ref) &&
+	    !resolve_remote_symref(ref, refs)) {
+		ref->next = refs;
+		refs = ref;
+	} else {
+		free(ref);
+	}
+
+	return refs;
+}
+
+int main(int argc, const char **argv)
+{
+	struct remote *remote;
+	struct strbuf buf = STRBUF_INIT;
+	const char *url;
+	struct walker *walker = NULL;
+
+	setup_git_directory();
+	if (argc < 2) {
+		fprintf(stderr, "Remote needed\n");
+		return 1;
+	}
+
+	remote = remote_get(argv[1]);
+
+	if (argc > 2) {
+		url = argv[2];
+	} else {
+		url = remote->url[0];
+	}
+
+	do {
+		if (strbuf_getline(&buf, stdin, '\n') == EOF)
+			break;
+		if (!prefixcmp(buf.buf, "fetch ")) {
+			char *obj = buf.buf + strlen("fetch ");
+			if (!walker)
+				walker = get_http_walker(url, remote);
+			walker->get_all = 1;
+			walker->get_tree = 1;
+			walker->get_history = 1;
+			walker->get_verbosely = 0;
+			walker->get_recover = 0;
+			if (walker_fetch(walker, 1, &obj, NULL, NULL))
+				die("Fetch failed.");
+			printf("\n");
+			fflush(stdout);
+		} else if (!strcmp(buf.buf, "list")) {
+			struct ref *refs;
+			struct ref *posn;
+			if (!walker)
+				walker = get_http_walker(url, remote);
+			refs = get_refs(walker, url);
+			for (posn = refs; posn; posn = posn->next) {
+				if (posn->symref)
+					printf("@%s %s\n", posn->symref, posn->name);
+				else
+					printf("%s %s\n", sha1_to_hex(posn->old_sha1), posn->name);
+			}
+			printf("\n");
+			fflush(stdout);
+		} else {
+			return 1;
+		}
+		strbuf_reset(&buf);
+	} while (1);
+	return 0;
+}
diff --git a/transport.c b/transport.c
index de0d587..6d13b0e 100644
--- a/transport.c
+++ b/transport.c
@@ -1,9 +1,6 @@
 #include "cache.h"
 #include "transport.h"
 #include "run-command.h"
-#ifndef NO_CURL
-#include "http.h"
-#endif
 #include "pkt-line.h"
 #include "fetch-pack.h"
 #include "send-pack.h"
@@ -352,45 +349,6 @@ static int rsync_transport_push(struct transport *transport,
 	return result;
 }
 
-/* Generic functions for using commit walkers */
-
-#ifndef NO_CURL /* http fetch is the only user */
-static int fetch_objs_via_walker(struct transport *transport,
-				 int nr_objs, const struct ref **to_fetch)
-{
-	char *dest = xstrdup(transport->url);
-	struct walker *walker = transport->data;
-	char **objs = xmalloc(nr_objs * sizeof(*objs));
-	int i;
-
-	walker->get_all = 1;
-	walker->get_tree = 1;
-	walker->get_history = 1;
-	walker->get_verbosely = transport->verbose >= 0;
-	walker->get_recover = 0;
-
-	for (i = 0; i < nr_objs; i++)
-		objs[i] = xstrdup(sha1_to_hex(to_fetch[i]->old_sha1));
-
-	if (walker_fetch(walker, nr_objs, objs, NULL, NULL))
-		die("Fetch failed.");
-
-	for (i = 0; i < nr_objs; i++)
-		free(objs[i]);
-	free(objs);
-	free(dest);
-	return 0;
-}
-#endif /* NO_CURL */
-
-static int disconnect_walker(struct transport *transport)
-{
-	struct walker *walker = transport->data;
-	if (walker)
-		walker_free(walker);
-	return 0;
-}
-
 #ifndef NO_CURL
 static int curl_transport_push(struct transport *transport, int refspec_nr, const char **refspec, int flags)
 {
@@ -432,96 +390,6 @@ static int curl_transport_push(struct transport *transport, int refspec_nr, cons
 	return !!err;
 }
 
-static struct ref *get_refs_via_curl(struct transport *transport, int for_push)
-{
-	struct strbuf buffer = STRBUF_INIT;
-	char *data, *start, *mid;
-	char *ref_name;
-	char *refs_url;
-	int i = 0;
-	int http_ret;
-
-	struct ref *refs = NULL;
-	struct ref *ref = NULL;
-	struct ref *last_ref = NULL;
-
-	struct walker *walker;
-
-	if (for_push)
-		return NULL;
-
-	if (!transport->data)
-		transport->data = get_http_walker(transport->url,
-						transport->remote);
-
-	walker = transport->data;
-
-	refs_url = xmalloc(strlen(transport->url) + 11);
-	sprintf(refs_url, "%s/info/refs", transport->url);
-
-	http_ret = http_get_strbuf(refs_url, &buffer, HTTP_NO_CACHE);
-	switch (http_ret) {
-	case HTTP_OK:
-		break;
-	case HTTP_MISSING_TARGET:
-		die("%s not found: did you run git update-server-info on the"
-		    " server?", refs_url);
-	default:
-		http_error(refs_url, http_ret);
-		die("HTTP request failed");
-	}
-
-	data = buffer.buf;
-	start = NULL;
-	mid = data;
-	while (i < buffer.len) {
-		if (!start)
-			start = &data[i];
-		if (data[i] == '\t')
-			mid = &data[i];
-		if (data[i] == '\n') {
-			data[i] = 0;
-			ref_name = mid + 1;
-			ref = xmalloc(sizeof(struct ref) +
-				      strlen(ref_name) + 1);
-			memset(ref, 0, sizeof(struct ref));
-			strcpy(ref->name, ref_name);
-			get_sha1_hex(start, ref->old_sha1);
-			if (!refs)
-				refs = ref;
-			if (last_ref)
-				last_ref->next = ref;
-			last_ref = ref;
-			start = NULL;
-		}
-		i++;
-	}
-
-	strbuf_release(&buffer);
-
-	ref = alloc_ref("HEAD");
-	if (!walker->fetch_ref(walker, ref) &&
-	    !resolve_remote_symref(ref, refs)) {
-		ref->next = refs;
-		refs = ref;
-	} else {
-		free(ref);
-	}
-
-	strbuf_release(&buffer);
-	free(refs_url);
-	return refs;
-}
-
-static int fetch_objs_via_curl(struct transport *transport,
-				 int nr_objs, const struct ref **to_fetch)
-{
-	if (!transport->data)
-		transport->data = get_http_walker(transport->url,
-						transport->remote);
-	return fetch_objs_via_walker(transport, nr_objs, to_fetch);
-}
-
 #endif
 
 struct bundle_transport_data {
@@ -950,14 +818,12 @@ struct transport *transport_get(struct remote *remote, const char *url)
 	} else if (!prefixcmp(url, "http://")
 	        || !prefixcmp(url, "https://")
 	        || !prefixcmp(url, "ftp://")) {
+		transport_shim_init(ret, "curl");
 #ifdef NO_CURL
 		error("git was compiled without libcurl support.");
 #else
-		ret->get_refs_list = get_refs_via_curl;
-		ret->fetch = fetch_objs_via_curl;
 		ret->push = curl_transport_push;
 #endif
-		ret->disconnect = disconnect_walker;
 
 	} else if (is_local(url) && is_file(url)) {
 		struct bundle_transport_data *data = xcalloc(1, sizeof(*data));
-- 
1.6.4.rc2.13.ga9762.dirty

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/3 v2] Use an external program to implement fetching with curl
  2009-07-28  6:08 [PATCH 2/3 v2] Use an external program to implement fetching with curl Daniel Barkalow
@ 2009-07-28 13:19 ` Johannes Schindelin
  2009-07-28 13:30   ` Erik Faye-Lund
  2009-07-28 17:59   ` Daniel Barkalow
  0 siblings, 2 replies; 6+ messages in thread
From: Johannes Schindelin @ 2009-07-28 13:19 UTC (permalink / raw)
  To: Daniel Barkalow; +Cc: Junio C Hamano, git, Linus Torvalds

Hi,

On Tue, 28 Jul 2009, Daniel Barkalow wrote:

> diff --git a/Makefile b/Makefile
> index 01efc73..d3dd2ed 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -980,6 +980,7 @@ else
>  		CURL_LIBCURL = -lcurl
>  	endif
>  	BUILTIN_OBJS += builtin-http-fetch.o
> +	PROGRAMS += git-shim-curl$X

So now you not only use that disgustingly meaningless name "shim", but 
also abolish the notion that the helper is called by its protocol name?

> diff --git a/shim-curl.c b/shim-curl.c
> new file mode 100644
> index 0000000..bde041b
> --- /dev/null
> +++ b/shim-curl.c
> @@ -0,0 +1,132 @@
> +#include "cache.h"
> +#include "remote.h"
> +#include "strbuf.h"
> +#include "walker.h"
> +#include "http.h"
> +
> +static struct ref *get_refs(struct walker *walker, const char *url)
> +{
> +	int http_ret;
> +	char *refs_url;
> +	struct strbuf buffer = STRBUF_INIT;
> +	char *data, *start, *mid;
> +	int i = 0;
> +	char *ref_name;
> +	struct ref *refs = NULL;
> +	struct ref *ref = NULL;
> +	struct ref *last_ref = NULL;
> +
> +	refs_url = xmalloc(strlen(url) + 11);
> +	sprintf(refs_url, "%s/info/refs", url);

And I thought we had strbufs...

> +
> +	http_ret = http_get_strbuf(refs_url, &buffer, HTTP_NO_CACHE);
> +	switch (http_ret) {
> +	case HTTP_OK:
> +		break;
> +	case HTTP_MISSING_TARGET:
> +		die("%s not found: did you run git update-server-info on the"
> +		    " server?", refs_url);
> +	default:
> +		http_error(refs_url, http_ret);
> +		die("HTTP request failed");
> +	}
> +	free(refs_url);
> +
> +	data = buffer.buf;
> +	start = NULL;
> +	mid = data;

What does "mid" stand for?

> +	while (i < buffer.len) {
> +		if (!start) {
> +			start = &data[i];
> +		}

Why curly brackets around this one, but not this:

> +		if (data[i] == '\t')
> +			mid = &data[i];

> +		if (data[i] == '\n') {
> +			data[i] = 0;
> +			ref_name = mid + 1;
> +			ref = xmalloc(sizeof(struct ref) +
> +				      strlen(ref_name) + 1);

What is alloc_ref() for?

> +			memset(ref, 0, sizeof(struct ref));
> +			strcpy(ref->name, ref_name);
> +			get_sha1_hex(start, ref->old_sha1);
> +			if (!refs)
> +				refs = ref;
> +			if (last_ref)
> +				last_ref->next = ref;
> +			last_ref = ref;

Is this another "tail"?

> +			start = NULL;
> +		}
> +		i++;

Oh, so it _is_ a for() loop.

> +	}
> +	strbuf_release(&buffer);
> +
> +	ref = alloc_ref("HEAD");
> +	if (!walker->fetch_ref(walker, ref) &&
> +	    !resolve_remote_symref(ref, refs)) {
> +		ref->next = refs;
> +		refs = ref;
> +	} else {
> +		free(ref);
> +	}
> +
> +	return refs;
> +}
> +
> +int main(int argc, const char **argv)
> +{
> +	struct remote *remote;
> +	struct strbuf buf = STRBUF_INIT;
> +	const char *url;
> +	struct walker *walker = NULL;
> +
> +	setup_git_directory();
> +	if (argc < 2) {
> +		fprintf(stderr, "Remote needed\n");
> +		return 1;
> +	}
> +
> +	remote = remote_get(argv[1]);
> +
> +	if (argc > 2) {
> +		url = argv[2];
> +	} else {
> +		url = remote->url[0];
> +	}
> +
> +	do {
> +		if (strbuf_getline(&buf, stdin, '\n') == EOF)
> +			break;
> +		if (!prefixcmp(buf.buf, "fetch ")) {
> +			char *obj = buf.buf + strlen("fetch ");
> +			if (!walker)
> +				walker = get_http_walker(url, remote);
> +			walker->get_all = 1;
> +			walker->get_tree = 1;
> +			walker->get_history = 1;
> +			walker->get_verbosely = 0;
> +			walker->get_recover = 0;
> +			if (walker_fetch(walker, 1, &obj, NULL, NULL))
> +				die("Fetch failed.");
> +			printf("\n");
> +			fflush(stdout);
> +		} else if (!strcmp(buf.buf, "list")) {
> +			struct ref *refs;
> +			struct ref *posn;
> +			if (!walker)
> +				walker = get_http_walker(url, remote);
> +			refs = get_refs(walker, url);
> +			for (posn = refs; posn; posn = posn->next) {
> +				if (posn->symref)
> +					printf("@%s %s\n", posn->symref, posn->name);
> +				else
> +					printf("%s %s\n", sha1_to_hex(posn->old_sha1), posn->name);
> +			}
> +			printf("\n");
> +			fflush(stdout);
> +		} else {
> +			return 1;
> +		}
> +		strbuf_reset(&buf);
> +	} while (1);
> +	return 0;
> +}

If you already start some infrastructure like this, you should go the 
whole nine yards and make helper functions in remote.c or transport.c that 
help implementing "git-remote-<protocol>" helpers.

> diff --git a/transport.c b/transport.c
> index de0d587..6d13b0e 100644
> --- a/transport.c
> +++ b/transport.c
> @@ -950,14 +818,12 @@ struct transport *transport_get(struct remote *remote, const char *url)
>  	} else if (!prefixcmp(url, "http://")
>  	        || !prefixcmp(url, "https://")
>  	        || !prefixcmp(url, "ftp://")) {
> +		transport_shim_init(ret, "curl");

Like I said, identifying the remote helper by anything else than the 
protocol is a design catastrophe.

>  #ifdef NO_CURL
>  		error("git was compiled without libcurl support.");

Why the heck does this have to be guarded by an #ifdef NO_CURL?  It is the 
helper's task to say, not transport.c's.

>  #else
> -		ret->get_refs_list = get_refs_via_curl;
> -		ret->fetch = fetch_objs_via_curl;
>  		ret->push = curl_transport_push;
>  #endif
> -		ret->disconnect = disconnect_walker;
>  
>  	} else if (is_local(url) && is_file(url)) {
>  		struct bundle_transport_data *data = xcalloc(1, sizeof(*data));

I think the basic idea of yours is sound, but all I see is some incomplete 
separation between builtin git-fetch/-push and the helper.  As it stands, 
git-fetch/-push have to be compiled with the exact knowledge what helper 
is there, git-fetch/-push are _still_ aware if Git was compiled with http 
support (so that it is not possible to just install the helper 
afterwards), and the infrastructure forbids any extension of capabilities 
in the future.

I think these issues are serious enough to merit a rework.

Thanks,
Dscho

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/3 v2] Use an external program to implement fetching with  curl
  2009-07-28 13:19 ` Johannes Schindelin
@ 2009-07-28 13:30   ` Erik Faye-Lund
  2009-07-28 15:03     ` Johannes Schindelin
  2009-07-28 17:59   ` Daniel Barkalow
  1 sibling, 1 reply; 6+ messages in thread
From: Erik Faye-Lund @ 2009-07-28 13:30 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Daniel Barkalow, Junio C Hamano, git, Linus Torvalds

On Tue, Jul 28, 2009 at 3:19 PM, Johannes
Schindelin<Johannes.Schindelin@gmx.de> wrote:
> If you already start some infrastructure like this, you should go the
> whole nine yards and make helper functions in remote.c or transport.c that
> help implementing "git-remote-<protocol>" helpers.

You mean helper-helpers? :)

-- 
Erik "kusma" Faye-Lund
kusmabite@gmail.com
(+47) 986 59 656

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/3 v2] Use an external program to implement fetching with  curl
  2009-07-28 13:30   ` Erik Faye-Lund
@ 2009-07-28 15:03     ` Johannes Schindelin
  0 siblings, 0 replies; 6+ messages in thread
From: Johannes Schindelin @ 2009-07-28 15:03 UTC (permalink / raw)
  To: Erik Faye-Lund; +Cc: Daniel Barkalow, Junio C Hamano, git, Linus Torvalds

Hi,

On Tue, 28 Jul 2009, Erik Faye-Lund wrote:

> On Tue, Jul 28, 2009 at 3:19 PM, Johannes
> Schindelin<Johannes.Schindelin@gmx.de> wrote:
> > If you already start some infrastructure like this, you should go the 
> > whole nine yards and make helper functions in remote.c or transport.c 
> > that help implementing "git-remote-<protocol>" helpers.
> 
> You mean helper-helpers? :)

Not really... I meant functions that do all the stdin parsing for you, 
together with error handling, and only return proper lists of verified 
arguments in the appropriate data structure.  This functionality is likely 
to be needed by all remote helpers, that's why I would prefer to have it 
exposed as functions in libgit.a, to be reused by all the helpers.

Of course, this would not help shell scripting such helpers, but you have 
to start somewhere.  And that somewhere happens to be the http transport.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/3 v2] Use an external program to implement fetching with curl
  2009-07-28 13:19 ` Johannes Schindelin
  2009-07-28 13:30   ` Erik Faye-Lund
@ 2009-07-28 17:59   ` Daniel Barkalow
  2009-07-29 22:16     ` Johannes Schindelin
  1 sibling, 1 reply; 6+ messages in thread
From: Daniel Barkalow @ 2009-07-28 17:59 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Junio C Hamano, git, Linus Torvalds



On Tue, 28 Jul 2009, Johannes Schindelin wrote:

> Hi,
> 
> On Tue, 28 Jul 2009, Daniel Barkalow wrote:
> 
> > diff --git a/Makefile b/Makefile
> > index 01efc73..d3dd2ed 100644
> > --- a/Makefile
> > +++ b/Makefile
> > @@ -980,6 +980,7 @@ else
> >  		CURL_LIBCURL = -lcurl
> >  	endif
> >  	BUILTIN_OBJS += builtin-http-fetch.o
> > +	PROGRAMS += git-shim-curl$X
> 
> So now you not only use that disgustingly meaningless name "shim", but 
> also abolish the notion that the helper is called by its protocol name?
> 
> > diff --git a/shim-curl.c b/shim-curl.c
> > new file mode 100644
> > index 0000000..bde041b
> > --- /dev/null
> > +++ b/shim-curl.c
> > @@ -0,0 +1,132 @@
> > +#include "cache.h"
> > +#include "remote.h"
> > +#include "strbuf.h"
> > +#include "walker.h"
> > +#include "http.h"
> > +
> > +static struct ref *get_refs(struct walker *walker, const char *url)
> > +{
> > +	int http_ret;
> > +	char *refs_url;
> > +	struct strbuf buffer = STRBUF_INIT;
> > +	char *data, *start, *mid;
> > +	int i = 0;
> > +	char *ref_name;
> > +	struct ref *refs = NULL;
> > +	struct ref *ref = NULL;
> > +	struct ref *last_ref = NULL;
> > +
> > +	refs_url = xmalloc(strlen(url) + 11);
> > +	sprintf(refs_url, "%s/info/refs", url);
> 
> And I thought we had strbufs...

This whole chunk of code is a direct move from transport.c, which is 
unfortunately not obvious in the way we represent diffs. I'd prefer to 
keep the code the same across the move so that we'd be able to track it if 
"git blame" got especially smart.

> > +
> > +	http_ret = http_get_strbuf(refs_url, &buffer, HTTP_NO_CACHE);
> > +	switch (http_ret) {
> > +	case HTTP_OK:
> > +		break;
> > +	case HTTP_MISSING_TARGET:
> > +		die("%s not found: did you run git update-server-info on the"
> > +		    " server?", refs_url);
> > +	default:
> > +		http_error(refs_url, http_ret);
> > +		die("HTTP request failed");
> > +	}
> > +	free(refs_url);
> > +
> > +	data = buffer.buf;
> > +	start = NULL;
> > +	mid = data;
> 
> What does "mid" stand for?
> 
> > +	while (i < buffer.len) {
> > +		if (!start) {
> > +			start = &data[i];
> > +		}
> 
> Why curly brackets around this one, but not this:
> 
> > +		if (data[i] == '\t')
> > +			mid = &data[i];
> 
> > +		if (data[i] == '\n') {
> > +			data[i] = 0;
> > +			ref_name = mid + 1;
> > +			ref = xmalloc(sizeof(struct ref) +
> > +				      strlen(ref_name) + 1);
> 
> What is alloc_ref() for?
> 
> > +			memset(ref, 0, sizeof(struct ref));
> > +			strcpy(ref->name, ref_name);
> > +			get_sha1_hex(start, ref->old_sha1);
> > +			if (!refs)
> > +				refs = ref;
> > +			if (last_ref)
> > +				last_ref->next = ref;
> > +			last_ref = ref;
> 
> Is this another "tail"?
> 
> > +			start = NULL;
> > +		}
> > +		i++;
> 
> Oh, so it _is_ a for() loop.
> 
> > +	}
> > +	strbuf_release(&buffer);
> > +
> > +	ref = alloc_ref("HEAD");
> > +	if (!walker->fetch_ref(walker, ref) &&
> > +	    !resolve_remote_symref(ref, refs)) {
> > +		ref->next = refs;
> > +		refs = ref;
> > +	} else {
> > +		free(ref);
> > +	}
> > +
> > +	return refs;
> > +}
> > +
> > +int main(int argc, const char **argv)
> > +{
> > +	struct remote *remote;
> > +	struct strbuf buf = STRBUF_INIT;
> > +	const char *url;
> > +	struct walker *walker = NULL;
> > +
> > +	setup_git_directory();
> > +	if (argc < 2) {
> > +		fprintf(stderr, "Remote needed\n");
> > +		return 1;
> > +	}
> > +
> > +	remote = remote_get(argv[1]);
> > +
> > +	if (argc > 2) {
> > +		url = argv[2];
> > +	} else {
> > +		url = remote->url[0];
> > +	}
> > +
> > +	do {
> > +		if (strbuf_getline(&buf, stdin, '\n') == EOF)
> > +			break;
> > +		if (!prefixcmp(buf.buf, "fetch ")) {
> > +			char *obj = buf.buf + strlen("fetch ");
> > +			if (!walker)
> > +				walker = get_http_walker(url, remote);
> > +			walker->get_all = 1;
> > +			walker->get_tree = 1;
> > +			walker->get_history = 1;
> > +			walker->get_verbosely = 0;
> > +			walker->get_recover = 0;
> > +			if (walker_fetch(walker, 1, &obj, NULL, NULL))
> > +				die("Fetch failed.");
> > +			printf("\n");
> > +			fflush(stdout);
> > +		} else if (!strcmp(buf.buf, "list")) {
> > +			struct ref *refs;
> > +			struct ref *posn;
> > +			if (!walker)
> > +				walker = get_http_walker(url, remote);
> > +			refs = get_refs(walker, url);
> > +			for (posn = refs; posn; posn = posn->next) {
> > +				if (posn->symref)
> > +					printf("@%s %s\n", posn->symref, posn->name);
> > +				else
> > +					printf("%s %s\n", sha1_to_hex(posn->old_sha1), posn->name);
> > +			}
> > +			printf("\n");
> > +			fflush(stdout);
> > +		} else {
> > +			return 1;
> > +		}
> > +		strbuf_reset(&buf);
> > +	} while (1);
> > +	return 0;
> > +}
> 
> If you already start some infrastructure like this, you should go the 
> whole nine yards and make helper functions in remote.c or transport.c that 
> help implementing "git-remote-<protocol>" helpers.

I thought of that, but I couldn't think of any helper functions that 
wouldn't be harder to use than to not use. In any case, they should be in 
a separate file, since they'd only be used by a set of separate programs.

> > diff --git a/transport.c b/transport.c
> > index de0d587..6d13b0e 100644
> > --- a/transport.c
> > +++ b/transport.c
> > @@ -950,14 +818,12 @@ struct transport *transport_get(struct remote *remote, const char *url)
> >  	} else if (!prefixcmp(url, "http://")
> >  	        || !prefixcmp(url, "https://")
> >  	        || !prefixcmp(url, "ftp://")) {
> > +		transport_shim_init(ret, "curl");
> 
> Like I said, identifying the remote helper by anything else than the 
> protocol is a design catastrophe.

But this helper supports all of the protocols supported by curl, and may 
eventually not be the only available helper that supports some of these 
protocols. I think it's better to represent this fact by having 
transport.c explicitly know when to use this helper, rather than linking 
the same executable to git-remote-https, git-remote-http, git-remote-ftp, 
etc.

> >  #ifdef NO_CURL
> >  		error("git was compiled without libcurl support.");
> 
> Why the heck does this have to be guarded by an #ifdef NO_CURL?  It is the 
> helper's task to say, not transport.c's.

What helper? There is no helper at all if NO_CURL, and:

  git: 'remote-curl' is not a git-command. See 'git --help'.

is not a user-friendly message. For now, I'm keeping the existing 
behavior.

> >  #else
> > -		ret->get_refs_list = get_refs_via_curl;
> > -		ret->fetch = fetch_objs_via_curl;
> >  		ret->push = curl_transport_push;
> >  #endif
> > -		ret->disconnect = disconnect_walker;
> >  
> >  	} else if (is_local(url) && is_file(url)) {
> >  		struct bundle_transport_data *data = xcalloc(1, sizeof(*data));
> 
> I think the basic idea of yours is sound, but all I see is some incomplete 
> separation between builtin git-fetch/-push and the helper.  As it stands, 
> git-fetch/-push have to be compiled with the exact knowledge what helper 
> is there, git-fetch/-push are _still_ aware if Git was compiled with http 
> support (so that it is not possible to just install the helper 
> afterwards), and the infrastructure forbids any extension of capabilities 
> in the future.

In the long run, I want to have two methods for running an external 
helper: one where the installation in some way knows when to run the 
helper (either by having code in place or by having helpers with 
particular names or something), and one where the user specifies something 
explicit. That is, git shouldn't know that a remote with no url but some 
"codeline" settings that start with "//" is p4, and the user shouldn't 
have to know that git repositories whose urls start with http:// are done 
with an external helper, so no one mechanism is sufficient.

It would be nice if git supported querying its environment to see whether 
a command is available and printing a message suited to why it was 
looking for it, but that's a separate topic.

	-Daniel
*This .sig left intentionally blank*

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/3 v2] Use an external program to implement fetching with curl
  2009-07-28 17:59   ` Daniel Barkalow
@ 2009-07-29 22:16     ` Johannes Schindelin
  0 siblings, 0 replies; 6+ messages in thread
From: Johannes Schindelin @ 2009-07-29 22:16 UTC (permalink / raw)
  To: Daniel Barkalow; +Cc: Junio C Hamano, git, Linus Torvalds

Hi,

On Tue, 28 Jul 2009, Daniel Barkalow wrote:

> On Tue, 28 Jul 2009, Johannes Schindelin wrote:
> 
> > On Tue, 28 Jul 2009, Daniel Barkalow wrote:
> > 
> > > diff --git a/shim-curl.c b/shim-curl.c
> > > new file mode 100644
> > > index 0000000..bde041b
> > > --- /dev/null
> > > +++ b/shim-curl.c
> > > @@ -0,0 +1,132 @@
> > > +#include "cache.h"
> > > +#include "remote.h"
> > > +#include "strbuf.h"
> > > +#include "walker.h"
> > > +#include "http.h"
> > > +
> > > +static struct ref *get_refs(struct walker *walker, const char *url)
> > > +{
> > > +	int http_ret;
> > > +	char *refs_url;
> > > +	struct strbuf buffer = STRBUF_INIT;
> > > +	char *data, *start, *mid;
> > > +	int i = 0;
> > > +	char *ref_name;
> > > +	struct ref *refs = NULL;
> > > +	struct ref *ref = NULL;
> > > +	struct ref *last_ref = NULL;
> > > +
> > > +	refs_url = xmalloc(strlen(url) + 11);
> > > +	sprintf(refs_url, "%s/info/refs", url);
> > 
> > And I thought we had strbufs...
> 
> This whole chunk of code is a direct move from transport.c, which is 
> unfortunately not obvious in the way we represent diffs. I'd prefer to 
> keep the code the same across the move so that we'd be able to track it 
> if "git blame" got especially smart.

... and make review of the current patch -- which is possible right now, 
without the need of a super clever git blame -- hard?  Do you really think 
this is a wise decision?

> > > [a couple of unaddressed comments]
> > >
> > > +int main(int argc, const char **argv)
> > > +{
> > > +	struct remote *remote;
> > > +	struct strbuf buf = STRBUF_INIT;
> > > +	const char *url;
> > > +	struct walker *walker = NULL;
> > > +
> > > +	setup_git_directory();
> > > +	if (argc < 2) {
> > > +		fprintf(stderr, "Remote needed\n");
> > > +		return 1;
> > > +	}
> > > +
> > > +	remote = remote_get(argv[1]);
> > > +
> > > +	if (argc > 2) {
> > > +		url = argv[2];
> > > +	} else {
> > > +		url = remote->url[0];
> > > +	}
> > > +
> > > +	do {
> > > +		if (strbuf_getline(&buf, stdin, '\n') == EOF)
> > > +			break;
> > > +		if (!prefixcmp(buf.buf, "fetch ")) {
> > > +			char *obj = buf.buf + strlen("fetch ");
> > > +			if (!walker)
> > > +				walker = get_http_walker(url, remote);
> > > +			walker->get_all = 1;
> > > +			walker->get_tree = 1;
> > > +			walker->get_history = 1;
> > > +			walker->get_verbosely = 0;
> > > +			walker->get_recover = 0;
> > > +			if (walker_fetch(walker, 1, &obj, NULL, NULL))
> > > +				die("Fetch failed.");
> > > +			printf("\n");
> > > +			fflush(stdout);
> > > +		} else if (!strcmp(buf.buf, "list")) {
> > > +			struct ref *refs;
> > > +			struct ref *posn;
> > > +			if (!walker)
> > > +				walker = get_http_walker(url, remote);
> > > +			refs = get_refs(walker, url);
> > > +			for (posn = refs; posn; posn = posn->next) {
> > > +				if (posn->symref)
> > > +					printf("@%s %s\n", posn->symref, posn->name);
> > > +				else
> > > +					printf("%s %s\n", sha1_to_hex(posn->old_sha1), posn->name);
> > > +			}
> > > +			printf("\n");
> > > +			fflush(stdout);
> > > +		} else {
> > > +			return 1;
> > > +		}
> > > +		strbuf_reset(&buf);
> > > +	} while (1);
> > > +	return 0;
> > > +}
> > 
> > If you already start some infrastructure like this, you should go the 
> > whole nine yards and make helper functions in remote.c or transport.c that 
> > help implementing "git-remote-<protocol>" helpers.
> 
> I thought of that, but I couldn't think of any helper functions that 
> wouldn't be harder to use than to not use. In any case, they should be in 
> a separate file, since they'd only be used by a set of separate programs.

So every helper should implement the same parsing of the whole protocol?

I mean, the protocol _lends_ itself to an interface that implements 
handlers for the commands sent over the pipe, but with compile time errors 
when, say, a parameter type mistake happened.

> > > diff --git a/transport.c b/transport.c
> > > index de0d587..6d13b0e 100644
> > > --- a/transport.c
> > > +++ b/transport.c
> > > @@ -950,14 +818,12 @@ struct transport *transport_get(struct remote *remote, const char *url)
> > >  	} else if (!prefixcmp(url, "http://")
> > >  	        || !prefixcmp(url, "https://")
> > >  	        || !prefixcmp(url, "ftp://")) {
> > > +		transport_shim_init(ret, "curl");
> > 
> > Like I said, identifying the remote helper by anything else than the 
> > protocol is a design catastrophe.
> 
> But this helper supports all of the protocols supported by curl, and may 
> eventually not be the only available helper that supports some of these 
> protocols.

Yes, and in the near future my computer may understand what I say, and 
guess my thoughts.

Let's stay with the present, okay?

It is _all_ too easy to hardlink git-remote-http to git-remote-https.

And if you have two different handlers for the same protocol, how should 
_Git_ decide which one to take?

No, I firmly believe that this design decision is too short-sighted.

> I think it's better to represent this fact by having transport.c 
> explicitly know when to use this helper, rather than linking the same 
> executable to git-remote-https, git-remote-http, git-remote-ftp, etc.

You really want to recompile Git whenever somebody writes a new protocol 
helper?  That's wrong.

> > >  #ifdef NO_CURL
> > >  		error("git was compiled without libcurl support.");
> > 
> > Why the heck does this have to be guarded by an #ifdef NO_CURL?  It is the 
> > helper's task to say, not transport.c's.
> 
> What helper? There is no helper at all if NO_CURL,

Again, you want to _require_ that the helpers and Git are compiled 
together.  I stand firmly against this idea.

> and:
> 
>   git: 'remote-curl' is not a git-command. See 'git --help'.
> 
> is not a user-friendly message.

Oh?  You would want to show the user that message?  I wouldn't.  I would 
want to show the user:

	No helper found to handle protocol 'https' (i.e. no 
	git-remote-http was found in the PATH)

> > >  #else
> > > -		ret->get_refs_list = get_refs_via_curl;
> > > -		ret->fetch = fetch_objs_via_curl;
> > >  		ret->push = curl_transport_push;
> > >  #endif
> > > -		ret->disconnect = disconnect_walker;
> > >  
> > >  	} else if (is_local(url) && is_file(url)) {
> > >  		struct bundle_transport_data *data = xcalloc(1, sizeof(*data));
> > 
> > I think the basic idea of yours is sound, but all I see is some incomplete 
> > separation between builtin git-fetch/-push and the helper.  As it stands, 
> > git-fetch/-push have to be compiled with the exact knowledge what helper 
> > is there, git-fetch/-push are _still_ aware if Git was compiled with http 
> > support (so that it is not possible to just install the helper 
> > afterwards), and the infrastructure forbids any extension of capabilities 
> > in the future.
> 
> In the long run, I want to have two methods for running an external 
> helper: one where the installation in some way knows when to run the 
> helper (either by having code in place or by having helpers with 
> particular names or something), and one where the user specifies something 
> explicit.

I am not sure I like that.

> That is, git shouldn't know that a remote with no url but some 
> "codeline" settings that start with "//" is p4,

What speaks against a URL "p4://..."?  That would be _totally_ intuitive 
and easy to handle.

> and the user shouldn't have to know that git repositories whose urls 
> start with http:// are done with an external helper, so no one mechanism 
> is sufficient.

Why on earth should the user need to know that http:// is handled by an 
external helper?

Of course, if the user wants to support, say, VSS, it is now easy (well, 
the Git part) to provide a simple git-remote-vss helper _and not having to 
recompile Git at all_.

> It would be nice if git supported querying its environment to see whether 
> a command is available and printing a message suited to why it was 
> looking for it, but that's a separate topic.

We do have such a framework in place already.  You even mentioned it.  Our 
git-help can look in the PATH what is available.

But then, we do not have to do that at all.

We just try to handle the protocol internally.  If that is not possible, 
we extract the protocol from the URL and try to call a program called 
git-remote-<protocol>.  We inquire via the capabilities command if that 
helper can fetch/push and continue accordingly.  If the program could not 
be launched, with exit status 127, we know there was no helper and can 
tell the user that.

We do not need to do any discovery of the environment at all.  
start_command() is all we ever need.

Simple is beautiful,
Dscho

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2009-07-29 22:16 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-07-28  6:08 [PATCH 2/3 v2] Use an external program to implement fetching with curl Daniel Barkalow
2009-07-28 13:19 ` Johannes Schindelin
2009-07-28 13:30   ` Erik Faye-Lund
2009-07-28 15:03     ` Johannes Schindelin
2009-07-28 17:59   ` Daniel Barkalow
2009-07-29 22:16     ` Johannes Schindelin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.