All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] describe: Don’t look up commits with --exact-match
@ 2010-11-17 23:33 Anders Kaseorg
  2010-12-03  8:43 ` Jonathan Nieder
  0 siblings, 1 reply; 28+ messages in thread
From: Anders Kaseorg @ 2010-11-17 23:33 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

This makes ‘git describe --exact-match HEAD’ about 15 times faster on
a cold cache (2.3s instead of 35s) in a linux-2.6 repository with many
packed tags.  That’s a huge win for the interactivity of the __git_ps1
shell prompt helper.

Signed-off-by: Anders Kaseorg <andersk@ksplice.com>
---
 builtin/describe.c |   64 ++++++++++++++++++++++++++-------------------------
 1 files changed, 33 insertions(+), 31 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 43caff2..1cdb831 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -22,7 +22,7 @@ static int tags;	/* Allow lightweight tags */
 static int longformat;
 static int abbrev = DEFAULT_ABBREV;
 static int max_candidates = 10;
-static int found_names;
+struct commit_name *names = NULL;
 static const char *pattern;
 static int always;
 static const char *dirty;
@@ -34,6 +34,8 @@ static const char *diff_index_args[] = {
 
 
 struct commit_name {
+	struct commit_name *next;
+	unsigned char peeled[20];
 	struct tag *tag;
 	unsigned prio:2; /* annotated tag = 2, tag = 1, head = 0 */
 	unsigned name_checked:1;
@@ -78,31 +80,26 @@ static int replace_name(struct commit_name *e,
 }
 
 static void add_to_known_names(const char *path,
-			       struct commit *commit,
+			       const unsigned char *peeled,
 			       int prio,
 			       const unsigned char *sha1)
 {
-	struct commit_name *e = commit->util;
 	struct tag *tag = NULL;
-	if (replace_name(e, prio, sha1, &tag)) {
-		size_t len = strlen(path)+1;
-		free(e);
-		e = xmalloc(sizeof(struct commit_name) + len);
-		e->tag = tag;
-		e->prio = prio;
-		e->name_checked = 0;
-		hashcpy(e->sha1, sha1);
-		memcpy(e->path, path, len);
-		commit->util = e;
-	}
-	found_names = 1;
+	size_t len = strlen(path)+1;
+	struct commit_name *e = xmalloc(sizeof(struct commit_name) + len);
+	hashcpy(e->peeled, peeled);
+	e->tag = tag;
+	e->prio = prio;
+	e->name_checked = 0;
+	hashcpy(e->sha1, sha1);
+	memcpy(e->path, path, len);
+	e->next = names;
+	names = e;
 }
 
 static int get_name(const char *path, const unsigned char *sha1, int flag, void *cb_data)
 {
 	int might_be_tag = !prefixcmp(path, "refs/tags/");
-	struct commit *commit;
-	struct object *object;
 	unsigned char peeled[20];
 	int is_tag, prio;
 
@@ -110,16 +107,10 @@ static int get_name(const char *path, const unsigned char *sha1, int flag, void
 		return 0;
 
 	if (!peel_ref(path, peeled) && !is_null_sha1(peeled)) {
-		commit = lookup_commit_reference_gently(peeled, 1);
-		if (!commit)
-			return 0;
-		is_tag = !!hashcmp(sha1, commit->object.sha1);
+		is_tag = !!hashcmp(sha1, peeled);
 	} else {
-		commit = lookup_commit_reference_gently(sha1, 1);
-		object = parse_object(sha1);
-		if (!commit || !object)
-			return 0;
-		is_tag = object->type == OBJ_TAG;
+		hashcpy(peeled, sha1);
+		is_tag = 0;
 	}
 
 	/* If --all, then any refs are used.
@@ -142,7 +133,7 @@ static int get_name(const char *path, const unsigned char *sha1, int flag, void
 		if (!prio)
 			return 0;
 	}
-	add_to_known_names(all ? path + 5 : path + 10, commit, prio, sha1);
+	add_to_known_names(all ? path + 5 : path + 10, peeled, prio, sha1);
 	return 0;
 }
 
@@ -228,7 +219,7 @@ static void describe(const char *arg, int last_one)
 	unsigned char sha1[20];
 	struct commit *cmit, *gave_up_on = NULL;
 	struct commit_list *list;
-	struct commit_name *n;
+	struct commit_name *n, *e;
 	struct possible_tag all_matches[MAX_TAGS];
 	unsigned int match_cnt = 0, annotated_cnt = 0, cur_match;
 	unsigned long seen_commits = 0;
@@ -240,7 +231,12 @@ static void describe(const char *arg, int last_one)
 	if (!cmit)
 		die("%s is not a valid '%s' object", arg, commit_type);
 
-	n = cmit->util;
+	n = NULL;
+	for (e = names; e; e = e->next) {
+		if (hashcmp(e->peeled, cmit->object.sha1) == 0 &&
+		    replace_name(n, e->prio, e->sha1, &e->tag))
+			n = e;
+	}
 	if (n && (tags || all || n->prio == 2)) {
 		/*
 		 * Exact match to an existing ref.
@@ -259,6 +255,12 @@ static void describe(const char *arg, int last_one)
 	if (debug)
 		fprintf(stderr, "searching to describe %s\n", arg);
 
+	for (e = names; e; e = e->next) {
+		struct commit *c = lookup_commit_reference_gently(e->peeled, 1);
+		if (c && replace_name(c->util, e->prio, e->sha1, &e->tag))
+			c->util = e;
+	}
+
 	list = NULL;
 	cmit->object.flags = SEEN;
 	commit_list_insert(cmit, &list);
@@ -418,8 +420,8 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
 		return cmd_name_rev(i + argc, args, prefix);
 	}
 
-	for_each_ref(get_name, NULL);
-	if (!found_names && !always)
+	for_each_rawref(get_name, NULL);
+	if (!names && !always)
 		die("No names found, cannot describe anything.");
 
 	if (argc == 0) {
-- 
1.7.3.2

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH] describe: Don’t look up commits with --exact-match
  2010-11-17 23:33 [PATCH] describe: Don’t look up commits with --exact-match Anders Kaseorg
@ 2010-12-03  8:43 ` Jonathan Nieder
  2010-12-06  7:19   ` Anders Kaseorg
  0 siblings, 1 reply; 28+ messages in thread
From: Jonathan Nieder @ 2010-12-03  8:43 UTC (permalink / raw)
  To: Anders Kaseorg
  Cc: Junio C Hamano, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

(+cc: completion people, who might test; Thomas, who knows this
code well)

Anders Kaseorg wrote:

> This makes ‘git describe --exact-match HEAD’ about 15 times faster on
> a cold cache (2.3s instead of 35s) in a linux-2.6 repository with many
> packed tags.  That’s a huge win for the interactivity of the __git_ps1
> shell prompt helper.

Nice.

> --- a/builtin/describe.c
> +++ b/builtin/describe.c
> @@ -22,7 +22,7 @@ static int tags;	/* Allow lightweight tags */
>  static int longformat;
>  static int abbrev = DEFAULT_ABBREV;
>  static int max_candidates = 10;
> -static int found_names;
> +struct commit_name *names = NULL;

Nits:
 - static?
 - the convention in git is to leave off the zero-initializers
   for bss-allocated vars (static and globals).

> @@ -34,6 +34,8 @@ static const char *diff_index_args[] = {
>  
>  
>  struct commit_name {
> +	struct commit_name *next;
> +	unsigned char peeled[20];
>  	struct tag *tag;
>  	unsigned prio:2; /* annotated tag = 2, tag = 1, head = 0 */
>  	unsigned name_checked:1;

Hm, so the tags read so far will be stored in the "names" list.

> @@ -240,7 +231,12 @@ static void describe(const char *arg, int last_one)
>  	if (!cmit)
>  		die("%s is not a valid '%s' object", arg, commit_type);
>  
> -	n = cmit->util;
> +	n = NULL;
> +	for (e = names; e; e = e->next) {
> +		if (hashcmp(e->peeled, cmit->object.sha1) == 0 &&

The usual convention is to use !hashcmp(...) to match the !strcmp(...)
idiom.

> +		    replace_name(n, e->prio, e->sha1, &e->tag))
> +			n = e;
> +	}

Instead of looking up the commit to be matched exactly in the commits
hash table, this makes a linear search.

No change to the assymptotic running time, but would this make things
much slower in the case of many tags?  How many before it's a problem
(if ever)?  (If it's a problem in ordinary cases, I think the
optimization could be limited to --exact-match pretty easily.)

> @@ -259,6 +255,12 @@ static void describe(const char *arg, int last_one)
>  	if (debug)
>  		fprintf(stderr, "searching to describe %s\n", arg);
>  
> +	for (e = names; e; e = e->next) {
> +		struct commit *c = lookup_commit_reference_gently(e->peeled, 1);
> +		if (c && replace_name(c->util, e->prio, e->sha1, &e->tag))
> +			c->util = e;
> +	}

Filling the commits table in preparation for object traversal.  Now
the world's back to normal.

> @@ -418,8 +420,8 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
>  		return cmd_name_rev(i + argc, args, prefix);
>  	}
>  
> -	for_each_ref(get_name, NULL);
> +	for_each_rawref(get_name, NULL);

Orthogonal change snuck in?

> -	if (!found_names && !always)
> +	if (!names && !always)

Everything else looks good and very readable.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] describe: Don’t look up commits with --exact-match
  2010-12-03  8:43 ` Jonathan Nieder
@ 2010-12-06  7:19   ` Anders Kaseorg
  2010-12-06  7:22     ` Anders Kaseorg
  2010-12-06  7:32     ` Jonathan Nieder
  0 siblings, 2 replies; 28+ messages in thread
From: Anders Kaseorg @ 2010-12-06  7:19 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Junio C Hamano, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

On Fri, 3 Dec 2010, Jonathan Nieder wrote:
>  - static?
>  - the convention in git is to leave off the zero-initializers
>    for bss-allocated vars (static and globals).

Will fix.

> The usual convention is to use !hashcmp(...) to match the !strcmp(...)
> idiom.

Will fix (I hate that idiom, but yeah, match existing style).

> > +	for (e = names; e; e = e->next) {
> > +		if (!hashcmp(e->peeled, cmit->object.sha1) &&
> > +		    replace_name(n, e->prio, e->sha1, &e->tag))
> > +			n = e;
> > +	}
> 
> Instead of looking up the commit to be matched exactly in the commits
> hash table, this makes a linear search.
> 
> No change to the assymptotic running time, but would this make things
> much slower in the case of many tags?  How many before it's a problem
> (if ever)?

I don’t think it’s ever a problem: in my repository with 1800 tags on a 
warm cache, that loop accounts for about 0.1% of even the fastest 
non-exact-match query (a commit right after a tag).

> (If it's a problem in ordinary cases, I think the optimization could be 
> limited to --exact-match pretty easily.)

Then you’d lose the speedup in the case where --exact-match wasn’t 
specified but a tag happens to match exactly (which isn’t critical, but 
seemed nice).

> > -	for_each_ref(get_name, NULL);
> > +	for_each_rawref(get_name, NULL);
> 
> Orthogonal change snuck in?

This does fall under the category of “Don’t lookup commits,” and is 
necessary to get the speedup (otherwise for_each_ref has already looked up 
the commits that the rest of the patch is trying to avoid looking up).  
But I could split it out if you want.

Anders

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH] describe: Don’t look up commits with --exact-match
  2010-12-06  7:19   ` Anders Kaseorg
@ 2010-12-06  7:22     ` Anders Kaseorg
  2010-12-06  7:32     ` Jonathan Nieder
  1 sibling, 0 replies; 28+ messages in thread
From: Anders Kaseorg @ 2010-12-06  7:22 UTC (permalink / raw)
  To: Jonathan Nieder, Junio C Hamano
  Cc: git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

This makes ‘git describe --exact-match HEAD’ about 15 times faster on
a cold cache (2.3s instead of 35s) in a linux-2.6 repository with many
packed tags.  That’s a huge win for the interactivity of the __git_ps1
shell prompt helper.

Signed-off-by: Anders Kaseorg <andersk@mit.edu>
---
 builtin/describe.c |   64 ++++++++++++++++++++++++++-------------------------
 1 files changed, 33 insertions(+), 31 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 43caff2..0cddef1 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -22,7 +22,7 @@ static int tags;	/* Allow lightweight tags */
 static int longformat;
 static int abbrev = DEFAULT_ABBREV;
 static int max_candidates = 10;
-static int found_names;
+static struct commit_name *names;
 static const char *pattern;
 static int always;
 static const char *dirty;
@@ -34,6 +34,8 @@ static const char *diff_index_args[] = {
 
 
 struct commit_name {
+	struct commit_name *next;
+	unsigned char peeled[20];
 	struct tag *tag;
 	unsigned prio:2; /* annotated tag = 2, tag = 1, head = 0 */
 	unsigned name_checked:1;
@@ -78,31 +80,26 @@ static int replace_name(struct commit_name *e,
 }
 
 static void add_to_known_names(const char *path,
-			       struct commit *commit,
+			       const unsigned char *peeled,
 			       int prio,
 			       const unsigned char *sha1)
 {
-	struct commit_name *e = commit->util;
 	struct tag *tag = NULL;
-	if (replace_name(e, prio, sha1, &tag)) {
-		size_t len = strlen(path)+1;
-		free(e);
-		e = xmalloc(sizeof(struct commit_name) + len);
-		e->tag = tag;
-		e->prio = prio;
-		e->name_checked = 0;
-		hashcpy(e->sha1, sha1);
-		memcpy(e->path, path, len);
-		commit->util = e;
-	}
-	found_names = 1;
+	size_t len = strlen(path)+1;
+	struct commit_name *e = xmalloc(sizeof(struct commit_name) + len);
+	hashcpy(e->peeled, peeled);
+	e->tag = tag;
+	e->prio = prio;
+	e->name_checked = 0;
+	hashcpy(e->sha1, sha1);
+	memcpy(e->path, path, len);
+	e->next = names;
+	names = e;
 }
 
 static int get_name(const char *path, const unsigned char *sha1, int flag, void *cb_data)
 {
 	int might_be_tag = !prefixcmp(path, "refs/tags/");
-	struct commit *commit;
-	struct object *object;
 	unsigned char peeled[20];
 	int is_tag, prio;
 
@@ -110,16 +107,10 @@ static int get_name(const char *path, const unsigned char *sha1, int flag, void
 		return 0;
 
 	if (!peel_ref(path, peeled) && !is_null_sha1(peeled)) {
-		commit = lookup_commit_reference_gently(peeled, 1);
-		if (!commit)
-			return 0;
-		is_tag = !!hashcmp(sha1, commit->object.sha1);
+		is_tag = !!hashcmp(sha1, peeled);
 	} else {
-		commit = lookup_commit_reference_gently(sha1, 1);
-		object = parse_object(sha1);
-		if (!commit || !object)
-			return 0;
-		is_tag = object->type == OBJ_TAG;
+		hashcpy(peeled, sha1);
+		is_tag = 0;
 	}
 
 	/* If --all, then any refs are used.
@@ -142,7 +133,7 @@ static int get_name(const char *path, const unsigned char *sha1, int flag, void
 		if (!prio)
 			return 0;
 	}
-	add_to_known_names(all ? path + 5 : path + 10, commit, prio, sha1);
+	add_to_known_names(all ? path + 5 : path + 10, peeled, prio, sha1);
 	return 0;
 }
 
@@ -228,7 +219,7 @@ static void describe(const char *arg, int last_one)
 	unsigned char sha1[20];
 	struct commit *cmit, *gave_up_on = NULL;
 	struct commit_list *list;
-	struct commit_name *n;
+	struct commit_name *n, *e;
 	struct possible_tag all_matches[MAX_TAGS];
 	unsigned int match_cnt = 0, annotated_cnt = 0, cur_match;
 	unsigned long seen_commits = 0;
@@ -240,7 +231,12 @@ static void describe(const char *arg, int last_one)
 	if (!cmit)
 		die("%s is not a valid '%s' object", arg, commit_type);
 
-	n = cmit->util;
+	n = NULL;
+	for (e = names; e; e = e->next) {
+		if (!hashcmp(e->peeled, cmit->object.sha1) &&
+		    replace_name(n, e->prio, e->sha1, &e->tag))
+			n = e;
+	}
 	if (n && (tags || all || n->prio == 2)) {
 		/*
 		 * Exact match to an existing ref.
@@ -259,6 +255,12 @@ static void describe(const char *arg, int last_one)
 	if (debug)
 		fprintf(stderr, "searching to describe %s\n", arg);
 
+	for (e = names; e; e = e->next) {
+		struct commit *c = lookup_commit_reference_gently(e->peeled, 1);
+		if (c && replace_name(c->util, e->prio, e->sha1, &e->tag))
+			c->util = e;
+	}
+
 	list = NULL;
 	cmit->object.flags = SEEN;
 	commit_list_insert(cmit, &list);
@@ -418,8 +420,8 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
 		return cmd_name_rev(i + argc, args, prefix);
 	}
 
-	for_each_ref(get_name, NULL);
-	if (!found_names && !always)
+	for_each_rawref(get_name, NULL);
+	if (!names && !always)
 		die("No names found, cannot describe anything.");
 
 	if (argc == 0) {
-- 
1.7.3.2

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH] describe: Don’t look up commits with --exact-match
  2010-12-06  7:19   ` Anders Kaseorg
  2010-12-06  7:22     ` Anders Kaseorg
@ 2010-12-06  7:32     ` Jonathan Nieder
  2010-12-06 10:53       ` Thomas Rast
  2010-12-06 17:28       ` Anders Kaseorg
  1 sibling, 2 replies; 28+ messages in thread
From: Jonathan Nieder @ 2010-12-06  7:32 UTC (permalink / raw)
  To: Anders Kaseorg
  Cc: Junio C Hamano, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

Anders Kaseorg wrote:
> On Fri, 3 Dec 2010, Jonathan Nieder wrote:

>> Instead of looking up the commit to be matched exactly in the commits
>> hash table, this makes a linear search.
[...]
> I don’t think it’s ever a problem: in my repository with 1800 tags on a 
> warm cache, that loop accounts for about 0.1% of even the fastest 
> non-exact-match query (a commit right after a tag).

Thanks for checking.  Makes sense.

>>> -	for_each_ref(get_name, NULL);
>>> +	for_each_rawref(get_name, NULL);
>>
>> Orthogonal change snuck in?
>
> This does fall under the category of “Don’t lookup commits,” and is 
> necessary to get the speedup (otherwise for_each_ref has already looked up 
> the commits that the rest of the patch is trying to avoid looking up).  

Depends on what "Don't lookup commits" means, I suppose.  I
think the difference between _ref and _rawref is

      if (!(flags & DO_FOR_EACH_INCLUDE_BROKEN)) {
            if (entry->flag & REF_BROKEN)
                  return 0; /* ignore dangling symref */
            if (!has_sha1_file(entry->sha1)) {
                  error("%s does not point to a valid object!", entry->name);
                  return 0;
            }
      }

so if I understand correctly, for_each_ref would still allow one to
get away without unpacking the objects.  Is that correct?

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] describe:  Don’t look up commits with --exact-match
  2010-12-06  7:32     ` Jonathan Nieder
@ 2010-12-06 10:53       ` Thomas Rast
  2010-12-06 17:28       ` Anders Kaseorg
  1 sibling, 0 replies; 28+ messages in thread
From: Thomas Rast @ 2010-12-06 10:53 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Anders Kaseorg, Junio C Hamano, git, SZEDER Gábor, Kirill Smelkov

Jonathan Nieder wrote:
> Anders Kaseorg wrote:
> > On Fri, 3 Dec 2010, Jonathan Nieder wrote:
> 
> >> Instead of looking up the commit to be matched exactly in the commits
> >> hash table, this makes a linear search.
> [...]
> > I don’t think it’s ever a problem: in my repository with 1800 tags on a 
> > warm cache, that loop accounts for about 0.1% of even the fastest 
> > non-exact-match query (a commit right after a tag).
> 
> Thanks for checking.  Makes sense.

Apart from measuring: for_each_ref *loads* the tags in a linear scan,
so another linear scan doesn't add to the runtime w.r.t. number of
tags.  It only hurts if you also describe many refs in one go.

-- 
Thomas Rast
trast@{inf,student}.ethz.ch

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] describe: Don’t look up commits with --exact-match
  2010-12-06  7:32     ` Jonathan Nieder
  2010-12-06 10:53       ` Thomas Rast
@ 2010-12-06 17:28       ` Anders Kaseorg
  2010-12-06 17:47         ` Jonathan Nieder
                           ` (2 more replies)
  1 sibling, 3 replies; 28+ messages in thread
From: Anders Kaseorg @ 2010-12-06 17:28 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Junio C Hamano, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

On Mon, 6 Dec 2010, Jonathan Nieder wrote:
> Depends on what "Don't lookup commits" means, I suppose.  I
> think the difference between _ref and _rawref is
> […]
> so if I understand correctly, for_each_ref would still allow one to
> get away without unpacking the objects.  Is that correct?

Yeah.  Okay, I see that “lookup” implies unpacking, as opposed to “find”, 
which doesn’t.  The important part is to avoid the find, of course, 
because the I/O is expensive.

Anyway, here’s a series with that change split out.

Anders

-- 8< --
From 2ad1e58b8f6e9c117c77748b6e8b85227d9d5412 Mon Sep 17 00:00:00 2001
From: Anders Kaseorg <andersk@ksplice.com>
Subject: [PATCH 1/2] describe: Use for_each_rawref

Don’t waste time checking for dangling refs; they wouldn’t affect the
output of ‘git describe’ anyway.  Although this doesn’t gain much
performance by itself, it does in conjunction with the next commit.

Signed-off-by: Anders Kaseorg <andersk@ksplice.com>
---
 builtin/describe.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 43caff2..700f740 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -418,7 +418,7 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
 		return cmd_name_rev(i + argc, args, prefix);
 	}
 
-	for_each_ref(get_name, NULL);
+	for_each_rawref(get_name, NULL);
 	if (!found_names && !always)
 		die("No names found, cannot describe anything.");
 
-- 
1.7.3.3


From ce8a2ab9cf80247c2834d21f36f63cedb794e62f Mon Sep 17 00:00:00 2001
From: Anders Kaseorg <andersk@ksplice.com>
Subject: describe: Don’t look up commits with --exact-match

This makes ‘git describe --exact-match HEAD’ about 15 times faster on
a cold cache (2.3s instead of 35s) in a linux-2.6 repository with many
packed tags.  That’s a huge win for the interactivity of the __git_ps1
shell prompt helper.

Signed-off-by: Anders Kaseorg <andersk@ksplice.com>
---
 builtin/describe.c |   62 ++++++++++++++++++++++++++-------------------------
 1 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 700f740..0cddef1 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -22,7 +22,7 @@ static int tags;	/* Allow lightweight tags */
 static int longformat;
 static int abbrev = DEFAULT_ABBREV;
 static int max_candidates = 10;
-static int found_names;
+static struct commit_name *names;
 static const char *pattern;
 static int always;
 static const char *dirty;
@@ -34,6 +34,8 @@ static const char *diff_index_args[] = {
 
 
 struct commit_name {
+	struct commit_name *next;
+	unsigned char peeled[20];
 	struct tag *tag;
 	unsigned prio:2; /* annotated tag = 2, tag = 1, head = 0 */
 	unsigned name_checked:1;
@@ -78,31 +80,26 @@ static int replace_name(struct commit_name *e,
 }
 
 static void add_to_known_names(const char *path,
-			       struct commit *commit,
+			       const unsigned char *peeled,
 			       int prio,
 			       const unsigned char *sha1)
 {
-	struct commit_name *e = commit->util;
 	struct tag *tag = NULL;
-	if (replace_name(e, prio, sha1, &tag)) {
-		size_t len = strlen(path)+1;
-		free(e);
-		e = xmalloc(sizeof(struct commit_name) + len);
-		e->tag = tag;
-		e->prio = prio;
-		e->name_checked = 0;
-		hashcpy(e->sha1, sha1);
-		memcpy(e->path, path, len);
-		commit->util = e;
-	}
-	found_names = 1;
+	size_t len = strlen(path)+1;
+	struct commit_name *e = xmalloc(sizeof(struct commit_name) + len);
+	hashcpy(e->peeled, peeled);
+	e->tag = tag;
+	e->prio = prio;
+	e->name_checked = 0;
+	hashcpy(e->sha1, sha1);
+	memcpy(e->path, path, len);
+	e->next = names;
+	names = e;
 }
 
 static int get_name(const char *path, const unsigned char *sha1, int flag, void *cb_data)
 {
 	int might_be_tag = !prefixcmp(path, "refs/tags/");
-	struct commit *commit;
-	struct object *object;
 	unsigned char peeled[20];
 	int is_tag, prio;
 
@@ -110,16 +107,10 @@ static int get_name(const char *path, const unsigned char *sha1, int flag, void
 		return 0;
 
 	if (!peel_ref(path, peeled) && !is_null_sha1(peeled)) {
-		commit = lookup_commit_reference_gently(peeled, 1);
-		if (!commit)
-			return 0;
-		is_tag = !!hashcmp(sha1, commit->object.sha1);
+		is_tag = !!hashcmp(sha1, peeled);
 	} else {
-		commit = lookup_commit_reference_gently(sha1, 1);
-		object = parse_object(sha1);
-		if (!commit || !object)
-			return 0;
-		is_tag = object->type == OBJ_TAG;
+		hashcpy(peeled, sha1);
+		is_tag = 0;
 	}
 
 	/* If --all, then any refs are used.
@@ -142,7 +133,7 @@ static int get_name(const char *path, const unsigned char *sha1, int flag, void
 		if (!prio)
 			return 0;
 	}
-	add_to_known_names(all ? path + 5 : path + 10, commit, prio, sha1);
+	add_to_known_names(all ? path + 5 : path + 10, peeled, prio, sha1);
 	return 0;
 }
 
@@ -228,7 +219,7 @@ static void describe(const char *arg, int last_one)
 	unsigned char sha1[20];
 	struct commit *cmit, *gave_up_on = NULL;
 	struct commit_list *list;
-	struct commit_name *n;
+	struct commit_name *n, *e;
 	struct possible_tag all_matches[MAX_TAGS];
 	unsigned int match_cnt = 0, annotated_cnt = 0, cur_match;
 	unsigned long seen_commits = 0;
@@ -240,7 +231,12 @@ static void describe(const char *arg, int last_one)
 	if (!cmit)
 		die("%s is not a valid '%s' object", arg, commit_type);
 
-	n = cmit->util;
+	n = NULL;
+	for (e = names; e; e = e->next) {
+		if (!hashcmp(e->peeled, cmit->object.sha1) &&
+		    replace_name(n, e->prio, e->sha1, &e->tag))
+			n = e;
+	}
 	if (n && (tags || all || n->prio == 2)) {
 		/*
 		 * Exact match to an existing ref.
@@ -259,6 +255,12 @@ static void describe(const char *arg, int last_one)
 	if (debug)
 		fprintf(stderr, "searching to describe %s\n", arg);
 
+	for (e = names; e; e = e->next) {
+		struct commit *c = lookup_commit_reference_gently(e->peeled, 1);
+		if (c && replace_name(c->util, e->prio, e->sha1, &e->tag))
+			c->util = e;
+	}
+
 	list = NULL;
 	cmit->object.flags = SEEN;
 	commit_list_insert(cmit, &list);
@@ -419,7 +421,7 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
 	}
 
 	for_each_rawref(get_name, NULL);
-	if (!found_names && !always)
+	if (!names && !always)
 		die("No names found, cannot describe anything.");
 
 	if (argc == 0) {
-- 
1.7.3.3

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH] describe: Don’t look up commits with --exact-match
  2010-12-06 17:28       ` Anders Kaseorg
@ 2010-12-06 17:47         ` Jonathan Nieder
  2010-12-07  9:58         ` SZEDER Gábor
  2010-12-07 18:39         ` [PATCH] describe: Don’t look up commits with --exact-match Junio C Hamano
  2 siblings, 0 replies; 28+ messages in thread
From: Jonathan Nieder @ 2010-12-06 17:47 UTC (permalink / raw)
  To: Anders Kaseorg
  Cc: Junio C Hamano, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

Anders Kaseorg wrote:

> Yeah.  Okay, I see that “lookup” implies unpacking, as opposed to “find”, 
> which doesn’t.  The important part is to avoid the find, of course, 
> because the I/O is expensive.
>
> Anyway, here’s a series with that change split out.

Thanks.

In theory finding an object would only require looking at the pack
index (and directory listings), but find_pack_entry does

		/*
		 * We are about to tell the caller where they can
		 * locate the requested object.  We better make
		 * sure the packfile is still here and can be
		 * accessed before supplying that answer, as
		 * it may have been deleted since the index
		 * was loaded!
		 */

which spoils that plan.

Your original use of the word "lookup" seemed sane, too; I was just
fishing for a more detailed explanation.

Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>

> -- 8< --
> From 2ad1e58b8f6e9c117c77748b6e8b85227d9d5412 Mon Sep 17 00:00:00 2001
[...]

For the future: this does not follow the preferred patch format (one
patch per message, each of the form "description/---/diff").  In this
case I am guessing it won't be much trouble to massage on the
receiving end, so no terrible loss.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] describe: Don’t look up commits with --exact-match
  2010-12-06 17:28       ` Anders Kaseorg
  2010-12-06 17:47         ` Jonathan Nieder
@ 2010-12-07  9:58         ` SZEDER Gábor
  2010-12-07 18:22           ` [PATCH 1/2] describe: Use for_each_rawref Anders Kaseorg
  2010-12-07 18:39         ` [PATCH] describe: Don’t look up commits with --exact-match Junio C Hamano
  2 siblings, 1 reply; 28+ messages in thread
From: SZEDER Gábor @ 2010-12-07  9:58 UTC (permalink / raw)
  To: Anders Kaseorg
  Cc: Jonathan Nieder, Junio C Hamano, git, Kirill Smelkov, Thomas Rast

Hi,

On Mon, Dec 06, 2010 at 12:28:59PM -0500, Anders Kaseorg wrote:
> This makes ‘git describe --exact-match HEAD’ about 15 times faster on
> a cold cache (2.3s instead of 35s) in a linux-2.6 repository with many
> packed tags.  That’s a huge win for the interactivity of the __git_ps1
> shell prompt helper.

I wanted to give it a try to see the performance improvements in the
bash prompt for myself, but couldn't get to it so far.  However,
glancing through the logic finding out the current branch in
__git_ps1, it seems that git describe is only run when git
symbolic-ref HEAD failed, i.e. when on a detached head.  So the
performance improvement in the shell prompt is only there when on a
detached head (and with lots of tags), right?  If so, then I'd like to
have "when on a detached head" appended to the commit message.


Best,
Gábor

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 1/2] describe: Use for_each_rawref
  2010-12-07  9:58         ` SZEDER Gábor
@ 2010-12-07 18:22           ` Anders Kaseorg
  2010-12-07 18:22             ` [PATCH 2/2] describe: Don’t look up commits with --exact-match Anders Kaseorg
  2010-12-07 18:26             ` [PATCH 1/2] describe: Use for_each_rawref Anders Kaseorg
  0 siblings, 2 replies; 28+ messages in thread
From: Anders Kaseorg @ 2010-12-07 18:22 UTC (permalink / raw)
  To: Junio C Hamano, SZEDER Gábor
  Cc: Jonathan Nieder, git, Kirill Smelkov, Thomas Rast

Don’t waste time checking for dangling refs; they wouldn’t affect the
output of ‘git describe’ anyway.  Although this doesn’t gain much
performance by itself, it does in conjunction with the next commit.

Signed-off-by: Anders Kaseorg <andersk@ksplice.com>

diff --git a/builtin/describe.c b/builtin/describe.c
index 43caff2..700f740 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -418,7 +418,7 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
 		return cmd_name_rev(i + argc, args, prefix);
 	}
 
-	for_each_ref(get_name, NULL);
+	for_each_rawref(get_name, NULL);
 	if (!found_names && !always)
 		die("No names found, cannot describe anything.");
 
-- 
1.7.3.3

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 2/2] describe: Don’t look up commits with --exact-match
  2010-12-07 18:22           ` [PATCH 1/2] describe: Use for_each_rawref Anders Kaseorg
@ 2010-12-07 18:22             ` Anders Kaseorg
  2010-12-07 18:26             ` [PATCH 1/2] describe: Use for_each_rawref Anders Kaseorg
  1 sibling, 0 replies; 28+ messages in thread
From: Anders Kaseorg @ 2010-12-07 18:22 UTC (permalink / raw)
  To: Junio C Hamano, SZEDER Gábor
  Cc: Jonathan Nieder, git, Kirill Smelkov, Thomas Rast

This makes ‘git describe --exact-match HEAD’ about 15 times faster on
a cold cache (2.3s instead of 35s) in a linux-2.6 repository with many
packed tags.  That’s a huge win for the interactivity of the __git_ps1
shell prompt helper when on a detached head.

Signed-off-by: Anders Kaseorg <andersk@ksplice.com>

diff --git a/builtin/describe.c b/builtin/describe.c
index 700f740..0cddef1 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -22,7 +22,7 @@ static int tags;	/* Allow lightweight tags */
 static int longformat;
 static int abbrev = DEFAULT_ABBREV;
 static int max_candidates = 10;
-static int found_names;
+static struct commit_name *names;
 static const char *pattern;
 static int always;
 static const char *dirty;
@@ -34,6 +34,8 @@ static const char *diff_index_args[] = {
 
 
 struct commit_name {
+	struct commit_name *next;
+	unsigned char peeled[20];
 	struct tag *tag;
 	unsigned prio:2; /* annotated tag = 2, tag = 1, head = 0 */
 	unsigned name_checked:1;
@@ -78,31 +80,26 @@ static int replace_name(struct commit_name *e,
 }
 
 static void add_to_known_names(const char *path,
-			       struct commit *commit,
+			       const unsigned char *peeled,
 			       int prio,
 			       const unsigned char *sha1)
 {
-	struct commit_name *e = commit->util;
 	struct tag *tag = NULL;
-	if (replace_name(e, prio, sha1, &tag)) {
-		size_t len = strlen(path)+1;
-		free(e);
-		e = xmalloc(sizeof(struct commit_name) + len);
-		e->tag = tag;
-		e->prio = prio;
-		e->name_checked = 0;
-		hashcpy(e->sha1, sha1);
-		memcpy(e->path, path, len);
-		commit->util = e;
-	}
-	found_names = 1;
+	size_t len = strlen(path)+1;
+	struct commit_name *e = xmalloc(sizeof(struct commit_name) + len);
+	hashcpy(e->peeled, peeled);
+	e->tag = tag;
+	e->prio = prio;
+	e->name_checked = 0;
+	hashcpy(e->sha1, sha1);
+	memcpy(e->path, path, len);
+	e->next = names;
+	names = e;
 }
 
 static int get_name(const char *path, const unsigned char *sha1, int flag, void *cb_data)
 {
 	int might_be_tag = !prefixcmp(path, "refs/tags/");
-	struct commit *commit;
-	struct object *object;
 	unsigned char peeled[20];
 	int is_tag, prio;
 
@@ -110,16 +107,10 @@ static int get_name(const char *path, const unsigned char *sha1, int flag, void
 		return 0;
 
 	if (!peel_ref(path, peeled) && !is_null_sha1(peeled)) {
-		commit = lookup_commit_reference_gently(peeled, 1);
-		if (!commit)
-			return 0;
-		is_tag = !!hashcmp(sha1, commit->object.sha1);
+		is_tag = !!hashcmp(sha1, peeled);
 	} else {
-		commit = lookup_commit_reference_gently(sha1, 1);
-		object = parse_object(sha1);
-		if (!commit || !object)
-			return 0;
-		is_tag = object->type == OBJ_TAG;
+		hashcpy(peeled, sha1);
+		is_tag = 0;
 	}
 
 	/* If --all, then any refs are used.
@@ -142,7 +133,7 @@ static int get_name(const char *path, const unsigned char *sha1, int flag, void
 		if (!prio)
 			return 0;
 	}
-	add_to_known_names(all ? path + 5 : path + 10, commit, prio, sha1);
+	add_to_known_names(all ? path + 5 : path + 10, peeled, prio, sha1);
 	return 0;
 }
 
@@ -228,7 +219,7 @@ static void describe(const char *arg, int last_one)
 	unsigned char sha1[20];
 	struct commit *cmit, *gave_up_on = NULL;
 	struct commit_list *list;
-	struct commit_name *n;
+	struct commit_name *n, *e;
 	struct possible_tag all_matches[MAX_TAGS];
 	unsigned int match_cnt = 0, annotated_cnt = 0, cur_match;
 	unsigned long seen_commits = 0;
@@ -240,7 +231,12 @@ static void describe(const char *arg, int last_one)
 	if (!cmit)
 		die("%s is not a valid '%s' object", arg, commit_type);
 
-	n = cmit->util;
+	n = NULL;
+	for (e = names; e; e = e->next) {
+		if (!hashcmp(e->peeled, cmit->object.sha1) &&
+		    replace_name(n, e->prio, e->sha1, &e->tag))
+			n = e;
+	}
 	if (n && (tags || all || n->prio == 2)) {
 		/*
 		 * Exact match to an existing ref.
@@ -259,6 +255,12 @@ static void describe(const char *arg, int last_one)
 	if (debug)
 		fprintf(stderr, "searching to describe %s\n", arg);
 
+	for (e = names; e; e = e->next) {
+		struct commit *c = lookup_commit_reference_gently(e->peeled, 1);
+		if (c && replace_name(c->util, e->prio, e->sha1, &e->tag))
+			c->util = e;
+	}
+
 	list = NULL;
 	cmit->object.flags = SEEN;
 	commit_list_insert(cmit, &list);
@@ -419,7 +421,7 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
 	}
 
 	for_each_rawref(get_name, NULL);
-	if (!found_names && !always)
+	if (!names && !always)
 		die("No names found, cannot describe anything.");
 
 	if (argc == 0) {
-- 
1.7.3.3

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/2] describe: Use for_each_rawref
  2010-12-07 18:22           ` [PATCH 1/2] describe: Use for_each_rawref Anders Kaseorg
  2010-12-07 18:22             ` [PATCH 2/2] describe: Don’t look up commits with --exact-match Anders Kaseorg
@ 2010-12-07 18:26             ` Anders Kaseorg
  2010-12-07 19:49               ` Junio C Hamano
  1 sibling, 1 reply; 28+ messages in thread
From: Anders Kaseorg @ 2010-12-07 18:26 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: SZEDER Gábor, Jonathan Nieder, git, Kirill Smelkov, Thomas Rast

On Tue, 7 Dec 2010, Anders Kaseorg wrote:
> Signed-off-by: Anders Kaseorg <andersk@ksplice.com>
> 
> diff --git a/builtin/describe.c b/builtin/describe.c

(Gaaah, sorry, I accidentally used format-patch -p here.  Won’t happen 
again.  :-) )

Anders

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] describe: Don’t look up commits with --exact-match
  2010-12-06 17:28       ` Anders Kaseorg
  2010-12-06 17:47         ` Jonathan Nieder
  2010-12-07  9:58         ` SZEDER Gábor
@ 2010-12-07 18:39         ` Junio C Hamano
  2010-12-08  4:41           ` Anders Kaseorg
  2 siblings, 1 reply; 28+ messages in thread
From: Junio C Hamano @ 2010-12-07 18:39 UTC (permalink / raw)
  To: Anders Kaseorg
  Cc: Jonathan Nieder, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

Anders Kaseorg <andersk@ksplice.com> writes:

> From 2ad1e58b8f6e9c117c77748b6e8b85227d9d5412 Mon Sep 17 00:00:00 2001
> From: Anders Kaseorg <andersk@ksplice.com>
> Subject: [PATCH 1/2] describe: Use for_each_rawref
>
> Don’t waste time checking for dangling refs; they wouldn’t affect the
> output of ‘git describe’ anyway.  Although this doesn’t gain much
> performance by itself, it does in conjunction with the next commit.
>
> Signed-off-by: Anders Kaseorg <andersk@ksplice.com>
> ---
>  builtin/describe.c |    2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
>
> diff --git a/builtin/describe.c b/builtin/describe.c
> index 43caff2..700f740 100644
> --- a/builtin/describe.c
> +++ b/builtin/describe.c
> @@ -418,7 +418,7 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
>  		return cmd_name_rev(i + argc, args, prefix);
>  	}
>  
> -	for_each_ref(get_name, NULL);
> +	for_each_rawref(get_name, NULL);
>  	if (!found_names && !always)
>  		die("No names found, cannot describe anything.");

It is Ok during a review cycle that is not meant as the final submission,
but please follow Documentation/SubmittingPatches for the final round
(i.e. not multiple-changes in a single mail), as mentioned by Jonathan.

> From ce8a2ab9cf80247c2834d21f36f63cedb794e62f Mon Sep 17 00:00:00 2001
> From: Anders Kaseorg <andersk@ksplice.com>
> Subject: describe: Don’t look up commits with --exact-match
>
> This makes ‘git describe --exact-match HEAD’ about 15 times faster on
> a cold cache (2.3s instead of 35s) in a linux-2.6 repository with many
> packed tags.  That’s a huge win for the interactivity of the __git_ps1
> shell prompt helper.

Good description of the motivation.  Care to describe how it is achieved?

I may be misreading your intention, but my understanding of the thinking
behind what the new code does is...

 - When the object name given from the command line happens to match one
   of the tags exactly (either a lightweight tag in which case we can just
   compare the object names to detect the match, or an annotated tag in
   which case we only need to parse the tag but not the underlying commit
   to do the comparison), we do not have to parse any commit object.

 - Other cases we would need to parse commit objects and traverse the
   history.

 - Hence, the code can be optimized for the case to describe an exact
   match by trying exact object name match first and then loading commit
   objects only when no exact matches found.

 - When we are only looking for exact matches (which is _not_ a rare
   corner case), we won't ever fall back to the "parse commit objects and
   traverse the history" codepath.  So it is important to lazily parse
   commit objects in order to optimize for this special case.

I however think this may hurt when more than one objects are asked to be
described and there is no exact match.  The fallback codepath that lazily
runs lookup_commit_reference_gently() and uses replace_name() to compute
the best name needs to run only once, but this part of the code seems to
run it for the elements on the names list once per describe() invocation
that needs to fall back to this codepath.  Would it remove this regression
if we make it run only once?

I don't think this affects correctness, though.

> @@ -259,6 +255,12 @@ static void describe(const char *arg, int last_one)
>  	if (debug)
>  		fprintf(stderr, "searching to describe %s\n", arg);
>  
> +	for (e = names; e; e = e->next) {
> +		struct commit *c = lookup_commit_reference_gently(e->peeled, 1);
> +		if (c && replace_name(c->util, e->prio, e->sha1, &e->tag))
> +			c->util = e;
> +	}
> +
>  	list = NULL;
>  	cmit->object.flags = SEEN;
>  	commit_list_insert(cmit, &list);

As you can see from its "if (!e->tag)" codepath, replace_name() is not
designed to be called excessively (e.g. we will end up running the
codepath for an annotated tag that we should know we won't find a tag).

Also, "struct tag *tag" does not need to exist in add_to_known_names()
anymore; a NULL is assigned to it and its only use is to get assigned
to e->tag.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/2] describe: Use for_each_rawref
  2010-12-07 18:26             ` [PATCH 1/2] describe: Use for_each_rawref Anders Kaseorg
@ 2010-12-07 19:49               ` Junio C Hamano
  2010-12-07 21:59                 ` Junio C Hamano
  0 siblings, 1 reply; 28+ messages in thread
From: Junio C Hamano @ 2010-12-07 19:49 UTC (permalink / raw)
  To: Anders Kaseorg
  Cc: Junio C Hamano, SZEDER Gábor, Jonathan Nieder, git,
	Kirill Smelkov, Thomas Rast

Anders Kaseorg <andersk@ksplice.com> writes:

> On Tue, 7 Dec 2010, Anders Kaseorg wrote:
>> Signed-off-by: Anders Kaseorg <andersk@ksplice.com>
>> 
>> diff --git a/builtin/describe.c b/builtin/describe.c
>
> (Gaaah, sorry, I accidentally used format-patch -p here.  Won’t happen 
> again.  :-) )

The default will work just fine, no?

Also labelling them like "[PATCH v2 1/2]" would have been nicer to spot
the progress of the series.

Thanks.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/2] describe: Use for_each_rawref
  2010-12-07 19:49               ` Junio C Hamano
@ 2010-12-07 21:59                 ` Junio C Hamano
  0 siblings, 0 replies; 28+ messages in thread
From: Junio C Hamano @ 2010-12-07 21:59 UTC (permalink / raw)
  To: Anders Kaseorg
  Cc: SZEDER Gábor, Jonathan Nieder, git, Kirill Smelkov,
	Thomas Rast, Shawn O. Pearce, Mark Levedahl, Jens Lehmann

Junio C Hamano <gitster@pobox.com> writes:

> Anders Kaseorg <andersk@ksplice.com> writes:
>
>> On Tue, 7 Dec 2010, Anders Kaseorg wrote:
>>> Signed-off-by: Anders Kaseorg <andersk@ksplice.com>
>>> 
>>> diff --git a/builtin/describe.c b/builtin/describe.c
>>
>> (Gaaah, sorry, I accidentally used format-patch -p here.  Won’t happen 
>> again.  :-) )
>
> The default will work just fine, no?
>
> Also labelling them like "[PATCH v2 1/2]" would have been nicer to spot
> the progress of the series.

Two more requests.

 * Please describe what was changed since earlier interations after "---"
   lines;

 * Please run tests before submitting patches.  It appears to break t7407.

What breaks in t7407 is "git submodule status --recursive" output.  In my
environment, it used to describe the tip as heads/master but after the
patch it says remotes/origin/master.  The output from "describe --all"
does not tiebreak between one branch from another, even though it does
favor tags over branches (and tries to use annotated ones), so it is not a
real regression in that sense, but then the test needs to be fixed not to
care about which equally good branches are shown.

Some might even argue that it is more relevant to show how the commit
relates to remote-tracking branch heads.  That is a separate issue (if
this position is widely supported, we would introduce another "prio"
between "all others (0)" and "lightweight tags (1)" for remote-tracking
branches), though.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] describe: Don’t look up commits with --exact-match
  2010-12-07 18:39         ` [PATCH] describe: Don’t look up commits with --exact-match Junio C Hamano
@ 2010-12-08  4:41           ` Anders Kaseorg
  2010-12-08  4:42             ` [PATCH v2 1/4] describe: Use for_each_rawref Anders Kaseorg
  0 siblings, 1 reply; 28+ messages in thread
From: Anders Kaseorg @ 2010-12-08  4:41 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jonathan Nieder, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

On Tue, 7 Dec 2010, Junio C Hamano wrote:
> I however think this may hurt when more than one objects are asked to be
> described and there is no exact match.

Yeah, that turns out to be true.  So I rewrote the patch to put the 
commit_names into a hash table instead of a linked list, run 
replace_name() only once per tag, and run the lookup code only once per 
describe().  I also tried to split things up a bit more and explain more 
in the commit messages.

> * Please run tests before submitting patches.  It appears to break t7407.

Oops, I was lazy and only ran the tests that (directly) use describe; I’ll 
be more careful in the future. The new series doesn’t have this problem 
because it does the replace_name()s in the same order as the old code, and 
it passes all tests.

fixed   1
success 6426
failed  0
broken  39
total   6496

Anders

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v2 1/4] describe: Use for_each_rawref
  2010-12-08  4:41           ` Anders Kaseorg
@ 2010-12-08  4:42             ` Anders Kaseorg
  2010-12-08  4:43               ` [PATCH v2 2/4] describe: Don’t use a flex array in struct commit_name Anders Kaseorg
                                 ` (2 more replies)
  0 siblings, 3 replies; 28+ messages in thread
From: Anders Kaseorg @ 2010-12-08  4:42 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jonathan Nieder, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

Don’t waste time checking for dangling refs; they wouldn’t affect the
output of ‘git describe’ anyway.  Although this doesn’t gain much
performance by itself, it does in conjunction with the next commits.

Signed-off-by: Anders Kaseorg <andersk@ksplice.com>
---
 builtin/describe.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 43caff2..700f740 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -418,7 +418,7 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
 		return cmd_name_rev(i + argc, args, prefix);
 	}
 
-	for_each_ref(get_name, NULL);
+	for_each_rawref(get_name, NULL);
 	if (!found_names && !always)
 		die("No names found, cannot describe anything.");
 
-- 
1.7.3.3

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v2 2/4] describe: Don’t use a flex array in struct commit_name
  2010-12-08  4:42             ` [PATCH v2 1/4] describe: Use for_each_rawref Anders Kaseorg
@ 2010-12-08  4:43               ` Anders Kaseorg
  2010-12-08  4:43               ` [PATCH v2 3/4] describe: Store commit_names in a hash table by commit SHA1 Anders Kaseorg
  2010-12-08  4:46               ` [PATCH v2 4/4] describe: Delay looking up commits until searching for an inexact match Anders Kaseorg
  2 siblings, 0 replies; 28+ messages in thread
From: Anders Kaseorg @ 2010-12-08  4:43 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jonathan Nieder, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

Now add_to_known_names overwrites commit_names in place when multiple
tags point to the same commit.  This will make it easier to store
commit_names in a hash table.

Signed-off-by: Anders Kaseorg <andersk@ksplice.com>
---
 builtin/describe.c |   12 ++++++------
 1 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 700f740..5b8461d 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -38,7 +38,7 @@ struct commit_name {
 	unsigned prio:2; /* annotated tag = 2, tag = 1, head = 0 */
 	unsigned name_checked:1;
 	unsigned char sha1[20];
-	char path[FLEX_ARRAY]; /* more */
+	const char *path;
 };
 static const char *prio_names[] = {
 	"head", "lightweight", "annotated",
@@ -85,15 +85,15 @@ static void add_to_known_names(const char *path,
 	struct commit_name *e = commit->util;
 	struct tag *tag = NULL;
 	if (replace_name(e, prio, sha1, &tag)) {
-		size_t len = strlen(path)+1;
-		free(e);
-		e = xmalloc(sizeof(struct commit_name) + len);
+		if (!e) {
+			e = xmalloc(sizeof(struct commit_name));
+			commit->util = e;
+		}
 		e->tag = tag;
 		e->prio = prio;
 		e->name_checked = 0;
 		hashcpy(e->sha1, sha1);
-		memcpy(e->path, path, len);
-		commit->util = e;
+		e->path = path;
 	}
 	found_names = 1;
 }
-- 
1.7.3.3

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v2 3/4] describe: Store commit_names in a hash table by commit SHA1
  2010-12-08  4:42             ` [PATCH v2 1/4] describe: Use for_each_rawref Anders Kaseorg
  2010-12-08  4:43               ` [PATCH v2 2/4] describe: Don’t use a flex array in struct commit_name Anders Kaseorg
@ 2010-12-08  4:43               ` Anders Kaseorg
  2010-12-08 18:23                 ` [PATCH v2.1 " Anders Kaseorg
  2010-12-08  4:46               ` [PATCH v2 4/4] describe: Delay looking up commits until searching for an inexact match Anders Kaseorg
  2 siblings, 1 reply; 28+ messages in thread
From: Anders Kaseorg @ 2010-12-08  4:43 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jonathan Nieder, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

describe is currently forced to look up the commit at each tag in
order to store the struct commit_name pointers in struct commit.util.
For --exact-match queries, those lookups are wasteful.  In preparation
for removing them, put the commit_names into a hash table, indexed by
commit SHA1, that can be used to quickly check for exact matches.

Signed-off-by: Anders Kaseorg <andersk@ksplice.com>
---
 builtin/describe.c |   36 +++++++++++++++++++++++++++++++-----
 1 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 5b8461d..afc50c0 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -6,6 +6,7 @@
 #include "exec_cmd.h"
 #include "parse-options.h"
 #include "diff.h"
+#include "hash.h"
 
 #define SEEN		(1u<<0)
 #define MAX_TAGS	(FLAG_BITS - 1)
@@ -22,7 +23,7 @@ static int tags;	/* Allow lightweight tags */
 static int longformat;
 static int abbrev = DEFAULT_ABBREV;
 static int max_candidates = 10;
-static int found_names;
+struct hash_table names;
 static const char *pattern;
 static int always;
 static const char *dirty;
@@ -34,6 +35,8 @@ static const char *diff_index_args[] = {
 
 
 struct commit_name {
+	struct commit_name *next;
+	unsigned char peeled[20];
 	struct tag *tag;
 	unsigned prio:2; /* annotated tag = 2, tag = 1, head = 0 */
 	unsigned name_checked:1;
@@ -44,6 +47,19 @@ static const char *prio_names[] = {
 	"head", "lightweight", "annotated",
 };
 
+static inline unsigned int hash_sha1(const unsigned char *sha1)
+{
+	return *(unsigned int *)sha1;
+}
+
+static inline struct commit_name *find_commit_name(const unsigned char *peeled)
+{
+	struct commit_name *n = lookup_hash(hash_sha1(peeled), &names);
+	while (n && !!hashcmp(peeled, n->peeled))
+		n = n->next;
+	return n;
+}
+
 static int replace_name(struct commit_name *e,
 			       int prio,
 			       const unsigned char *sha1,
@@ -82,12 +98,22 @@ static void add_to_known_names(const char *path,
 			       int prio,
 			       const unsigned char *sha1)
 {
-	struct commit_name *e = commit->util;
+	const unsigned char *peeled = commit->object.sha1;
+	struct commit_name *e = find_commit_name(peeled);
 	struct tag *tag = NULL;
 	if (replace_name(e, prio, sha1, &tag)) {
 		if (!e) {
+			void **pos;
 			e = xmalloc(sizeof(struct commit_name));
 			commit->util = e;
+			hashcpy(e->peeled, peeled);
+			pos = insert_hash(hash_sha1(peeled), e, &names);
+			if (pos) {
+				e->next = *pos;
+				*pos = e;
+			} else {
+				e->next = NULL;
+			}
 		}
 		e->tag = tag;
 		e->prio = prio;
@@ -95,7 +121,6 @@ static void add_to_known_names(const char *path,
 		hashcpy(e->sha1, sha1);
 		e->path = path;
 	}
-	found_names = 1;
 }
 
 static int get_name(const char *path, const unsigned char *sha1, int flag, void *cb_data)
@@ -240,7 +265,7 @@ static void describe(const char *arg, int last_one)
 	if (!cmit)
 		die("%s is not a valid '%s' object", arg, commit_type);
 
-	n = cmit->util;
+	n = find_commit_name(cmit->object.sha1);
 	if (n && (tags || all || n->prio == 2)) {
 		/*
 		 * Exact match to an existing ref.
@@ -418,8 +443,9 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
 		return cmd_name_rev(i + argc, args, prefix);
 	}
 
+	init_hash(&names);
 	for_each_rawref(get_name, NULL);
-	if (!found_names && !always)
+	if (!names.nr && !always)
 		die("No names found, cannot describe anything.");
 
 	if (argc == 0) {
-- 
1.7.3.3

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v2 4/4] describe: Delay looking up commits until searching for an inexact match
  2010-12-08  4:42             ` [PATCH v2 1/4] describe: Use for_each_rawref Anders Kaseorg
  2010-12-08  4:43               ` [PATCH v2 2/4] describe: Don’t use a flex array in struct commit_name Anders Kaseorg
  2010-12-08  4:43               ` [PATCH v2 3/4] describe: Store commit_names in a hash table by commit SHA1 Anders Kaseorg
@ 2010-12-08  4:46               ` Anders Kaseorg
  2010-12-08 22:50                 ` Junio C Hamano
  2 siblings, 1 reply; 28+ messages in thread
From: Anders Kaseorg @ 2010-12-08  4:46 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jonathan Nieder, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

Now that struct commit.util is not used until after we’ve checked that
the argument doesn’t exactly match a tag, we can wait until then to
look up the commits for each tag.

This avoids a lot of I/O on --exact-match queries in repositories with
many tags.  For example, ‘git describe --exact-match HEAD’ becomes
about 12 times faster on a cold cache (3.2s instead of 39s) in a
linux-2.6 repository with 2000 packed tags.  That’s a huge win for the
interactivity of the __git_ps1 shell prompt helper when on a detached
HEAD.

Signed-off-by: Anders Kaseorg <andersk@ksplice.com>
---

(The benchmark numbers are different because my linux-2.6 repository has 
changed; I don’t think this version is actually slower.)

 builtin/describe.c |   35 ++++++++++++++++++++---------------
 1 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index afc50c0..2633371 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -24,6 +24,7 @@ static int longformat;
 static int abbrev = DEFAULT_ABBREV;
 static int max_candidates = 10;
 struct hash_table names;
+static int have_util;
 static const char *pattern;
 static int always;
 static const char *dirty;
@@ -60,6 +61,15 @@ static inline struct commit_name *find_commit_name(const unsigned char *peeled)
 	return n;
 }
 
+static int set_util(void *vn)
+{
+	struct commit_name *n = vn;
+	struct commit *c = lookup_commit_reference_gently(n->peeled, 1);
+	if (c)
+		c->util = n;
+	return 0;
+}
+
 static int replace_name(struct commit_name *e,
 			       int prio,
 			       const unsigned char *sha1,
@@ -94,18 +104,16 @@ static int replace_name(struct commit_name *e,
 }
 
 static void add_to_known_names(const char *path,
-			       struct commit *commit,
+			       const unsigned char *peeled,
 			       int prio,
 			       const unsigned char *sha1)
 {
-	const unsigned char *peeled = commit->object.sha1;
 	struct commit_name *e = find_commit_name(peeled);
 	struct tag *tag = NULL;
 	if (replace_name(e, prio, sha1, &tag)) {
 		if (!e) {
 			void **pos;
 			e = xmalloc(sizeof(struct commit_name));
-			commit->util = e;
 			hashcpy(e->peeled, peeled);
 			pos = insert_hash(hash_sha1(peeled), e, &names);
 			if (pos) {
@@ -126,8 +134,6 @@ static void add_to_known_names(const char *path,
 static int get_name(const char *path, const unsigned char *sha1, int flag, void *cb_data)
 {
 	int might_be_tag = !prefixcmp(path, "refs/tags/");
-	struct commit *commit;
-	struct object *object;
 	unsigned char peeled[20];
 	int is_tag, prio;
 
@@ -135,16 +141,10 @@ static int get_name(const char *path, const unsigned char *sha1, int flag, void
 		return 0;
 
 	if (!peel_ref(path, peeled) && !is_null_sha1(peeled)) {
-		commit = lookup_commit_reference_gently(peeled, 1);
-		if (!commit)
-			return 0;
-		is_tag = !!hashcmp(sha1, commit->object.sha1);
+		is_tag = !!hashcmp(sha1, peeled);
 	} else {
-		commit = lookup_commit_reference_gently(sha1, 1);
-		object = parse_object(sha1);
-		if (!commit || !object)
-			return 0;
-		is_tag = object->type == OBJ_TAG;
+		hashcpy(peeled, sha1);
+		is_tag = 0;
 	}
 
 	/* If --all, then any refs are used.
@@ -167,7 +167,7 @@ static int get_name(const char *path, const unsigned char *sha1, int flag, void
 		if (!prio)
 			return 0;
 	}
-	add_to_known_names(all ? path + 5 : path + 10, commit, prio, sha1);
+	add_to_known_names(all ? path + 5 : path + 10, peeled, prio, sha1);
 	return 0;
 }
 
@@ -284,6 +284,11 @@ static void describe(const char *arg, int last_one)
 	if (debug)
 		fprintf(stderr, "searching to describe %s\n", arg);
 
+	if (!have_util) {
+		for_each_hash(&names, set_util);
+		have_util = 1;
+	}
+
 	list = NULL;
 	cmit->object.flags = SEEN;
 	commit_list_insert(cmit, &list);
-- 
1.7.3.3

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v2.1 3/4] describe: Store commit_names in a hash table by commit SHA1
  2010-12-08  4:43               ` [PATCH v2 3/4] describe: Store commit_names in a hash table by commit SHA1 Anders Kaseorg
@ 2010-12-08 18:23                 ` Anders Kaseorg
  2010-12-08 22:50                   ` Junio C Hamano
  0 siblings, 1 reply; 28+ messages in thread
From: Anders Kaseorg @ 2010-12-08 18:23 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jonathan Nieder, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

describe is currently forced to look up the commit at each tag in
order to store the struct commit_name pointers in struct commit.util.
For --exact-match queries, those lookups are wasteful.  In preparation
for removing them, put the commit_names into a hash table, indexed by
commit SHA1, that can be used to quickly check for exact matches.

Signed-off-by: Anders Kaseorg <andersk@ksplice.com>
---
Change from v2: I had forgotten to make ‘names’ static again (thanks 
Jonathan).

 builtin/describe.c |   36 +++++++++++++++++++++++++++++++-----
 1 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 5b8461d..5d709b6 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -6,6 +6,7 @@
 #include "exec_cmd.h"
 #include "parse-options.h"
 #include "diff.h"
+#include "hash.h"
 
 #define SEEN		(1u<<0)
 #define MAX_TAGS	(FLAG_BITS - 1)
@@ -22,7 +23,7 @@ static int tags;	/* Allow lightweight tags */
 static int longformat;
 static int abbrev = DEFAULT_ABBREV;
 static int max_candidates = 10;
-static int found_names;
+static struct hash_table names;
 static const char *pattern;
 static int always;
 static const char *dirty;
@@ -34,6 +35,8 @@ static const char *diff_index_args[] = {
 
 
 struct commit_name {
+	struct commit_name *next;
+	unsigned char peeled[20];
 	struct tag *tag;
 	unsigned prio:2; /* annotated tag = 2, tag = 1, head = 0 */
 	unsigned name_checked:1;
@@ -44,6 +47,19 @@ static const char *prio_names[] = {
 	"head", "lightweight", "annotated",
 };
 
+static inline unsigned int hash_sha1(const unsigned char *sha1)
+{
+	return *(unsigned int *)sha1;
+}
+
+static inline struct commit_name *find_commit_name(const unsigned char *peeled)
+{
+	struct commit_name *n = lookup_hash(hash_sha1(peeled), &names);
+	while (n && !!hashcmp(peeled, n->peeled))
+		n = n->next;
+	return n;
+}
+
 static int replace_name(struct commit_name *e,
 			       int prio,
 			       const unsigned char *sha1,
@@ -82,12 +98,22 @@ static void add_to_known_names(const char *path,
 			       int prio,
 			       const unsigned char *sha1)
 {
-	struct commit_name *e = commit->util;
+	const unsigned char *peeled = commit->object.sha1;
+	struct commit_name *e = find_commit_name(peeled);
 	struct tag *tag = NULL;
 	if (replace_name(e, prio, sha1, &tag)) {
 		if (!e) {
+			void **pos;
 			e = xmalloc(sizeof(struct commit_name));
 			commit->util = e;
+			hashcpy(e->peeled, peeled);
+			pos = insert_hash(hash_sha1(peeled), e, &names);
+			if (pos) {
+				e->next = *pos;
+				*pos = e;
+			} else {
+				e->next = NULL;
+			}
 		}
 		e->tag = tag;
 		e->prio = prio;
@@ -95,7 +121,6 @@ static void add_to_known_names(const char *path,
 		hashcpy(e->sha1, sha1);
 		e->path = path;
 	}
-	found_names = 1;
 }
 
 static int get_name(const char *path, const unsigned char *sha1, int flag, void *cb_data)
@@ -240,7 +265,7 @@ static void describe(const char *arg, int last_one)
 	if (!cmit)
 		die("%s is not a valid '%s' object", arg, commit_type);
 
-	n = cmit->util;
+	n = find_commit_name(cmit->object.sha1);
 	if (n && (tags || all || n->prio == 2)) {
 		/*
 		 * Exact match to an existing ref.
@@ -418,8 +443,9 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
 		return cmd_name_rev(i + argc, args, prefix);
 	}
 
+	init_hash(&names);
 	for_each_rawref(get_name, NULL);
-	if (!found_names && !always)
+	if (!names.nr && !always)
 		die("No names found, cannot describe anything.");
 
 	if (argc == 0) {
-- 
1.7.3.3

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 4/4] describe: Delay looking up commits until searching for an inexact match
  2010-12-08  4:46               ` [PATCH v2 4/4] describe: Delay looking up commits until searching for an inexact match Anders Kaseorg
@ 2010-12-08 22:50                 ` Junio C Hamano
  2010-12-08 23:47                   ` Anders Kaseorg
  0 siblings, 1 reply; 28+ messages in thread
From: Junio C Hamano @ 2010-12-08 22:50 UTC (permalink / raw)
  To: Anders Kaseorg
  Cc: Jonathan Nieder, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

Anders Kaseorg <andersk@ksplice.com> writes:

> Now that struct commit.util is not used until after we’ve checked that
> the argument doesn’t exactly match a tag, we can wait until then to
> look up the commits for each tag.
>
> This avoids a lot of I/O on --exact-match queries in repositories with
> many tags.  For example, ‘git describe --exact-match HEAD’ becomes
> about 12 times faster on a cold cache (3.2s instead of 39s) in a
> linux-2.6 repository with 2000 packed tags.  That’s a huge win for the
> interactivity of the __git_ps1 shell prompt helper when on a detached
> HEAD.

You seem to have gone in a slightly different direction with this reroll.
I am not sure if use of hash_table in this code would actually improve
anything (aside from the general readability and "reusing code from a good
infrastructure is a good thing" principle), though, no matter how many
tags you have in your repository.  In the code from the earlier round,
lookup_commit_reference_gently call in the fallback codepath to populate
commit->util used the *obj_hash[] to quickly look up the commits with the
same object name already.

> @@ -60,6 +61,15 @@ static inline struct commit_name *find_commit_name(const unsigned char *peeled)
>  	return n;
>  }
>  
> +static int set_util(void *vn)
> +{
> +	struct commit_name *n = vn;
> +	struct commit *c = lookup_commit_reference_gently(n->peeled, 1);
> +	if (c)
> +		c->util = n;
> +	return 0;
> +}

I don't know how the above would work in the face of hash collisions.

This is a callback for for_each_hash(), which visits each populated entry
in the underlying hash table.  The semantics of the hash table hash.c
gives you is "here is a flat hash table.  We insert only to an empty
field, and otherwise we will tell you where your new element would have
hashed into, so that you can implement linear overflow or chained buckets
yourself on top of this", and you chose to implement bucket chaining in
add_to_known_names(), so don't you need to walk the chain here for commits
that share the same first four bytes in their object names?

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2.1 3/4] describe: Store commit_names in a hash table by commit SHA1
  2010-12-08 18:23                 ` [PATCH v2.1 " Anders Kaseorg
@ 2010-12-08 22:50                   ` Junio C Hamano
  0 siblings, 0 replies; 28+ messages in thread
From: Junio C Hamano @ 2010-12-08 22:50 UTC (permalink / raw)
  To: Anders Kaseorg
  Cc: Jonathan Nieder, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

Anders Kaseorg <andersk@ksplice.com> writes:

> @@ -44,6 +47,19 @@ static const char *prio_names[] = {
>  	"head", "lightweight", "annotated",
>  };
>  
> +static inline unsigned int hash_sha1(const unsigned char *sha1)
> +{
> +	return *(unsigned int *)sha1;
> +}

Do we know that all the archs we will be compiled on will be happy with
this potentially unaligned access?  hash_filespec() in diffcore-rename.c
is written in a way to avoid such an issue, and I would feel safer to see
this do the same.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2 4/4] describe: Delay looking up commits until searching for an inexact match
  2010-12-08 22:50                 ` Junio C Hamano
@ 2010-12-08 23:47                   ` Anders Kaseorg
  2010-12-09  6:42                     ` [PATCH v3 1/4] describe: Use for_each_rawref Anders Kaseorg
  0 siblings, 1 reply; 28+ messages in thread
From: Anders Kaseorg @ 2010-12-08 23:47 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jonathan Nieder, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

On Wed, 8 Dec 2010, Junio C Hamano wrote:
> You seem to have gone in a slightly different direction with this reroll.
> I am not sure if use of hash_table in this code would actually improve
> anything (aside from the general readability and "reusing code from a good
> infrastructure is a good thing" principle), though, no matter how many
> tags you have in your repository.  In the code from the earlier round,
> lookup_commit_reference_gently call in the fallback codepath to populate
> commit->util used the *obj_hash[] to quickly look up the commits with the
> same object name already.

Yes; the problem now is that, in order to avoid calling replace_name() 
excessively, I need to resolve all multiply-tagged commits in one pass, 
before I have those commits in the obj_hash.  This could have been done 
with the linked list by deleting any commit_name from the linked list the 
first time replace_name() decides it should be superseded by a different 
commit_name, but the hash table approach seems less fragile to me.  The 
hash table also has the advantage of avoiding the O(#tags * #arguments) 
complexity when describe is given many arguments.

Let me know if you’d like me to try it the other way.

> I don't know how the above would work in the face of hash collisions.

Oh right.  I’ll fix that.

static int set_util(void *chain)
{
	struct commit_name *n;
	for (n = chain; n; n = n->next) {
		struct commit *c = lookup_commit_reference_gently(n->peeled, 1);
		if (c)
			c->util = n;
	}
	return 0;
}

> Do we know that all the archs we will be compiled on will be happy with
> this potentially unaligned access?  hash_filespec() in diffcore-rename.c
> is written in a way to avoid such an issue, and I would feel safer to 
> see this do the same.

I’ll fix that too.

static inline unsigned int hash_sha1(const unsigned char *sha1)
{
	unsigned int hash;
	memcpy(&hash, sha1, sizeof(hash));
	return hash;
}

Anders

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v3 1/4] describe: Use for_each_rawref
  2010-12-08 23:47                   ` Anders Kaseorg
@ 2010-12-09  6:42                     ` Anders Kaseorg
  2010-12-09  6:43                       ` [PATCH v3 2/4] describe: Don’t use a flex array in struct commit_name Anders Kaseorg
                                         ` (2 more replies)
  0 siblings, 3 replies; 28+ messages in thread
From: Anders Kaseorg @ 2010-12-09  6:42 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jonathan Nieder, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

Don’t waste time checking for dangling refs; they wouldn’t affect the
output of ‘git describe’ anyway.  Although this doesn’t gain much
performance by itself, it does in conjunction with the next commits.

Signed-off-by: Anders Kaseorg <andersk@ksplice.com>
---
 builtin/describe.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 43caff2..700f740 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -418,7 +418,7 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
 		return cmd_name_rev(i + argc, args, prefix);
 	}
 
-	for_each_ref(get_name, NULL);
+	for_each_rawref(get_name, NULL);
 	if (!found_names && !always)
 		die("No names found, cannot describe anything.");
 
-- 
1.7.3.3

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 2/4] describe: Don’t use a flex array in struct commit_name
  2010-12-09  6:42                     ` [PATCH v3 1/4] describe: Use for_each_rawref Anders Kaseorg
@ 2010-12-09  6:43                       ` Anders Kaseorg
  2010-12-09  6:46                       ` [PATCH v3 3/4] describe: Store commit_names in a hash table by commit SHA1 Anders Kaseorg
  2010-12-09  6:47                       ` [PATCH v3 4/4] describe: Delay looking up commits until searching for an inexact match Anders Kaseorg
  2 siblings, 0 replies; 28+ messages in thread
From: Anders Kaseorg @ 2010-12-09  6:43 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jonathan Nieder, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

Now add_to_known_names overwrites commit_names in place when multiple
tags point to the same commit.  This will make it easier to store
commit_names in a hash table.

Signed-off-by: Anders Kaseorg <andersk@ksplice.com>
---
 builtin/describe.c |   12 ++++++------
 1 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 700f740..5b8461d 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -38,7 +38,7 @@ struct commit_name {
 	unsigned prio:2; /* annotated tag = 2, tag = 1, head = 0 */
 	unsigned name_checked:1;
 	unsigned char sha1[20];
-	char path[FLEX_ARRAY]; /* more */
+	const char *path;
 };
 static const char *prio_names[] = {
 	"head", "lightweight", "annotated",
@@ -85,15 +85,15 @@ static void add_to_known_names(const char *path,
 	struct commit_name *e = commit->util;
 	struct tag *tag = NULL;
 	if (replace_name(e, prio, sha1, &tag)) {
-		size_t len = strlen(path)+1;
-		free(e);
-		e = xmalloc(sizeof(struct commit_name) + len);
+		if (!e) {
+			e = xmalloc(sizeof(struct commit_name));
+			commit->util = e;
+		}
 		e->tag = tag;
 		e->prio = prio;
 		e->name_checked = 0;
 		hashcpy(e->sha1, sha1);
-		memcpy(e->path, path, len);
-		commit->util = e;
+		e->path = path;
 	}
 	found_names = 1;
 }
-- 
1.7.3.3

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 3/4] describe: Store commit_names in a hash table by commit SHA1
  2010-12-09  6:42                     ` [PATCH v3 1/4] describe: Use for_each_rawref Anders Kaseorg
  2010-12-09  6:43                       ` [PATCH v3 2/4] describe: Don’t use a flex array in struct commit_name Anders Kaseorg
@ 2010-12-09  6:46                       ` Anders Kaseorg
  2010-12-09  6:47                       ` [PATCH v3 4/4] describe: Delay looking up commits until searching for an inexact match Anders Kaseorg
  2 siblings, 0 replies; 28+ messages in thread
From: Anders Kaseorg @ 2010-12-09  6:46 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jonathan Nieder, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

describe is currently forced to look up the commit at each tag in
order to store the struct commit_name pointers in struct commit.util.
For --exact-match queries, those lookups are wasteful.  In preparation
for removing them, put the commit_names into a hash table, indexed by
commit SHA1, that can be used to quickly check for exact matches.

Signed-off-by: Anders Kaseorg <andersk@ksplice.com>
---

Change from v2.1: Use memcpy to avoid unaligned accesses in hash_sha1().

 builtin/describe.c |   38 +++++++++++++++++++++++++++++++++-----
 1 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 5b8461d..8149233 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -6,6 +6,7 @@
 #include "exec_cmd.h"
 #include "parse-options.h"
 #include "diff.h"
+#include "hash.h"
 
 #define SEEN		(1u<<0)
 #define MAX_TAGS	(FLAG_BITS - 1)
@@ -22,7 +23,7 @@ static int tags;	/* Allow lightweight tags */
 static int longformat;
 static int abbrev = DEFAULT_ABBREV;
 static int max_candidates = 10;
-static int found_names;
+static struct hash_table names;
 static const char *pattern;
 static int always;
 static const char *dirty;
@@ -34,6 +35,8 @@ static const char *diff_index_args[] = {
 
 
 struct commit_name {
+	struct commit_name *next;
+	unsigned char peeled[20];
 	struct tag *tag;
 	unsigned prio:2; /* annotated tag = 2, tag = 1, head = 0 */
 	unsigned name_checked:1;
@@ -44,6 +47,21 @@ static const char *prio_names[] = {
 	"head", "lightweight", "annotated",
 };
 
+static inline unsigned int hash_sha1(const unsigned char *sha1)
+{
+	unsigned int hash;
+	memcpy(&hash, sha1, sizeof(hash));
+	return hash;
+}
+
+static inline struct commit_name *find_commit_name(const unsigned char *peeled)
+{
+	struct commit_name *n = lookup_hash(hash_sha1(peeled), &names);
+	while (n && !!hashcmp(peeled, n->peeled))
+		n = n->next;
+	return n;
+}
+
 static int replace_name(struct commit_name *e,
 			       int prio,
 			       const unsigned char *sha1,
@@ -82,12 +100,22 @@ static void add_to_known_names(const char *path,
 			       int prio,
 			       const unsigned char *sha1)
 {
-	struct commit_name *e = commit->util;
+	const unsigned char *peeled = commit->object.sha1;
+	struct commit_name *e = find_commit_name(peeled);
 	struct tag *tag = NULL;
 	if (replace_name(e, prio, sha1, &tag)) {
 		if (!e) {
+			void **pos;
 			e = xmalloc(sizeof(struct commit_name));
 			commit->util = e;
+			hashcpy(e->peeled, peeled);
+			pos = insert_hash(hash_sha1(peeled), e, &names);
+			if (pos) {
+				e->next = *pos;
+				*pos = e;
+			} else {
+				e->next = NULL;
+			}
 		}
 		e->tag = tag;
 		e->prio = prio;
@@ -95,7 +123,6 @@ static void add_to_known_names(const char *path,
 		hashcpy(e->sha1, sha1);
 		e->path = path;
 	}
-	found_names = 1;
 }
 
 static int get_name(const char *path, const unsigned char *sha1, int flag, void *cb_data)
@@ -240,7 +267,7 @@ static void describe(const char *arg, int last_one)
 	if (!cmit)
 		die("%s is not a valid '%s' object", arg, commit_type);
 
-	n = cmit->util;
+	n = find_commit_name(cmit->object.sha1);
 	if (n && (tags || all || n->prio == 2)) {
 		/*
 		 * Exact match to an existing ref.
@@ -418,8 +445,9 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
 		return cmd_name_rev(i + argc, args, prefix);
 	}
 
+	init_hash(&names);
 	for_each_rawref(get_name, NULL);
-	if (!found_names && !always)
+	if (!names.nr && !always)
 		die("No names found, cannot describe anything.");
 
 	if (argc == 0) {
-- 
1.7.3.3

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v3 4/4] describe: Delay looking up commits until searching for an inexact match
  2010-12-09  6:42                     ` [PATCH v3 1/4] describe: Use for_each_rawref Anders Kaseorg
  2010-12-09  6:43                       ` [PATCH v3 2/4] describe: Don’t use a flex array in struct commit_name Anders Kaseorg
  2010-12-09  6:46                       ` [PATCH v3 3/4] describe: Store commit_names in a hash table by commit SHA1 Anders Kaseorg
@ 2010-12-09  6:47                       ` Anders Kaseorg
  2 siblings, 0 replies; 28+ messages in thread
From: Anders Kaseorg @ 2010-12-09  6:47 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jonathan Nieder, git, SZEDER Gábor, Kirill Smelkov, Thomas Rast

Now that struct commit.util is not used until after we’ve checked that
the argument doesn’t exactly match a tag, we can wait until then to
look up the commits for each tag.

This avoids a lot of I/O on --exact-match queries in repositories with
many tags.  For example, ‘git describe --exact-match HEAD’ becomes
about 12 times faster on a cold cache (3.2s instead of 39s) in a
linux-2.6 repository with 2000 packed tags.  That’s a huge win for the
interactivity of the __git_ps1 shell prompt helper when on a detached
HEAD.

Signed-off-by: Anders Kaseorg <andersk@ksplice.com>
---

Change from v2: Process the entire hash chain in set_util().

 builtin/describe.c |   37 ++++++++++++++++++++++---------------
 1 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 8149233..a0f52c1 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -24,6 +24,7 @@ static int longformat;
 static int abbrev = DEFAULT_ABBREV;
 static int max_candidates = 10;
 static struct hash_table names;
+static int have_util;
 static const char *pattern;
 static int always;
 static const char *dirty;
@@ -62,6 +63,17 @@ static inline struct commit_name *find_commit_name(const unsigned char *peeled)
 	return n;
 }
 
+static int set_util(void *chain)
+{
+	struct commit_name *n;
+	for (n = chain; n; n = n->next) {
+		struct commit *c = lookup_commit_reference_gently(n->peeled, 1);
+		if (c)
+			c->util = n;
+	}
+	return 0;
+}
+
 static int replace_name(struct commit_name *e,
 			       int prio,
 			       const unsigned char *sha1,
@@ -96,18 +108,16 @@ static int replace_name(struct commit_name *e,
 }
 
 static void add_to_known_names(const char *path,
-			       struct commit *commit,
+			       const unsigned char *peeled,
 			       int prio,
 			       const unsigned char *sha1)
 {
-	const unsigned char *peeled = commit->object.sha1;
 	struct commit_name *e = find_commit_name(peeled);
 	struct tag *tag = NULL;
 	if (replace_name(e, prio, sha1, &tag)) {
 		if (!e) {
 			void **pos;
 			e = xmalloc(sizeof(struct commit_name));
-			commit->util = e;
 			hashcpy(e->peeled, peeled);
 			pos = insert_hash(hash_sha1(peeled), e, &names);
 			if (pos) {
@@ -128,8 +138,6 @@ static void add_to_known_names(const char *path,
 static int get_name(const char *path, const unsigned char *sha1, int flag, void *cb_data)
 {
 	int might_be_tag = !prefixcmp(path, "refs/tags/");
-	struct commit *commit;
-	struct object *object;
 	unsigned char peeled[20];
 	int is_tag, prio;
 
@@ -137,16 +145,10 @@ static int get_name(const char *path, const unsigned char *sha1, int flag, void
 		return 0;
 
 	if (!peel_ref(path, peeled) && !is_null_sha1(peeled)) {
-		commit = lookup_commit_reference_gently(peeled, 1);
-		if (!commit)
-			return 0;
-		is_tag = !!hashcmp(sha1, commit->object.sha1);
+		is_tag = !!hashcmp(sha1, peeled);
 	} else {
-		commit = lookup_commit_reference_gently(sha1, 1);
-		object = parse_object(sha1);
-		if (!commit || !object)
-			return 0;
-		is_tag = object->type == OBJ_TAG;
+		hashcpy(peeled, sha1);
+		is_tag = 0;
 	}
 
 	/* If --all, then any refs are used.
@@ -169,7 +171,7 @@ static int get_name(const char *path, const unsigned char *sha1, int flag, void
 		if (!prio)
 			return 0;
 	}
-	add_to_known_names(all ? path + 5 : path + 10, commit, prio, sha1);
+	add_to_known_names(all ? path + 5 : path + 10, peeled, prio, sha1);
 	return 0;
 }
 
@@ -286,6 +288,11 @@ static void describe(const char *arg, int last_one)
 	if (debug)
 		fprintf(stderr, "searching to describe %s\n", arg);
 
+	if (!have_util) {
+		for_each_hash(&names, set_util);
+		have_util = 1;
+	}
+
 	list = NULL;
 	cmit->object.flags = SEEN;
 	commit_list_insert(cmit, &list);
-- 
1.7.3.3

^ permalink raw reply related	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2010-12-09  6:47 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-11-17 23:33 [PATCH] describe: Don’t look up commits with --exact-match Anders Kaseorg
2010-12-03  8:43 ` Jonathan Nieder
2010-12-06  7:19   ` Anders Kaseorg
2010-12-06  7:22     ` Anders Kaseorg
2010-12-06  7:32     ` Jonathan Nieder
2010-12-06 10:53       ` Thomas Rast
2010-12-06 17:28       ` Anders Kaseorg
2010-12-06 17:47         ` Jonathan Nieder
2010-12-07  9:58         ` SZEDER Gábor
2010-12-07 18:22           ` [PATCH 1/2] describe: Use for_each_rawref Anders Kaseorg
2010-12-07 18:22             ` [PATCH 2/2] describe: Don’t look up commits with --exact-match Anders Kaseorg
2010-12-07 18:26             ` [PATCH 1/2] describe: Use for_each_rawref Anders Kaseorg
2010-12-07 19:49               ` Junio C Hamano
2010-12-07 21:59                 ` Junio C Hamano
2010-12-07 18:39         ` [PATCH] describe: Don’t look up commits with --exact-match Junio C Hamano
2010-12-08  4:41           ` Anders Kaseorg
2010-12-08  4:42             ` [PATCH v2 1/4] describe: Use for_each_rawref Anders Kaseorg
2010-12-08  4:43               ` [PATCH v2 2/4] describe: Don’t use a flex array in struct commit_name Anders Kaseorg
2010-12-08  4:43               ` [PATCH v2 3/4] describe: Store commit_names in a hash table by commit SHA1 Anders Kaseorg
2010-12-08 18:23                 ` [PATCH v2.1 " Anders Kaseorg
2010-12-08 22:50                   ` Junio C Hamano
2010-12-08  4:46               ` [PATCH v2 4/4] describe: Delay looking up commits until searching for an inexact match Anders Kaseorg
2010-12-08 22:50                 ` Junio C Hamano
2010-12-08 23:47                   ` Anders Kaseorg
2010-12-09  6:42                     ` [PATCH v3 1/4] describe: Use for_each_rawref Anders Kaseorg
2010-12-09  6:43                       ` [PATCH v3 2/4] describe: Don’t use a flex array in struct commit_name Anders Kaseorg
2010-12-09  6:46                       ` [PATCH v3 3/4] describe: Store commit_names in a hash table by commit SHA1 Anders Kaseorg
2010-12-09  6:47                       ` [PATCH v3 4/4] describe: Delay looking up commits until searching for an inexact match Anders Kaseorg

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.