All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] rev-list: preallocate object hash table in --all --objects
@ 2013-03-29 13:20 Nguyễn Thái Ngọc Duy
  2013-03-29 15:12 ` Jeff King
  0 siblings, 1 reply; 6+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2013-03-29 13:20 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Every time the object hash table grows, all entries must be
rearranged. The last few times could be really expensive when the
table contains a lot of entries.

When we do "--all --objects" we know in advance we may need to hash
all objects. Just prepare the hash table big enough, so there won't be
any resizing later on. The hash table is resized a couple times before
preallocate_hash_table() is called. But that's ok because there aren't many
objects by that time (unless you have tons of refs, likely tags..)

On linux-2.6.git:

        before       after
real    0m34.402s    0m33.288s
user    0m34.111s    0m32.863s
sys     0m0.205s     0m0.352s

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 object.c   | 21 +++++++++++++++++++--
 object.h   |  2 ++
 revision.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/object.c b/object.c
index 20703f5..bcfd2c6 100644
--- a/object.c
+++ b/object.c
@@ -88,10 +88,10 @@ struct object *lookup_object(const unsigned char *sha1)
 	return obj;
 }
 
-static void grow_object_hash(void)
+void grow_object_hash_to(unsigned long nr)
 {
 	int i;
-	int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size;
+	int new_hash_size = nr < 32 ? 32 : nr * 2;
 	struct object **new_hash;
 
 	new_hash = xcalloc(new_hash_size, sizeof(struct object *));
@@ -106,6 +106,11 @@ static void grow_object_hash(void)
 	obj_hash_size = new_hash_size;
 }
 
+static void grow_object_hash(void)
+{
+	grow_object_hash_to(obj_hash_size);
+}
+
 void *create_object(const unsigned char *sha1, int type, void *o)
 {
 	struct object *obj = o;
@@ -307,3 +312,15 @@ void clear_object_flags(unsigned flags)
 			obj->flags &= ~flags;
 	}
 }
+
+int has_object_flags(unsigned flags)
+{
+	int i;
+
+	for (i = 0; i < obj_hash_size; i++) {
+		struct object *obj = obj_hash[i];
+		if (obj && (obj->flags & flags))
+			return 1;
+	}
+	return 0;
+}
diff --git a/object.h b/object.h
index 97d384b..1e8fee8 100644
--- a/object.h
+++ b/object.h
@@ -52,6 +52,7 @@ extern struct object *get_indexed_object(unsigned int);
  */
 struct object *lookup_object(const unsigned char *sha1);
 
+extern void grow_object_hash_to(unsigned long nr);
 extern void *create_object(const unsigned char *sha1, int type, void *obj);
 
 /*
@@ -87,6 +88,7 @@ void add_object_array(struct object *obj, const char *name, struct object_array
 void add_object_array_with_mode(struct object *obj, const char *name, struct object_array *array, unsigned mode);
 void object_array_remove_duplicates(struct object_array *);
 
+int has_object_flags(unsigned flags);
 void clear_object_flags(unsigned flags);
 
 #endif /* OBJECT_H */
diff --git a/revision.c b/revision.c
index 71e62d8..f9ea2d1 100644
--- a/revision.c
+++ b/revision.c
@@ -1665,8 +1665,9 @@ static int for_each_good_bisect_ref(const char *submodule, each_ref_fn fn, void
 }
 
 static int handle_revision_pseudo_opt(const char *submodule,
-				struct rev_info *revs,
-				int argc, const char **argv, int *flags)
+				      struct rev_info *revs,
+				      int argc, const char **argv,
+				      int *flags, int *all)
 {
 	const char *arg = argv[0];
 	const char *optarg;
@@ -1685,6 +1686,7 @@ static int handle_revision_pseudo_opt(const char *submodule,
 	if (!strcmp(arg, "--all")) {
 		handle_refs(submodule, revs, *flags, for_each_ref_submodule);
 		handle_refs(submodule, revs, *flags, head_ref_submodule);
+		*all = 1;
 	} else if (!strcmp(arg, "--branches")) {
 		handle_refs(submodule, revs, *flags, for_each_branch_ref_submodule);
 	} else if (!strcmp(arg, "--bisect")) {
@@ -1738,6 +1740,49 @@ static int handle_revision_pseudo_opt(const char *submodule,
 	return 1;
 }
 
+static void preallocate_hash_table(void)
+{
+	unsigned long cnt = 0;
+	struct packed_git *p;
+	int i;
+
+	if (has_object_flags(UNINTERESTING))
+		/*
+		 * nope this is not simply "--all --objects"
+		 * not worth preallocation.
+		 */
+		return;
+
+	for (i = 0; i < 256; i++) {
+		struct dirent *ent;
+		DIR *d = opendir(git_path("objects/%02x", i));
+		if (!d)
+			continue;
+		while ((ent = readdir(d)) != NULL)
+			/*
+			 * We only worry about insufficient size which
+			 * leads to expensive growths later on. A few
+			 * extra slots in the hash table would not hurt.
+			 */
+			cnt++;
+		closedir(d);
+	}
+
+	if (!packed_git)
+		prepare_packed_git();
+
+	for (p = packed_git; p; p = p->next) {
+		if (!p->pack_local)
+			/* this may lead to extra growths later */
+			continue;
+		if (open_pack_index(p))
+			continue;
+		cnt += p->num_objects;
+	}
+
+	grow_object_hash_to(cnt);
+}
+
 /*
  * Parse revision information, filling in the "rev_info" structure,
  * and removing the used arguments from the argument list.
@@ -1750,6 +1795,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
 	int i, flags, left, seen_dashdash, read_from_stdin, got_rev_arg = 0, revarg_opt;
 	struct cmdline_pathspec prune_data;
 	const char *submodule = NULL;
+	int all = 0;
 
 	memset(&prune_data, 0, sizeof(prune_data));
 	if (opt)
@@ -1785,8 +1831,9 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
 			int opts;
 
 			opts = handle_revision_pseudo_opt(submodule,
-						revs, argc - i, argv + i,
-						&flags);
+							  revs, argc - i,
+							  argv + i,
+							  &flags, &all);
 			if (opts > 0) {
 				i += opts - 1;
 				continue;
@@ -1856,6 +1903,10 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
 			      get_pathspec(revs->prefix, prune_data.path));
 	}
 
+	if (all && revs->tag_objects &&
+	    revs->tree_objects && revs->blob_objects)
+		preallocate_hash_table();
+
 	if (revs->def == NULL)
 		revs->def = opt ? opt->def : NULL;
 	if (opt && opt->tweak)
-- 
1.8.2.83.gc99314b

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] rev-list: preallocate object hash table in --all --objects
  2013-03-29 13:20 [PATCH] rev-list: preallocate object hash table in --all --objects Nguyễn Thái Ngọc Duy
@ 2013-03-29 15:12 ` Jeff King
  2013-03-29 15:29   ` Duy Nguyen
  2013-03-29 16:04   ` Junio C Hamano
  0 siblings, 2 replies; 6+ messages in thread
From: Jeff King @ 2013-03-29 15:12 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

On Fri, Mar 29, 2013 at 08:20:10PM +0700, Nguyen Thai Ngoc Duy wrote:

> Every time the object hash table grows, all entries must be
> rearranged. The last few times could be really expensive when the
> table contains a lot of entries.
> 
> When we do "--all --objects" we know in advance we may need to hash
> all objects. Just prepare the hash table big enough, so there won't be
> any resizing later on. The hash table is resized a couple times before
> preallocate_hash_table() is called. But that's ok because there aren't many
> objects by that time (unless you have tons of refs, likely tags..)
> 
> On linux-2.6.git:
> 
>         before       after
> real    0m34.402s    0m33.288s
> user    0m34.111s    0m32.863s
> sys     0m0.205s     0m0.352s

This feels weirdly specific, and like we should just be tuning our hash
table growth better. You show a 3.2% speedup here. I was able to get a
2.8% speedup just by doing this:

diff --git a/object.c b/object.c
index 20703f5..8e5e12c 100644
--- a/object.c
+++ b/object.c
@@ -91,7 +91,7 @@ static void grow_object_hash(void)
 static void grow_object_hash(void)
 {
 	int i;
-	int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size;
+	int new_hash_size = obj_hash_size < 32 ? 32 : 3 * obj_hash_size;
 	struct object **new_hash;
 
 	new_hash = xcalloc(new_hash_size, sizeof(struct object *));

It might be worth trying to figure out what the optimum growth rate is
first, which would help this use case and others. With less fragile
code.

-Peff

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] rev-list: preallocate object hash table in --all --objects
  2013-03-29 15:12 ` Jeff King
@ 2013-03-29 15:29   ` Duy Nguyen
  2013-03-29 20:32     ` Jeff King
  2013-03-29 16:04   ` Junio C Hamano
  1 sibling, 1 reply; 6+ messages in thread
From: Duy Nguyen @ 2013-03-29 15:29 UTC (permalink / raw)
  To: Jeff King; +Cc: git

On Fri, Mar 29, 2013 at 10:12 PM, Jeff King <peff@peff.net> wrote:
> This feels weirdly specific, and like we should just be tuning our hash
> table growth better. You show a 3.2% speedup here. I was able to get a
> 2.8% speedup just by doing this:

It also uses a lot more memory. 5.8m entries for ".. * 2" and 8.8m for
"... * 3". Probably no big deal for modern machines..

> It might be worth trying to figure out what the optimum growth rate is
> first, which would help this use case and others. With less fragile
> code.

Agreed. Although I think it's getting out of my domain. I'm not even
sure how many factors are involved.
-- 
Duy

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] rev-list: preallocate object hash table in --all --objects
  2013-03-29 15:12 ` Jeff King
  2013-03-29 15:29   ` Duy Nguyen
@ 2013-03-29 16:04   ` Junio C Hamano
  1 sibling, 0 replies; 6+ messages in thread
From: Junio C Hamano @ 2013-03-29 16:04 UTC (permalink / raw)
  To: Jeff King; +Cc: Nguyễn Thái Ngọc Duy, git

Jeff King <peff@peff.net> writes:

> This feels weirdly specific, and like we should just be tuning our hash
> table growth better. You show a 3.2% speedup here. I was able to get a
> 2.8% speedup just by doing this:
>
> diff --git a/object.c b/object.c
> index 20703f5..8e5e12c 100644
> --- a/object.c
> +++ b/object.c
> @@ -91,7 +91,7 @@ static void grow_object_hash(void)
>  static void grow_object_hash(void)
>  {
>  	int i;
> -	int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size;
> +	int new_hash_size = obj_hash_size < 32 ? 32 : 3 * obj_hash_size;
>  	struct object **new_hash;
>  
>  	new_hash = xcalloc(new_hash_size, sizeof(struct object *));
>
> It might be worth trying to figure out what the optimum growth rate is
> first, which would help this use case and others. With less fragile
> code.

I agree with the general principle to avoid heuristics that is too
specific to the use case.  Thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] rev-list: preallocate object hash table in --all --objects
  2013-03-29 15:29   ` Duy Nguyen
@ 2013-03-29 20:32     ` Jeff King
  2013-04-01 18:33       ` Jeff King
  0 siblings, 1 reply; 6+ messages in thread
From: Jeff King @ 2013-03-29 20:32 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: git

On Fri, Mar 29, 2013 at 10:29:52PM +0700, Nguyen Thai Ngoc Duy wrote:

> On Fri, Mar 29, 2013 at 10:12 PM, Jeff King <peff@peff.net> wrote:
> > This feels weirdly specific, and like we should just be tuning our hash
> > table growth better. You show a 3.2% speedup here. I was able to get a
> > 2.8% speedup just by doing this:
> 
> It also uses a lot more memory. 5.8m entries for ".. * 2" and 8.8m for
> "... * 3". Probably no big deal for modern machines..

Yeah, it will use more, but it's not going to waste more than half again
more than we already were.

> > It might be worth trying to figure out what the optimum growth rate is
> > first, which would help this use case and others. With less fragile
> > code.
> 
> Agreed. Although I think it's getting out of my domain. I'm not even
> sure how many factors are involved.

There's the load factor that causes us to grow, and the growth factor of
how aggressively we expand when we do need to grow. I fooled around with
a few numbers and the patch below seemed to give good results. Probably
varying both numbers over a range and graphing the result would make
sense, but I don't have time to do it at the moment (each run takes a
while, if I do best-of-five).

[before]
real    0m46.255s
user    0m45.812s
sys     0m0.276s

[after]
real    0m43.729s
user    0m43.204s
sys     0m0.356s

diff --git a/object.c b/object.c
index 20703f5..c3be886 100644
--- a/object.c
+++ b/object.c
@@ -91,7 +91,7 @@ static void grow_object_hash(void)
 static void grow_object_hash(void)
 {
 	int i;
-	int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size;
+	int new_hash_size = obj_hash_size < 32 ? 32 : obj_hash_size * 5/2;
 	struct object **new_hash;
 
 	new_hash = xcalloc(new_hash_size, sizeof(struct object *));
@@ -116,7 +116,7 @@ void *create_object(const unsigned char *sha1, int type, void *o)
 	obj->flags = 0;
 	hashcpy(obj->sha1, sha1);
 
-	if (obj_hash_size - 1 <= nr_objs * 2)
+	if (nr_objs + 1 > obj_hash_size * 1/3)
 		grow_object_hash();
 
 	insert_obj_hash(obj, obj_hash, obj_hash_size);

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] rev-list: preallocate object hash table in --all --objects
  2013-03-29 20:32     ` Jeff King
@ 2013-04-01 18:33       ` Jeff King
  0 siblings, 0 replies; 6+ messages in thread
From: Jeff King @ 2013-04-01 18:33 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Junio C Hamano, git

[-- Attachment #1: Type: text/plain, Size: 7259 bytes --]

On Fri, Mar 29, 2013 at 04:32:00PM -0400, Jeff King wrote:

> > Agreed. Although I think it's getting out of my domain. I'm not even
> > sure how many factors are involved.
> 
> There's the load factor that causes us to grow, and the growth factor of
> how aggressively we expand when we do need to grow. I fooled around with
> a few numbers and the patch below seemed to give good results. Probably
> varying both numbers over a range and graphing the result would make
> sense, but I don't have time to do it at the moment (each run takes a
> while, if I do best-of-five).

So I did that. I'm still not quite sure what it means, but here's the
data (all times are best-of-five wall-clock times to complete `git
rev-list --objects --all` on linux-2.6, in seconds).

 Load  | Growth |
Factor | Factor | Time
-------+--------+--------
 0.17  |  1.50  |  44.104
 0.17  |  2.00  |  43.373
 0.17  |  2.50  |  45.662
 0.17  |  3.00  |  44.909
 0.17  |  3.50  |  42.733
 0.17  |  4.00  |  42.659
 0.33  |  1.50  |  44.806
 0.33  |  2.00  |  44.876
 0.33  |  2.50  |  47.991
 0.33  |  3.00  |  44.940
 0.33  |  3.50  |  43.615
 0.33  |  4.00  |  43.707
 0.50  |  1.50  |  46.459
 0.50  |  2.00  |  45.872
 0.50  |  2.50  |  47.844
 0.50  |  3.00  |  47.466
 0.50  |  3.50  |  44.063
 0.50  |  4.00  |  43.807
 0.67  |  1.50  |  50.178
 0.67  |  2.00  |  48.692
 0.67  |  2.50  |  50.458
 0.67  |  3.00  |  47.307
 0.67  |  3.50  |  45.114
 0.67  |  4.00  |  45.114
 0.83  |  1.50  |  54.337
 0.83  |  2.00  |  51.286
 0.83  |  2.50  |  50.110
 0.83  |  3.00  |  47.736
 0.83  |  3.50  |  47.617
 0.83  |  4.00  |  47.282

I'm attaching a graph of the results, too, which I think makes it easier
to look at (and you probably want to look at it now, or the rest of this
won't make any sense). The interesting things I see are:

  1. The benefits of increasing the growth factor flatten out around
     3x-4x.

  2. Obviously having a smaller load factor increases efficiency.

  3. Increasing the growth factor compensates for a worse load factor
     (e.g., a growth rate of 3 performs about the same with a load
     factor of 1/2 to 5/6).

It makes sense that one could compensate for the other. Our pattern of
growth for the hash is to add a lot at first, and then more and more
frequently hit objects that we have already seen (because the number we
have seen is going up). So we do many more queries on the hash when it
is at size X than when it is at X/2.

Or another way of thinking about it is: it doesn't matter that much how
we get there, but when we reach our final size (where most of our
lookups are going to happen), how crowded is the hash table (i.e., how
many times are we going to see collisions and have to do extra
hashcmps?).

With a load factor of 0.17, we know it never goes over that. But with a
configured max load factor of 0.5, right after we double, we know the
load factor is now only 0.25; it will rise again from there, but not
necessarily even back up to 0.5 (if we never allocate again).

And I think that explains the weird spikes. Why, when we have a load
factor of 0.33, do we perform worse with a growth factor of 2.5 than
with 2? The hash should be more sparse. And I think the answer is: for
the number of objects we have, it so happens that the growth factor of 2
causes us to end up with a more sparsely populated table, and we see
a lot of queries on the table in that state. Whereas with 2.5, we do
fewer growth iterations, but end in a state that is slightly less
optimal.

Given this conjecture, I added an atexit() to determine the final state
of the hash. Here are the final states for a max load factor of 0.33,
and a few growth rates:

grow | objects | objects  | final
rate | used    | alloc    | load
-----+---------+----------+------
2    | 3005531 | 16777216 | 0.179
2.5  | 3005531 | 11920105 | 0.252
3    | 3005531 | 17006112 | 0.177

I think that supports the conjecture; the final load factor is much
worse with 2.5 than with 2 or 3. Not for any good reason; it just
happens to match the growth pattern we see given the number of objects
we have. Of course the tradeoff is that we waste more memory (37M with
8-byte pointers).

So what should we do? I don't think increasing the growth rate makes
sense. Whether we end up helping or hurting is somewhat random, as it is
really all about where we end up in terms of the final load factor,
where most of our queries happen.

We would do much better to tweak the max load factor, which ensures that
the final load factor (and the intermediate ones) is below a certain
value. Of course that comes at the cost of wasted memory. Moving from
the current load factor of 0.5 to 0.33 saves us about 1 second
processing time. But it means our memory usage jumps (in this case, it
doubles from 64M to 128M). So there are small savings to be had, but
bigger memory losses; I guess the question is how much we would care
about those memory losses (on a modern machine, using an extra 64M for
the kernel repo is not that big a deal).

And of course the other thing to do is to use a slotting mechanism that
reduces conflicts. Junio experimented with cuckoo hashing, and after
reading his attempt, I tried quadratic stepping. As I recall, neither
experiment yielded anything impressive (though I may simply have looked
at 1s speedup and considered it "not impressive"; I don't remember).

So I dunno. We are close to as good as it will get, I think. We might
steal a few percent by making a memory tradeoff, or doing something
clever with the hash stepping (cuckoo, quadratic, etc). But those are
big-ish jumps in complexity or memory use for not much gain.

My test harness patch is below in case anybody wants to play with.

-Peff

---
diff --git a/object.c b/object.c
index 20703f5..dd04009 100644
--- a/object.c
+++ b/object.c
@@ -88,12 +88,26 @@ static void grow_object_hash(void)
 	return obj;
 }
 
+static void print_hash_size(void)
+{
+	fprintf(stderr, "final hash size is %d/%d = %f\n",
+		nr_objs, obj_hash_size, ((double)nr_objs)/obj_hash_size);
+}
+
 static void grow_object_hash(void)
 {
+	static int rate;
 	int i;
-	int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size;
+	int new_hash_size;
 	struct object **new_hash;
 
+	if (!rate) {
+		/* in units of 1/2 to give more resolution and avoid floats */
+		rate = atoi(getenv("GIT_GROW_RATE"));
+		atexit(print_hash_size);
+	}
+
+	new_hash_size = obj_hash_size < 32 ? 32 : obj_hash_size * rate / 2;
 	new_hash = xcalloc(new_hash_size, sizeof(struct object *));
 	for (i = 0; i < obj_hash_size; i++) {
 		struct object *obj = obj_hash[i];
@@ -109,6 +123,7 @@ void *create_object(const unsigned char *sha1, int type, void *o)
 void *create_object(const unsigned char *sha1, int type, void *o)
 {
 	struct object *obj = o;
+	static int factor;
 
 	obj->parsed = 0;
 	obj->used = 0;
@@ -116,7 +131,11 @@ void *create_object(const unsigned char *sha1, int type, void *o)
 	obj->flags = 0;
 	hashcpy(obj->sha1, sha1);
 
-	if (obj_hash_size - 1 <= nr_objs * 2)
+	/* in units of 1/6 to give more resolution and avoid floats */
+	if (!factor)
+		factor = atoi(getenv("GIT_LOAD_FACTOR"));
+
+	if (nr_objs + 1 > obj_hash_size * factor / 6)
 		grow_object_hash();
 
 	insert_obj_hash(obj, obj_hash, obj_hash_size);

[-- Attachment #2: load-growth.png --]
[-- Type: image/png, Size: 35409 bytes --]

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2013-04-01 18:34 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-29 13:20 [PATCH] rev-list: preallocate object hash table in --all --objects Nguyễn Thái Ngọc Duy
2013-03-29 15:12 ` Jeff King
2013-03-29 15:29   ` Duy Nguyen
2013-03-29 20:32     ` Jeff King
2013-04-01 18:33       ` Jeff King
2013-03-29 16:04   ` Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.