All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] rev-list: preallocate object hash table in --all --objects
@ 2013-03-29 13:20 Nguyễn Thái Ngọc Duy
  2013-03-29 15:12 ` Jeff King
  0 siblings, 1 reply; 6+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2013-03-29 13:20 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Every time the object hash table grows, all entries must be
rearranged. The last few times could be really expensive when the
table contains a lot of entries.

When we do "--all --objects" we know in advance we may need to hash
all objects. Just prepare the hash table big enough, so there won't be
any resizing later on. The hash table is resized a couple times before
preallocate_hash_table() is called. But that's ok because there aren't many
objects by that time (unless you have tons of refs, likely tags..)

On linux-2.6.git:

        before       after
real    0m34.402s    0m33.288s
user    0m34.111s    0m32.863s
sys     0m0.205s     0m0.352s

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 object.c   | 21 +++++++++++++++++++--
 object.h   |  2 ++
 revision.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/object.c b/object.c
index 20703f5..bcfd2c6 100644
--- a/object.c
+++ b/object.c
@@ -88,10 +88,10 @@ struct object *lookup_object(const unsigned char *sha1)
 	return obj;
 }
 
-static void grow_object_hash(void)
+void grow_object_hash_to(unsigned long nr)
 {
 	int i;
-	int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size;
+	int new_hash_size = nr < 32 ? 32 : nr * 2;
 	struct object **new_hash;
 
 	new_hash = xcalloc(new_hash_size, sizeof(struct object *));
@@ -106,6 +106,11 @@ static void grow_object_hash(void)
 	obj_hash_size = new_hash_size;
 }
 
+static void grow_object_hash(void)
+{
+	grow_object_hash_to(obj_hash_size);
+}
+
 void *create_object(const unsigned char *sha1, int type, void *o)
 {
 	struct object *obj = o;
@@ -307,3 +312,15 @@ void clear_object_flags(unsigned flags)
 			obj->flags &= ~flags;
 	}
 }
+
+int has_object_flags(unsigned flags)
+{
+	int i;
+
+	for (i = 0; i < obj_hash_size; i++) {
+		struct object *obj = obj_hash[i];
+		if (obj && (obj->flags & flags))
+			return 1;
+	}
+	return 0;
+}
diff --git a/object.h b/object.h
index 97d384b..1e8fee8 100644
--- a/object.h
+++ b/object.h
@@ -52,6 +52,7 @@ extern struct object *get_indexed_object(unsigned int);
  */
 struct object *lookup_object(const unsigned char *sha1);
 
+extern void grow_object_hash_to(unsigned long nr);
 extern void *create_object(const unsigned char *sha1, int type, void *obj);
 
 /*
@@ -87,6 +88,7 @@ void add_object_array(struct object *obj, const char *name, struct object_array
 void add_object_array_with_mode(struct object *obj, const char *name, struct object_array *array, unsigned mode);
 void object_array_remove_duplicates(struct object_array *);
 
+int has_object_flags(unsigned flags);
 void clear_object_flags(unsigned flags);
 
 #endif /* OBJECT_H */
diff --git a/revision.c b/revision.c
index 71e62d8..f9ea2d1 100644
--- a/revision.c
+++ b/revision.c
@@ -1665,8 +1665,9 @@ static int for_each_good_bisect_ref(const char *submodule, each_ref_fn fn, void
 }
 
 static int handle_revision_pseudo_opt(const char *submodule,
-				struct rev_info *revs,
-				int argc, const char **argv, int *flags)
+				      struct rev_info *revs,
+				      int argc, const char **argv,
+				      int *flags, int *all)
 {
 	const char *arg = argv[0];
 	const char *optarg;
@@ -1685,6 +1686,7 @@ static int handle_revision_pseudo_opt(const char *submodule,
 	if (!strcmp(arg, "--all")) {
 		handle_refs(submodule, revs, *flags, for_each_ref_submodule);
 		handle_refs(submodule, revs, *flags, head_ref_submodule);
+		*all = 1;
 	} else if (!strcmp(arg, "--branches")) {
 		handle_refs(submodule, revs, *flags, for_each_branch_ref_submodule);
 	} else if (!strcmp(arg, "--bisect")) {
@@ -1738,6 +1740,49 @@ static int handle_revision_pseudo_opt(const char *submodule,
 	return 1;
 }
 
+static void preallocate_hash_table(void)
+{
+	unsigned long cnt = 0;
+	struct packed_git *p;
+	int i;
+
+	if (has_object_flags(UNINTERESTING))
+		/*
+		 * nope this is not simply "--all --objects"
+		 * not worth preallocation.
+		 */
+		return;
+
+	for (i = 0; i < 256; i++) {
+		struct dirent *ent;
+		DIR *d = opendir(git_path("objects/%02x", i));
+		if (!d)
+			continue;
+		while ((ent = readdir(d)) != NULL)
+			/*
+			 * We only worry about insufficient size which
+			 * leads to expensive growths later on. A few
+			 * extra slots in the hash table would not hurt.
+			 */
+			cnt++;
+		closedir(d);
+	}
+
+	if (!packed_git)
+		prepare_packed_git();
+
+	for (p = packed_git; p; p = p->next) {
+		if (!p->pack_local)
+			/* this may lead to extra growths later */
+			continue;
+		if (open_pack_index(p))
+			continue;
+		cnt += p->num_objects;
+	}
+
+	grow_object_hash_to(cnt);
+}
+
 /*
  * Parse revision information, filling in the "rev_info" structure,
  * and removing the used arguments from the argument list.
@@ -1750,6 +1795,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
 	int i, flags, left, seen_dashdash, read_from_stdin, got_rev_arg = 0, revarg_opt;
 	struct cmdline_pathspec prune_data;
 	const char *submodule = NULL;
+	int all = 0;
 
 	memset(&prune_data, 0, sizeof(prune_data));
 	if (opt)
@@ -1785,8 +1831,9 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
 			int opts;
 
 			opts = handle_revision_pseudo_opt(submodule,
-						revs, argc - i, argv + i,
-						&flags);
+							  revs, argc - i,
+							  argv + i,
+							  &flags, &all);
 			if (opts > 0) {
 				i += opts - 1;
 				continue;
@@ -1856,6 +1903,10 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
 			      get_pathspec(revs->prefix, prune_data.path));
 	}
 
+	if (all && revs->tag_objects &&
+	    revs->tree_objects && revs->blob_objects)
+		preallocate_hash_table();
+
 	if (revs->def == NULL)
 		revs->def = opt ? opt->def : NULL;
 	if (opt && opt->tweak)
-- 
1.8.2.83.gc99314b

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] rev-list: preallocate object hash table in --all --objects
  2013-03-29 13:20 [PATCH] rev-list: preallocate object hash table in --all --objects Nguyễn Thái Ngọc Duy
@ 2013-03-29 15:12 ` Jeff King
  2013-03-29 15:29   ` Duy Nguyen
  2013-03-29 16:04   ` Junio C Hamano
  0 siblings, 2 replies; 6+ messages in thread
From: Jeff King @ 2013-03-29 15:12 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

On Fri, Mar 29, 2013 at 08:20:10PM +0700, Nguyen Thai Ngoc Duy wrote:

> Every time the object hash table grows, all entries must be
> rearranged. The last few times could be really expensive when the
> table contains a lot of entries.
> 
> When we do "--all --objects" we know in advance we may need to hash
> all objects. Just prepare the hash table big enough, so there won't be
> any resizing later on. The hash table is resized a couple times before
> preallocate_hash_table() is called. But that's ok because there aren't many
> objects by that time (unless you have tons of refs, likely tags..)
> 
> On linux-2.6.git:
> 
>         before       after
> real    0m34.402s    0m33.288s
> user    0m34.111s    0m32.863s
> sys     0m0.205s     0m0.352s

This feels weirdly specific, and like we should just be tuning our hash
table growth better. You show a 3.2% speedup here. I was able to get a
2.8% speedup just by doing this:

diff --git a/object.c b/object.c
index 20703f5..8e5e12c 100644
--- a/object.c
+++ b/object.c
@@ -91,7 +91,7 @@ static void grow_object_hash(void)
 static void grow_object_hash(void)
 {
 	int i;
-	int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size;
+	int new_hash_size = obj_hash_size < 32 ? 32 : 3 * obj_hash_size;
 	struct object **new_hash;
 
 	new_hash = xcalloc(new_hash_size, sizeof(struct object *));

It might be worth trying to figure out what the optimum growth rate is
first, which would help this use case and others. With less fragile
code.

-Peff

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] rev-list: preallocate object hash table in --all --objects
  2013-03-29 15:12 ` Jeff King
@ 2013-03-29 15:29   ` Duy Nguyen
  2013-03-29 20:32     ` Jeff King
  2013-03-29 16:04   ` Junio C Hamano
  1 sibling, 1 reply; 6+ messages in thread
From: Duy Nguyen @ 2013-03-29 15:29 UTC (permalink / raw)
  To: Jeff King; +Cc: git

On Fri, Mar 29, 2013 at 10:12 PM, Jeff King <peff@peff.net> wrote:
> This feels weirdly specific, and like we should just be tuning our hash
> table growth better. You show a 3.2% speedup here. I was able to get a
> 2.8% speedup just by doing this:

It also uses a lot more memory. 5.8m entries for ".. * 2" and 8.8m for
"... * 3". Probably no big deal for modern machines..

> It might be worth trying to figure out what the optimum growth rate is
> first, which would help this use case and others. With less fragile
> code.

Agreed. Although I think it's getting out of my domain. I'm not even
sure how many factors are involved.
-- 
Duy

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] rev-list: preallocate object hash table in --all --objects
  2013-03-29 15:12 ` Jeff King
  2013-03-29 15:29   ` Duy Nguyen
@ 2013-03-29 16:04   ` Junio C Hamano
  1 sibling, 0 replies; 6+ messages in thread
From: Junio C Hamano @ 2013-03-29 16:04 UTC (permalink / raw)
  To: Jeff King; +Cc: Nguyễn Thái Ngọc Duy, git

Jeff King <peff@peff.net> writes:

> This feels weirdly specific, and like we should just be tuning our hash
> table growth better. You show a 3.2% speedup here. I was able to get a
> 2.8% speedup just by doing this:
>
> diff --git a/object.c b/object.c
> index 20703f5..8e5e12c 100644
> --- a/object.c
> +++ b/object.c
> @@ -91,7 +91,7 @@ static void grow_object_hash(void)
>  static void grow_object_hash(void)
>  {
>  	int i;
> -	int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size;
> +	int new_hash_size = obj_hash_size < 32 ? 32 : 3 * obj_hash_size;
>  	struct object **new_hash;
>  
>  	new_hash = xcalloc(new_hash_size, sizeof(struct object *));
>
> It might be worth trying to figure out what the optimum growth rate is
> first, which would help this use case and others. With less fragile
> code.

I agree with the general principle to avoid heuristics that is too
specific to the use case.  Thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] rev-list: preallocate object hash table in --all --objects
  2013-03-29 15:29   ` Duy Nguyen
@ 2013-03-29 20:32     ` Jeff King
  2013-04-01 18:33       ` Jeff King
  0 siblings, 1 reply; 6+ messages in thread
From: Jeff King @ 2013-03-29 20:32 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: git

On Fri, Mar 29, 2013 at 10:29:52PM +0700, Nguyen Thai Ngoc Duy wrote:

> On Fri, Mar 29, 2013 at 10:12 PM, Jeff King <peff@peff.net> wrote:
> > This feels weirdly specific, and like we should just be tuning our hash
> > table growth better. You show a 3.2% speedup here. I was able to get a
> > 2.8% speedup just by doing this:
> 
> It also uses a lot more memory. 5.8m entries for ".. * 2" and 8.8m for
> "... * 3". Probably no big deal for modern machines..

Yeah, it will use more, but it's not going to waste more than half again
more than we already were.

> > It might be worth trying to figure out what the optimum growth rate is
> > first, which would help this use case and others. With less fragile
> > code.
> 
> Agreed. Although I think it's getting out of my domain. I'm not even
> sure how many factors are involved.

There's the load factor that causes us to grow, and the growth factor of
how aggressively we expand when we do need to grow. I fooled around with
a few numbers and the patch below seemed to give good results. Probably
varying both numbers over a range and graphing the result would make
sense, but I don't have time to do it at the moment (each run takes a
while, if I do best-of-five).

[before]
real    0m46.255s
user    0m45.812s
sys     0m0.276s

[after]
real    0m43.729s
user    0m43.204s
sys     0m0.356s

diff --git a/object.c b/object.c
index 20703f5..c3be886 100644
--- a/object.c
+++ b/object.c
@@ -91,7 +91,7 @@ static void grow_object_hash(void)
 static void grow_object_hash(void)
 {
 	int i;
-	int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size;
+	int new_hash_size = obj_hash_size < 32 ? 32 : obj_hash_size * 5/2;
 	struct object **new_hash;
 
 	new_hash = xcalloc(new_hash_size, sizeof(struct object *));
@@ -116,7 +116,7 @@ void *create_object(const unsigned char *sha1, int type, void *o)
 	obj->flags = 0;
 	hashcpy(obj->sha1, sha1);
 
-	if (obj_hash_size - 1 <= nr_objs * 2)
+	if (nr_objs + 1 > obj_hash_size * 1/3)
 		grow_object_hash();
 
 	insert_obj_hash(obj, obj_hash, obj_hash_size);

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] rev-list: preallocate object hash table in --all --objects
  2013-03-29 20:32     ` Jeff King
@ 2013-04-01 18:33       ` Jeff King
  0 siblings, 0 replies; 6+ messages in thread
From: Jeff King @ 2013-04-01 18:33 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Junio C Hamano, git

[-- Attachment #1: Type: text/plain, Size: 7259 bytes --]

On Fri, Mar 29, 2013 at 04:32:00PM -0400, Jeff King wrote:

> > Agreed. Although I think it's getting out of my domain. I'm not even
> > sure how many factors are involved.
> 
> There's the load factor that causes us to grow, and the growth factor of
> how aggressively we expand when we do need to grow. I fooled around with
> a few numbers and the patch below seemed to give good results. Probably
> varying both numbers over a range and graphing the result would make
> sense, but I don't have time to do it at the moment (each run takes a
> while, if I do best-of-five).

So I did that. I'm still not quite sure what it means, but here's the
data (all times are best-of-five wall-clock times to complete `git
rev-list --objects --all` on linux-2.6, in seconds).

 Load  | Growth |
Factor | Factor | Time
-------+--------+--------
 0.17  |  1.50  |  44.104
 0.17  |  2.00  |  43.373
 0.17  |  2.50  |  45.662
 0.17  |  3.00  |  44.909
 0.17  |  3.50  |  42.733
 0.17  |  4.00  |  42.659
 0.33  |  1.50  |  44.806
 0.33  |  2.00  |  44.876
 0.33  |  2.50  |  47.991
 0.33  |  3.00  |  44.940
 0.33  |  3.50  |  43.615
 0.33  |  4.00  |  43.707
 0.50  |  1.50  |  46.459
 0.50  |  2.00  |  45.872
 0.50  |  2.50  |  47.844
 0.50  |  3.00  |  47.466
 0.50  |  3.50  |  44.063
 0.50  |  4.00  |  43.807
 0.67  |  1.50  |  50.178
 0.67  |  2.00  |  48.692
 0.67  |  2.50  |  50.458
 0.67  |  3.00  |  47.307
 0.67  |  3.50  |  45.114
 0.67  |  4.00  |  45.114
 0.83  |  1.50  |  54.337
 0.83  |  2.00  |  51.286
 0.83  |  2.50  |  50.110
 0.83  |  3.00  |  47.736
 0.83  |  3.50  |  47.617
 0.83  |  4.00  |  47.282

I'm attaching a graph of the results, too, which I think makes it easier
to look at (and you probably want to look at it now, or the rest of this
won't make any sense). The interesting things I see are:

  1. The benefits of increasing the growth factor flatten out around
     3x-4x.

  2. Obviously having a smaller load factor increases efficiency.

  3. Increasing the growth factor compensates for a worse load factor
     (e.g., a growth rate of 3 performs about the same with a load
     factor of 1/2 to 5/6).

It makes sense that one could compensate for the other. Our pattern of
growth for the hash is to add a lot at first, and then more and more
frequently hit objects that we have already seen (because the number we
have seen is going up). So we do many more queries on the hash when it
is at size X than when it is at X/2.

Or another way of thinking about it is: it doesn't matter that much how
we get there, but when we reach our final size (where most of our
lookups are going to happen), how crowded is the hash table (i.e., how
many times are we going to see collisions and have to do extra
hashcmps?).

With a load factor of 0.17, we know it never goes over that. But with a
configured max load factor of 0.5, right after we double, we know the
load factor is now only 0.25; it will rise again from there, but not
necessarily even back up to 0.5 (if we never allocate again).

And I think that explains the weird spikes. Why, when we have a load
factor of 0.33, do we perform worse with a growth factor of 2.5 than
with 2? The hash should be more sparse. And I think the answer is: for
the number of objects we have, it so happens that the growth factor of 2
causes us to end up with a more sparsely populated table, and we see
a lot of queries on the table in that state. Whereas with 2.5, we do
fewer growth iterations, but end in a state that is slightly less
optimal.

Given this conjecture, I added an atexit() to determine the final state
of the hash. Here are the final states for a max load factor of 0.33,
and a few growth rates:

grow | objects | objects  | final
rate | used    | alloc    | load
-----+---------+----------+------
2    | 3005531 | 16777216 | 0.179
2.5  | 3005531 | 11920105 | 0.252
3    | 3005531 | 17006112 | 0.177

I think that supports the conjecture; the final load factor is much
worse with 2.5 than with 2 or 3. Not for any good reason; it just
happens to match the growth pattern we see given the number of objects
we have. Of course the tradeoff is that we waste more memory (37M with
8-byte pointers).

So what should we do? I don't think increasing the growth rate makes
sense. Whether we end up helping or hurting is somewhat random, as it is
really all about where we end up in terms of the final load factor,
where most of our queries happen.

We would do much better to tweak the max load factor, which ensures that
the final load factor (and the intermediate ones) is below a certain
value. Of course that comes at the cost of wasted memory. Moving from
the current load factor of 0.5 to 0.33 saves us about 1 second
processing time. But it means our memory usage jumps (in this case, it
doubles from 64M to 128M). So there are small savings to be had, but
bigger memory losses; I guess the question is how much we would care
about those memory losses (on a modern machine, using an extra 64M for
the kernel repo is not that big a deal).

And of course the other thing to do is to use a slotting mechanism that
reduces conflicts. Junio experimented with cuckoo hashing, and after
reading his attempt, I tried quadratic stepping. As I recall, neither
experiment yielded anything impressive (though I may simply have looked
at 1s speedup and considered it "not impressive"; I don't remember).

So I dunno. We are close to as good as it will get, I think. We might
steal a few percent by making a memory tradeoff, or doing something
clever with the hash stepping (cuckoo, quadratic, etc). But those are
big-ish jumps in complexity or memory use for not much gain.

My test harness patch is below in case anybody wants to play with.

-Peff

---
diff --git a/object.c b/object.c
index 20703f5..dd04009 100644
--- a/object.c
+++ b/object.c
@@ -88,12 +88,26 @@ static void grow_object_hash(void)
 	return obj;
 }
 
+static void print_hash_size(void)
+{
+	fprintf(stderr, "final hash size is %d/%d = %f\n",
+		nr_objs, obj_hash_size, ((double)nr_objs)/obj_hash_size);
+}
+
 static void grow_object_hash(void)
 {
+	static int rate;
 	int i;
-	int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size;
+	int new_hash_size;
 	struct object **new_hash;
 
+	if (!rate) {
+		/* in units of 1/2 to give more resolution and avoid floats */
+		rate = atoi(getenv("GIT_GROW_RATE"));
+		atexit(print_hash_size);
+	}
+
+	new_hash_size = obj_hash_size < 32 ? 32 : obj_hash_size * rate / 2;
 	new_hash = xcalloc(new_hash_size, sizeof(struct object *));
 	for (i = 0; i < obj_hash_size; i++) {
 		struct object *obj = obj_hash[i];
@@ -109,6 +123,7 @@ void *create_object(const unsigned char *sha1, int type, void *o)
 void *create_object(const unsigned char *sha1, int type, void *o)
 {
 	struct object *obj = o;
+	static int factor;
 
 	obj->parsed = 0;
 	obj->used = 0;
@@ -116,7 +131,11 @@ void *create_object(const unsigned char *sha1, int type, void *o)
 	obj->flags = 0;
 	hashcpy(obj->sha1, sha1);
 
-	if (obj_hash_size - 1 <= nr_objs * 2)
+	/* in units of 1/6 to give more resolution and avoid floats */
+	if (!factor)
+		factor = atoi(getenv("GIT_LOAD_FACTOR"));
+
+	if (nr_objs + 1 > obj_hash_size * factor / 6)
 		grow_object_hash();
 
 	insert_obj_hash(obj, obj_hash, obj_hash_size);

[-- Attachment #2: load-growth.png --]
[-- Type: image/png, Size: 35409 bytes --]

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2013-04-01 18:34 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-29 13:20 [PATCH] rev-list: preallocate object hash table in --all --objects Nguyễn Thái Ngọc Duy
2013-03-29 15:12 ` Jeff King
2013-03-29 15:29   ` Duy Nguyen
2013-03-29 20:32     ` Jeff King
2013-04-01 18:33       ` Jeff King
2013-03-29 16:04   ` Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.