* [PATCH] object: measure time needed for resolving hash collisions
@ 2016-09-15  2:01 Stefan Beller
From: Stefan Beller @ 2016-09-15  2:01 UTC
  To: peff; +Cc: git, Stefan Beller

This shows that we spend roughly a third of the time resolving
hash collisions:

I am using linux.git to measure:

$ time git rev-list --objects --count HEAD >/dev/null
print_time_spent_resolving_hash_collisions 9:445591664

real	0m31.733s
user	0m31.220s
sys	0m0.463s

For fun I reverted 9a414486d9f0:

$ time git rev-list --objects --count HEAD >/dev/null
print_time_spent_resolving_hash_collisions 14:338605504

real	0m37.008s
user	0m36.524s
sys	0m0.423s

The 17% that Jeff measured there works out to 1 - 31.733/37.008 = 0.14
in these measurements.
The time spent resolving collisions itself went down by about a third
(14.34s to 9.45s), so there is still room for improvement.

Signed-off-by: Stefan Beller <sbeller@google.com>
---

 According to Jeff, sending patches that don't get accepted is the new hype!

 builtin/rev-list.c |  2 ++
 object.c           | 36 ++++++++++++++++++++++++++++++++++++
 object.h           |  2 ++
 3 files changed, 40 insertions(+)

diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 8479f6e..d0e5922 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -412,5 +412,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
 			printf("%d\n", revs.count_left + revs.count_right);
 	}
 
+	print_time_spent_resolving_hash_collisions();
+
 	return 0;
 }
diff --git a/object.c b/object.c
index 67d9a9e..e9e73e0 100644
--- a/object.c
+++ b/object.c
@@ -5,9 +5,40 @@
 #include "commit.h"
 #include "tag.h"
 
+#include <time.h>
+
 static struct object **obj_hash;
 static int nr_objs, obj_hash_size;
 
+struct timespec caching = {0, 0};
+
+void diff(struct timespec *start, struct timespec *end, struct timespec *dst)
+{
+	if ((end->tv_nsec-start->tv_nsec)<0) {
+		dst->tv_sec = end->tv_sec-start->tv_sec-1;
+		dst->tv_nsec = 1000000000+end->tv_nsec-start->tv_nsec;
+	} else {
+		dst->tv_sec = end->tv_sec-start->tv_sec;
+		dst->tv_nsec = end->tv_nsec-start->tv_nsec;
+	}
+}
+
+void add_time_to(struct timespec *dst, struct timespec *addend)
+{
+	dst->tv_sec += addend->tv_sec;
+	dst->tv_nsec += addend->tv_nsec;
+	while (dst->tv_nsec >= 1000000000) {
+		dst->tv_nsec -= 1000000000;
+		dst->tv_sec++;
+	}
+}
+
+void print_time_spent_resolving_hash_collisions(void)
+{
+	fprintf(stderr, "print_time_spent_resolving_hash_collisions %ld:%09ld\n",
+		(long)caching.tv_sec, (long)caching.tv_nsec);
+}
+
 unsigned int get_max_object_index(void)
 {
 	return obj_hash_size;
@@ -86,11 +117,13 @@ struct object *lookup_object(const unsigned char *sha1)
 {
 	unsigned int i, first;
 	struct object *obj;
+	struct timespec time1, time2, t_diff;
 
 	if (!obj_hash)
 		return NULL;
 
 	first = i = hash_obj(sha1, obj_hash_size);
+	clock_gettime(CLOCK_MONOTONIC, &time1);
 	while ((obj = obj_hash[i]) != NULL) {
 		if (!hashcmp(sha1, obj->oid.hash))
 			break;
@@ -98,6 +131,9 @@ struct object *lookup_object(const unsigned char *sha1)
 		if (i == obj_hash_size)
 			i = 0;
 	}
+	clock_gettime(CLOCK_MONOTONIC, &time2);
+	diff(&time1, &time2, &t_diff);
+	add_time_to(&caching, &t_diff);
 	if (obj && i != first) {
 		/*
 		 * Move object to where we started to look for it so
diff --git a/object.h b/object.h
index f8b6442..ee6ab3a 100644
--- a/object.h
+++ b/object.h
@@ -56,6 +56,8 @@ extern const char *typename(unsigned int type);
 extern int type_from_string_gently(const char *str, ssize_t, int gentle);
 #define type_from_string(str) type_from_string_gently(str, -1, 0)
 
+void print_time_spent_resolving_hash_collisions(void);
+
 /*
  * Return the current number of buckets in the object hashmap.
  */
-- 
2.10.0.130.gef2bcd7.dirty



* Re: [PATCH] object: measure time needed for resolving hash collisions
@ 2016-09-15  6:47 ` Jeff King
From: Jeff King @ 2016-09-15  6:47 UTC
  To: Stefan Beller; +Cc: git

On Wed, Sep 14, 2016 at 07:01:41PM -0700, Stefan Beller wrote:

>  According to Jeff, sending patches that don't get accepted is the new hype!

It is what all the cool kids are doing. Unfortunately, it does not save
you from nitpicky reviews...;)

>  	first = i = hash_obj(sha1, obj_hash_size);
> +	clock_gettime(CLOCK_MONOTONIC, &time1);
>  	while ((obj = obj_hash[i]) != NULL) {
>  		if (!hashcmp(sha1, obj->oid.hash))
>  			break;
> @@ -98,6 +131,9 @@ struct object *lookup_object(const unsigned char *sha1)
>  		if (i == obj_hash_size)
>  			i = 0;
>  	}
> +	clock_gettime(CLOCK_MONOTONIC, &time2);
> +	diff(&time1, &time2, &t_diff);
> +	add_time_to(&caching, &t_diff);
>  	if (obj && i != first) {

I don't think this is actually measuring the time spent on collisions.
It's measuring the time we spend in hashcmp(), but that includes the
non-collision case where we find it in the first hashcmp.

Measuring _just_ the collisions is more like the patch below. In my
measurements it's more like 30ms, compared to 10s for all of the
hashcmps.

So we really aren't dealing with collisions, but rather just verifying
that our hash landed at the right spot. And _any_ data structure is
going to have to do that. If you want to make it faster, I'd try
optimizing hashcmp (and you can see why the critbit tree was slower; if
we spend so much time just on hashcmp() to make sure we're at the right
key, then making that slower with a bunch of branching is not going to
help).

I notice we still open-code hashcmp. I get a slight speedup by switching
it to memcmp(). About 2.5%, which is similar to what I showed in

  http://public-inbox.org/git/20130318073229.GA5551@sigill.intra.peff.net/

a few years ago (though it's more pronounced as a portion of the whole
now, because we've optimized some of the other bits).

The main driver there was memcmp() improvements that went into glibc
2.13 several years ago. It might be time to start assuming that memcmp()
beats our open-coded loop.
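
For reference, a rough sketch of the switch being discussed; the loop
below approximates the open-coded hashcmp() from cache.h at the time
(GIT_SHA1_RAWSZ is 20), and hashcmp_loop/hashcmp_memcmp are just
illustrative names:

	#include <string.h>

	#define GIT_SHA1_RAWSZ 20

	/* roughly the current open-coded byte loop */
	static inline int hashcmp_loop(const unsigned char *sha1,
				       const unsigned char *sha2)
	{
		int i;
		for (i = 0; i < GIT_SHA1_RAWSZ; i++, sha1++, sha2++) {
			if (*sha1 != *sha2)
				return *sha1 - *sha2;
		}
		return 0;
	}

	/* proposed: lean on glibc's optimized memcmp instead */
	static inline int hashcmp_memcmp(const unsigned char *sha1,
					 const unsigned char *sha2)
	{
		return memcmp(sha1, sha2, GIT_SHA1_RAWSZ);
	}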

It may also be possible to really micro-optimize it on some platforms,
because we know the size in advance (I'd kind of expect the compiler to
do that, but if we're ending up in glibc memcmp then it sounds like it
is not the case).

-Peff


* Re: [PATCH] object: measure time needed for resolving hash collisions
@ 2016-09-15  7:01 ` Jeff King
From: Jeff King @ 2016-09-15  7:01 UTC
  To: Stefan Beller; +Cc: git

On Wed, Sep 14, 2016 at 11:47:01PM -0700, Jeff King wrote:

> >  	first = i = hash_obj(sha1, obj_hash_size);
> > +	clock_gettime(CLOCK_MONOTONIC, &time1);
> >  	while ((obj = obj_hash[i]) != NULL) {
> >  		if (!hashcmp(sha1, obj->oid.hash))
> >  			break;
> > @@ -98,6 +131,9 @@ struct object *lookup_object(const unsigned char *sha1)
> >  		if (i == obj_hash_size)
> >  			i = 0;
> >  	}
> > +	clock_gettime(CLOCK_MONOTONIC, &time2);
> > +	diff(&time1, &time2, &t_diff);
> > +	add_time_to(&caching, &t_diff);
> >  	if (obj && i != first) {
> 
> I don't think this is actually measuring the time spent on collisions.
> It's measuring the time we spend in hashcmp(), but that includes the
> non-collision case where we find it in the first hashcmp.
> 
> Measuring _just_ the collisions is more like the patch below. In my
> measurements it's more like 30ms, compared to 10s for all of the
> hashcmps.

I forgot to send the patch. Which is just as well, because I realized it
was totally buggy.

Here's a patch that I believe is correct (it counts only times when we
move past the first hash slot). It spends about 280ms. Which is still a
lot less than 10s, so I think my other comments stand.

---
diff --git a/object.c b/object.c
index e9e73e0..7a74a1d 100644
--- a/object.c
+++ b/object.c
@@ -123,17 +123,20 @@ struct object *lookup_object(const unsigned char *sha1)
 		return NULL;
 
 	first = i = hash_obj(sha1, obj_hash_size);
-	clock_gettime(CLOCK_MONOTONIC, &time1);
 	while ((obj = obj_hash[i]) != NULL) {
 		if (!hashcmp(sha1, obj->oid.hash))
 			break;
+		if (first == i)
+			clock_gettime(CLOCK_MONOTONIC, &time1);
 		i++;
 		if (i == obj_hash_size)
 			i = 0;
 	}
-	clock_gettime(CLOCK_MONOTONIC, &time2);
-	diff(&time1, &time2, &t_diff);
-	add_time_to(&caching, &t_diff);
+	if (i != first) {
+		clock_gettime(CLOCK_MONOTONIC, &time2);
+		diff(&time1, &time2, &t_diff);
+		add_time_to(&caching, &t_diff);
+	}
 	if (obj && i != first) {
 		/*
 		 * Move object to where we started to look for it so


* Re: [PATCH] object: measure time needed for resolving hash collisions
@ 2016-09-15 16:26 ` Stefan Beller
From: Stefan Beller @ 2016-09-15 16:26 UTC
  To: Jeff King; +Cc: git

On Wed, Sep 14, 2016 at 11:47 PM, Jeff King <peff@peff.net> wrote:
> On Wed, Sep 14, 2016 at 07:01:41PM -0700, Stefan Beller wrote:
>
>>  According to Jeff, sending patches that don't get accepted is the new hype!
>
> It is what all the cool kids are doing. Unfortunately, it does not save
> you from nitpicky reviews...;)
>
>>       first = i = hash_obj(sha1, obj_hash_size);
>> +     clock_gettime(CLOCK_MONOTONIC, &time1);
>>       while ((obj = obj_hash[i]) != NULL) {
>>               if (!hashcmp(sha1, obj->oid.hash))
>>                       break;
>> @@ -98,6 +131,9 @@ struct object *lookup_object(const unsigned char *sha1)
>>               if (i == obj_hash_size)
>>                       i = 0;
>>       }
>> +     clock_gettime(CLOCK_MONOTONIC, &time2);
>> +     diff(&time1, &time2, &t_diff);
>> +     add_time_to(&caching, &t_diff);
>>       if (obj && i != first) {
>
> I don't think this is actually measuring the time spent on collisions.
> It's measuring the time we spend in hashcmp(), but that includes the
> non-collision case where we find it in the first hashcmp.

Right. I measured all lookup times, i.e. this function accounts for
1/3 of the run time.

>
> Measuring _just_ the collisions is more like the patch below. In my
> measurements it's more like 30ms, compared to 10s for all of the
> hashcmps.

So off by a few orders of magnitude, i.e. we don't have to worry about
the time spent in resolving collisions.

>
> So we really aren't dealing with collisions, but rather just verifying
> that our hash landed at the right spot. And _any_ data structure is
> going to have to do that. If you want to make it faster, I'd try
> optimizing hashcmp (and you can see why the critbit tree was slower; if
> we spend so much time just on hashcmp() to make sure we're at the right
> key, then making that slower with a bunch of branching is not going to
> help).
>
> I notice we still open-code hashcmp. I get a slight speedup by switching
> it to memcmp(). About 2.5%, which is similar to what I showed in
>
>   http://public-inbox.org/git/20130318073229.GA5551@sigill.intra.peff.net/
>
> a few years ago (though it's more pronounced as a portion of the whole
> now, because we've optimized some of the other bits).
>
> The main driver there was memcmp() improvements that went into glibc
> 2.13 several years ago. It might be time to start assuming that memcmp()
> beats our open-coded loop.

http://stackoverflow.com/questions/21106801/why-is-memcmp-so-much-faster-than-a-for-loop-check

seems to agree with you, so I'd agree with switching over.

>
> It may also be possible to really micro-optimize it on some platforms,
> because we know the size in advance (I'd kind of expect the compiler to
> do that, but if we're ending up in glibc memcmp then it sounds like it
> is not the case).

That stackoverflow link suggests that glibc already has micro-optimisations
for a variety of platforms.

>
> -Peff


* Re: [PATCH] object: measure time needed for resolving hash collisions
@ 2016-09-15 17:45 ` Junio C Hamano
From: Junio C Hamano @ 2016-09-15 17:45 UTC
  To: Jeff King; +Cc: Stefan Beller, git

Jeff King <peff@peff.net> writes:

> Measuring _just_ the collisions is more like the patch below. In my
> measurements it's more like 30ms, compared to 10s for all of the
> hashcmps.
>
> So we really aren't dealing with collisions, but rather just verifying
> that our hash landed at the right spot. And _any_ data structure is
> going to have to do that.

The reverse side of the coin may be that we could shrink the hashtable
and load it more heavily without sacrificing performance, by making
the necessary "have we landed at the right spot" check cheap enough,
I guess.



* Re: [PATCH] object: measure time needed for resolving hash collisions
@ 2016-09-15 18:47 ` Jeff King
From: Jeff King @ 2016-09-15 18:47 UTC
  To: Junio C Hamano; +Cc: Stefan Beller, git

On Thu, Sep 15, 2016 at 10:45:34AM -0700, Junio C Hamano wrote:

> Jeff King <peff@peff.net> writes:
> 
> > Measuring _just_ the collisions is more like the patch below. In my
> > measurements it's more like 30ms, compared to 10s for all of the
> > hashcmps.
> >
> > So we really aren't dealing with collisions, but rather just verifying
> > that our hash landed at the right spot. And _any_ data structure is
> > going to have to do that.
> 
> The reverse side of the coin may be if we can shrink the hashtable
> smaller and load it more heavily without sacrificing performance by
> making the necessary "have we landed at the right spot" check cheap
> enough, I guess.

I think that's where things like cuckoo hashing come into play. They
didn't have any effect for us because we already keep the table very
unloaded. But you could _probably_ increase the load factor without
sacrificing performance using a more clever scheme.
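
For illustration, a sketch of what "very unloaded" means here; this
approximates the policy in object.c at the time (treat the exact
condition as from memory), where the table is grown once it is about
half full, keeping probe sequences short at the cost of a larger,
sparser array:

	/* nr_objs and obj_hash_size are the statics in object.c;
	 * grow_object_hash() doubles the bucket count */
	static void maybe_grow(void)
	{
		if (obj_hash_size - 1 <= nr_objs * 2)	/* ~50% load */
			grow_object_hash();
	}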

It's not clear to me that the current table size is a big problem,
though. It might be hurting us with cache effects, but I think the only
way we'd know is to measure.

-Peff


* Re: [PATCH] object: measure time needed for resolving hash collisions
@ 2016-09-15 18:49 ` Jeff King
From: Jeff King @ 2016-09-15 18:49 UTC
  To: Stefan Beller; +Cc: git

On Thu, Sep 15, 2016 at 09:26:22AM -0700, Stefan Beller wrote:

> > It may also be possible to really micro-optimize it on some platforms,
> > because we know the size in advance (I'd kind of expect the compiler to
> > do that, but if we're ending up in glibc memcmp then it sounds like it
> > is not the case).
> 
> That stackoverflow link suggests that glibc already has microoptimisations
> for a variety of platforms.

It's definitely micro-optimized in glibc. What I was trying to say is
that if we are hitting the glibc implementation, then we know we are
handling the "20" at runtime. Whereas the compiler should know that "20"
is a constant, and could in theory skip the memcmp() call entirely in
favor of something like an unrolled loop.
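
Purely illustrative (hasheq20 is a hypothetical helper, and lookup
really only needs equality, not memcmp's ordering): with the length
fixed at compile time, the compare can be done as five 4-byte words,
which a compiler can fully unroll:

	#include <stdint.h>
	#include <string.h>

	static inline int hasheq20(const unsigned char *a,
				   const unsigned char *b)
	{
		uint32_t x, y;
		int i;
		for (i = 0; i < 20; i += 4) {
			/* memcpy sidesteps alignment assumptions and
			 * compilers lower it to plain loads */
			memcpy(&x, a + i, 4);
			memcpy(&y, b + i, 4);
			if (x != y)
				return 0;	/* mismatch */
		}
		return 1;	/* all 20 bytes equal */
	}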

-Peff

