All of lore.kernel.org
 help / color / mirror / Atom feed
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
To: Jeff King <peff@peff.net>
Cc: Govind Salinas <govind@sophiasuchtig.com>,
	Git Mailing List <git@vger.kernel.org>
Subject: [PATCH 3/4] Speed up git notes lookup
Date: Sat, 20 Dec 2008 00:35:45 +0100 (CET)	[thread overview]
Message-ID: <alpine.DEB.1.00.0812200035360.30769@pacific.mpi-cbg.de> (raw)
In-Reply-To: <alpine.DEB.1.00.0812192347261.30769@pacific.mpi-cbg.de>


To avoid looking up each and every commit in the notes ref's tree
object, which is very expensive, speed things up by slurping the tree
object's contents into a hash_map.

The idea fo the hashmap singleton is from David Reiss, initial
benchmarking by Jeff King.

Note: the implementation allows for arbitrary entries in the notes
tree object, ignoring those that do not reference a valid object.  This
allows you to annotate arbitrary branches, or objects.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 notes.c |  113 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 102 insertions(+), 11 deletions(-)

diff --git a/notes.c b/notes.c
index 91ec77f..68bcb24 100644
--- a/notes.c
+++ b/notes.c
@@ -4,16 +4,112 @@
 #include "refs.h"
 #include "utf8.h"
 #include "strbuf.h"
+#include "tree-walk.h"
+
+struct entry {
+	unsigned char commit_sha1[20];
+	unsigned char notes_sha1[20];
+};
+
+struct hash_map {
+	struct entry *entries;
+	off_t count, size;
+};
 
 static int initialized;
+static struct hash_map hash_map;
+
+static int hash_index(struct hash_map *map, const unsigned char *sha1)
+{
+	int i = ((*(unsigned int *)sha1) % map->size);
+
+	for (;;) {
+		unsigned char *current = map->entries[i].commit_sha1;
+
+		if (!hashcmp(sha1, current))
+			return i;
+
+		if (is_null_sha1(current))
+			return -1 - i;
+
+		if (++i == map->size)
+			i = 0;
+	}
+}
+
+static void add_entry(const unsigned char *commit_sha1,
+		const unsigned char *notes_sha1)
+{
+	int index;
+
+	if (hash_map.count + 1 > hash_map.size >> 1) {
+		int i, old_size = hash_map.size;
+		struct entry *old = hash_map.entries;
+
+		hash_map.size = old_size ? old_size << 1 : 64;
+		hash_map.entries = (struct entry *)
+			xcalloc(sizeof(struct entry), hash_map.size);
+
+		for (i = 0; i < old_size; i++)
+			if (!is_null_sha1(old[i].commit_sha1)) {
+				index = -1 - hash_index(&hash_map,
+						old[i].commit_sha1);
+				memcpy(hash_map.entries + index, old + i,
+					sizeof(struct entry));
+			}
+		free(old);
+	}
+
+	index = hash_index(&hash_map, commit_sha1);
+	if (index < 0) {
+		index = -1 - index;
+		hash_map.count++;
+	}
+
+	hashcpy(hash_map.entries[index].commit_sha1, commit_sha1);
+	hashcpy(hash_map.entries[index].notes_sha1, notes_sha1);
+}
+
+static void initialize_hash_map(const char *notes_ref_name)
+{
+	unsigned char sha1[20], commit_sha1[20];
+	unsigned *mode;
+	struct tree_desc desc;
+	struct name_entry entry;
+	void *buf;
+
+	if (!notes_ref_name || read_ref(notes_ref_name, commit_sha1) ||
+			get_tree_entry(commit_sha1, "", sha1, mode))
+		return;
+
+	buf = fill_tree_descriptor(&desc, sha1);
+	if (!buf)
+		die ("Could not read %s for notes-index", sha1_to_hex(sha1));
+
+	while (tree_entry(&desc, &entry))
+		if (!get_sha1(entry.path, commit_sha1))
+			add_entry(commit_sha1, entry.sha1);
+	free(buf);
+}
+
+static unsigned char *lookup_notes(const unsigned char *commit_sha1)
+{
+	int index;
+
+	if (!hash_map.size)
+		return NULL;
+
+	index = hash_index(&hash_map, commit_sha1);
+	if (index < 0)
+		return NULL;
+	return hash_map.entries[index].notes_sha1;
+}
 
 void get_commit_notes(const struct commit *commit, struct strbuf *sb,
 		const char *output_encoding)
 {
 	static const char *utf8 = "utf-8";
-	struct strbuf name = STRBUF_INIT;
-	const char *hex;
-	unsigned char sha1[20];
+	unsigned char *sha1;
 	char *msg;
 	unsigned long msgoffset, msglen;
 	enum object_type type;
@@ -24,17 +120,12 @@ void get_commit_notes(const struct commit *commit, struct strbuf *sb,
 			notes_ref_name = getenv(GIT_NOTES_REF_ENVIRONMENT);
 		else if (!notes_ref_name)
 			notes_ref_name = GIT_NOTES_DEFAULT_REF;
-		if (notes_ref_name && read_ref(notes_ref_name, sha1))
-			notes_ref_name = NULL;
+		initialize_hash_map(notes_ref_name);
 		initialized = 1;
 	}
 
-	if (!notes_ref_name)
-		return;
-
-	strbuf_addf(&name, "%s:%s", notes_ref_name,
-			sha1_to_hex(commit->object.sha1));
-	if (get_sha1(name.buf, sha1))
+	sha1 = lookup_notes(commit->object.sha1);
+	if (!sha1)
 		return;
 
 	if (!(msg = read_sha1_file(sha1, &type, &msglen)) || !msglen ||
-- 
1.6.1.rc3.368.g63acc

  parent reply	other threads:[~2008-12-19 23:29 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-12-16  8:15 Git Notes idea Govind Salinas
2008-12-16  8:51 ` Jeff King
2008-12-16  8:53   ` Jeff King
2008-12-16 18:43   ` Govind Salinas
2008-12-16 23:48     ` Johannes Schindelin
2008-12-17  9:45       ` Jeff King
     [not found]       ` <5d46db230812161815s1c48af9dwc96a4701fb2a669b@mail.gmail.com>
     [not found]         ` <alpine.DEB.1.00.0812170420560.14632@racer>
2008-12-17 10:11           ` Jeff King
2008-12-17 11:38             ` Johannes Schindelin
2008-12-17 19:20               ` Junio C Hamano
2008-12-18  3:08                 ` Johannes Schindelin
2008-12-19 17:42               ` Govind Salinas
2008-12-19 17:18             ` Govind Salinas
2008-12-19 17:38               ` Govind Salinas
2008-12-19 21:25                 ` Jeff King
2008-12-19 22:24                   ` Govind Salinas
2008-12-20  4:54                     ` Jeff King
2008-12-17 12:21       ` Petr Baudis
2008-12-17  9:38     ` Jeff King
2008-12-17 17:06       ` Govind Salinas
2008-12-18 13:54         ` Jeff King
2008-12-17  0:12   ` rebasing commits that have notes, was " Johannes Schindelin
2008-12-17  9:15     ` Johan Herland
2008-12-17 17:55       ` Stephan Beyer
2008-12-19 23:34   ` [PATCH 0/4] Notes reloaded Johannes Schindelin
2008-12-19 23:35     ` [PATCH 1/4] Introduce commit notes Johannes Schindelin
2008-12-20  6:53       ` Jeff King
2008-12-20  7:55         ` Robin Rosenberg
2008-12-20  8:05           ` Jeff King
2008-12-20  8:17             ` Junio C Hamano
2008-12-20  8:23               ` Jeff King
2008-12-20 20:09                 ` Junio C Hamano
2008-12-20 12:04         ` [PATCH v2 0/4] Notes, reloaded Johannes Schindelin
2008-12-20 12:05           ` [PATCH v2 1/4] Introduce commit notes Johannes Schindelin
2008-12-20 12:05           ` [PATCH v2 2/4] Add a script to edit/inspect notes Johannes Schindelin
2008-12-20 12:05           ` [PATCH v2 3/4] Speed up git notes lookup Johannes Schindelin
2008-12-20 12:06           ` [PATCH v2 4/4] Add an expensive test for git-notes Johannes Schindelin
2008-12-19 23:35     ` [PATCH 2/4] Add a script to edit/inspect notes Johannes Schindelin
2008-12-19 23:35     ` Johannes Schindelin [this message]
2008-12-19 23:37     ` [PATCH 4/4] Add an expensive test for git-notes Johannes Schindelin
2008-12-19 23:49       ` Boyd Stephen Smith Jr.
2008-12-20 11:51         ` Johannes Schindelin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=alpine.DEB.1.00.0812200035360.30769@pacific.mpi-cbg.de \
    --to=johannes.schindelin@gmx.de \
    --cc=git@vger.kernel.org \
    --cc=govind@sophiasuchtig.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.