git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 6/6 (v4)] support for path name caching in rev-cache
@ 2009-08-17 12:31 Nick Edelen
  2009-08-18  3:24 ` Nicolas Pitre
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Nick Edelen @ 2009-08-17 12:31 UTC (permalink / raw)
  To: Junio C Hamano, Nicolas Pitre, Johannes Schindelin, Sam Vilain,
	Michael J Gruber

An update to caching mechanism, allowing path names to be cached for blob and 
tree objects.  A list of names appearing in each cache slice is appended to the 
end of the slice, which is referenced by variable-sized indexes per entry.  
This allows pack-objects to more intelligently schedule unpacked/poorly packed 
object, and enables proper duplication of rev-list's behaivor.

Signed-off-by: Nick Edelen <sirnot@gmail.com>

---
 builtin-rev-cache.c       |    3 +-
 rev-cache.c               |  314 +++++++++++++++++++++++++++++++++++++--------
 rev-cache.h               |   16 ++-
 revision.h                |    6 +-
 t/t6015-rev-cache-list.sh |    4 +-
 5 files changed, 282 insertions(+), 61 deletions(-)

diff --git a/builtin-rev-cache.c b/builtin-rev-cache.c
index a3489ce..5ea5c6b 100644
--- a/builtin-rev-cache.c
+++ b/builtin-rev-cache.c
@@ -177,13 +177,14 @@ static int handle_walk(int argc, const char *argv[])
 	fprintf(stderr, "pending:\n");
 	for (i = 0; i < revs.pending.nr; i++) {
 		struct object *obj = revs.pending.objects[i].item;
+		const char *name = revs.pending.objects[i].name;
 
 		/* unfortunately, despite our careful generation, object duplication *is* a possibility...
 		 * (eg. same object introduced into two different branches) */
 		if (obj->flags & SEEN)
 			continue;
 
-		printf("%s\n", sha1_to_hex(revs.pending.objects[i].item->sha1));
+		printf("%s %s\n", sha1_to_hex(revs.pending.objects[i].item->sha1), name);
 		obj->flags |= SEEN;
 	}
 
diff --git a/rev-cache.c b/rev-cache.c
index 7eefd3c..1db5578 100644
--- a/rev-cache.c
+++ b/rev-cache.c
@@ -17,6 +17,14 @@ struct bad_slice {
 	struct bad_slice *next;
 };
 
+struct name_list {
+	unsigned char sha1[20];
+	unsigned int len;
+	struct name_list *next;
+
+	char buf[FLEX_ARRAY];
+};
+
 struct cache_slice_pointer {
 	char signature[8]; /* REVCOPTR */
 	char version;
@@ -29,10 +37,13 @@ static uint32_t fanout[0xff + 2];
 static unsigned char *idx_map;
 static int idx_size;
 static struct rc_index_header idx_head;
-static char no_idx, add_to_pending;
-static struct bad_slice *bad_slices;
+static char no_idx, add_to_pending, add_names;
 static unsigned char *idx_caches;
 
+static struct bad_slice *bad_slices;
+static struct name_list *name_lists, *cur_name_list;
+
+static struct strbuf *acc_name_buffer;
 static struct strbuf *acc_buffer;
 
 #define SLOP			5
@@ -78,7 +89,7 @@ struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondis
 	if (!dst)
 		dst = &entry[cur++ & 0x3];
 
-	dst->type = src->flags >> 5;
+	dst->type = src->flags >> 5 & 0x03;
 	dst->is_end = !!(src->flags & 0x10);
 	dst->is_start = !!(src->flags & 0x08);
 	dst->uninteresting = !!(src->flags & 0x04);
@@ -89,8 +100,9 @@ struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondis
 	dst->merge_nr = src->merge_nr;
 	dst->split_nr = src->split_nr;
 
-	dst->size_size = src->sizes >> 5;
-	dst->padding = src->sizes & 0x1f;
+	dst->size_size = src->sizes >> 5 & 0x03;
+	dst->name_size = src->sizes >> 2 & 0x03;
+	dst->padding = src->sizes & 0x02;
 
 	dst->date = ntohl(src->date);
 	dst->path = ntohs(src->path);
@@ -118,6 +130,7 @@ struct rc_object_entry_ondisk *to_disked_rc_object_entry(struct rc_object_entry
 	dst->split_nr = src->split_nr;
 
 	dst->sizes  = (unsigned char)src->size_size << 5;
+	dst->sizes |= (unsigned char)src->name_size << 2;
 	dst->sizes |= (unsigned char)src->padding;
 
 	dst->date = htonl(src->date);
@@ -190,6 +203,12 @@ static void cleanup_cache_slices(void)
 		idx_map = 0;
 	}
 
+	while (name_lists) {
+		struct name_list *nl = name_lists->next;
+		free(name_lists);
+		name_lists = nl;
+	}
+
 }
 
 static int init_index(void)
@@ -322,7 +341,7 @@ static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsig
 	struct blob *blob;
 	struct tree *tree;
 	struct object *obj;
-	unsigned long size;
+	unsigned long size, name_index;
 
 	size = decode_size(ptr + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
 	switch (entry->type) {
@@ -355,9 +374,22 @@ static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsig
 		return;
 	}
 
+	if (add_names && cur_name_list) {
+		name_index = decode_size(ptr + RC_ENTRY_NAME_OFFSET(entry), entry->name_size);
+
+		if (name_index >= cur_name_list->len)
+			name_index = 0;
+	} else name_index = 0;
+
 	obj->flags |= FACE_VALUE;
-	if (add_to_pending)
-		add_pending_object(revs, obj, "");
+	if (add_to_pending) {
+		char *name = "";
+
+		if (name_index)
+			name = cur_name_list->buf + name_index;
+
+		add_pending_object(revs, obj, name);
+	}
 }
 
 static int setup_traversal(struct rc_slice_header *head, unsigned char *map, struct commit *commit, struct commit_list **work,
@@ -712,15 +744,44 @@ end:
 	return retval;
 }
 
-static int get_cache_slice_header(unsigned char *cache_sha1, unsigned char *map, int len, struct rc_slice_header *head)
+static struct name_list *get_cache_slice_name_list(struct rc_slice_header *head, int fd)
+{
+	struct name_list *nl = name_lists;
+
+	while (nl) {
+		if (!hashcmp(nl->sha1, head->sha1))
+			break;
+		nl = nl->next;
+	}
+
+	if (nl)
+		return nl;
+
+	nl = xcalloc(1, sizeof(struct name_list) + head->name_size);
+	nl->len = head->name_size;
+	hashcpy(nl->sha1, head->sha1);
+
+	lseek(fd, head->size, SEEK_SET);
+	read_in_full(fd, nl->buf, head->name_size);
+
+	nl->next = name_lists;
+	name_lists = nl;
+
+	return nl;
+}
+
+static int get_cache_slice_header(int fd, unsigned char *cache_sha1, int len, struct rc_slice_header *head)
 {
 	int t;
 
-	memcpy(head, map, sizeof(struct rc_slice_header));
+	if (xread(fd, head, sizeof(struct rc_slice_header)) != sizeof(struct rc_slice_header))
+		return -1;
+
 	head->ofs_objects = ntohl(head->ofs_objects);
 	head->object_nr = ntohl(head->object_nr);
 	head->size = ntohl(head->size);
 	head->path_nr = ntohs(head->path_nr);
+	head->name_size = ntohl(head->name_size);
 
 	if (memcmp(head->signature, "REVCACHE", 8))
 		return -1;
@@ -729,10 +790,10 @@ static int get_cache_slice_header(unsigned char *cache_sha1, unsigned char *map,
 	if (hashcmp(head->sha1, cache_sha1))
 		return -3;
 	t = sizeof(struct rc_slice_header);
-	if (t != head->ofs_objects || t >= len)
+	if (t != head->ofs_objects)
 		return -4;
-
-	head->size = len;
+	if (head->size + head->name_size != len)
+		return -5;
 
 	return 0;
 }
@@ -784,7 +845,7 @@ int traverse_cache_slice(struct rev_info *revs,
 	unsigned long *date_so_far, int *slop_so_far,
 	struct commit_list ***queue, struct commit_list **work)
 {
-	int fd = -1, retval = -3;
+	int fd = -1, t, retval;
 	struct stat fi;
 	struct rc_slice_header head;
 	struct rev_cache_info *rci;
@@ -800,26 +861,31 @@ int traverse_cache_slice(struct rev_info *revs,
 	/* load options */
 	rci = &revs->rev_cache_info;
 	add_to_pending = rci->add_to_pending;
+	add_names = rci->add_names;
 
 	memset(&head, 0, sizeof(struct rc_slice_header));
+#	define ERROR(x)		do { retval = (x); goto end; } while (0);
 
 	fd = open_cache_slice(cache_sha1, O_RDONLY);
 	if (fd == -1)
-		goto end;
+		ERROR(-1);
 	if (fstat(fd, &fi) || fi.st_size < sizeof(struct rc_slice_header))
-		goto end;
+		ERROR(-2);
+
+	if ((t = get_cache_slice_header(fd, cache_sha1, fi.st_size, &head)) < 0)
+		ERROR(-t);
+	if (add_names)
+		cur_name_list = get_cache_slice_name_list(&head, fd);
 
-	map = xmmap(0, fi.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	map = xmmap(0, head.size, PROT_READ, MAP_PRIVATE, fd, 0);
 	if (map == MAP_FAILED)
-		goto end;
-	if (get_cache_slice_header(cache_sha1, map, fi.st_size, &head))
-		goto end;
+		ERROR(-3);
 
 	retval = traverse_cache_slice_1(&head, map, revs, commit, date_so_far, slop_so_far, queue, work);
 
 end:
 	if (map != MAP_FAILED)
-		munmap(map, fi.st_size);
+		munmap(map, head.size);
 	if (fd != -1)
 		close(fd);
 
@@ -827,6 +893,7 @@ end:
 	if (retval)
 		mark_bad_slice(cache_sha1);
 
+#	undef ERROR
 	return retval;
 }
 
@@ -1111,23 +1178,110 @@ static unsigned long decode_size(unsigned char *str, int len)
 	return size;
 }
 
+
+#define NL_HASH_TABLE_SIZE		(0xffff + 1)
+#define NL_HASH_NUMBER			(NL_HASH_TABLE_SIZE >> 3)
+
+struct name_list_hash {
+	int ind;
+	struct name_list_hash *next;
+};
+
+static struct name_list_hash **nl_hash_table;
+static unsigned char *nl_hashes;
+
+/* FNV-1a hash */
+static unsigned int hash_name(const char *name)
+{
+	unsigned int hash = 2166136261ul;
+	const char *p = name;
+
+	while (*p) {
+		hash ^= *p++;
+		hash *= 16777619ul;
+	}
+
+	return hash & 0xffff;
+}
+
+static int name_in_list(const char *name)
+{
+	unsigned int h = hash_name(name);
+	struct name_list_hash *entry = nl_hash_table[h];
+
+	while (entry && strcmp(acc_name_buffer->buf + entry->ind, name))
+		entry = entry->next;
+
+	if (entry)
+		return entry->ind;
+
+	/* add name to buffer and create hash reference */
+	entry = xcalloc(1, sizeof(struct name_list_hash));
+	entry->ind = acc_name_buffer->len;
+	strbuf_add(acc_name_buffer, name, strlen(name) + 1);
+
+	entry->next = nl_hash_table[h];
+	nl_hash_table[h] = entry;
+
+	nl_hashes[h / 8] |= h % 8;
+
+	return entry->ind;
+}
+
+static void init_name_list_hash(void)
+{
+	nl_hash_table = xcalloc(NL_HASH_TABLE_SIZE, sizeof(struct name_list_hash));
+	nl_hashes = xcalloc(NL_HASH_NUMBER, 1);
+}
+
+static void cleanup_name_list_hash(void)
+{
+	int i;
+
+	for (i = 0; i < NL_HASH_NUMBER; i++) {
+		int j, ind = nl_hashes[i];
+
+		if (!ind)
+			continue;
+
+		for (j = 0; j < 8; j++) {
+			struct name_list_hash **entryp;
+
+			if (!(ind & 1 << j))
+				continue;
+
+			entryp = &nl_hash_table[i * 8 + j];
+			while (*entryp) {
+				struct name_list_hash *t = (*entryp)->next;
+
+				free(*entryp);
+				*entryp = t;
+			}
+		}
+	} /* code overhang! */
+
+	free(nl_hashes);
+	free(nl_hash_table);
+}
+
 static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *entryp,
-	struct strbuf *merge_str, struct strbuf *split_str)
+	struct strbuf *merge_str, struct strbuf *split_str, char *name, unsigned long size)
 {
 	struct rc_object_entry entry;
-	unsigned char size_str[7];
-	unsigned long size;
+	unsigned char size_str[7], name_str[7];
 	enum object_type type;
 	void *data;
 
 	if (entryp)
 		sha1 = entryp->sha1;
 
-	/* retrieve size data */
-	data = read_sha1_file(sha1, &type, &size);
+	if (!size) {
+		/* retrieve size data */
+		data = read_sha1_file(sha1, &type, &size);
 
-	if (data)
-		free(data);
+		if (data)
+			free(data);
+	}
 
 	/* initialize! */
 	if (!entryp) {
@@ -1145,6 +1299,9 @@ static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *
 
 	entryp->size_size = encode_size(size, size_str);
 
+	if (name)
+		entryp->name_size = encode_size(name_in_list(name), name_str);
+
 	/* write the muvabitch */
 	strbuf_add(acc_buffer, to_disked_rc_object_entry(entryp, 0), sizeof(struct rc_object_entry_ondisk));
 
@@ -1154,6 +1311,9 @@ static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *
 		strbuf_add(acc_buffer, split_str->buf, split_str->len);
 
 	strbuf_add(acc_buffer, size_str, entryp->size_size);
+
+	if (name)
+		strbuf_add(acc_buffer, name_str, entryp->name_size);
 }
 
 /* returns non-zero to continue parsing, 0 to skip */
@@ -1198,6 +1358,9 @@ continue_loop:
 static int dump_tree_callback(const unsigned char *sha1, const char *path, unsigned int mode)
 {
 	strbuf_add(acc_buffer, sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, path, strlen(path) + 1);
 
 	return 1;
 }
@@ -1211,6 +1374,9 @@ static void tree_addremove(struct diff_options *options,
 		return;
 
 	strbuf_add(acc_buffer, sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, concatpath, strlen(concatpath) + 1);
 }
 
 static void tree_change(struct diff_options *options,
@@ -1223,12 +1389,15 @@ static void tree_change(struct diff_options *options,
 		return;
 
 	strbuf_add(acc_buffer, new_sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, concatpath, strlen(concatpath) + 1);
 }
 
 static int add_unique_objects(struct commit *commit)
 {
 	struct commit_list *list;
-	struct strbuf os, ost, *orig_buf;
+	struct strbuf os, ost, names, *orig_name_buf, *orig_buf;
 	struct diff_options opts;
 	int i, j, next;
 	char is_first = 1;
@@ -1236,13 +1405,17 @@ static int add_unique_objects(struct commit *commit)
 	/* ...no, calculate unique objects */
 	strbuf_init(&os, 0);
 	strbuf_init(&ost, 0);
+	strbuf_init(&names, 0);
 	orig_buf = acc_buffer;
+	orig_name_buf = acc_name_buffer;
+	acc_name_buffer = &names;
 
 	diff_setup(&opts);
 	DIFF_OPT_SET(&opts, RECURSIVE);
 	DIFF_OPT_SET(&opts, TREE_IN_RECURSIVE);
 	opts.change = tree_change;
 	opts.add_remove = tree_addremove;
+#	define ENTRY_SIZE (20 + sizeof(size_t))
 
 	/* this is only called for non-ends (ie. all parents interesting) */
 	for (list = commit->parents; list; list = list->next) {
@@ -1253,20 +1426,20 @@ static int add_unique_objects(struct commit *commit)
 
 		strbuf_setlen(acc_buffer, 0);
 		diff_tree_sha1(list->item->tree->object.sha1, commit->tree->object.sha1, "", &opts);
-		qsort(acc_buffer->buf, acc_buffer->len / 20, 20, (int (*)(const void *, const void *))hashcmp);
+		qsort(acc_buffer->buf, acc_buffer->len / ENTRY_SIZE, ENTRY_SIZE, (int (*)(const void *, const void *))hashcmp);
 
 		/* take intersection */
 		if (!is_first) {
-			for (next = i = j = 0; i < os.len; i += 20) {
+			for (next = i = j = 0; i < os.len; i += ENTRY_SIZE) {
 				while (j < ost.len && hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)) < 0)
-					j += 20;
+					j += ENTRY_SIZE;
 
 				if (j >= ost.len || hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)))
 					continue;
 
 				if (next != i)
-					memcpy(os.buf + next, os.buf + i, 20);
-				next += 20;
+					memcpy(os.buf + next, os.buf + i, ENTRY_SIZE);
+				next += ENTRY_SIZE;
 			}
 
 			if (next != i)
@@ -1283,26 +1456,34 @@ static int add_unique_objects(struct commit *commit)
 
 	/* the ordering of non-commit objects dosn't really matter, so we're not gonna bother */
 	acc_buffer = orig_buf;
-	for (i = 0; i < os.len; i += 20)
-		add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0);
+	acc_name_buffer = orig_name_buf;
+	for (i = 0; i < os.len; i += ENTRY_SIZE)
+		add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0, names.buf + *(size_t *)(os.buf + i + 20), 0);
 
 	/* last but not least, the main tree */
-	add_object_entry(commit->tree->object.sha1, 0, 0, 0);
+	add_object_entry(commit->tree->object.sha1, 0, 0, 0, 0, 0);
+
+	strbuf_release(&ost);
+	strbuf_release(&os);
+	strbuf_release(&names);
 
-	return i / 20 + 1;
+	return i / ENTRY_SIZE + 1;
+#	undef ENTRY_SIZE
 }
 
 static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *index)
 {
-	unsigned char *map = mapping->map;
 	int i = *index, object_nr = 0;
+	unsigned char *map = mapping->map;
 	struct rc_object_entry *entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
+	unsigned long size;
 
 	i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
 	while (i < mapping->size) {
-		int pos = i;
+		char *name;
+		int name_index, pos = i;
 
-		entry = RC_OBTAIN_OBJECT_ENTRY(map + i;
+		entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
 		i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
 
 		if (entry->type == OBJ_COMMIT) {
@@ -1310,7 +1491,15 @@ static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *inde
 			return object_nr;
 		}
 
-		strbuf_add(acc_buffer, map + pos, i - pos);
+		name_index = decode_size(map + pos + RC_ENTRY_NAME_OFFSET(entry), entry->name_size);
+		if (name_index && name_index < mapping->name_size)
+			name = mapping->names + name_index;
+		else
+			name = 0;
+
+		size = decode_size(map + pos + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
+
+		add_object_entry(0, entry, 0, 0, name, size);
 		object_nr++;
 	}
 
@@ -1395,6 +1584,7 @@ void init_rev_cache_info(struct rev_cache_info *rci)
 	rci->overwrite_all = 0;
 
 	rci->add_to_pending = 1;
+	rci->add_names = 1;
 
 	rci->ignore_size = 0;
 }
@@ -1419,9 +1609,9 @@ int make_cache_slice(struct rev_cache_info *rci,
 	struct rc_slice_header head;
 	struct commit *commit;
 	unsigned char sha1[20];
-	struct strbuf merge_paths, split_paths;
+	struct strbuf merge_paths, split_paths, namelist;
 	int object_nr, total_sz, fd;
-	char file[PATH_MAX], *newfile;
+	char file[PATH_MAX], null, *newfile;
 	struct rev_cache_info *trci;
 	git_SHA_CTX ctx;
 
@@ -1436,7 +1626,13 @@ int make_cache_slice(struct rev_cache_info *rci,
 	strbuf_init(&endlist, 0);
 	strbuf_init(&merge_paths, 0);
 	strbuf_init(&split_paths, 0);
+	strbuf_init(&namelist, 0);
 	acc_buffer = &buffer;
+	acc_name_buffer = &namelist;
+
+	null = 0;
+	strbuf_add(&namelist, &null, 1);
+	init_name_list_hash();
 
 	if (!revs) {
 		revs = &therevs;
@@ -1467,6 +1663,7 @@ int make_cache_slice(struct rev_cache_info *rci,
 	trci = &revs->rev_cache_info;
 	init_rev_cache_info(trci);
 	trci->add_to_pending = 0;
+	trci->add_names = 0;
 
 	setup_revisions(0, 0, revs, 0);
 	if (prepare_revision_walk(revs))
@@ -1504,7 +1701,7 @@ int make_cache_slice(struct rev_cache_info *rci,
 
 		commit->indegree = 0;
 
-		add_object_entry(0, &object, &merge_paths, &split_paths);
+		add_object_entry(0, &object, &merge_paths, &split_paths, 0, 0);
 		object_nr++;
 
 		if (rci->objects && !object.is_end) {
@@ -1530,10 +1727,16 @@ int make_cache_slice(struct rev_cache_info *rci,
 		total_sz += buffer.len;
 	}
 
+	/* write path name lookup list */
+	head.name_size = htonl(namelist.len);
+	write_in_full(fd, namelist.buf, namelist.len);
+
 	/* go ahead a free some stuff... */
 	strbuf_release(&buffer);
 	strbuf_release(&merge_paths);
 	strbuf_release(&split_paths);
+	strbuf_release(&namelist);
+	cleanup_name_list_hash();
 	if (path_sz)
 		free(paths);
 	while (path_track_alloc)
@@ -1991,6 +2194,7 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
 	for (i = idx_head.cache_nr - 1; i >= 0; i--) {
 		struct rev_cache_slice_map *map = rci->maps + i;
 		struct stat fi;
+		struct rc_slice_header head;
 		int fd;
 
 		if (!map->size)
@@ -2003,13 +2207,20 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
 			continue;
 		if (fi.st_size < sizeof(struct rc_slice_header))
 			continue;
+		if (get_cache_slice_header(fd, idx_caches + i * 20, fi.st_size, &head))
+			continue;
 
-		map->map = xmmap(0, fi.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+		map->map = xmmap(0, head.size, PROT_READ, MAP_PRIVATE, fd, 0);
 		if (map->map == MAP_FAILED)
 			continue;
 
+		lseek(fd, head.size, SEEK_SET);
+		map->names = xcalloc(head.name_size, 1);
+		read_in_full(fd, map->names, head.name_size);
+
 		close(fd);
-		map->size = fi.st_size;
+		map->size = head.size;
+		map->name_size = head.name_size;
 	}
 
 	rci->make_index = 0;
@@ -2026,6 +2237,7 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
 		if (!map->size)
 			continue;
 
+		free(map->names);
 		munmap(map->map, map->size);
 	}
 	free(rci->maps);
@@ -2047,7 +2259,6 @@ static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
 {
 	struct rc_slice_header head;
 	int fd, len, retval = -1;
-	unsigned char *map = MAP_FAILED;
 	struct stat fi;
 
 	len = strlen(slice_path);
@@ -2062,17 +2273,12 @@ static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
 	if (fstat(fd, &fi) || fi.st_size < sizeof(head))
 		goto end;
 
-	map = xmmap(0, sizeof(head), PROT_READ, MAP_PRIVATE, fd, 0);
-	if (map == MAP_FAILED)
-		goto end;
-	if (get_cache_slice_header(sha1, map, fi.st_size, &head))
+	if (get_cache_slice_header(fd, sha1, fi.st_size, &head))
 		goto end;
 
 	retval = 0;
 
 end:
-	if (map != MAP_FAILED)
-		munmap(map, sizeof(head));
 	if (fd > 0)
 		close(fd);
 
diff --git a/rev-cache.h b/rev-cache.h
index 14437d8..c88ceae 100644
--- a/rev-cache.h
+++ b/rev-cache.h
@@ -10,8 +10,14 @@
 #define RC_OBTAIN_OBJECT_ENTRY(p)			from_disked_rc_object_entry((struct rc_object_entry_ondisk *)(p), 0)
 #define RC_OBTAIN_INDEX_ENTRY(p)			from_disked_rc_index_entry((struct rc_index_entry_ondisk *)(p), 0)
 
-#define RC_ACTUAL_OBJECT_ENTRY_SIZE(e)		(sizeof(struct rc_object_entry_ondisk) + RC_PATH_SIZE((e)->merge_nr + (e)->split_nr) + (e)->size_size)
-#define RC_ENTRY_SIZE_OFFSET(e)				(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->size_size)
+#define RC_ACTUAL_OBJECT_ENTRY_SIZE(e)	(\
+	sizeof(struct rc_object_entry_ondisk) + \
+	RC_PATH_SIZE((e)->merge_nr + (e)->split_nr) + \
+	(e)->size_size + \
+	(e)->name_size\
+)
+#define RC_ENTRY_SIZE_OFFSET(e)			(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->name_size - (e)->size_size)
+#define RC_ENTRY_NAME_OFFSET(e)			(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->name_size)
 
 /* single index maps objects to cache files */
 struct rc_index_header {
@@ -50,6 +56,8 @@ struct rc_slice_header {
 	uint32_t size;
 
 	unsigned char sha1[20];
+
+	uint32_t name_size;
 };
 
 struct rc_object_entry_ondisk {
@@ -76,7 +84,8 @@ struct rc_object_entry {
 	unsigned char merge_nr; /* : 7 */
 	unsigned char split_nr; /* : 7 */
 	unsigned size_size : 3;
-	unsigned padding : 5;
+	unsigned name_size : 3;
+	unsigned padding : 2;
 
 	uint32_t date;
 	uint16_t path;
@@ -84,6 +93,7 @@ struct rc_object_entry {
 	/* merge paths */
 	/* split paths */
 	/* size */
+	/* name id */
 };
 
 struct rc_index_entry *from_disked_rc_index_entry(struct rc_index_entry_ondisk *src, struct rc_index_entry *dst);
diff --git a/revision.h b/revision.h
index c3ec1b3..b2d5834 100644
--- a/revision.h
+++ b/revision.h
@@ -23,6 +23,9 @@ struct rev_cache_slice_map {
 	unsigned char *map;
 	int size;
 	int last_index;
+
+	char *names;
+	int name_size;
 };
 
 struct rev_cache_info {
@@ -36,7 +39,8 @@ struct rev_cache_info {
 	unsigned overwrite_all : 1;
 
 	/* traversal flags */
-	unsigned add_to_pending : 1;
+	unsigned add_to_pending : 1,
+		add_names : 1;
 
 	/* fuse options */
 	unsigned int ignore_size;
diff --git a/t/t6015-rev-cache-list.sh b/t/t6015-rev-cache-list.sh
index fa6df21..ff36881 100755
--- a/t/t6015-rev-cache-list.sh
+++ b/t/t6015-rev-cache-list.sh
@@ -4,8 +4,8 @@ test_description='git rev-cache tests'
 . ./test-lib.sh
 
 test_cmp_sorted() {
-	grep -io "[a-f0-9]*" $1 | sort >.tmpfile1 && 
-	grep -io "[a-f0-9]*" $2 | sort >.tmpfile2 && 
+	sort $1 >.tmpfile1 && 
+	sort $2 >.tmpfile2 && 
 	test_cmp .tmpfile1 .tmpfile2
 }
 
-- 
tg: (e2ef004..) t/revcache/names (depends on: t/revcache/docs)

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 6/6 (v4)] support for path name caching in rev-cache
  2009-08-17 12:31 [PATCH 6/6 (v4)] support for path name caching in rev-cache Nick Edelen
@ 2009-08-18  3:24 ` Nicolas Pitre
  2009-08-18 11:31   ` Nick Edelen
  2009-08-18 11:54   ` Johannes Schindelin
  2009-08-18 11:51 ` Nick Edelen
  2009-10-19 20:31 ` Nick Edelen
  2 siblings, 2 replies; 13+ messages in thread
From: Nicolas Pitre @ 2009-08-18  3:24 UTC (permalink / raw)
  To: Nick Edelen
  Cc: Junio C Hamano, Johannes Schindelin, Sam Vilain,
	Michael J Gruber, Jeff King, Shawn O. Pearce, Andreas Ericsson,
	Christian Couder, git

On Mon, 17 Aug 2009, Nick Edelen wrote:

> An update to caching mechanism, allowing path names to be cached for blob and 
> tree objects.  A list of names appearing in each cache slice is appended to the 
> end of the slice, which is referenced by variable-sized indexes per entry.  
> This allows pack-objects to more intelligently schedule unpacked/poorly packed 
> object, and enables proper duplication of rev-list's behaivor.
> 
> Signed-off-by: Nick Edelen <sirnot@gmail.com>

[...]

Well, OK.  Let's try it out for myself.

I'm applying the whole series on top of current "next" as of now.

Applying the first patch, git-am tells me: 

|warning: 70 lines add whitespace errors.

You might want to fix those.

Then build+install.  Things still work fine so far.  I'm using git's own 
git repository.  So let's get real:

|$ git rev-cache add --all
|fatal: Unable to create temporary file: Permission denied

Hmmm... why? Not good for a start.  Using strace:

|$ strace git rev-cache add --all
[...]
|stat(".git/rev-cache", {st_mode=S_IFDIR|0664, st_size=4096, ...}) = 0 
|open(".git/rev-cache/MGhgcp", O_RDWR|O_CREAT|O_EXCL, 0600) = -1 EACCES (Permission denied)
|write(2, "fatal: Unable to create temporary"..., 58) = 58 
|exit_group(128)                         = ?

OK, so attempting .git/rev-cache/MGhgcp fails with EACCES.  Let's see:

|$ ls -ld .git/rev-cache/
|drw-rw-r-- 2 nico nico 4096 2009-08-17 22:47 .git/rev-cache/

There is no directory execute permission at all.  Indeed, looking at 
rev-cache.c line 2314, the mode passed to mkdir() is 0x666.  This should 
rather be 0777.  Which brings the question: how could this ever work for 
you?  Are you testing your code as root?

|$ chmod +x .git/rev-cache
|$ git]$ git rev-cache add --all
|objects: 115733
|paths: 1535
|62e497619dbb2f8b783c89084054864965bb00d8
|endpoints:
|S 1cae2b588249e8b45239faebc658c7fa45948932
|S 4aec0d2391eb569776e332d9c15b354cd50a64c5
|S 1ff19ffed62bb581cd8eb635fe6e65ffca7ba1d0
|S fcd8ea7a91ad55d094a78c826083ebac149d81e5
|S 181301656f7a1086adcc41c3661551f190635003
|S 2898400a882bfb3c475fc2b53330912edc8a81f8
|S 7c7f2ebdb98f4844347f68b6e64c4968fa7f38e5
|S 1d9d1698ceb7b553f3cb1fdaa3150f0e85ab9cca
|S 348f73cc1db2d7b1412c40eba72319855e2959ff
|S fedc7d5458bd8e2e9589567041228a24a8d7eb4c
|S 0f57bf3ae8f0000de83907f0a674d70a879f7753
|S 7354ca323e31aa2d469498d06feebdcea137e93a
|S d47e28a143f40ad31b88e6d7b9b18bae60d21b01
|S 991ab5ed8769cd1a425b96b92772c89244bc957c
|S 07827905813bce9cadb9db2faac5848a61c7e69f
|S aff6ae5e2f10c4a8e399bf5aa446a58d74444aba
|S 6849908a55d0e7a95fa715310f739cfab4dd8def
|S ac34f56d4cf6a737f7d8cb56a9b57448f8d6e190
|S 740f1f8651561ee3f31d11cde492eedc77272ff1
|S 38b9118536279dd923e7d5c7444f5869b7f709b1
|S 13354f5377d82baee4d8c930df824c8dbeda396d
|S d82c2e2835dd1aca1a0b6b1fc9f6213ad0e0ae9c
|final return value: 0

Good, making progress.  By the way, is that output useful?  If so, is it 
documented somewhere?  Surely the "final return value" is probably not 
that useful...

Now I want to see how fast rev-list has become.

|$ git rev-list --all --objects > /dev/null
|Segmentation fault

BOOM!  :-(  And now half of the git commands are just as helpful with 
segmentation faults, including 'git log'.

Here's a backtrace from gdb:

|(gdb) bt
|#0  0x0000000000493c33 in to_disked_rc_object_entry (src=0x72d6e0,
|    dst=0x7ffff548c034) at rev-cache.c:121
|#1  0x00000000004957f0 in setup_traversal () at rev-cache.c:412
|#2  traverse_cache_slice_1 () at rev-cache.c:487
|#3  traverse_cache_slice (revs=0x7fffffffe010,
|    cache_sha1=0x7739b0 "b\227a\235/\213x<\211\b@T\206Ie",
|    commit=<value optimized out>, date_so_far=0x0, slop_so_far=0x0,
|    queue=0x7fffffffdef8, work=0x7fffffffe010) at rev-cache.c:884
|#4  0x000000000049a4e7 in get_revision_1 (revs=0x7fffffffe010)
|    at revision.c:1763
|#5  0x000000000049a55b in get_revision_internal (revs=0x7fffffffe010)
|    at revision.c:1886
|#6  0x000000000049a7c1 in get_revision (revs=0x7fffffffe010) at revision.c:1967
|#7  0x00000000004790a7 in traverse_commit_list (revs=0x7fffffffe010,
|    show_commit=0x4477d0 <show_commit>, show_object=0x447ba0 <show_object>,
|    data=0x7fffffffe3f0) at list-objects.c:164
|#8  0x000000000044808c in cmd_rev_list (argc=1, argv=0x7fffffffe680,
|    prefix=0x0) at builtin-rev-list.c:398
|#9  0x0000000000403d13 in run_builtin () at git.c:246
|#10 handle_internal_command (argc=3, argv=0x7fffffffe680) at git.c:391
|#11 0x0000000000403ebd in run_argv () at git.c:433
|#12 main (argc=3, argv=0x7fffffffe680) at git.c:504


Nicolas

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 6/6 (v4)] support for path name caching in rev-cache
  2009-08-18  3:24 ` Nicolas Pitre
@ 2009-08-18 11:31   ` Nick Edelen
  2009-08-19  3:52     ` Nicolas Pitre
  2009-08-18 11:54   ` Johannes Schindelin
  1 sibling, 1 reply; 13+ messages in thread
From: Nick Edelen @ 2009-08-18 11:31 UTC (permalink / raw)
  To: Nicolas Pitre
  Cc: Junio C Hamano, Johannes Schindelin, Sam Vilain,
	Michael J Gruber, Jeff King, Shawn O. Pearce, Andreas Ericsson,
	Christian Couder, git

AARGH scheissgepoops I'm so sorry :-(

> BOOM!  :-(  And now half of the git commands are just as helpful with
> segmentation faults, including 'git log'.
>
> Here's a backtrace from gdb:

I swear I must've been on drugs or something b/c I managed removed the
PROT_WRITE access permission from mmap :-/  cygwin didn't seem to
notice either.  And there's also your explanation of my idiotic
directory permissions choice.

You're also right about the command output; I've added a bit in the
docs explaining the output of each command.

I'll reply to *these* posts with everything fixed.  Then after that
hopefully I can just make patches for the patchset (yo dawg) and save
everyone a lot of bandwidth.

 - Nick

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 6/6 (v4)] support for path name caching in rev-cache
  2009-08-17 12:31 [PATCH 6/6 (v4)] support for path name caching in rev-cache Nick Edelen
  2009-08-18  3:24 ` Nicolas Pitre
@ 2009-08-18 11:51 ` Nick Edelen
  2009-08-21  4:48   ` Nick Edelen
  2009-10-02 22:12   ` Nick Edelen
  2009-10-19 20:31 ` Nick Edelen
  2 siblings, 2 replies; 13+ messages in thread
From: Nick Edelen @ 2009-08-18 11:51 UTC (permalink / raw)
  To: Nick Edelen, Junio C Hamano, Nicolas Pitre, Johannes Schindelin,
	Sam Vilain

An update to caching mechanism, allowing path names to be cached for blob and
tree objects.  A list of names appearing in each cache slice is appended to the
end of the slice, which is referenced by variable-sized indexes per entry.
This allows pack-objects to more intelligently schedule unpacked/poorly packed
object, and enables proper duplication of rev-list's behaivor.

Signed-off-by: Nick Edelen <sirnot@gmail.com>

---
 builtin-rev-cache.c       |    3 +-
 rev-cache.c               |  314 +++++++++++++++++++++++++++++++++++++--------
 rev-cache.h               |   16 ++-
 revision.h                |    6 +-
 t/t6015-rev-cache-list.sh |    6 +-
 5 files changed, 283 insertions(+), 62 deletions(-)

diff --git a/builtin-rev-cache.c b/builtin-rev-cache.c
index 8f41123..4c1766d 100644
--- a/builtin-rev-cache.c
+++ b/builtin-rev-cache.c
@@ -177,13 +177,14 @@ static int handle_walk(int argc, const char *argv[])
 	fprintf(stderr, "pending:\n");
 	for (i = 0; i < revs.pending.nr; i++) {
 		struct object *obj = revs.pending.objects[i].item;
+		const char *name = revs.pending.objects[i].name;
 
 		/* unfortunately, despite our careful generation, object duplication *is* a possibility...
 		 * (eg. same object introduced into two different branches) */
 		if (obj->flags & SEEN)
 			continue;
 
-		printf("%s\n", sha1_to_hex(revs.pending.objects[i].item->sha1));
+		printf("%s %s\n", sha1_to_hex(revs.pending.objects[i].item->sha1), name);
 		obj->flags |= SEEN;
 	}
 
diff --git a/rev-cache.c b/rev-cache.c
index 04a9b02..8801794 100644
--- a/rev-cache.c
+++ b/rev-cache.c
@@ -17,6 +17,14 @@ struct bad_slice {
 	struct bad_slice *next;
 };
 
+struct name_list {
+	unsigned char sha1[20];
+	unsigned int len;
+	struct name_list *next;
+
+	char buf[FLEX_ARRAY];
+};
+
 struct cache_slice_pointer {
 	char signature[8]; /* REVCOPTR */
 	char version;
@@ -29,10 +37,13 @@ static uint32_t fanout[0xff + 2];
 static unsigned char *idx_map;
 static int idx_size;
 static struct rc_index_header idx_head;
-static char no_idx, add_to_pending;
-static struct bad_slice *bad_slices;
+static char no_idx, add_to_pending, add_names;
 static unsigned char *idx_caches;
 
+static struct bad_slice *bad_slices;
+static struct name_list *name_lists, *cur_name_list;
+
+static struct strbuf *acc_name_buffer;
 static struct strbuf *acc_buffer;
 
 #define SLOP			5
@@ -79,7 +90,7 @@ struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondis
 	if (!dst)
 		dst = &entry[cur++ & 0x3];
 
-	dst->type = src->flags >> 5;
+	dst->type = src->flags >> 5 & 0x03;
 	dst->is_end = !!(src->flags & 0x10);
 	dst->is_start = !!(src->flags & 0x08);
 	dst->uninteresting = !!(src->flags & 0x04);
@@ -90,8 +101,9 @@ struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondis
 	dst->merge_nr = src->merge_nr;
 	dst->split_nr = src->split_nr;
 
-	dst->size_size = src->sizes >> 5;
-	dst->padding = src->sizes & 0x1f;
+	dst->size_size = src->sizes >> 5 & 0x03;
+	dst->name_size = src->sizes >> 2 & 0x03;
+	dst->padding = src->sizes & 0x02;
 
 	dst->date = ntohl(src->date);
 	dst->path = ntohs(src->path);
@@ -120,6 +132,7 @@ struct rc_object_entry_ondisk *to_disked_rc_object_entry(struct rc_object_entry
 	dst->split_nr = src->split_nr;
 
 	dst->sizes  = (unsigned char)src->size_size << 5;
+	dst->sizes |= (unsigned char)src->name_size << 2;
 	dst->sizes |= (unsigned char)src->padding;
 
 	dst->date = htonl(src->date);
@@ -192,6 +205,12 @@ static void cleanup_cache_slices(void)
 		idx_map = 0;
 	}
 
+	while (name_lists) {
+		struct name_list *nl = name_lists->next;
+		free(name_lists);
+		name_lists = nl;
+	}
+
 }
 
 static int init_index(void)
@@ -324,7 +343,7 @@ static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsig
 	struct blob *blob;
 	struct tree *tree;
 	struct object *obj;
-	unsigned long size;
+	unsigned long size, name_index;
 
 	size = decode_size(ptr + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
 	switch (entry->type) {
@@ -357,9 +376,22 @@ static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsig
 		return;
 	}
 
+	if (add_names && cur_name_list) {
+		name_index = decode_size(ptr + RC_ENTRY_NAME_OFFSET(entry), entry->name_size);
+
+		if (name_index >= cur_name_list->len)
+			name_index = 0;
+	} else name_index = 0;
+
 	obj->flags |= FACE_VALUE;
-	if (add_to_pending)
-		add_pending_object(revs, obj, "");
+	if (add_to_pending) {
+		char *name = "";
+
+		if (name_index)
+			name = cur_name_list->buf + name_index;
+
+		add_pending_object(revs, obj, name);
+	}
 }
 
 static int setup_traversal(struct rc_slice_header *head, unsigned char *map, struct commit *commit, struct commit_list **work,
@@ -714,15 +746,44 @@ end:
 	return retval;
 }
 
-static int get_cache_slice_header(unsigned char *cache_sha1, unsigned char *map, int len, struct rc_slice_header *head)
+static struct name_list *get_cache_slice_name_list(struct rc_slice_header *head, int fd)
+{
+	struct name_list *nl = name_lists;
+
+	while (nl) {
+		if (!hashcmp(nl->sha1, head->sha1))
+			break;
+		nl = nl->next;
+	}
+
+	if (nl)
+		return nl;
+
+	nl = xcalloc(1, sizeof(struct name_list) + head->name_size);
+	nl->len = head->name_size;
+	hashcpy(nl->sha1, head->sha1);
+
+	lseek(fd, head->size, SEEK_SET);
+	read_in_full(fd, nl->buf, head->name_size);
+
+	nl->next = name_lists;
+	name_lists = nl;
+
+	return nl;
+}
+
+static int get_cache_slice_header(int fd, unsigned char *cache_sha1, int len, struct rc_slice_header *head)
 {
 	int t;
 
-	memcpy(head, map, sizeof(struct rc_slice_header));
+	if (xread(fd, head, sizeof(struct rc_slice_header)) != sizeof(struct rc_slice_header))
+		return -1;
+
 	head->ofs_objects = ntohl(head->ofs_objects);
 	head->object_nr = ntohl(head->object_nr);
 	head->size = ntohl(head->size);
 	head->path_nr = ntohs(head->path_nr);
+	head->name_size = ntohl(head->name_size);
 
 	if (memcmp(head->signature, "REVCACHE", 8))
 		return -1;
@@ -731,10 +792,10 @@ static int get_cache_slice_header(unsigned char *cache_sha1, unsigned char *map,
 	if (hashcmp(head->sha1, cache_sha1))
 		return -3;
 	t = sizeof(struct rc_slice_header);
-	if (t != head->ofs_objects || t >= len)
+	if (t != head->ofs_objects)
 		return -4;
-
-	head->size = len;
+	if (head->size + head->name_size != len)
+		return -5;
 
 	return 0;
 }
@@ -786,7 +847,7 @@ int traverse_cache_slice(struct rev_info *revs,
 	unsigned long *date_so_far, int *slop_so_far,
 	struct commit_list ***queue, struct commit_list **work)
 {
-	int fd = -1, retval = -3;
+	int fd = -1, t, retval;
 	struct stat fi;
 	struct rc_slice_header head;
 	struct rev_cache_info *rci;
@@ -802,26 +863,31 @@ int traverse_cache_slice(struct rev_info *revs,
 	/* load options */
 	rci = &revs->rev_cache_info;
 	add_to_pending = rci->add_to_pending;
+	add_names = rci->add_names;
 
 	memset(&head, 0, sizeof(struct rc_slice_header));
+#	define ERROR(x)		do { retval = (x); goto end; } while (0);
 
 	fd = open_cache_slice(cache_sha1, O_RDONLY);
 	if (fd == -1)
-		goto end;
+		ERROR(-1);
 	if (fstat(fd, &fi) || fi.st_size < sizeof(struct rc_slice_header))
-		goto end;
+		ERROR(-2);
+
+	if ((t = get_cache_slice_header(fd, cache_sha1, fi.st_size, &head)) < 0)
+		ERROR(-t);
+	if (add_names)
+		cur_name_list = get_cache_slice_name_list(&head, fd);
 
-	map = xmmap(0, fi.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	map = xmmap(0, head.size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
 	if (map == MAP_FAILED)
-		goto end;
-	if (get_cache_slice_header(cache_sha1, map, fi.st_size, &head))
-		goto end;
+		ERROR(-3);
 
 	retval = traverse_cache_slice_1(&head, map, revs, commit, date_so_far, slop_so_far, queue, work);
 
 end:
 	if (map != MAP_FAILED)
-		munmap(map, fi.st_size);
+		munmap(map, head.size);
 	if (fd != -1)
 		close(fd);
 
@@ -829,6 +895,7 @@ end:
 	if (retval)
 		mark_bad_slice(cache_sha1);
 
+#	undef ERROR
 	return retval;
 }
 
@@ -1113,23 +1180,110 @@ static unsigned long decode_size(unsigned char *str, int len)
 	return size;
 }
 
+
+#define NL_HASH_TABLE_SIZE		(0xffff + 1)
+#define NL_HASH_NUMBER			(NL_HASH_TABLE_SIZE >> 3)
+
+struct name_list_hash {
+	int ind;
+	struct name_list_hash *next;
+};
+
+static struct name_list_hash **nl_hash_table;
+static unsigned char *nl_hashes;
+
+/* FNV-1a hash */
+static unsigned int hash_name(const char *name)
+{
+	unsigned int hash = 2166136261ul;
+	const char *p = name;
+
+	while (*p) {
+		hash ^= *p++;
+		hash *= 16777619ul;
+	}
+
+	return hash & 0xffff;
+}
+
+static int name_in_list(const char *name)
+{
+	unsigned int h = hash_name(name);
+	struct name_list_hash *entry = nl_hash_table[h];
+
+	while (entry && strcmp(acc_name_buffer->buf + entry->ind, name))
+		entry = entry->next;
+
+	if (entry)
+		return entry->ind;
+
+	/* add name to buffer and create hash reference */
+	entry = xcalloc(1, sizeof(struct name_list_hash));
+	entry->ind = acc_name_buffer->len;
+	strbuf_add(acc_name_buffer, name, strlen(name) + 1);
+
+	entry->next = nl_hash_table[h];
+	nl_hash_table[h] = entry;
+
+	nl_hashes[h / 8] |= h % 8;
+
+	return entry->ind;
+}
+
+static void init_name_list_hash(void)
+{
+	nl_hash_table = xcalloc(NL_HASH_TABLE_SIZE, sizeof(struct name_list_hash));
+	nl_hashes = xcalloc(NL_HASH_NUMBER, 1);
+}
+
+static void cleanup_name_list_hash(void)
+{
+	int i;
+
+	for (i = 0; i < NL_HASH_NUMBER; i++) {
+		int j, ind = nl_hashes[i];
+
+		if (!ind)
+			continue;
+
+		for (j = 0; j < 8; j++) {
+			struct name_list_hash **entryp;
+
+			if (!(ind & 1 << j))
+				continue;
+
+			entryp = &nl_hash_table[i * 8 + j];
+			while (*entryp) {
+				struct name_list_hash *t = (*entryp)->next;
+
+				free(*entryp);
+				*entryp = t;
+			}
+		}
+	} /* code overhang! */
+
+	free(nl_hashes);
+	free(nl_hash_table);
+}
+
 static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *entryp,
-	struct strbuf *merge_str, struct strbuf *split_str)
+	struct strbuf *merge_str, struct strbuf *split_str, char *name, unsigned long size)
 {
 	struct rc_object_entry entry;
-	unsigned char size_str[7];
-	unsigned long size;
+	unsigned char size_str[7], name_str[7];
 	enum object_type type;
 	void *data;
 
 	if (entryp)
 		sha1 = entryp->sha1;
 
-	/* retrieve size data */
-	data = read_sha1_file(sha1, &type, &size);
+	if (!size) {
+		/* retrieve size data */
+		data = read_sha1_file(sha1, &type, &size);
 
-	if (data)
-		free(data);
+		if (data)
+			free(data);
+	}
 
 	/* initialize! */
 	if (!entryp) {
@@ -1147,6 +1301,9 @@ static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *
 
 	entryp->size_size = encode_size(size, size_str);
 
+	if (name)
+		entryp->name_size = encode_size(name_in_list(name), name_str);
+
 	/* write the muvabitch */
 	strbuf_add(acc_buffer, to_disked_rc_object_entry(entryp, 0), sizeof(struct rc_object_entry_ondisk));
 
@@ -1156,6 +1313,9 @@ static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *
 		strbuf_add(acc_buffer, split_str->buf, split_str->len);
 
 	strbuf_add(acc_buffer, size_str, entryp->size_size);
+
+	if (name)
+		strbuf_add(acc_buffer, name_str, entryp->name_size);
 }
 
 /* returns non-zero to continue parsing, 0 to skip */
@@ -1200,6 +1360,9 @@ continue_loop:
 static int dump_tree_callback(const unsigned char *sha1, const char *path, unsigned int mode)
 {
 	strbuf_add(acc_buffer, sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, path, strlen(path) + 1);
 
 	return 1;
 }
@@ -1213,6 +1376,9 @@ static void tree_addremove(struct diff_options *options,
 		return;
 
 	strbuf_add(acc_buffer, sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, concatpath, strlen(concatpath) + 1);
 }
 
 static void tree_change(struct diff_options *options,
@@ -1225,12 +1391,15 @@ static void tree_change(struct diff_options *options,
 		return;
 
 	strbuf_add(acc_buffer, new_sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, concatpath, strlen(concatpath) + 1);
 }
 
 static int add_unique_objects(struct commit *commit)
 {
 	struct commit_list *list;
-	struct strbuf os, ost, *orig_buf;
+	struct strbuf os, ost, names, *orig_name_buf, *orig_buf;
 	struct diff_options opts;
 	int i, j, next;
 	char is_first = 1;
@@ -1238,13 +1407,17 @@ static int add_unique_objects(struct commit *commit)
 	/* ...no, calculate unique objects */
 	strbuf_init(&os, 0);
 	strbuf_init(&ost, 0);
+	strbuf_init(&names, 0);
 	orig_buf = acc_buffer;
+	orig_name_buf = acc_name_buffer;
+	acc_name_buffer = &names;
 
 	diff_setup(&opts);
 	DIFF_OPT_SET(&opts, RECURSIVE);
 	DIFF_OPT_SET(&opts, TREE_IN_RECURSIVE);
 	opts.change = tree_change;
 	opts.add_remove = tree_addremove;
+#	define ENTRY_SIZE (20 + sizeof(size_t))
 
 	/* this is only called for non-ends (ie. all parents interesting) */
 	for (list = commit->parents; list; list = list->next) {
@@ -1255,20 +1428,20 @@ static int add_unique_objects(struct commit *commit)
 
 		strbuf_setlen(acc_buffer, 0);
 		diff_tree_sha1(list->item->tree->object.sha1, commit->tree->object.sha1, "", &opts);
-		qsort(acc_buffer->buf, acc_buffer->len / 20, 20, (int (*)(const void *, const void *))hashcmp);
+		qsort(acc_buffer->buf, acc_buffer->len / ENTRY_SIZE, ENTRY_SIZE, (int (*)(const void *, const void *))hashcmp);
 
 		/* take intersection */
 		if (!is_first) {
-			for (next = i = j = 0; i < os.len; i += 20) {
+			for (next = i = j = 0; i < os.len; i += ENTRY_SIZE) {
 				while (j < ost.len && hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)) < 0)
-					j += 20;
+					j += ENTRY_SIZE;
 
 				if (j >= ost.len || hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)))
 					continue;
 
 				if (next != i)
-					memcpy(os.buf + next, os.buf + i, 20);
-				next += 20;
+					memcpy(os.buf + next, os.buf + i, ENTRY_SIZE);
+				next += ENTRY_SIZE;
 			}
 
 			if (next != i)
@@ -1285,26 +1458,34 @@ static int add_unique_objects(struct commit *commit)
 
 	/* the ordering of non-commit objects dosn't really matter, so we're not gonna bother */
 	acc_buffer = orig_buf;
-	for (i = 0; i < os.len; i += 20)
-		add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0);
+	acc_name_buffer = orig_name_buf;
+	for (i = 0; i < os.len; i += ENTRY_SIZE)
+		add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0, names.buf + *(size_t *)(os.buf + i + 20), 0);
 
 	/* last but not least, the main tree */
-	add_object_entry(commit->tree->object.sha1, 0, 0, 0);
+	add_object_entry(commit->tree->object.sha1, 0, 0, 0, 0, 0);
+
+	strbuf_release(&ost);
+	strbuf_release(&os);
+	strbuf_release(&names);
 
-	return i / 20 + 1;
+	return i / ENTRY_SIZE + 1;
+#	undef ENTRY_SIZE
 }
 
 static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *index)
 {
-	unsigned char *map = mapping->map;
 	int i = *index, object_nr = 0;
+	unsigned char *map = mapping->map;
 	struct rc_object_entry *entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
+	unsigned long size;
 
 	i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
 	while (i < mapping->size) {
-		int pos = i;
+		char *name;
+		int name_index, pos = i;
 
-		entry = RC_OBTAIN_OBJECT_ENTRY(map + i;
+		entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
 		i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
 
 		if (entry->type == OBJ_COMMIT) {
@@ -1312,7 +1493,15 @@ static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *inde
 			return object_nr;
 		}
 
-		strbuf_add(acc_buffer, map + pos, i - pos);
+		name_index = decode_size(map + pos + RC_ENTRY_NAME_OFFSET(entry), entry->name_size);
+		if (name_index && name_index < mapping->name_size)
+			name = mapping->names + name_index;
+		else
+			name = 0;
+
+		size = decode_size(map + pos + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
+
+		add_object_entry(0, entry, 0, 0, name, size);
 		object_nr++;
 	}
 
@@ -1397,6 +1586,7 @@ void init_rev_cache_info(struct rev_cache_info *rci)
 	rci->overwrite_all = 0;
 
 	rci->add_to_pending = 1;
+	rci->add_names = 1;
 
 	rci->ignore_size = 0;
 }
@@ -1421,9 +1611,9 @@ int make_cache_slice(struct rev_cache_info *rci,
 	struct rc_slice_header head;
 	struct commit *commit;
 	unsigned char sha1[20];
-	struct strbuf merge_paths, split_paths;
+	struct strbuf merge_paths, split_paths, namelist;
 	int object_nr, total_sz, fd;
-	char file[PATH_MAX], *newfile;
+	char file[PATH_MAX], null, *newfile;
 	struct rev_cache_info *trci;
 	git_SHA_CTX ctx;
 
@@ -1438,7 +1628,13 @@ int make_cache_slice(struct rev_cache_info *rci,
 	strbuf_init(&endlist, 0);
 	strbuf_init(&merge_paths, 0);
 	strbuf_init(&split_paths, 0);
+	strbuf_init(&namelist, 0);
 	acc_buffer = &buffer;
+	acc_name_buffer = &namelist;
+
+	null = 0;
+	strbuf_add(&namelist, &null, 1);
+	init_name_list_hash();
 
 	if (!revs) {
 		revs = &therevs;
@@ -1469,6 +1665,7 @@ int make_cache_slice(struct rev_cache_info *rci,
 	trci = &revs->rev_cache_info;
 	init_rev_cache_info(trci);
 	trci->add_to_pending = 0;
+	trci->add_names = 0;
 
 	setup_revisions(0, 0, revs, 0);
 	if (prepare_revision_walk(revs))
@@ -1506,7 +1703,7 @@ int make_cache_slice(struct rev_cache_info *rci,
 
 		commit->indegree = 0;
 
-		add_object_entry(0, &object, &merge_paths, &split_paths);
+		add_object_entry(0, &object, &merge_paths, &split_paths, 0, 0);
 		object_nr++;
 
 		if (rci->objects && !object.is_end) {
@@ -1532,10 +1729,16 @@ int make_cache_slice(struct rev_cache_info *rci,
 		total_sz += buffer.len;
 	}
 
+	/* write path name lookup list */
+	head.name_size = htonl(namelist.len);
+	write_in_full(fd, namelist.buf, namelist.len);
+
 	/* go ahead a free some stuff... */
 	strbuf_release(&buffer);
 	strbuf_release(&merge_paths);
 	strbuf_release(&split_paths);
+	strbuf_release(&namelist);
+	cleanup_name_list_hash();
 	if (path_sz)
 		free(paths);
 	while (path_track_alloc)
@@ -1993,6 +2196,7 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
 	for (i = idx_head.cache_nr - 1; i >= 0; i--) {
 		struct rev_cache_slice_map *map = rci->maps + i;
 		struct stat fi;
+		struct rc_slice_header head;
 		int fd;
 
 		if (!map->size)
@@ -2005,13 +2209,20 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
 			continue;
 		if (fi.st_size < sizeof(struct rc_slice_header))
 			continue;
+		if (get_cache_slice_header(fd, idx_caches + i * 20, fi.st_size, &head))
+			continue;
 
-		map->map = xmmap(0, fi.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+		map->map = xmmap(0, head.size, PROT_READ, MAP_PRIVATE, fd, 0);
 		if (map->map == MAP_FAILED)
 			continue;
 
+		lseek(fd, head.size, SEEK_SET);
+		map->names = xcalloc(head.name_size, 1);
+		read_in_full(fd, map->names, head.name_size);
+
 		close(fd);
-		map->size = fi.st_size;
+		map->size = head.size;
+		map->name_size = head.name_size;
 	}
 
 	rci->make_index = 0;
@@ -2028,6 +2239,7 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
 		if (!map->size)
 			continue;
 
+		free(map->names);
 		munmap(map->map, map->size);
 	}
 	free(rci->maps);
@@ -2049,7 +2261,6 @@ static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
 {
 	struct rc_slice_header head;
 	int fd, len, retval = -1;
-	unsigned char *map = MAP_FAILED;
 	struct stat fi;
 
 	len = strlen(slice_path);
@@ -2064,17 +2275,12 @@ static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
 	if (fstat(fd, &fi) || fi.st_size < sizeof(head))
 		goto end;
 
-	map = xmmap(0, sizeof(head), PROT_READ, MAP_PRIVATE, fd, 0);
-	if (map == MAP_FAILED)
-		goto end;
-	if (get_cache_slice_header(sha1, map, fi.st_size, &head))
+	if (get_cache_slice_header(fd, sha1, fi.st_size, &head))
 		goto end;
 
 	retval = 0;
 
 end:
-	if (map != MAP_FAILED)
-		munmap(map, sizeof(head));
 	if (fd > 0)
 		close(fd);
 
diff --git a/rev-cache.h b/rev-cache.h
index 14437d8..c88ceae 100644
--- a/rev-cache.h
+++ b/rev-cache.h
@@ -10,8 +10,14 @@
 #define RC_OBTAIN_OBJECT_ENTRY(p)			from_disked_rc_object_entry((struct rc_object_entry_ondisk *)(p), 0)
 #define RC_OBTAIN_INDEX_ENTRY(p)			from_disked_rc_index_entry((struct rc_index_entry_ondisk *)(p), 0)
 
-#define RC_ACTUAL_OBJECT_ENTRY_SIZE(e)		(sizeof(struct rc_object_entry_ondisk) + RC_PATH_SIZE((e)->merge_nr + (e)->split_nr) + (e)->size_size)
-#define RC_ENTRY_SIZE_OFFSET(e)				(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->size_size)
+#define RC_ACTUAL_OBJECT_ENTRY_SIZE(e)	(\
+	sizeof(struct rc_object_entry_ondisk) + \
+	RC_PATH_SIZE((e)->merge_nr + (e)->split_nr) + \
+	(e)->size_size + \
+	(e)->name_size\
+)
+#define RC_ENTRY_SIZE_OFFSET(e)			(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->name_size - (e)->size_size)
+#define RC_ENTRY_NAME_OFFSET(e)			(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->name_size)
 
 /* single index maps objects to cache files */
 struct rc_index_header {
@@ -50,6 +56,8 @@ struct rc_slice_header {
 	uint32_t size;
 
 	unsigned char sha1[20];
+
+	uint32_t name_size;
 };
 
 struct rc_object_entry_ondisk {
@@ -76,7 +84,8 @@ struct rc_object_entry {
 	unsigned char merge_nr; /* : 7 */
 	unsigned char split_nr; /* : 7 */
 	unsigned size_size : 3;
-	unsigned padding : 5;
+	unsigned name_size : 3;
+	unsigned padding : 2;
 
 	uint32_t date;
 	uint16_t path;
@@ -84,6 +93,7 @@ struct rc_object_entry {
 	/* merge paths */
 	/* split paths */
 	/* size */
+	/* name id */
 };
 
 struct rc_index_entry *from_disked_rc_index_entry(struct rc_index_entry_ondisk *src, struct rc_index_entry *dst);
diff --git a/revision.h b/revision.h
index c3ec1b3..b2d5834 100644
--- a/revision.h
+++ b/revision.h
@@ -23,6 +23,9 @@ struct rev_cache_slice_map {
 	unsigned char *map;
 	int size;
 	int last_index;
+
+	char *names;
+	int name_size;
 };
 
 struct rev_cache_info {
@@ -36,7 +39,8 @@ struct rev_cache_info {
 	unsigned overwrite_all : 1;
 
 	/* traversal flags */
-	unsigned add_to_pending : 1;
+	unsigned add_to_pending : 1,
+		add_names : 1;
 
 	/* fuse options */
 	unsigned int ignore_size;
diff --git a/t/t6015-rev-cache-list.sh b/t/t6015-rev-cache-list.sh
index 065f214..e603f85 100755
--- a/t/t6015-rev-cache-list.sh
+++ b/t/t6015-rev-cache-list.sh
@@ -4,8 +4,8 @@ test_description='git rev-cache tests'
 . ./test-lib.sh
 
 test_cmp_sorted() {
-	grep -io "[a-f0-9]*" $1 | sort >.tmpfile1 &&
-	grep -io "[a-f0-9]*" $2 | sort >.tmpfile2 &&
+	sort $1 >.tmpfile1 &&
+	sort $2 >.tmpfile2 &&
 	test_cmp .tmpfile1 .tmpfile2
 }
 
@@ -75,7 +75,7 @@ test_expect_success 'init repo' '
 	sleep 2 &&
 	git checkout master &&
 	git merge -m "triple merge" b1 b11 &&
-	git rm -r d1 && 
+	git rm -r d1 &&
 	sleep 2 &&
 	git commit -a -m "oh noes"
 '
-- 
tg: (635e0b5..) t/revcache/names (depends on: t/revcache/docs)

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 6/6 (v4)] support for path name caching in rev-cache
  2009-08-18  3:24 ` Nicolas Pitre
  2009-08-18 11:31   ` Nick Edelen
@ 2009-08-18 11:54   ` Johannes Schindelin
  1 sibling, 0 replies; 13+ messages in thread
From: Johannes Schindelin @ 2009-08-18 11:54 UTC (permalink / raw)
  To: Nicolas Pitre
  Cc: Nick Edelen, Junio C Hamano, Sam Vilain, Michael J Gruber,
	Jeff King, Shawn O. Pearce, Andreas Ericsson, Christian Couder,
	git

Hi,

On Mon, 17 Aug 2009, Nicolas Pitre wrote:

> |$ ls -ld .git/rev-cache/
> |drw-rw-r-- 2 nico nico 4096 2009-08-17 22:47 .git/rev-cache/
> 
> There is no directory execute permission at all.  Indeed, looking at 
> rev-cache.c line 2314, the mode passed to mkdir() is 0x666.  This should 
> rather be 0777.  Which brings the question: how could this ever work for 
> you?  Are you testing your code as root?

No: Nick is on Windows.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 6/6 (v4)] support for path name caching in rev-cache
  2009-08-18 11:31   ` Nick Edelen
@ 2009-08-19  3:52     ` Nicolas Pitre
  2009-08-20 12:43       ` Nick Edelen
  0 siblings, 1 reply; 13+ messages in thread
From: Nicolas Pitre @ 2009-08-19  3:52 UTC (permalink / raw)
  To: Nick Edelen
  Cc: Junio C Hamano, Johannes Schindelin, Sam Vilain,
	Michael J Gruber, Jeff King, Shawn O. Pearce, Andreas Ericsson,
	Christian Couder, git

On Tue, 18 Aug 2009, Nick Edelen wrote:

> I'll reply to *these* posts with everything fixed.  Then after that
> hopefully I can just make patches for the patchset (yo dawg) and save
> everyone a lot of bandwidth.

We have bandwidth to spare.  The bandwidth is cheaper than the time 
needed to reconstruct a patch series using a previous series and having 
to modify it with additional patches.  In other words, always make it 
easier for the people willing to review and test your stuff.

Now... testing your latest series looks promising.  The speed increase 
is really there which is good.

However, there are still some issues:

|$ rm -rf .git/rev-cache/
|$ git rev-list --all --objects > /tmp/l1
|$ git rev-cache add --all
|objects: 115789
|paths: 1535
|f0cb1f09e309ee426c307cb85bb870467877de77
|endpoints:
|S 1cae2b588249e8b45239faebc658c7fa45948932
|S 4aec0d2391eb569776e332d9c15b354cd50a64c5
|S 1ff19ffed62bb581cd8eb635fe6e65ffca7ba1d0
|S fcd8ea7a91ad55d094a78c826083ebac149d81e5
|S 181301656f7a1086adcc41c3661551f190635003
|S 2898400a882bfb3c475fc2b53330912edc8a81f8
|S 7c7f2ebdb98f4844347f68b6e64c4968fa7f38e5
|S 1d9d1698ceb7b553f3cb1fdaa3150f0e85ab9cca
|S 348f73cc1db2d7b1412c40eba72319855e2959ff
|S fedc7d5458bd8e2e9589567041228a24a8d7eb4c
|S 0f57bf3ae8f0000de83907f0a674d70a879f7753
|S 7354ca323e31aa2d469498d06feebdcea137e93a
|S d47e28a143f40ad31b88e6d7b9b18bae60d21b01
|S 991ab5ed8769cd1a425b96b92772c89244bc957c
|S 07827905813bce9cadb9db2faac5848a61c7e69f
|S aff6ae5e2f10c4a8e399bf5aa446a58d74444aba
|S 6849908a55d0e7a95fa715310f739cfab4dd8def
|S f073bf0b423cf6a2ee5a5e9b5e70be3b91897a7f
|S b9376927508a2401c20bdb5c5c0608797f822524
|S 9d412d627575478ffdda4209eca9babde061fe2f
|S d187b5de2c2895f6cb4a544e78fcbc1ecf3ba172
|final return value: 0
|$ git rev-list --all --objects > /tmp/l2
|$ wc -l -c /tmp/l1 /tmp/l2
|  109382  5525988 /tmp/l1
|  109382  5473891 /tmp/l2

Result with the rev-cache populated returns the same number of 
revisions, but not the same amount of data.

|$ diff -u /tmp/l1 /tmp/l2
|--- /tmp/l1     2009-08-18 23:22:03.000000000 -0400
|+++ /tmp/l2     2009-08-18 23:25:02.000000000 -0400
|@@ -4,221 +4,38 @@
| ff212f80287ab15162035d39bac511f6edacca00
| 69527b0cc148b5fa320cce68e82216f0ba7117d9
| fdbe5cf05690accc4d701e2e8d8c69baccf76812
|-9d412d627575478ffdda4209eca9babde061fe2f
|-b9376927508a2401c20bdb5c5c0608797f822524
|-f073bf0b423cf6a2ee5a5e9b5e70be3b91897a7f
|-fd31906d2285fb914e51a2fb78de6683b481951b
|-7415dcd3bc6a6103d532d75b77897399af2bc015
|-397b844013068a658e2c4dbea05fb57559433f6e
|-15173bda08b5481c2014f04228b95f90f1e500e6
|-d6758648e1ff1f7ace2ddb3682a8189ec634925b
|-7475ee6d358167e2e4dcdea6c7a67f3619c35315
|-83bf4a2869721ba728c5c8b0bdb8ad9a3a43b94b
|-5f50d92cc49150a83e8fa7b352cb9e14ae4d6570
[...]

The object order appears to be rather different.  Why so?

|$ sort /tmp/l1 > /tmp/l1_sorted
|$ sort /tmp/l2 > /tmp/l2_sorted
|$ diff -u /tmp/l1_sorted  /tmp/l2_sorted
|--- /tmp/l1_sorted      2009-08-18 23:35:48.000000000 -0400
|+++ /tmp/l2_sorted      2009-08-18 23:36:23.000000000 -0400
|@@ -1,7 +1,7 @@
| 000079a2eaef17b7eae70e1f0f635557ea67b644
| 00013cafe6980411aa6fdd940784917b5ff50f0a man1/git-merge-base.1
| 000147bbe4a00525d68efb1358c013812e10dcca contrib/thunderbird-patch-inline/README
|-0001710072c5f0f708323d99e1bb632d7911c842 Documentation/git-peek-remote.txt
|+0001710072c5f0f708323d99e1bb632d7911c842 git-peek-remote.txt
| 000182eacf99cde27d5916aa415921924b82972c
| 0002d8a2fb6add585184350d11284840293dea4d git-daemon.html
| 0003692409f153dd725b3455dfc2e128276cfbe2
|@@ -29,7 +29,7 @@
| 0012954b6f795d75d442f4c58f29dc194888d022 remote.c
| 0012ba2108aa42947dedf19f3db2de73a67cc4f5
| 00133ed4f910f42ae7aaba32bcda1c1e261bc776 man1/git-update-server-info.1
|-001503205b24d5c20ec10792c4ab6c4c7221bcb7 Documentation/diff-format.txt
|+001503205b24d5c20ec10792c4ab6c4c7221bcb7 diff-format.txt
| 0016a48251abefed11efc919703d980a21c95f2c
| 00183cbb3d0f60852d7286e766c9b631c0c1f952
| 00188e33e825c8000dee8e3f4209a264615ce8a1
[...]

So... Why is the leading path component dropped sometimes?  That 
explains the output size difference.  And the drop is not coherent 
either:

|$ grep "diff-format.txt" /tmp/l2
|b71712473ed13020bca3f133ea4a28c5081b7f9e diff-format.txt
|1eeb1c76838c1911fc4d57b36a16dece0538809a diff-format.txt
|aafd3a394126e4718b593eb5727412e16d2334e4 diff-format.txt
|400cbb3b1c120b93278472678ee7bdb87a74f95b diff-format.txt
|2c3a4c433b2a6d2b0846243a4f1dbebeed45236e diff-format.txt
|9709c35c98bc678d1f2e339c8e2d4bbcd7e6231f diff-format.txt
|001503205b24d5c20ec10792c4ab6c4c7221bcb7 diff-format.txt
|18d49d2c3baa81983b19a938598eb091fb7809ef diff-format.txt
|e38a1f14056b2e3cfe3c281eb7df7e5b44d520db diff-format.txt
|378e72f38f37eef50135c3907ccd605652f4fd96 diff-format.txt
|883c1bb0a638d97278cdb66e288bc766a32626af diff-format.txt
|e4520e28e53661159454e02c703be772d43bbc09 diff-format.txt
|ed4ebcbab76c23e599a3f3d62072e16d2cbe7cb1 diff-format.txt
|617d8f526f914360c612d2e2822f1c883c9f5115 diff-format.txt
|0398b408c05d2dccb9806c0add6d1acd13871d74 diff-format.txt
|97756ec03086614051d5780b00825e24ee5ef56f diff-format.txt
|2060ae2fcb9935ed11ecb71a093ad22c661339bf Documentation/diff-format.txt
|174d63a1ee4238aac88c76daee6f3f1bead0d6b5 Documentation/diff-format.txt
|b426a14f5e5fa29bfdb8f026d995dc182930073d Documentation/diff-format.txt
|d1d0d2d3dc8760a8030313de3dd14a942d0fc2bf Documentation/diff-format.txt
|bfe634dcd3664d0e3b18b212db2f3f16b63a385b Documentation/diff-format.txt
|dacd8fb53488fadd7ebb6c1964e60a60072eae80 Documentation/diff-format.txt
|6e9fa8cdb70faead4649f71aea4fff8f50d17271 Documentation/diff-format.txt
|424e75a1c2d4747226ac1b55caac90b544f8f273 Documentation/diff-format.txt
|811d143808a13034de41c1cdcc834ef838d01ce8 Documentation/diff-format.txt
|9298d79e51bf87ccff66357227c7df4db4c820c4 Documentation/diff-format.txt
|d6ce035419081e3fbfc2375ac667517f5f52c980 Documentation/diff-format.txt
|6748761ef61cf35473ee304bc8bd44134cdccaf4 Documentation/diff-format.txt
|1d92a01a02543e55d0feb3541ee594fbc638136c Documentation/diff-format.txt
|f85a605f0a336f506cf5cf46476a43e4c56b3e66 Documentation/diff-format.txt
|7e9a515ad74b16c3f82eba71a9503c6696c77503 Documentation/diff-format.txt
|9e645399752e9b18f097840906fc640a1121d982 Documentation/diff-format.txt
|1a99e85ee58bfc47192e3e3cd409af948c989f80 Documentation/diff-format.txt
|3af197cd2c62f990f7a67ac52277ccf0521d268e Documentation/diff-format.txt

I think you'll have to fix those issues.


Nicolas

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 6/6 (v4)] support for path name caching in rev-cache
  2009-08-19  3:52     ` Nicolas Pitre
@ 2009-08-20 12:43       ` Nick Edelen
  2009-08-20 23:22         ` Nick Edelen
  0 siblings, 1 reply; 13+ messages in thread
From: Nick Edelen @ 2009-08-20 12:43 UTC (permalink / raw)
  To: Nicolas Pitre
  Cc: Junio C Hamano, Johannes Schindelin, Sam Vilain,
	Michael J Gruber, Jeff King, Shawn O. Pearce, Andreas Ericsson,
	Christian Couder, git

> Result with the rev-cache populated returns the same number of
> revisions, but not the same amount of data.
> [...]
> So... Why is the leading path component dropped sometimes?  That
> explains the output size difference.  And the drop is not coherent
> either:

It looks like I didn't realize that tree_entry() returns only the
entry name, and not the full path, which seems like a case of not
thinking, just being logical.  The unit tests didn't catch it b/c it
only occurs for root commits, and none of the root commits in the test
had directories involved.

> The object order appears to be rather different.  Why so?

The non-commit object order has to be different because they're added
in a different way.  It's sorta like vanilla rev-list ordering, except
objects are /only/ appended that are introduced per current commit,
rather than all that haven't been seen yet.  It's still a coherent
ordering, and despite the different mechanism I've still enforced the
tag-tree-blob ordering of the normal rev-list.

I've fixed the name issue and added that scenario to the unit tests.
I'll re-upload this last patch in a bit (either minutes if my flight
dosn't leave soon or hours if so).

 - Nick

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 6/6 (v4)] support for path name caching in rev-cache
  2009-08-20 12:43       ` Nick Edelen
@ 2009-08-20 23:22         ` Nick Edelen
  2009-08-21  0:05           ` Nicolas Pitre
  0 siblings, 1 reply; 13+ messages in thread
From: Nick Edelen @ 2009-08-20 23:22 UTC (permalink / raw)
  To: Nicolas Pitre
  Cc: Junio C Hamano, Johannes Schindelin, Sam Vilain,
	Michael J Gruber, Jeff King, Shawn O. Pearce, Andreas Ericsson,
	Christian Couder, git

Ok we actually have a small problem, semi-related to the object
listing.  By default rev-list will list everything not seen in each
tree, whereas rev-cache will only list object introduced in a given
commit.  This becomes problematic if you have two different files with
the same content in the same tree: rev-cache will show the name of the
youngest file; vanilla rev-list will list the name soonest encountered
in the tree (which can even change if, e.g., a subdir is renamed so as
to be list in a different order).

In fact, even if they're not in the same tree we could have a similar
problem.  Commits are stored topologically in cache slices, so output
is always in topo order.  If the same object is introduced in parallel
branches under different names, the outputted name with `rev-list
--all --objects` (vanilla) could be different from `rev-list --all
--objects` (cached) could be different from `rev-list --all
--topo-order --objects`.

This isn't feasably changable in rev-cache, as a) the cached position
(and hence final output order) is effectively unrelated to tree
structure, and b) commits _have_ to be ordered topologically for
rev-cache to function.

The descrepency strikes me as something of a non-issue with
pack-objects' deltafication, as the object will fit with either of its
names.  It will mean that the (already sorta finicky) object names
won't have garuanteed consistency between cached/non-cached calls to
rev-list.  This is something of a corner case and dosn't strike me as
a huge issue, but I figured I should consult you all before presuming
things about git's interface.

 - Nick

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 6/6 (v4)] support for path name caching in rev-cache
  2009-08-20 23:22         ` Nick Edelen
@ 2009-08-21  0:05           ` Nicolas Pitre
  0 siblings, 0 replies; 13+ messages in thread
From: Nicolas Pitre @ 2009-08-21  0:05 UTC (permalink / raw)
  To: Nick Edelen
  Cc: Junio C Hamano, Johannes Schindelin, Sam Vilain,
	Michael J Gruber, Jeff King, Shawn O. Pearce, Andreas Ericsson,
	Christian Couder, git

On Fri, 21 Aug 2009, Nick Edelen wrote:

> Ok we actually have a small problem, semi-related to the object
> listing.  By default rev-list will list everything not seen in each
> tree, whereas rev-cache will only list object introduced in a given
> commit.  This becomes problematic if you have two different files with
> the same content in the same tree: rev-cache will show the name of the
> youngest file; vanilla rev-list will list the name soonest encountered
> in the tree (which can even change if, e.g., a subdir is renamed so as
> to be list in a different order).
> 
> In fact, even if they're not in the same tree we could have a similar
> problem.  Commits are stored topologically in cache slices, so output
> is always in topo order.  If the same object is introduced in parallel
> branches under different names, the outputted name with `rev-list
> --all --objects` (vanilla) could be different from `rev-list --all
> --objects` (cached) could be different from `rev-list --all
> --topo-order --objects`.
> 
> This isn't feasably changable in rev-cache, as a) the cached position
> (and hence final output order) is effectively unrelated to tree
> structure, and b) commits _have_ to be ordered topologically for
> rev-cache to function.
> 
> The descrepency strikes me as something of a non-issue with
> pack-objects' deltafication, as the object will fit with either of its
> names.  It will mean that the (already sorta finicky) object names
> won't have garuanteed consistency between cached/non-cached calls to
> rev-list.  This is something of a corner case and dosn't strike me as
> a huge issue, but I figured I should consult you all before presuming
> things about git's interface.

The name is actually used only as a clue to delta similar objects 
together.  So this is indeed a non issue, as long as the discrepency is 
well understood and, more importantly, properly documented.  The above 
is certainly a good start.


Nicolas

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 6/6 (v4)] support for path name caching in rev-cache
  2009-08-18 11:51 ` Nick Edelen
@ 2009-08-21  4:48   ` Nick Edelen
  2009-09-07 14:11     ` Nick Edelen
  2009-10-02 22:12   ` Nick Edelen
  1 sibling, 1 reply; 13+ messages in thread
From: Nick Edelen @ 2009-08-21  4:48 UTC (permalink / raw)
  To: Nick Edelen, Junio C Hamano, Nicolas Pitre, Johannes Schindelin,
	Sam Vilain

An update to caching mechanism, allowing path names to be cached for blob and 
tree objects.  A list of names appearing in each cache slice is appended to the 
end of the slice, which is referenced by variable-sized indexes per entry.  
This allows pack-objects to more intelligently schedule unpacked/poorly packed 
object, and enables proper duplication of rev-list's behaivor.

Signed-off-by: Nick Edelen <sirnot@gmail.com>

---
fixed name caching issues, added unit test for error.

 builtin-rev-cache.c       |    3 +-
 rev-cache.c               |  333 +++++++++++++++++++++++++++++++++++++--------
 rev-cache.h               |   16 ++-
 revision.h                |    6 +-
 t/t6015-rev-cache-list.sh |    8 +-
 5 files changed, 300 insertions(+), 66 deletions(-)

diff --git a/builtin-rev-cache.c b/builtin-rev-cache.c
index 8f41123..4c1766d 100644
--- a/builtin-rev-cache.c
+++ b/builtin-rev-cache.c
@@ -177,13 +177,14 @@ static int handle_walk(int argc, const char *argv[])
 	fprintf(stderr, "pending:\n");
 	for (i = 0; i < revs.pending.nr; i++) {
 		struct object *obj = revs.pending.objects[i].item;
+		const char *name = revs.pending.objects[i].name;
 
 		/* unfortunately, despite our careful generation, object duplication *is* a possibility...
 		 * (eg. same object introduced into two different branches) */
 		if (obj->flags & SEEN)
 			continue;
 
-		printf("%s\n", sha1_to_hex(revs.pending.objects[i].item->sha1));
+		printf("%s %s\n", sha1_to_hex(revs.pending.objects[i].item->sha1), name);
 		obj->flags |= SEEN;
 	}
 
diff --git a/rev-cache.c b/rev-cache.c
index 04a9b02..3595f66 100644
--- a/rev-cache.c
+++ b/rev-cache.c
@@ -17,6 +17,14 @@ struct bad_slice {
 	struct bad_slice *next;
 };
 
+struct name_list {
+	unsigned char sha1[20];
+	unsigned int len;
+	struct name_list *next;
+
+	char buf[FLEX_ARRAY];
+};
+
 struct cache_slice_pointer {
 	char signature[8]; /* REVCOPTR */
 	char version;
@@ -29,10 +37,13 @@ static uint32_t fanout[0xff + 2];
 static unsigned char *idx_map;
 static int idx_size;
 static struct rc_index_header idx_head;
-static char no_idx, add_to_pending;
-static struct bad_slice *bad_slices;
+static char no_idx, add_to_pending, add_names;
 static unsigned char *idx_caches;
 
+static struct bad_slice *bad_slices;
+static struct name_list *name_lists, *cur_name_list;
+
+static struct strbuf *acc_name_buffer;
 static struct strbuf *acc_buffer;
 
 #define SLOP			5
@@ -79,7 +90,7 @@ struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondis
 	if (!dst)
 		dst = &entry[cur++ & 0x3];
 
-	dst->type = src->flags >> 5;
+	dst->type = src->flags >> 5 & 0x03;
 	dst->is_end = !!(src->flags & 0x10);
 	dst->is_start = !!(src->flags & 0x08);
 	dst->uninteresting = !!(src->flags & 0x04);
@@ -90,8 +101,9 @@ struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondis
 	dst->merge_nr = src->merge_nr;
 	dst->split_nr = src->split_nr;
 
-	dst->size_size = src->sizes >> 5;
-	dst->padding = src->sizes & 0x1f;
+	dst->size_size = src->sizes >> 5 & 0x03;
+	dst->name_size = src->sizes >> 2 & 0x03;
+	dst->padding = src->sizes & 0x02;
 
 	dst->date = ntohl(src->date);
 	dst->path = ntohs(src->path);
@@ -120,6 +132,7 @@ struct rc_object_entry_ondisk *to_disked_rc_object_entry(struct rc_object_entry
 	dst->split_nr = src->split_nr;
 
 	dst->sizes  = (unsigned char)src->size_size << 5;
+	dst->sizes |= (unsigned char)src->name_size << 2;
 	dst->sizes |= (unsigned char)src->padding;
 
 	dst->date = htonl(src->date);
@@ -192,6 +205,12 @@ static void cleanup_cache_slices(void)
 		idx_map = 0;
 	}
 
+	while (name_lists) {
+		struct name_list *nl = name_lists->next;
+		free(name_lists);
+		name_lists = nl;
+	}
+
 }
 
 static int init_index(void)
@@ -324,7 +343,7 @@ static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsig
 	struct blob *blob;
 	struct tree *tree;
 	struct object *obj;
-	unsigned long size;
+	unsigned long size, name_index;
 
 	size = decode_size(ptr + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
 	switch (entry->type) {
@@ -357,9 +376,22 @@ static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsig
 		return;
 	}
 
+	if (add_names && cur_name_list) {
+		name_index = decode_size(ptr + RC_ENTRY_NAME_OFFSET(entry), entry->name_size);
+
+		if (name_index >= cur_name_list->len)
+			name_index = 0;
+	} else name_index = 0;
+
 	obj->flags |= FACE_VALUE;
-	if (add_to_pending)
-		add_pending_object(revs, obj, "");
+	if (add_to_pending) {
+		char *name = "";
+
+		if (name_index)
+			name = cur_name_list->buf + name_index;
+
+		add_pending_object(revs, obj, name);
+	}
 }
 
 static int setup_traversal(struct rc_slice_header *head, unsigned char *map, struct commit *commit, struct commit_list **work,
@@ -714,15 +746,44 @@ end:
 	return retval;
 }
 
-static int get_cache_slice_header(unsigned char *cache_sha1, unsigned char *map, int len, struct rc_slice_header *head)
+static struct name_list *get_cache_slice_name_list(struct rc_slice_header *head, int fd)
+{
+	struct name_list *nl = name_lists;
+
+	while (nl) {
+		if (!hashcmp(nl->sha1, head->sha1))
+			break;
+		nl = nl->next;
+	}
+
+	if (nl)
+		return nl;
+
+	nl = xcalloc(1, sizeof(struct name_list) + head->name_size);
+	nl->len = head->name_size;
+	hashcpy(nl->sha1, head->sha1);
+
+	lseek(fd, head->size, SEEK_SET);
+	read_in_full(fd, nl->buf, head->name_size);
+
+	nl->next = name_lists;
+	name_lists = nl;
+
+	return nl;
+}
+
+static int get_cache_slice_header(int fd, unsigned char *cache_sha1, int len, struct rc_slice_header *head)
 {
 	int t;
 
-	memcpy(head, map, sizeof(struct rc_slice_header));
+	if (xread(fd, head, sizeof(struct rc_slice_header)) != sizeof(struct rc_slice_header))
+		return -1;
+
 	head->ofs_objects = ntohl(head->ofs_objects);
 	head->object_nr = ntohl(head->object_nr);
 	head->size = ntohl(head->size);
 	head->path_nr = ntohs(head->path_nr);
+	head->name_size = ntohl(head->name_size);
 
 	if (memcmp(head->signature, "REVCACHE", 8))
 		return -1;
@@ -731,10 +792,10 @@ static int get_cache_slice_header(unsigned char *cache_sha1, unsigned char *map,
 	if (hashcmp(head->sha1, cache_sha1))
 		return -3;
 	t = sizeof(struct rc_slice_header);
-	if (t != head->ofs_objects || t >= len)
+	if (t != head->ofs_objects)
 		return -4;
-
-	head->size = len;
+	if (head->size + head->name_size != len)
+		return -5;
 
 	return 0;
 }
@@ -786,7 +847,7 @@ int traverse_cache_slice(struct rev_info *revs,
 	unsigned long *date_so_far, int *slop_so_far,
 	struct commit_list ***queue, struct commit_list **work)
 {
-	int fd = -1, retval = -3;
+	int fd = -1, t, retval;
 	struct stat fi;
 	struct rc_slice_header head;
 	struct rev_cache_info *rci;
@@ -802,26 +863,31 @@ int traverse_cache_slice(struct rev_info *revs,
 	/* load options */
 	rci = &revs->rev_cache_info;
 	add_to_pending = rci->add_to_pending;
+	add_names = rci->add_names;
 
 	memset(&head, 0, sizeof(struct rc_slice_header));
+#	define ERROR(x)		do { retval = (x); goto end; } while (0);
 
 	fd = open_cache_slice(cache_sha1, O_RDONLY);
 	if (fd == -1)
-		goto end;
+		ERROR(-1);
 	if (fstat(fd, &fi) || fi.st_size < sizeof(struct rc_slice_header))
-		goto end;
+		ERROR(-2);
+
+	if ((t = get_cache_slice_header(fd, cache_sha1, fi.st_size, &head)) < 0)
+		ERROR(-t);
+	if (add_names)
+		cur_name_list = get_cache_slice_name_list(&head, fd);
 
-	map = xmmap(0, fi.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	map = xmmap(0, head.size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
 	if (map == MAP_FAILED)
-		goto end;
-	if (get_cache_slice_header(cache_sha1, map, fi.st_size, &head))
-		goto end;
+		ERROR(-3);
 
 	retval = traverse_cache_slice_1(&head, map, revs, commit, date_so_far, slop_so_far, queue, work);
 
 end:
 	if (map != MAP_FAILED)
-		munmap(map, fi.st_size);
+		munmap(map, head.size);
 	if (fd != -1)
 		close(fd);
 
@@ -829,6 +895,7 @@ end:
 	if (retval)
 		mark_bad_slice(cache_sha1);
 
+#	undef ERROR
 	return retval;
 }
 
@@ -1113,23 +1180,110 @@ static unsigned long decode_size(unsigned char *str, int len)
 	return size;
 }
 
+
+#define NL_HASH_TABLE_SIZE		(0xffff + 1)
+#define NL_HASH_NUMBER			(NL_HASH_TABLE_SIZE >> 3)
+
+struct name_list_hash {
+	int ind;
+	struct name_list_hash *next;
+};
+
+static struct name_list_hash **nl_hash_table;
+static unsigned char *nl_hashes;
+
+/* FNV-1a hash */
+static unsigned int hash_name(const char *name)
+{
+	unsigned int hash = 2166136261ul;
+	const char *p = name;
+
+	while (*p) {
+		hash ^= *p++;
+		hash *= 16777619ul;
+	}
+
+	return hash & 0xffff;
+}
+
+static int name_in_list(const char *name)
+{
+	unsigned int h = hash_name(name);
+	struct name_list_hash *entry = nl_hash_table[h];
+
+	while (entry && strcmp(acc_name_buffer->buf + entry->ind, name))
+		entry = entry->next;
+
+	if (entry)
+		return entry->ind;
+
+	/* add name to buffer and create hash reference */
+	entry = xcalloc(1, sizeof(struct name_list_hash));
+	entry->ind = acc_name_buffer->len;
+	strbuf_add(acc_name_buffer, name, strlen(name) + 1);
+
+	entry->next = nl_hash_table[h];
+	nl_hash_table[h] = entry;
+
+	nl_hashes[h / 8] |= h % 8;
+
+	return entry->ind;
+}
+
+static void init_name_list_hash(void)
+{
+	nl_hash_table = xcalloc(NL_HASH_TABLE_SIZE, sizeof(struct name_list_hash));
+	nl_hashes = xcalloc(NL_HASH_NUMBER, 1);
+}
+
+static void cleanup_name_list_hash(void)
+{
+	int i;
+
+	for (i = 0; i < NL_HASH_NUMBER; i++) {
+		int j, ind = nl_hashes[i];
+
+		if (!ind)
+			continue;
+
+		for (j = 0; j < 8; j++) {
+			struct name_list_hash **entryp;
+
+			if (!(ind & 1 << j))
+				continue;
+
+			entryp = &nl_hash_table[i * 8 + j];
+			while (*entryp) {
+				struct name_list_hash *t = (*entryp)->next;
+
+				free(*entryp);
+				*entryp = t;
+			}
+		}
+	} /* code overhang! */
+
+	free(nl_hashes);
+	free(nl_hash_table);
+}
+
 static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *entryp,
-	struct strbuf *merge_str, struct strbuf *split_str)
+	struct strbuf *merge_str, struct strbuf *split_str, char *name, unsigned long size)
 {
 	struct rc_object_entry entry;
-	unsigned char size_str[7];
-	unsigned long size;
+	unsigned char size_str[7], name_str[7];
 	enum object_type type;
 	void *data;
 
 	if (entryp)
 		sha1 = entryp->sha1;
 
-	/* retrieve size data */
-	data = read_sha1_file(sha1, &type, &size);
+	if (!size) {
+		/* retrieve size data */
+		data = read_sha1_file(sha1, &type, &size);
 
-	if (data)
-		free(data);
+		if (data)
+			free(data);
+	}
 
 	/* initialize! */
 	if (!entryp) {
@@ -1147,6 +1301,9 @@ static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *
 
 	entryp->size_size = encode_size(size, size_str);
 
+	if (name)
+		entryp->name_size = encode_size(name_in_list(name), name_str);
+
 	/* write the muvabitch */
 	strbuf_add(acc_buffer, to_disked_rc_object_entry(entryp, 0), sizeof(struct rc_object_entry_ondisk));
 
@@ -1156,25 +1313,36 @@ static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *
 		strbuf_add(acc_buffer, split_str->buf, split_str->len);
 
 	strbuf_add(acc_buffer, size_str, entryp->size_size);
+
+	if (name)
+		strbuf_add(acc_buffer, name_str, entryp->name_size);
 }
 
 /* returns non-zero to continue parsing, 0 to skip */
 typedef int (*dump_tree_fn)(const unsigned char *, const char *, unsigned int); /* sha1, path, mode */
 
 /* we need to walk the trees by hash, so unfortunately we can't use traverse_trees in tree-walk.c */
-static int dump_tree(struct tree *tree, dump_tree_fn fn)
+static int dump_tree(struct tree *tree, dump_tree_fn fn, char *base)
 {
 	struct tree_desc desc;
 	struct name_entry entry;
 	struct tree *subtree;
-	int r;
+	char concatpath[PATH_MAX];
+	int r, baselen;
 
 	if (parse_tree(tree))
 		return -1;
 
+	baselen = strlen(base);
+	strcpy(concatpath, base);
+
 	init_tree_desc(&desc, tree->buffer, tree->size);
 	while (tree_entry(&desc, &entry)) {
-		switch (fn(entry.sha1, entry.path, entry.mode)) {
+		if (baselen + strlen(entry.path) + 1 >= PATH_MAX)
+			die("we have a problem: %s%s is too big for me to handle", base, entry.path);
+		strcpy(concatpath + baselen, entry.path);
+
+		switch (fn(entry.sha1, concatpath, entry.mode)) {
 		case 0 :
 			goto continue_loop;
 		default :
@@ -1186,7 +1354,8 @@ static int dump_tree(struct tree *tree, dump_tree_fn fn)
 			if (!subtree)
 				return -2;
 
-			if ((r = dump_tree(subtree, fn)) < 0)
+			strcat(concatpath, "/");
+			if ((r = dump_tree(subtree, fn, concatpath)) < 0)
 				return r;
 		}
 
@@ -1200,6 +1369,9 @@ continue_loop:
 static int dump_tree_callback(const unsigned char *sha1, const char *path, unsigned int mode)
 {
 	strbuf_add(acc_buffer, sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, path, strlen(path) + 1);
 
 	return 1;
 }
@@ -1213,6 +1385,9 @@ static void tree_addremove(struct diff_options *options,
 		return;
 
 	strbuf_add(acc_buffer, sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, concatpath, strlen(concatpath) + 1);
 }
 
 static void tree_change(struct diff_options *options,
@@ -1225,12 +1400,15 @@ static void tree_change(struct diff_options *options,
 		return;
 
 	strbuf_add(acc_buffer, new_sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, concatpath, strlen(concatpath) + 1);
 }
 
 static int add_unique_objects(struct commit *commit)
 {
 	struct commit_list *list;
-	struct strbuf os, ost, *orig_buf;
+	struct strbuf os, ost, names, *orig_name_buf, *orig_buf;
 	struct diff_options opts;
 	int i, j, next;
 	char is_first = 1;
@@ -1238,13 +1416,17 @@ static int add_unique_objects(struct commit *commit)
 	/* ...no, calculate unique objects */
 	strbuf_init(&os, 0);
 	strbuf_init(&ost, 0);
+	strbuf_init(&names, 0);
 	orig_buf = acc_buffer;
+	orig_name_buf = acc_name_buffer;
+	acc_name_buffer = &names;
 
 	diff_setup(&opts);
 	DIFF_OPT_SET(&opts, RECURSIVE);
 	DIFF_OPT_SET(&opts, TREE_IN_RECURSIVE);
 	opts.change = tree_change;
 	opts.add_remove = tree_addremove;
+#	define ENTRY_SIZE (20 + sizeof(size_t))
 
 	/* this is only called for non-ends (ie. all parents interesting) */
 	for (list = commit->parents; list; list = list->next) {
@@ -1255,20 +1437,20 @@ static int add_unique_objects(struct commit *commit)
 
 		strbuf_setlen(acc_buffer, 0);
 		diff_tree_sha1(list->item->tree->object.sha1, commit->tree->object.sha1, "", &opts);
-		qsort(acc_buffer->buf, acc_buffer->len / 20, 20, (int (*)(const void *, const void *))hashcmp);
+		qsort(acc_buffer->buf, acc_buffer->len / ENTRY_SIZE, ENTRY_SIZE, (int (*)(const void *, const void *))hashcmp);
 
 		/* take intersection */
 		if (!is_first) {
-			for (next = i = j = 0; i < os.len; i += 20) {
+			for (next = i = j = 0; i < os.len; i += ENTRY_SIZE) {
 				while (j < ost.len && hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)) < 0)
-					j += 20;
+					j += ENTRY_SIZE;
 
 				if (j >= ost.len || hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)))
 					continue;
 
 				if (next != i)
-					memcpy(os.buf + next, os.buf + i, 20);
-				next += 20;
+					memcpy(os.buf + next, os.buf + i, ENTRY_SIZE);
+				next += ENTRY_SIZE;
 			}
 
 			if (next != i)
@@ -1280,31 +1462,39 @@ static int add_unique_objects(struct commit *commit)
 	/* no parents (!) */
 	if (is_first) {
 		acc_buffer = &os;
-		dump_tree(commit->tree, dump_tree_callback);
+		dump_tree(commit->tree, dump_tree_callback, "");
 	}
 
 	/* the ordering of non-commit objects dosn't really matter, so we're not gonna bother */
 	acc_buffer = orig_buf;
-	for (i = 0; i < os.len; i += 20)
-		add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0);
+	acc_name_buffer = orig_name_buf;
+	for (i = 0; i < os.len; i += ENTRY_SIZE)
+		add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0, names.buf + *(size_t *)(os.buf + i + 20), 0);
 
 	/* last but not least, the main tree */
-	add_object_entry(commit->tree->object.sha1, 0, 0, 0);
+	add_object_entry(commit->tree->object.sha1, 0, 0, 0, 0, 0);
 
-	return i / 20 + 1;
+	strbuf_release(&ost);
+	strbuf_release(&os);
+	strbuf_release(&names);
+
+	return i / ENTRY_SIZE + 1;
+#	undef ENTRY_SIZE
 }
 
 static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *index)
 {
-	unsigned char *map = mapping->map;
 	int i = *index, object_nr = 0;
+	unsigned char *map = mapping->map;
 	struct rc_object_entry *entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
+	unsigned long size;
 
 	i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
 	while (i < mapping->size) {
-		int pos = i;
+		char *name;
+		int name_index, pos = i;
 
-		entry = RC_OBTAIN_OBJECT_ENTRY(map + i;
+		entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
 		i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
 
 		if (entry->type == OBJ_COMMIT) {
@@ -1312,7 +1502,15 @@ static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *inde
 			return object_nr;
 		}
 
-		strbuf_add(acc_buffer, map + pos, i - pos);
+		name_index = decode_size(map + pos + RC_ENTRY_NAME_OFFSET(entry), entry->name_size);
+		if (name_index && name_index < mapping->name_size)
+			name = mapping->names + name_index;
+		else
+			name = 0;
+
+		size = decode_size(map + pos + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
+
+		add_object_entry(0, entry, 0, 0, name, size);
 		object_nr++;
 	}
 
@@ -1397,6 +1595,7 @@ void init_rev_cache_info(struct rev_cache_info *rci)
 	rci->overwrite_all = 0;
 
 	rci->add_to_pending = 1;
+	rci->add_names = 1;
 
 	rci->ignore_size = 0;
 }
@@ -1421,9 +1620,9 @@ int make_cache_slice(struct rev_cache_info *rci,
 	struct rc_slice_header head;
 	struct commit *commit;
 	unsigned char sha1[20];
-	struct strbuf merge_paths, split_paths;
+	struct strbuf merge_paths, split_paths, namelist;
 	int object_nr, total_sz, fd;
-	char file[PATH_MAX], *newfile;
+	char file[PATH_MAX], null, *newfile;
 	struct rev_cache_info *trci;
 	git_SHA_CTX ctx;
 
@@ -1438,7 +1637,13 @@ int make_cache_slice(struct rev_cache_info *rci,
 	strbuf_init(&endlist, 0);
 	strbuf_init(&merge_paths, 0);
 	strbuf_init(&split_paths, 0);
+	strbuf_init(&namelist, 0);
 	acc_buffer = &buffer;
+	acc_name_buffer = &namelist;
+
+	null = 0;
+	strbuf_add(&namelist, &null, 1);
+	init_name_list_hash();
 
 	if (!revs) {
 		revs = &therevs;
@@ -1469,6 +1674,7 @@ int make_cache_slice(struct rev_cache_info *rci,
 	trci = &revs->rev_cache_info;
 	init_rev_cache_info(trci);
 	trci->add_to_pending = 0;
+	trci->add_names = 0;
 
 	setup_revisions(0, 0, revs, 0);
 	if (prepare_revision_walk(revs))
@@ -1506,7 +1712,7 @@ int make_cache_slice(struct rev_cache_info *rci,
 
 		commit->indegree = 0;
 
-		add_object_entry(0, &object, &merge_paths, &split_paths);
+		add_object_entry(0, &object, &merge_paths, &split_paths, 0, 0);
 		object_nr++;
 
 		if (rci->objects && !object.is_end) {
@@ -1532,10 +1738,16 @@ int make_cache_slice(struct rev_cache_info *rci,
 		total_sz += buffer.len;
 	}
 
+	/* write path name lookup list */
+	head.name_size = htonl(namelist.len);
+	write_in_full(fd, namelist.buf, namelist.len);
+
 	/* go ahead a free some stuff... */
 	strbuf_release(&buffer);
 	strbuf_release(&merge_paths);
 	strbuf_release(&split_paths);
+	strbuf_release(&namelist);
+	cleanup_name_list_hash();
 	if (path_sz)
 		free(paths);
 	while (path_track_alloc)
@@ -1993,6 +2205,7 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
 	for (i = idx_head.cache_nr - 1; i >= 0; i--) {
 		struct rev_cache_slice_map *map = rci->maps + i;
 		struct stat fi;
+		struct rc_slice_header head;
 		int fd;
 
 		if (!map->size)
@@ -2005,13 +2218,20 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
 			continue;
 		if (fi.st_size < sizeof(struct rc_slice_header))
 			continue;
+		if (get_cache_slice_header(fd, idx_caches + i * 20, fi.st_size, &head))
+			continue;
 
-		map->map = xmmap(0, fi.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+		map->map = xmmap(0, head.size, PROT_READ, MAP_PRIVATE, fd, 0);
 		if (map->map == MAP_FAILED)
 			continue;
 
+		lseek(fd, head.size, SEEK_SET);
+		map->names = xcalloc(head.name_size, 1);
+		read_in_full(fd, map->names, head.name_size);
+
 		close(fd);
-		map->size = fi.st_size;
+		map->size = head.size;
+		map->name_size = head.name_size;
 	}
 
 	rci->make_index = 0;
@@ -2028,6 +2248,7 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
 		if (!map->size)
 			continue;
 
+		free(map->names);
 		munmap(map->map, map->size);
 	}
 	free(rci->maps);
@@ -2049,7 +2270,6 @@ static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
 {
 	struct rc_slice_header head;
 	int fd, len, retval = -1;
-	unsigned char *map = MAP_FAILED;
 	struct stat fi;
 
 	len = strlen(slice_path);
@@ -2064,17 +2284,12 @@ static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
 	if (fstat(fd, &fi) || fi.st_size < sizeof(head))
 		goto end;
 
-	map = xmmap(0, sizeof(head), PROT_READ, MAP_PRIVATE, fd, 0);
-	if (map == MAP_FAILED)
-		goto end;
-	if (get_cache_slice_header(sha1, map, fi.st_size, &head))
+	if (get_cache_slice_header(fd, sha1, fi.st_size, &head))
 		goto end;
 
 	retval = 0;
 
 end:
-	if (map != MAP_FAILED)
-		munmap(map, sizeof(head));
 	if (fd > 0)
 		close(fd);
 
diff --git a/rev-cache.h b/rev-cache.h
index 14437d8..c88ceae 100644
--- a/rev-cache.h
+++ b/rev-cache.h
@@ -10,8 +10,14 @@
 #define RC_OBTAIN_OBJECT_ENTRY(p)			from_disked_rc_object_entry((struct rc_object_entry_ondisk *)(p), 0)
 #define RC_OBTAIN_INDEX_ENTRY(p)			from_disked_rc_index_entry((struct rc_index_entry_ondisk *)(p), 0)
 
-#define RC_ACTUAL_OBJECT_ENTRY_SIZE(e)		(sizeof(struct rc_object_entry_ondisk) + RC_PATH_SIZE((e)->merge_nr + (e)->split_nr) + (e)->size_size)
-#define RC_ENTRY_SIZE_OFFSET(e)				(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->size_size)
+#define RC_ACTUAL_OBJECT_ENTRY_SIZE(e)	(\
+	sizeof(struct rc_object_entry_ondisk) + \
+	RC_PATH_SIZE((e)->merge_nr + (e)->split_nr) + \
+	(e)->size_size + \
+	(e)->name_size\
+)
+#define RC_ENTRY_SIZE_OFFSET(e)			(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->name_size - (e)->size_size)
+#define RC_ENTRY_NAME_OFFSET(e)			(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->name_size)
 
 /* single index maps objects to cache files */
 struct rc_index_header {
@@ -50,6 +56,8 @@ struct rc_slice_header {
 	uint32_t size;
 
 	unsigned char sha1[20];
+
+	uint32_t name_size;
 };
 
 struct rc_object_entry_ondisk {
@@ -76,7 +84,8 @@ struct rc_object_entry {
 	unsigned char merge_nr; /* : 7 */
 	unsigned char split_nr; /* : 7 */
 	unsigned size_size : 3;
-	unsigned padding : 5;
+	unsigned name_size : 3;
+	unsigned padding : 2;
 
 	uint32_t date;
 	uint16_t path;
@@ -84,6 +93,7 @@ struct rc_object_entry {
 	/* merge paths */
 	/* split paths */
 	/* size */
+	/* name id */
 };
 
 struct rc_index_entry *from_disked_rc_index_entry(struct rc_index_entry_ondisk *src, struct rc_index_entry *dst);
diff --git a/revision.h b/revision.h
index c3ec1b3..b2d5834 100644
--- a/revision.h
+++ b/revision.h
@@ -23,6 +23,9 @@ struct rev_cache_slice_map {
 	unsigned char *map;
 	int size;
 	int last_index;
+
+	char *names;
+	int name_size;
 };
 
 struct rev_cache_info {
@@ -36,7 +39,8 @@ struct rev_cache_info {
 	unsigned overwrite_all : 1;
 
 	/* traversal flags */
-	unsigned add_to_pending : 1;
+	unsigned add_to_pending : 1,
+		add_names : 1;
 
 	/* fuse options */
 	unsigned int ignore_size;
diff --git a/t/t6015-rev-cache-list.sh b/t/t6015-rev-cache-list.sh
index f2e34b1..3286560 100755
--- a/t/t6015-rev-cache-list.sh
+++ b/t/t6015-rev-cache-list.sh
@@ -4,8 +4,10 @@ test_description='git rev-cache tests'
 . ./test-lib.sh
 
 test_cmp_sorted() {
-	grep -io "[a-f0-9]*" $1 | sort >.tmpfile1 &&
-	grep -io "[a-f0-9]*" $2 | sort >.tmpfile2 &&
+# note that we're tip-toeing around the corner case of two objects/names
+# for the same SHA-1 => discrepencies between cached and non-cached walks
+	sort $1 >.tmpfile1 &&
+	sort $2 >.tmpfile2 &&
 	test_cmp .tmpfile1 .tmpfile2
 }
 
@@ -15,6 +17,8 @@ test_cmp_sorted() {
 # reuse
 test_expect_success 'init repo' '
 	echo bla >file &&
+	mkdir amaindir &&
+	echo watskeburt >amaindir/file &&
 	git add . &&
 	git commit -m "bla" &&
 
-- 
tg: (3e3bb6a..) t/revcache/names (depends on: t/revcache/docs)

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 6/6 (v4)] support for path name caching in rev-cache
  2009-08-21  4:48   ` Nick Edelen
@ 2009-09-07 14:11     ` Nick Edelen
  0 siblings, 0 replies; 13+ messages in thread
From: Nick Edelen @ 2009-09-07 14:11 UTC (permalink / raw)
  To: Nick Edelen, Junio C Hamano, Nicolas Pitre, Johannes Schindelin,
	Sam Vilain

An update to caching mechanism, allowing path names to be cached for blob and 
tree objects.  A list of names appearing in each cache slice is appended to the 
end of the slice, which is referenced by variable-sized indexes per entry.  
This allows pack-objects to more intelligently schedule unpacked/poorly packed 
object, and enables proper duplication of rev-list's behaivor.

Signed-off-by: Nick Edelen <sirnot@gmail.com>

---
 builtin-rev-cache.c       |    3 +-
 rev-cache.c               |  331 +++++++++++++++++++++++++++++++++++++--------
 rev-cache.h               |   16 ++-
 revision.h                |    6 +-
 t/t6017-rev-cache-list.sh |    8 +-
 5 files changed, 299 insertions(+), 65 deletions(-)

diff --git a/builtin-rev-cache.c b/builtin-rev-cache.c
index 8f41123..4c1766d 100644
--- a/builtin-rev-cache.c
+++ b/builtin-rev-cache.c
@@ -177,13 +177,14 @@ static int handle_walk(int argc, const char *argv[])
 	fprintf(stderr, "pending:\n");
 	for (i = 0; i < revs.pending.nr; i++) {
 		struct object *obj = revs.pending.objects[i].item;
+		const char *name = revs.pending.objects[i].name;
 
 		/* unfortunately, despite our careful generation, object duplication *is* a possibility...
 		 * (eg. same object introduced into two different branches) */
 		if (obj->flags & SEEN)
 			continue;
 
-		printf("%s\n", sha1_to_hex(revs.pending.objects[i].item->sha1));
+		printf("%s %s\n", sha1_to_hex(revs.pending.objects[i].item->sha1), name);
 		obj->flags |= SEEN;
 	}
 
diff --git a/rev-cache.c b/rev-cache.c
index 6becd4b..3595f66 100644
--- a/rev-cache.c
+++ b/rev-cache.c
@@ -17,6 +17,14 @@ struct bad_slice {
 	struct bad_slice *next;
 };
 
+struct name_list {
+	unsigned char sha1[20];
+	unsigned int len;
+	struct name_list *next;
+
+	char buf[FLEX_ARRAY];
+};
+
 struct cache_slice_pointer {
 	char signature[8]; /* REVCOPTR */
 	char version;
@@ -29,10 +37,13 @@ static uint32_t fanout[0xff + 2];
 static unsigned char *idx_map;
 static int idx_size;
 static struct rc_index_header idx_head;
-static char no_idx, add_to_pending;
-static struct bad_slice *bad_slices;
+static char no_idx, add_to_pending, add_names;
 static unsigned char *idx_caches;
 
+static struct bad_slice *bad_slices;
+static struct name_list *name_lists, *cur_name_list;
+
+static struct strbuf *acc_name_buffer;
 static struct strbuf *acc_buffer;
 
 #define SLOP			5
@@ -79,7 +90,7 @@ struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondis
 	if (!dst)
 		dst = &entry[cur++ & 0x3];
 
-	dst->type = src->flags >> 5;
+	dst->type = src->flags >> 5 & 0x03;
 	dst->is_end = !!(src->flags & 0x10);
 	dst->is_start = !!(src->flags & 0x08);
 	dst->uninteresting = !!(src->flags & 0x04);
@@ -90,8 +101,9 @@ struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondis
 	dst->merge_nr = src->merge_nr;
 	dst->split_nr = src->split_nr;
 
-	dst->size_size = src->sizes >> 5;
-	dst->padding = src->sizes & 0x1f;
+	dst->size_size = src->sizes >> 5 & 0x03;
+	dst->name_size = src->sizes >> 2 & 0x03;
+	dst->padding = src->sizes & 0x02;
 
 	dst->date = ntohl(src->date);
 	dst->path = ntohs(src->path);
@@ -120,6 +132,7 @@ struct rc_object_entry_ondisk *to_disked_rc_object_entry(struct rc_object_entry
 	dst->split_nr = src->split_nr;
 
 	dst->sizes  = (unsigned char)src->size_size << 5;
+	dst->sizes |= (unsigned char)src->name_size << 2;
 	dst->sizes |= (unsigned char)src->padding;
 
 	dst->date = htonl(src->date);
@@ -192,6 +205,12 @@ static void cleanup_cache_slices(void)
 		idx_map = 0;
 	}
 
+	while (name_lists) {
+		struct name_list *nl = name_lists->next;
+		free(name_lists);
+		name_lists = nl;
+	}
+
 }
 
 static int init_index(void)
@@ -324,7 +343,7 @@ static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsig
 	struct blob *blob;
 	struct tree *tree;
 	struct object *obj;
-	unsigned long size;
+	unsigned long size, name_index;
 
 	size = decode_size(ptr + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
 	switch (entry->type) {
@@ -357,9 +376,22 @@ static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsig
 		return;
 	}
 
+	if (add_names && cur_name_list) {
+		name_index = decode_size(ptr + RC_ENTRY_NAME_OFFSET(entry), entry->name_size);
+
+		if (name_index >= cur_name_list->len)
+			name_index = 0;
+	} else name_index = 0;
+
 	obj->flags |= FACE_VALUE;
-	if (add_to_pending)
-		add_pending_object(revs, obj, "");
+	if (add_to_pending) {
+		char *name = "";
+
+		if (name_index)
+			name = cur_name_list->buf + name_index;
+
+		add_pending_object(revs, obj, name);
+	}
 }
 
 static int setup_traversal(struct rc_slice_header *head, unsigned char *map, struct commit *commit, struct commit_list **work,
@@ -714,15 +746,44 @@ end:
 	return retval;
 }
 
-static int get_cache_slice_header(unsigned char *cache_sha1, unsigned char *map, int len, struct rc_slice_header *head)
+static struct name_list *get_cache_slice_name_list(struct rc_slice_header *head, int fd)
+{
+	struct name_list *nl = name_lists;
+
+	while (nl) {
+		if (!hashcmp(nl->sha1, head->sha1))
+			break;
+		nl = nl->next;
+	}
+
+	if (nl)
+		return nl;
+
+	nl = xcalloc(1, sizeof(struct name_list) + head->name_size);
+	nl->len = head->name_size;
+	hashcpy(nl->sha1, head->sha1);
+
+	lseek(fd, head->size, SEEK_SET);
+	read_in_full(fd, nl->buf, head->name_size);
+
+	nl->next = name_lists;
+	name_lists = nl;
+
+	return nl;
+}
+
+static int get_cache_slice_header(int fd, unsigned char *cache_sha1, int len, struct rc_slice_header *head)
 {
 	int t;
 
-	memcpy(head, map, sizeof(struct rc_slice_header));
+	if (xread(fd, head, sizeof(struct rc_slice_header)) != sizeof(struct rc_slice_header))
+		return -1;
+
 	head->ofs_objects = ntohl(head->ofs_objects);
 	head->object_nr = ntohl(head->object_nr);
 	head->size = ntohl(head->size);
 	head->path_nr = ntohs(head->path_nr);
+	head->name_size = ntohl(head->name_size);
 
 	if (memcmp(head->signature, "REVCACHE", 8))
 		return -1;
@@ -731,10 +792,10 @@ static int get_cache_slice_header(unsigned char *cache_sha1, unsigned char *map,
 	if (hashcmp(head->sha1, cache_sha1))
 		return -3;
 	t = sizeof(struct rc_slice_header);
-	if (t != head->ofs_objects || t >= len)
+	if (t != head->ofs_objects)
 		return -4;
-
-	head->size = len;
+	if (head->size + head->name_size != len)
+		return -5;
 
 	return 0;
 }
@@ -786,7 +847,7 @@ int traverse_cache_slice(struct rev_info *revs,
 	unsigned long *date_so_far, int *slop_so_far,
 	struct commit_list ***queue, struct commit_list **work)
 {
-	int fd = -1, retval = -3;
+	int fd = -1, t, retval;
 	struct stat fi;
 	struct rc_slice_header head;
 	struct rev_cache_info *rci;
@@ -802,26 +863,31 @@ int traverse_cache_slice(struct rev_info *revs,
 	/* load options */
 	rci = &revs->rev_cache_info;
 	add_to_pending = rci->add_to_pending;
+	add_names = rci->add_names;
 
 	memset(&head, 0, sizeof(struct rc_slice_header));
+#	define ERROR(x)		do { retval = (x); goto end; } while (0);
 
 	fd = open_cache_slice(cache_sha1, O_RDONLY);
 	if (fd == -1)
-		goto end;
+		ERROR(-1);
 	if (fstat(fd, &fi) || fi.st_size < sizeof(struct rc_slice_header))
-		goto end;
+		ERROR(-2);
+
+	if ((t = get_cache_slice_header(fd, cache_sha1, fi.st_size, &head)) < 0)
+		ERROR(-t);
+	if (add_names)
+		cur_name_list = get_cache_slice_name_list(&head, fd);
 
-	map = xmmap(0, fi.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	map = xmmap(0, head.size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
 	if (map == MAP_FAILED)
-		goto end;
-	if (get_cache_slice_header(cache_sha1, map, fi.st_size, &head))
-		goto end;
+		ERROR(-3);
 
 	retval = traverse_cache_slice_1(&head, map, revs, commit, date_so_far, slop_so_far, queue, work);
 
 end:
 	if (map != MAP_FAILED)
-		munmap(map, fi.st_size);
+		munmap(map, head.size);
 	if (fd != -1)
 		close(fd);
 
@@ -829,6 +895,7 @@ end:
 	if (retval)
 		mark_bad_slice(cache_sha1);
 
+#	undef ERROR
 	return retval;
 }
 
@@ -1113,23 +1180,110 @@ static unsigned long decode_size(unsigned char *str, int len)
 	return size;
 }
 
+
+#define NL_HASH_TABLE_SIZE		(0xffff + 1)
+#define NL_HASH_NUMBER			(NL_HASH_TABLE_SIZE >> 3)
+
+struct name_list_hash {
+	int ind;
+	struct name_list_hash *next;
+};
+
+static struct name_list_hash **nl_hash_table;
+static unsigned char *nl_hashes;
+
+/* FNV-1a hash */
+static unsigned int hash_name(const char *name)
+{
+	unsigned int hash = 2166136261ul;
+	const char *p = name;
+
+	while (*p) {
+		hash ^= *p++;
+		hash *= 16777619ul;
+	}
+
+	return hash & 0xffff;
+}
+
+static int name_in_list(const char *name)
+{
+	unsigned int h = hash_name(name);
+	struct name_list_hash *entry = nl_hash_table[h];
+
+	while (entry && strcmp(acc_name_buffer->buf + entry->ind, name))
+		entry = entry->next;
+
+	if (entry)
+		return entry->ind;
+
+	/* add name to buffer and create hash reference */
+	entry = xcalloc(1, sizeof(struct name_list_hash));
+	entry->ind = acc_name_buffer->len;
+	strbuf_add(acc_name_buffer, name, strlen(name) + 1);
+
+	entry->next = nl_hash_table[h];
+	nl_hash_table[h] = entry;
+
+	nl_hashes[h / 8] |= h % 8;
+
+	return entry->ind;
+}
+
+static void init_name_list_hash(void)
+{
+	nl_hash_table = xcalloc(NL_HASH_TABLE_SIZE, sizeof(struct name_list_hash));
+	nl_hashes = xcalloc(NL_HASH_NUMBER, 1);
+}
+
+static void cleanup_name_list_hash(void)
+{
+	int i;
+
+	for (i = 0; i < NL_HASH_NUMBER; i++) {
+		int j, ind = nl_hashes[i];
+
+		if (!ind)
+			continue;
+
+		for (j = 0; j < 8; j++) {
+			struct name_list_hash **entryp;
+
+			if (!(ind & 1 << j))
+				continue;
+
+			entryp = &nl_hash_table[i * 8 + j];
+			while (*entryp) {
+				struct name_list_hash *t = (*entryp)->next;
+
+				free(*entryp);
+				*entryp = t;
+			}
+		}
+	} /* code overhang! */
+
+	free(nl_hashes);
+	free(nl_hash_table);
+}
+
 static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *entryp,
-	struct strbuf *merge_str, struct strbuf *split_str)
+	struct strbuf *merge_str, struct strbuf *split_str, char *name, unsigned long size)
 {
 	struct rc_object_entry entry;
-	unsigned char size_str[7];
-	unsigned long size;
+	unsigned char size_str[7], name_str[7];
 	enum object_type type;
 	void *data;
 
 	if (entryp)
 		sha1 = entryp->sha1;
 
-	/* retrieve size data */
-	data = read_sha1_file(sha1, &type, &size);
+	if (!size) {
+		/* retrieve size data */
+		data = read_sha1_file(sha1, &type, &size);
 
-	if (data)
-		free(data);
+		if (data)
+			free(data);
+	}
 
 	/* initialize! */
 	if (!entryp) {
@@ -1147,6 +1301,9 @@ static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *
 
 	entryp->size_size = encode_size(size, size_str);
 
+	if (name)
+		entryp->name_size = encode_size(name_in_list(name), name_str);
+
 	/* write the muvabitch */
 	strbuf_add(acc_buffer, to_disked_rc_object_entry(entryp, 0), sizeof(struct rc_object_entry_ondisk));
 
@@ -1156,25 +1313,36 @@ static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *
 		strbuf_add(acc_buffer, split_str->buf, split_str->len);
 
 	strbuf_add(acc_buffer, size_str, entryp->size_size);
+
+	if (name)
+		strbuf_add(acc_buffer, name_str, entryp->name_size);
 }
 
 /* returns non-zero to continue parsing, 0 to skip */
 typedef int (*dump_tree_fn)(const unsigned char *, const char *, unsigned int); /* sha1, path, mode */
 
 /* we need to walk the trees by hash, so unfortunately we can't use traverse_trees in tree-walk.c */
-static int dump_tree(struct tree *tree, dump_tree_fn fn)
+static int dump_tree(struct tree *tree, dump_tree_fn fn, char *base)
 {
 	struct tree_desc desc;
 	struct name_entry entry;
 	struct tree *subtree;
-	int r;
+	char concatpath[PATH_MAX];
+	int r, baselen;
 
 	if (parse_tree(tree))
 		return -1;
 
+	baselen = strlen(base);
+	strcpy(concatpath, base);
+
 	init_tree_desc(&desc, tree->buffer, tree->size);
 	while (tree_entry(&desc, &entry)) {
-		switch (fn(entry.sha1, entry.path, entry.mode)) {
+		if (baselen + strlen(entry.path) + 1 >= PATH_MAX)
+			die("we have a problem: %s%s is too big for me to handle", base, entry.path);
+		strcpy(concatpath + baselen, entry.path);
+
+		switch (fn(entry.sha1, concatpath, entry.mode)) {
 		case 0 :
 			goto continue_loop;
 		default :
@@ -1186,7 +1354,8 @@ static int dump_tree(struct tree *tree, dump_tree_fn fn)
 			if (!subtree)
 				return -2;
 
-			if ((r = dump_tree(subtree, fn)) < 0)
+			strcat(concatpath, "/");
+			if ((r = dump_tree(subtree, fn, concatpath)) < 0)
 				return r;
 		}
 
@@ -1200,6 +1369,9 @@ continue_loop:
 static int dump_tree_callback(const unsigned char *sha1, const char *path, unsigned int mode)
 {
 	strbuf_add(acc_buffer, sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, path, strlen(path) + 1);
 
 	return 1;
 }
@@ -1213,6 +1385,9 @@ static void tree_addremove(struct diff_options *options,
 		return;
 
 	strbuf_add(acc_buffer, sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, concatpath, strlen(concatpath) + 1);
 }
 
 static void tree_change(struct diff_options *options,
@@ -1225,12 +1400,15 @@ static void tree_change(struct diff_options *options,
 		return;
 
 	strbuf_add(acc_buffer, new_sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, concatpath, strlen(concatpath) + 1);
 }
 
 static int add_unique_objects(struct commit *commit)
 {
 	struct commit_list *list;
-	struct strbuf os, ost, *orig_buf;
+	struct strbuf os, ost, names, *orig_name_buf, *orig_buf;
 	struct diff_options opts;
 	int i, j, next;
 	char is_first = 1;
@@ -1238,13 +1416,17 @@ static int add_unique_objects(struct commit *commit)
 	/* ...no, calculate unique objects */
 	strbuf_init(&os, 0);
 	strbuf_init(&ost, 0);
+	strbuf_init(&names, 0);
 	orig_buf = acc_buffer;
+	orig_name_buf = acc_name_buffer;
+	acc_name_buffer = &names;
 
 	diff_setup(&opts);
 	DIFF_OPT_SET(&opts, RECURSIVE);
 	DIFF_OPT_SET(&opts, TREE_IN_RECURSIVE);
 	opts.change = tree_change;
 	opts.add_remove = tree_addremove;
+#	define ENTRY_SIZE (20 + sizeof(size_t))
 
 	/* this is only called for non-ends (ie. all parents interesting) */
 	for (list = commit->parents; list; list = list->next) {
@@ -1255,20 +1437,20 @@ static int add_unique_objects(struct commit *commit)
 
 		strbuf_setlen(acc_buffer, 0);
 		diff_tree_sha1(list->item->tree->object.sha1, commit->tree->object.sha1, "", &opts);
-		qsort(acc_buffer->buf, acc_buffer->len / 20, 20, (int (*)(const void *, const void *))hashcmp);
+		qsort(acc_buffer->buf, acc_buffer->len / ENTRY_SIZE, ENTRY_SIZE, (int (*)(const void *, const void *))hashcmp);
 
 		/* take intersection */
 		if (!is_first) {
-			for (next = i = j = 0; i < os.len; i += 20) {
+			for (next = i = j = 0; i < os.len; i += ENTRY_SIZE) {
 				while (j < ost.len && hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)) < 0)
-					j += 20;
+					j += ENTRY_SIZE;
 
 				if (j >= ost.len || hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)))
 					continue;
 
 				if (next != i)
-					memcpy(os.buf + next, os.buf + i, 20);
-				next += 20;
+					memcpy(os.buf + next, os.buf + i, ENTRY_SIZE);
+				next += ENTRY_SIZE;
 			}
 
 			if (next != i)
@@ -1280,29 +1462,37 @@ static int add_unique_objects(struct commit *commit)
 	/* no parents (!) */
 	if (is_first) {
 		acc_buffer = &os;
-		dump_tree(commit->tree, dump_tree_callback);
+		dump_tree(commit->tree, dump_tree_callback, "");
 	}
 
 	/* the ordering of non-commit objects dosn't really matter, so we're not gonna bother */
 	acc_buffer = orig_buf;
-	for (i = 0; i < os.len; i += 20)
-		add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0);
+	acc_name_buffer = orig_name_buf;
+	for (i = 0; i < os.len; i += ENTRY_SIZE)
+		add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0, names.buf + *(size_t *)(os.buf + i + 20), 0);
 
 	/* last but not least, the main tree */
-	add_object_entry(commit->tree->object.sha1, 0, 0, 0);
+	add_object_entry(commit->tree->object.sha1, 0, 0, 0, 0, 0);
 
-	return i / 20 + 1;
+	strbuf_release(&ost);
+	strbuf_release(&os);
+	strbuf_release(&names);
+
+	return i / ENTRY_SIZE + 1;
+#	undef ENTRY_SIZE
 }
 
 static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *index)
 {
-	unsigned char *map = mapping->map;
 	int i = *index, object_nr = 0;
+	unsigned char *map = mapping->map;
 	struct rc_object_entry *entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
+	unsigned long size;
 
 	i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
 	while (i < mapping->size) {
-		int pos = i;
+		char *name;
+		int name_index, pos = i;
 
 		entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
 		i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
@@ -1312,7 +1502,15 @@ static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *inde
 			return object_nr;
 		}
 
-		strbuf_add(acc_buffer, map + pos, i - pos);
+		name_index = decode_size(map + pos + RC_ENTRY_NAME_OFFSET(entry), entry->name_size);
+		if (name_index && name_index < mapping->name_size)
+			name = mapping->names + name_index;
+		else
+			name = 0;
+
+		size = decode_size(map + pos + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
+
+		add_object_entry(0, entry, 0, 0, name, size);
 		object_nr++;
 	}
 
@@ -1397,6 +1595,7 @@ void init_rev_cache_info(struct rev_cache_info *rci)
 	rci->overwrite_all = 0;
 
 	rci->add_to_pending = 1;
+	rci->add_names = 1;
 
 	rci->ignore_size = 0;
 }
@@ -1421,9 +1620,9 @@ int make_cache_slice(struct rev_cache_info *rci,
 	struct rc_slice_header head;
 	struct commit *commit;
 	unsigned char sha1[20];
-	struct strbuf merge_paths, split_paths;
+	struct strbuf merge_paths, split_paths, namelist;
 	int object_nr, total_sz, fd;
-	char file[PATH_MAX], *newfile;
+	char file[PATH_MAX], null, *newfile;
 	struct rev_cache_info *trci;
 	git_SHA_CTX ctx;
 
@@ -1438,7 +1637,13 @@ int make_cache_slice(struct rev_cache_info *rci,
 	strbuf_init(&endlist, 0);
 	strbuf_init(&merge_paths, 0);
 	strbuf_init(&split_paths, 0);
+	strbuf_init(&namelist, 0);
 	acc_buffer = &buffer;
+	acc_name_buffer = &namelist;
+
+	null = 0;
+	strbuf_add(&namelist, &null, 1);
+	init_name_list_hash();
 
 	if (!revs) {
 		revs = &therevs;
@@ -1469,6 +1674,7 @@ int make_cache_slice(struct rev_cache_info *rci,
 	trci = &revs->rev_cache_info;
 	init_rev_cache_info(trci);
 	trci->add_to_pending = 0;
+	trci->add_names = 0;
 
 	setup_revisions(0, 0, revs, 0);
 	if (prepare_revision_walk(revs))
@@ -1506,7 +1712,7 @@ int make_cache_slice(struct rev_cache_info *rci,
 
 		commit->indegree = 0;
 
-		add_object_entry(0, &object, &merge_paths, &split_paths);
+		add_object_entry(0, &object, &merge_paths, &split_paths, 0, 0);
 		object_nr++;
 
 		if (rci->objects && !object.is_end) {
@@ -1532,10 +1738,16 @@ int make_cache_slice(struct rev_cache_info *rci,
 		total_sz += buffer.len;
 	}
 
+	/* write path name lookup list */
+	head.name_size = htonl(namelist.len);
+	write_in_full(fd, namelist.buf, namelist.len);
+
 	/* go ahead a free some stuff... */
 	strbuf_release(&buffer);
 	strbuf_release(&merge_paths);
 	strbuf_release(&split_paths);
+	strbuf_release(&namelist);
+	cleanup_name_list_hash();
 	if (path_sz)
 		free(paths);
 	while (path_track_alloc)
@@ -1993,6 +2205,7 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
 	for (i = idx_head.cache_nr - 1; i >= 0; i--) {
 		struct rev_cache_slice_map *map = rci->maps + i;
 		struct stat fi;
+		struct rc_slice_header head;
 		int fd;
 
 		if (!map->size)
@@ -2005,13 +2218,20 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
 			continue;
 		if (fi.st_size < sizeof(struct rc_slice_header))
 			continue;
+		if (get_cache_slice_header(fd, idx_caches + i * 20, fi.st_size, &head))
+			continue;
 
-		map->map = xmmap(0, fi.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+		map->map = xmmap(0, head.size, PROT_READ, MAP_PRIVATE, fd, 0);
 		if (map->map == MAP_FAILED)
 			continue;
 
+		lseek(fd, head.size, SEEK_SET);
+		map->names = xcalloc(head.name_size, 1);
+		read_in_full(fd, map->names, head.name_size);
+
 		close(fd);
-		map->size = fi.st_size;
+		map->size = head.size;
+		map->name_size = head.name_size;
 	}
 
 	rci->make_index = 0;
@@ -2028,6 +2248,7 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
 		if (!map->size)
 			continue;
 
+		free(map->names);
 		munmap(map->map, map->size);
 	}
 	free(rci->maps);
@@ -2049,7 +2270,6 @@ static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
 {
 	struct rc_slice_header head;
 	int fd, len, retval = -1;
-	unsigned char *map = MAP_FAILED;
 	struct stat fi;
 
 	len = strlen(slice_path);
@@ -2064,17 +2284,12 @@ static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
 	if (fstat(fd, &fi) || fi.st_size < sizeof(head))
 		goto end;
 
-	map = xmmap(0, sizeof(head), PROT_READ, MAP_PRIVATE, fd, 0);
-	if (map == MAP_FAILED)
-		goto end;
-	if (get_cache_slice_header(sha1, map, fi.st_size, &head))
+	if (get_cache_slice_header(fd, sha1, fi.st_size, &head))
 		goto end;
 
 	retval = 0;
 
 end:
-	if (map != MAP_FAILED)
-		munmap(map, sizeof(head));
 	if (fd > 0)
 		close(fd);
 
diff --git a/rev-cache.h b/rev-cache.h
index 14437d8..c88ceae 100644
--- a/rev-cache.h
+++ b/rev-cache.h
@@ -10,8 +10,14 @@
 #define RC_OBTAIN_OBJECT_ENTRY(p)			from_disked_rc_object_entry((struct rc_object_entry_ondisk *)(p), 0)
 #define RC_OBTAIN_INDEX_ENTRY(p)			from_disked_rc_index_entry((struct rc_index_entry_ondisk *)(p), 0)
 
-#define RC_ACTUAL_OBJECT_ENTRY_SIZE(e)		(sizeof(struct rc_object_entry_ondisk) + RC_PATH_SIZE((e)->merge_nr + (e)->split_nr) + (e)->size_size)
-#define RC_ENTRY_SIZE_OFFSET(e)				(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->size_size)
+#define RC_ACTUAL_OBJECT_ENTRY_SIZE(e)	(\
+	sizeof(struct rc_object_entry_ondisk) + \
+	RC_PATH_SIZE((e)->merge_nr + (e)->split_nr) + \
+	(e)->size_size + \
+	(e)->name_size\
+)
+#define RC_ENTRY_SIZE_OFFSET(e)			(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->name_size - (e)->size_size)
+#define RC_ENTRY_NAME_OFFSET(e)			(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->name_size)
 
 /* single index maps objects to cache files */
 struct rc_index_header {
@@ -50,6 +56,8 @@ struct rc_slice_header {
 	uint32_t size;
 
 	unsigned char sha1[20];
+
+	uint32_t name_size;
 };
 
 struct rc_object_entry_ondisk {
@@ -76,7 +84,8 @@ struct rc_object_entry {
 	unsigned char merge_nr; /* : 7 */
 	unsigned char split_nr; /* : 7 */
 	unsigned size_size : 3;
-	unsigned padding : 5;
+	unsigned name_size : 3;
+	unsigned padding : 2;
 
 	uint32_t date;
 	uint16_t path;
@@ -84,6 +93,7 @@ struct rc_object_entry {
 	/* merge paths */
 	/* split paths */
 	/* size */
+	/* name id */
 };
 
 struct rc_index_entry *from_disked_rc_index_entry(struct rc_index_entry_ondisk *src, struct rc_index_entry *dst);
diff --git a/revision.h b/revision.h
index cc5c259..c62e85b 100644
--- a/revision.h
+++ b/revision.h
@@ -26,6 +26,9 @@ struct rev_cache_slice_map {
 	unsigned char *map;
 	int size;
 	int last_index;
+
+	char *names;
+	int name_size;
 };
 
 struct rev_cache_info {
@@ -39,7 +42,8 @@ struct rev_cache_info {
 	unsigned overwrite_all : 1;
 
 	/* traversal flags */
-	unsigned add_to_pending : 1;
+	unsigned add_to_pending : 1,
+		add_names : 1;
 
 	/* fuse options */
 	unsigned int ignore_size;
diff --git a/t/t6017-rev-cache-list.sh b/t/t6017-rev-cache-list.sh
index f2e34b1..3286560 100755
--- a/t/t6017-rev-cache-list.sh
+++ b/t/t6017-rev-cache-list.sh
@@ -4,8 +4,10 @@ test_description='git rev-cache tests'
 . ./test-lib.sh
 
 test_cmp_sorted() {
-	grep -io "[a-f0-9]*" $1 | sort >.tmpfile1 &&
-	grep -io "[a-f0-9]*" $2 | sort >.tmpfile2 &&
+# note that we're tip-toeing around the corner case of two objects/names
+# for the same SHA-1 => discrepencies between cached and non-cached walks
+	sort $1 >.tmpfile1 &&
+	sort $2 >.tmpfile2 &&
 	test_cmp .tmpfile1 .tmpfile2
 }
 
@@ -15,6 +17,8 @@ test_cmp_sorted() {
 # reuse
 test_expect_success 'init repo' '
 	echo bla >file &&
+	mkdir amaindir &&
+	echo watskeburt >amaindir/file &&
 	git add . &&
 	git commit -m "bla" &&
 
-- 
tg: (716470e..) t/revcache/names (depends on: t/revcache/docs)

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 6/6 (v4)] support for path name caching in rev-cache
  2009-08-18 11:51 ` Nick Edelen
  2009-08-21  4:48   ` Nick Edelen
@ 2009-10-02 22:12   ` Nick Edelen
  1 sibling, 0 replies; 13+ messages in thread
From: Nick Edelen @ 2009-10-02 22:12 UTC (permalink / raw)
  To: Nick Edelen, Junio C Hamano, Nicolas Pitre, Johannes Schindelin,
	Sam Vilain

An update to caching mechanism, allowing path names to be cached for blob and
tree objects.  A list of names appearing in each cache slice is appended to the
end of the slice, which is referenced by variable-sized indexes per entry.
This allows pack-objects to more intelligently schedule unpacked/poorly packed
object, and enables proper duplication of rev-list's behaivor.

Signed-off-by: Nick Edelen <sirnot@gmail.com>

---
  builtin-rev-cache.c       |    3 +-
  rev-cache.c               |  332 +++++++++++++++++++++++++++++++++++++--------
  rev-cache.h               |   16 ++-
  revision.h                |    6 +-
  t/t6017-rev-cache-list.sh |    8 +-
  5 files changed, 300 insertions(+), 65 deletions(-)

diff --git a/builtin-rev-cache.c b/builtin-rev-cache.c
index 8f41123..4c1766d 100644
--- a/builtin-rev-cache.c
+++ b/builtin-rev-cache.c
@@ -177,13 +177,14 @@ static int handle_walk(int argc, const char *argv[])
  	fprintf(stderr, "pending:\n");
  	for (i = 0; i < revs.pending.nr; i++) {
  		struct object *obj = revs.pending.objects[i].item;
+		const char *name = revs.pending.objects[i].name;

  		/* unfortunately, despite our careful generation, object duplication *is* a possibility...
  		 * (eg. same object introduced into two different branches) */
  		if (obj->flags & SEEN)
  			continue;

-		printf("%s\n", sha1_to_hex(revs.pending.objects[i].item->sha1));
+		printf("%s %s\n", sha1_to_hex(revs.pending.objects[i].item->sha1), name);
  		obj->flags |= SEEN;
  	}

diff --git a/rev-cache.c b/rev-cache.c
index 4ef5287..6c96297 100644
--- a/rev-cache.c
+++ b/rev-cache.c
@@ -17,6 +17,14 @@ struct bad_slice {
  	struct bad_slice *next;
  };

+struct name_list {
+	unsigned char sha1[20];
+	unsigned int len;
+	struct name_list *next;
+
+	char buf[FLEX_ARRAY];
+};
+
  struct cache_slice_pointer {
  	char signature[8]; /* REVCOPTR */
  	char version;
@@ -29,10 +37,13 @@ static uint32_t fanout[0xff + 2];
  static unsigned char *idx_map;
  static int idx_size;
  static struct rc_index_header idx_head;
-static char no_idx, add_to_pending;
-static struct bad_slice *bad_slices;
+static char no_idx, add_to_pending, add_names;
  static unsigned char *idx_caches;

+static struct bad_slice *bad_slices;
+static struct name_list *name_lists, *cur_name_list;
+
+static struct strbuf *acc_name_buffer;
  static struct strbuf *acc_buffer;

  #define SLOP			5
@@ -79,7 +90,7 @@ struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondis
  	if (!dst)
  		dst = &entry[cur++ & 0x3];

-	dst->type = src->flags >> 5;
+	dst->type = src->flags >> 5 & 0x03;
  	dst->is_end = !!(src->flags & 0x10);
  	dst->is_start = !!(src->flags & 0x08);
  	dst->uninteresting = !!(src->flags & 0x04);
@@ -90,8 +101,9 @@ struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondis
  	dst->merge_nr = src->merge_nr;
  	dst->split_nr = src->split_nr;

-	dst->size_size = src->sizes >> 5;
-	dst->padding = src->sizes & 0x1f;
+	dst->size_size = src->sizes >> 5 & 0x03;
+	dst->name_size = src->sizes >> 2 & 0x03;
+	dst->padding = src->sizes & 0x02;

  	dst->date = ntohl(src->date);
  	dst->path = ntohs(src->path);
@@ -120,6 +132,7 @@ struct rc_object_entry_ondisk *to_disked_rc_object_entry(struct rc_object_entry
  	dst->split_nr = src->split_nr;

  	dst->sizes  = (unsigned char)src->size_size << 5;
+	dst->sizes |= (unsigned char)src->name_size << 2;
  	dst->sizes |= (unsigned char)src->padding;

  	dst->date = htonl(src->date);
@@ -192,6 +205,12 @@ static void cleanup_cache_slices(void)
  		idx_map = 0;
  	}

+	while (name_lists) {
+		struct name_list *nl = name_lists->next;
+		free(name_lists);
+		name_lists = nl;
+	}
+
  }

  static int init_index(void)
@@ -324,7 +343,7 @@ static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsig
  	struct blob *blob;
  	struct tree *tree;
  	struct object *obj;
-	unsigned long size;
+	unsigned long size, name_index;

  	size = decode_size(ptr + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
  	switch (entry->type) {
@@ -357,9 +376,23 @@ static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsig
  		return;
  	}

+	if (add_names && cur_name_list) {
+		name_index = decode_size(ptr + RC_ENTRY_NAME_OFFSET(entry), entry->name_size);
+
+		if (name_index >= cur_name_list->len)
+			name_index = 0;
+	} else
+		name_index = 0;
+
  	obj->flags |= FACE_VALUE;
-	if (add_to_pending)
-		add_pending_object(revs, obj, "");
+	if (add_to_pending) {
+		char *name = "";
+
+		if (name_index)
+			name = cur_name_list->buf + name_index;
+
+		add_pending_object(revs, obj, name);
+	}
  }

  static int setup_traversal(struct rc_slice_header *head, unsigned char *map, struct commit *commit, struct commit_list **work,
@@ -713,15 +746,44 @@ end:
  	return retval;
  }

-static int get_cache_slice_header(unsigned char *cache_sha1, unsigned char *map, int len, struct rc_slice_header *head)
+static struct name_list *get_cache_slice_name_list(struct rc_slice_header *head, int fd)
+{
+	struct name_list *nl = name_lists;
+
+	while (nl) {
+		if (!hashcmp(nl->sha1, head->sha1))
+			break;
+		nl = nl->next;
+	}
+
+	if (nl)
+		return nl;
+
+	nl = xcalloc(1, sizeof(struct name_list) + head->name_size);
+	nl->len = head->name_size;
+	hashcpy(nl->sha1, head->sha1);
+
+	lseek(fd, head->size, SEEK_SET);
+	read_in_full(fd, nl->buf, head->name_size);
+
+	nl->next = name_lists;
+	name_lists = nl;
+
+	return nl;
+}
+
+static int get_cache_slice_header(int fd, unsigned char *cache_sha1, int len, struct rc_slice_header *head)
  {
  	int t;

-	memcpy(head, map, sizeof(struct rc_slice_header));
+	if (xread(fd, head, sizeof(struct rc_slice_header)) != sizeof(struct rc_slice_header))
+		return -1;
+
  	head->ofs_objects = ntohl(head->ofs_objects);
  	head->object_nr = ntohl(head->object_nr);
  	head->size = ntohl(head->size);
  	head->path_nr = ntohs(head->path_nr);
+	head->name_size = ntohl(head->name_size);

  	if (memcmp(head->signature, "REVCACHE", 8))
  		return -1;
@@ -730,10 +792,10 @@ static int get_cache_slice_header(unsigned char *cache_sha1, unsigned char *map,
  	if (hashcmp(head->sha1, cache_sha1))
  		return -3;
  	t = sizeof(struct rc_slice_header);
-	if (t != head->ofs_objects || t >= len)
+	if (t != head->ofs_objects)
  		return -4;
-
-	head->size = len;
+	if (head->size + head->name_size != len)
+		return -5;

  	return 0;
  }
@@ -785,7 +847,7 @@ int traverse_cache_slice(struct rev_info *revs,
  	unsigned long *date_so_far, int *slop_so_far,
  	struct commit_list ***queue, struct commit_list **work)
  {
-	int fd = -1, retval = -3;
+	int fd = -1, t, retval;
  	struct stat fi;
  	struct rc_slice_header head;
  	struct rev_cache_info *rci;
@@ -801,26 +863,31 @@ int traverse_cache_slice(struct rev_info *revs,
  	/* load options */
  	rci = &revs->rev_cache_info;
  	add_to_pending = rci->add_to_pending;
+	add_names = rci->add_names;

  	memset(&head, 0, sizeof(struct rc_slice_header));
+#	define ERROR(x)		do { retval = (x); goto end; } while (0);

  	fd = open_cache_slice(cache_sha1, O_RDONLY);
  	if (fd == -1)
-		goto end;
+		ERROR(-1);
  	if (fstat(fd, &fi) || fi.st_size < sizeof(struct rc_slice_header))
-		goto end;
+		ERROR(-2);
+
+	if ((t = get_cache_slice_header(fd, cache_sha1, fi.st_size, &head)) < 0)
+		ERROR(-t);
+	if (add_names)
+		cur_name_list = get_cache_slice_name_list(&head, fd);

-	map = xmmap(0, fi.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	map = xmmap(0, head.size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
  	if (map == MAP_FAILED)
-		goto end;
-	if (get_cache_slice_header(cache_sha1, map, fi.st_size, &head))
-		goto end;
+		ERROR(-3);

  	retval = traverse_cache_slice_1(&head, map, revs, commit, date_so_far, slop_so_far, queue, work);

  end:
  	if (map != MAP_FAILED)
-		munmap(map, fi.st_size);
+		munmap(map, head.size);
  	if (fd != -1)
  		close(fd);

@@ -828,6 +895,7 @@ end:
  	if (retval)
  		mark_bad_slice(cache_sha1);

+#	undef ERROR
  	return retval;
  }

@@ -1112,23 +1180,110 @@ static unsigned long decode_size(unsigned char *str, int len)
  	return size;
  }

+
+#define NL_HASH_TABLE_SIZE		(0xffff + 1)
+#define NL_HASH_NUMBER			(NL_HASH_TABLE_SIZE >> 3)
+
+struct name_list_hash {
+	int ind;
+	struct name_list_hash *next;
+};
+
+static struct name_list_hash **nl_hash_table;
+static unsigned char *nl_hashes;
+
+/* FNV-1a hash */
+static unsigned int hash_name(const char *name)
+{
+	unsigned int hash = 2166136261ul;
+	const char *p = name;
+
+	while (*p) {
+		hash ^= *p++;
+		hash *= 16777619ul;
+	}
+
+	return hash & 0xffff;
+}
+
+static int name_in_list(const char *name)
+{
+	unsigned int h = hash_name(name);
+	struct name_list_hash *entry = nl_hash_table[h];
+
+	while (entry && strcmp(acc_name_buffer->buf + entry->ind, name))
+		entry = entry->next;
+
+	if (entry)
+		return entry->ind;
+
+	/* add name to buffer and create hash reference */
+	entry = xcalloc(1, sizeof(struct name_list_hash));
+	entry->ind = acc_name_buffer->len;
+	strbuf_add(acc_name_buffer, name, strlen(name) + 1);
+
+	entry->next = nl_hash_table[h];
+	nl_hash_table[h] = entry;
+
+	nl_hashes[h / 8] |= h % 8;
+
+	return entry->ind;
+}
+
+static void init_name_list_hash(void)
+{
+	nl_hash_table = xcalloc(NL_HASH_TABLE_SIZE, sizeof(struct name_list_hash));
+	nl_hashes = xcalloc(NL_HASH_NUMBER, 1);
+}
+
+static void cleanup_name_list_hash(void)
+{
+	int i;
+
+	for (i = 0; i < NL_HASH_NUMBER; i++) {
+		int j, ind = nl_hashes[i];
+
+		if (!ind)
+			continue;
+
+		for (j = 0; j < 8; j++) {
+			struct name_list_hash **entryp;
+
+			if (!(ind & 1 << j))
+				continue;
+
+			entryp = &nl_hash_table[i * 8 + j];
+			while (*entryp) {
+				struct name_list_hash *t = (*entryp)->next;
+
+				free(*entryp);
+				*entryp = t;
+			}
+		}
+	} /* code overhang! */
+
+	free(nl_hashes);
+	free(nl_hash_table);
+}
+
  static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *entryp,
-	struct strbuf *merge_str, struct strbuf *split_str)
+	struct strbuf *merge_str, struct strbuf *split_str, char *name, unsigned long size)
  {
  	struct rc_object_entry entry;
-	unsigned char size_str[7];
-	unsigned long size;
+	unsigned char size_str[7], name_str[7];
  	enum object_type type;
  	void *data;

  	if (entryp)
  		sha1 = entryp->sha1;

-	/* retrieve size data */
-	data = read_sha1_file(sha1, &type, &size);
+	if (!size) {
+		/* retrieve size data */
+		data = read_sha1_file(sha1, &type, &size);

-	if (data)
-		free(data);
+		if (data)
+			free(data);
+	}

  	/* initialize! */
  	if (!entryp) {
@@ -1146,6 +1301,9 @@ static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *

  	entryp->size_size = encode_size(size, size_str);

+	if (name)
+		entryp->name_size = encode_size(name_in_list(name), name_str);
+
  	/* write the muvabitch */
  	strbuf_add(acc_buffer, to_disked_rc_object_entry(entryp, 0), sizeof(struct rc_object_entry_ondisk));

@@ -1155,25 +1313,36 @@ static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *
  		strbuf_add(acc_buffer, split_str->buf, split_str->len);

  	strbuf_add(acc_buffer, size_str, entryp->size_size);
+
+	if (name)
+		strbuf_add(acc_buffer, name_str, entryp->name_size);
  }

  /* returns non-zero to continue parsing, 0 to skip */
  typedef int (*dump_tree_fn)(const unsigned char *, const char *, unsigned int); /* sha1, path, mode */

  /* we need to walk the trees by hash, so unfortunately we can't use traverse_trees in tree-walk.c */
-static int dump_tree(struct tree *tree, dump_tree_fn fn)
+static int dump_tree(struct tree *tree, dump_tree_fn fn, char *base)
  {
  	struct tree_desc desc;
  	struct name_entry entry;
  	struct tree *subtree;
-	int r;
+	char concatpath[PATH_MAX];
+	int r, baselen;

  	if (parse_tree(tree))
  		return -1;

+	baselen = strlen(base);
+	strcpy(concatpath, base);
+
  	init_tree_desc(&desc, tree->buffer, tree->size);
  	while (tree_entry(&desc, &entry)) {
-		switch (fn(entry.sha1, entry.path, entry.mode)) {
+		if (baselen + strlen(entry.path) + 1 >= PATH_MAX)
+			die("we have a problem: %s%s is too big for me to handle", base, entry.path);
+		strcpy(concatpath + baselen, entry.path);
+
+		switch (fn(entry.sha1, concatpath, entry.mode)) {
  		case 0:
  			goto continue_loop;
  		default:
@@ -1185,7 +1354,8 @@ static int dump_tree(struct tree *tree, dump_tree_fn fn)
  			if (!subtree)
  				return -2;

-			if ((r = dump_tree(subtree, fn)) < 0)
+			strcat(concatpath, "/");
+			if ((r = dump_tree(subtree, fn, concatpath)) < 0)
  				return r;
  		}

@@ -1199,6 +1369,9 @@ continue_loop:
  static int dump_tree_callback(const unsigned char *sha1, const char *path, unsigned int mode)
  {
  	strbuf_add(acc_buffer, sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, path, strlen(path) + 1);

  	return 1;
  }
@@ -1212,6 +1385,9 @@ static void tree_addremove(struct diff_options *options,
  		return;

  	strbuf_add(acc_buffer, sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, concatpath, strlen(concatpath) + 1);
  }

  static void tree_change(struct diff_options *options,
@@ -1224,12 +1400,15 @@ static void tree_change(struct diff_options *options,
  		return;

  	strbuf_add(acc_buffer, new_sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, concatpath, strlen(concatpath) + 1);
  }

  static int add_unique_objects(struct commit *commit)
  {
  	struct commit_list *list;
-	struct strbuf os, ost, *orig_buf;
+	struct strbuf os, ost, names, *orig_name_buf, *orig_buf;
  	struct diff_options opts;
  	int i, j, next;
  	char is_first = 1;
@@ -1237,13 +1416,17 @@ static int add_unique_objects(struct commit *commit)
  	/* ...no, calculate unique objects */
  	strbuf_init(&os, 0);
  	strbuf_init(&ost, 0);
+	strbuf_init(&names, 0);
  	orig_buf = acc_buffer;
+	orig_name_buf = acc_name_buffer;
+	acc_name_buffer = &names;

  	diff_setup(&opts);
  	DIFF_OPT_SET(&opts, RECURSIVE);
  	DIFF_OPT_SET(&opts, TREE_IN_RECURSIVE);
  	opts.change = tree_change;
  	opts.add_remove = tree_addremove;
+#	define ENTRY_SIZE (20 + sizeof(size_t))

  	/* this is only called for non-ends (ie. all parents interesting) */
  	for (list = commit->parents; list; list = list->next) {
@@ -1254,20 +1437,20 @@ static int add_unique_objects(struct commit *commit)

  		strbuf_setlen(acc_buffer, 0);
  		diff_tree_sha1(list->item->tree->object.sha1, commit->tree->object.sha1, "", &opts);
-		qsort(acc_buffer->buf, acc_buffer->len / 20, 20, (int (*)(const void *, const void *))hashcmp);
+		qsort(acc_buffer->buf, acc_buffer->len / ENTRY_SIZE, ENTRY_SIZE, (int (*)(const void *, const void *))hashcmp);

  		/* take intersection */
  		if (!is_first) {
-			for (next = i = j = 0; i < os.len; i += 20) {
+			for (next = i = j = 0; i < os.len; i += ENTRY_SIZE) {
  				while (j < ost.len && hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)) < 0)
-					j += 20;
+					j += ENTRY_SIZE;

  				if (j >= ost.len || hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)))
  					continue;

  				if (next != i)
-					memcpy(os.buf + next, os.buf + i, 20);
-				next += 20;
+					memcpy(os.buf + next, os.buf + i, ENTRY_SIZE);
+				next += ENTRY_SIZE;
  			}

  			if (next != i)
@@ -1279,29 +1462,37 @@ static int add_unique_objects(struct commit *commit)
  	/* no parents (!) */
  	if (is_first) {
  		acc_buffer = &os;
-		dump_tree(commit->tree, dump_tree_callback);
+		dump_tree(commit->tree, dump_tree_callback, "");
  	}

  	/* the ordering of non-commit objects dosn't really matter, so we're not gonna bother */
  	acc_buffer = orig_buf;
-	for (i = 0; i < os.len; i += 20)
-		add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0);
+	acc_name_buffer = orig_name_buf;
+	for (i = 0; i < os.len; i += ENTRY_SIZE)
+		add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0, names.buf + *(size_t *)(os.buf + i + 20), 0);

  	/* last but not least, the main tree */
-	add_object_entry(commit->tree->object.sha1, 0, 0, 0);
+	add_object_entry(commit->tree->object.sha1, 0, 0, 0, 0, 0);

-	return i / 20 + 1;
+	strbuf_release(&ost);
+	strbuf_release(&os);
+	strbuf_release(&names);
+
+	return i / ENTRY_SIZE + 1;
+#	undef ENTRY_SIZE
  }

  static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *index)
  {
-	unsigned char *map = mapping->map;
  	int i = *index, object_nr = 0;
+	unsigned char *map = mapping->map;
  	struct rc_object_entry *entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
+	unsigned long size;

  	i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
  	while (i < mapping->size) {
-		int pos = i;
+		char *name;
+		int name_index, pos = i;

  		entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
  		i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
@@ -1311,7 +1502,15 @@ static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *inde
  			return object_nr;
  		}

-		strbuf_add(acc_buffer, map + pos, i - pos);
+		name_index = decode_size(map + pos + RC_ENTRY_NAME_OFFSET(entry), entry->name_size);
+		if (name_index && name_index < mapping->name_size)
+			name = mapping->names + name_index;
+		else
+			name = 0;
+
+		size = decode_size(map + pos + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
+
+		add_object_entry(0, entry, 0, 0, name, size);
  		object_nr++;
  	}

@@ -1396,6 +1595,7 @@ void init_rev_cache_info(struct rev_cache_info *rci)
  	rci->overwrite_all = 0;

  	rci->add_to_pending = 1;
+	rci->add_names = 1;

  	rci->ignore_size = 0;
  }
@@ -1420,9 +1620,9 @@ int make_cache_slice(struct rev_cache_info *rci,
  	struct rc_slice_header head;
  	struct commit *commit;
  	unsigned char sha1[20];
-	struct strbuf merge_paths, split_paths;
+	struct strbuf merge_paths, split_paths, namelist;
  	int object_nr, total_sz, fd;
-	char file[PATH_MAX], *newfile;
+	char file[PATH_MAX], null, *newfile;
  	struct rev_cache_info *trci;
  	git_SHA_CTX ctx;

@@ -1437,7 +1637,13 @@ int make_cache_slice(struct rev_cache_info *rci,
  	strbuf_init(&endlist, 0);
  	strbuf_init(&merge_paths, 0);
  	strbuf_init(&split_paths, 0);
+	strbuf_init(&namelist, 0);
  	acc_buffer = &buffer;
+	acc_name_buffer = &namelist;
+
+	null = 0;
+	strbuf_add(&namelist, &null, 1);
+	init_name_list_hash();

  	if (!revs) {
  		revs = &therevs;
@@ -1468,6 +1674,7 @@ int make_cache_slice(struct rev_cache_info *rci,
  	trci = &revs->rev_cache_info;
  	init_rev_cache_info(trci);
  	trci->add_to_pending = 0;
+	trci->add_names = 0;

  	setup_revisions(0, 0, revs, 0);
  	if (prepare_revision_walk(revs))
@@ -1505,7 +1712,7 @@ int make_cache_slice(struct rev_cache_info *rci,

  		commit->indegree = 0;

-		add_object_entry(0, &object, &merge_paths, &split_paths);
+		add_object_entry(0, &object, &merge_paths, &split_paths, 0, 0);
  		object_nr++;

  		if (rci->objects && !object.is_end) {
@@ -1531,10 +1738,16 @@ int make_cache_slice(struct rev_cache_info *rci,
  		total_sz += buffer.len;
  	}

+	/* write path name lookup list */
+	head.name_size = htonl(namelist.len);
+	write_in_full(fd, namelist.buf, namelist.len);
+
  	/* go ahead a free some stuff... */
  	strbuf_release(&buffer);
  	strbuf_release(&merge_paths);
  	strbuf_release(&split_paths);
+	strbuf_release(&namelist);
+	cleanup_name_list_hash();
  	if (path_sz)
  		free(paths);
  	while (path_track_alloc)
@@ -1992,6 +2205,7 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
  	for (i = idx_head.cache_nr - 1; i >= 0; i--) {
  		struct rev_cache_slice_map *map = rci->maps + i;
  		struct stat fi;
+		struct rc_slice_header head;
  		int fd;

  		if (!map->size)
@@ -2004,13 +2218,20 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
  			continue;
  		if (fi.st_size < sizeof(struct rc_slice_header))
  			continue;
+		if (get_cache_slice_header(fd, idx_caches + i * 20, fi.st_size, &head))
+			continue;

-		map->map = xmmap(0, fi.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+		map->map = xmmap(0, head.size, PROT_READ, MAP_PRIVATE, fd, 0);
  		if (map->map == MAP_FAILED)
  			continue;

+		lseek(fd, head.size, SEEK_SET);
+		map->names = xcalloc(head.name_size, 1);
+		read_in_full(fd, map->names, head.name_size);
+
  		close(fd);
-		map->size = fi.st_size;
+		map->size = head.size;
+		map->name_size = head.name_size;
  	}

  	rci->make_index = 0;
@@ -2027,6 +2248,7 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
  		if (!map->size)
  			continue;

+		free(map->names);
  		munmap(map->map, map->size);
  	}
  	free(rci->maps);
@@ -2048,7 +2270,6 @@ static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
  {
  	struct rc_slice_header head;
  	int fd, len, retval = -1;
-	unsigned char *map = MAP_FAILED;
  	struct stat fi;

  	len = strlen(slice_path);
@@ -2063,17 +2284,12 @@ static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
  	if (fstat(fd, &fi) || fi.st_size < sizeof(head))
  		goto end;

-	map = xmmap(0, sizeof(head), PROT_READ, MAP_PRIVATE, fd, 0);
-	if (map == MAP_FAILED)
-		goto end;
-	if (get_cache_slice_header(sha1, map, fi.st_size, &head))
+	if (get_cache_slice_header(fd, sha1, fi.st_size, &head))
  		goto end;

  	retval = 0;

  end:
-	if (map != MAP_FAILED)
-		munmap(map, sizeof(head));
  	if (fd > 0)
  		close(fd);

diff --git a/rev-cache.h b/rev-cache.h
index a1af337..0fa9b44 100644
--- a/rev-cache.h
+++ b/rev-cache.h
@@ -10,8 +10,14 @@
  #define RC_OBTAIN_OBJECT_ENTRY(p)			from_disked_rc_object_entry((struct rc_object_entry_ondisk *)(p), 0)
  #define RC_OBTAIN_INDEX_ENTRY(p)			from_disked_rc_index_entry((struct rc_index_entry_ondisk *)(p), 0)

-#define RC_ACTUAL_OBJECT_ENTRY_SIZE(e)		(sizeof(struct rc_object_entry_ondisk) + RC_PATH_SIZE((e)->merge_nr + (e)->split_nr) + (e)->size_size)
-#define RC_ENTRY_SIZE_OFFSET(e)				(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->size_size)
+#define RC_ACTUAL_OBJECT_ENTRY_SIZE(e)	(\
+	sizeof(struct rc_object_entry_ondisk) + \
+	RC_PATH_SIZE((e)->merge_nr + (e)->split_nr) + \
+	(e)->size_size + \
+	(e)->name_size\
+)
+#define RC_ENTRY_SIZE_OFFSET(e)			(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->name_size - (e)->size_size)
+#define RC_ENTRY_NAME_OFFSET(e)			(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->name_size)

  /* single index maps objects to cache files */
  struct rc_index_header {
@@ -50,6 +56,8 @@ struct rc_slice_header {
  	uint32_t size;

  	unsigned char sha1[20];
+
+	uint32_t name_size;
  };

  struct rc_object_entry_ondisk {
@@ -76,7 +84,8 @@ struct rc_object_entry {
  	unsigned char merge_nr; /* : 7 */
  	unsigned char split_nr; /* : 7 */
  	unsigned size_size:3;
-	unsigned padding:5;
+	unsigned name_size:3;
+	unsigned padding:2;

  	uint32_t date;
  	uint16_t path;
@@ -84,6 +93,7 @@ struct rc_object_entry {
  	/* merge paths */
  	/* split paths */
  	/* size */
+	/* name id */
  };

  struct rc_index_entry *from_disked_rc_index_entry(struct rc_index_entry_ondisk *src, struct rc_index_entry *dst);
diff --git a/revision.h b/revision.h
index cc5c259..c62e85b 100644
--- a/revision.h
+++ b/revision.h
@@ -26,6 +26,9 @@ struct rev_cache_slice_map {
  	unsigned char *map;
  	int size;
  	int last_index;
+
+	char *names;
+	int name_size;
  };

  struct rev_cache_info {
@@ -39,7 +42,8 @@ struct rev_cache_info {
  	unsigned overwrite_all : 1;

  	/* traversal flags */
-	unsigned add_to_pending : 1;
+	unsigned add_to_pending : 1,
+		add_names : 1;

  	/* fuse options */
  	unsigned int ignore_size;
diff --git a/t/t6017-rev-cache-list.sh b/t/t6017-rev-cache-list.sh
index 982fb15..f0f3bcf 100755
--- a/t/t6017-rev-cache-list.sh
+++ b/t/t6017-rev-cache-list.sh
@@ -4,8 +4,10 @@ test_description='git rev-cache tests'
  . ./test-lib.sh

  test_cmp_sorted() {
-	grep -io "[a-f0-9]*" $1 | sort >.tmpfile1 &&
-	grep -io "[a-f0-9]*" $2 | sort >.tmpfile2 &&
+# note that we're tip-toeing around the corner case of two objects/names
+# for the same SHA-1 => discrepencies between cached and non-cached walks
+	sort $1 >.tmpfile1 &&
+	sort $2 >.tmpfile2 &&
  	test_cmp .tmpfile1 .tmpfile2
  }

@@ -15,6 +17,8 @@ test_cmp_sorted() {
  # reuse
  test_expect_success 'init repo' '
  	echo bla >file &&
+	mkdir amaindir &&
+	echo watskeburt >amaindir/file &&
  	git add . &&
  	git commit -m "bla" &&

-- 
tg: (2b7d538..) t/revcache/names (depends on: t/revcache/docs)

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 6/6 (v4)] support for path name caching in rev-cache
  2009-08-17 12:31 [PATCH 6/6 (v4)] support for path name caching in rev-cache Nick Edelen
  2009-08-18  3:24 ` Nicolas Pitre
  2009-08-18 11:51 ` Nick Edelen
@ 2009-10-19 20:31 ` Nick Edelen
  2 siblings, 0 replies; 13+ messages in thread
From: Nick Edelen @ 2009-10-19 20:31 UTC (permalink / raw)
  To: Junio C Hamano, Nicolas Pitre, Johannes Schindelin, Sam Vilain,
	Michael J Gruber

An update to caching mechanism, allowing path names to be cached for blob and 
tree objects.  A list of names appearing in each cache slice is appended to the 
end of the slice, which is referenced by variable-sized indexes per entry.  
This allows pack-objects to more intelligently schedule unpacked/poorly packed 
object, and enables proper duplication of rev-list's behaivor.

The mechanism for this involves adding a 'name' field to blob and tree objects, 
mainly to facilitate reuse of caches during maintenence (like the 'unique' 
field).

Signed-off-by: Nick Edelen <sirnot@gmail.com>

---
 builtin-rev-cache.c       |    3 +-
 rev-cache.c               |  332 +++++++++++++++++++++++++++++++++++++--------
 rev-cache.h               |   16 ++-
 revision.h                |    6 +-
 t/t6017-rev-cache-list.sh |    8 +-
 5 files changed, 300 insertions(+), 65 deletions(-)

diff --git a/builtin-rev-cache.c b/builtin-rev-cache.c
index 8f41123..4c1766d 100644
--- a/builtin-rev-cache.c
+++ b/builtin-rev-cache.c
@@ -177,13 +177,14 @@ static int handle_walk(int argc, const char *argv[])
 	fprintf(stderr, "pending:\n");
 	for (i = 0; i < revs.pending.nr; i++) {
 		struct object *obj = revs.pending.objects[i].item;
+		const char *name = revs.pending.objects[i].name;
 
 		/* unfortunately, despite our careful generation, object duplication *is* a possibility...
 		 * (eg. same object introduced into two different branches) */
 		if (obj->flags & SEEN)
 			continue;
 
-		printf("%s\n", sha1_to_hex(revs.pending.objects[i].item->sha1));
+		printf("%s %s\n", sha1_to_hex(revs.pending.objects[i].item->sha1), name);
 		obj->flags |= SEEN;
 	}
 
diff --git a/rev-cache.c b/rev-cache.c
index 4ef5287..6c96297 100644
--- a/rev-cache.c
+++ b/rev-cache.c
@@ -17,6 +17,14 @@ struct bad_slice {
 	struct bad_slice *next;
 };
 
+struct name_list {
+	unsigned char sha1[20];
+	unsigned int len;
+	struct name_list *next;
+
+	char buf[FLEX_ARRAY];
+};
+
 struct cache_slice_pointer {
 	char signature[8]; /* REVCOPTR */
 	char version;
@@ -29,10 +37,13 @@ static uint32_t fanout[0xff + 2];
 static unsigned char *idx_map;
 static int idx_size;
 static struct rc_index_header idx_head;
-static char no_idx, add_to_pending;
-static struct bad_slice *bad_slices;
+static char no_idx, add_to_pending, add_names;
 static unsigned char *idx_caches;
 
+static struct bad_slice *bad_slices;
+static struct name_list *name_lists, *cur_name_list;
+
+static struct strbuf *acc_name_buffer;
 static struct strbuf *acc_buffer;
 
 #define SLOP			5
@@ -79,7 +90,7 @@ struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondis
 	if (!dst)
 		dst = &entry[cur++ & 0x3];
 
-	dst->type = src->flags >> 5;
+	dst->type = src->flags >> 5 & 0x03;
 	dst->is_end = !!(src->flags & 0x10);
 	dst->is_start = !!(src->flags & 0x08);
 	dst->uninteresting = !!(src->flags & 0x04);
@@ -90,8 +101,9 @@ struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondis
 	dst->merge_nr = src->merge_nr;
 	dst->split_nr = src->split_nr;
 
-	dst->size_size = src->sizes >> 5;
-	dst->padding = src->sizes & 0x1f;
+	dst->size_size = src->sizes >> 5 & 0x03;
+	dst->name_size = src->sizes >> 2 & 0x03;
+	dst->padding = src->sizes & 0x02;
 
 	dst->date = ntohl(src->date);
 	dst->path = ntohs(src->path);
@@ -120,6 +132,7 @@ struct rc_object_entry_ondisk *to_disked_rc_object_entry(struct rc_object_entry
 	dst->split_nr = src->split_nr;
 
 	dst->sizes  = (unsigned char)src->size_size << 5;
+	dst->sizes |= (unsigned char)src->name_size << 2;
 	dst->sizes |= (unsigned char)src->padding;
 
 	dst->date = htonl(src->date);
@@ -192,6 +205,12 @@ static void cleanup_cache_slices(void)
 		idx_map = 0;
 	}
 
+	while (name_lists) {
+		struct name_list *nl = name_lists->next;
+		free(name_lists);
+		name_lists = nl;
+	}
+
 }
 
 static int init_index(void)
@@ -324,7 +343,7 @@ static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsig
 	struct blob *blob;
 	struct tree *tree;
 	struct object *obj;
-	unsigned long size;
+	unsigned long size, name_index;
 
 	size = decode_size(ptr + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
 	switch (entry->type) {
@@ -357,9 +376,23 @@ static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsig
 		return;
 	}
 
+	if (add_names && cur_name_list) {
+		name_index = decode_size(ptr + RC_ENTRY_NAME_OFFSET(entry), entry->name_size);
+
+		if (name_index >= cur_name_list->len)
+			name_index = 0;
+	} else
+		name_index = 0;
+
 	obj->flags |= FACE_VALUE;
-	if (add_to_pending)
-		add_pending_object(revs, obj, "");
+	if (add_to_pending) {
+		char *name = "";
+
+		if (name_index)
+			name = cur_name_list->buf + name_index;
+
+		add_pending_object(revs, obj, name);
+	}
 }
 
 static int setup_traversal(struct rc_slice_header *head, unsigned char *map, struct commit *commit, struct commit_list **work,
@@ -713,15 +746,44 @@ end:
 	return retval;
 }
 
-static int get_cache_slice_header(unsigned char *cache_sha1, unsigned char *map, int len, struct rc_slice_header *head)
+static struct name_list *get_cache_slice_name_list(struct rc_slice_header *head, int fd)
+{
+	struct name_list *nl = name_lists;
+
+	while (nl) {
+		if (!hashcmp(nl->sha1, head->sha1))
+			break;
+		nl = nl->next;
+	}
+
+	if (nl)
+		return nl;
+
+	nl = xcalloc(1, sizeof(struct name_list) + head->name_size);
+	nl->len = head->name_size;
+	hashcpy(nl->sha1, head->sha1);
+
+	lseek(fd, head->size, SEEK_SET);
+	read_in_full(fd, nl->buf, head->name_size);
+
+	nl->next = name_lists;
+	name_lists = nl;
+
+	return nl;
+}
+
+static int get_cache_slice_header(int fd, unsigned char *cache_sha1, int len, struct rc_slice_header *head)
 {
 	int t;
 
-	memcpy(head, map, sizeof(struct rc_slice_header));
+	if (xread(fd, head, sizeof(struct rc_slice_header)) != sizeof(struct rc_slice_header))
+		return -1;
+
 	head->ofs_objects = ntohl(head->ofs_objects);
 	head->object_nr = ntohl(head->object_nr);
 	head->size = ntohl(head->size);
 	head->path_nr = ntohs(head->path_nr);
+	head->name_size = ntohl(head->name_size);
 
 	if (memcmp(head->signature, "REVCACHE", 8))
 		return -1;
@@ -730,10 +792,10 @@ static int get_cache_slice_header(unsigned char *cache_sha1, unsigned char *map,
 	if (hashcmp(head->sha1, cache_sha1))
 		return -3;
 	t = sizeof(struct rc_slice_header);
-	if (t != head->ofs_objects || t >= len)
+	if (t != head->ofs_objects)
 		return -4;
-
-	head->size = len;
+	if (head->size + head->name_size != len)
+		return -5;
 
 	return 0;
 }
@@ -785,7 +847,7 @@ int traverse_cache_slice(struct rev_info *revs,
 	unsigned long *date_so_far, int *slop_so_far,
 	struct commit_list ***queue, struct commit_list **work)
 {
-	int fd = -1, retval = -3;
+	int fd = -1, t, retval;
 	struct stat fi;
 	struct rc_slice_header head;
 	struct rev_cache_info *rci;
@@ -801,26 +863,31 @@ int traverse_cache_slice(struct rev_info *revs,
 	/* load options */
 	rci = &revs->rev_cache_info;
 	add_to_pending = rci->add_to_pending;
+	add_names = rci->add_names;
 
 	memset(&head, 0, sizeof(struct rc_slice_header));
+#	define ERROR(x)		do { retval = (x); goto end; } while (0);
 
 	fd = open_cache_slice(cache_sha1, O_RDONLY);
 	if (fd == -1)
-		goto end;
+		ERROR(-1);
 	if (fstat(fd, &fi) || fi.st_size < sizeof(struct rc_slice_header))
-		goto end;
+		ERROR(-2);
+
+	if ((t = get_cache_slice_header(fd, cache_sha1, fi.st_size, &head)) < 0)
+		ERROR(-t);
+	if (add_names)
+		cur_name_list = get_cache_slice_name_list(&head, fd);
 
-	map = xmmap(0, fi.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	map = xmmap(0, head.size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
 	if (map == MAP_FAILED)
-		goto end;
-	if (get_cache_slice_header(cache_sha1, map, fi.st_size, &head))
-		goto end;
+		ERROR(-3);
 
 	retval = traverse_cache_slice_1(&head, map, revs, commit, date_so_far, slop_so_far, queue, work);
 
 end:
 	if (map != MAP_FAILED)
-		munmap(map, fi.st_size);
+		munmap(map, head.size);
 	if (fd != -1)
 		close(fd);
 
@@ -828,6 +895,7 @@ end:
 	if (retval)
 		mark_bad_slice(cache_sha1);
 
+#	undef ERROR
 	return retval;
 }
 
@@ -1112,23 +1180,110 @@ static unsigned long decode_size(unsigned char *str, int len)
 	return size;
 }
 
+
+#define NL_HASH_TABLE_SIZE		(0xffff + 1)
+#define NL_HASH_NUMBER			(NL_HASH_TABLE_SIZE >> 3)
+
+struct name_list_hash {
+	int ind;
+	struct name_list_hash *next;
+};
+
+static struct name_list_hash **nl_hash_table;
+static unsigned char *nl_hashes;
+
+/* FNV-1a hash */
+static unsigned int hash_name(const char *name)
+{
+	unsigned int hash = 2166136261ul;
+	const char *p = name;
+
+	while (*p) {
+		hash ^= *p++;
+		hash *= 16777619ul;
+	}
+
+	return hash & 0xffff;
+}
+
+static int name_in_list(const char *name)
+{
+	unsigned int h = hash_name(name);
+	struct name_list_hash *entry = nl_hash_table[h];
+
+	while (entry && strcmp(acc_name_buffer->buf + entry->ind, name))
+		entry = entry->next;
+
+	if (entry)
+		return entry->ind;
+
+	/* add name to buffer and create hash reference */
+	entry = xcalloc(1, sizeof(struct name_list_hash));
+	entry->ind = acc_name_buffer->len;
+	strbuf_add(acc_name_buffer, name, strlen(name) + 1);
+
+	entry->next = nl_hash_table[h];
+	nl_hash_table[h] = entry;
+
+	nl_hashes[h / 8] |= h % 8;
+
+	return entry->ind;
+}
+
+static void init_name_list_hash(void)
+{
+	nl_hash_table = xcalloc(NL_HASH_TABLE_SIZE, sizeof(struct name_list_hash));
+	nl_hashes = xcalloc(NL_HASH_NUMBER, 1);
+}
+
+static void cleanup_name_list_hash(void)
+{
+	int i;
+
+	for (i = 0; i < NL_HASH_NUMBER; i++) {
+		int j, ind = nl_hashes[i];
+
+		if (!ind)
+			continue;
+
+		for (j = 0; j < 8; j++) {
+			struct name_list_hash **entryp;
+
+			if (!(ind & 1 << j))
+				continue;
+
+			entryp = &nl_hash_table[i * 8 + j];
+			while (*entryp) {
+				struct name_list_hash *t = (*entryp)->next;
+
+				free(*entryp);
+				*entryp = t;
+			}
+		}
+	} /* code overhang! */
+
+	free(nl_hashes);
+	free(nl_hash_table);
+}
+
 static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *entryp,
-	struct strbuf *merge_str, struct strbuf *split_str)
+	struct strbuf *merge_str, struct strbuf *split_str, char *name, unsigned long size)
 {
 	struct rc_object_entry entry;
-	unsigned char size_str[7];
-	unsigned long size;
+	unsigned char size_str[7], name_str[7];
 	enum object_type type;
 	void *data;
 
 	if (entryp)
 		sha1 = entryp->sha1;
 
-	/* retrieve size data */
-	data = read_sha1_file(sha1, &type, &size);
+	if (!size) {
+		/* retrieve size data */
+		data = read_sha1_file(sha1, &type, &size);
 
-	if (data)
-		free(data);
+		if (data)
+			free(data);
+	}
 
 	/* initialize! */
 	if (!entryp) {
@@ -1146,6 +1301,9 @@ static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *
 
 	entryp->size_size = encode_size(size, size_str);
 
+	if (name)
+		entryp->name_size = encode_size(name_in_list(name), name_str);
+
 	/* write the muvabitch */
 	strbuf_add(acc_buffer, to_disked_rc_object_entry(entryp, 0), sizeof(struct rc_object_entry_ondisk));
 
@@ -1155,25 +1313,36 @@ static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *
 		strbuf_add(acc_buffer, split_str->buf, split_str->len);
 
 	strbuf_add(acc_buffer, size_str, entryp->size_size);
+
+	if (name)
+		strbuf_add(acc_buffer, name_str, entryp->name_size);
 }
 
 /* returns non-zero to continue parsing, 0 to skip */
 typedef int (*dump_tree_fn)(const unsigned char *, const char *, unsigned int); /* sha1, path, mode */
 
 /* we need to walk the trees by hash, so unfortunately we can't use traverse_trees in tree-walk.c */
-static int dump_tree(struct tree *tree, dump_tree_fn fn)
+static int dump_tree(struct tree *tree, dump_tree_fn fn, char *base)
 {
 	struct tree_desc desc;
 	struct name_entry entry;
 	struct tree *subtree;
-	int r;
+	char concatpath[PATH_MAX];
+	int r, baselen;
 
 	if (parse_tree(tree))
 		return -1;
 
+	baselen = strlen(base);
+	strcpy(concatpath, base);
+
 	init_tree_desc(&desc, tree->buffer, tree->size);
 	while (tree_entry(&desc, &entry)) {
-		switch (fn(entry.sha1, entry.path, entry.mode)) {
+		if (baselen + strlen(entry.path) + 1 >= PATH_MAX)
+			die("we have a problem: %s%s is too big for me to handle", base, entry.path);
+		strcpy(concatpath + baselen, entry.path);
+
+		switch (fn(entry.sha1, concatpath, entry.mode)) {
 		case 0:
 			goto continue_loop;
 		default:
@@ -1185,7 +1354,8 @@ static int dump_tree(struct tree *tree, dump_tree_fn fn)
 			if (!subtree)
 				return -2;
 
-			if ((r = dump_tree(subtree, fn)) < 0)
+			strcat(concatpath, "/");
+			if ((r = dump_tree(subtree, fn, concatpath)) < 0)
 				return r;
 		}
 
@@ -1199,6 +1369,9 @@ continue_loop:
 static int dump_tree_callback(const unsigned char *sha1, const char *path, unsigned int mode)
 {
 	strbuf_add(acc_buffer, sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, path, strlen(path) + 1);
 
 	return 1;
 }
@@ -1212,6 +1385,9 @@ static void tree_addremove(struct diff_options *options,
 		return;
 
 	strbuf_add(acc_buffer, sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, concatpath, strlen(concatpath) + 1);
 }
 
 static void tree_change(struct diff_options *options,
@@ -1224,12 +1400,15 @@ static void tree_change(struct diff_options *options,
 		return;
 
 	strbuf_add(acc_buffer, new_sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, concatpath, strlen(concatpath) + 1);
 }
 
 static int add_unique_objects(struct commit *commit)
 {
 	struct commit_list *list;
-	struct strbuf os, ost, *orig_buf;
+	struct strbuf os, ost, names, *orig_name_buf, *orig_buf;
 	struct diff_options opts;
 	int i, j, next;
 	char is_first = 1;
@@ -1237,13 +1416,17 @@ static int add_unique_objects(struct commit *commit)
 	/* ...no, calculate unique objects */
 	strbuf_init(&os, 0);
 	strbuf_init(&ost, 0);
+	strbuf_init(&names, 0);
 	orig_buf = acc_buffer;
+	orig_name_buf = acc_name_buffer;
+	acc_name_buffer = &names;
 
 	diff_setup(&opts);
 	DIFF_OPT_SET(&opts, RECURSIVE);
 	DIFF_OPT_SET(&opts, TREE_IN_RECURSIVE);
 	opts.change = tree_change;
 	opts.add_remove = tree_addremove;
+#	define ENTRY_SIZE (20 + sizeof(size_t))
 
 	/* this is only called for non-ends (ie. all parents interesting) */
 	for (list = commit->parents; list; list = list->next) {
@@ -1254,20 +1437,20 @@ static int add_unique_objects(struct commit *commit)
 
 		strbuf_setlen(acc_buffer, 0);
 		diff_tree_sha1(list->item->tree->object.sha1, commit->tree->object.sha1, "", &opts);
-		qsort(acc_buffer->buf, acc_buffer->len / 20, 20, (int (*)(const void *, const void *))hashcmp);
+		qsort(acc_buffer->buf, acc_buffer->len / ENTRY_SIZE, ENTRY_SIZE, (int (*)(const void *, const void *))hashcmp);
 
 		/* take intersection */
 		if (!is_first) {
-			for (next = i = j = 0; i < os.len; i += 20) {
+			for (next = i = j = 0; i < os.len; i += ENTRY_SIZE) {
 				while (j < ost.len && hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)) < 0)
-					j += 20;
+					j += ENTRY_SIZE;
 
 				if (j >= ost.len || hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)))
 					continue;
 
 				if (next != i)
-					memcpy(os.buf + next, os.buf + i, 20);
-				next += 20;
+					memcpy(os.buf + next, os.buf + i, ENTRY_SIZE);
+				next += ENTRY_SIZE;
 			}
 
 			if (next != i)
@@ -1279,29 +1462,37 @@ static int add_unique_objects(struct commit *commit)
 	/* no parents (!) */
 	if (is_first) {
 		acc_buffer = &os;
-		dump_tree(commit->tree, dump_tree_callback);
+		dump_tree(commit->tree, dump_tree_callback, "");
 	}
 
 	/* the ordering of non-commit objects dosn't really matter, so we're not gonna bother */
 	acc_buffer = orig_buf;
-	for (i = 0; i < os.len; i += 20)
-		add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0);
+	acc_name_buffer = orig_name_buf;
+	for (i = 0; i < os.len; i += ENTRY_SIZE)
+		add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0, names.buf + *(size_t *)(os.buf + i + 20), 0);
 
 	/* last but not least, the main tree */
-	add_object_entry(commit->tree->object.sha1, 0, 0, 0);
+	add_object_entry(commit->tree->object.sha1, 0, 0, 0, 0, 0);
 
-	return i / 20 + 1;
+	strbuf_release(&ost);
+	strbuf_release(&os);
+	strbuf_release(&names);
+
+	return i / ENTRY_SIZE + 1;
+#	undef ENTRY_SIZE
 }
 
 static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *index)
 {
-	unsigned char *map = mapping->map;
 	int i = *index, object_nr = 0;
+	unsigned char *map = mapping->map;
 	struct rc_object_entry *entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
+	unsigned long size;
 
 	i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
 	while (i < mapping->size) {
-		int pos = i;
+		char *name;
+		int name_index, pos = i;
 
 		entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
 		i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
@@ -1311,7 +1502,15 @@ static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *inde
 			return object_nr;
 		}
 
-		strbuf_add(acc_buffer, map + pos, i - pos);
+		name_index = decode_size(map + pos + RC_ENTRY_NAME_OFFSET(entry), entry->name_size);
+		if (name_index && name_index < mapping->name_size)
+			name = mapping->names + name_index;
+		else
+			name = 0;
+
+		size = decode_size(map + pos + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
+
+		add_object_entry(0, entry, 0, 0, name, size);
 		object_nr++;
 	}
 
@@ -1396,6 +1595,7 @@ void init_rev_cache_info(struct rev_cache_info *rci)
 	rci->overwrite_all = 0;
 
 	rci->add_to_pending = 1;
+	rci->add_names = 1;
 
 	rci->ignore_size = 0;
 }
@@ -1420,9 +1620,9 @@ int make_cache_slice(struct rev_cache_info *rci,
 	struct rc_slice_header head;
 	struct commit *commit;
 	unsigned char sha1[20];
-	struct strbuf merge_paths, split_paths;
+	struct strbuf merge_paths, split_paths, namelist;
 	int object_nr, total_sz, fd;
-	char file[PATH_MAX], *newfile;
+	char file[PATH_MAX], null, *newfile;
 	struct rev_cache_info *trci;
 	git_SHA_CTX ctx;
 
@@ -1437,7 +1637,13 @@ int make_cache_slice(struct rev_cache_info *rci,
 	strbuf_init(&endlist, 0);
 	strbuf_init(&merge_paths, 0);
 	strbuf_init(&split_paths, 0);
+	strbuf_init(&namelist, 0);
 	acc_buffer = &buffer;
+	acc_name_buffer = &namelist;
+
+	null = 0;
+	strbuf_add(&namelist, &null, 1);
+	init_name_list_hash();
 
 	if (!revs) {
 		revs = &therevs;
@@ -1468,6 +1674,7 @@ int make_cache_slice(struct rev_cache_info *rci,
 	trci = &revs->rev_cache_info;
 	init_rev_cache_info(trci);
 	trci->add_to_pending = 0;
+	trci->add_names = 0;
 
 	setup_revisions(0, 0, revs, 0);
 	if (prepare_revision_walk(revs))
@@ -1505,7 +1712,7 @@ int make_cache_slice(struct rev_cache_info *rci,
 
 		commit->indegree = 0;
 
-		add_object_entry(0, &object, &merge_paths, &split_paths);
+		add_object_entry(0, &object, &merge_paths, &split_paths, 0, 0);
 		object_nr++;
 
 		if (rci->objects && !object.is_end) {
@@ -1531,10 +1738,16 @@ int make_cache_slice(struct rev_cache_info *rci,
 		total_sz += buffer.len;
 	}
 
+	/* write path name lookup list */
+	head.name_size = htonl(namelist.len);
+	write_in_full(fd, namelist.buf, namelist.len);
+
 	/* go ahead a free some stuff... */
 	strbuf_release(&buffer);
 	strbuf_release(&merge_paths);
 	strbuf_release(&split_paths);
+	strbuf_release(&namelist);
+	cleanup_name_list_hash();
 	if (path_sz)
 		free(paths);
 	while (path_track_alloc)
@@ -1992,6 +2205,7 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
 	for (i = idx_head.cache_nr - 1; i >= 0; i--) {
 		struct rev_cache_slice_map *map = rci->maps + i;
 		struct stat fi;
+		struct rc_slice_header head;
 		int fd;
 
 		if (!map->size)
@@ -2004,13 +2218,20 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
 			continue;
 		if (fi.st_size < sizeof(struct rc_slice_header))
 			continue;
+		if (get_cache_slice_header(fd, idx_caches + i * 20, fi.st_size, &head))
+			continue;
 
-		map->map = xmmap(0, fi.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+		map->map = xmmap(0, head.size, PROT_READ, MAP_PRIVATE, fd, 0);
 		if (map->map == MAP_FAILED)
 			continue;
 
+		lseek(fd, head.size, SEEK_SET);
+		map->names = xcalloc(head.name_size, 1);
+		read_in_full(fd, map->names, head.name_size);
+
 		close(fd);
-		map->size = fi.st_size;
+		map->size = head.size;
+		map->name_size = head.name_size;
 	}
 
 	rci->make_index = 0;
@@ -2027,6 +2248,7 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
 		if (!map->size)
 			continue;
 
+		free(map->names);
 		munmap(map->map, map->size);
 	}
 	free(rci->maps);
@@ -2048,7 +2270,6 @@ static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
 {
 	struct rc_slice_header head;
 	int fd, len, retval = -1;
-	unsigned char *map = MAP_FAILED;
 	struct stat fi;
 
 	len = strlen(slice_path);
@@ -2063,17 +2284,12 @@ static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
 	if (fstat(fd, &fi) || fi.st_size < sizeof(head))
 		goto end;
 
-	map = xmmap(0, sizeof(head), PROT_READ, MAP_PRIVATE, fd, 0);
-	if (map == MAP_FAILED)
-		goto end;
-	if (get_cache_slice_header(sha1, map, fi.st_size, &head))
+	if (get_cache_slice_header(fd, sha1, fi.st_size, &head))
 		goto end;
 
 	retval = 0;
 
 end:
-	if (map != MAP_FAILED)
-		munmap(map, sizeof(head));
 	if (fd > 0)
 		close(fd);
 
diff --git a/rev-cache.h b/rev-cache.h
index a1af337..0fa9b44 100644
--- a/rev-cache.h
+++ b/rev-cache.h
@@ -10,8 +10,14 @@
 #define RC_OBTAIN_OBJECT_ENTRY(p)			from_disked_rc_object_entry((struct rc_object_entry_ondisk *)(p), 0)
 #define RC_OBTAIN_INDEX_ENTRY(p)			from_disked_rc_index_entry((struct rc_index_entry_ondisk *)(p), 0)
 
-#define RC_ACTUAL_OBJECT_ENTRY_SIZE(e)		(sizeof(struct rc_object_entry_ondisk) + RC_PATH_SIZE((e)->merge_nr + (e)->split_nr) + (e)->size_size)
-#define RC_ENTRY_SIZE_OFFSET(e)				(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->size_size)
+#define RC_ACTUAL_OBJECT_ENTRY_SIZE(e)	(\
+	sizeof(struct rc_object_entry_ondisk) + \
+	RC_PATH_SIZE((e)->merge_nr + (e)->split_nr) + \
+	(e)->size_size + \
+	(e)->name_size\
+)
+#define RC_ENTRY_SIZE_OFFSET(e)			(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->name_size - (e)->size_size)
+#define RC_ENTRY_NAME_OFFSET(e)			(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->name_size)
 
 /* single index maps objects to cache files */
 struct rc_index_header {
@@ -50,6 +56,8 @@ struct rc_slice_header {
 	uint32_t size;
 
 	unsigned char sha1[20];
+
+	uint32_t name_size;
 };
 
 struct rc_object_entry_ondisk {
@@ -76,7 +84,8 @@ struct rc_object_entry {
 	unsigned char merge_nr; /* : 7 */
 	unsigned char split_nr; /* : 7 */
 	unsigned size_size:3;
-	unsigned padding:5;
+	unsigned name_size:3;
+	unsigned padding:2;
 
 	uint32_t date;
 	uint16_t path;
@@ -84,6 +93,7 @@ struct rc_object_entry {
 	/* merge paths */
 	/* split paths */
 	/* size */
+	/* name id */
 };
 
 struct rc_index_entry *from_disked_rc_index_entry(struct rc_index_entry_ondisk *src, struct rc_index_entry *dst);
diff --git a/revision.h b/revision.h
index d160e14..dd51d27 100644
--- a/revision.h
+++ b/revision.h
@@ -26,6 +26,9 @@ struct rev_cache_slice_map {
 	unsigned char *map;
 	int size;
 	int last_index;
+
+	char *names;
+	int name_size;
 };
 
 struct rev_cache_info {
@@ -39,7 +42,8 @@ struct rev_cache_info {
 	unsigned overwrite_all : 1;
 
 	/* traversal flags */
-	unsigned add_to_pending : 1;
+	unsigned add_to_pending : 1,
+		add_names : 1;
 
 	/* fuse options */
 	unsigned int ignore_size;
diff --git a/t/t6017-rev-cache-list.sh b/t/t6017-rev-cache-list.sh
index 982fb15..f0f3bcf 100755
--- a/t/t6017-rev-cache-list.sh
+++ b/t/t6017-rev-cache-list.sh
@@ -4,8 +4,10 @@ test_description='git rev-cache tests'
 . ./test-lib.sh
 
 test_cmp_sorted() {
-	grep -io "[a-f0-9]*" $1 | sort >.tmpfile1 &&
-	grep -io "[a-f0-9]*" $2 | sort >.tmpfile2 &&
+# note that we're tip-toeing around the corner case of two objects/names
+# for the same SHA-1 => discrepencies between cached and non-cached walks
+	sort $1 >.tmpfile1 &&
+	sort $2 >.tmpfile2 &&
 	test_cmp .tmpfile1 .tmpfile2
 }
 
@@ -15,6 +17,8 @@ test_cmp_sorted() {
 # reuse
 test_expect_success 'init repo' '
 	echo bla >file &&
+	mkdir amaindir &&
+	echo watskeburt >amaindir/file &&
 	git add . &&
 	git commit -m "bla" &&
 
-- 
tg: (9e5164a..) t/revcache/names (depends on: t/revcache/docs)

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2009-10-19 20:31 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-08-17 12:31 [PATCH 6/6 (v4)] support for path name caching in rev-cache Nick Edelen
2009-08-18  3:24 ` Nicolas Pitre
2009-08-18 11:31   ` Nick Edelen
2009-08-19  3:52     ` Nicolas Pitre
2009-08-20 12:43       ` Nick Edelen
2009-08-20 23:22         ` Nick Edelen
2009-08-21  0:05           ` Nicolas Pitre
2009-08-18 11:54   ` Johannes Schindelin
2009-08-18 11:51 ` Nick Edelen
2009-08-21  4:48   ` Nick Edelen
2009-09-07 14:11     ` Nick Edelen
2009-10-02 22:12   ` Nick Edelen
2009-10-19 20:31 ` Nick Edelen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).