All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH/RFC v2 0/16] Introduce index file format version 5
@ 2012-08-05 21:48 Thomas Gummerer
  2012-08-05 21:48 ` [PATCH/RFC v2 01/16] Modify cache_header to prepare for other index formats Thomas Gummerer
                   ` (16 more replies)
  0 siblings, 17 replies; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-05 21:48 UTC (permalink / raw)
  To: git; +Cc: trast, mhagger, gitster, pcouds, robin.rosenberg

Fist again apologies for those who were not credited in the first
version of this series.

The first version of the series was here: $gmane/202752.

Changes since the last version:

This series now applies to the latest master.

[PATCH/RFC v2 01/16] Modify cache_header to prepare for other index formats
Moved the cache_header structs to read-cache.c and redefined
cache_version_header in test-index-version.c

[PATCH/RFC v2 02/16] Modify read functions to prepare for other index
Make read_index_v2 static.

[PATCH/RFC v2 04/16] Modify write functions to prepare for other index
Make write_index_v2 static.

[PATCH/RFC v2 05/16] t2104: Don't fail for index versions other than [23]
Changed the test so that it converts the index to v2 at the beginning
of the test.

[PATCH/RFC v2 11/16] Read cache-tree in index-v5
Added a comment for cache_tree_convert_v5 that it is a destructive
function for the directory entries.

Added credits and reworded some commit messages.

Cleaned the patches up.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* [PATCH/RFC v2 01/16] Modify cache_header to prepare for other index formats
  2012-08-05 21:48 [PATCH/RFC v2 0/16] Introduce index file format version 5 Thomas Gummerer
@ 2012-08-05 21:48 ` Thomas Gummerer
  2012-08-06  1:17   ` Junio C Hamano
  2012-08-05 21:48 ` [PATCH/RFC v2 02/16] Modify read functions " Thomas Gummerer
                   ` (15 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-05 21:48 UTC (permalink / raw)
  To: git; +Cc: trast, mhagger, gitster, pcouds, robin.rosenberg, Thomas Gummerer

Modify the cache_header such that other index file formats
can be added and reusing the common part of each index format.

The signature and version have to be present in every
version of the index file format, to check if it can be read
by a specific version of git, while other entries (eg. number
of entries for index v2/3/4) can be different from one file
format to another. Therefore it is split to its own struct.

The structs are also moved to read-cache.c, since they are
not used in any other place except test-index-version, where
cache_version_header is redefined.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 cache.h              |    8 --------
 read-cache.c         |   32 +++++++++++++++++++++++++-------
 test-index-version.c |    7 ++++++-
 3 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/cache.h b/cache.h
index 67f28b4..9bfc9f3 100644
--- a/cache.h
+++ b/cache.h
@@ -94,16 +94,8 @@ unsigned long git_deflate_bound(git_zstream *, unsigned long);
  */
 #define DEFAULT_GIT_PORT 9418
 
-/*
- * Basic data structures for the directory cache
- */
 
 #define CACHE_SIGNATURE 0x44495243	/* "DIRC" */
-struct cache_header {
-	unsigned int hdr_signature;
-	unsigned int hdr_version;
-	unsigned int hdr_entries;
-};
 
 #define INDEX_FORMAT_LB 2
 #define INDEX_FORMAT_UB 4
diff --git a/read-cache.c b/read-cache.c
index 2f8159f..5d61d92 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1198,6 +1198,18 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int reall
 #define INDEX_FORMAT_DEFAULT 3
 
 /*
+ * Basic data structures for the directory cache
+ */
+struct cache_version_header {
+	unsigned int hdr_signature;
+	unsigned int hdr_version;
+};
+
+struct cache_header_v2 {
+	unsigned int hdr_entries;
+};
+
+/*
  * dev/ino/uid/gid/size are also just tracked to the low 32 bits
  * Again - this is just a (very strong in practice) heuristic that
  * the inode hasn't changed.
@@ -1247,7 +1259,7 @@ struct ondisk_cache_entry_extended {
 			    ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
 			    ondisk_cache_entry_size(ce_namelen(ce)))
 
-static int verify_hdr(struct cache_header *hdr, unsigned long size)
+static int verify_hdr(struct cache_version_header *hdr, unsigned long size)
 {
 	git_SHA_CTX c;
 	unsigned char sha1[20];
@@ -1409,7 +1421,8 @@ int read_index_from(struct index_state *istate, const char *path)
 	int fd, i;
 	struct stat st;
 	unsigned long src_offset;
-	struct cache_header *hdr;
+	struct cache_version_header *hdr;
+	struct cache_header_v2 *hdr_v2;
 	void *mmap;
 	size_t mmap_size;
 	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
@@ -1433,7 +1446,7 @@ int read_index_from(struct index_state *istate, const char *path)
 
 	errno = EINVAL;
 	mmap_size = xsize_t(st.st_size);
-	if (mmap_size < sizeof(struct cache_header) + 20)
+	if (mmap_size < sizeof(struct cache_version_header) + 20)
 		die("index file smaller than expected");
 
 	mmap = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
@@ -1442,11 +1455,13 @@ int read_index_from(struct index_state *istate, const char *path)
 		die_errno("unable to map index file");
 
 	hdr = mmap;
+	hdr_v2 =  mmap + sizeof(*hdr);
 	if (verify_hdr(hdr, mmap_size) < 0)
 		goto unmap;
 
+	hdr_v2 = mmap + sizeof(*hdr);
 	istate->version = ntohl(hdr->hdr_version);
-	istate->cache_nr = ntohl(hdr->hdr_entries);
+	istate->cache_nr = ntohl(hdr_v2->hdr_entries);
 	istate->cache_alloc = alloc_nr(istate->cache_nr);
 	istate->cache = xcalloc(istate->cache_alloc, sizeof(struct cache_entry *));
 	istate->initialized = 1;
@@ -1456,7 +1471,7 @@ int read_index_from(struct index_state *istate, const char *path)
 	else
 		previous_name = NULL;
 
-	src_offset = sizeof(*hdr);
+	src_offset = sizeof(*hdr) + sizeof(*hdr_v2);
 	for (i = 0; i < istate->cache_nr; i++) {
 		struct ondisk_cache_entry *disk_ce;
 		struct cache_entry *ce;
@@ -1757,7 +1772,8 @@ void update_index_if_able(struct index_state *istate, struct lock_file *lockfile
 int write_index(struct index_state *istate, int newfd)
 {
 	git_SHA_CTX c;
-	struct cache_header hdr;
+	struct cache_version_header hdr;
+	struct cache_header_v2 hdr_v2;
 	int i, err, removed, extended, hdr_version;
 	struct cache_entry **cache = istate->cache;
 	int entries = istate->cache_nr;
@@ -1787,11 +1803,13 @@ int write_index(struct index_state *istate, int newfd)
 
 	hdr.hdr_signature = htonl(CACHE_SIGNATURE);
 	hdr.hdr_version = htonl(hdr_version);
-	hdr.hdr_entries = htonl(entries - removed);
+	hdr_v2.hdr_entries = htonl(entries - removed);
 
 	git_SHA1_Init(&c);
 	if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0)
 		return -1;
+	if (ce_write(&c, newfd, &hdr_v2, sizeof(hdr_v2)) < 0)
+		return -1;
 
 	previous_name = (hdr_version == 4) ? &previous_name_buf : NULL;
 	for (i = 0; i < entries; i++) {
diff --git a/test-index-version.c b/test-index-version.c
index bfaad9e..3899a2f 100644
--- a/test-index-version.c
+++ b/test-index-version.c
@@ -1,8 +1,13 @@
 #include "cache.h"
 
+struct cache_version_header {
+	unsigned int hdr_signature;
+	unsigned int hdr_version;
+};
+
 int main(int argc, const char **argv)
 {
-	struct cache_header hdr;
+	struct cache_version_header hdr;
 	int version;
 
 	memset(&hdr,0,sizeof(hdr));
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH/RFC v2 02/16] Modify read functions to prepare for other index formats
  2012-08-05 21:48 [PATCH/RFC v2 0/16] Introduce index file format version 5 Thomas Gummerer
  2012-08-05 21:48 ` [PATCH/RFC v2 01/16] Modify cache_header to prepare for other index formats Thomas Gummerer
@ 2012-08-05 21:48 ` Thomas Gummerer
  2012-08-05 21:49 ` [PATCH/RFC v2 03/16] Modify match_stat_basic " Thomas Gummerer
                   ` (14 subsequent siblings)
  16 siblings, 0 replies; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-05 21:48 UTC (permalink / raw)
  To: git; +Cc: trast, mhagger, gitster, pcouds, robin.rosenberg, Thomas Gummerer

Modify the read_index_from function, splitting it up into
one function that stays the same for every index format,
doing the basic operations such as verifying the header,
and a function which is specific for each index version,
which does the real reading of the index.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 read-cache.c |  107 +++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 64 insertions(+), 43 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 5d61d92..24b5e02 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1259,10 +1259,8 @@ struct ondisk_cache_entry_extended {
 			    ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
 			    ondisk_cache_entry_size(ce_namelen(ce)))
 
-static int verify_hdr(struct cache_version_header *hdr, unsigned long size)
+static int verify_hdr_version(struct cache_version_header *hdr, unsigned long size)
 {
-	git_SHA_CTX c;
-	unsigned char sha1[20];
 	int hdr_version;
 
 	if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
@@ -1270,6 +1268,14 @@ static int verify_hdr(struct cache_version_header *hdr, unsigned long size)
 	hdr_version = ntohl(hdr->hdr_version);
 	if (hdr_version < 2 || 4 < hdr_version)
 		return error("bad index version %d", hdr_version);
+	return 0;
+}
+
+static int verify_hdr_v2(struct cache_version_header *hdr, unsigned long size)
+{
+	git_SHA_CTX c;
+	unsigned char sha1[20];
+
 	git_SHA1_Init(&c);
 	git_SHA1_Update(&c, hdr, size - 20);
 	git_SHA1_Final(sha1, &c);
@@ -1415,50 +1421,15 @@ static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk,
 	return ce;
 }
 
-/* remember to discard_cache() before reading a different cache! */
-int read_index_from(struct index_state *istate, const char *path)
+static void read_index_v2(struct index_state *istate, void *mmap, int mmap_size)
 {
-	int fd, i;
-	struct stat st;
+	int i;
 	unsigned long src_offset;
 	struct cache_version_header *hdr;
 	struct cache_header_v2 *hdr_v2;
-	void *mmap;
-	size_t mmap_size;
 	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
 
-	errno = EBUSY;
-	if (istate->initialized)
-		return istate->cache_nr;
-
-	errno = ENOENT;
-	istate->timestamp.sec = 0;
-	istate->timestamp.nsec = 0;
-	fd = open(path, O_RDONLY);
-	if (fd < 0) {
-		if (errno == ENOENT)
-			return 0;
-		die_errno("index file open failed");
-	}
-
-	if (fstat(fd, &st))
-		die_errno("cannot stat the open index");
-
-	errno = EINVAL;
-	mmap_size = xsize_t(st.st_size);
-	if (mmap_size < sizeof(struct cache_version_header) + 20)
-		die("index file smaller than expected");
-
-	mmap = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
-	close(fd);
-	if (mmap == MAP_FAILED)
-		die_errno("unable to map index file");
-
 	hdr = mmap;
-	hdr_v2 =  mmap + sizeof(*hdr);
-	if (verify_hdr(hdr, mmap_size) < 0)
-		goto unmap;
-
 	hdr_v2 = mmap + sizeof(*hdr);
 	istate->version = ntohl(hdr->hdr_version);
 	istate->cache_nr = ntohl(hdr_v2->hdr_entries);
@@ -1484,8 +1455,6 @@ int read_index_from(struct index_state *istate, const char *path)
 		src_offset += consumed;
 	}
 	strbuf_release(&previous_name_buf);
-	istate->timestamp.sec = st.st_mtime;
-	istate->timestamp.nsec = ST_MTIME_NSEC(st);
 
 	while (src_offset <= mmap_size - 20 - 8) {
 		/* After an array of active_nr index entries,
@@ -1505,12 +1474,64 @@ int read_index_from(struct index_state *istate, const char *path)
 		src_offset += 8;
 		src_offset += extsize;
 	}
+	return;
+unmap:
+	munmap(mmap, mmap_size);
+	die("index file corrupt");
+}
+
+/* remember to discard_cache() before reading a different cache! */
+int read_index_from(struct index_state *istate, const char *path)
+{
+	int fd;
+	struct stat st;
+	struct cache_version_header *hdr;
+	void *mmap;
+	size_t mmap_size;
+
+	errno = EBUSY;
+	if (istate->initialized)
+		return istate->cache_nr;
+
+	errno = ENOENT;
+	istate->timestamp.sec = 0;
+	istate->timestamp.nsec = 0;
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		if (errno == ENOENT)
+			return 0;
+		die_errno("index file open failed");
+	}
+
+	if (fstat(fd, &st))
+		die_errno("cannot stat the open index");
+
+	errno = EINVAL;
+	mmap_size = xsize_t(st.st_size);
+	if (mmap_size < sizeof(struct cache_version_header) + 20)
+		die("index file smaller than expected");
+
+	mmap = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	if (mmap == MAP_FAILED)
+		die_errno("unable to map index file");
+
+	hdr = mmap;
+	if (verify_hdr_version(hdr, mmap_size) < 0)
+		goto unmap;
+
+	if (verify_hdr_v2(hdr, mmap_size) < 0)
+		goto unmap;
+
+	read_index_v2(istate, mmap, mmap_size);
+	istate->timestamp.sec = st.st_mtime;
+	istate->timestamp.nsec = ST_MTIME_NSEC(st);
+
+	close(fd);
 	munmap(mmap, mmap_size);
 	return istate->cache_nr;
 
 unmap:
 	munmap(mmap, mmap_size);
-	errno = EINVAL;
 	die("index file corrupt");
 }
 
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH/RFC v2 03/16] Modify match_stat_basic to prepare for other index formats
  2012-08-05 21:48 [PATCH/RFC v2 0/16] Introduce index file format version 5 Thomas Gummerer
  2012-08-05 21:48 ` [PATCH/RFC v2 01/16] Modify cache_header to prepare for other index formats Thomas Gummerer
  2012-08-05 21:48 ` [PATCH/RFC v2 02/16] Modify read functions " Thomas Gummerer
@ 2012-08-05 21:49 ` Thomas Gummerer
  2012-08-05 21:49 ` [PATCH/RFC v2 04/16] Modify write functions " Thomas Gummerer
                   ` (13 subsequent siblings)
  16 siblings, 0 replies; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-05 21:49 UTC (permalink / raw)
  To: git; +Cc: trast, mhagger, gitster, pcouds, robin.rosenberg, Thomas Gummerer

Modify match_stat_basic, into one function that handles the
general case, which is the same for all index formats, and
a function that handles the specific parts for each index
file version.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 read-cache.c |   71 +++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 40 insertions(+), 31 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 24b5e02..6e8991a 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -163,38 +163,10 @@ static int ce_modified_check_fs(struct cache_entry *ce, struct stat *st)
 	return 0;
 }
 
-static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st)
+static int ce_match_stat_basic_v2(struct cache_entry *ce,
+				struct stat *st,
+				int changed)
 {
-	unsigned int changed = 0;
-
-	if (ce->ce_flags & CE_REMOVE)
-		return MODE_CHANGED | DATA_CHANGED | TYPE_CHANGED;
-
-	switch (ce->ce_mode & S_IFMT) {
-	case S_IFREG:
-		changed |= !S_ISREG(st->st_mode) ? TYPE_CHANGED : 0;
-		/* We consider only the owner x bit to be relevant for
-		 * "mode changes"
-		 */
-		if (trust_executable_bit &&
-		    (0100 & (ce->ce_mode ^ st->st_mode)))
-			changed |= MODE_CHANGED;
-		break;
-	case S_IFLNK:
-		if (!S_ISLNK(st->st_mode) &&
-		    (has_symlinks || !S_ISREG(st->st_mode)))
-			changed |= TYPE_CHANGED;
-		break;
-	case S_IFGITLINK:
-		/* We ignore most of the st_xxx fields for gitlinks */
-		if (!S_ISDIR(st->st_mode))
-			changed |= TYPE_CHANGED;
-		else if (ce_compare_gitlink(ce))
-			changed |= DATA_CHANGED;
-		return changed;
-	default:
-		die("internal error: ce_mode is %o", ce->ce_mode);
-	}
 	if (ce->ce_mtime.sec != (unsigned int)st->st_mtime)
 		changed |= MTIME_CHANGED;
 	if (trust_ctime && ce->ce_ctime.sec != (unsigned int)st->st_ctime)
@@ -235,6 +207,43 @@ static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st)
 	return changed;
 }
 
+static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st)
+{
+	unsigned int changed = 0;
+
+	if (ce->ce_flags & CE_REMOVE)
+		return MODE_CHANGED | DATA_CHANGED | TYPE_CHANGED;
+
+	switch (ce->ce_mode & S_IFMT) {
+	case S_IFREG:
+		changed |= !S_ISREG(st->st_mode) ? TYPE_CHANGED : 0;
+		/* We consider only the owner x bit to be relevant for
+		 * "mode changes"
+		 */
+		if (trust_executable_bit &&
+		    (0100 & (ce->ce_mode ^ st->st_mode)))
+			changed |= MODE_CHANGED;
+		break;
+	case S_IFLNK:
+		if (!S_ISLNK(st->st_mode) &&
+		    (has_symlinks || !S_ISREG(st->st_mode)))
+			changed |= TYPE_CHANGED;
+		break;
+	case S_IFGITLINK:
+		/* We ignore most of the st_xxx fields for gitlinks */
+		if (!S_ISDIR(st->st_mode))
+			changed |= TYPE_CHANGED;
+		else if (ce_compare_gitlink(ce))
+			changed |= DATA_CHANGED;
+		return changed;
+	default:
+		die("internal error: ce_mode is %o", ce->ce_mode);
+	}
+
+	changed = ce_match_stat_basic_v2(ce, st, changed);
+	return changed;
+}
+
 static int is_racy_timestamp(const struct index_state *istate, struct cache_entry *ce)
 {
 	return (!S_ISGITLINK(ce->ce_mode) &&
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH/RFC v2 04/16] Modify write functions to prepare for other index formats
  2012-08-05 21:48 [PATCH/RFC v2 0/16] Introduce index file format version 5 Thomas Gummerer
                   ` (2 preceding siblings ...)
  2012-08-05 21:49 ` [PATCH/RFC v2 03/16] Modify match_stat_basic " Thomas Gummerer
@ 2012-08-05 21:49 ` Thomas Gummerer
  2012-08-06  1:34   ` Junio C Hamano
  2012-08-05 21:49 ` [PATCH/RFC v2 05/16] t2104: Don't fail for index versions other than [23] Thomas Gummerer
                   ` (12 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-05 21:49 UTC (permalink / raw)
  To: git; +Cc: trast, mhagger, gitster, pcouds, robin.rosenberg, Thomas Gummerer

Modify the write_index function to add the possibility to add
other index formats, that are written in a different way. Also
mark all functions, which shall only be used with v2-v4 as v2
functions.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 read-cache.c |   43 ++++++++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 6e8991a..dceaa5c 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1595,7 +1595,7 @@ static int ce_write_flush(git_SHA_CTX *context, int fd)
 	return 0;
 }
 
-static int ce_write(git_SHA_CTX *context, int fd, void *data, unsigned int len)
+static int ce_write_v2(git_SHA_CTX *context, int fd, void *data, unsigned int len)
 {
 	while (len) {
 		unsigned int buffered = write_buffer_len;
@@ -1617,13 +1617,13 @@ static int ce_write(git_SHA_CTX *context, int fd, void *data, unsigned int len)
 	return 0;
 }
 
-static int write_index_ext_header(git_SHA_CTX *context, int fd,
+static int write_index_ext_header_v2(git_SHA_CTX *context, int fd,
 				  unsigned int ext, unsigned int sz)
 {
 	ext = htonl(ext);
 	sz = htonl(sz);
-	return ((ce_write(context, fd, &ext, 4) < 0) ||
-		(ce_write(context, fd, &sz, 4) < 0)) ? -1 : 0;
+	return ((ce_write_v2(context, fd, &ext, 4) < 0) ||
+		(ce_write_v2(context, fd, &sz, 4) < 0)) ? -1 : 0;
 }
 
 static int ce_flush(git_SHA_CTX *context, int fd)
@@ -1648,7 +1648,7 @@ static int ce_flush(git_SHA_CTX *context, int fd)
 	return (write_in_full(fd, write_buffer, left) != left) ? -1 : 0;
 }
 
-static void ce_smudge_racily_clean_entry(struct cache_entry *ce)
+static void ce_smudge_racily_clean_entry_v2(struct cache_entry *ce)
 {
 	/*
 	 * The only thing we care about in this function is to smudge the
@@ -1729,7 +1729,7 @@ static char *copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
 	}
 }
 
-static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce,
+static int ce_write_entry_v2(git_SHA_CTX *c, int fd, struct cache_entry *ce,
 			  struct strbuf *previous_name)
 {
 	int size;
@@ -1769,7 +1769,7 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce,
 			      ce->name + common, ce_namelen(ce) - common);
 	}
 
-	result = ce_write(c, fd, ondisk, size);
+	result = ce_write_v2(c, fd, ondisk, size);
 	free(ondisk);
 	return result;
 }
@@ -1799,7 +1799,7 @@ void update_index_if_able(struct index_state *istate, struct lock_file *lockfile
 		rollback_lock_file(lockfile);
 }
 
-int write_index(struct index_state *istate, int newfd)
+static int write_index_v2(struct index_state *istate, int newfd)
 {
 	git_SHA_CTX c;
 	struct cache_version_header hdr;
@@ -1822,9 +1822,6 @@ int write_index(struct index_state *istate, int newfd)
 		}
 	}
 
-	if (!istate->version)
-		istate->version = INDEX_FORMAT_DEFAULT;
-
 	/* demote version 3 to version 2 when the latter suffices */
 	if (istate->version == 3 || istate->version == 2)
 		istate->version = extended ? 3 : 2;
@@ -1836,9 +1833,9 @@ int write_index(struct index_state *istate, int newfd)
 	hdr_v2.hdr_entries = htonl(entries - removed);
 
 	git_SHA1_Init(&c);
-	if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0)
+	if (ce_write_v2(&c, newfd, &hdr, sizeof(hdr)) < 0)
 		return -1;
-	if (ce_write(&c, newfd, &hdr_v2, sizeof(hdr_v2)) < 0)
+	if (ce_write_v2(&c, newfd, &hdr_v2, sizeof(hdr_v2)) < 0)
 		return -1;
 
 	previous_name = (hdr_version == 4) ? &previous_name_buf : NULL;
@@ -1847,8 +1844,8 @@ int write_index(struct index_state *istate, int newfd)
 		if (ce->ce_flags & CE_REMOVE)
 			continue;
 		if (!ce_uptodate(ce) && is_racy_timestamp(istate, ce))
-			ce_smudge_racily_clean_entry(ce);
-		if (ce_write_entry(&c, newfd, ce, previous_name) < 0)
+			ce_smudge_racily_clean_entry_v2(ce);
+		if (ce_write_entry_v2(&c, newfd, ce, previous_name) < 0)
 			return -1;
 	}
 	strbuf_release(&previous_name_buf);
@@ -1858,8 +1855,8 @@ int write_index(struct index_state *istate, int newfd)
 		struct strbuf sb = STRBUF_INIT;
 
 		cache_tree_write(&sb, istate->cache_tree);
-		err = write_index_ext_header(&c, newfd, CACHE_EXT_TREE, sb.len) < 0
-			|| ce_write(&c, newfd, sb.buf, sb.len) < 0;
+		err = write_index_ext_header_v2(&c, newfd, CACHE_EXT_TREE, sb.len) < 0
+			|| ce_write_v2(&c, newfd, sb.buf, sb.len) < 0;
 		strbuf_release(&sb);
 		if (err)
 			return -1;
@@ -1868,9 +1865,9 @@ int write_index(struct index_state *istate, int newfd)
 		struct strbuf sb = STRBUF_INIT;
 
 		resolve_undo_write(&sb, istate->resolve_undo);
-		err = write_index_ext_header(&c, newfd, CACHE_EXT_RESOLVE_UNDO,
+		err = write_index_ext_header_v2(&c, newfd, CACHE_EXT_RESOLVE_UNDO,
 					     sb.len) < 0
-			|| ce_write(&c, newfd, sb.buf, sb.len) < 0;
+			|| ce_write_v2(&c, newfd, sb.buf, sb.len) < 0;
 		strbuf_release(&sb);
 		if (err)
 			return -1;
@@ -1883,6 +1880,14 @@ int write_index(struct index_state *istate, int newfd)
 	return 0;
 }
 
+int write_index(struct index_state *istate, int newfd)
+{
+	if (!istate->version)
+		istate->version = INDEX_FORMAT_DEFAULT;
+
+	return write_index_v2(istate, newfd);
+}
+
 /*
  * Read the index file that is potentially unmerged into given
  * index_state, dropping any unmerged entries.  Returns true if
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH/RFC v2 05/16] t2104: Don't fail for index versions other than [23]
  2012-08-05 21:48 [PATCH/RFC v2 0/16] Introduce index file format version 5 Thomas Gummerer
                   ` (3 preceding siblings ...)
  2012-08-05 21:49 ` [PATCH/RFC v2 04/16] Modify write functions " Thomas Gummerer
@ 2012-08-05 21:49 ` Thomas Gummerer
  2012-08-06  1:36   ` Junio C Hamano
  2012-08-05 21:49 ` [PATCH/RFC v2 06/16] t3700: sleep for 1 second, to avoid interfering with the racy code Thomas Gummerer
                   ` (11 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-05 21:49 UTC (permalink / raw)
  To: git; +Cc: trast, mhagger, gitster, pcouds, robin.rosenberg, Thomas Gummerer

t2104 currently checks for the exact index version 2 or 3,
depending if there is a skip-worktree flag or not. Other
index versions do not use extended flags and thus cannot
be tested for version changes.

Make this test update the index to version 2 at the beginning
of the test. Testing the skip-worktree flags for the default
index format is still covered by t7011 and t7012.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 t/t2104-update-index-skip-worktree.sh |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/t/t2104-update-index-skip-worktree.sh b/t/t2104-update-index-skip-worktree.sh
index 1d0879b..4ef7d99 100755
--- a/t/t2104-update-index-skip-worktree.sh
+++ b/t/t2104-update-index-skip-worktree.sh
@@ -25,7 +25,8 @@ test_expect_success 'setup' '
 	mkdir sub &&
 	touch ./1 ./2 sub/1 sub/2 &&
 	git add 1 2 sub/1 sub/2 &&
-	git ls-files -t | test_cmp expect.full -
+	git ls-files -t | test_cmp expect.full - &&
+	git update-index --index-version=2
 '
 
 test_expect_success 'index is at version 2' '
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH/RFC v2 06/16] t3700: sleep for 1 second, to avoid interfering with the racy code
  2012-08-05 21:48 [PATCH/RFC v2 0/16] Introduce index file format version 5 Thomas Gummerer
                   ` (4 preceding siblings ...)
  2012-08-05 21:49 ` [PATCH/RFC v2 05/16] t2104: Don't fail for index versions other than [23] Thomas Gummerer
@ 2012-08-05 21:49 ` Thomas Gummerer
  2012-08-06  1:43   ` Junio C Hamano
  2012-08-05 21:49 ` [PATCH/RFC v2 07/16] Add documentation of the index-v5 file format Thomas Gummerer
                   ` (10 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-05 21:49 UTC (permalink / raw)
  To: git; +Cc: trast, mhagger, gitster, pcouds, robin.rosenberg, Thomas Gummerer

The new git racy code uses the mtime of cache-entries to smudge
a racy clean entry, and loads the work, of checking the file-system
if the entry has really changed, off to the reader. This interferes
with this test, because the entry is racily smudged and thus has
mtime 0. We wait 1 second to avoid smudging the entry and getting
correct test results.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 t/t3700-add.sh |    1 +
 1 file changed, 1 insertion(+)

diff --git a/t/t3700-add.sh b/t/t3700-add.sh
index 874b3a6..4d70805 100755
--- a/t/t3700-add.sh
+++ b/t/t3700-add.sh
@@ -184,6 +184,7 @@ test_expect_success 'git add --refresh with pathspec' '
 	echo >foo && echo >bar && echo >baz &&
 	git add foo bar baz && H=$(git rev-parse :foo) && git rm -f foo &&
 	echo "100644 $H 3	foo" | git update-index --index-info &&
+	sleep 1 &&
 	test-chmtime -60 bar baz &&
 	>expect &&
 	git add --refresh bar >actual &&
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH/RFC v2 07/16] Add documentation of the index-v5 file format
  2012-08-05 21:48 [PATCH/RFC v2 0/16] Introduce index file format version 5 Thomas Gummerer
                   ` (5 preceding siblings ...)
  2012-08-05 21:49 ` [PATCH/RFC v2 06/16] t3700: sleep for 1 second, to avoid interfering with the racy code Thomas Gummerer
@ 2012-08-05 21:49 ` Thomas Gummerer
  2012-08-05 21:49 ` [PATCH/RFC v2 08/16] Make in-memory format aware of stat_crc Thomas Gummerer
                   ` (9 subsequent siblings)
  16 siblings, 0 replies; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-05 21:49 UTC (permalink / raw)
  To: git; +Cc: trast, mhagger, gitster, pcouds, robin.rosenberg, Thomas Gummerer

Add a documentation of the index file format version 5 to
Documentation/technical.

Helped-by: Michael Haggerty <mhagger@alum.mit.edu>
Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Thomas Rast <trast@student.ethz.ch>
Helped-by: Nguyen Thai Ngoc Duy <pclouds@gmail.com>
Helped-by: Robin Rosenberg <robin.rosenberg@dewire.com>
Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 Documentation/technical/index-file-format-v5.txt |  281 ++++++++++++++++++++++
 1 file changed, 281 insertions(+)
 create mode 100644 Documentation/technical/index-file-format-v5.txt

diff --git a/Documentation/technical/index-file-format-v5.txt b/Documentation/technical/index-file-format-v5.txt
new file mode 100644
index 0000000..6253e34
--- /dev/null
+++ b/Documentation/technical/index-file-format-v5.txt
@@ -0,0 +1,281 @@
+GIT index format
+================
+
+== The git index file format
+
+   The git index file (.git/index) documents the status of the files
+     in the git staging area.
+
+   The staging area is used for preparing commits, merging, etc.
+
+   All binary numbers are in network byte order. Version 5 is described
+     here.
+
+   - A 20-byte header consisting of
+
+     sig (32-bits): Signature:
+       The signature is { 'D', 'I', 'R', 'C' } (stands for "dircache")
+
+     vnr (32-bits): Version number:
+       The current supported versions are 2, 3, 4 and 5.
+
+     ndir (32-bits): number of directories in the index.
+
+     nfile (32-bits): number of file entries in the index.
+
+     fblockoffset (32-bits): offset to the file block, relative to the
+       beginning of the file.
+
+   - Offset to the extensions.
+
+     nextensions (32-bits): number of extensions.
+
+     extoffset (32-bits): offset to the extension. (Possibly none, as
+       many as indicated in the 4-byte number of extensions)
+
+     headercrc (32-bits): crc checksum for the header and extension
+       offsets
+
+   - diroffsets (ndir * directory offsets): A directory offset for each
+       of the ndir directories in the index, sorted by pathname (of the
+       directory it's pointing to) (see below). The diroffsets are relative
+       to the beginning of the direntries block. [1]
+
+   - direntries (ndir * directory entries): A directory entry for each
+       of the ndir directories in the index, sorted by pathname (see
+       below). [2]
+
+   - fileoffsets (nfile * file offsets): A file offset for each of the
+       nfile files in the index (see below). The file offsets are relative
+       to the beginning of the fileentries block. [1]
+
+   - fileentries (nfile * file entries): A file entry for each of the
+       nfile files in the index (see below).
+
+   - crdata: A number of entries for conflicted data/resolved conflicts
+       (see below).
+
+   - Extensions (Currently none, see below in the future)
+
+     Extensions are identified by signature. Optional extensions can
+     be ignored if GIT does not understand them.
+
+     GIT supports an arbitrary number of extension, but currently none
+     is implemented. [3]
+
+     extsig (32-bits): extension signature. If the first byte is 'A'..'Z'
+     the extension is optional and can be ignored.
+
+     extsize (32-bits): size of the extension, excluding the header
+       (extsig, extsize, extchecksum).
+
+     extchecksum (32-bits): crc32 checksum of the extension signature
+       and size.
+
+    - Extension data.
+
+
+== Directory offsets (diroffsets)
+
+  diroffset (32-bits): offset to the directory relative to the beginning
+    of the index file. There are ndir + 1 offsets in the diroffset table,
+    the last is pointing to the end of the last direntry. With this last
+    entry, we can replace the strlen when reading each filename, by
+    calculating its length with the offsets.
+
+  This part is needed for making the directory entries bisectable and
+    thus allowing a binary search.
+
+== Directory entry (direntries)
+  
+  Directory entries are sorted in lexicographic order by the name 
+    of their path starting with the root.
+  
+  pathname (variable length, nul terminated): relative to top level
+    directory (without the leading slash). '/' is used as path
+    separator. A string of length 0 ('') indicates the root directory.
+    The special path components ".", and ".." (without quotes) are
+    disallowed. The path also includes a trailing slash. [9]
+
+  foffset (32-bits): offset to the lexicographically first file in 
+    the file offsets (fileoffsets), relative to the beginning of
+    the fileoffset block.
+
+  cr (32-bits): offset to conflicted/resolved data at the end of the
+    index. 0 if there is no such data. [4]
+
+  ncr (32-bits): number of conflicted/resolved data entries at the
+    end of the index if the offset is non 0. If cr is 0, ncr is
+    also 0.
+
+  nsubtrees (32-bits): number of subtrees this tree has in the index.
+
+  nfiles (32-bits): number of files in the directory, that are in
+    the index.
+
+  nentries (32-bits): number of entries in the index that is covered
+    by the tree this entry represents. (-1 if the entry is invalid).
+    This number includes all the files in this tree, recursively.
+
+  objname (160-bits): object name for the object that would result
+    from writing this span of index as a tree. This is only valid
+    if nentries is valid, meaning the cache-tree is valid.
+
+  flags (16-bits): 'flags' field split into (high to low bits) (For
+    D/F conflicts)
+    
+    stage (2-bits): stage of the directory during merge
+
+    14-bit unused
+
+  dircrc (32-bits): crc32 checksum for each directory entry.
+
+  The last 24 bytes (4-byte number of entries + 160-bit object name) are
+    for the cache tree. An entry can be in an invalidated state which is
+    represented by having -1 in the entry_count field.
+
+  The entries are written out in the top-down, depth-first order. The
+    first entry represents the root level of the repository, followed by
+    the first subtree - let's call it A - of the root level, followed by
+    the first subtree of A, ... There is no prefix compression for
+    directories.
+
+== File offsets (fileoffsets)
+
+  fileoffset (32-bits): offset to the file.
+
+  This part is needed for making the file entries bisectable and
+    thus allowing a binary search. There are nfile + 1 offsets in the
+    fileoffset table, the last is pointing to the end of the last
+    fileentry. With this last entry, we can replace the strlen when
+    reading each filename, by calculating its length with the offsets.
+
+== File entry (fileentries)
+  
+  File entries are sorted in ascending order on the name field, after the
+  respective offset given by the directory entries. All file names are
+  prefix compressed, meaning the file name is relative to the directory.
+
+  filename (variable length, nul terminated). The exact encoding is 
+    undefined, but the filename cannot contain a NUL byte (iow, the same
+    encoding as a UNIX pathname).
+
+  flags (16-bits): 'flags' field split into (high to low bits)
+
+    assumevalid (1-bit): assume-valid flag
+
+    intenttoadd (1-bit): intent-to-add flag, used by "git add -N".
+      Extended flag in index v3.
+
+    stage (2-bit): stage of the file during merge
+
+    skipworktree (1-bit): skip-worktree flag, used by sparse checkout.
+      Extended flag in index v3.
+
+    11-bit unused, must be zero [6]
+
+  mode (16-bits): file mode, split into (high to low bits)
+
+    objtype (4-bits): object type
+      valid values in binary are 1000 (regular file), 1010 (symbolic
+      link) and 1110 (gitlink)
+
+    3-bit unused
+
+    permission (9-bits): unix permission. Only 0755 and 0644 are valid
+      for regular files. Symbolic links and gitlinks have value 0 in 
+      this field.
+
+  mtimes (32-bits): mtime seconds, the last time a file's data changed
+    this is stat(2) data
+
+  mtimens (32-bits): mtime nanosecond fractions
+    this is stat(2) data
+
+  statcrc (32-bits): crc32 checksum over ctime seconds, ctime
+    nanoseconds, ino, file size, dev, uid, gid (All stat(2) data
+    except mtime) [7]
+
+  objhash (160-bits): SHA-1 for the represented object
+
+# This will probably be changed in future versions as discussed here: http://colabti.org/irclogger/irclogger_log/git-devel?date=2012-06-21
+  entrycrc (32-bits): crc32 checksum for the file entry. The crc code
+    includes the offset to the file.
+
+== Conflict data
+
+  A conflict is represented in the index as a set of higher stage entries.
+  These entries are stored at the end of the index. When a conflict is 
+  resolved (e.g. with "git add path"). A bit is flipped, to indicate that
+  the conflict is resolved, but the entries will be kept, so that
+  conflicts can be recreated (e.g. with "git checkout -m", in case users
+  want to redo a conflict resolution from scratch.
+
+  The first part of a conflict (usually stage 1) will be stored both in
+  the entries part of the index and in the conflict part. All other parts
+  will only be stored in the conflict part.
+
+  filename (variable length, nul terminated): filename of the entry,
+    relative to its containing directory).
+
+  nfileconflicts (32-bits): number of conflicts for the file [8]
+
+  flags (nfileconflicts * flags) (16-bits): 'flags' field split into:
+    
+    conflicted (1-bit): conflicted state (conflicted/resolved) (1 if
+      conflicted)
+
+    stage (2-bits): stage during merge.
+   
+    13-bit unused
+
+  entry_mode (nfileconflicts * entry mode) (16-bits): octal numbers, entry
+    mode of eache entry in the different stages. (How many is defined by
+    the 4-byte number before)
+
+  objectnames (nfileconflicts * object names) (160-bits): object names 
+    of the different stages.
+
+  conflictcrc (32-bits): crc32 checksum over conflict data.
+
+== Design explanations
+
+[1] The directory and file offsets are included in the index format
+    to enable bisectability of the index, for binary searches.Updating
+    a single entry and partial reading will benefit from this.
+
+[2] The directories are saved in their own block, to be able to
+    quickly search for a directory in the index. They include a
+    offset to the (lexically) first file in the directory.
+
+[3] The data of the cache-tree extension and the resolve undo
+    extension is now part of the index itself, but if other extensions
+    come up in the future, there is no need to change the index, they
+    can simply be added at the end.
+
+[4] To avoid rewrites of the whole index when there are conflicts or
+    conflicts are being resolved, conflicted data will be stored at
+    the end of the index. To mark the conflict resolved, just a bit
+    has to be flipped. The data will still be there, if a user wants
+    to redo the conflict resolution.
+
+[5] Since only 4 modes are effectively allowed in git but 32-bit are
+    used to store them, having a two bit flag for the mode is enough
+    and saves 4 byte per entry.
+
+[6] The length of the file name was dropped, since each file name is
+    nul terminated anyway.
+
+[7] Since all stat data (except mtime and ctime) is just used for
+    checking if a file has changed a checksum of the data is enough.
+    In addition to that Thomas Rast suggested ctime could be ditched
+    completely (core.trustctime=false) and thus included in the
+    checksum. This would save 24 bytes per index entry, which would
+    be about 4 MB on the Webkit index.
+    (Thanks for the suggestion to Michael Haggerty)
+
+[8] Since there can be more stage #1 entries, it is necessary to know
+    the number of conflict data entries there are.
+
+[9] As Michael Haggerty pointed out on the mailing list, storing the
+    trailing slash will simplify a few operations.
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH/RFC v2 08/16] Make in-memory format aware of stat_crc
  2012-08-05 21:48 [PATCH/RFC v2 0/16] Introduce index file format version 5 Thomas Gummerer
                   ` (6 preceding siblings ...)
  2012-08-05 21:49 ` [PATCH/RFC v2 07/16] Add documentation of the index-v5 file format Thomas Gummerer
@ 2012-08-05 21:49 ` Thomas Gummerer
  2012-08-06  1:46   ` Junio C Hamano
  2012-08-05 21:49 ` [PATCH/RFC v2 09/16] Read index-v5 Thomas Gummerer
                   ` (8 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-05 21:49 UTC (permalink / raw)
  To: git; +Cc: trast, mhagger, gitster, pcouds, robin.rosenberg, Thomas Gummerer

Make the in-memory format aware of the stat_crc used by index-v5.
It is simply ignored by index version prior to v5.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 cache.h      |    1 +
 read-cache.c |   27 +++++++++++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/cache.h b/cache.h
index 9bfc9f3..076d6af 100644
--- a/cache.h
+++ b/cache.h
@@ -122,6 +122,7 @@ struct cache_entry {
 	unsigned int ce_flags;
 	unsigned int ce_namelen;
 	unsigned char sha1[20];
+	uint32_t ce_stat_crc;
 	struct cache_entry *next;
 	struct cache_entry *dir_next;
 	char name[FLEX_ARRAY]; /* more */
diff --git a/read-cache.c b/read-cache.c
index dceaa5c..4243606 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -67,6 +67,31 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n
 	add_index_entry(istate, new, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
 }
 
+static uint32_t calculate_stat_crc(struct cache_entry *ce)
+{
+	unsigned int ctimens = 0;
+	uint32_t stat, stat_crc;
+
+	stat = htonl(ce->ce_ctime.sec);
+	stat_crc = crc32(0, (Bytef*)&stat, 4);
+#ifdef USE_NSEC
+	ctimens = ce->ce_ctime.nsec;
+#endif
+	stat = htonl(ctimens);
+	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
+	stat = htonl(ce->ce_ino);
+	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
+	stat = htonl(ce->ce_size);
+	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
+	stat = htonl(ce->ce_dev);
+	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
+	stat = htonl(ce->ce_uid);
+	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
+	stat = htonl(ce->ce_gid);
+	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
+	return stat_crc;
+}
+
 /*
  * This only updates the "non-critical" parts of the directory
  * cache, ie the parts that aren't tracked by GIT, and only used
@@ -89,6 +114,8 @@ void fill_stat_cache_info(struct cache_entry *ce, struct stat *st)
 
 	if (S_ISREG(st->st_mode))
 		ce_mark_uptodate(ce);
+
+	ce->ce_stat_crc = calculate_stat_crc(ce);
 }
 
 static int ce_compare_data(struct cache_entry *ce, struct stat *st)
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH/RFC v2 09/16] Read index-v5
  2012-08-05 21:48 [PATCH/RFC v2 0/16] Introduce index file format version 5 Thomas Gummerer
                   ` (7 preceding siblings ...)
  2012-08-05 21:49 ` [PATCH/RFC v2 08/16] Make in-memory format aware of stat_crc Thomas Gummerer
@ 2012-08-05 21:49 ` Thomas Gummerer
  2012-08-06  5:17   ` Junio C Hamano
  2012-08-05 21:49 ` [PATCH/RFC v2 10/16] Read resolve-undo data Thomas Gummerer
                   ` (7 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-05 21:49 UTC (permalink / raw)
  To: git; +Cc: trast, mhagger, gitster, pcouds, robin.rosenberg, Thomas Gummerer

Make git read the index file version 5 without complaining.

This version of the reader doesn't read neither the cache-tree
nor the resolve undo data, but doesn't choke on an index that
includes such data.

Helped-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 cache.h      |   72 +++++++
 read-cache.c |  590 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 657 insertions(+), 5 deletions(-)

diff --git a/cache.h b/cache.h
index 076d6af..98adcd9 100644
--- a/cache.h
+++ b/cache.h
@@ -110,6 +110,15 @@ struct cache_time {
 	unsigned int nsec;
 };
 
+/*
+ * The *next pointer is used in read_entries_v5 for holding
+ * all the elements of a directory, and points to the next
+ * cache_entry in a directory.
+ *
+ * It is reset by the add_name_hash call in set_index_entry
+ * to set it to point to the next cache_entry in the
+ * correct in-memory format ordering.
+ */
 struct cache_entry {
 	struct cache_time ce_ctime;
 	struct cache_time ce_mtime;
@@ -128,11 +137,58 @@ struct cache_entry {
 	char name[FLEX_ARRAY]; /* more */
 };
 
+struct directory_entry {
+	struct directory_entry *next;
+	struct directory_entry *next_hash;
+	struct cache_entry *ce;
+	struct cache_entry *ce_last;
+	struct conflict_entry *conflict;
+	struct conflict_entry *conflict_last;
+	unsigned int conflict_size;
+	unsigned int de_foffset;
+	unsigned int de_cr;
+	unsigned int de_ncr;
+	unsigned int de_nsubtrees;
+	unsigned int de_nfiles;
+	unsigned int de_nentries;
+	unsigned char sha1[20];
+	unsigned short de_flags;
+	unsigned int de_pathlen;
+	char pathname[FLEX_ARRAY];
+};
+
+struct conflict_part {
+	struct conflict_part *next;
+	unsigned short flags;
+	unsigned short entry_mode;
+	unsigned char sha1[20];
+};
+
+struct conflict_entry {
+	struct conflict_entry *next;
+	unsigned int nfileconflicts;
+	struct conflict_part *entries;
+	unsigned int namelen;
+	unsigned int pathlen;
+	char name[FLEX_ARRAY];
+};
+
+struct ondisk_conflict_part {
+	unsigned short flags;
+	unsigned short entry_mode;
+	unsigned char sha1[20];
+};
+
+#define CE_NAMEMASK  (0x0fff)
 #define CE_STAGEMASK (0x3000)
 #define CE_EXTENDED  (0x4000)
 #define CE_VALID     (0x8000)
 #define CE_STAGESHIFT 12
 
+#define CONFLICT_CONFLICTED (0x8000)
+#define CONFLICT_STAGESHIFT 13
+#define CONFLICT_STAGEMASK (0x6000)
+
 /*
  * Range 0xFFFF0000 in ce_flags is divided into
  * two parts: in-memory flags and on-disk ones.
@@ -166,6 +222,18 @@ struct cache_entry {
 #define CE_EXTENDED_FLAGS (CE_INTENT_TO_ADD | CE_SKIP_WORKTREE)
 
 /*
+ * Representation of the extended on-disk flags in the v5 format.
+ * They must not collide with the ordinary on-disk flags, and need to
+ * fit in 16 bits.  Note however that v5 does not save the name
+ * length.
+ */
+#define CE_INTENT_TO_ADD_V5  (0x4000)
+#define CE_SKIP_WORKTREE_V5  (0x0800)
+#if (CE_VALID|CE_STAGEMASK) & (CE_INTENTTOADD_V5|CE_SKIPWORKTREE_V5)
+#error "v5 on-disk flags collide with ordinary on-disk flags"
+#endif
+
+/*
  * Safeguard to avoid saving wrong flags:
  *  - CE_EXTENDED2 won't get saved until its semantic is known
  *  - Bits in 0x0000FFFF have been saved in ce_flags already
@@ -203,6 +271,8 @@ static inline unsigned create_ce_flags(unsigned stage)
 #define ce_skip_worktree(ce) ((ce)->ce_flags & CE_SKIP_WORKTREE)
 #define ce_mark_uptodate(ce) ((ce)->ce_flags |= CE_UPTODATE)
 
+#define conflict_stage(c) ((CONFLICT_STAGEMASK & (c)->flags) >> CONFLICT_STAGESHIFT)
+
 #define ce_permissions(mode) (((mode) & 0100) ? 0755 : 0644)
 static inline unsigned int create_ce_mode(unsigned int mode)
 {
@@ -249,6 +319,8 @@ static inline unsigned int canon_mode(unsigned int mode)
 }
 
 #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1)
+#define directory_entry_size(len) (offsetof(struct directory_entry,pathname) + (len) + 1)
+#define conflict_entry_size(len) (offsetof(struct conflict_entry,name) + (len) + 1)
 
 struct index_state {
 	struct cache_entry **cache;
diff --git a/read-cache.c b/read-cache.c
index 4243606..70334f9 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -234,6 +234,55 @@ static int ce_match_stat_basic_v2(struct cache_entry *ce,
 	return changed;
 }
 
+static int match_stat_crc(struct stat *st, uint32_t expected_crc)
+{
+	uint32_t data, stat_crc = 0;
+	unsigned int ctimens = 0;
+
+	data = htonl(st->st_ctime);
+	stat_crc = crc32(0, (Bytef*)&data, 4);
+#ifdef USE_NSEC
+	ctimens = ST_MTIME_NSEC(*st);
+#endif
+	data = htonl(ctimens);
+	stat_crc = crc32(stat_crc, (Bytef*)&data, 4);
+	data = htonl(st->st_ino);
+	stat_crc = crc32(stat_crc, (Bytef*)&data, 4);
+	data = htonl(st->st_size);
+	stat_crc = crc32(stat_crc, (Bytef*)&data, 4);
+	data = htonl(st->st_dev);
+	stat_crc = crc32(stat_crc, (Bytef*)&data, 4);
+	data = htonl(st->st_uid);
+	stat_crc = crc32(stat_crc, (Bytef*)&data, 4);
+	data = htonl(st->st_gid);
+	stat_crc = crc32(stat_crc, (Bytef*)&data, 4);
+
+	return stat_crc == expected_crc;
+}
+
+static int ce_match_stat_basic_v5(struct cache_entry *ce,
+				struct stat *st,
+				int changed)
+{
+
+	if (ce->ce_mtime.sec != 0 && ce->ce_mtime.sec != (unsigned int)st->st_mtime)
+		changed |= MTIME_CHANGED;
+#ifdef USE_NSEC
+	if (ce->ce_mtime.nsec != 0 && ce->ce_mtime.nsec != ST_MTIME_NSEC(*st))
+		changed |= MTIME_CHANGED;
+#endif
+	if (!match_stat_crc(st, ce->ce_stat_crc)) {
+		changed |= OWNER_CHANGED;
+		changed |= INODE_CHANGED;
+	}
+	/* Racily smudged entry? */
+	if (!ce->ce_mtime.sec && !ce->ce_mtime.nsec) {
+		if (!changed && !is_empty_blob_sha1(ce->sha1) && ce_modified_check_fs(ce, st))
+			changed |= DATA_CHANGED;
+	}
+	return changed;
+}
+
 static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st)
 {
 	unsigned int changed = 0;
@@ -267,7 +316,10 @@ static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st)
 		die("internal error: ce_mode is %o", ce->ce_mode);
 	}
 
-	changed = ce_match_stat_basic_v2(ce, st, changed);
+	if (the_index.version != 5)
+		changed = ce_match_stat_basic_v2(ce, st, changed);
+	else
+		changed = ce_match_stat_basic_v5(ce, st, changed);
 	return changed;
 }
 
@@ -1245,6 +1297,14 @@ struct cache_header_v2 {
 	unsigned int hdr_entries;
 };
 
+struct cache_header_v5 {
+	unsigned int hdr_ndir;
+	unsigned int hdr_nfile;
+	unsigned int hdr_fblockoffset;
+	unsigned int hdr_nextension;
+};
+
+
 /*
  * dev/ino/uid/gid/size are also just tracked to the low 32 bits
  * Again - this is just a (very strong in practice) heuristic that
@@ -1287,6 +1347,25 @@ struct ondisk_cache_entry_extended {
 	char name[FLEX_ARRAY]; /* more */
 };
 
+struct ondisk_cache_entry_v5 {
+	unsigned short flags;
+	unsigned short mode;
+	struct cache_time mtime;
+	int stat_crc;
+	unsigned char sha1[20];
+};
+
+struct ondisk_directory_entry {
+	unsigned int foffset;
+	unsigned int cr;
+	unsigned int ncr;
+	unsigned int nsubtrees;
+	unsigned int nfiles;
+	unsigned int nentries;
+	unsigned char sha1[20];
+	unsigned short flags;
+};
+
 /* These are only used for v3 or lower */
 #define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,name) + (len) + 8) & ~7)
 #define ondisk_cache_entry_size(len) align_flex_name(ondisk_cache_entry,len)
@@ -1295,6 +1374,17 @@ struct ondisk_cache_entry_extended {
 			    ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
 			    ondisk_cache_entry_size(ce_namelen(ce)))
 
+static int check_crc32(int initialcrc,
+			void *data,
+			size_t len,
+			unsigned int expected_crc)
+{
+	int crc;
+
+	crc = crc32(initialcrc, (Bytef*)data, len);
+	return crc == expected_crc;
+}
+
 static int verify_hdr_version(struct cache_version_header *hdr, unsigned long size)
 {
 	int hdr_version;
@@ -1302,7 +1392,7 @@ static int verify_hdr_version(struct cache_version_header *hdr, unsigned long si
 	if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
 		return error("bad signature");
 	hdr_version = ntohl(hdr->hdr_version);
-	if (hdr_version < 2 || 4 < hdr_version)
+	if (hdr_version < 2 || 5 < hdr_version)
 		return error("bad index version %d", hdr_version);
 	return 0;
 }
@@ -1320,6 +1410,24 @@ static int verify_hdr_v2(struct cache_version_header *hdr, unsigned long size)
 	return 0;
 }
 
+static int verify_hdr_v5(void *mmap)
+{
+	uint32_t *filecrc;
+	unsigned int header_size_v5;
+	struct cache_version_header *hdr;
+	struct cache_header_v5 *hdr_v5;
+
+	hdr = mmap;
+	hdr_v5 = mmap + sizeof(*hdr);
+	/* Size of the header + the size of the extensionoffsets */
+	header_size_v5 = sizeof(*hdr_v5) + hdr_v5->hdr_nextension * 4;
+	/* Initialize crc */
+	filecrc = mmap + sizeof(*hdr) + header_size_v5;
+	if (!check_crc32(0, hdr, sizeof(*hdr) + header_size_v5, ntohl(*filecrc)))
+		return error("bad index file header crc signature");
+	return 0;
+}
+
 static int read_index_extension(struct index_state *istate,
 				const char *ext, void *data, unsigned long sz)
 {
@@ -1390,6 +1498,98 @@ static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *on
 	return ce;
 }
 
+static struct cache_entry *cache_entry_from_ondisk_v5(struct ondisk_cache_entry_v5 *ondisk,
+						   struct directory_entry *de,
+						   char *name,
+						   size_t len,
+						   size_t prefix_len)
+{
+	struct cache_entry *ce = xmalloc(cache_entry_size(len + de->de_pathlen));
+	int flags;
+
+	flags = ntoh_s(ondisk->flags);
+	ce->ce_ctime.sec  = 0;
+	ce->ce_mtime.sec  = ntoh_l(ondisk->mtime.sec);
+	ce->ce_ctime.nsec = 0;
+	ce->ce_mtime.nsec = ntoh_l(ondisk->mtime.nsec);
+	ce->ce_dev        = 0;
+	ce->ce_ino        = 0;
+	ce->ce_mode       = ntoh_s(ondisk->mode);
+	ce->ce_uid        = 0;
+	ce->ce_gid        = 0;
+	ce->ce_size       = 0;
+	ce->ce_flags      = flags & CE_STAGEMASK;
+	ce->ce_flags     |= flags & CE_VALID;
+	if (flags & CE_INTENT_TO_ADD_V5)
+		ce->ce_flags |= CE_INTENT_TO_ADD;
+	if (flags & CE_SKIP_WORKTREE_V5)
+		ce->ce_flags |= CE_SKIP_WORKTREE;
+	ce->ce_stat_crc   = ntoh_l(ondisk->stat_crc);
+	ce->ce_namelen    = len + de->de_pathlen;
+	hashcpy(ce->sha1, ondisk->sha1);
+	memcpy(ce->name, de->pathname, de->de_pathlen);
+	memcpy(ce->name + de->de_pathlen, name, len);
+	ce->name[len + de->de_pathlen] = '\0';
+	return ce;
+}
+
+static struct directory_entry *directory_entry_from_ondisk(struct ondisk_directory_entry *ondisk,
+						   const char *name,
+						   size_t len)
+{
+	struct directory_entry *de = xmalloc(directory_entry_size(len));
+
+
+	memcpy(de->pathname, name, len);
+	de->pathname[len] = '\0';
+	de->de_flags      = ntoh_s(ondisk->flags);
+	de->de_foffset    = ntoh_l(ondisk->foffset);
+	de->de_cr         = ntoh_l(ondisk->cr);
+	de->de_ncr        = ntoh_l(ondisk->ncr);
+	de->de_nsubtrees  = ntoh_l(ondisk->nsubtrees);
+	de->de_nfiles     = ntoh_l(ondisk->nfiles);
+	de->de_nentries   = ntoh_l(ondisk->nentries);
+	de->de_pathlen    = len;
+	hashcpy(de->sha1, ondisk->sha1);
+	return de;
+}
+
+static struct conflict_part *conflict_part_from_ondisk(struct ondisk_conflict_part *ondisk)
+{
+	struct conflict_part *cp = xmalloc(sizeof(struct conflict_part));
+
+	cp->flags      = ntoh_s(ondisk->flags);
+	cp->entry_mode = ntoh_s(ondisk->entry_mode);
+	hashcpy(cp->sha1, ondisk->sha1);
+	return cp;
+}
+
+static struct cache_entry *convert_conflict_part(struct conflict_part *cp,
+						char * name,
+						unsigned int len)
+{
+
+	struct cache_entry *ce = xmalloc(cache_entry_size(len));
+
+	ce->ce_ctime.sec  = 0;
+	ce->ce_mtime.sec  = 0;
+	ce->ce_ctime.nsec = 0;
+	ce->ce_mtime.nsec = 0;
+	ce->ce_dev        = 0;
+	ce->ce_ino        = 0;
+	ce->ce_mode       = cp->entry_mode;
+	ce->ce_uid        = 0;
+	ce->ce_gid        = 0;
+	ce->ce_size       = 0;
+	ce->ce_flags      = conflict_stage(cp) << CE_STAGESHIFT;
+	ce->ce_stat_crc   = 0;
+	ce->ce_namelen    = len;
+	hashcpy(ce->sha1, cp->sha1);
+	memcpy(ce->name, name, len);
+	ce->name[len] = '\0';
+	return ce;
+}
+
 /*
  * Adjacent cache entries tend to share the leading paths, so it makes
  * sense to only store the differences in later entries.  In the v4
@@ -1457,6 +1657,345 @@ static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk,
 	return ce;
 }
 
+static struct directory_entry *read_directories_v5(unsigned int *dir_offset,
+				unsigned int *dir_table_offset,
+				void *mmap,
+				int mmap_size)
+{
+	int i, ondisk_directory_size;
+	uint32_t *filecrc, *beginning, *end;
+	struct directory_entry *current = NULL;
+	struct ondisk_directory_entry *disk_de;
+	struct directory_entry *de;
+	unsigned int data_len, len;
+	char *name;
+
+	ondisk_directory_size = sizeof(disk_de->flags)
+		+ sizeof(disk_de->foffset)
+		+ sizeof(disk_de->cr)
+		+ sizeof(disk_de->ncr)
+		+ sizeof(disk_de->nsubtrees)
+		+ sizeof(disk_de->nfiles)
+		+ sizeof(disk_de->nentries)
+		+ sizeof(disk_de->sha1);
+	name = (char *)mmap + *dir_offset;
+	beginning = mmap + *dir_table_offset;
+	end = mmap + *dir_table_offset + 4;
+	len = ntoh_l(*end) - ntoh_l(*beginning) - ondisk_directory_size - 5;
+	disk_de = (struct ondisk_directory_entry *)
+			((char *)mmap + *dir_offset + len + 1);
+	de = directory_entry_from_ondisk(disk_de, name, len);
+	de->next = NULL;
+
+	/* Length of pathname + nul byte for termination + size of
+	 * members of ondisk_directory_entry. (Just using the size
+	 * of the stuct doesn't work, because there may be padding
+	 * bytes for the struct)
+	 */
+	data_len = len + 1 + ondisk_directory_size;
+
+	filecrc = mmap + *dir_offset + data_len;
+	if (!check_crc32(0, mmap + *dir_offset, data_len, ntoh_l(*filecrc)))
+		goto unmap;
+
+	*dir_table_offset += 4;
+	*dir_offset += data_len + 4; /* crc code */
+
+	current = de;
+	for (i = 0; i < de->de_nsubtrees; i++) {
+		current->next = read_directories_v5(dir_offset, dir_table_offset,
+						mmap, mmap_size);
+		while (current->next)
+			current = current->next;
+	}
+
+	return de;
+unmap:
+	munmap(mmap, mmap_size);
+	die("directory crc doesn't match for '%s'", de->pathname);
+}
+
+static struct cache_entry *read_entry_v5(struct directory_entry *de,
+			unsigned long *entry_offset,
+			void **mmap,
+			unsigned long mmap_size,
+			unsigned int *foffsetblock,
+			int fd)
+{
+	int len, crc_wrong, i = 0, offset_to_offset;
+	char *name;
+	uint32_t foffsetblockcrc;
+	uint32_t *filecrc, *beginning, *end;
+	struct cache_entry *ce;
+	struct ondisk_cache_entry_v5 *disk_ce;
+
+	do {
+		name = (char *)*mmap + *entry_offset;
+		beginning = *mmap + *foffsetblock;
+		end = *mmap + *foffsetblock + 4;
+		len = ntoh_l(*end) - ntoh_l(*beginning) - sizeof(struct ondisk_cache_entry_v5) - 5;
+		disk_ce = (struct ondisk_cache_entry_v5 *)
+				((char *)*mmap + *entry_offset + len + 1);
+		ce = cache_entry_from_ondisk_v5(disk_ce, de, name, len, de->de_pathlen);
+		filecrc = *mmap + *entry_offset + len + 1 + sizeof(*disk_ce);
+		offset_to_offset = htonl(*foffsetblock);
+		foffsetblockcrc = crc32(0, (Bytef*)&offset_to_offset, 4);
+		crc_wrong = !check_crc32(foffsetblockcrc,
+			*mmap + *entry_offset, len + 1 + sizeof(*disk_ce),
+			ntoh_l(*filecrc));
+		if (crc_wrong) {
+			/* wait for 10 milliseconds */
+			usleep(10*1000);
+			munmap(*mmap, mmap_size);
+			*mmap = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+		}
+		i++;
+		/*
+		 * Retry for 500 ms maximum, before giving up and saying the
+		 * checksum is wrong.
+		 */
+	} while (crc_wrong && i < 50);
+	if (crc_wrong)
+		goto unmap;
+	*entry_offset += len + 1 + sizeof(*disk_ce) + 4;
+	return ce;
+unmap:
+	munmap(*mmap, mmap_size);
+	die("file crc doesn't match for '%s'", ce->name);
+}
+
+static void ce_queue_push(struct cache_entry **head,
+			     struct cache_entry **tail,
+			     struct cache_entry *ce)
+{
+	if (!*head) {
+		*head = *tail = ce;
+		(*tail)->next = NULL;
+		return;
+	}
+
+	(*tail)->next = ce;
+	ce->next = NULL;
+	*tail = (*tail)->next;
+}
+
+static void conflict_entry_push(struct conflict_entry **head,
+				struct conflict_entry **tail,
+				struct conflict_entry *conflict_entry)
+{
+	if (!*head) {
+		*head = *tail = conflict_entry;
+		(*tail)->next = NULL;
+		return;
+	}
+
+	(*tail)->next = conflict_entry;
+	conflict_entry->next = NULL;
+	*tail = (*tail)->next;
+}
+
+static struct cache_entry *ce_queue_pop(struct cache_entry **head)
+{
+	struct cache_entry *ce;
+
+	ce = *head;
+	*head = (*head)->next;
+	return ce;
+}
+
+static void conflict_part_head_remove(struct conflict_part **head)
+{
+	struct conflict_part *to_free;
+
+	to_free = *head;
+	*head = (*head)->next;
+	free(to_free);
+}
+
+static void conflict_entry_head_remove(struct conflict_entry **head)
+{
+	struct conflict_entry *to_free;
+
+	to_free = *head;
+	*head = (*head)->next;
+	free(to_free);
+}
+
+struct conflict_entry *create_new_conflict(char *name, int len, int pathlen)
+{
+	struct conflict_entry *conflict_entry;
+
+	if (pathlen)
+		pathlen++;
+	conflict_entry = xmalloc(conflict_entry_size(len));
+	conflict_entry->entries = NULL;
+	conflict_entry->nfileconflicts = 0;
+	conflict_entry->namelen = len;
+	memcpy(conflict_entry->name, name, len);
+	conflict_entry->name[len] = '\0';
+	conflict_entry->pathlen = pathlen;
+	conflict_entry->next = NULL;
+
+	return conflict_entry;
+}
+
+void add_part_to_conflict_entry(struct directory_entry *de,
+					struct conflict_entry *entry,
+					struct conflict_part *conflict_part)
+{
+
+	struct conflict_part *conflict_search;
+
+	entry->nfileconflicts++;
+	de->conflict_size += sizeof(struct ondisk_conflict_part);
+	if (!entry->entries)
+		entry->entries = conflict_part;
+	else {
+		conflict_search = entry->entries;
+		while (conflict_search->next)
+			conflict_search = conflict_search->next;
+		conflict_search->next = conflict_part;
+	}
+}
+
+static struct conflict_entry *read_conflicts_v5(struct directory_entry *de,
+						void **mmap,
+						unsigned long mmap_size,
+						int fd)
+{
+	struct conflict_entry *head, *tail;
+	unsigned int croffset, i, j = 0;
+	char *full_name;
+
+	croffset = de->de_cr;
+	tail = NULL;
+	head = NULL;
+	for (i = 0; i < de->de_ncr; i++) {
+		struct conflict_entry *conflict_new;
+		unsigned int len, *nfileconflicts;
+		char *name;
+		void *crc_start;
+		int k, offset, crc_wrong;
+		uint32_t *filecrc;
+
+		do {
+			offset = croffset;
+			crc_start = *mmap + offset;
+			name = (char *)*mmap + offset;
+			len = strlen(name);
+			offset += len + 1;
+			nfileconflicts = *mmap + offset;
+			offset += 4;
+
+			full_name = xmalloc(sizeof(char) * (len + de->de_pathlen));
+			memcpy(full_name, de->pathname, de->de_pathlen);
+			memcpy(full_name + de->de_pathlen, name, len);
+			conflict_new = create_new_conflict(full_name,
+					len + de->de_pathlen, de->de_pathlen);
+			for (k = 0; k < ntoh_l(*nfileconflicts); k++) {
+				struct ondisk_conflict_part *ondisk;
+				struct conflict_part *cp;
+
+				ondisk = *mmap + offset;
+				cp = conflict_part_from_ondisk(ondisk);
+				cp->next = NULL;
+				add_part_to_conflict_entry(de, conflict_new, cp);
+				offset += sizeof(struct ondisk_conflict_part);
+			}
+			filecrc = *mmap + offset;
+			crc_wrong = !check_crc32(0, crc_start,
+				len + 1 + 4 + conflict_new->nfileconflicts
+				* sizeof(struct ondisk_conflict_part),
+				ntoh_l(*filecrc));
+			if (crc_wrong) {
+				/* wait for 10 milliseconds */
+				usleep(10*1000);
+				munmap(*mmap, mmap_size);
+				*mmap = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+			}
+			free(full_name);
+			j++;
+		} while (crc_wrong && j < 50);
+		if (crc_wrong)
+			goto unmap;
+		croffset = offset + 4;
+		conflict_entry_push(&head, &tail, conflict_new);
+	}
+	return head;
+unmap:
+	munmap(*mmap, mmap_size);
+	die("wrong crc for conflict: %s", full_name);
+}
+
+static struct directory_entry *read_entries_v5(struct index_state *istate,
+					struct directory_entry *de,
+					unsigned long *entry_offset,
+					void **mmap,
+					unsigned long mmap_size,
+					int *nr,
+					unsigned int *foffsetblock,
+					int fd)
+{
+	struct cache_entry *head = NULL, *tail = NULL;
+	struct conflict_entry *conflict_queue;
+	struct cache_entry *ce;
+	int i;
+
+	conflict_queue = read_conflicts_v5(de, mmap, mmap_size, fd);
+	for (i = 0; i < de->de_nfiles; i++) {
+		ce = read_entry_v5(de,
+				entry_offset,
+				mmap,
+				mmap_size,
+				foffsetblock,
+				fd);
+		ce_queue_push(&head, &tail, ce);
+		*foffsetblock += 4;
+
+		/* Add the conflicted entries at the end of the index file
+		 * to the in memory format
+		 */
+		if (conflict_queue &&
+		    (conflict_queue->entries->flags & CONFLICT_CONFLICTED) != 0 &&
+		    !cache_name_compare(conflict_queue->name, conflict_queue->namelen,
+					ce->name, ce_namelen(ce))) {
+			struct conflict_part *cp;
+			cp = conflict_queue->entries;
+			cp = cp->next;
+			while (cp) {
+				ce = convert_conflict_part(cp,
+						conflict_queue->name,
+						conflict_queue->namelen);
+				ce_queue_push(&head, &tail, ce);
+				conflict_part_head_remove(&cp);
+			}
+			conflict_entry_head_remove(&conflict_queue);
+		}
+	}
+
+	de = de->next;
+
+	while (head) {
+		if (de != NULL
+		    && strcmp(head->name, de->pathname) > 0) {
+			de = read_entries_v5(istate,
+					de,
+					entry_offset,
+					mmap,
+					mmap_size,
+					nr,
+					foffsetblock,
+					fd);
+		} else {
+			ce = ce_queue_pop(&head);
+			set_index_entry(istate, *nr, ce);
+			(*nr)++;
+		}
+	}
+
+	return de;
+}
+
 static void read_index_v2(struct index_state *istate, void *mmap, int mmap_size)
 {
 	int i;
@@ -1516,6 +2055,39 @@ unmap:
 	die("index file corrupt");
 }
 
+static void read_index_v5(struct index_state *istate, void *mmap, int mmap_size, int fd)
+{
+	unsigned long entry_offset;
+	unsigned int dir_offset, dir_table_offset;
+	struct cache_version_header *hdr;
+	struct cache_header_v5 *hdr_v5;
+	struct directory_entry *root_directory, *de;
+	int nr;
+	unsigned int foffsetblock;
+
+	hdr = mmap;
+	hdr_v5 = mmap + sizeof(*hdr);
+	istate->version = ntohl(hdr->hdr_version);
+	istate->cache_nr = ntohl(hdr_v5->hdr_nfile);
+	istate->cache_alloc = alloc_nr(istate->cache_nr);
+	istate->cache = xcalloc(istate->cache_alloc, sizeof(struct cache_entry *));
+	istate->initialized = 1;
+
+	/* Skip size of the header + crc sum + size of offsets */
+	dir_offset = sizeof(*hdr) + sizeof(*hdr_v5) + 4 + (ntohl(hdr_v5->hdr_ndir) + 1) * 4;
+	dir_table_offset = sizeof(*hdr) + sizeof(*hdr_v5) + 4;
+	root_directory = read_directories_v5(&dir_offset, &dir_table_offset, mmap, mmap_size);
+
+	entry_offset = ntohl(hdr_v5->hdr_fblockoffset);
+
+	nr = 0;
+	foffsetblock = dir_offset;
+	de = root_directory;
+	while (de)
+		de = read_entries_v5(istate, de, &entry_offset,
+				&mmap, mmap_size, &nr, &foffsetblock, fd);
+}
+
 /* remember to discard_cache() before reading a different cache! */
 int read_index_from(struct index_state *istate, const char *path)
 {
@@ -1555,10 +2127,18 @@ int read_index_from(struct index_state *istate, const char *path)
 	if (verify_hdr_version(hdr, mmap_size) < 0)
 		goto unmap;
 
-	if (verify_hdr_v2(hdr, mmap_size) < 0)
-		goto unmap;
+	if (htonl(hdr->hdr_version) != 5) {
+		if (verify_hdr_v2(hdr, mmap_size) < 0)
+			goto unmap;
 
-	read_index_v2(istate, mmap, mmap_size);
+		read_index_v2(istate, mmap, mmap_size);
+	} else {
+		if (verify_hdr_v5(hdr) < 0)
+			goto unmap;
+
+		read_index_v5(istate, mmap, mmap_size, fd);
+	}
+	close(fd);
 	istate->timestamp.sec = st.st_mtime;
 	istate->timestamp.nsec = ST_MTIME_NSEC(st);
 
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH/RFC v2 10/16] Read resolve-undo data
  2012-08-05 21:48 [PATCH/RFC v2 0/16] Introduce index file format version 5 Thomas Gummerer
                   ` (8 preceding siblings ...)
  2012-08-05 21:49 ` [PATCH/RFC v2 09/16] Read index-v5 Thomas Gummerer
@ 2012-08-05 21:49 ` Thomas Gummerer
  2012-08-06  1:51   ` Junio C Hamano
  2012-08-05 21:49 ` [PATCH/RFC v2 11/16] Read cache-tree in index-v5 Thomas Gummerer
                   ` (6 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-05 21:49 UTC (permalink / raw)
  To: git; +Cc: trast, mhagger, gitster, pcouds, robin.rosenberg, Thomas Gummerer

Make git read the resolve-undo data from the index.

Since the resolve-undo data is joined with the conflicts in
the ondisk format of the index file version 5, conflicts and
resolved data is read at the same time, and the resolve-undo
data is then converted to the in-memory format.

Helped-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 read-cache.c   |    1 +
 resolve-undo.c |   36 ++++++++++++++++++++++++++++++++++++
 resolve-undo.h |    2 ++
 3 files changed, 39 insertions(+)

diff --git a/read-cache.c b/read-cache.c
index 70334f9..03370f9 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1942,6 +1942,7 @@ static struct directory_entry *read_entries_v5(struct index_state *istate,
 	int i;
 
 	conflict_queue = read_conflicts_v5(de, mmap, mmap_size, fd);
+	resolve_undo_convert_v5(istate, conflict_queue);
 	for (i = 0; i < de->de_nfiles; i++) {
 		ce = read_entry_v5(de,
 				entry_offset,
diff --git a/resolve-undo.c b/resolve-undo.c
index 72b4612..f96c6ba 100644
--- a/resolve-undo.c
+++ b/resolve-undo.c
@@ -170,3 +170,39 @@ void unmerge_index(struct index_state *istate, const char **pathspec)
 		i = unmerge_index_entry_at(istate, i);
 	}
 }
+
+void resolve_undo_convert_v5(struct index_state *istate,
+					struct conflict_entry *ce)
+{
+	int i;
+
+	while (ce) {
+		struct string_list_item *lost;
+		struct resolve_undo_info *ui;
+		struct conflict_part *cp;
+
+		if (ce->entries && (ce->entries->flags & CONFLICT_CONFLICTED) != 0) {
+			ce = ce->next;
+			continue;
+		}
+		if (!istate->resolve_undo) {
+			istate->resolve_undo = xcalloc(1, sizeof(struct string_list));
+			istate->resolve_undo->strdup_strings = 1;
+		}
+
+		lost = string_list_insert(istate->resolve_undo, ce->name);
+		if (!lost->util)
+			lost->util = xcalloc(1, sizeof(*ui));
+		ui = lost->util;
+
+		cp = ce->entries;
+		for (i = 0; i < 3; i++)
+			ui->mode[i] = 0;
+		while (cp) {
+			ui->mode[conflict_stage(cp) - 1] = cp->entry_mode;
+			hashcpy(ui->sha1[conflict_stage(cp) - 1], cp->sha1);
+			cp = cp->next;
+		}
+		ce = ce->next;
+	}
+}
diff --git a/resolve-undo.h b/resolve-undo.h
index 8458769..ab660a6 100644
--- a/resolve-undo.h
+++ b/resolve-undo.h
@@ -13,4 +13,6 @@ extern void resolve_undo_clear_index(struct index_state *);
 extern int unmerge_index_entry_at(struct index_state *, int);
 extern void unmerge_index(struct index_state *, const char **);
 
+extern void resolve_undo_convert_v5(struct index_state *, struct conflict_entry *);
+
 #endif
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH/RFC v2 11/16] Read cache-tree in index-v5
  2012-08-05 21:48 [PATCH/RFC v2 0/16] Introduce index file format version 5 Thomas Gummerer
                   ` (9 preceding siblings ...)
  2012-08-05 21:49 ` [PATCH/RFC v2 10/16] Read resolve-undo data Thomas Gummerer
@ 2012-08-05 21:49 ` Thomas Gummerer
  2012-08-05 21:49 ` [PATCH/RFC v2 12/16] Write index-v5 Thomas Gummerer
                   ` (5 subsequent siblings)
  16 siblings, 0 replies; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-05 21:49 UTC (permalink / raw)
  To: git; +Cc: trast, mhagger, gitster, pcouds, robin.rosenberg, Thomas Gummerer

Since the cache-tree data is saved as part of the directory data,
we already read it at the beginning of the index. The cache-tree
is only converted from this directory data.

The cache-tree data is arranged in a tree, with the children sorted by
pathlen at each node, while the ondisk format is sorted lexically.
So we have to rebuild this format from the on-disk directory list.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 cache-tree.c |   93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 cache-tree.h |   10 +++++++
 read-cache.c |    1 +
 3 files changed, 104 insertions(+)

diff --git a/cache-tree.c b/cache-tree.c
index 28ed657..440cd04 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -519,6 +519,99 @@ struct cache_tree *cache_tree_read(const char *buffer, unsigned long size)
 	return read_one(&buffer, &size);
 }
 
+static struct cache_tree *convert_one(struct directory_queue *queue, int dirnr)
+{
+	int i, subtree_nr;
+	struct cache_tree *it;
+	struct directory_queue *down;
+
+	it = cache_tree();
+	it->entry_count = queue[dirnr].de->de_nentries;
+	subtree_nr = queue[dirnr].de->de_nsubtrees;
+	if (0 <= it->entry_count)
+		hashcpy(it->sha1, queue[dirnr].de->sha1);
+
+	/*
+	* Just a heuristic -- we do not add directories that often but
+	* we do not want to have to extend it immediately when we do,
+	* hence +2.
+	*/
+	it->subtree_alloc = subtree_nr + 2;
+	it->down = xcalloc(it->subtree_alloc, sizeof(struct cache_tree_sub *));
+	down = queue[dirnr].down;
+	for (i = 0; i < subtree_nr; i++) {
+		struct cache_tree *sub;
+		struct cache_tree_sub *subtree;
+		char *buf, *name;
+
+		name = "";
+		buf = strtok(down[i].de->pathname, "/");
+		while (buf) {
+			name = buf;
+			buf = strtok(NULL, "/");
+		}
+		sub = convert_one(down, i);
+		if(!sub)
+			goto free_return;
+		subtree = cache_tree_sub(it, name);
+		subtree->cache_tree = sub;
+	}
+	if (subtree_nr != it->subtree_nr)
+		die("cache-tree: internal error");
+	return it;
+ free_return:
+	cache_tree_free(&it);
+	return NULL;
+}
+
+static int compare_cache_tree_elements(const void *a, const void *b)
+{
+	const struct directory_entry *de1, *de2;
+
+	de1 = ((const struct directory_queue *)a)->de;
+	de2 = ((const struct directory_queue *)b)->de;
+	return subtree_name_cmp(de1->pathname, de1->de_pathlen,
+				de2->pathname, de2->de_pathlen);
+}
+
+static struct directory_entry *sort_directories(struct directory_entry *de,
+						struct directory_queue *queue)
+{
+	int i, nsubtrees;
+
+	nsubtrees = de->de_nsubtrees;
+	for (i = 0; i < nsubtrees; i++) {
+		struct directory_entry *new_de;
+		de = de->next;
+		new_de = xmalloc(directory_entry_size(de->de_pathlen));
+		memcpy(new_de, de, directory_entry_size(de->de_pathlen));
+		queue[i].de = new_de;
+		if (de->de_nsubtrees) {
+			queue[i].down = xcalloc(de->de_nsubtrees,
+					sizeof(struct directory_queue));
+			de = sort_directories(de,
+					queue[i].down);
+		}
+	}
+	qsort(queue, nsubtrees, sizeof(struct directory_queue),
+			compare_cache_tree_elements);
+	return de;
+}
+
+struct cache_tree *cache_tree_convert_v5(struct directory_entry *de)
+{
+	struct directory_queue *queue;
+
+	if (!de->de_nentries)
+		return NULL;
+	queue = xcalloc(1, sizeof(struct directory_queue));
+	queue[0].de = de;
+	queue[0].down = xcalloc(de->de_nsubtrees, sizeof(struct directory_queue));
+
+	sort_directories(de, queue[0].down);
+	return convert_one(queue, 0);
+}
+
 static struct cache_tree *cache_tree_find(struct cache_tree *it, const char *path)
 {
 	if (!it)
diff --git a/cache-tree.h b/cache-tree.h
index d8cb2e9..7f29d26 100644
--- a/cache-tree.h
+++ b/cache-tree.h
@@ -20,6 +20,11 @@ struct cache_tree {
 	struct cache_tree_sub **down;
 };
 
+struct directory_queue {
+	struct directory_queue *down;
+	struct directory_entry *de;
+};
+
 struct cache_tree *cache_tree(void);
 void cache_tree_free(struct cache_tree **);
 void cache_tree_invalidate_path(struct cache_tree *, const char *);
@@ -27,6 +32,11 @@ struct cache_tree_sub *cache_tree_sub(struct cache_tree *, const char *);
 
 void cache_tree_write(struct strbuf *, struct cache_tree *root);
 struct cache_tree *cache_tree_read(const char *buffer, unsigned long size);
+/*
+ * This function modifys the directory argument that is given to it.
+ * Don't use it if the directory entries are still needed after.
+ */
+struct cache_tree *cache_tree_convert_v5(struct directory_entry *de);
 
 int cache_tree_fully_valid(struct cache_tree *);
 int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int);
diff --git a/read-cache.c b/read-cache.c
index 03370f9..21ae804 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2087,6 +2087,7 @@ static void read_index_v5(struct index_state *istate, void *mmap, int mmap_size,
 	while (de)
 		de = read_entries_v5(istate, de, &entry_offset,
 				&mmap, mmap_size, &nr, &foffsetblock, fd);
+	istate->cache_tree = cache_tree_convert_v5(root_directory);
 }
 
 /* remember to discard_cache() before reading a different cache! */
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH/RFC v2 12/16] Write index-v5
  2012-08-05 21:48 [PATCH/RFC v2 0/16] Introduce index file format version 5 Thomas Gummerer
                   ` (10 preceding siblings ...)
  2012-08-05 21:49 ` [PATCH/RFC v2 11/16] Read cache-tree in index-v5 Thomas Gummerer
@ 2012-08-05 21:49 ` Thomas Gummerer
  2012-08-05 21:49 ` [PATCH/RFC v2 13/16] Write index-v5 cache-tree data Thomas Gummerer
                   ` (4 subsequent siblings)
  16 siblings, 0 replies; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-05 21:49 UTC (permalink / raw)
  To: git; +Cc: trast, mhagger, gitster, pcouds, robin.rosenberg, Thomas Gummerer

Write the index version 5 file format to disk. This version doesn't
write the cache-tree data and resolve-undo data to the file.

The main work is done when filtering out the directories from the
current in-memory format, where in the same turn also the conflicts
and the file data is calculated.

Helped-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 cache.h      |   10 +-
 read-cache.c |  587 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 595 insertions(+), 2 deletions(-)

diff --git a/cache.h b/cache.h
index 98adcd9..f953db4 100644
--- a/cache.h
+++ b/cache.h
@@ -98,7 +98,7 @@ unsigned long git_deflate_bound(git_zstream *, unsigned long);
 #define CACHE_SIGNATURE 0x44495243	/* "DIRC" */
 
 #define INDEX_FORMAT_LB 2
-#define INDEX_FORMAT_UB 4
+#define INDEX_FORMAT_UB 5
 
 /*
  * The "cache_time" is just the low 32 bits of the
@@ -509,6 +509,7 @@ extern int verify_path(const char *path);
 extern struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int igncase);
 extern int index_name_stage_pos(const struct index_state *, const char *name, int namelen, int stage);
 extern int index_name_pos(const struct index_state *, const char *name, int namelen);
+extern struct directory_entry *init_directory_entry(char *pathname, int len);
 #define ADD_CACHE_OK_TO_ADD 1		/* Ok to add */
 #define ADD_CACHE_OK_TO_REPLACE 2	/* Ok to replace file/directory */
 #define ADD_CACHE_SKIP_DFCHECK 4	/* Ok to skip DF conflict checks */
@@ -1243,6 +1244,13 @@ static inline ssize_t write_str_in_full(int fd, const char *str)
 	return write_in_full(fd, str, strlen(str));
 }
 
+/* index-v5 helper functions */
+extern char *super_directory(const char *filename);
+extern void insert_directory_entry(struct directory_entry *, struct hash_table *, int *, unsigned int *, uint32_t);
+extern void add_conflict_to_directory_entry(struct directory_entry *, struct conflict_entry *);
+extern void add_part_to_conflict_entry(struct directory_entry *, struct conflict_entry *, struct conflict_part *);
+extern struct conflict_entry *create_new_conflict(char *, int, int);
+
 /* pager.c */
 extern void setup_pager(void);
 extern const char *pager_program;
diff --git a/read-cache.c b/read-cache.c
index 21ae804..2c47a97 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2204,6 +2204,17 @@ static int ce_write_flush(git_SHA_CTX *context, int fd)
 	return 0;
 }
 
+static int ce_write_flush_v5(int fd)
+{
+	unsigned int buffered = write_buffer_len;
+	if (buffered) {
+		if (write_in_full(fd, write_buffer, buffered) != buffered)
+			return -1;
+		write_buffer_len = 0;
+	}
+	return 0;
+}
+
 static int ce_write_v2(git_SHA_CTX *context, int fd, void *data, unsigned int len)
 {
 	while (len) {
@@ -2226,6 +2237,30 @@ static int ce_write_v2(git_SHA_CTX *context, int fd, void *data, unsigned int le
 	return 0;
 }
 
+static int ce_write_v5(uint32_t *crc, int fd, void *data, unsigned int len)
+{
+	if (crc)
+		*crc = crc32(*crc, (Bytef*)data, len);
+	while (len) {
+		unsigned int buffered = write_buffer_len;
+		unsigned int partial = WRITE_BUFFER_SIZE - buffered;
+		if (partial > len)
+			partial = len;
+		memcpy(write_buffer + buffered, data, partial);
+		buffered += partial;
+		if (buffered == WRITE_BUFFER_SIZE) {
+			write_buffer_len = buffered;
+			if (ce_write_flush_v5(fd))
+				return -1;
+			buffered = 0;
+		}
+		write_buffer_len = buffered;
+		len -= partial;
+		data = (char *) data + partial;
+	}
+	return 0;
+}
+
 static int write_index_ext_header_v2(git_SHA_CTX *context, int fd,
 				  unsigned int ext, unsigned int sz)
 {
@@ -2257,6 +2292,19 @@ static int ce_flush(git_SHA_CTX *context, int fd)
 	return (write_in_full(fd, write_buffer, left) != left) ? -1 : 0;
 }
 
+static int ce_flush_v5(int fd)
+{
+	unsigned int left = write_buffer_len;
+
+	if (left)
+		write_buffer_len = 0;
+
+	if (write_in_full(fd, write_buffer, left) != left)
+		return -1;
+
+	return 0;
+}
+
 static void ce_smudge_racily_clean_entry_v2(struct cache_entry *ce)
 {
 	/*
@@ -2306,6 +2354,22 @@ static void ce_smudge_racily_clean_entry_v2(struct cache_entry *ce)
 	}
 }
 
+static void ce_smudge_racily_clean_entry_v5(struct cache_entry *ce)
+{
+	/*
+	 * This method shall only be called if the timestamp of ce
+	 * is racy (check with is_racy_timestamp). If the timestamp
+	 * is racy, the writer will just set the time to 0.
+	 *
+	 * The reader (ce_match_stat_basic_v5) will then take care
+	 * of checking if the entry is really changed or not, by
+	 * taking into account the stat_crc and if that hasn't changed
+	 * checking the sha1.
+	 */
+	ce->ce_mtime.sec = 0;
+	ce->ce_mtime.nsec = 0;
+}
+
 /* Copy miscellaneous fields but not the name */
 static char *copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
 				       struct cache_entry *ce)
@@ -2489,12 +2553,533 @@ static int write_index_v2(struct index_state *istate, int newfd)
 	return 0;
 }
 
+char *super_directory(const char *filename)
+{
+	char *slash;
+
+	slash = strrchr(filename, '/');
+	if (slash)
+		return xmemdupz(filename, slash-filename);
+	return NULL;
+}
+
+struct directory_entry *init_directory_entry(char *pathname, int len)
+{
+	struct directory_entry *de = xmalloc(directory_entry_size(len));
+
+	memcpy(de->pathname, pathname, len);
+	de->pathname[len] = '\0';
+	de->de_flags      = 0;
+	de->de_foffset    = 0;
+	de->de_cr         = 0;
+	de->de_ncr        = 0;
+	de->de_nsubtrees  = 0;
+	de->de_nfiles     = 0;
+	de->de_nentries   = 0;
+	memset(de->sha1, 0, 20);
+	de->de_pathlen    = len;
+	de->next          = NULL;
+	de->next_hash     = NULL;
+	de->ce            = NULL;
+	de->ce_last       = NULL;
+	de->conflict      = NULL;
+	de->conflict_last = NULL;
+	de->conflict_size = 0;
+	return de;
+}
+
+static void ondisk_from_directory_entry(struct directory_entry *de,
+					struct ondisk_directory_entry *ondisk)
+{
+	ondisk->foffset   = htonl(de->de_foffset);
+	ondisk->cr        = htonl(de->de_cr);
+	ondisk->ncr       = htonl(de->de_ncr);
+	ondisk->nsubtrees = htonl(de->de_nsubtrees);
+	ondisk->nfiles    = htonl(de->de_nfiles);
+	ondisk->nentries  = htonl(de->de_nentries);
+	hashcpy(ondisk->sha1, de->sha1);
+	ondisk->flags     = htons(de->de_flags);
+}
+
+static struct conflict_part *conflict_part_from_inmemory(struct cache_entry *ce)
+{
+	struct conflict_part *conflict;
+	short flags;
+
+	conflict = xmalloc(sizeof(struct conflict_part));
+	flags                = CONFLICT_CONFLICTED;
+	flags               |= ce_stage(ce) << CONFLICT_STAGESHIFT;
+	conflict->flags      = flags;
+	conflict->entry_mode = ce->ce_mode;
+	conflict->next       = NULL;
+	hashcpy(conflict->sha1, ce->sha1);
+	return conflict;
+}
+
+static void conflict_to_ondisk(struct conflict_part *cp,
+				struct ondisk_conflict_part *ondisk)
+{
+	ondisk->flags      = htons(cp->flags);
+	ondisk->entry_mode = htons(cp->entry_mode);
+	hashcpy(ondisk->sha1, cp->sha1);
+}
+
+void add_conflict_to_directory_entry(struct directory_entry *de,
+					struct conflict_entry *conflict_entry)
+{
+	de->de_ncr++;
+	de->conflict_size += conflict_entry->namelen + 1 + 8 - conflict_entry->pathlen;
+	conflict_entry_push(&de->conflict, &de->conflict_last, conflict_entry);
+}
+
+void insert_directory_entry(struct directory_entry *de,
+			struct hash_table *table,
+			int *total_dir_len,
+			unsigned int *ndir,
+			uint32_t crc)
+{
+	struct directory_entry *insert;
+
+	insert = (struct directory_entry *)insert_hash(crc, de, table);
+	if (insert) {
+		de->next_hash = insert->next_hash;
+		insert->next_hash = de;
+	}
+	(*ndir)++;
+	if (de->de_pathlen == 0)
+		(*total_dir_len)++;
+	else
+		*total_dir_len += de->de_pathlen + 2;
+}
+
+static struct conflict_entry *create_conflict_entry_from_ce(struct cache_entry *ce,
+								int pathlen)
+{
+	return create_new_conflict(ce->name, ce_namelen(ce), pathlen);
+}
+
+static struct directory_entry *compile_directory_data(struct index_state *istate,
+						int nfile,
+						unsigned int *ndir,
+						int *non_conflicted,
+						int *total_dir_len,
+						int *total_file_len)
+{
+	int i, dir_len = -1;
+	char *dir;
+	struct directory_entry *de, *current, *search, *found, *new, *previous_entry;
+	struct cache_entry **cache = istate->cache;
+	struct conflict_entry *conflict_entry;
+	struct hash_table table;
+	uint32_t crc;
+
+	init_hash(&table);
+	de = init_directory_entry("", 0);
+	current = de;
+	*ndir = 1;
+	*total_dir_len = 1;
+	crc = crc32(0, (Bytef*)de->pathname, de->de_pathlen);
+	insert_hash(crc, de, &table);
+	conflict_entry = NULL;
+	for (i = 0; i < nfile; i++) {
+		int new_entry;
+		if (cache[i]->ce_flags & CE_REMOVE)
+			continue;
+
+		new_entry = !ce_stage(cache[i]) || !conflict_entry
+		    || cache_name_compare(conflict_entry->name, conflict_entry->namelen,
+					cache[i]->name, ce_namelen(cache[i]));
+		if (new_entry)
+			(*non_conflicted)++;
+		if (dir_len < 0 || strncmp(cache[i]->name, dir, dir_len)
+		    || cache[i]->name[dir_len] != '/'
+		    || strchr(cache[i]->name + dir_len + 1, '/')) {
+			dir = super_directory(cache[i]->name);
+			if (!dir)
+				dir_len = 0;
+			else
+				dir_len = strlen(dir);
+			crc = crc32(0, (Bytef*)dir, dir_len);
+			found = lookup_hash(crc, &table);
+			search = found;
+			while (search && dir_len != 0 && strcmp(dir, search->pathname) != 0)
+				search = search->next_hash;
+		}
+		previous_entry = current;
+		if (!search || !found) {
+			new = init_directory_entry(dir, dir_len);
+			current->next = new;
+			current = current->next;
+			insert_directory_entry(new, &table, total_dir_len, ndir, crc);
+			search = current;
+		}
+		if (new_entry) {
+			search->de_nfiles++;
+			*total_file_len += ce_namelen(cache[i]) + 1;
+			if (search->de_pathlen)
+				*total_file_len -= search->de_pathlen + 1;
+			ce_queue_push(&(search->ce), &(search->ce_last), cache[i]);
+		}
+		if (ce_stage(cache[i]) > 0) {
+			struct conflict_part *conflict_part;
+			if (new_entry) {
+				conflict_entry = create_conflict_entry_from_ce(cache[i], search->de_pathlen);
+				add_conflict_to_directory_entry(search, conflict_entry);
+			}
+			conflict_part = conflict_part_from_inmemory(cache[i]);
+			add_part_to_conflict_entry(search, conflict_entry, conflict_part);
+		}
+		if (dir && !found) {
+			struct directory_entry *no_subtrees;
+
+			no_subtrees = current;
+			dir = super_directory(dir);
+			if (dir)
+				dir_len = strlen(dir);
+			else
+				dir_len = 0;
+			crc = crc32(0, (Bytef*)dir, dir_len);
+			found = lookup_hash(crc, &table);
+			while (!found) {
+				new = init_directory_entry(dir, dir_len);
+				new->de_nsubtrees = 1;
+				new->next = no_subtrees;
+				no_subtrees = new;
+				insert_directory_entry(new, &table, total_dir_len, ndir, crc);
+				dir = super_directory(dir);
+				if (!dir)
+					dir_len = 0;
+				else
+					dir_len = strlen(dir);
+				crc = crc32(0, (Bytef*)dir, dir_len);
+				found = lookup_hash(crc, &table);
+			}
+			search = found;
+			while (search->next_hash && strcmp(dir, search->pathname) != 0)
+				search = search->next_hash;
+			if (search)
+				found = search;
+			found->de_nsubtrees++;
+			previous_entry->next = no_subtrees;
+		}
+	}
+	return de;
+}
+
+static void ondisk_from_cache_entry(struct cache_entry *ce,
+				    struct ondisk_cache_entry_v5 *ondisk)
+{
+	unsigned int flags;
+
+	flags  = ce->ce_flags & CE_STAGEMASK;
+	flags |= ce->ce_flags & CE_VALID;
+	if (ce->ce_flags & CE_INTENT_TO_ADD)
+		flags |= CE_INTENT_TO_ADD_V5;
+	if (ce->ce_flags & CE_SKIP_WORKTREE)
+		flags |= CE_SKIP_WORKTREE_V5;
+	ondisk->flags      = htons(flags);
+	ondisk->mode       = htons(ce->ce_mode);
+	ondisk->mtime.sec  = htonl(ce->ce_mtime.sec);
+#ifdef USE_NSEC
+	ondisk->mtime.nsec = htonl(ce->ce_mtime.nsec);
+#else
+	ondisk->mtime.nsec = 0;
+#endif
+	if (!ce->ce_stat_crc)
+		ce->ce_stat_crc = calculate_stat_crc(ce);
+	ondisk->stat_crc   = htonl(ce->ce_stat_crc);
+	hashcpy(ondisk->sha1, ce->sha1);
+}
+
+static int write_directories_v5(struct directory_entry *de, int fd, int conflict_offset)
+{
+	struct directory_entry *current;
+	struct ondisk_directory_entry ondisk;
+	int current_offset, offset_write, ondisk_size, foffset;
+	uint32_t crc;
+
+	/*
+	 * This is needed because the compiler aligns structs to sizes multipe
+	 * of 4
+	 */
+	ondisk_size = sizeof(ondisk.flags)
+		+ sizeof(ondisk.foffset)
+		+ sizeof(ondisk.cr)
+		+ sizeof(ondisk.ncr)
+		+ sizeof(ondisk.nsubtrees)
+		+ sizeof(ondisk.nfiles)
+		+ sizeof(ondisk.nentries)
+		+ sizeof(ondisk.sha1);
+	current = de;
+	current_offset = 0;
+	foffset = 0;
+	while (current) {
+		int pathlen;
+
+		offset_write = htonl(current_offset);
+		if (ce_write_v5(NULL, fd, &offset_write, 4) < 0)
+			return -1;
+		if (current->de_pathlen == 0)
+			pathlen = 0;
+		else
+			pathlen = current->de_pathlen + 1;
+		current_offset += pathlen + 1 + ondisk_size + 4;
+		current = current->next;
+	}
+	/*
+	 * Write one more offset, which points to the end of the entries,
+	 * because we use it for calculating the dir length, instead of
+	 * using strlen.
+	 */
+	offset_write = htonl(current_offset);
+	if (ce_write_v5(NULL, fd, &offset_write, 4) < 0)
+		return -1;
+	current = de;
+	while (current) {
+		crc = 0;
+		if (current->de_pathlen == 0) {
+			if (ce_write_v5(&crc, fd, current->pathname, 1) < 0)
+				return -1;
+		} else {
+			char *path;
+			path = xmalloc(sizeof(char) * (current->de_pathlen + 2));
+			memcpy(path, current->pathname, current->de_pathlen);
+			memcpy(path + current->de_pathlen, "/\0", 2);
+			if (ce_write_v5(&crc, fd, path, current->de_pathlen + 2) < 0)
+				return -1;
+		}
+		current->de_foffset = foffset;
+		current->de_cr = conflict_offset;
+		ondisk_from_directory_entry(current, &ondisk);
+		if (ce_write_v5(&crc, fd, &ondisk, ondisk_size) < 0)
+			return -1;
+		crc = htonl(crc);
+		if (ce_write_v5(NULL, fd, &crc, 4) < 0)
+			return -1;
+		conflict_offset += current->conflict_size;
+		foffset += current->de_nfiles * 4;
+		current = current->next;
+	}
+	return 0;
+}
+
+static int write_entries_v5(struct index_state *istate,
+			    struct directory_entry *de,
+			    int entries,
+			    int fd,
+			    int offset_to_offset)
+{
+	int offset, offset_write, ondisk_size;
+	struct directory_entry *current;
+
+	offset = 0;
+	ondisk_size = sizeof(struct ondisk_cache_entry_v5);
+	current = de;
+	while (current) {
+		int pathlen;
+		struct cache_entry *ce = current->ce;
+
+		if (current->de_pathlen == 0)
+			pathlen = 0;
+		else
+			pathlen = current->de_pathlen + 1;
+		while (ce) {
+			if (ce->ce_flags & CE_REMOVE)
+				continue;
+			if (!ce_uptodate(ce) && is_racy_timestamp(istate, ce))
+				ce_smudge_racily_clean_entry_v5(ce);
+
+			offset_write = htonl(offset);
+			if (ce_write_v5(NULL, fd, &offset_write, 4) < 0)
+				return -1;
+			offset += ce_namelen(ce) - pathlen + 1 + ondisk_size + 4;
+			ce = ce->next;
+		}
+		current = current->next;
+	}
+	/*
+	 * Write one more offset, which points to the end of the entries,
+	 * because we use it for calculating the file length, instead of
+	 * using strlen.
+	 */
+	offset_write = htonl(offset);
+	if (ce_write_v5(NULL, fd, &offset_write, 4) < 0)
+		return -1;
+
+	offset = offset_to_offset;
+	current = de;
+	while (current) {
+		int pathlen;
+		struct cache_entry *ce = current->ce;
+
+		if (current->de_pathlen == 0)
+			pathlen = 0;
+		else
+			pathlen = current->de_pathlen + 1;
+		while (ce) {
+			struct ondisk_cache_entry_v5 ondisk;
+			uint32_t crc, calc_crc;
+
+			if (ce->ce_flags & CE_REMOVE)
+				continue;
+			calc_crc = htonl(offset);
+			crc = crc32(0, (Bytef*)&calc_crc, 4);
+			if (ce_write_v5(&crc, fd, ce->name + pathlen,
+					ce_namelen(ce) - pathlen + 1) < 0)
+				return -1;
+			ondisk_from_cache_entry(ce, &ondisk);
+			if (ce_write_v5(&crc, fd, &ondisk, ondisk_size) < 0)
+				return -1;
+			crc = htonl(crc);
+			if (ce_write_v5(NULL, fd, &crc, 4) < 0)
+				return -1;
+			offset += 4;
+			ce = ce->next;
+		}
+		current = current->next;
+	}
+	return 0;
+}
+
+static int write_conflict_v5(struct conflict_entry *conflict, int fd)
+{
+	struct conflict_entry *current;
+	struct conflict_part *current_part;
+	uint32_t crc;
+
+	current = conflict;
+	while (current) {
+		unsigned int to_write;
+
+		crc = 0;
+		if (ce_write_v5(&crc, fd,
+		     (Bytef*)(current->name + current->pathlen),
+		     current->namelen - current->pathlen) < 0)
+			return -1;
+		if (ce_write_v5(&crc, fd, (Bytef*)"\0", 1) < 0)
+			return -1;
+		to_write = htonl(current->nfileconflicts);
+		if (ce_write_v5(&crc, fd, (Bytef*)&to_write, 4) < 0)
+			return -1;
+		current_part = current->entries;
+		while (current_part) {
+			struct ondisk_conflict_part ondisk;
+
+			conflict_to_ondisk(current_part, &ondisk);
+			if (ce_write_v5(&crc, fd, (Bytef*)&ondisk, sizeof(struct ondisk_conflict_part)) < 0)
+				return 0;
+			current_part = current_part->next;
+		}
+		to_write = htonl(crc);
+		if (ce_write_v5(NULL, fd, (Bytef*)&to_write, 4) < 0)
+			return -1;
+		current = current->next;
+	}
+	return 0;
+}
+
+static int write_conflicts_v5(struct index_state *istate,
+			      struct directory_entry *de,
+			      int fd)
+{
+	struct directory_entry *current;
+
+	current = de;
+	while (current) {
+		if (current->de_ncr != 0) {
+			if (write_conflict_v5(current->conflict, fd) < 0)
+				return -1;
+		}
+		current = current->next;
+	}
+	return 0;
+}
+
+static int write_index_v5(struct index_state *istate, int newfd)
+{
+	struct cache_version_header hdr;
+	struct cache_header_v5 hdr_v5;
+	struct cache_entry **cache = istate->cache;
+	struct directory_entry *de;
+	struct ondisk_directory_entry *ondisk;
+	int entries = istate->cache_nr;
+	int i, removed, non_conflicted, total_dir_len, ondisk_directory_size;
+	int total_file_len, conflict_offset, offset_to_offset;
+	unsigned int ndir;
+	uint32_t crc;
+
+	for (i = removed = 0; i < entries; i++) {
+		if (cache[i]->ce_flags & CE_REMOVE)
+			removed++;
+	}
+	hdr.hdr_signature = htonl(CACHE_SIGNATURE);
+	hdr.hdr_version = htonl(istate->version);
+	hdr_v5.hdr_nfile = htonl(entries - removed);
+	hdr_v5.hdr_nextension = htonl(0); /* Currently no extensions are supported */
+
+	non_conflicted = 0;
+	total_dir_len = 0;
+	total_file_len = 0;
+	de = compile_directory_data(istate, entries, &ndir, &non_conflicted,
+			&total_dir_len, &total_file_len);
+	hdr_v5.hdr_ndir = htonl(ndir);
+
+	/*
+	 * This is needed because the compiler aligns structs to sizes multipe
+	 * of 4
+	 */
+	ondisk_directory_size = sizeof(ondisk->flags)
+		+ sizeof(ondisk->foffset)
+		+ sizeof(ondisk->cr)
+		+ sizeof(ondisk->ncr)
+		+ sizeof(ondisk->nsubtrees)
+		+ sizeof(ondisk->nfiles)
+		+ sizeof(ondisk->nentries)
+		+ sizeof(ondisk->sha1);
+	hdr_v5.hdr_fblockoffset = htonl(sizeof(hdr) + sizeof(hdr_v5) + 4
+		+ (ndir + 1) * 4
+		+ total_dir_len
+		+ ndir * (ondisk_directory_size + 4)
+		+ (non_conflicted + 1) * 4);
+
+	crc = 0;
+	if (ce_write_v5(&crc, newfd, &hdr, sizeof(hdr)) < 0)
+		return -1;
+	if (ce_write_v5(&crc, newfd, &hdr_v5, sizeof(hdr_v5)) < 0)
+		return -1;
+	crc = htonl(crc);
+	if (ce_write_v5(NULL, newfd, &crc, 4) < 0)
+		return -1;
+
+	conflict_offset = sizeof(hdr) + sizeof(hdr_v5) + 4
+		+ (ndir + 1) * 4
+		+ total_dir_len
+		+ ndir * (ondisk_directory_size + 4)
+		+ (non_conflicted + 1) * 4
+		+ total_file_len
+		+ non_conflicted * (sizeof(struct ondisk_cache_entry_v5) + 4);
+	if (write_directories_v5(de, newfd, conflict_offset) < 0)
+		return -1;
+	offset_to_offset = sizeof(hdr) + sizeof(hdr_v5) + 4
+		+ (ndir + 1) * 4
+		+ total_dir_len
+		+ ndir * (ondisk_directory_size + 4);
+	if (write_entries_v5(istate, de, entries, newfd, offset_to_offset) < 0)
+		return -1;
+	if (write_conflicts_v5(istate, de, newfd) < 0)
+		return -1;
+	return ce_flush_v5(newfd);
+}
+
 int write_index(struct index_state *istate, int newfd)
 {
 	if (!istate->version)
 		istate->version = INDEX_FORMAT_DEFAULT;
 
-	return write_index_v2(istate, newfd);
+	if (istate->version != 5)
+		return write_index_v2(istate, newfd);
+	else
+		return write_index_v5(istate, newfd);
 }
 
 /*
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH/RFC v2 13/16] Write index-v5 cache-tree data
  2012-08-05 21:48 [PATCH/RFC v2 0/16] Introduce index file format version 5 Thomas Gummerer
                   ` (11 preceding siblings ...)
  2012-08-05 21:49 ` [PATCH/RFC v2 12/16] Write index-v5 Thomas Gummerer
@ 2012-08-05 21:49 ` Thomas Gummerer
  2012-08-05 21:49 ` [PATCH/RFC v2 14/16] Write resolve-undo data for index-v5 Thomas Gummerer
                   ` (3 subsequent siblings)
  16 siblings, 0 replies; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-05 21:49 UTC (permalink / raw)
  To: git; +Cc: trast, mhagger, gitster, pcouds, robin.rosenberg, Thomas Gummerer

Write the cache-tree data for the index version 5 file format. The
in-memory cache-tree data is converted to the ondisk format, by adding
it to the directory entries, that were compiled from the cache-entries
in the step before.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 cache-tree.c |   52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 cache-tree.h |    1 +
 read-cache.c |    2 ++
 3 files changed, 55 insertions(+)

diff --git a/cache-tree.c b/cache-tree.c
index 440cd04..e167b61 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -612,6 +612,58 @@ struct cache_tree *cache_tree_convert_v5(struct directory_entry *de)
 	return convert_one(queue, 0);
 }
 
+
+static void convert_one_to_ondisk_v5(struct hash_table *table, struct cache_tree *it,
+				const char *path, int pathlen, uint32_t crc)
+{
+	int i;
+	struct directory_entry *found, *search;
+
+	crc = crc32(crc, (Bytef*)path, pathlen);
+	found = lookup_hash(crc, table);
+	search = found;
+	while (search && strcmp(path, search->pathname + search->de_pathlen - strlen(path)) != 0)
+		search = search->next_hash;
+	if (!search)
+		return;
+	/*
+	 * The number of subtrees is already calculated by
+	 * compile_directory_data, therefore we only need to
+	 * add the entry_count
+	 */
+	search->de_nentries = it->entry_count;
+	if (0 <= it->entry_count)
+		hashcpy(search->sha1, it->sha1);
+	if (strcmp(path, "") != 0)
+		crc = crc32(crc, (Bytef*)"/", 1);
+
+#if DEBUG
+	if (0 <= it->entry_count)
+		fprintf(stderr, "cache-tree <%.*s> (%d ent, %d subtree) %s\n",
+			pathlen, path, it->entry_count, it->subtree_nr,
+			sha1_to_hex(it->sha1));
+	else
+		fprintf(stderr, "cache-tree <%.*s> (%d subtree) invalid\n",
+			pathlen, path, it->subtree_nr);
+#endif
+
+	for (i = 0; i < it->subtree_nr; i++) {
+		struct cache_tree_sub *down = it->down[i];
+		if (i) {
+			struct cache_tree_sub *prev = it->down[i-1];
+			if (subtree_name_cmp(down->name, down->namelen,
+					     prev->name, prev->namelen) <= 0)
+				die("fatal - unsorted cache subtree");
+		}
+		convert_one_to_ondisk_v5(table, down->cache_tree, down->name, down->namelen, crc);
+	}
+}
+
+void cache_tree_to_ondisk_v5(struct hash_table *table, struct cache_tree *root)
+{
+	convert_one_to_ondisk_v5(table, root, "", 0, 0);
+}
+
 static struct cache_tree *cache_tree_find(struct cache_tree *it, const char *path)
 {
 	if (!it)
diff --git a/cache-tree.h b/cache-tree.h
index 7f29d26..e08bc31 100644
--- a/cache-tree.h
+++ b/cache-tree.h
@@ -37,6 +37,7 @@ struct cache_tree *cache_tree_read(const char *buffer, unsigned long size);
  * Don't use it if the directory entries are still needed after.
  */
 struct cache_tree *cache_tree_convert_v5(struct directory_entry *de);
+void cache_tree_to_ondisk_v5(struct hash_table *table, struct cache_tree *root);
 
 int cache_tree_fully_valid(struct cache_tree *);
 int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int);
diff --git a/read-cache.c b/read-cache.c
index 2c47a97..d18383f 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2763,6 +2763,8 @@ static struct directory_entry *compile_directory_data(struct index_state *istate
 			previous_entry->next = no_subtrees;
 		}
 	}
+	if (istate->cache_tree)
+		cache_tree_to_ondisk_v5(&table, istate->cache_tree);
 	return de;
 }
 
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH/RFC v2 14/16] Write resolve-undo data for index-v5
  2012-08-05 21:48 [PATCH/RFC v2 0/16] Introduce index file format version 5 Thomas Gummerer
                   ` (12 preceding siblings ...)
  2012-08-05 21:49 ` [PATCH/RFC v2 13/16] Write index-v5 cache-tree data Thomas Gummerer
@ 2012-08-05 21:49 ` Thomas Gummerer
  2012-08-05 21:49 ` [PATCH/RFC v2 15/16] update-index.c: add a force-rewrite option Thomas Gummerer
                   ` (2 subsequent siblings)
  16 siblings, 0 replies; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-05 21:49 UTC (permalink / raw)
  To: git; +Cc: trast, mhagger, gitster, pcouds, robin.rosenberg, Thomas Gummerer

Write the resolve undo data to the ondisk format, by joining the data
in the resolve-undo string-list with the already existing conflicts
that were compiled before, when searching the directories and add
them to the corresponding directory entries.

Helped-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 read-cache.c   |    1 +
 resolve-undo.c |   93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 resolve-undo.h |    1 +
 3 files changed, 95 insertions(+)

diff --git a/read-cache.c b/read-cache.c
index d18383f..6496cc4 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2765,6 +2765,7 @@ static struct directory_entry *compile_directory_data(struct index_state *istate
 	}
 	if (istate->cache_tree)
 		cache_tree_to_ondisk_v5(&table, istate->cache_tree);
+	resolve_undo_to_ondisk_v5(&table, istate->resolve_undo, ndir, total_dir_len, de);
 	return de;
 }
 
diff --git a/resolve-undo.c b/resolve-undo.c
index f96c6ba..4568dcc 100644
--- a/resolve-undo.c
+++ b/resolve-undo.c
@@ -206,3 +206,96 @@ void resolve_undo_convert_v5(struct index_state *istate,
 		ce = ce->next;
 	}
 }
+
+void resolve_undo_to_ondisk_v5(struct hash_table *table,
+				struct string_list *resolve_undo,
+				unsigned int *ndir, int *total_dir_len,
+				struct directory_entry *de)
+{
+	struct string_list_item *item;
+	struct directory_entry *search;
+
+	if (!resolve_undo)
+		return;
+	for_each_string_list_item(item, resolve_undo) {
+		struct conflict_entry *conflict_entry;
+		struct resolve_undo_info *ui = item->util;
+		char *super;
+		int i, dir_len, len;
+		uint32_t crc;
+		struct directory_entry *found, *current, *new_tree;
+
+		if (!ui)
+			continue;
+
+		super = super_directory(item->string);
+		if (!super)
+			dir_len = 0;
+		else
+			dir_len = strlen(super);
+		crc = crc32(0, (Bytef*)super, dir_len);
+		found = lookup_hash(crc, table);
+		current = NULL;
+		new_tree = NULL;
+		
+		while (!found) {
+			struct directory_entry *new;
+
+			new = init_directory_entry(super, dir_len);
+			if (!current)
+				current = new;
+			insert_directory_entry(new, table, total_dir_len, ndir, crc);
+			if (new_tree != NULL)
+				new->de_nsubtrees = 1;
+			new->next = new_tree;
+			new_tree = new;
+			super = super_directory(super);
+			if (!super)
+				dir_len = 0;
+			else
+				dir_len = strlen(super);
+			crc = crc32(0, (Bytef*)super, dir_len);
+			found = lookup_hash(crc, table);
+		}
+		search = found;
+		while (search->next_hash && strcmp(super, search->pathname) != 0)
+			search = search->next_hash;
+		if (search && !current)
+			current = search;
+		if (!search && !current)
+			current = new_tree;
+		if (!super && new_tree) {
+			new_tree->next = de->next;
+			de->next = new_tree;
+			de->de_nsubtrees++;
+		} else if (new_tree) {
+			struct directory_entry *temp;
+
+			search = de->next;
+			while (strcmp(super, search->pathname))
+				search = search->next;
+			temp = new_tree;
+			while (temp->next)
+				temp = temp->next;
+			search->de_nsubtrees++;
+			temp->next = search->next;
+			search->next = new_tree;
+		}
+
+		len = strlen(item->string);
+		conflict_entry = create_new_conflict(item->string, len, current->de_pathlen);
+		add_conflict_to_directory_entry(current, conflict_entry);
+		for (i = 0; i < 3; i++) {
+			if (ui->mode[i]) {
+				struct conflict_part *cp;
+
+				cp = xmalloc(sizeof(struct conflict_part));
+				cp->flags = (i + 1) << CONFLICT_STAGESHIFT;
+				cp->entry_mode = ui->mode[i];
+				cp->next = NULL;
+				hashcpy(cp->sha1, ui->sha1[i]);
+				add_part_to_conflict_entry(current, conflict_entry, cp);
+			}
+		}
+	}
+}
diff --git a/resolve-undo.h b/resolve-undo.h
index ab660a6..ff80d84 100644
--- a/resolve-undo.h
+++ b/resolve-undo.h
@@ -14,5 +14,6 @@ extern int unmerge_index_entry_at(struct index_state *, int);
 extern void unmerge_index(struct index_state *, const char **);
 
 extern void resolve_undo_convert_v5(struct index_state *, struct conflict_entry *);
+extern void resolve_undo_to_ondisk_v5(struct hash_table *, struct string_list *, unsigned int *, int *, struct directory_entry *);
 
 #endif
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH/RFC v2 15/16] update-index.c: add a force-rewrite option
  2012-08-05 21:48 [PATCH/RFC v2 0/16] Introduce index file format version 5 Thomas Gummerer
                   ` (13 preceding siblings ...)
  2012-08-05 21:49 ` [PATCH/RFC v2 14/16] Write resolve-undo data for index-v5 Thomas Gummerer
@ 2012-08-05 21:49 ` Thomas Gummerer
  2012-08-06  1:58   ` Junio C Hamano
  2012-08-05 21:49 ` [PATCH/RFC v2 16/16] p0002-index.sh: add perf test for the index formats Thomas Gummerer
  2012-08-06 14:35 ` [PATCH/RFC v2 0/16] Introduce index file format version 5 Nguyễn Thái Ngọc Duy
  16 siblings, 1 reply; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-05 21:49 UTC (permalink / raw)
  To: git; +Cc: trast, mhagger, gitster, pcouds, robin.rosenberg, Thomas Gummerer

Add a force-rewrite option to update-index, which allows the user
to rewrite the index, even if there are no changes. This can be used
to do performance tests of both the reader and the writer.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 builtin/update-index.c |    5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/builtin/update-index.c b/builtin/update-index.c
index 4ce341c..7fedc8f 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -24,6 +24,7 @@ static int allow_remove;
 static int allow_replace;
 static int info_only;
 static int force_remove;
+static int force_rewrite;
 static int verbose;
 static int mark_valid_only;
 static int mark_skip_worktree_only;
@@ -728,6 +729,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 		OPT_BIT(0, "unmerged", &refresh_args.flags,
 			"refresh even if index contains unmerged entries",
 			REFRESH_UNMERGED),
+		OPT_SET_INT(0, "force-rewrite", &force_rewrite,
+			"force a index rewrite even if there is no change", 1),
 		{OPTION_CALLBACK, 0, "refresh", &refresh_args, NULL,
 			"refresh stat information",
 			PARSE_OPT_NOARG | PARSE_OPT_NONEG,
@@ -886,7 +889,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 		strbuf_release(&buf);
 	}
 
-	if (active_cache_changed) {
+	if (active_cache_changed || force_rewrite) {
 		if (newfd < 0) {
 			if (refresh_args.flags & REFRESH_QUIET)
 				exit(128);
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH/RFC v2 16/16] p0002-index.sh: add perf test for the index formats
  2012-08-05 21:48 [PATCH/RFC v2 0/16] Introduce index file format version 5 Thomas Gummerer
                   ` (14 preceding siblings ...)
  2012-08-05 21:49 ` [PATCH/RFC v2 15/16] update-index.c: add a force-rewrite option Thomas Gummerer
@ 2012-08-05 21:49 ` Thomas Gummerer
  2012-08-06 14:35 ` [PATCH/RFC v2 0/16] Introduce index file format version 5 Nguyễn Thái Ngọc Duy
  16 siblings, 0 replies; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-05 21:49 UTC (permalink / raw)
  To: git; +Cc: trast, mhagger, gitster, pcouds, robin.rosenberg, Thomas Gummerer

From: Thomas Rast <trast@student.ethz.ch>

Add a performance test for index version [23]/4/5 by using
git update-index --force-rewrite, thus testing both the reader
and the writer speed of all index formats.

Signed-off-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 t/perf/p0002-index.sh |   33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100755 t/perf/p0002-index.sh

diff --git a/t/perf/p0002-index.sh b/t/perf/p0002-index.sh
new file mode 100755
index 0000000..2996357
--- /dev/null
+++ b/t/perf/p0002-index.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+test_description="Tests index versions [23]/4/5"
+
+. ./perf-lib.sh
+
+test_perf_large_repo
+
+test_expect_success 'convert to v3' '
+	git update-index --index-version=3
+'
+
+test_perf 'v[23]: update-index' '
+	git update-index --force-rewrite >/dev/null
+'
+
+test_expect_success 'convert to v4' '
+	git update-index --index-version=4
+'
+
+test_perf 'v4: update-index' '
+	git update-index --force-rewrite >/dev/null
+'
+
+test_expect_success 'convert to v5' '
+	git update-index --index-version=5
+'
+
+test_perf 'v5: update-index' '
+	git update-index --force-rewrite >/dev/null
+'
+
+test_done
-- 
1.7.10.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 01/16] Modify cache_header to prepare for other index formats
  2012-08-05 21:48 ` [PATCH/RFC v2 01/16] Modify cache_header to prepare for other index formats Thomas Gummerer
@ 2012-08-06  1:17   ` Junio C Hamano
  2012-08-07 12:41     ` Thomas Gummerer
  0 siblings, 1 reply; 59+ messages in thread
From: Junio C Hamano @ 2012-08-06  1:17 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pcouds, robin.rosenberg

Thomas Gummerer <t.gummerer@gmail.com> writes:

> diff --git a/read-cache.c b/read-cache.c
> index 2f8159f..5d61d92 100644
> --- a/read-cache.c
> +++ b/read-cache.c
> @@ -1433,7 +1446,7 @@ int read_index_from(struct index_state *istate, const char *path)
>  
>  	errno = EINVAL;
>  	mmap_size = xsize_t(st.st_size);
> -	if (mmap_size < sizeof(struct cache_header) + 20)
> +	if (mmap_size < sizeof(struct cache_version_header) + 20)
>  		die("index file smaller than expected");

At the design level, I have a large problem with this change.  I
understand that you wanted to make sure that some versions can lack
the num-entries word in the header, but then what is the point of
keeping that "+20" here?  Are all versions of the file format still
required to have the 20-byte trailing SHA-1 sum over the whole file?

	Side note: I am actually fine with that "sum at the end"
	requirement, but then it needs to be documented what are
	assumed to be unomittable and why.

        I also do not see why v5 *needs* to drop the num-entries
        word from the header in the first place.

At the practical level, we used to error out, upon seeing a file
that claims to be v2 in the header but is too small to hold the
version header, the number of entries word and the trailing SHA-1
sum.  We no longer do this and happily call verify_hdr() in the
following code even when the file is too small, no?

> @@ -1442,11 +1455,13 @@ int read_index_from(struct index_state *istate, const char *path)
>  		die_errno("unable to map index file");
>  
>  	hdr = mmap;
> +	hdr_v2 =  mmap + sizeof(*hdr);
>  	if (verify_hdr(hdr, mmap_size) < 0)
>  		goto unmap;

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 04/16] Modify write functions to prepare for other index formats
  2012-08-05 21:49 ` [PATCH/RFC v2 04/16] Modify write functions " Thomas Gummerer
@ 2012-08-06  1:34   ` Junio C Hamano
  2012-08-07 12:50     ` Thomas Gummerer
  0 siblings, 1 reply; 59+ messages in thread
From: Junio C Hamano @ 2012-08-06  1:34 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pcouds, robin.rosenberg

Thomas Gummerer <t.gummerer@gmail.com> writes:

> -static int ce_write(git_SHA_CTX *context, int fd, void *data, unsigned int len)
> +static int ce_write_v2(git_SHA_CTX *context, int fd, void *data, unsigned int len)
>  {

Mild NAK to name this function with any hint that it is for v2 only.
The type of "data" is not "struct ondisk_index_entry_v2" and this is
just a way to stream data to "fd" while hashing, which is similar in
spirit to what csum-file.c "sha1file'"API does.  Perhaps we may want
to update ce_write() interface to build on top of sha1file API?

At this step in the series, is it too early to split read-cache.c
into two files, move all the v2 specific part to read-cache-v2.c,
and keep static function names like write_index_ext_header() as they
are?  After all, the main dispatch would become

> +int write_index(struct index_state *istate, int newfd)
> +{
> +	if (!istate->version)
> +		istate->version = INDEX_FORMAT_DEFAULT;
> +
> +	return write_index_v2(istate, newfd);
> +}

so read-cache-v2.c would need to export write_index_v2() but the
functions to implement it like ce_write_entry() do not have to be
exposed outside the file, no?

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 05/16] t2104: Don't fail for index versions other than [23]
  2012-08-05 21:49 ` [PATCH/RFC v2 05/16] t2104: Don't fail for index versions other than [23] Thomas Gummerer
@ 2012-08-06  1:36   ` Junio C Hamano
  0 siblings, 0 replies; 59+ messages in thread
From: Junio C Hamano @ 2012-08-06  1:36 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pcouds, robin.rosenberg

Thomas Gummerer <t.gummerer@gmail.com> writes:

> t2104 currently checks for the exact index version 2 or 3,
> depending if there is a skip-worktree flag or not. Other
> index versions do not use extended flags and thus cannot
> be tested for version changes.
>
> Make this test update the index to version 2 at the beginning
> of the test. Testing the skip-worktree flags for the default
> index format is still covered by t7011 and t7012.
>
> Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
> ---
>  t/t2104-update-index-skip-worktree.sh |    3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/t/t2104-update-index-skip-worktree.sh b/t/t2104-update-index-skip-worktree.sh
> index 1d0879b..4ef7d99 100755
> --- a/t/t2104-update-index-skip-worktree.sh
> +++ b/t/t2104-update-index-skip-worktree.sh
> @@ -25,7 +25,8 @@ test_expect_success 'setup' '
>  	mkdir sub &&
>  	touch ./1 ./2 sub/1 sub/2 &&
>  	git add 1 2 sub/1 sub/2 &&
> -	git ls-files -t | test_cmp expect.full -
> +	git ls-files -t | test_cmp expect.full - &&
> +	git update-index --index-version=2
>  '

Makes sense, but wouldn't it make even more sense to do this at the
very beginning?  i.e.

        git update-index --index-version 2 &&
	mkdir sub &&
        touch 1 2 sub/1 sub/2 &&
        git add 1 2 sub/1 sub/2 &&
        git ls-files -t | test_cmp expect.full -

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 06/16] t3700: sleep for 1 second, to avoid interfering with the racy code
  2012-08-05 21:49 ` [PATCH/RFC v2 06/16] t3700: sleep for 1 second, to avoid interfering with the racy code Thomas Gummerer
@ 2012-08-06  1:43   ` Junio C Hamano
  2012-08-07 16:59     ` Thomas Gummerer
  0 siblings, 1 reply; 59+ messages in thread
From: Junio C Hamano @ 2012-08-06  1:43 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pcouds, robin.rosenberg

Thomas Gummerer <t.gummerer@gmail.com> writes:

> The new git racy code uses the mtime of cache-entries to smudge
> a racy clean entry, and loads the work, of checking the file-system

-ECANTPARSE.

> if the entry has really changed, off to the reader. This interferes
> with this test, because the entry is racily smudged and thus has
> mtime 0. We wait 1 second to avoid smudging the entry and getting
> correct test results.

Mild NAK, especially it is totally unclear why you even need to muck
with racy-git check in the current format of the index in the first
place, and even if it were necessary, it is unclear why this cannot
be done with test-chmtime.

> Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
> ---
>  t/t3700-add.sh |    1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/t/t3700-add.sh b/t/t3700-add.sh
> index 874b3a6..4d70805 100755
> --- a/t/t3700-add.sh
> +++ b/t/t3700-add.sh
> @@ -184,6 +184,7 @@ test_expect_success 'git add --refresh with pathspec' '
>  	echo >foo && echo >bar && echo >baz &&
>  	git add foo bar baz && H=$(git rev-parse :foo) && git rm -f foo &&
>  	echo "100644 $H 3	foo" | git update-index --index-info &&
> +	sleep 1 &&
>  	test-chmtime -60 bar baz &&
>  	>expect &&
>  	git add --refresh bar >actual &&

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 08/16] Make in-memory format aware of stat_crc
  2012-08-05 21:49 ` [PATCH/RFC v2 08/16] Make in-memory format aware of stat_crc Thomas Gummerer
@ 2012-08-06  1:46   ` Junio C Hamano
  2012-08-07 19:02     ` Thomas Gummerer
  0 siblings, 1 reply; 59+ messages in thread
From: Junio C Hamano @ 2012-08-06  1:46 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pcouds, robin.rosenberg

Thomas Gummerer <t.gummerer@gmail.com> writes:

> +	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
> +	stat = htonl(ce->ce_ino);
> +	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
> +	stat = htonl(ce->ce_size);
> +	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
> +	stat = htonl(ce->ce_dev);
> +	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
> +	stat = htonl(ce->ce_uid);
> +	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
> +	stat = htonl(ce->ce_gid);
> +	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
> +	return stat_crc;

What are these (Bytef *) casts are about?  We do not use it in any
of our existing calls to crc32().

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 10/16] Read resolve-undo data
  2012-08-05 21:49 ` [PATCH/RFC v2 10/16] Read resolve-undo data Thomas Gummerer
@ 2012-08-06  1:51   ` Junio C Hamano
  2012-08-07 19:17     ` Thomas Gummerer
  0 siblings, 1 reply; 59+ messages in thread
From: Junio C Hamano @ 2012-08-06  1:51 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pcouds, robin.rosenberg

Thomas Gummerer <t.gummerer@gmail.com> writes:

> Make git read the resolve-undo data from the index.
>
> Since the resolve-undo data is joined with the conflicts in
> the ondisk format of the index file version 5, conflicts and
> resolved data is read at the same time, and the resolve-undo
> data is then converted to the in-memory format.
>
> Helped-by: Thomas Rast <trast@student.ethz.ch>
> Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
> ---
>  read-cache.c   |    1 +
>  resolve-undo.c |   36 ++++++++++++++++++++++++++++++++++++
>  resolve-undo.h |    2 ++
>  3 files changed, 39 insertions(+)
>
> diff --git a/read-cache.c b/read-cache.c
> index 70334f9..03370f9 100644
> --- a/read-cache.c
> +++ b/read-cache.c
> @@ -1942,6 +1942,7 @@ static struct directory_entry *read_entries_v5(struct index_state *istate,
>  	int i;
>  
>  	conflict_queue = read_conflicts_v5(de, mmap, mmap_size, fd);
> +	resolve_undo_convert_v5(istate, conflict_queue);
>  	for (i = 0; i < de->de_nfiles; i++) {
>  		ce = read_entry_v5(de,
>  				entry_offset,
> diff --git a/resolve-undo.c b/resolve-undo.c
> index 72b4612..f96c6ba 100644
> --- a/resolve-undo.c
> +++ b/resolve-undo.c
> @@ -170,3 +170,39 @@ void unmerge_index(struct index_state *istate, const char **pathspec)
>  		i = unmerge_index_entry_at(istate, i);
>  	}
>  }
> +
> +void resolve_undo_convert_v5(struct index_state *istate,
> +					struct conflict_entry *ce)
> +{

It is unclear why this needs to be part of resolve-undo.c and
exported from it.  Shouldn't it (and bulk of the previous few
patches) be part of a read-cache-v5.c file (with v2/3/4 specific
part separated out from read-cache.c to form read-cache-v2.c)?

> +	int i;
> +
> +	while (ce) {
> +		struct string_list_item *lost;
> +		struct resolve_undo_info *ui;
> +		struct conflict_part *cp;
> +
> +		if (ce->entries && (ce->entries->flags & CONFLICT_CONFLICTED) != 0) {
> +			ce = ce->next;
> +			continue;
> +		}
> +		if (!istate->resolve_undo) {
> +			istate->resolve_undo = xcalloc(1, sizeof(struct string_list));
> +			istate->resolve_undo->strdup_strings = 1;
> +		}
> +
> +		lost = string_list_insert(istate->resolve_undo, ce->name);
> +		if (!lost->util)
> +			lost->util = xcalloc(1, sizeof(*ui));
> +		ui = lost->util;
> +
> +		cp = ce->entries;
> +		for (i = 0; i < 3; i++)
> +			ui->mode[i] = 0;
> +		while (cp) {
> +			ui->mode[conflict_stage(cp) - 1] = cp->entry_mode;
> +			hashcpy(ui->sha1[conflict_stage(cp) - 1], cp->sha1);
> +			cp = cp->next;
> +		}
> +		ce = ce->next;
> +	}
> +}
> diff --git a/resolve-undo.h b/resolve-undo.h
> index 8458769..ab660a6 100644
> --- a/resolve-undo.h
> +++ b/resolve-undo.h
> @@ -13,4 +13,6 @@ extern void resolve_undo_clear_index(struct index_state *);
>  extern int unmerge_index_entry_at(struct index_state *, int);
>  extern void unmerge_index(struct index_state *, const char **);
>  
> +extern void resolve_undo_convert_v5(struct index_state *, struct conflict_entry *);
> +
>  #endif

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 15/16] update-index.c: add a force-rewrite option
  2012-08-05 21:49 ` [PATCH/RFC v2 15/16] update-index.c: add a force-rewrite option Thomas Gummerer
@ 2012-08-06  1:58   ` Junio C Hamano
  2012-08-08  7:31     ` Thomas Gummerer
  0 siblings, 1 reply; 59+ messages in thread
From: Junio C Hamano @ 2012-08-06  1:58 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pcouds, robin.rosenberg

Thomas Gummerer <t.gummerer@gmail.com> writes:

> Add a force-rewrite option to update-index, which allows the user
> to rewrite the index, even if there are no changes. This can be used
> to do performance tests of both the reader and the writer.
>
> Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
> ---
>  builtin/update-index.c |    5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)

I do not think this is wrong per-se, but is a new command that needs
to be documented?  If it is only for benchmarking and debugging, it
might be sufficient to make "--index-version <n>" always rewrite the
index.

> diff --git a/builtin/update-index.c b/builtin/update-index.c
> index 4ce341c..7fedc8f 100644
> --- a/builtin/update-index.c
> +++ b/builtin/update-index.c
> @@ -24,6 +24,7 @@ static int allow_remove;
>  static int allow_replace;
>  static int info_only;
>  static int force_remove;
> +static int force_rewrite;
>  static int verbose;
>  static int mark_valid_only;
>  static int mark_skip_worktree_only;
> @@ -728,6 +729,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
>  		OPT_BIT(0, "unmerged", &refresh_args.flags,
>  			"refresh even if index contains unmerged entries",
>  			REFRESH_UNMERGED),
> +		OPT_SET_INT(0, "force-rewrite", &force_rewrite,
> +			"force a index rewrite even if there is no change", 1),
>  		{OPTION_CALLBACK, 0, "refresh", &refresh_args, NULL,
>  			"refresh stat information",
>  			PARSE_OPT_NOARG | PARSE_OPT_NONEG,
> @@ -886,7 +889,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
>  		strbuf_release(&buf);
>  	}
>  
> -	if (active_cache_changed) {
> +	if (active_cache_changed || force_rewrite) {
>  		if (newfd < 0) {
>  			if (refresh_args.flags & REFRESH_QUIET)
>  				exit(128);

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 09/16] Read index-v5
  2012-08-05 21:49 ` [PATCH/RFC v2 09/16] Read index-v5 Thomas Gummerer
@ 2012-08-06  5:17   ` Junio C Hamano
  2012-08-08  7:41     ` Thomas Gummerer
  0 siblings, 1 reply; 59+ messages in thread
From: Junio C Hamano @ 2012-08-06  5:17 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pcouds, robin.rosenberg

Thomas Gummerer <t.gummerer@gmail.com> writes:

> +static struct directory_entry *read_directories_v5(unsigned int *dir_offset,
> +				unsigned int *dir_table_offset,
> +				void *mmap,
> +				int mmap_size)
> +{
> +	int i, ondisk_directory_size;
> +	uint32_t *filecrc, *beginning, *end;
> +	struct directory_entry *current = NULL;
> +	struct ondisk_directory_entry *disk_de;
> +	struct directory_entry *de;
> +	unsigned int data_len, len;
> +	char *name;
> +
> +	ondisk_directory_size = sizeof(disk_de->flags)
> +		+ sizeof(disk_de->foffset)
> +		+ sizeof(disk_de->cr)
> +		+ sizeof(disk_de->ncr)
> +		+ sizeof(disk_de->nsubtrees)
> +		+ sizeof(disk_de->nfiles)
> +		+ sizeof(disk_de->nentries)
> +		+ sizeof(disk_de->sha1);
> +	name = (char *)mmap + *dir_offset;
> +	beginning = mmap + *dir_table_offset;

Notice how you computed name with pointer arithmetic by first
casting mmap (which is "void *") and when computing beginning, you
forgot to cast mmap and attempted pointer arithmetic with "void *".
The latter does not work and breaks compilation.

The pointer-arith with "void *" is not limited to this function.

Please check the a band-aid (I wouldn't call it a fix-up) patch I
added on top of the series before queuing the topic to 'pu'; it is
primarily to illustrate the places I noticed that have this issue.

I do not necessarily suggest that the way the band-aid patch makes
it compile is the best approach.  It might be cleaner to use a saner
type like "char *" (or perhaps "const char *") as the type to point
at a piece of memory you read from the disk.  I haven't formed an
opinion.

Thanks.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 0/16] Introduce index file format version 5
  2012-08-05 21:48 [PATCH/RFC v2 0/16] Introduce index file format version 5 Thomas Gummerer
                   ` (15 preceding siblings ...)
  2012-08-05 21:49 ` [PATCH/RFC v2 16/16] p0002-index.sh: add perf test for the index formats Thomas Gummerer
@ 2012-08-06 14:35 ` Nguyễn Thái Ngọc Duy
  2012-08-06 14:35   ` [PATCH 1/2] Move index v2 specific code out of read-cache Nguyễn Thái Ngọc Duy
                     ` (3 more replies)
  16 siblings, 4 replies; 59+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-08-06 14:35 UTC (permalink / raw)
  To: Thomas Gummerer
  Cc: git, trast, mhagger, gitster, robin.rosenberg,
	Nguyễn Thái Ngọc Duy

These mails are about cosmetics only. But I think it helps maintenance
in long term. I notice in your series we have many functions with _v2
and _v5 mixed together. Worse, some functions that are _v2 only are
not suffixed with _v2. I still think separating v2/v5 changes is a
good idea. So I played a bit, see how it might become.

The next two emails demonstrate how we take v2-specific code out to
read-cache-v2.c, then add v5 code in the next patch. Notice there's very
little change in read-cache.c in the second patch. I wanted to see how
v5 changes affects v2 users and the second patch shows it.

I'm not happy with the first patch either. Ideally it should consist
of code move only, no other changes. All updates in read_index_from
and the introduction of struct index_ops should happen in patches
before that.

Then of course you need to split the second patch into several logical
patches again. We can drop _v5 suffix in read-cache-v5.c (I haven't
done that). When we add partial read/write for v5, we can add more
func pointers to index_ops and implement them in v2 (probably as no-op
or assertion)

There are still some v5 bits in the first patch. This series is not
meant to be used anyway, so it does not matter much. Hope it helps.

Nguyễn Thái Ngọc Duy (2):
  Move index v2 specific code out of read-cache
  Add index-v5

 Makefile        |    3 +
 cache.h         |   92 ++++-
 read-cache-v2.c |  570 +++++++++++++++++++++++++++
 read-cache-v5.c | 1170 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 read-cache.c    |  618 +++---------------------------
 read-cache.h    |   54 +++
 6 files changed, 1932 insertions(+), 575 deletions(-)
 create mode 100644 read-cache-v2.c
 create mode 100644 read-cache-v5.c
 create mode 100644 read-cache.h

-- 
1.7.8

^ permalink raw reply	[flat|nested] 59+ messages in thread

* [PATCH 1/2] Move index v2 specific code out of read-cache
  2012-08-06 14:35 ` [PATCH/RFC v2 0/16] Introduce index file format version 5 Nguyễn Thái Ngọc Duy
@ 2012-08-06 14:35   ` Nguyễn Thái Ngọc Duy
  2012-08-06 14:36   ` [PATCH 2/2] Add index-v5 Nguyễn Thái Ngọc Duy
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 59+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-08-06 14:35 UTC (permalink / raw)
  To: Thomas Gummerer
  Cc: git, trast, mhagger, gitster, robin.rosenberg,
	Nguyễn Thái Ngọc Duy

---
 Makefile        |    2 +
 cache.h         |   92 ++++++++-
 read-cache-v2.c |  570 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 read-cache.c    |  591 +++----------------------------------------------------
 read-cache.h    |   54 +++++
 5 files changed, 734 insertions(+), 575 deletions(-)
 create mode 100644 read-cache-v2.c
 create mode 100644 read-cache.h

diff --git a/Makefile b/Makefile
index 4b58b91..b4a7c73 100644
--- a/Makefile
+++ b/Makefile
@@ -645,6 +645,7 @@ LIB_H += progress.h
 LIB_H += prompt.h
 LIB_H += quote.h
 LIB_H += reachable.h
+LIB_H += read-cache.h
 LIB_H += reflog-walk.h
 LIB_H += refs.h
 LIB_H += remote.h
@@ -768,6 +769,7 @@ LIB_OBJS += prompt.o
 LIB_OBJS += quote.o
 LIB_OBJS += reachable.o
 LIB_OBJS += read-cache.o
+LIB_OBJS += read-cache-v2.o
 LIB_OBJS += reflog-walk.o
 LIB_OBJS += refs.o
 LIB_OBJS += remote.o
diff --git a/cache.h b/cache.h
index 67f28b4..83109f5 100644
--- a/cache.h
+++ b/cache.h
@@ -94,19 +94,11 @@ unsigned long git_deflate_bound(git_zstream *, unsigned long);
  */
 #define DEFAULT_GIT_PORT 9418
 
-/*
- * Basic data structures for the directory cache
- */
 
 #define CACHE_SIGNATURE 0x44495243	/* "DIRC" */
-struct cache_header {
-	unsigned int hdr_signature;
-	unsigned int hdr_version;
-	unsigned int hdr_entries;
-};
 
 #define INDEX_FORMAT_LB 2
-#define INDEX_FORMAT_UB 4
+#define INDEX_FORMAT_UB 5
 
 /*
  * The "cache_time" is just the low 32 bits of the
@@ -130,16 +131,64 @@ struct cache_entry {
 	unsigned int ce_flags;
 	unsigned int ce_namelen;
 	unsigned char sha1[20];
 	struct cache_entry *next;
 	struct cache_entry *dir_next;
 	char name[FLEX_ARRAY]; /* more */
 };
 
 #define CE_STAGEMASK (0x3000)
 #define CE_EXTENDED  (0x4000)
 #define CE_VALID     (0x8000)
 #define CE_STAGESHIFT 12
 
 /*
  * Range 0xFFFF0000 in ce_flags is divided into
  * two parts: in-memory flags and on-disk ones.
@@ -173,6 +222,18 @@ struct cache_entry {
 #define CE_EXTENDED_FLAGS (CE_INTENT_TO_ADD | CE_SKIP_WORKTREE)
 
 /*
  * Safeguard to avoid saving wrong flags:
  *  - CE_EXTENDED2 won't get saved until its semantic is known
  *  - Bits in 0x0000FFFF have been saved in ce_flags already
@@ -210,6 +271,8 @@ static inline unsigned create_ce_flags(unsigned stage)
 #define ce_skip_worktree(ce) ((ce)->ce_flags & CE_SKIP_WORKTREE)
 #define ce_mark_uptodate(ce) ((ce)->ce_flags |= CE_UPTODATE)
 
 #define ce_permissions(mode) (((mode) & 0100) ? 0755 : 0644)
 static inline unsigned int create_ce_mode(unsigned int mode)
 {
@@ -256,6 +319,8 @@ static inline unsigned int canon_mode(unsigned int mode)
 }
 
 #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1)
 
 struct index_state {
 	struct cache_entry **cache;
@@ -267,6 +332,7 @@ struct index_state {
 	unsigned name_hash_initialized : 1,
 		 initialized : 1;
 	struct hash_table name_hash;
+	const struct index_ops *ops;
 };
 
 extern struct index_state the_index;
@@ -444,6 +510,7 @@ extern int verify_path(const char *path);
 extern struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int igncase);
 extern int index_name_stage_pos(const struct index_state *, const char *name, int namelen, int stage);
 extern int index_name_pos(const struct index_state *, const char *name, int namelen);
 #define ADD_CACHE_OK_TO_ADD 1		/* Ok to add */
 #define ADD_CACHE_OK_TO_REPLACE 2	/* Ok to replace file/directory */
 #define ADD_CACHE_SKIP_DFCHECK 4	/* Ok to skip DF conflict checks */
@@ -1178,6 +1245,13 @@ static inline ssize_t write_str_in_full(int fd, const char *str)
 	return write_in_full(fd, str, strlen(str));
 }
 
 /* pager.c */
 extern void setup_pager(void);
 extern const char *pager_program;
diff --git a/read-cache-v2.c b/read-cache-v2.c
new file mode 100644
index 0000000..9e100fc
--- /dev/null
+++ b/read-cache-v2.c
@@ -0,0 +1,570 @@
+#include "cache.h"
+#include "read-cache.h"
+#include "resolve-undo.h"
+#include "cache-tree.h"
+#include "varint.h"
+
+struct cache_header_v2 {
+	unsigned int hdr_entries;
+};
+
+/*
+ * dev/ino/uid/gid/size are also just tracked to the low 32 bits
+ * Again - this is just a (very strong in practice) heuristic that
+ * the inode hasn't changed.
+ *
+ * We save the fields in big-endian order to allow using the
+ * index file over NFS transparently.
+ */
+struct ondisk_cache_entry {
+	struct cache_time ctime;
+	struct cache_time mtime;
+	unsigned int dev;
+	unsigned int ino;
+	unsigned int mode;
+	unsigned int uid;
+	unsigned int gid;
+	unsigned int size;
+	unsigned char sha1[20];
+	unsigned short flags;
+	char name[FLEX_ARRAY]; /* more */
+};
+
+/*
+ * This struct is used when CE_EXTENDED bit is 1
+ * The struct must match ondisk_cache_entry exactly from
+ * ctime till flags
+ */
+struct ondisk_cache_entry_extended {
+	struct cache_time ctime;
+	struct cache_time mtime;
+	unsigned int dev;
+	unsigned int ino;
+	unsigned int mode;
+	unsigned int uid;
+	unsigned int gid;
+	unsigned int size;
+	unsigned char sha1[20];
+	unsigned short flags;
+	unsigned short flags2;
+	char name[FLEX_ARRAY]; /* more */
+};
+
+#define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,name) + (len) + 8) & ~7)
+#define ondisk_cache_entry_size(len) align_flex_name(ondisk_cache_entry,len)
+#define ondisk_cache_entry_extended_size(len) align_flex_name(ondisk_cache_entry_extended,len)
+#define ondisk_ce_size(ce) (((ce)->ce_flags & CE_EXTENDED) ? \
+			    ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
+			    ondisk_cache_entry_size(ce_namelen(ce)))
+
+static int match_stat_basic(struct cache_entry *ce,
+			    struct stat *st,
+			    int changed)
+{
+	if (ce->ce_mtime.sec != (unsigned int)st->st_mtime)
+		changed |= MTIME_CHANGED;
+	if (trust_ctime && ce->ce_ctime.sec != (unsigned int)st->st_ctime)
+		changed |= CTIME_CHANGED;
+
+#ifdef USE_NSEC
+	if (ce->ce_mtime.nsec != ST_MTIME_NSEC(*st))
+		changed |= MTIME_CHANGED;
+	if (trust_ctime && ce->ce_ctime.nsec != ST_CTIME_NSEC(*st))
+		changed |= CTIME_CHANGED;
+#endif
+
+	if (ce->ce_uid != (unsigned int) st->st_uid ||
+	    ce->ce_gid != (unsigned int) st->st_gid)
+		changed |= OWNER_CHANGED;
+	if (ce->ce_ino != (unsigned int) st->st_ino)
+		changed |= INODE_CHANGED;
+
+#ifdef USE_STDEV
+	/*
+	 * st_dev breaks on network filesystems where different
+	 * clients will have different views of what "device"
+	 * the filesystem is on
+	 */
+	if (ce->ce_dev != (unsigned int) st->st_dev)
+		changed |= INODE_CHANGED;
+#endif
+
+	if (ce->ce_size != (unsigned int) st->st_size)
+		changed |= DATA_CHANGED;
+
+	/* Racily smudged entry? */
+	if (!ce->ce_size) {
+		if (!is_empty_blob_sha1(ce->sha1))
+			changed |= DATA_CHANGED;
+	}
+
+	return changed;
+}
+
+static int verify_hdr(struct cache_version_header *hdr, unsigned long size)
+{
+	git_SHA_CTX c;
+	unsigned char sha1[20];
+
+	git_SHA1_Init(&c);
+	git_SHA1_Update(&c, hdr, size - 20);
+	git_SHA1_Final(sha1, &c);
+	if (hashcmp(sha1, (unsigned char *)hdr + size - 20))
+		return error("bad index file sha1 signature");
+	return 0;
+}
+
+static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *ondisk,
+						   unsigned int flags,
+						   const char *name,
+						   size_t len)
+{
+	struct cache_entry *ce = xmalloc(cache_entry_size(len));
+
+	ce->ce_ctime.sec = ntoh_l(ondisk->ctime.sec);
+	ce->ce_mtime.sec = ntoh_l(ondisk->mtime.sec);
+	ce->ce_ctime.nsec = ntoh_l(ondisk->ctime.nsec);
+	ce->ce_mtime.nsec = ntoh_l(ondisk->mtime.nsec);
+	ce->ce_dev   = ntoh_l(ondisk->dev);
+	ce->ce_ino   = ntoh_l(ondisk->ino);
+	ce->ce_mode  = ntoh_l(ondisk->mode);
+	ce->ce_uid   = ntoh_l(ondisk->uid);
+	ce->ce_gid   = ntoh_l(ondisk->gid);
+	ce->ce_size  = ntoh_l(ondisk->size);
+	ce->ce_flags = flags & ~CE_NAMEMASK;
+	ce->ce_namelen = len;
+	hashcpy(ce->sha1, ondisk->sha1);
+	memcpy(ce->name, name, len);
+	ce->name[len] = '\0';
+	return ce;
+}
+
+/*
+ * Adjacent cache entries tend to share the leading paths, so it makes
+ * sense to only store the differences in later entries.  In the v4
+ * on-disk format of the index, each on-disk cache entry stores the
+ * number of bytes to be stripped from the end of the previous name,
+ * and the bytes to append to the result, to come up with its name.
+ */
+static unsigned long expand_name_field(struct strbuf *name, const char *cp_)
+{
+	const unsigned char *ep, *cp = (const unsigned char *)cp_;
+	size_t len = decode_varint(&cp);
+
+	if (name->len < len)
+		die("malformed name field in the index");
+	strbuf_remove(name, name->len - len, len);
+	for (ep = cp; *ep; ep++)
+		; /* find the end */
+	strbuf_add(name, cp, ep - cp);
+	return (const char *)ep + 1 - cp_;
+}
+
+static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk,
+					    unsigned long *ent_size,
+					    struct strbuf *previous_name)
+{
+	struct cache_entry *ce;
+	size_t len;
+	const char *name;
+	unsigned int flags;
+
+	/* On-disk flags are just 16 bits */
+	flags = ntoh_s(ondisk->flags);
+	len = flags & CE_NAMEMASK;
+
+	if (flags & CE_EXTENDED) {
+		struct ondisk_cache_entry_extended *ondisk2;
+		int extended_flags;
+		ondisk2 = (struct ondisk_cache_entry_extended *)ondisk;
+		extended_flags = ntoh_s(ondisk2->flags2) << 16;
+		/* We do not yet understand any bit out of CE_EXTENDED_FLAGS */
+		if (extended_flags & ~CE_EXTENDED_FLAGS)
+			die("Unknown index entry format %08x", extended_flags);
+		flags |= extended_flags;
+		name = ondisk2->name;
+	}
+	else
+		name = ondisk->name;
+
+	if (!previous_name) {
+		/* v3 and earlier */
+		if (len == CE_NAMEMASK)
+			len = strlen(name);
+		ce = cache_entry_from_ondisk(ondisk, flags, name, len);
+
+		*ent_size = ondisk_ce_size(ce);
+	} else {
+		unsigned long consumed;
+		consumed = expand_name_field(previous_name, name);
+		ce = cache_entry_from_ondisk(ondisk, flags,
+					     previous_name->buf,
+					     previous_name->len);
+
+		*ent_size = (name - ((char *)ondisk)) + consumed;
+	}
+	return ce;
+}
+
+static int read_index_extension(struct index_state *istate,
+				const char *ext, void *data, unsigned long sz)
+{
+	switch (CACHE_EXT(ext)) {
+	case CACHE_EXT_TREE:
+		istate->cache_tree = cache_tree_read(data, sz);
+		break;
+	case CACHE_EXT_RESOLVE_UNDO:
+		istate->resolve_undo = resolve_undo_read(data, sz);
+		break;
+	default:
+		if (*ext < 'A' || 'Z' < *ext)
+			return error("index uses %.4s extension, which we do not understand",
+				     ext);
+		fprintf(stderr, "ignoring %.4s extension\n", ext);
+		break;
+	}
+	return 0;
+}
+
+static void read_index_v2(struct index_state *istate, void *mmap, int mmap_size, int fd)
+{
+	int i;
+	unsigned long src_offset;
+	struct cache_version_header *hdr;
+	struct cache_header_v2 *hdr_v2;
+	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
+
+	hdr = mmap;
+	hdr_v2 = mmap + sizeof(*hdr);
+	istate->version = ntohl(hdr->hdr_version);
+	istate->cache_nr = ntohl(hdr_v2->hdr_entries);
+	istate->cache_alloc = alloc_nr(istate->cache_nr);
+	istate->cache = xcalloc(istate->cache_alloc, sizeof(struct cache_entry *));
+	istate->initialized = 1;
+
+	if (istate->version == 4)
+		previous_name = &previous_name_buf;
+	else
+		previous_name = NULL;
+
+	src_offset = sizeof(*hdr) + sizeof(*hdr_v2);
+	for (i = 0; i < istate->cache_nr; i++) {
+		struct ondisk_cache_entry *disk_ce;
+		struct cache_entry *ce;
+		unsigned long consumed;
+
+		disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
+		ce = create_from_disk(disk_ce, &consumed, previous_name);
+		set_index_entry(istate, i, ce);
+
+		src_offset += consumed;
+	}
+	strbuf_release(&previous_name_buf);
+
+	while (src_offset <= mmap_size - 20 - 8) {
+		/* After an array of active_nr index entries,
+		 * there can be arbitrary number of extended
+		 * sections, each of which is prefixed with
+		 * extension name (4-byte) and section length
+		 * in 4-byte network byte order.
+		 */
+		uint32_t extsize;
+		memcpy(&extsize, (char *)mmap + src_offset + 4, 4);
+		extsize = ntohl(extsize);
+		if (read_index_extension(istate,
+					 (const char *) mmap + src_offset,
+					 (char *) mmap + src_offset + 8,
+					 extsize) < 0)
+			goto unmap;
+		src_offset += 8;
+		src_offset += extsize;
+	}
+	return;
+unmap:
+	munmap(mmap, mmap_size);
+	die("index file corrupt");
+}
+
+static void ce_smudge_racily_clean_entry(const struct index_state *istate,
+					 struct cache_entry *ce)
+{
+	/*
+	 * The only thing we care about in this function is to smudge the
+	 * falsely clean entry due to touch-update-touch race, so we leave
+	 * everything else as they are.  We are called for entries whose
+	 * ce_mtime match the index file mtime.
+	 *
+	 * Note that this actually does not do much for gitlinks, for
+	 * which ce_match_stat_basic() always goes to the actual
+	 * contents.  The caller checks with is_racy_timestamp() which
+	 * always says "no" for gitlinks, so we are not called for them ;-)
+	 */
+	struct stat st;
+
+	if (lstat(ce->name, &st) < 0)
+		return;
+	if (ce_match_stat_basic(istate, ce, &st))
+		return;
+	if (ce_modified_check_fs(ce, &st)) {
+		/* This is "racily clean"; smudge it.  Note that this
+		 * is a tricky code.  At first glance, it may appear
+		 * that it can break with this sequence:
+		 *
+		 * $ echo xyzzy >frotz
+		 * $ git-update-index --add frotz
+		 * $ : >frotz
+		 * $ sleep 3
+		 * $ echo filfre >nitfol
+		 * $ git-update-index --add nitfol
+		 *
+		 * but it does not.  When the second update-index runs,
+		 * it notices that the entry "frotz" has the same timestamp
+		 * as index, and if we were to smudge it by resetting its
+		 * size to zero here, then the object name recorded
+		 * in index is the 6-byte file but the cached stat information
+		 * becomes zero --- which would then match what we would
+		 * obtain from the filesystem next time we stat("frotz").
+		 *
+		 * However, the second update-index, before calling
+		 * this function, notices that the cached size is 6
+		 * bytes and what is on the filesystem is an empty
+		 * file, and never calls us, so the cached size information
+		 * for "frotz" stays 6 which does not match the filesystem.
+		 */
+		ce->ce_size = 0;
+	}
+}
+
+/* Copy miscellaneous fields but not the name */
+static char *copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
+				       struct cache_entry *ce)
+{
+	short flags;
+
+	ondisk->ctime.sec = htonl(ce->ce_ctime.sec);
+	ondisk->mtime.sec = htonl(ce->ce_mtime.sec);
+	ondisk->ctime.nsec = htonl(ce->ce_ctime.nsec);
+	ondisk->mtime.nsec = htonl(ce->ce_mtime.nsec);
+	ondisk->dev  = htonl(ce->ce_dev);
+	ondisk->ino  = htonl(ce->ce_ino);
+	ondisk->mode = htonl(ce->ce_mode);
+	ondisk->uid  = htonl(ce->ce_uid);
+	ondisk->gid  = htonl(ce->ce_gid);
+	ondisk->size = htonl(ce->ce_size);
+	hashcpy(ondisk->sha1, ce->sha1);
+
+	flags = ce->ce_flags;
+	flags |= (ce_namelen(ce) >= CE_NAMEMASK ? CE_NAMEMASK : ce_namelen(ce));
+	ondisk->flags = htons(flags);
+	if (ce->ce_flags & CE_EXTENDED) {
+		struct ondisk_cache_entry_extended *ondisk2;
+		ondisk2 = (struct ondisk_cache_entry_extended *)ondisk;
+		ondisk2->flags2 = htons((ce->ce_flags & CE_EXTENDED_FLAGS) >> 16);
+		return ondisk2->name;
+	}
+	else {
+		return ondisk->name;
+	}
+}
+
+#define WRITE_BUFFER_SIZE 8192
+static unsigned char write_buffer[WRITE_BUFFER_SIZE];
+static unsigned long write_buffer_len;
+
+static int ce_write_flush(git_SHA_CTX *context, int fd)
+{
+	unsigned int buffered = write_buffer_len;
+	if (buffered) {
+		git_SHA1_Update(context, write_buffer, buffered);
+		if (write_in_full(fd, write_buffer, buffered) != buffered)
+			return -1;
+		write_buffer_len = 0;
+	}
+	return 0;
+}
+
+static int ce_write_v2(git_SHA_CTX *context, int fd, void *data, unsigned int len)
+{
+	while (len) {
+		unsigned int buffered = write_buffer_len;
+		unsigned int partial = WRITE_BUFFER_SIZE - buffered;
+		if (partial > len)
+			partial = len;
+		memcpy(write_buffer + buffered, data, partial);
+		buffered += partial;
+		if (buffered == WRITE_BUFFER_SIZE) {
+			write_buffer_len = buffered;
+			if (ce_write_flush(context, fd))
+				return -1;
+			buffered = 0;
+		}
+		write_buffer_len = buffered;
+		len -= partial;
+		data = (char *) data + partial;
+	}
+	return 0;
+}
+
+static int ce_write_entry_v2(git_SHA_CTX *c, int fd, struct cache_entry *ce,
+			  struct strbuf *previous_name)
+{
+	int size;
+	struct ondisk_cache_entry *ondisk;
+	char *name;
+	int result;
+
+	if (!previous_name) {
+		size = ondisk_ce_size(ce);
+		ondisk = xcalloc(1, size);
+		name = copy_cache_entry_to_ondisk(ondisk, ce);
+		memcpy(name, ce->name, ce_namelen(ce));
+	} else {
+		int common, to_remove, prefix_size;
+		unsigned char to_remove_vi[16];
+		for (common = 0;
+		     (ce->name[common] &&
+		      common < previous_name->len &&
+		      ce->name[common] == previous_name->buf[common]);
+		     common++)
+			; /* still matching */
+		to_remove = previous_name->len - common;
+		prefix_size = encode_varint(to_remove, to_remove_vi);
+
+		if (ce->ce_flags & CE_EXTENDED)
+			size = offsetof(struct ondisk_cache_entry_extended, name);
+		else
+			size = offsetof(struct ondisk_cache_entry, name);
+		size += prefix_size + (ce_namelen(ce) - common + 1);
+
+		ondisk = xcalloc(1, size);
+		name = copy_cache_entry_to_ondisk(ondisk, ce);
+		memcpy(name, to_remove_vi, prefix_size);
+		memcpy(name + prefix_size, ce->name + common, ce_namelen(ce) - common);
+
+		strbuf_splice(previous_name, common, to_remove,
+			      ce->name + common, ce_namelen(ce) - common);
+	}
+
+	result = ce_write_v2(c, fd, ondisk, size);
+	free(ondisk);
+	return result;
+}
+
+static int write_index_ext_header_v2(git_SHA_CTX *context, int fd,
+				  unsigned int ext, unsigned int sz)
+{
+	ext = htonl(ext);
+	sz = htonl(sz);
+	return ((ce_write_v2(context, fd, &ext, 4) < 0) ||
+		(ce_write_v2(context, fd, &sz, 4) < 0)) ? -1 : 0;
+}
+
+static int ce_flush(git_SHA_CTX *context, int fd)
+{
+	unsigned int left = write_buffer_len;
+
+	if (left) {
+		write_buffer_len = 0;
+		git_SHA1_Update(context, write_buffer, left);
+	}
+
+	/* Flush first if not enough space for SHA1 signature */
+	if (left + 20 > WRITE_BUFFER_SIZE) {
+		if (write_in_full(fd, write_buffer, left) != left)
+			return -1;
+		left = 0;
+	}
+
+	/* Append the SHA1 signature at the end */
+	git_SHA1_Final(write_buffer + left, context);
+	left += 20;
+	return (write_in_full(fd, write_buffer, left) != left) ? -1 : 0;
+}
+
+static int write_index_v2(struct index_state *istate, int newfd)
+{
+	git_SHA_CTX c;
+	struct cache_version_header hdr;
+	struct cache_header_v2 hdr_v2;
+	int i, err, removed, extended, hdr_version;
+	struct cache_entry **cache = istate->cache;
+	int entries = istate->cache_nr;
+	struct stat st;
+	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
+
+	for (i = removed = extended = 0; i < entries; i++) {
+		if (cache[i]->ce_flags & CE_REMOVE)
+			removed++;
+
+		/* reduce extended entries if possible */
+		cache[i]->ce_flags &= ~CE_EXTENDED;
+		if (cache[i]->ce_flags & CE_EXTENDED_FLAGS) {
+			extended++;
+			cache[i]->ce_flags |= CE_EXTENDED;
+		}
+	}
+
+	/* demote version 3 to version 2 when the latter suffices */
+	if (istate->version == 3 || istate->version == 2)
+		istate->version = extended ? 3 : 2;
+
+	hdr_version = istate->version;
+
+	hdr.hdr_signature = htonl(CACHE_SIGNATURE);
+	hdr.hdr_version = htonl(hdr_version);
+	hdr_v2.hdr_entries = htonl(entries - removed);
+
+	git_SHA1_Init(&c);
+	if (ce_write_v2(&c, newfd, &hdr, sizeof(hdr)) < 0)
+		return -1;
+	if (ce_write_v2(&c, newfd, &hdr_v2, sizeof(hdr_v2)) < 0)
+		return -1;
+
+	previous_name = (hdr_version == 4) ? &previous_name_buf : NULL;
+	for (i = 0; i < entries; i++) {
+		struct cache_entry *ce = cache[i];
+		if (ce->ce_flags & CE_REMOVE)
+			continue;
+		if (!ce_uptodate(ce) && is_racy_timestamp(istate, ce))
+			ce_smudge_racily_clean_entry(istate, ce);
+		if (ce_write_entry_v2(&c, newfd, ce, previous_name) < 0)
+			return -1;
+	}
+	strbuf_release(&previous_name_buf);
+
+	/* Write extension data here */
+	if (istate->cache_tree) {
+		struct strbuf sb = STRBUF_INIT;
+
+		cache_tree_write(&sb, istate->cache_tree);
+		err = write_index_ext_header_v2(&c, newfd, CACHE_EXT_TREE, sb.len) < 0
+			|| ce_write_v2(&c, newfd, sb.buf, sb.len) < 0;
+		strbuf_release(&sb);
+		if (err)
+			return -1;
+	}
+	if (istate->resolve_undo) {
+		struct strbuf sb = STRBUF_INIT;
+
+		resolve_undo_write(&sb, istate->resolve_undo);
+		err = write_index_ext_header_v2(&c, newfd, CACHE_EXT_RESOLVE_UNDO,
+					     sb.len) < 0
+			|| ce_write_v2(&c, newfd, sb.buf, sb.len) < 0;
+		strbuf_release(&sb);
+		if (err)
+			return -1;
+	}
+
+	if (ce_flush(&c, newfd) || fstat(newfd, &st))
+		return -1;
+	istate->timestamp.sec = (unsigned int)st.st_mtime;
+	istate->timestamp.nsec = ST_MTIME_NSEC(st);
+	return 0;
+}
+
+struct index_ops v2_ops = {
+	match_stat_basic,
+	verify_hdr,
+	read_index_v2,
+	write_index_v2
+};
diff --git a/read-cache.c b/read-cache.c
index 2f8159f..215c91f 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -5,6 +5,7 @@
  */
 #define NO_THE_INDEX_COMPATIBILITY_MACROS
 #include "cache.h"
+#include "read-cache.h"
 #include "cache-tree.h"
 #include "refs.h"
 #include "dir.h"
@@ -13,7 +14,6 @@
 #include "blob.h"
 #include "resolve-undo.h"
 #include "strbuf.h"
-#include "varint.h"
 
 static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int really);
 
@@ -21,22 +21,10 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int reall
 
 #define CE_NAMEMASK  (0x0fff)
 
-/* Index extensions.
- *
- * The first letter should be 'A'..'Z' for extensions that are not
- * necessary for a correct operation (i.e. optimization data).
- * When new extensions are added that _needs_ to be understood in
- * order to correctly interpret the index file, pick character that
- * is outside the range, to cause the reader to abort.
- */
-
-#define CACHE_EXT(s) ( (s[0]<<24)|(s[1]<<16)|(s[2]<<8)|(s[3]) )
-#define CACHE_EXT_TREE 0x54524545	/* "TREE" */
-#define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */
 
 struct index_state the_index;
 
-static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
+void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
 {
 	istate->cache[nr] = ce;
 	add_name_hash(istate, ce);
@@ -143,7 +131,7 @@ static int ce_compare_gitlink(struct cache_entry *ce)
 	return hashcmp(sha1, ce->sha1);
 }
 
-static int ce_modified_check_fs(struct cache_entry *ce, struct stat *st)
+int ce_modified_check_fs(struct cache_entry *ce, struct stat *st)
 {
 	switch (st->st_mode & S_IFMT) {
 	case S_IFREG:
@@ -163,7 +151,8 @@ static int ce_modified_check_fs(struct cache_entry *ce, struct stat *st)
 	return 0;
 }
 
-static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st)
+int ce_match_stat_basic(const struct index_state *istate,
+			struct cache_entry *ce, struct stat *st)
 {
 	unsigned int changed = 0;
 
@@ -195,47 +184,11 @@ static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st)
 	default:
 		die("internal error: ce_mode is %o", ce->ce_mode);
 	}
-	if (ce->ce_mtime.sec != (unsigned int)st->st_mtime)
-		changed |= MTIME_CHANGED;
-	if (trust_ctime && ce->ce_ctime.sec != (unsigned int)st->st_ctime)
-		changed |= CTIME_CHANGED;
-
-#ifdef USE_NSEC
-	if (ce->ce_mtime.nsec != ST_MTIME_NSEC(*st))
-		changed |= MTIME_CHANGED;
-	if (trust_ctime && ce->ce_ctime.nsec != ST_CTIME_NSEC(*st))
-		changed |= CTIME_CHANGED;
-#endif
-
-	if (ce->ce_uid != (unsigned int) st->st_uid ||
-	    ce->ce_gid != (unsigned int) st->st_gid)
-		changed |= OWNER_CHANGED;
-	if (ce->ce_ino != (unsigned int) st->st_ino)
-		changed |= INODE_CHANGED;
-
-#ifdef USE_STDEV
-	/*
-	 * st_dev breaks on network filesystems where different
-	 * clients will have different views of what "device"
-	 * the filesystem is on
-	 */
-	if (ce->ce_dev != (unsigned int) st->st_dev)
-		changed |= INODE_CHANGED;
-#endif
-
-	if (ce->ce_size != (unsigned int) st->st_size)
-		changed |= DATA_CHANGED;
-
-	/* Racily smudged entry? */
-	if (!ce->ce_size) {
-		if (!is_empty_blob_sha1(ce->sha1))
-			changed |= DATA_CHANGED;
-	}
 
-	return changed;
+	return istate->ops->match_stat_basic(ce, st, changed);
 }
 
-static int is_racy_timestamp(const struct index_state *istate, struct cache_entry *ce)
+int is_racy_timestamp(const struct index_state *istate, struct cache_entry *ce)
 {
 	return (!S_ISGITLINK(ce->ce_mode) &&
 		istate->timestamp.sec &&
@@ -278,7 +231,7 @@ int ie_match_stat(const struct index_state *istate,
 	if (ce->ce_flags & CE_INTENT_TO_ADD)
 		return DATA_CHANGED | TYPE_CHANGED | MODE_CHANGED;
 
-	changed = ce_match_stat_basic(ce, st);
+	changed = ce_match_stat_basic(istate, ce, st);
 
 	/*
 	 * Within 1 second of this sequence:
@@ -1197,92 +1150,22 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int reall
 
 #define INDEX_FORMAT_DEFAULT 3
 
-/*
- * dev/ino/uid/gid/size are also just tracked to the low 32 bits
- * Again - this is just a (very strong in practice) heuristic that
- * the inode hasn't changed.
- *
- * We save the fields in big-endian order to allow using the
- * index file over NFS transparently.
- */
-struct ondisk_cache_entry {
-	struct cache_time ctime;
-	struct cache_time mtime;
-	unsigned int dev;
-	unsigned int ino;
-	unsigned int mode;
-	unsigned int uid;
-	unsigned int gid;
-	unsigned int size;
-	unsigned char sha1[20];
-	unsigned short flags;
-	char name[FLEX_ARRAY]; /* more */
-};
 
-/*
- * This struct is used when CE_EXTENDED bit is 1
- * The struct must match ondisk_cache_entry exactly from
- * ctime till flags
- */
-struct ondisk_cache_entry_extended {
-	struct cache_time ctime;
-	struct cache_time mtime;
-	unsigned int dev;
-	unsigned int ino;
-	unsigned int mode;
-	unsigned int uid;
-	unsigned int gid;
-	unsigned int size;
-	unsigned char sha1[20];
-	unsigned short flags;
-	unsigned short flags2;
-	char name[FLEX_ARRAY]; /* more */
-};
-
-/* These are only used for v3 or lower */
-#define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,name) + (len) + 8) & ~7)
-#define ondisk_cache_entry_size(len) align_flex_name(ondisk_cache_entry,len)
-#define ondisk_cache_entry_extended_size(len) align_flex_name(ondisk_cache_entry_extended,len)
-#define ondisk_ce_size(ce) (((ce)->ce_flags & CE_EXTENDED) ? \
-			    ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
-			    ondisk_cache_entry_size(ce_namelen(ce)))
-
-static int verify_hdr(struct cache_header *hdr, unsigned long size)
+
+static int verify_hdr_version(struct index_state *istate,
+			      struct cache_version_header *hdr, unsigned long size)
 {
-	git_SHA_CTX c;
-	unsigned char sha1[20];
 	int hdr_version;
 
 	if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
 		return error("bad signature");
 	hdr_version = ntohl(hdr->hdr_version);
-	if (hdr_version < 2 || 4 < hdr_version)
+	if (2 <= hdr_version && hdr_version < 5)
+		istate->ops = &v2_ops;
+	else if (hdr_version == 5)
+		istate->ops = &v5_ops;
+	else
 		return error("bad index version %d", hdr_version);
-	git_SHA1_Init(&c);
-	git_SHA1_Update(&c, hdr, size - 20);
-	git_SHA1_Final(sha1, &c);
-	if (hashcmp(sha1, (unsigned char *)hdr + size - 20))
-		return error("bad index file sha1 signature");
-	return 0;
-}
-
-static int read_index_extension(struct index_state *istate,
-				const char *ext, void *data, unsigned long sz)
-{
-	switch (CACHE_EXT(ext)) {
-	case CACHE_EXT_TREE:
-		istate->cache_tree = cache_tree_read(data, sz);
-		break;
-	case CACHE_EXT_RESOLVE_UNDO:
-		istate->resolve_undo = resolve_undo_read(data, sz);
-		break;
-	default:
-		if (*ext < 'A' || 'Z' < *ext)
-			return error("index uses %.4s extension, which we do not understand",
-				     ext);
-		fprintf(stderr, "ignoring %.4s extension\n", ext);
-		break;
-	}
 	return 0;
 }
 
@@ -1291,134 +1174,18 @@ int read_index(struct index_state *istate)
 	return read_index_from(istate, get_index_file());
 }
 
-#ifndef NEEDS_ALIGNED_ACCESS
-#define ntoh_s(var) ntohs(var)
-#define ntoh_l(var) ntohl(var)
-#else
-static inline uint16_t ntoh_s_force_align(void *p)
-{
-	uint16_t x;
-	memcpy(&x, p, sizeof(x));
-	return ntohs(x);
-}
-static inline uint32_t ntoh_l_force_align(void *p)
-{
-	uint32_t x;
-	memcpy(&x, p, sizeof(x));
-	return ntohl(x);
-}
-#define ntoh_s(var) ntoh_s_force_align(&(var))
-#define ntoh_l(var) ntoh_l_force_align(&(var))
-#endif
-
-static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *ondisk,
-						   unsigned int flags,
-						   const char *name,
-						   size_t len)
-{
-	struct cache_entry *ce = xmalloc(cache_entry_size(len));
-
-	ce->ce_ctime.sec = ntoh_l(ondisk->ctime.sec);
-	ce->ce_mtime.sec = ntoh_l(ondisk->mtime.sec);
-	ce->ce_ctime.nsec = ntoh_l(ondisk->ctime.nsec);
-	ce->ce_mtime.nsec = ntoh_l(ondisk->mtime.nsec);
-	ce->ce_dev   = ntoh_l(ondisk->dev);
-	ce->ce_ino   = ntoh_l(ondisk->ino);
-	ce->ce_mode  = ntoh_l(ondisk->mode);
-	ce->ce_uid   = ntoh_l(ondisk->uid);
-	ce->ce_gid   = ntoh_l(ondisk->gid);
-	ce->ce_size  = ntoh_l(ondisk->size);
-	ce->ce_flags = flags & ~CE_NAMEMASK;
-	ce->ce_namelen = len;
-	hashcpy(ce->sha1, ondisk->sha1);
-	memcpy(ce->name, name, len);
-	ce->name[len] = '\0';
-	return ce;
-}
-
-/*
- * Adjacent cache entries tend to share the leading paths, so it makes
- * sense to only store the differences in later entries.  In the v4
- * on-disk format of the index, each on-disk cache entry stores the
- * number of bytes to be stripped from the end of the previous name,
- * and the bytes to append to the result, to come up with its name.
- */
-static unsigned long expand_name_field(struct strbuf *name, const char *cp_)
-{
-	const unsigned char *ep, *cp = (const unsigned char *)cp_;
-	size_t len = decode_varint(&cp);
-
-	if (name->len < len)
-		die("malformed name field in the index");
-	strbuf_remove(name, name->len - len, len);
-	for (ep = cp; *ep; ep++)
-		; /* find the end */
-	strbuf_add(name, cp, ep - cp);
-	return (const char *)ep + 1 - cp_;
-}
-
-static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk,
-					    unsigned long *ent_size,
-					    struct strbuf *previous_name)
-{
-	struct cache_entry *ce;
-	size_t len;
-	const char *name;
-	unsigned int flags;
-
-	/* On-disk flags are just 16 bits */
-	flags = ntoh_s(ondisk->flags);
-	len = flags & CE_NAMEMASK;
-
-	if (flags & CE_EXTENDED) {
-		struct ondisk_cache_entry_extended *ondisk2;
-		int extended_flags;
-		ondisk2 = (struct ondisk_cache_entry_extended *)ondisk;
-		extended_flags = ntoh_s(ondisk2->flags2) << 16;
-		/* We do not yet understand any bit out of CE_EXTENDED_FLAGS */
-		if (extended_flags & ~CE_EXTENDED_FLAGS)
-			die("Unknown index entry format %08x", extended_flags);
-		flags |= extended_flags;
-		name = ondisk2->name;
-	}
-	else
-		name = ondisk->name;
-
-	if (!previous_name) {
-		/* v3 and earlier */
-		if (len == CE_NAMEMASK)
-			len = strlen(name);
-		ce = cache_entry_from_ondisk(ondisk, flags, name, len);
-
-		*ent_size = ondisk_ce_size(ce);
-	} else {
-		unsigned long consumed;
-		consumed = expand_name_field(previous_name, name);
-		ce = cache_entry_from_ondisk(ondisk, flags,
-					     previous_name->buf,
-					     previous_name->len);
-
-		*ent_size = (name - ((char *)ondisk)) + consumed;
-	}
-	return ce;
-}
-
 /* remember to discard_cache() before reading a different cache! */
 int read_index_from(struct index_state *istate, const char *path)
 {
-	int fd, i;
+	int fd;
 	struct stat st;
-	unsigned long src_offset;
-	struct cache_header *hdr;
+	struct cache_version_header *hdr;
 	void *mmap;
 	size_t mmap_size;
-	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
 
-	errno = EBUSY;
 	if (istate->initialized)
 		return istate->cache_nr;
 
-	errno = ENOENT;
 	istate->timestamp.sec = 0;
 	istate->timestamp.nsec = 0;
 	fd = open(path, O_RDONLY);
@@ -1431,71 +1198,31 @@ int read_index_from(struct index_state *istate, const char *path)
 	if (fstat(fd, &st))
 		die_errno("cannot stat the open index");
 
-	errno = EINVAL;
 	mmap_size = xsize_t(st.st_size);
-	if (mmap_size < sizeof(struct cache_header) + 20)
+	if (mmap_size < sizeof(struct cache_version_header) + 20)
 		die("index file smaller than expected");
 
 	mmap = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
-	close(fd);
 	if (mmap == MAP_FAILED)
 		die_errno("unable to map index file");
 
 	hdr = mmap;
-	if (verify_hdr(hdr, mmap_size) < 0)
+	if (verify_hdr_version(istate, hdr, mmap_size) < 0)
 		goto unmap;
 
-	istate->version = ntohl(hdr->hdr_version);
-	istate->cache_nr = ntohl(hdr->hdr_entries);
-	istate->cache_alloc = alloc_nr(istate->cache_nr);
-	istate->cache = xcalloc(istate->cache_alloc, sizeof(struct cache_entry *));
-	istate->initialized = 1;
-
-	if (istate->version == 4)
-		previous_name = &previous_name_buf;
-	else
-		previous_name = NULL;
-
-	src_offset = sizeof(*hdr);
-	for (i = 0; i < istate->cache_nr; i++) {
-		struct ondisk_cache_entry *disk_ce;
-		struct cache_entry *ce;
-		unsigned long consumed;
-
-		disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
-		ce = create_from_disk(disk_ce, &consumed, previous_name);
-		set_index_entry(istate, i, ce);
+	if (istate->ops->verify_hdr(hdr, mmap_size) < 0)
+		goto unmap;
 
-		src_offset += consumed;
-	}
-	strbuf_release(&previous_name_buf);
+	istate->ops->read_index(istate, mmap, mmap_size, fd);
+	close(fd);
 	istate->timestamp.sec = st.st_mtime;
 	istate->timestamp.nsec = ST_MTIME_NSEC(st);
 
-	while (src_offset <= mmap_size - 20 - 8) {
-		/* After an array of active_nr index entries,
-		 * there can be arbitrary number of extended
-		 * sections, each of which is prefixed with
-		 * extension name (4-byte) and section length
-		 * in 4-byte network byte order.
-		 */
-		uint32_t extsize;
-		memcpy(&extsize, (char *)mmap + src_offset + 4, 4);
-		extsize = ntohl(extsize);
-		if (read_index_extension(istate,
-					 (const char *) mmap + src_offset,
-					 (char *) mmap + src_offset + 8,
-					 extsize) < 0)
-			goto unmap;
-		src_offset += 8;
-		src_offset += extsize;
-	}
 	munmap(mmap, mmap_size);
 	return istate->cache_nr;
 
 unmap:
 	munmap(mmap, mmap_size);
-	errno = EINVAL;
 	die("index file corrupt");
 }
 
@@ -1534,201 +1261,6 @@ int unmerged_index(const struct index_state *istate)
 	return 0;
 }
 
-#define WRITE_BUFFER_SIZE 8192
-static unsigned char write_buffer[WRITE_BUFFER_SIZE];
-static unsigned long write_buffer_len;
-
-static int ce_write_flush(git_SHA_CTX *context, int fd)
-{
-	unsigned int buffered = write_buffer_len;
-	if (buffered) {
-		git_SHA1_Update(context, write_buffer, buffered);
-		if (write_in_full(fd, write_buffer, buffered) != buffered)
-			return -1;
-		write_buffer_len = 0;
-	}
-	return 0;
-}
-
-static int ce_write(git_SHA_CTX *context, int fd, void *data, unsigned int len)
-{
-	while (len) {
-		unsigned int buffered = write_buffer_len;
-		unsigned int partial = WRITE_BUFFER_SIZE - buffered;
-		if (partial > len)
-			partial = len;
-		memcpy(write_buffer + buffered, data, partial);
-		buffered += partial;
-		if (buffered == WRITE_BUFFER_SIZE) {
-			write_buffer_len = buffered;
-			if (ce_write_flush(context, fd))
-				return -1;
-			buffered = 0;
-		}
-		write_buffer_len = buffered;
-		len -= partial;
-		data = (char *) data + partial;
-	}
-	return 0;
-}
-
-static int write_index_ext_header(git_SHA_CTX *context, int fd,
-				  unsigned int ext, unsigned int sz)
-{
-	ext = htonl(ext);
-	sz = htonl(sz);
-	return ((ce_write(context, fd, &ext, 4) < 0) ||
-		(ce_write(context, fd, &sz, 4) < 0)) ? -1 : 0;
-}
-
-static int ce_flush(git_SHA_CTX *context, int fd)
-{
-	unsigned int left = write_buffer_len;
-
-	if (left) {
-		write_buffer_len = 0;
-		git_SHA1_Update(context, write_buffer, left);
-	}
-
-	/* Flush first if not enough space for SHA1 signature */
-	if (left + 20 > WRITE_BUFFER_SIZE) {
-		if (write_in_full(fd, write_buffer, left) != left)
-			return -1;
-		left = 0;
-	}
-
-	/* Append the SHA1 signature at the end */
-	git_SHA1_Final(write_buffer + left, context);
-	left += 20;
-	return (write_in_full(fd, write_buffer, left) != left) ? -1 : 0;
-}
-
-static void ce_smudge_racily_clean_entry(struct cache_entry *ce)
-{
-	/*
-	 * The only thing we care about in this function is to smudge the
-	 * falsely clean entry due to touch-update-touch race, so we leave
-	 * everything else as they are.  We are called for entries whose
-	 * ce_mtime match the index file mtime.
-	 *
-	 * Note that this actually does not do much for gitlinks, for
-	 * which ce_match_stat_basic() always goes to the actual
-	 * contents.  The caller checks with is_racy_timestamp() which
-	 * always says "no" for gitlinks, so we are not called for them ;-)
-	 */
-	struct stat st;
-
-	if (lstat(ce->name, &st) < 0)
-		return;
-	if (ce_match_stat_basic(ce, &st))
-		return;
-	if (ce_modified_check_fs(ce, &st)) {
-		/* This is "racily clean"; smudge it.  Note that this
-		 * is a tricky code.  At first glance, it may appear
-		 * that it can break with this sequence:
-		 *
-		 * $ echo xyzzy >frotz
-		 * $ git-update-index --add frotz
-		 * $ : >frotz
-		 * $ sleep 3
-		 * $ echo filfre >nitfol
-		 * $ git-update-index --add nitfol
-		 *
-		 * but it does not.  When the second update-index runs,
-		 * it notices that the entry "frotz" has the same timestamp
-		 * as index, and if we were to smudge it by resetting its
-		 * size to zero here, then the object name recorded
-		 * in index is the 6-byte file but the cached stat information
-		 * becomes zero --- which would then match what we would
-		 * obtain from the filesystem next time we stat("frotz").
-		 *
-		 * However, the second update-index, before calling
-		 * this function, notices that the cached size is 6
-		 * bytes and what is on the filesystem is an empty
-		 * file, and never calls us, so the cached size information
-		 * for "frotz" stays 6 which does not match the filesystem.
-		 */
-		ce->ce_size = 0;
-	}
-}
-
-/* Copy miscellaneous fields but not the name */
-static char *copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
-				       struct cache_entry *ce)
-{
-	short flags;
-
-	ondisk->ctime.sec = htonl(ce->ce_ctime.sec);
-	ondisk->mtime.sec = htonl(ce->ce_mtime.sec);
-	ondisk->ctime.nsec = htonl(ce->ce_ctime.nsec);
-	ondisk->mtime.nsec = htonl(ce->ce_mtime.nsec);
-	ondisk->dev  = htonl(ce->ce_dev);
-	ondisk->ino  = htonl(ce->ce_ino);
-	ondisk->mode = htonl(ce->ce_mode);
-	ondisk->uid  = htonl(ce->ce_uid);
-	ondisk->gid  = htonl(ce->ce_gid);
-	ondisk->size = htonl(ce->ce_size);
-	hashcpy(ondisk->sha1, ce->sha1);
-
-	flags = ce->ce_flags;
-	flags |= (ce_namelen(ce) >= CE_NAMEMASK ? CE_NAMEMASK : ce_namelen(ce));
-	ondisk->flags = htons(flags);
-	if (ce->ce_flags & CE_EXTENDED) {
-		struct ondisk_cache_entry_extended *ondisk2;
-		ondisk2 = (struct ondisk_cache_entry_extended *)ondisk;
-		ondisk2->flags2 = htons((ce->ce_flags & CE_EXTENDED_FLAGS) >> 16);
-		return ondisk2->name;
-	}
-	else {
-		return ondisk->name;
-	}
-}
-
-static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce,
-			  struct strbuf *previous_name)
-{
-	int size;
-	struct ondisk_cache_entry *ondisk;
-	char *name;
-	int result;
-
-	if (!previous_name) {
-		size = ondisk_ce_size(ce);
-		ondisk = xcalloc(1, size);
-		name = copy_cache_entry_to_ondisk(ondisk, ce);
-		memcpy(name, ce->name, ce_namelen(ce));
-	} else {
-		int common, to_remove, prefix_size;
-		unsigned char to_remove_vi[16];
-		for (common = 0;
-		     (ce->name[common] &&
-		      common < previous_name->len &&
-		      ce->name[common] == previous_name->buf[common]);
-		     common++)
-			; /* still matching */
-		to_remove = previous_name->len - common;
-		prefix_size = encode_varint(to_remove, to_remove_vi);
-
-		if (ce->ce_flags & CE_EXTENDED)
-			size = offsetof(struct ondisk_cache_entry_extended, name);
-		else
-			size = offsetof(struct ondisk_cache_entry, name);
-		size += prefix_size + (ce_namelen(ce) - common + 1);
-
-		ondisk = xcalloc(1, size);
-		name = copy_cache_entry_to_ondisk(ondisk, ce);
-		memcpy(name, to_remove_vi, prefix_size);
-		memcpy(name + prefix_size, ce->name + common, ce_namelen(ce) - common);
-
-		strbuf_splice(previous_name, common, to_remove,
-			      ce->name + common, ce_namelen(ce) - common);
-	}
-
-	result = ce_write(c, fd, ondisk, size);
-	free(ondisk);
-	return result;
-}
-
 static int has_racy_timestamp(struct index_state *istate)
 {
 	int entries = istate->cache_nr;
@@ -1756,83 +1288,10 @@ void update_index_if_able(struct index_state *istate, struct lock_file *lockfile
 
 int write_index(struct index_state *istate, int newfd)
 {
-	git_SHA_CTX c;
-	struct cache_header hdr;
-	int i, err, removed, extended, hdr_version;
-	struct cache_entry **cache = istate->cache;
-	int entries = istate->cache_nr;
-	struct stat st;
-	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
-
-	for (i = removed = extended = 0; i < entries; i++) {
-		if (cache[i]->ce_flags & CE_REMOVE)
-			removed++;
-
-		/* reduce extended entries if possible */
-		cache[i]->ce_flags &= ~CE_EXTENDED;
-		if (cache[i]->ce_flags & CE_EXTENDED_FLAGS) {
-			extended++;
-			cache[i]->ce_flags |= CE_EXTENDED;
-		}
-	}
-
 	if (!istate->version)
 		istate->version = INDEX_FORMAT_DEFAULT;
 
-	/* demote version 3 to version 2 when the latter suffices */
-	if (istate->version == 3 || istate->version == 2)
-		istate->version = extended ? 3 : 2;
-
-	hdr_version = istate->version;
-
-	hdr.hdr_signature = htonl(CACHE_SIGNATURE);
-	hdr.hdr_version = htonl(hdr_version);
-	hdr.hdr_entries = htonl(entries - removed);
-
-	git_SHA1_Init(&c);
-	if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0)
-		return -1;
-
-	previous_name = (hdr_version == 4) ? &previous_name_buf : NULL;
-	for (i = 0; i < entries; i++) {
-		struct cache_entry *ce = cache[i];
-		if (ce->ce_flags & CE_REMOVE)
-			continue;
-		if (!ce_uptodate(ce) && is_racy_timestamp(istate, ce))
-			ce_smudge_racily_clean_entry(ce);
-		if (ce_write_entry(&c, newfd, ce, previous_name) < 0)
-			return -1;
-	}
-	strbuf_release(&previous_name_buf);
-
-	/* Write extension data here */
-	if (istate->cache_tree) {
-		struct strbuf sb = STRBUF_INIT;
-
-		cache_tree_write(&sb, istate->cache_tree);
-		err = write_index_ext_header(&c, newfd, CACHE_EXT_TREE, sb.len) < 0
-			|| ce_write(&c, newfd, sb.buf, sb.len) < 0;
-		strbuf_release(&sb);
-		if (err)
-			return -1;
-	}
-	if (istate->resolve_undo) {
-		struct strbuf sb = STRBUF_INIT;
-
-		resolve_undo_write(&sb, istate->resolve_undo);
-		err = write_index_ext_header(&c, newfd, CACHE_EXT_RESOLVE_UNDO,
-					     sb.len) < 0
-			|| ce_write(&c, newfd, sb.buf, sb.len) < 0;
-		strbuf_release(&sb);
-		if (err)
-			return -1;
-	}
-
-	if (ce_flush(&c, newfd) || fstat(newfd, &st))
-		return -1;
-	istate->timestamp.sec = (unsigned int)st.st_mtime;
-	istate->timestamp.nsec = ST_MTIME_NSEC(st);
-	return 0;
+	return istate->ops->write_index(istate, newfd);
 }
 
 /*
diff --git a/read-cache.h b/read-cache.h
new file mode 100644
index 0000000..284ab30
--- /dev/null
+++ b/read-cache.h
@@ -0,0 +1,54 @@
+/* Index extensions.
+ *
+ * The first letter should be 'A'..'Z' for extensions that are not
+ * necessary for a correct operation (i.e. optimization data).
+ * When new extensions are added that _needs_ to be understood in
+ * order to correctly interpret the index file, pick character that
+ * is outside the range, to cause the reader to abort.
+ */
+
+#define CACHE_EXT(s) ( (s[0]<<24)|(s[1]<<16)|(s[2]<<8)|(s[3]) )
+#define CACHE_EXT_TREE 0x54524545	/* "TREE" */
+#define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */
+
+struct cache_version_header {
+	unsigned int hdr_signature;
+	unsigned int hdr_version;
+};
+
+struct index_ops {
+	int (*match_stat_basic)(struct cache_entry *ce, struct stat *st, int changed);
+	int (*verify_hdr)(struct cache_version_header *hdr, unsigned long size);
+	void (*read_index)(struct index_state *istate, void *mmap, int mmap_size, int fd);
+	int (*write_index)(struct index_state *istate, int newfd);
+};
+
+extern struct index_ops v5_ops;
+extern struct index_ops v2_ops;
+
+#ifndef NEEDS_ALIGNED_ACCESS
+#define ntoh_s(var) ntohs(var)
+#define ntoh_l(var) ntohl(var)
+#else
+static inline uint16_t ntoh_s_force_align(void *p)
+{
+	uint16_t x;
+	memcpy(&x, p, sizeof(x));
+	return ntohs(x);
+}
+static inline uint32_t ntoh_l_force_align(void *p)
+{
+	uint32_t x;
+	memcpy(&x, p, sizeof(x));
+	return ntohl(x);
+}
+#define ntoh_s(var) ntoh_s_force_align(&(var))
+#define ntoh_l(var) ntoh_l_force_align(&(var))
+#endif
+
+extern void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce);
+extern int ce_match_stat_basic(const struct index_state *istate,
+			       struct cache_entry *ce, struct stat *st);
+extern int ce_modified_check_fs(struct cache_entry *ce, struct stat *st);
+extern int is_racy_timestamp(const struct index_state *istate, struct cache_entry *ce);
+extern uint32_t calculate_stat_crc(struct cache_entry *ce);
-- 
1.7.8

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH 2/2] Add index-v5
  2012-08-06 14:35 ` [PATCH/RFC v2 0/16] Introduce index file format version 5 Nguyễn Thái Ngọc Duy
  2012-08-06 14:35   ` [PATCH 1/2] Move index v2 specific code out of read-cache Nguyễn Thái Ngọc Duy
@ 2012-08-06 14:36   ` Nguyễn Thái Ngọc Duy
  2012-08-07 21:52     ` Robin Rosenberg
  2012-08-06 15:51   ` [PATCH/RFC v2 0/16] Introduce index file format version 5 Junio C Hamano
  2012-08-06 17:46   ` Junio C Hamano
  3 siblings, 1 reply; 59+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-08-06 14:36 UTC (permalink / raw)
  To: Thomas Gummerer
  Cc: git, trast, mhagger, gitster, robin.rosenberg,
	Nguyễn Thái Ngọc Duy

---
 Makefile        |    1 +
 read-cache-v5.c | 1170 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 read-cache.c    |   27 ++
 3 files changed, 1198 insertions(+), 0 deletions(-)
 create mode 100644 read-cache-v5.c

diff --git a/Makefile b/Makefile
index b4a7c73..77be175 100644
--- a/Makefile
+++ b/Makefile
@@ -770,6 +770,7 @@ LIB_OBJS += quote.o
 LIB_OBJS += reachable.o
 LIB_OBJS += read-cache.o
 LIB_OBJS += read-cache-v2.o
+LIB_OBJS += read-cache-v5.o
 LIB_OBJS += reflog-walk.o
 LIB_OBJS += refs.o
 LIB_OBJS += remote.o
diff --git a/read-cache-v5.c b/read-cache-v5.c
new file mode 100644
index 0000000..a4a3746
--- /dev/null
+++ b/read-cache-v5.c
@@ -0,0 +1,1170 @@
+#include "cache.h"
+#include "read-cache.h"
+#include "resolve-undo.h"
+#include "cache-tree.h"
+
+struct cache_header_v5 {
+	unsigned int hdr_ndir;
+	unsigned int hdr_nfile;
+	unsigned int hdr_fblockoffset;
+	unsigned int hdr_nextension;
+};
+
+struct ondisk_cache_entry_v5 {
+	unsigned short flags;
+	unsigned short mode;
+	struct cache_time mtime;
+	int stat_crc;
+	unsigned char sha1[20];
+};
+
+struct ondisk_directory_entry {
+	unsigned int foffset;
+	unsigned int cr;
+	unsigned int ncr;
+	unsigned int nsubtrees;
+	unsigned int nfiles;
+	unsigned int nentries;
+	unsigned char sha1[20];
+	unsigned short flags;
+};
+
+static int match_stat_crc(struct stat *st, uint32_t expected_crc)
+{
+	uint32_t data, stat_crc = 0;
+	unsigned int ctimens = 0;
+
+	data = htonl(st->st_ctime);
+	stat_crc = crc32(0, (Bytef*)&data, 4);
+#ifdef USE_NSEC
+	ctimens = ST_MTIME_NSEC(*st);
+#endif
+	data = htonl(ctimens);
+	stat_crc = crc32(stat_crc, (Bytef*)&data, 4);
+	data = htonl(st->st_ino);
+	stat_crc = crc32(stat_crc, (Bytef*)&data, 4);
+	data = htonl(st->st_size);
+	stat_crc = crc32(stat_crc, (Bytef*)&data, 4);
+	data = htonl(st->st_dev);
+	stat_crc = crc32(stat_crc, (Bytef*)&data, 4);
+	data = htonl(st->st_uid);
+	stat_crc = crc32(stat_crc, (Bytef*)&data, 4);
+	data = htonl(st->st_gid);
+	stat_crc = crc32(stat_crc, (Bytef*)&data, 4);
+
+	return stat_crc == expected_crc;
+}
+
+static int match_stat_basic(struct cache_entry *ce,
+			    struct stat *st,
+			    int changed)
+{
+
+	if (ce->ce_mtime.sec != 0 && ce->ce_mtime.sec != (unsigned int)st->st_mtime)
+		changed |= MTIME_CHANGED;
+#ifdef USE_NSEC
+	if (ce->ce_mtime.nsec != 0 && ce->ce_mtime.nsec != ST_MTIME_NSEC(*st))
+		changed |= MTIME_CHANGED;
+#endif
+	if (!match_stat_crc(st, ce->ce_stat_crc)) {
+		changed |= OWNER_CHANGED;
+		changed |= INODE_CHANGED;
+	}
+	/* Racily smudged entry? */
+	if (!ce->ce_mtime.sec && !ce->ce_mtime.nsec) {
+		if (!changed && !is_empty_blob_sha1(ce->sha1) && ce_modified_check_fs(ce, st))
+			changed |= DATA_CHANGED;
+	}
+	return changed;
+}
+
+static int check_crc32(int initialcrc,
+			void *data,
+			size_t len,
+			unsigned int expected_crc)
+{
+	int crc;
+
+	crc = crc32(initialcrc, (Bytef*)data, len);
+	return crc == expected_crc;
+}
+
+static int verify_hdr(struct cache_version_header *hdr, unsigned long size)
+{
+	uint32_t *filecrc;
+	unsigned int header_size_v5;
+	struct cache_header_v5 *hdr_v5;
+	void *mmap = hdr;;
+
+	hdr = mmap;
+	hdr_v5 = mmap + sizeof(*hdr);
+	/* Size of the header + the size of the extensionoffsets */
+	header_size_v5 = sizeof(*hdr_v5) + hdr_v5->hdr_nextension * 4;
+	/* Initialize crc */
+	filecrc = mmap + sizeof(*hdr) + header_size_v5;
+	if (!check_crc32(0, hdr, sizeof(*hdr) + header_size_v5, ntohl(*filecrc)))
+		return error("bad index file header crc signature");
+	return 0;
+}
+
+static struct cache_entry *cache_entry_from_ondisk_v5(struct ondisk_cache_entry_v5 *ondisk,
+						   struct directory_entry *de,
+						   char *name,
+						   size_t len,
+						   size_t prefix_len)
+{
+	struct cache_entry *ce = xmalloc(cache_entry_size(len + de->de_pathlen));
+	int flags;
+
+	flags = ntoh_s(ondisk->flags);
+	ce->ce_ctime.sec  = 0;
+	ce->ce_mtime.sec  = ntoh_l(ondisk->mtime.sec);
+	ce->ce_ctime.nsec = 0;
+	ce->ce_mtime.nsec = ntoh_l(ondisk->mtime.nsec);
+	ce->ce_dev        = 0;
+	ce->ce_ino        = 0;
+	ce->ce_mode       = ntoh_s(ondisk->mode);
+	ce->ce_uid        = 0;
+	ce->ce_gid        = 0;
+	ce->ce_size       = 0;
+	ce->ce_flags      = flags & CE_STAGEMASK;
+	ce->ce_flags     |= flags & CE_VALID;
+	if (flags & CE_INTENT_TO_ADD_V5)
+		ce->ce_flags |= CE_INTENT_TO_ADD;
+	if (flags & CE_SKIP_WORKTREE_V5)
+		ce->ce_flags |= CE_SKIP_WORKTREE;
+	ce->ce_stat_crc   = ntoh_l(ondisk->stat_crc);
+	ce->ce_namelen    = len + de->de_pathlen;
+	hashcpy(ce->sha1, ondisk->sha1);
+	memcpy(ce->name, de->pathname, de->de_pathlen);
+	memcpy(ce->name + de->de_pathlen, name, len);
+	ce->name[len + de->de_pathlen] = '\0';
+	return ce;
+}
+
+static struct directory_entry *directory_entry_from_ondisk(struct ondisk_directory_entry *ondisk,
+						   const char *name,
+						   size_t len)
+{
+	struct directory_entry *de = xmalloc(directory_entry_size(len));
+
+
+	memcpy(de->pathname, name, len);
+	de->pathname[len] = '\0';
+	de->de_flags      = ntoh_s(ondisk->flags);
+	de->de_foffset    = ntoh_l(ondisk->foffset);
+	de->de_cr         = ntoh_l(ondisk->cr);
+	de->de_ncr        = ntoh_l(ondisk->ncr);
+	de->de_nsubtrees  = ntoh_l(ondisk->nsubtrees);
+	de->de_nfiles     = ntoh_l(ondisk->nfiles);
+	de->de_nentries   = ntoh_l(ondisk->nentries);
+	de->de_pathlen    = len;
+	hashcpy(de->sha1, ondisk->sha1);
+	return de;
+}
+
+static struct conflict_part *conflict_part_from_ondisk(struct ondisk_conflict_part *ondisk)
+{
+	struct conflict_part *cp = xmalloc(sizeof(struct conflict_part));
+
+	cp->flags      = ntoh_s(ondisk->flags);
+	cp->entry_mode = ntoh_s(ondisk->entry_mode);
+	hashcpy(cp->sha1, ondisk->sha1);
+	return cp;
+}
+
+static struct cache_entry *convert_conflict_part(struct conflict_part *cp,
+						char * name,
+						unsigned int len)
+{
+
+	struct cache_entry *ce = xmalloc(cache_entry_size(len));
+
+	ce->ce_ctime.sec  = 0;
+	ce->ce_mtime.sec  = 0;
+	ce->ce_ctime.nsec = 0;
+	ce->ce_mtime.nsec = 0;
+	ce->ce_dev        = 0;
+	ce->ce_ino        = 0;
+	ce->ce_mode       = cp->entry_mode;
+	ce->ce_uid        = 0;
+	ce->ce_gid        = 0;
+	ce->ce_size       = 0;
+	ce->ce_flags      = conflict_stage(cp) << CE_STAGESHIFT;
+	ce->ce_stat_crc   = 0;
+	ce->ce_namelen    = len;
+	hashcpy(ce->sha1, cp->sha1);
+	memcpy(ce->name, name, len);
+	ce->name[len] = '\0';
+	return ce;
+}
+
+static struct directory_entry *read_directories_v5(unsigned int *dir_offset,
+				unsigned int *dir_table_offset,
+				void *mmap,
+				int mmap_size)
+{
+	int i, ondisk_directory_size;
+	uint32_t *filecrc, *beginning, *end;
+	struct directory_entry *current = NULL;
+	struct ondisk_directory_entry *disk_de;
+	struct directory_entry *de;
+	unsigned int data_len, len;
+	char *name;
+
+	ondisk_directory_size = sizeof(disk_de->flags)
+		+ sizeof(disk_de->foffset)
+		+ sizeof(disk_de->cr)
+		+ sizeof(disk_de->ncr)
+		+ sizeof(disk_de->nsubtrees)
+		+ sizeof(disk_de->nfiles)
+		+ sizeof(disk_de->nentries)
+		+ sizeof(disk_de->sha1);
+	name = (char *)mmap + *dir_offset;
+	beginning = mmap + *dir_table_offset;
+	end = mmap + *dir_table_offset + 4;
+	len = ntoh_l(*end) - ntoh_l(*beginning) - ondisk_directory_size - 5;
+	disk_de = (struct ondisk_directory_entry *)
+			((char *)mmap + *dir_offset + len + 1);
+	de = directory_entry_from_ondisk(disk_de, name, len);
+	de->next = NULL;
+
+	/* Length of pathname + nul byte for termination + size of
+	 * members of ondisk_directory_entry. (Just using the size
+	 * of the stuct doesn't work, because there may be padding
+	 * bytes for the struct)
+	 */
+	data_len = len + 1 + ondisk_directory_size;
+
+	filecrc = mmap + *dir_offset + data_len;
+	if (!check_crc32(0, mmap + *dir_offset, data_len, ntoh_l(*filecrc)))
+		goto unmap;
+
+	*dir_table_offset += 4;
+	*dir_offset += data_len + 4; /* crc code */
+
+	current = de;
+	for (i = 0; i < de->de_nsubtrees; i++) {
+		current->next = read_directories_v5(dir_offset, dir_table_offset,
+						mmap, mmap_size);
+		while (current->next)
+			current = current->next;
+	}
+
+	return de;
+unmap:
+	munmap(mmap, mmap_size);
+	die("directory crc doesn't match for '%s'", de->pathname);
+}
+
+static struct cache_entry *read_entry_v5(struct directory_entry *de,
+			unsigned long *entry_offset,
+			void **mmap,
+			unsigned long mmap_size,
+			unsigned int *foffsetblock,
+			int fd)
+{
+	int len, crc_wrong, i = 0, offset_to_offset;
+	char *name;
+	uint32_t foffsetblockcrc;
+	uint32_t *filecrc, *beginning, *end;
+	struct cache_entry *ce;
+	struct ondisk_cache_entry_v5 *disk_ce;
+
+	do {
+		name = (char *)*mmap + *entry_offset;
+		beginning = *mmap + *foffsetblock;
+		end = *mmap + *foffsetblock + 4;
+		len = ntoh_l(*end) - ntoh_l(*beginning) - sizeof(struct ondisk_cache_entry_v5) - 5;
+		disk_ce = (struct ondisk_cache_entry_v5 *)
+				((char *)*mmap + *entry_offset + len + 1);
+		ce = cache_entry_from_ondisk_v5(disk_ce, de, name, len, de->de_pathlen);
+		filecrc = *mmap + *entry_offset + len + 1 + sizeof(*disk_ce);
+		offset_to_offset = htonl(*foffsetblock);
+		foffsetblockcrc = crc32(0, (Bytef*)&offset_to_offset, 4);
+		crc_wrong = !check_crc32(foffsetblockcrc,
+			*mmap + *entry_offset, len + 1 + sizeof(*disk_ce),
+			ntoh_l(*filecrc));
+		if (crc_wrong) {
+			/* wait for 10 milliseconds */
+			usleep(10*1000);
+			munmap(*mmap, mmap_size);
+			*mmap = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+		}
+		i++;
+		/*
+		 * Retry for 500 ms maximum, before giving up and saying the
+		 * checksum is wrong.
+		 */
+	} while (crc_wrong && i < 50);
+	if (crc_wrong)
+		goto unmap;
+	*entry_offset += len + 1 + sizeof(*disk_ce) + 4;
+	return ce;
+unmap:
+	munmap(*mmap, mmap_size);
+	die("file crc doesn't match for '%s'", ce->name);
+}
+
+static void ce_queue_push(struct cache_entry **head,
+			     struct cache_entry **tail,
+			     struct cache_entry *ce)
+{
+	if (!*head) {
+		*head = *tail = ce;
+		(*tail)->next = NULL;
+		return;
+	}
+
+	(*tail)->next = ce;
+	ce->next = NULL;
+	*tail = (*tail)->next;
+}
+
+static void conflict_entry_push(struct conflict_entry **head,
+				struct conflict_entry **tail,
+				struct conflict_entry *conflict_entry)
+{
+	if (!*head) {
+		*head = *tail = conflict_entry;
+		(*tail)->next = NULL;
+		return;
+	}
+
+	(*tail)->next = conflict_entry;
+	conflict_entry->next = NULL;
+	*tail = (*tail)->next;
+}
+
+static struct cache_entry *ce_queue_pop(struct cache_entry **head)
+{
+	struct cache_entry *ce;
+
+	ce = *head;
+	*head = (*head)->next;
+	return ce;
+}
+
+static void conflict_part_head_remove(struct conflict_part **head)
+{
+	struct conflict_part *to_free;
+
+	to_free = *head;
+	*head = (*head)->next;
+	free(to_free);
+}
+
+static void conflict_entry_head_remove(struct conflict_entry **head)
+{
+	struct conflict_entry *to_free;
+
+	to_free = *head;
+	*head = (*head)->next;
+	free(to_free);
+}
+
+struct conflict_entry *create_new_conflict(char *name, int len, int pathlen)
+{
+	struct conflict_entry *conflict_entry;
+
+	if (pathlen)
+		pathlen++;
+	conflict_entry = xmalloc(conflict_entry_size(len));
+	conflict_entry->entries = NULL;
+	conflict_entry->nfileconflicts = 0;
+	conflict_entry->namelen = len;
+	memcpy(conflict_entry->name, name, len);
+	conflict_entry->name[len] = '\0';
+	conflict_entry->pathlen = pathlen;
+	conflict_entry->next = NULL;
+
+	return conflict_entry;
+}
+
+void add_part_to_conflict_entry(struct directory_entry *de,
+					struct conflict_entry *entry,
+					struct conflict_part *conflict_part)
+{
+
+	struct conflict_part *conflict_search;
+
+	entry->nfileconflicts++;
+	de->conflict_size += sizeof(struct ondisk_conflict_part);
+	if (!entry->entries)
+		entry->entries = conflict_part;
+	else {
+		conflict_search = entry->entries;
+		while (conflict_search->next)
+			conflict_search = conflict_search->next;
+		conflict_search->next = conflict_part;
+	}
+}
+
+static struct conflict_entry *read_conflicts_v5(struct directory_entry *de,
+						void **mmap,
+						unsigned long mmap_size,
+						int fd)
+{
+	struct conflict_entry *head, *tail;
+	unsigned int croffset, i, j = 0;
+	char *full_name;
+
+	croffset = de->de_cr;
+	tail = NULL;
+	head = NULL;
+	for (i = 0; i < de->de_ncr; i++) {
+		struct conflict_entry *conflict_new;
+		unsigned int len, *nfileconflicts;
+		char *name;
+		void *crc_start;
+		int k, offset, crc_wrong;
+		uint32_t *filecrc;
+
+		do {
+			offset = croffset;
+			crc_start = *mmap + offset;
+			name = (char *)*mmap + offset;
+			len = strlen(name);
+			offset += len + 1;
+			nfileconflicts = *mmap + offset;
+			offset += 4;
+
+			full_name = xmalloc(sizeof(char) * (len + de->de_pathlen));
+			memcpy(full_name, de->pathname, de->de_pathlen);
+			memcpy(full_name + de->de_pathlen, name, len);
+			conflict_new = create_new_conflict(full_name,
+					len + de->de_pathlen, de->de_pathlen);
+			for (k = 0; k < ntoh_l(*nfileconflicts); k++) {
+				struct ondisk_conflict_part *ondisk;
+				struct conflict_part *cp;
+
+				ondisk = *mmap + offset;
+				cp = conflict_part_from_ondisk(ondisk);
+				cp->next = NULL;
+				add_part_to_conflict_entry(de, conflict_new, cp);
+				offset += sizeof(struct ondisk_conflict_part);
+			}
+			filecrc = *mmap + offset;
+			crc_wrong = !check_crc32(0, crc_start,
+				len + 1 + 4 + conflict_new->nfileconflicts
+				* sizeof(struct ondisk_conflict_part),
+				ntoh_l(*filecrc));
+			if (crc_wrong) {
+				/* wait for 10 milliseconds */
+				usleep(10*1000);
+				munmap(*mmap, mmap_size);
+				*mmap = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+			}
+			free(full_name);
+			j++;
+		} while (crc_wrong && j < 50);
+		if (crc_wrong)
+			goto unmap;
+		croffset = offset + 4;
+		conflict_entry_push(&head, &tail, conflict_new);
+	}
+	return head;
+unmap:
+	munmap(*mmap, mmap_size);
+	die("wrong crc for conflict: %s", full_name);
+}
+
+static struct directory_entry *read_entries_v5(struct index_state *istate,
+					struct directory_entry *de,
+					unsigned long *entry_offset,
+					void **mmap,
+					unsigned long mmap_size,
+					int *nr,
+					unsigned int *foffsetblock,
+					int fd)
+{
+	struct cache_entry *head = NULL, *tail = NULL;
+	struct conflict_entry *conflict_queue;
+	struct cache_entry *ce;
+	int i;
+
+	conflict_queue = read_conflicts_v5(de, mmap, mmap_size, fd);
+	resolve_undo_convert_v5(istate, conflict_queue);
+	for (i = 0; i < de->de_nfiles; i++) {
+		ce = read_entry_v5(de,
+				entry_offset,
+				mmap,
+				mmap_size,
+				foffsetblock,
+				fd);
+		ce_queue_push(&head, &tail, ce);
+		*foffsetblock += 4;
+
+		/* Add the conflicted entries at the end of the index file
+		 * to the in memory format
+		 */
+		if (conflict_queue &&
+		    (conflict_queue->entries->flags & CONFLICT_CONFLICTED) != 0 &&
+		    !cache_name_compare(conflict_queue->name, conflict_queue->namelen,
+					ce->name, ce_namelen(ce))) {
+			struct conflict_part *cp;
+			cp = conflict_queue->entries;
+			cp = cp->next;
+			while (cp) {
+				ce = convert_conflict_part(cp,
+						conflict_queue->name,
+						conflict_queue->namelen);
+				ce_queue_push(&head, &tail, ce);
+				conflict_part_head_remove(&cp);
+			}
+			conflict_entry_head_remove(&conflict_queue);
+		}
+	}
+
+	de = de->next;
+
+	while (head) {
+		if (de != NULL
+		    && strcmp(head->name, de->pathname) > 0) {
+			de = read_entries_v5(istate,
+					de,
+					entry_offset,
+					mmap,
+					mmap_size,
+					nr,
+					foffsetblock,
+					fd);
+		} else {
+			ce = ce_queue_pop(&head);
+			set_index_entry(istate, *nr, ce);
+			(*nr)++;
+		}
+	}
+
+	return de;
+}
+
+static void read_index_v5(struct index_state *istate, void *mmap, int mmap_size, int fd)
+{
+	unsigned long entry_offset;
+	unsigned int dir_offset, dir_table_offset;
+	struct cache_version_header *hdr;
+	struct cache_header_v5 *hdr_v5;
+	struct directory_entry *root_directory, *de;
+	int nr;
+	unsigned int foffsetblock;
+
+	hdr = mmap;
+	hdr_v5 = mmap + sizeof(*hdr);
+	istate->version = ntohl(hdr->hdr_version);
+	istate->cache_nr = ntohl(hdr_v5->hdr_nfile);
+	istate->cache_alloc = alloc_nr(istate->cache_nr);
+	istate->cache = xcalloc(istate->cache_alloc, sizeof(struct cache_entry *));
+	istate->initialized = 1;
+
+	/* Skip size of the header + crc sum + size of offsets */
+	dir_offset = sizeof(*hdr) + sizeof(*hdr_v5) + 4 + (ntohl(hdr_v5->hdr_ndir) + 1) * 4;
+	dir_table_offset = sizeof(*hdr) + sizeof(*hdr_v5) + 4;
+	root_directory = read_directories_v5(&dir_offset, &dir_table_offset, mmap, mmap_size);
+
+	entry_offset = ntohl(hdr_v5->hdr_fblockoffset);
+
+	nr = 0;
+	foffsetblock = dir_offset;
+	de = root_directory;
+	while (de)
+		de = read_entries_v5(istate, de, &entry_offset,
+				&mmap, mmap_size, &nr, &foffsetblock, fd);
+	istate->cache_tree = cache_tree_convert_v5(root_directory);
+}
+
+#define WRITE_BUFFER_SIZE 8192
+static unsigned char write_buffer[WRITE_BUFFER_SIZE];
+static unsigned long write_buffer_len;
+
+static int ce_flush_v5(int fd)
+{
+	unsigned int left = write_buffer_len;
+
+	if (left)
+		write_buffer_len = 0;
+
+	if (write_in_full(fd, write_buffer, left) != left)
+		return -1;
+
+	return 0;
+}
+
+static void ce_smudge_racily_clean_entry_v5(struct cache_entry *ce)
+{
+	/*
+	 * This method shall only be called if the timestamp of ce
+	 * is racy (check with is_racy_timestamp). If the timestamp
+	 * is racy, the writer will just set the time to 0.
+	 *
+	 * The reader (ce_match_stat_basic_v5) will then take care
+	 * of checking if the entry is really changed or not, by
+	 * taking into account the stat_crc and if that hasn't changed
+	 * checking the sha1.
+	 */
+	ce->ce_mtime.sec = 0;
+	ce->ce_mtime.nsec = 0;
+}
+
+static int ce_write_flush_v5(int fd)
+{
+	unsigned int buffered = write_buffer_len;
+	if (buffered) {
+		if (write_in_full(fd, write_buffer, buffered) != buffered)
+			return -1;
+		write_buffer_len = 0;
+	}
+	return 0;
+}
+
+static int ce_write_v5(uint32_t *crc, int fd, void *data, unsigned int len)
+{
+	if (crc)
+		*crc = crc32(*crc, (Bytef*)data, len);
+	while (len) {
+		unsigned int buffered = write_buffer_len;
+		unsigned int partial = WRITE_BUFFER_SIZE - buffered;
+		if (partial > len)
+			partial = len;
+		memcpy(write_buffer + buffered, data, partial);
+		buffered += partial;
+		if (buffered == WRITE_BUFFER_SIZE) {
+			write_buffer_len = buffered;
+			if (ce_write_flush_v5(fd))
+				return -1;
+			buffered = 0;
+		}
+		write_buffer_len = buffered;
+		len -= partial;
+		data = (char *) data + partial;
+	}
+	return 0;
+}
+
+char *super_directory(const char *filename)
+{
+	char *slash;
+
+	slash = strrchr(filename, '/');
+	if (slash)
+		return xmemdupz(filename, slash-filename);
+	return NULL;
+}
+
+struct directory_entry *init_directory_entry(char *pathname, int len)
+{
+	struct directory_entry *de = xmalloc(directory_entry_size(len));
+
+	memcpy(de->pathname, pathname, len);
+	de->pathname[len] = '\0';
+	de->de_flags      = 0;
+	de->de_foffset    = 0;
+	de->de_cr         = 0;
+	de->de_ncr        = 0;
+	de->de_nsubtrees  = 0;
+	de->de_nfiles     = 0;
+	de->de_nentries   = 0;
+	memset(de->sha1, 0, 20);
+	de->de_pathlen    = len;
+	de->next          = NULL;
+	de->next_hash     = NULL;
+	de->ce            = NULL;
+	de->ce_last       = NULL;
+	de->conflict      = NULL;
+	de->conflict_last = NULL;
+	de->conflict_size = 0;
+	return de;
+}
+
+static void ondisk_from_directory_entry(struct directory_entry *de,
+					struct ondisk_directory_entry *ondisk)
+{
+	ondisk->foffset   = htonl(de->de_foffset);
+	ondisk->cr        = htonl(de->de_cr);
+	ondisk->ncr       = htonl(de->de_ncr);
+	ondisk->nsubtrees = htonl(de->de_nsubtrees);
+	ondisk->nfiles    = htonl(de->de_nfiles);
+	ondisk->nentries  = htonl(de->de_nentries);
+	hashcpy(ondisk->sha1, de->sha1);
+	ondisk->flags     = htons(de->de_flags);
+}
+
+static struct conflict_part *conflict_part_from_inmemory(struct cache_entry *ce)
+{
+	struct conflict_part *conflict;
+	short flags;
+
+	conflict = xmalloc(sizeof(struct conflict_part));
+	flags                = CONFLICT_CONFLICTED;
+	flags               |= ce_stage(ce) << CONFLICT_STAGESHIFT;
+	conflict->flags      = flags;
+	conflict->entry_mode = ce->ce_mode;
+	conflict->next       = NULL;
+	hashcpy(conflict->sha1, ce->sha1);
+	return conflict;
+}
+
+static void conflict_to_ondisk(struct conflict_part *cp,
+				struct ondisk_conflict_part *ondisk)
+{
+	ondisk->flags      = htons(cp->flags);
+	ondisk->entry_mode = htons(cp->entry_mode);
+	hashcpy(ondisk->sha1, cp->sha1);
+}
+
+void add_conflict_to_directory_entry(struct directory_entry *de,
+					struct conflict_entry *conflict_entry)
+{
+	de->de_ncr++;
+	de->conflict_size += conflict_entry->namelen + 1 + 8 - conflict_entry->pathlen;
+	conflict_entry_push(&de->conflict, &de->conflict_last, conflict_entry);
+}
+
+void insert_directory_entry(struct directory_entry *de,
+			struct hash_table *table,
+			int *total_dir_len,
+			unsigned int *ndir,
+			uint32_t crc)
+{
+	struct directory_entry *insert;
+
+	insert = (struct directory_entry *)insert_hash(crc, de, table);
+	if (insert) {
+		de->next_hash = insert->next_hash;
+		insert->next_hash = de;
+	}
+	(*ndir)++;
+	if (de->de_pathlen == 0)
+		(*total_dir_len)++;
+	else
+		*total_dir_len += de->de_pathlen + 2;
+}
+
+static struct conflict_entry *create_conflict_entry_from_ce(struct cache_entry *ce,
+								int pathlen)
+{
+	return create_new_conflict(ce->name, ce_namelen(ce), pathlen);
+}
+
+static struct directory_entry *compile_directory_data(struct index_state *istate,
+						int nfile,
+						unsigned int *ndir,
+						int *non_conflicted,
+						int *total_dir_len,
+						int *total_file_len)
+{
+	int i, dir_len = -1;
+	char *dir;
+	struct directory_entry *de, *current, *search, *found, *new, *previous_entry;
+	struct cache_entry **cache = istate->cache;
+	struct conflict_entry *conflict_entry;
+	struct hash_table table;
+	uint32_t crc;
+
+	init_hash(&table);
+	de = init_directory_entry("", 0);
+	current = de;
+	*ndir = 1;
+	*total_dir_len = 1;
+	crc = crc32(0, (Bytef*)de->pathname, de->de_pathlen);
+	insert_hash(crc, de, &table);
+	conflict_entry = NULL;
+	for (i = 0; i < nfile; i++) {
+		int new_entry;
+		if (cache[i]->ce_flags & CE_REMOVE)
+			continue;
+
+		new_entry = !ce_stage(cache[i]) || !conflict_entry
+		    || cache_name_compare(conflict_entry->name, conflict_entry->namelen,
+					cache[i]->name, ce_namelen(cache[i]));
+		if (new_entry)
+			(*non_conflicted)++;
+		if (dir_len < 0 || strncmp(cache[i]->name, dir, dir_len)
+		    || cache[i]->name[dir_len] != '/'
+		    || strchr(cache[i]->name + dir_len + 1, '/')) {
+			dir = super_directory(cache[i]->name);
+			if (!dir)
+				dir_len = 0;
+			else
+				dir_len = strlen(dir);
+			crc = crc32(0, (Bytef*)dir, dir_len);
+			found = lookup_hash(crc, &table);
+			search = found;
+			while (search && dir_len != 0 && strcmp(dir, search->pathname) != 0)
+				search = search->next_hash;
+		}
+		previous_entry = current;
+		if (!search || !found) {
+			new = init_directory_entry(dir, dir_len);
+			current->next = new;
+			current = current->next;
+			insert_directory_entry(new, &table, total_dir_len, ndir, crc);
+			search = current;
+		}
+		if (new_entry) {
+			search->de_nfiles++;
+			*total_file_len += ce_namelen(cache[i]) + 1;
+			if (search->de_pathlen)
+				*total_file_len -= search->de_pathlen + 1;
+			ce_queue_push(&(search->ce), &(search->ce_last), cache[i]);
+		}
+		if (ce_stage(cache[i]) > 0) {
+			struct conflict_part *conflict_part;
+			if (new_entry) {
+				conflict_entry = create_conflict_entry_from_ce(cache[i], search->de_pathlen);
+				add_conflict_to_directory_entry(search, conflict_entry);
+			}
+			conflict_part = conflict_part_from_inmemory(cache[i]);
+			add_part_to_conflict_entry(search, conflict_entry, conflict_part);
+		}
+		if (dir && !found) {
+			struct directory_entry *no_subtrees;
+
+			no_subtrees = current;
+			dir = super_directory(dir);
+			if (dir)
+				dir_len = strlen(dir);
+			else
+				dir_len = 0;
+			crc = crc32(0, (Bytef*)dir, dir_len);
+			found = lookup_hash(crc, &table);
+			while (!found) {
+				new = init_directory_entry(dir, dir_len);
+				new->de_nsubtrees = 1;
+				new->next = no_subtrees;
+				no_subtrees = new;
+				insert_directory_entry(new, &table, total_dir_len, ndir, crc);
+				dir = super_directory(dir);
+				if (!dir)
+					dir_len = 0;
+				else
+					dir_len = strlen(dir);
+				crc = crc32(0, (Bytef*)dir, dir_len);
+				found = lookup_hash(crc, &table);
+			}
+			search = found;
+			while (search->next_hash && strcmp(dir, search->pathname) != 0)
+				search = search->next_hash;
+			if (search)
+				found = search;
+			found->de_nsubtrees++;
+			previous_entry->next = no_subtrees;
+		}
+	}
+	if (istate->cache_tree)
+		cache_tree_to_ondisk_v5(&table, istate->cache_tree);
+	resolve_undo_to_ondisk_v5(&table, istate->resolve_undo, ndir, total_dir_len, de);
+	return de;
+}
+
+static void ondisk_from_cache_entry(struct cache_entry *ce,
+				    struct ondisk_cache_entry_v5 *ondisk)
+{
+	unsigned int flags;
+
+	flags  = ce->ce_flags & CE_STAGEMASK;
+	flags |= ce->ce_flags & CE_VALID;
+	if (ce->ce_flags & CE_INTENT_TO_ADD)
+		flags |= CE_INTENT_TO_ADD_V5;
+	if (ce->ce_flags & CE_SKIP_WORKTREE)
+		flags |= CE_SKIP_WORKTREE_V5;
+	ondisk->flags      = htons(flags);
+	ondisk->mode       = htons(ce->ce_mode);
+	ondisk->mtime.sec  = htonl(ce->ce_mtime.sec);
+#ifdef USE_NSEC
+	ondisk->mtime.nsec = htonl(ce->ce_mtime.nsec);
+#else
+	ondisk->mtime.nsec = 0;
+#endif
+	if (!ce->ce_stat_crc)
+		ce->ce_stat_crc = calculate_stat_crc(ce);
+	ondisk->stat_crc   = htonl(ce->ce_stat_crc);
+	hashcpy(ondisk->sha1, ce->sha1);
+}
+
+static int write_directories_v5(struct directory_entry *de, int fd, int conflict_offset)
+{
+	struct directory_entry *current;
+	struct ondisk_directory_entry ondisk;
+	int current_offset, offset_write, ondisk_size, foffset;
+	uint32_t crc;
+
+	/*
+	 * This is needed because the compiler aligns structs to sizes multipe
+	 * of 4
+	 */
+	ondisk_size = sizeof(ondisk.flags)
+		+ sizeof(ondisk.foffset)
+		+ sizeof(ondisk.cr)
+		+ sizeof(ondisk.ncr)
+		+ sizeof(ondisk.nsubtrees)
+		+ sizeof(ondisk.nfiles)
+		+ sizeof(ondisk.nentries)
+		+ sizeof(ondisk.sha1);
+	current = de;
+	current_offset = 0;
+	foffset = 0;
+	while (current) {
+		int pathlen;
+
+		offset_write = htonl(current_offset);
+		if (ce_write_v5(NULL, fd, &offset_write, 4) < 0)
+			return -1;
+		if (current->de_pathlen == 0)
+			pathlen = 0;
+		else
+			pathlen = current->de_pathlen + 1;
+		current_offset += pathlen + 1 + ondisk_size + 4;
+		current = current->next;
+	}
+	/*
+	 * Write one more offset, which points to the end of the entries,
+	 * because we use it for calculating the dir length, instead of
+	 * using strlen.
+	 */
+	offset_write = htonl(current_offset);
+	if (ce_write_v5(NULL, fd, &offset_write, 4) < 0)
+		return -1;
+	current = de;
+	while (current) {
+		crc = 0;
+		if (current->de_pathlen == 0) {
+			if (ce_write_v5(&crc, fd, current->pathname, 1) < 0)
+				return -1;
+		} else {
+			char *path;
+			path = xmalloc(sizeof(char) * (current->de_pathlen + 2));
+			memcpy(path, current->pathname, current->de_pathlen);
+			memcpy(path + current->de_pathlen, "/\0", 2);
+			if (ce_write_v5(&crc, fd, path, current->de_pathlen + 2) < 0)
+				return -1;
+		}
+		current->de_foffset = foffset;
+		current->de_cr = conflict_offset;
+		ondisk_from_directory_entry(current, &ondisk);
+		if (ce_write_v5(&crc, fd, &ondisk, ondisk_size) < 0)
+			return -1;
+		crc = htonl(crc);
+		if (ce_write_v5(NULL, fd, &crc, 4) < 0)
+			return -1;
+		conflict_offset += current->conflict_size;
+		foffset += current->de_nfiles * 4;
+		current = current->next;
+	}
+	return 0;
+}
+
+static int write_entries_v5(struct index_state *istate,
+			    struct directory_entry *de,
+			    int entries,
+			    int fd,
+			    int offset_to_offset)
+{
+	int offset, offset_write, ondisk_size;
+	struct directory_entry *current;
+
+	offset = 0;
+	ondisk_size = sizeof(struct ondisk_cache_entry_v5);
+	current = de;
+	while (current) {
+		int pathlen;
+		struct cache_entry *ce = current->ce;
+
+		if (current->de_pathlen == 0)
+			pathlen = 0;
+		else
+			pathlen = current->de_pathlen + 1;
+		while (ce) {
+			if (ce->ce_flags & CE_REMOVE)
+				continue;
+			if (!ce_uptodate(ce) && is_racy_timestamp(istate, ce))
+				ce_smudge_racily_clean_entry_v5(ce);
+
+			offset_write = htonl(offset);
+			if (ce_write_v5(NULL, fd, &offset_write, 4) < 0)
+				return -1;
+			offset += ce_namelen(ce) - pathlen + 1 + ondisk_size + 4;
+			ce = ce->next;
+		}
+		current = current->next;
+	}
+	/*
+	 * Write one more offset, which points to the end of the entries,
+	 * because we use it for calculating the file length, instead of
+	 * using strlen.
+	 */
+	offset_write = htonl(offset);
+	if (ce_write_v5(NULL, fd, &offset_write, 4) < 0)
+		return -1;
+
+	offset = offset_to_offset;
+	current = de;
+	while (current) {
+		int pathlen;
+		struct cache_entry *ce = current->ce;
+
+		if (current->de_pathlen == 0)
+			pathlen = 0;
+		else
+			pathlen = current->de_pathlen + 1;
+		while (ce) {
+			struct ondisk_cache_entry_v5 ondisk;
+			uint32_t crc, calc_crc;
+
+			if (ce->ce_flags & CE_REMOVE)
+				continue;
+			calc_crc = htonl(offset);
+			crc = crc32(0, (Bytef*)&calc_crc, 4);
+			if (ce_write_v5(&crc, fd, ce->name + pathlen,
+					ce_namelen(ce) - pathlen + 1) < 0)
+				return -1;
+			ondisk_from_cache_entry(ce, &ondisk);
+			if (ce_write_v5(&crc, fd, &ondisk, ondisk_size) < 0)
+				return -1;
+			crc = htonl(crc);
+			if (ce_write_v5(NULL, fd, &crc, 4) < 0)
+				return -1;
+			offset += 4;
+			ce = ce->next;
+		}
+		current = current->next;
+	}
+	return 0;
+}
+
+static int write_conflict_v5(struct conflict_entry *conflict, int fd)
+{
+	struct conflict_entry *current;
+	struct conflict_part *current_part;
+	uint32_t crc;
+
+	current = conflict;
+	while (current) {
+		unsigned int to_write;
+
+		crc = 0;
+		if (ce_write_v5(&crc, fd,
+		     (Bytef*)(current->name + current->pathlen),
+		     current->namelen - current->pathlen) < 0)
+			return -1;
+		if (ce_write_v5(&crc, fd, (Bytef*)"\0", 1) < 0)
+			return -1;
+		to_write = htonl(current->nfileconflicts);
+		if (ce_write_v5(&crc, fd, (Bytef*)&to_write, 4) < 0)
+			return -1;
+		current_part = current->entries;
+		while (current_part) {
+			struct ondisk_conflict_part ondisk;
+
+			conflict_to_ondisk(current_part, &ondisk);
+			if (ce_write_v5(&crc, fd, (Bytef*)&ondisk, sizeof(struct ondisk_conflict_part)) < 0)
+				return 0;
+			current_part = current_part->next;
+		}
+		to_write = htonl(crc);
+		if (ce_write_v5(NULL, fd, (Bytef*)&to_write, 4) < 0)
+			return -1;
+		current = current->next;
+	}
+	return 0;
+}
+
+static int write_conflicts_v5(struct index_state *istate,
+			      struct directory_entry *de,
+			      int fd)
+{
+	struct directory_entry *current;
+
+	current = de;
+	while (current) {
+		if (current->de_ncr != 0) {
+			if (write_conflict_v5(current->conflict, fd) < 0)
+				return -1;
+		}
+		current = current->next;
+	}
+	return 0;
+}
+
+static int write_index_v5(struct index_state *istate, int newfd)
+{
+	struct cache_version_header hdr;
+	struct cache_header_v5 hdr_v5;
+	struct cache_entry **cache = istate->cache;
+	struct directory_entry *de;
+	struct ondisk_directory_entry *ondisk;
+	int entries = istate->cache_nr;
+	int i, removed, non_conflicted, total_dir_len, ondisk_directory_size;
+	int total_file_len, conflict_offset, offset_to_offset;
+	unsigned int ndir;
+	uint32_t crc;
+
+	for (i = removed = 0; i < entries; i++) {
+		if (cache[i]->ce_flags & CE_REMOVE)
+			removed++;
+	}
+	hdr.hdr_signature = htonl(CACHE_SIGNATURE);
+	hdr.hdr_version = htonl(istate->version);
+	hdr_v5.hdr_nfile = htonl(entries - removed);
+	hdr_v5.hdr_nextension = htonl(0); /* Currently no extensions are supported */
+
+	non_conflicted = 0;
+	total_dir_len = 0;
+	total_file_len = 0;
+	de = compile_directory_data(istate, entries, &ndir, &non_conflicted,
+			&total_dir_len, &total_file_len);
+	hdr_v5.hdr_ndir = htonl(ndir);
+
+	/*
+	 * This is needed because the compiler aligns structs to sizes multipe
+	 * of 4
+	 */
+	ondisk_directory_size = sizeof(ondisk->flags)
+		+ sizeof(ondisk->foffset)
+		+ sizeof(ondisk->cr)
+		+ sizeof(ondisk->ncr)
+		+ sizeof(ondisk->nsubtrees)
+		+ sizeof(ondisk->nfiles)
+		+ sizeof(ondisk->nentries)
+		+ sizeof(ondisk->sha1);
+	hdr_v5.hdr_fblockoffset = htonl(sizeof(hdr) + sizeof(hdr_v5) + 4
+		+ (ndir + 1) * 4
+		+ total_dir_len
+		+ ndir * (ondisk_directory_size + 4)
+		+ (non_conflicted + 1) * 4);
+
+	crc = 0;
+	if (ce_write_v5(&crc, newfd, &hdr, sizeof(hdr)) < 0)
+		return -1;
+	if (ce_write_v5(&crc, newfd, &hdr_v5, sizeof(hdr_v5)) < 0)
+		return -1;
+	crc = htonl(crc);
+	if (ce_write_v5(NULL, newfd, &crc, 4) < 0)
+		return -1;
+
+	conflict_offset = sizeof(hdr) + sizeof(hdr_v5) + 4
+		+ (ndir + 1) * 4
+		+ total_dir_len
+		+ ndir * (ondisk_directory_size + 4)
+		+ (non_conflicted + 1) * 4
+		+ total_file_len
+		+ non_conflicted * (sizeof(struct ondisk_cache_entry_v5) + 4);
+	if (write_directories_v5(de, newfd, conflict_offset) < 0)
+		return -1;
+	offset_to_offset = sizeof(hdr) + sizeof(hdr_v5) + 4
+		+ (ndir + 1) * 4
+		+ total_dir_len
+		+ ndir * (ondisk_directory_size + 4);
+	if (write_entries_v5(istate, de, entries, newfd, offset_to_offset) < 0)
+		return -1;
+	if (write_conflicts_v5(istate, de, newfd) < 0)
+		return -1;
+	return ce_flush_v5(newfd);
+}
+
+struct index_ops v5_ops = {
+	match_stat_basic,
+	verify_hdr,
+	read_index_v5,
+	write_index_v5
+};
diff --git a/read-cache.c b/read-cache.c
index 215c91f..61e2ea8 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -55,6 +55,31 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n
 	add_index_entry(istate, new, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
 }
 
+uint32_t calculate_stat_crc(struct cache_entry *ce)
+{
+	unsigned int ctimens = 0;
+	uint32_t stat, stat_crc;
+
+	stat = htonl(ce->ce_ctime.sec);
+	stat_crc = crc32(0, (Bytef*)&stat, 4);
+#ifdef USE_NSEC
+	ctimens = ce->ce_ctime.nsec;
+#endif
+	stat = htonl(ctimens);
+	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
+	stat = htonl(ce->ce_ino);
+	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
+	stat = htonl(ce->ce_size);
+	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
+	stat = htonl(ce->ce_dev);
+	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
+	stat = htonl(ce->ce_uid);
+	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
+	stat = htonl(ce->ce_gid);
+	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
+	return stat_crc;
+}
+
 /*
  * This only updates the "non-critical" parts of the directory
  * cache, ie the parts that aren't tracked by GIT, and only used
@@ -77,6 +102,8 @@ void fill_stat_cache_info(struct cache_entry *ce, struct stat *st)
 
 	if (S_ISREG(st->st_mode))
 		ce_mark_uptodate(ce);
+
+	ce->ce_stat_crc = calculate_stat_crc(ce);
 }
 
 static int ce_compare_data(struct cache_entry *ce, struct stat *st)
-- 
1.7.8

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 0/16] Introduce index file format version 5
  2012-08-06 14:35 ` [PATCH/RFC v2 0/16] Introduce index file format version 5 Nguyễn Thái Ngọc Duy
  2012-08-06 14:35   ` [PATCH 1/2] Move index v2 specific code out of read-cache Nguyễn Thái Ngọc Duy
  2012-08-06 14:36   ` [PATCH 2/2] Add index-v5 Nguyễn Thái Ngọc Duy
@ 2012-08-06 15:51   ` Junio C Hamano
  2012-08-06 16:06     ` Thomas Gummerer
  2012-08-06 17:46   ` Junio C Hamano
  3 siblings, 1 reply; 59+ messages in thread
From: Junio C Hamano @ 2012-08-06 15:51 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy
  Cc: Thomas Gummerer, git, trast, mhagger, robin.rosenberg

Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:

> These mails are about cosmetics only. But I think it helps maintenance
> in long term. I notice in your series we have many functions with _v2
> and _v5 mixed together. Worse, some functions that are _v2 only are
> not suffixed with _v2. I still think separating v2/v5 changes is a
> good idea. So I played a bit, see how it might become.
>
> The next two emails demonstrate how we take v2-specific code out to
> read-cache-v2.c, then add v5 code in the next patch. Notice there's very
> little change in read-cache.c in the second patch. I wanted to see how
> v5 changes affects v2 users and the second patch shows it.

I like the splitting of the backend into two files; it is a good
direction to go, but I really prefer to see it done way before in
the series, so that many symbols in read-cache-v2.c do not have to
be contaminated with foo_v2 suffix, and similarly _v5 suffix for
symbols in read-cache-v5.c when they are added.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 0/16] Introduce index file format version 5
  2012-08-06 15:51   ` [PATCH/RFC v2 0/16] Introduce index file format version 5 Junio C Hamano
@ 2012-08-06 16:06     ` Thomas Gummerer
  0 siblings, 0 replies; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-06 16:06 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Nguy� n Thái Ngọc Duy, git, trast, mhagger,
	robin.rosenberg

On 08/06, Junio C Hamano wrote:
> Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:
> 
> > These mails are about cosmetics only. But I think it helps maintenance
> > in long term. I notice in your series we have many functions with _v2
> > and _v5 mixed together. Worse, some functions that are _v2 only are
> > not suffixed with _v2. I still think separating v2/v5 changes is a
> > good idea. So I played a bit, see how it might become.
> >
> > The next two emails demonstrate how we take v2-specific code out to
> > read-cache-v2.c, then add v5 code in the next patch. Notice there's very
> > little change in read-cache.c in the second patch. I wanted to see how
> > v5 changes affects v2 users and the second patch shows it.
> 
> I like the splitting of the backend into two files; it is a good
> direction to go, but I really prefer to see it done way before in
> the series, so that many symbols in read-cache-v2.c do not have to
> be contaminated with foo_v2 suffix, and similarly _v5 suffix for
> symbols in read-cache-v5.c when they are added.

I agree. I planned to make those changes in the re-roll of this series,
basically making patch 1/2/3/4 in this series in one commit, moving it
to read-cache-v2.c and building read-cache-v5.c along the commits in this
series. The re-roll should be out by tomorrow.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 0/16] Introduce index file format version 5
  2012-08-06 14:35 ` [PATCH/RFC v2 0/16] Introduce index file format version 5 Nguyễn Thái Ngọc Duy
                     ` (2 preceding siblings ...)
  2012-08-06 15:51   ` [PATCH/RFC v2 0/16] Introduce index file format version 5 Junio C Hamano
@ 2012-08-06 17:46   ` Junio C Hamano
  2012-08-07 12:16     ` Nguyen Thai Ngoc Duy
  2012-08-07 22:31     ` Thomas Rast
  3 siblings, 2 replies; 59+ messages in thread
From: Junio C Hamano @ 2012-08-06 17:46 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy
  Cc: Thomas Gummerer, git, trast, mhagger, robin.rosenberg

Nguyễn Thái Ngọc Duy <pclouds@gmail.com> writes:

> These mails are about cosmetics only. But I think it helps maintenance
> in long term. I notice in your series we have many functions with _v2
> and _v5 mixed together. Worse, some functions that are _v2 only are
> not suffixed with _v2. I still think separating v2/v5 changes is a
> good idea. So I played a bit, see how it might become.
>
> The next two emails demonstrate how we take v2-specific code out to
> read-cache-v2.c, then add v5 code in the next patch. Notice there's very
> little change in read-cache.c in the second patch. I wanted to see how
> v5 changes affects v2 users and the second patch shows it.
>
> I'm not happy with the first patch either. Ideally it should consist
> of code move only, no other changes. All updates in read_index_from
> and the introduction of struct index_ops should happen in patches
> before that.

Right.

> Then of course you need to split the second patch into several logical
> patches again. We can drop _v5 suffix in read-cache-v5.c (I haven't
> done that). When we add partial read/write for v5, we can add more
> func pointers to index_ops and implement them in v2 (probably as no-op
> or assertion)

The index_ops abstraction is a right way to go, and I like it, but I
think the split illustrated in this patch might turn out to be at
wrong levels (and it is OK, as I understand this is a illustration
of concept patch).

For example, add_to_index() interface may be a good candidate to
have in index_ops.  Because your in-core index may not be holding
everything in a flat array, "find the location in the flat array the
entry would sit, replace the existing one if there is any, otherwise
insert" cannot be a generic way to add a new entry.  If you make the
whole thing an abstract API entry point, a sparse implementation of
the in-core index could still implement it without bringing the
untouched and irrelevant parts of the index to core.

        Side note: with a tree-like implementation of the in-core
        index, "find the location the entry would sit", "get the
        entry at the location", "insert the entry at the location",
        could still be a set of good abstract API, though.  The
        definition of _location_ may be quite different from "the
        offset of the entry counting from the beginning of a flat
        array", which is what index_name_pos() returns.

The story is the same on the removal front.  The current
remove_index_entry_at() interface is tied to the flat array
implementation, so "remove the nth entry from the beginning" is an
inappropriate interface for anything but such an implementation
(unless we come up with an abstract notion of the "location" that is
usable efficiently in a tree-like implementation, that is).

I wish that the development of this topic was done more in a
top-down direction, instead of bottom-up, so that it identified the
necessary access patterns to the in-core index early and come up
with a good set of abstract API first, and then only after that is
done, came up with in-core and on-disk format to support the
necessary operations.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 0/16] Introduce index file format version 5
  2012-08-06 17:46   ` Junio C Hamano
@ 2012-08-07 12:16     ` Nguyen Thai Ngoc Duy
  2012-08-08  1:38       ` Junio C Hamano
  2012-08-07 22:31     ` Thomas Rast
  1 sibling, 1 reply; 59+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-08-07 12:16 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Thomas Gummerer, git, trast, mhagger, robin.rosenberg

On Tue, Aug 7, 2012 at 12:46 AM, Junio C Hamano <gitster@pobox.com> wrote:
> The index_ops abstraction is a right way to go, and I like it, but I
> think the split illustrated in this patch might turn out to be at
> wrong levels (and it is OK, as I understand this is a illustration
> of concept patch).
>
> For example, add_to_index() interface may be a good candidate to
> have in index_ops.  Because your in-core index may not be holding
> everything in a flat array, "find the location in the flat array the
> entry would sit, replace the existing one if there is any, otherwise
> insert" cannot be a generic way to add a new entry.  If you make the
> whole thing an abstract API entry point, a sparse implementation of
> the in-core index could still implement it without bringing the
> untouched and irrelevant parts of the index to core.
>
>         Side note: with a tree-like implementation of the in-core
>         index, "find the location the entry would sit", "get the
>         entry at the location", "insert the entry at the location",
>         could still be a set of good abstract API, though.  The
>         definition of _location_ may be quite different from "the
>         offset of the entry counting from the beginning of a flat
>         array", which is what index_name_pos() returns.
>
> The story is the same on the removal front.  The current
> remove_index_entry_at() interface is tied to the flat array
> implementation, so "remove the nth entry from the beginning" is an
> inappropriate interface for anything but such an implementation
> (unless we come up with an abstract notion of the "location" that is
> usable efficiently in a tree-like implementation, that is).

add_to_index and remove_index_entry_at seem good places for the cut.
But do we need to redefine the location? I think we need to sketch out
a long term plan first. In my mind it's like this:

 - for 3-5 years since v5 is released, we support v2 and v5 in
parallel. Other code can take advantage of v5, but it must neither
sacrifice v2 performance, compatibility nor maintainability
 - after that, we deprecate v2. v2 is automatically converted to v5 in
memory. v2 perf may suffer but at that point we don't care any more as
the majority of users should have been migrated to v5 (*)

If the long term plan is actually that, we will need to stick to flat
array implementation for forseeable future as moving from it most
likely impacts v2 performance. When v5 is used, it must maintain two
views, tree and list, at the same time. We can then postpone thinking
about the redefinition until v2 is deprecated and in-core moved to
tree view only.

This might not be the best way forward as v2 incompatible features
(like keeping empty directories in index, what else?) may never come
until v2 is deprecated.

(*) this is questionable though. Depending on the benchmarks, we may
want to support both v2 and v5 for indefinite time with v2 recommended
for small projects and v5 the rest. If it's so, yeah we need to think
of better API now.

> I wish that the development of this topic was done more in a
> top-down direction, instead of bottom-up, so that it identified the
> necessary access patterns to the in-core index early and come up
> with a good set of abstract API first, and then only after that is
> done, came up with in-core and on-disk format to support the
> necessary operations.

Yeah, which is why I asked to try out partial reading/writing early as
I'm a learn by example kind of guy. Speaking of which, now that we
have something substantial, what should be done before this may be
considered for 'next'?

I don't think we should wait until it reaches full potential (i.e.
significant perf gain from all major index-related commands). Apart
from patch preparation, more testing and benchmarking, should we wait
until we get new public API or just use current index API?

One API addition that I (if nobody else) will do soon is
read_index_partial(<pathspec>) and adapt as many read-only commands as
possible to it. (v2 just ignores the pathspec input and loads the
whole thing, so all commands must be aware the the loaded may be more
than what they asked). But this can wait until v5 gets in.
-- 
Duy

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 01/16] Modify cache_header to prepare for other index formats
  2012-08-06  1:17   ` Junio C Hamano
@ 2012-08-07 12:41     ` Thomas Gummerer
  2012-08-07 15:45       ` Junio C Hamano
  0 siblings, 1 reply; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-07 12:41 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, trast, mhagger, pcouds, robin.rosenberg

On 08/05, Junio C Hamano wrote:
> Thomas Gummerer <t.gummerer@gmail.com> writes:
> 
> > diff --git a/read-cache.c b/read-cache.c
> > index 2f8159f..5d61d92 100644
> > --- a/read-cache.c
> > +++ b/read-cache.c
> > @@ -1433,7 +1446,7 @@ int read_index_from(struct index_state *istate, const char *path)
> >  
> >  	errno = EINVAL;
> >  	mmap_size = xsize_t(st.st_size);
> > -	if (mmap_size < sizeof(struct cache_header) + 20)
> > +	if (mmap_size < sizeof(struct cache_version_header) + 20)
> >  		die("index file smaller than expected");
> 
> At the design level, I have a large problem with this change.  I
> understand that you wanted to make sure that some versions can lack
> the num-entries word in the header, but then what is the point of
> keeping that "+20" here?  Are all versions of the file format still
> required to have the 20-byte trailing SHA-1 sum over the whole file?

No, index-v5 doesn't have the trailing SHA-1 over the whole file.

> 	Side note: I am actually fine with that "sum at the end"
> 	requirement, but then it needs to be documented what are
> 	assumed to be unomittable and why.
> 
>         I also do not see why v5 *needs* to drop the num-entries
>         word from the header in the first place.

v5 still has the num-entries word, but at a different position.
The +20 however would still be wrong, because of the missing
SHA-1 over the file.

> At the practical level, we used to error out, upon seeing a file
> that claims to be v2 in the header but is too small to hold the
> version header, the number of entries word and the trailing SHA-1
> sum.  We no longer do this and happily call verify_hdr() in the
> following code even when the file is too small, no?

This part is called even before we know what version of the index
we will read, and before the file is mmaped.  The best solution
i think is to drop the check and just call verify_hdr, since it will 
check the checksum anyway and detect the error, while not having
a big cost on a index file that is very small.

> > @@ -1442,11 +1455,13 @@ int read_index_from(struct index_state *istate, const char *path)
> >  		die_errno("unable to map index file");
> >  
> >  	hdr = mmap;
> > +	hdr_v2 =  mmap + sizeof(*hdr);
> >  	if (verify_hdr(hdr, mmap_size) < 0)
> >  		goto unmap;

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 04/16] Modify write functions to prepare for other index formats
  2012-08-06  1:34   ` Junio C Hamano
@ 2012-08-07 12:50     ` Thomas Gummerer
  0 siblings, 0 replies; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-07 12:50 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, trast, mhagger, pclouds, robin.rosenberg



On 08/05, Junio C Hamano wrote:
> Thomas Gummerer <t.gummerer@gmail.com> writes:
> 
> > -static int ce_write(git_SHA_CTX *context, int fd, void *data, unsigned int len)
> > +static int ce_write_v2(git_SHA_CTX *context, int fd, void *data, unsigned int len)
> >  {
> 
> Mild NAK to name this function with any hint that it is for v2 only.
> The type of "data" is not "struct ondisk_index_entry_v2" and this is
> just a way to stream data to "fd" while hashing, which is similar in
> spirit to what csum-file.c "sha1file'"API does.  Perhaps we may want
> to update ce_write() interface to build on top of sha1file API?
> 
> At this step in the series, is it too early to split read-cache.c
> into two files, move all the v2 specific part to read-cache-v2.c,
> and keep static function names like write_index_ext_header() as they
> are?  After all, the main dispatch would become
> 
> > +int write_index(struct index_state *istate, int newfd)
> > +{
> > +	if (!istate->version)
> > +		istate->version = INDEX_FORMAT_DEFAULT;
> > +
> > +	return write_index_v2(istate, newfd);
> > +}
> 
> so read-cache-v2.c would need to export write_index_v2() but the
> functions to implement it like ce_write_entry() do not have to be
> exposed outside the file, no?

No I think it makes sense to split them at this point. I'll do it along
the lines of what Duy suggested with his patch. [1]

[1] http://thread.gmane.org/gmane.comp.version-control.git/202923/focus=202964

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 01/16] Modify cache_header to prepare for other index formats
  2012-08-07 12:41     ` Thomas Gummerer
@ 2012-08-07 15:45       ` Junio C Hamano
  0 siblings, 0 replies; 59+ messages in thread
From: Junio C Hamano @ 2012-08-07 15:45 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pcouds, robin.rosenberg

Thomas Gummerer <t.gummerer@gmail.com> writes:

> This part is called even before we know what version of the index
> we will read, and before the file is mmaped.  The best solution
> i think is to drop the check and just call verify_hdr, ...

Exactly.  And do the length checking inside verify_hdr() or its
callee where we know what the minimum length is depending on the
version as necessary to avoid over-reading.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 06/16] t3700: sleep for 1 second, to avoid interfering with the racy code
  2012-08-06  1:43   ` Junio C Hamano
@ 2012-08-07 16:59     ` Thomas Gummerer
  2012-08-08 20:16       ` Junio C Hamano
  0 siblings, 1 reply; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-07 16:59 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, trast, mhagger, pcouds, robin.rosenberg

On 08/05, Junio C Hamano wrote:
> Thomas Gummerer <t.gummerer@gmail.com> writes:
> 
> > The new git racy code uses the mtime of cache-entries to smudge
> > a racy clean entry, and loads the work, of checking the file-system
> 
> -ECANTPARSE.

The git racy code for index-v5 uses the mtime of the cache-entries as
smudge markers. The work of checking the file-system is loaded of to
the reader.

> > if the entry has really changed, off to the reader. This interferes
> > with this test, because the entry is racily smudged and thus has
> > mtime 0. We wait 1 second to avoid smudging the entry and getting
> > correct test results.
> 
> Mild NAK, especially it is totally unclear why you even need to muck
> with racy-git check in the current format of the index in the first
> place, and even if it were necessary, it is unclear why this cannot
> be done with test-chmtime.

The racy-git code needs to be changed, to avoid problems when implementing
the partial writing for index-v5. Otherwise it could cause problems, when
we have entries that should be smudged, but are not due to the different
racy algorithms.

I'll do it with test-chmtime in the reroll though.

> > Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
> > ---
> >  t/t3700-add.sh |    1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/t/t3700-add.sh b/t/t3700-add.sh
> > index 874b3a6..4d70805 100755
> > --- a/t/t3700-add.sh
> > +++ b/t/t3700-add.sh
> > @@ -184,6 +184,7 @@ test_expect_success 'git add --refresh with pathspec' '
> >  	echo >foo && echo >bar && echo >baz &&
> >  	git add foo bar baz && H=$(git rev-parse :foo) && git rm -f foo &&
> >  	echo "100644 $H 3	foo" | git update-index --index-info &&
> > +	sleep 1 &&
> >  	test-chmtime -60 bar baz &&
> >  	>expect &&
> >  	git add --refresh bar >actual &&

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 08/16] Make in-memory format aware of stat_crc
  2012-08-06  1:46   ` Junio C Hamano
@ 2012-08-07 19:02     ` Thomas Gummerer
  0 siblings, 0 replies; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-07 19:02 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, trast, mhagger, pclouds, robin.rosenberg

On 08/05, Junio C Hamano wrote:
> Thomas Gummerer <t.gummerer@gmail.com> writes:
> 
> > +	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
> > +	stat = htonl(ce->ce_ino);
> > +	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
> > +	stat = htonl(ce->ce_size);
> > +	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
> > +	stat = htonl(ce->ce_dev);
> > +	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
> > +	stat = htonl(ce->ce_uid);
> > +	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
> > +	stat = htonl(ce->ce_gid);
> > +	stat_crc = crc32(stat_crc, (Bytef*)&stat, 4);
> > +	return stat_crc;
> 
> What are these (Bytef *) casts are about?  We do not use it in any
> of our existing calls to crc32().

>From a quick look over the existing calls, their argument is
always either a void* or a char* pointer.  Using pointers other
than those two or Bytef* gives compiler warnings.  I can cast
to either void* or char* if that's preferred.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 10/16] Read resolve-undo data
  2012-08-06  1:51   ` Junio C Hamano
@ 2012-08-07 19:17     ` Thomas Gummerer
  0 siblings, 0 replies; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-07 19:17 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, trast, mhagger, pcouds, robin.rosenberg



On 08/05, Junio C Hamano wrote:
> Thomas Gummerer <t.gummerer@gmail.com> writes:
> 
> > Make git read the resolve-undo data from the index.
> >
> > Since the resolve-undo data is joined with the conflicts in
> > the ondisk format of the index file version 5, conflicts and
> > resolved data is read at the same time, and the resolve-undo
> > data is then converted to the in-memory format.
> >
> > Helped-by: Thomas Rast <trast@student.ethz.ch>
> > Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
> > ---
> >  read-cache.c   |    1 +
> >  resolve-undo.c |   36 ++++++++++++++++++++++++++++++++++++
> >  resolve-undo.h |    2 ++
> >  3 files changed, 39 insertions(+)
> >
> > diff --git a/read-cache.c b/read-cache.c
> > index 70334f9..03370f9 100644
> > --- a/read-cache.c
> > +++ b/read-cache.c
> > @@ -1942,6 +1942,7 @@ static struct directory_entry *read_entries_v5(struct index_state *istate,
> >  	int i;
> >  
> >  	conflict_queue = read_conflicts_v5(de, mmap, mmap_size, fd);
> > +	resolve_undo_convert_v5(istate, conflict_queue);
> >  	for (i = 0; i < de->de_nfiles; i++) {
> >  		ce = read_entry_v5(de,
> >  				entry_offset,
> > diff --git a/resolve-undo.c b/resolve-undo.c
> > index 72b4612..f96c6ba 100644
> > --- a/resolve-undo.c
> > +++ b/resolve-undo.c
> > @@ -170,3 +170,39 @@ void unmerge_index(struct index_state *istate, const char **pathspec)
> >  		i = unmerge_index_entry_at(istate, i);
> >  	}
> >  }
> > +
> > +void resolve_undo_convert_v5(struct index_state *istate,
> > +					struct conflict_entry *ce)
> > +{
> 
> It is unclear why this needs to be part of resolve-undo.c and
> exported from it.  Shouldn't it (and bulk of the previous few
> patches) be part of a read-cache-v5.c file (with v2/3/4 specific
> part separated out from read-cache.c to form read-cache-v2.c)?

I thought this should be part of resolve-undo.c, to keep everything
that has to do with resolve-undo in the same file, taking model
from resolve_undo_read and resolve_undo_write.  But I don't care
to deeply about it, it can easily be moved to read-cache-v5.c.

> > +	int i;
> > +
> > +	while (ce) {
> > +		struct string_list_item *lost;
> > +		struct resolve_undo_info *ui;
> > +		struct conflict_part *cp;
> > +
> > +		if (ce->entries && (ce->entries->flags & CONFLICT_CONFLICTED) != 0) {
> > +			ce = ce->next;
> > +			continue;
> > +		}
> > +		if (!istate->resolve_undo) {
> > +			istate->resolve_undo = xcalloc(1, sizeof(struct string_list));
> > +			istate->resolve_undo->strdup_strings = 1;
> > +		}
> > +
> > +		lost = string_list_insert(istate->resolve_undo, ce->name);
> > +		if (!lost->util)
> > +			lost->util = xcalloc(1, sizeof(*ui));
> > +		ui = lost->util;
> > +
> > +		cp = ce->entries;
> > +		for (i = 0; i < 3; i++)
> > +			ui->mode[i] = 0;
> > +		while (cp) {
> > +			ui->mode[conflict_stage(cp) - 1] = cp->entry_mode;
> > +			hashcpy(ui->sha1[conflict_stage(cp) - 1], cp->sha1);
> > +			cp = cp->next;
> > +		}
> > +		ce = ce->next;
> > +	}
> > +}
> > diff --git a/resolve-undo.h b/resolve-undo.h
> > index 8458769..ab660a6 100644
> > --- a/resolve-undo.h
> > +++ b/resolve-undo.h
> > @@ -13,4 +13,6 @@ extern void resolve_undo_clear_index(struct index_state *);
> >  extern int unmerge_index_entry_at(struct index_state *, int);
> >  extern void unmerge_index(struct index_state *, const char **);
> >  
> > +extern void resolve_undo_convert_v5(struct index_state *, struct conflict_entry *);
> > +
> >  #endif

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 2/2] Add index-v5
  2012-08-06 14:36   ` [PATCH 2/2] Add index-v5 Nguyễn Thái Ngọc Duy
@ 2012-08-07 21:52     ` Robin Rosenberg
  2012-08-08 10:54       ` Thomas Gummerer
  0 siblings, 1 reply; 59+ messages in thread
From: Robin Rosenberg @ 2012-08-07 21:52 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy
  Cc: Thomas Gummerer, git, trast, mhagger, gitster

Nguyễn Thái Ngọc Duy skrev 2012-08-06 16.36:

> +++ b/read-cache-v5.c
> @@ -0,0 +1,1170 @@
> +#include "cache.h"
> +#include "read-cache.h"
> +#include "resolve-undo.h"
> +#include "cache-tree.h"
> +
> +struct cache_header_v5 {
> +	unsigned int hdr_ndir;
> +	unsigned int hdr_nfile;
> +	unsigned int hdr_fblockoffset;
> +	unsigned int hdr_nextension;
> +};
> +
> +struct ondisk_cache_entry_v5 {
> +	unsigned short flags;
> +	unsigned short mode;
> +	struct cache_time mtime;
> +	int stat_crc;
> +	unsigned char sha1[20];
> +};

I mentioned this before in another thread, but for JGit I'd like
to see size as a separate attribute. The rest of stat_crc is not
available to Java so when this index gets its way into JGit,
stat_crc will be zero and will never be checked.

-- robin

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 0/16] Introduce index file format version 5
  2012-08-06 17:46   ` Junio C Hamano
  2012-08-07 12:16     ` Nguyen Thai Ngoc Duy
@ 2012-08-07 22:31     ` Thomas Rast
  2012-08-07 23:26       ` Junio C Hamano
  2012-08-08 10:30       ` Nguyen Thai Ngoc Duy
  1 sibling, 2 replies; 59+ messages in thread
From: Thomas Rast @ 2012-08-07 22:31 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Nguyễn Thái Ngọc Duy, Thomas Gummerer, git,
	trast, mhagger, robin.rosenberg

Junio C Hamano <gitster@pobox.com> writes:

>> Then of course you need to split the second patch into several logical
>> patches again. We can drop _v5 suffix in read-cache-v5.c (I haven't
>> done that). When we add partial read/write for v5, we can add more
>> func pointers to index_ops and implement them in v2 (probably as no-op
>> or assertion)
>
> The index_ops abstraction is a right way to go, and I like it, but I
> think the split illustrated in this patch might turn out to be at
> wrong levels (and it is OK, as I understand this is a illustration
> of concept patch).
>
> For example, add_to_index() interface may be a good candidate to
> have in index_ops.  Because your in-core index may not be holding
> everything in a flat array, "find the location in the flat array the
> entry would sit, replace the existing one if there is any, otherwise
> insert" cannot be a generic way to add a new entry.  If you make the
> whole thing an abstract API entry point, a sparse implementation of
> the in-core index could still implement it without bringing the
> untouched and irrelevant parts of the index to core.
[...]
> I wish that the development of this topic was done more in a
> top-down direction, instead of bottom-up, so that it identified the
> necessary access patterns to the in-core index early and come up
> with a good set of abstract API first, and then only after that is
> done, came up with in-core and on-disk format to support the
> necessary operations.

I like the general idea, too, but I think there is a long way ahead, and
we shouldn't hold up v5 on this.

Thomas and me -- it was mostly my bad idea -- spent some time going
through all the loops that iterate over the index.  You can get some
taste of it with 'git grep ce_stage', mostly because many of them either
skip unmerged entries or specifically look for them.  There are subtle
differences between the loops on many points: what do they do when they
hit an unmerged entry?  Or a CE_REMOVED or CE_VALID one?

I gave up after treating half of them and horribly breaking the test
suite.  I suppose eventually we will have to classify these loops by
properties like how they treat unmerged entries, and replace them by
some clever for_each_cache_entry macro.

It would open some interesting possibilities.  For example, for v5 it
would be far better if conflicted and resolve-undo entries were a
property of the normal index entry, instead of something that so happens
to be consecutive entries and in a completely different place,
respectively.

-- 
Thomas Rast
trast@{inf,student}.ethz.ch

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 0/16] Introduce index file format version 5
  2012-08-07 22:31     ` Thomas Rast
@ 2012-08-07 23:26       ` Junio C Hamano
  2012-08-08  9:07         ` Thomas Rast
  2012-08-08 10:30       ` Nguyen Thai Ngoc Duy
  1 sibling, 1 reply; 59+ messages in thread
From: Junio C Hamano @ 2012-08-07 23:26 UTC (permalink / raw)
  To: Thomas Rast
  Cc: Nguyễn Thái Ngọc Duy, Thomas Gummerer, git,
	mhagger, robin.rosenberg

Thomas Rast <trast@student.ethz.ch> writes:

> I like the general idea, too, but I think there is a long way ahead, and
> we shouldn't hold up v5 on this.

We shouldn't rush, only to keep some deadline, and regret it later
that we butchered the index format without thinking things through.
When this was added to the GSoC idea page, I already said upfront
that this was way too big a topic to be a GSoC project, didn't I?

> It would open some interesting possibilities.

It is unclear why the current format is less easier to get the same
kind of enhancement compared to the proposed v5 for the same kind of
"possibilities."

"This codepath currently does things this way because it is limited
by the flat in-core index.  That codepath does a similar thing, and
that other one has the same issue.  They all can benefit if we give
them this API, and the implementation of the API could benefit if
the underlying on-disk format is changed that way.  And the other
codepaths that use the current API won't be broken by the on-disk
format change, as all the accesses are encapsulated with this patch
without losing performance, readability nor modifiability" is very
much acceptable [*1*], but "The new on-disk format is different from
the current one, and as it is different from the current one, we can
easily enhance it even more by hooking anything interesting to it!"
does not sound like a valid argument.  

> For example, for v5 it
> would be far better if conflicted and resolve-undo entries were a
> property of the normal index entry, instead of something that so happens
> to be consecutive entries and in a completely different place,
> respectively.

I am not sure I am convinced.  Conflicts are already expressed by an
attribute on a normal index entry (it is called "stage"), and
because we check for "is the index fully merged" fairly often, it
makes sense to have it in each entry.  Actually having an unmerged
entry is a rare event (happens only during a mergy operation that
gave control back to you), so we do not lose much by expressing them
as consecutive entries.  Resolve-undo is far less often used, and is
not an essential feature, so it makes perfect sense to have it as an
optional index extension to allow versions of Git that are unaware
of it to still use an index file that has it.

I do not find your "For example" argument particularly convincing
rationale to go to the proposed v5, even if I thought resolve-undo
were one of the more important things in the index (which I don't).


[Footnote]

*1* Duy's "'ls-files $path' would benefit from a path-limited index
file reader, and the function to do so would be an obvious new API
that would benefit from tree-shaped on-disk format" suggestion is a
design going in the right direction, as long as it is accompanied
with "for the remaining users that need the whole index as a linear
array, reading such a tree-shaped on-disk format can be supported
without loss of performance with this patch".

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 0/16] Introduce index file format version 5
  2012-08-07 12:16     ` Nguyen Thai Ngoc Duy
@ 2012-08-08  1:38       ` Junio C Hamano
  2012-08-08 13:54         ` Nguyen Thai Ngoc Duy
  0 siblings, 1 reply; 59+ messages in thread
From: Junio C Hamano @ 2012-08-08  1:38 UTC (permalink / raw)
  To: Nguyen Thai Ngoc Duy
  Cc: Thomas Gummerer, git, trast, mhagger, robin.rosenberg

Nguyen Thai Ngoc Duy <pclouds@gmail.com> writes:

> add_to_index and remove_index_entry_at seem good places for the cut.
> But do we need to redefine the location?

That is one of the things we need to think about carefully.  Of
course, if add_to_index() just takes a pathname out of the ce the
caller wants to add, you can define remove_from_index() that takes a
pathname (and possibly a stage), finds the ce with that pathname in
the index and removes it.  But that would unnecessrily penalize the
callers that follow "see if there is such an entry (i.e. "locate"),
optionally inspect the entry and then decide to remove", especially
if the "locate" is expensive.  If a nonsignificant number of callers
follow such a pattern, then having a separate "locate" API that can
return a "location" that is expressed either as the number of
entries to skip from the beginning (in a flat in-core array) or a
pair of in-core "directory" structure and the index in the directory
to let the caller find the entry quickly, and then later pass it to
"remove", would be more appropriate.  add_index_entry_at() may also
not a bad thing to have if many callers turn out to follow a similar
access pattern (i.e. locate, decide to or not to replace when there
already is one, and then add it).

>  - for 3-5 years since v5 is released, we support v2 and v5 in
> parallel. Other code can take advantage of v5, but it must neither
> sacrifice v2 performance, compatibility nor maintainability
>  - after that, we deprecate v2. v2 is automatically converted to v5 in
> memory. v2 perf may suffer but at that point we don't care any more as
> the majority of users should have been migrated to v5 (*)

As long as the performance of Git on a working tree that used to get
certain performance back when it was using v2 does not degrade when
it is converted to v5 or later, I think the above is a good way
forward.

> If the long term plan is actually that, we will need to stick to flat
> array implementation for forseeable future as moving from it most
> likely impacts v2 performance.

I do not see why we need to "stick to"; I do not see why it is
necessarily a bad thing if we end up choosing to "stick to" if the
reason we choose it is because the flat in-core performs better.

If the workload we _care_ about is served better by using an API
that works over an in-core tree-shaped index data structure, I do
not think it is unreasonable to read the v2 on-disk format and
represent it as a tree-shaped index while we read it.  Of course,
there are things that are not as effective when reading from the
flat v2 on-disk format (e.g. path limited reading will have to at
least _scan_ the whole thing, even though it may process only the
entries that match the pathspec) compared to reading from a
tree-shaped on-disk format, but I doubt that the difference between
the cost of reading into a flat array and the cost of reading and
forming whatever non-flat data structure you seem to think is better
is so big that it would negate the benefit of using a better in-core
structure.

> This might not be the best way forward as v2 incompatible features
> (like keeping empty directories in index, what else?) may never come
> until v2 is deprecated.

I do not think "empty directories" matter to begin with, but even if
it did, I do not think v2 is inherently incapable of being enhanced
to record one if you really wanted to.  Either you come up with a
new "mode" bits and add it as a regular cache entry, or record the
fact that there is a directory in a new index extension.

The real issue to solve is to decide what semantics you want
(e.g. What to do when you earlier have added an empty directory,
added a file in it and then removed the file, making it empty again?
What if that happened during a merge?), to verify the semantics you
define are sane, to add "keep_empty_directory()" function to
read-cache.c, and to sprinkle callers to the API function as needed.

These have to be done regardless of the actual on-disk format.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 15/16] update-index.c: add a force-rewrite option
  2012-08-06  1:58   ` Junio C Hamano
@ 2012-08-08  7:31     ` Thomas Gummerer
  0 siblings, 0 replies; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-08  7:31 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, trast, mhagger, pclouds, robin.rosenberg

On 08/05, Junio C Hamano wrote:
> Thomas Gummerer <t.gummerer@gmail.com> writes:
> 
> > Add a force-rewrite option to update-index, which allows the user
> > to rewrite the index, even if there are no changes. This can be used
> > to do performance tests of both the reader and the writer.
> >
> > Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
> > ---
> >  builtin/update-index.c |    5 ++++-
> >  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> I do not think this is wrong per-se, but is a new command that needs
> to be documented?  If it is only for benchmarking and debugging, it
> might be sufficient to make "--index-version <n>" always rewrite the
> index.

The command is only for benchmarking, I don't see another case where
it makes sense for anyone to rewrite the whole index, without changing
anything. I've made --index-version rewrite the index for the re-roll.

> > diff --git a/builtin/update-index.c b/builtin/update-index.c
> > index 4ce341c..7fedc8f 100644
> > --- a/builtin/update-index.c
> > +++ b/builtin/update-index.c
> > @@ -24,6 +24,7 @@ static int allow_remove;
> >  static int allow_replace;
> >  static int info_only;
> >  static int force_remove;
> > +static int force_rewrite;
> >  static int verbose;
> >  static int mark_valid_only;
> >  static int mark_skip_worktree_only;
> > @@ -728,6 +729,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
> >  		OPT_BIT(0, "unmerged", &refresh_args.flags,
> >  			"refresh even if index contains unmerged entries",
> >  			REFRESH_UNMERGED),
> > +		OPT_SET_INT(0, "force-rewrite", &force_rewrite,
> > +			"force a index rewrite even if there is no change", 1),
> >  		{OPTION_CALLBACK, 0, "refresh", &refresh_args, NULL,
> >  			"refresh stat information",
> >  			PARSE_OPT_NOARG | PARSE_OPT_NONEG,
> > @@ -886,7 +889,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
> >  		strbuf_release(&buf);
> >  	}
> >  
> > -	if (active_cache_changed) {
> > +	if (active_cache_changed || force_rewrite) {
> >  		if (newfd < 0) {
> >  			if (refresh_args.flags & REFRESH_QUIET)
> >  				exit(128);

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 09/16] Read index-v5
  2012-08-06  5:17   ` Junio C Hamano
@ 2012-08-08  7:41     ` Thomas Gummerer
  2012-08-08 16:49       ` Junio C Hamano
  0 siblings, 1 reply; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-08  7:41 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, trast, mhagger, pclouds, robin.rosenberg

On 08/05, Junio C Hamano wrote:
> Thomas Gummerer <t.gummerer@gmail.com> writes:
> 
> > +static struct directory_entry *read_directories_v5(unsigned int *dir_offset,
> > +				unsigned int *dir_table_offset,
> > +				void *mmap,
> > +				int mmap_size)
> > +{
> > +	int i, ondisk_directory_size;
> > +	uint32_t *filecrc, *beginning, *end;
> > +	struct directory_entry *current = NULL;
> > +	struct ondisk_directory_entry *disk_de;
> > +	struct directory_entry *de;
> > +	unsigned int data_len, len;
> > +	char *name;
> > +
> > +	ondisk_directory_size = sizeof(disk_de->flags)
> > +		+ sizeof(disk_de->foffset)
> > +		+ sizeof(disk_de->cr)
> > +		+ sizeof(disk_de->ncr)
> > +		+ sizeof(disk_de->nsubtrees)
> > +		+ sizeof(disk_de->nfiles)
> > +		+ sizeof(disk_de->nentries)
> > +		+ sizeof(disk_de->sha1);
> > +	name = (char *)mmap + *dir_offset;
> > +	beginning = mmap + *dir_table_offset;
> 
> Notice how you computed name with pointer arithmetic by first
> casting mmap (which is "void *") and when computing beginning, you
> forgot to cast mmap and attempted pointer arithmetic with "void *".
> The latter does not work and breaks compilation.
> 
> The pointer-arith with "void *" is not limited to this function.

Sorry for not noticing this, it always compiled fine for me. Guess
I should use -pedantic more often ;-)

> Please check the a band-aid (I wouldn't call it a fix-up) patch I
> added on top of the series before queuing the topic to 'pu'; it is
> primarily to illustrate the places I noticed that have this issue.
> 
> I do not necessarily suggest that the way the band-aid patch makes
> it compile is the best approach.  It might be cleaner to use a saner
> type like "char *" (or perhaps "const char *") as the type to point
> at a piece of memory you read from the disk.  I haven't formed an
> opinion.
> 
> Thanks.

I've used the type of the respective assignment for now. e.g. i have
struct cache_header *hdr, so I'm using
hdr = (struct cache_header *)mmap + x;

read-cache-v5.c compiles with -pedantic without warnings.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 0/16] Introduce index file format version 5
  2012-08-07 23:26       ` Junio C Hamano
@ 2012-08-08  9:07         ` Thomas Rast
  2012-08-08 22:47           ` Junio C Hamano
  0 siblings, 1 reply; 59+ messages in thread
From: Thomas Rast @ 2012-08-08  9:07 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Nguyễn Thái Ngọc Duy, Thomas Gummerer, git,
	mhagger, robin.rosenberg

Junio C Hamano <gitster@pobox.com> writes:

> Thomas Rast <trast@student.ethz.ch> writes:
>
>> I like the general idea, too, but I think there is a long way ahead, and
>> we shouldn't hold up v5 on this.
>
> We shouldn't rush, only to keep some deadline, and regret it later
> that we butchered the index format without thinking things through.
> When this was added to the GSoC idea page, I already said upfront
> that this was way too big a topic to be a GSoC project, didn't I?

Let me spell out my concern.  There are two v5s here:

* The extent of the GSoC task.

* The eventual implementation of index-v5 that goes into Git mainline.

IMHO this thread is mixing up the two.  There indeed must not be any
rush in the final implementation of index-v5.  However, the GSoC ends in
less than two weeks, and I have to evaluate Thomas on whatever is
finished until then.

AFAIK Thomas is now cleaning up the existing code to be in readable
shape, using your feedback, which is great.  However, the above
suggestion is such a fuzzily-specified task that there is no way to even
find out what needs to be done within the next two weeks.  Perhaps it
makes sense, at this point, to wrap anything that ended up having _v[25]
suffixes in an index_ops like Duy did.  That's a long way from actually
following through on the idea, though.

> [...] "The new on-disk format is different from
> the current one, and as it is different from the current one, we can
> easily enhance it even more by hooking anything interesting to it!"
> does not sound like a valid argument.  
>
>> For example, for v5 it
>> would be far better if conflicted and resolve-undo entries were a
>> property of the normal index entry, instead of something that so happens
>> to be consecutive entries and in a completely different place,
>> respectively.
>
> I am not sure I am convinced.  Conflicts are already expressed by an
> attribute on a normal index entry (it is called "stage"), and
> because we check for "is the index fully merged" fairly often, it
> makes sense to have it in each entry.  Actually having an unmerged
> entry is a rare event (happens only during a mergy operation that
> gave control back to you), so we do not lose much by expressing them
> as consecutive entries.  Resolve-undo is far less often used, and is
> not an essential feature, so it makes perfect sense to have it as an
> optional index extension to allow versions of Git that are unaware
> of it to still use an index file that has it.

I picked this example because in the big picture, the current code goes
to silly contortions to shuffle data around.  Conflicts and resolve-undo
entries are two faces of the same coin, but the code does not express
this at all.  Whenever the user resolves a conflict, it removes the
existing index entries (consecutive in a flat table) and inserts them in
the resolve-undo tree (tree-shaped where every entry has all stages
embedded).  When using 'checkout -m' to recover the conflict, it goes
the other way.

v5 would simplify this: the difference between a conflict and a
resolve-undo entry is only one bit.  But because it needs to maintain v2
compatiblity, it first untangles the mixed conflict/resolve-undo data
and puts them in the right format, then later reassembles them.

So "v5 could do it faster if all the code were written for it" is only
half of it.  v5's data layout would also result in simpler data flow,
but as long as it is not allowed to exploit this, it's actually *more*
layers of complexity.

I think the part you snipped

>> the loops that iterate over the index [...] either
>> skip unmerged entries or specifically look for them.  There are subtle
>> differences between the loops on many points: what do they do when they
>> hit an unmerged entry?  Or a CE_REMOVED or CE_VALID one?

is a symptom of the same general problem: the data structures are sound,
but they are leaking all over the code and now we have lots of
complexity to do even simple operations like "for each unmerged entry".

-- 
Thomas Rast
trast@{inf,student}.ethz.ch

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 0/16] Introduce index file format version 5
  2012-08-07 22:31     ` Thomas Rast
  2012-08-07 23:26       ` Junio C Hamano
@ 2012-08-08 10:30       ` Nguyen Thai Ngoc Duy
  1 sibling, 0 replies; 59+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-08-08 10:30 UTC (permalink / raw)
  To: Thomas Rast
  Cc: Junio C Hamano, Thomas Gummerer, git, mhagger, robin.rosenberg

On Wed, Aug 8, 2012 at 5:31 AM, Thomas Rast <trast@student.ethz.ch> wrote:
> Thomas and me -- it was mostly my bad idea -- spent some time going
> through all the loops that iterate over the index.  You can get some
> taste of it with 'git grep ce_stage', mostly because many of them either
> skip unmerged entries or specifically look for them.  There are subtle
> differences between the loops on many points: what do they do when they
> hit an unmerged entry?

Most of them ignore unmerged entries, git-add and git-update-index can
remove unmerged entries, unpack-trees (reset, merge, checkout...) can
generate them. What's the problem with it?

> Or a CE_REMOVED or CE_VALID one?

CE_VALID is assume-unchanged feature. I don't think we have problems with it.

CE_REMOVED is to say "we are going to remove this entry both in index
and worktree, but if we remove it now we would have no way to know
which file in worktree to be removed later on, so we just mark it here
as a ghost entry in index". It's only used by unpack-trees, I think.
From the index pov, CE_REMOVED entries never get written to file. It
may complicate tree building for v5.
-- 
Duy

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 2/2] Add index-v5
  2012-08-07 21:52     ` Robin Rosenberg
@ 2012-08-08 10:54       ` Thomas Gummerer
  0 siblings, 0 replies; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-08 10:54 UTC (permalink / raw)
  To: Robin Rosenberg
  Cc: Nguy� n Thái Ngọc Duy, git, trast, mhagger, gitster



On 08/07, Robin Rosenberg wrote:
> Nguyễn Thái Ngọc Duy skrev 2012-08-06 16.36:
> 
> >+++ b/read-cache-v5.c
> >@@ -0,0 +1,1170 @@
> >+#include "cache.h"
> >+#include "read-cache.h"
> >+#include "resolve-undo.h"
> >+#include "cache-tree.h"
> >+
> >+struct cache_header_v5 {
> >+	unsigned int hdr_ndir;
> >+	unsigned int hdr_nfile;
> >+	unsigned int hdr_fblockoffset;
> >+	unsigned int hdr_nextension;
> >+};
> >+
> >+struct ondisk_cache_entry_v5 {
> >+	unsigned short flags;
> >+	unsigned short mode;
> >+	struct cache_time mtime;
> >+	int stat_crc;
> >+	unsigned char sha1[20];
> >+};
> 
> I mentioned this before in another thread, but for JGit I'd like
> to see size as a separate attribute. The rest of stat_crc is not
> available to Java so when this index gets its way into JGit,
> stat_crc will be zero and will never be checked.
> 

I'm sorry for forgetting to add this, it will be included in the
re-roll.  The stat_crc will be ignored if it is 0 in the ondisk
index.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 0/16] Introduce index file format version 5
  2012-08-08  1:38       ` Junio C Hamano
@ 2012-08-08 13:54         ` Nguyen Thai Ngoc Duy
  2012-08-08 16:31           ` Junio C Hamano
  0 siblings, 1 reply; 59+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-08-08 13:54 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Thomas Gummerer, git, trast, mhagger, robin.rosenberg

On Wed, Aug 8, 2012 at 8:38 AM, Junio C Hamano <gitster@pobox.com> wrote:
> If the workload we _care_ about is served better by using an API
> that works over an in-core tree-shaped index data structure, I do
> not think it is unreasonable to read the v2 on-disk format and
> represent it as a tree-shaped index while we read it.  Of course,
> there are things that are not as effective when reading from the
> flat v2 on-disk format (e.g. path limited reading will have to at
> least _scan_ the whole thing, even though it may process only the
> entries that match the pathspec) compared to reading from a
> tree-shaped on-disk format, but I doubt that the difference between
> the cost of reading into a flat array and the cost of reading and
> forming whatever non-flat data structure you seem to think is better
> is so big that it would negate the benefit of using a better in-core
> structure.

OK how about this. The general idea is preserve/extend current flat
index API and add a new (tree-based) one. Index users can use either.
They can even mix them up (which they do because we can't just flip
the API in one day for about 200 source files).

The day that unpack_trees() is converted to tree API, I will declare
v5 victory ;)

= Cleanup =

struct cache_entry becomes partly opaque. ce_ctime..ce_gid are hidden
in -v2.c and -v5.c. We only expose ce_size, ce_flags, ce_namelen, name
and sha1 to index users. Extra v5 fields like ce_stat_crc, next and
dir_next are also hidden. These fields can be put in a real struct in
read-cache.h, which is supposedly included by -v2.c and -v5.c

= Updating =

All index update API (add_index_entry, add_to_index,
remove_index_entry_at, remove_marked_cached_entries) are hooked by v5
when the loaded index is v5. v5 can update internal data when these
are called (e.g. conflict resolution), or just mark them "dirty" to be
worked on later in flush_index().

Anybody who updates a cache_entry is supposed to call
cache_entry_updated() function, which is no-op for v2 but v5 may want
to watch this activity.

Refreshing index is a special operation. Of course it's hooked by v5.
v5 may need its own implementation because it could walk working tree
and index tree at the same time. Of course v5 impl must also update
flat API data structure along the way.

A new function flush_index() is introduced, where v5 can update all
internal data and keep it in sync with index_state. When flat/tree
APIs are mixed, flush_index() must be called when switching from flat
API to tree API.

To help v5 deal with index rewrite in unpack_trees(),
index_bulk_update() may be introduced, which tells v5 "we are going to
do a lot of adding/removing/shuffling, keep your actions to minimum,
you most likely have to rebuild the trees at flush_index() anyway"

New API may be introduced for some big operations if it proves
v5-beneficial. I'm thinking of adding/removing a bunch of files by
pathspec, where v5 can walk working directory at the same time it
walks index directory tables.

= Tree traversal =

I don't see big problems here. We support opendir/readdir-like API for
tree traversing (with pathspec filtering). We also support
lookup_cache_entry to get cache_entry* of a certain path.

When tree traversal gets to a conflict entry, it lets the caller know
there's a conflict entry, it does not traverse through stage 1-3
during traversal. Caller is expected to use conflict lookup API for
that.

We also support reading partial index, filtered by pathspec. On v2, it
reads full index.

= Tree update =

At some point we may want to work on trees exclusively. Any operations
here must keep flat API data structure in sync.

We may want to postpone the sync if it's a lot of work, by doing all
the work in flush_index() before caller switches from tree API to flat
API again.

= Flat API deprecation =

At some point, tree update API will not update flat API any more
unless explicitly asked by caller. I don't expect "cache" in struct
index_state to be removed, unless we do really good merges using tree
API.
-- 
Duy

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 0/16] Introduce index file format version 5
  2012-08-08 13:54         ` Nguyen Thai Ngoc Duy
@ 2012-08-08 16:31           ` Junio C Hamano
  2012-08-09  2:28             ` Nguyen Thai Ngoc Duy
  0 siblings, 1 reply; 59+ messages in thread
From: Junio C Hamano @ 2012-08-08 16:31 UTC (permalink / raw)
  To: Nguyen Thai Ngoc Duy
  Cc: Thomas Gummerer, git, trast, mhagger, robin.rosenberg

Nguyen Thai Ngoc Duy <pclouds@gmail.com> writes:

> OK how about this. The general idea is preserve/extend current flat
> index API and add a new (tree-based) one. Index users can use either.
> They can even mix them up (which they do because we can't just flip
> the API in one day for about 200 source files).
>
> The day that unpack_trees() is converted to tree API, I will declare
> v5 victory ;)

s/API, /API and benchmark says tree-shaped index is an overall win, /;

> = Cleanup =
>
> struct cache_entry becomes partly opaque. ce_ctime..ce_gid are hidden
> in -v2.c and -v5.c. We only expose ce_size, ce_flags, ce_namelen, name
> and sha1 to index users. Extra v5 fields like ce_stat_crc, next and
> dir_next are also hidden. These fields can be put in a real struct in
> read-cache.h, which is supposedly included by -v2.c and -v5.c

I do not particularly see a reason to keep different in-core
cache_entry representations even in an early round of the API
updates.  If v2 needs ctime and gid and v5 needs crc, keep both
fields for simplicity.  When coming from the filesystem, ctime, gid
and friends are immediately available and crc needs to be computed
only immediately before it is written out or it is compared with an
existing entry.

I also do not see a reason to keep two representations of in-core
index_state representations for that matter.

The current code that access nth entry from the index->cache[nth]
would need to be updated to use an accessor function, whether the
"nth" comes from index_name_pos() or from the for-loop that iterates
over the entire index.  For the latter, you would need to give the
users a function that returns a cursor into the in-core index to
allow iterating over it.

When you use an in-core representation that is not a flat array, the
type of "nth", which is essentially a cursor, may have to change to
something that is richer than a simple integer, in order to give the
implementation of the in-core index a more efficient way to access
the entry than traversing the leaves of the tree depth first, and
you would need to update index_name_pos() to return such a "cursor".
That design and development cost is part of updating the in-core
data structure. In the end result, the runtime cost to manipulate an
index entry that the cursor refers to should be minimum, as that
would be the cost paid by all the users of the API anyway, even if
we _were_ starting from an ideal world where there weren't any flat
in-core index in the first place.

Because the v2 on-disk format forces us to scan the whole thing at
least once, with a properly designed in-core representation, the
overall system would not suffer performance penalty when reading
from v2, as both the current code and the updated code have to read
everything, and accesses based on the cursor given by either
index_name_pos() or the index iterator has to be fast anyway (if the
latter does not hold true, your updated in-core representation that
is not a flat array needs to be rethought).

On top of such a solid foundation, we can map the updated in-core
representation to an on-disk representation with confidence, as any
performance improvement or degradation from that point on must be
solely attributable to the on-disk format difference.

Without such a foundation, it is hard to justify a different on-disk
format without handwaving, no?

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 09/16] Read index-v5
  2012-08-08  7:41     ` Thomas Gummerer
@ 2012-08-08 16:49       ` Junio C Hamano
  2012-08-08 20:44         ` Thomas Gummerer
  0 siblings, 1 reply; 59+ messages in thread
From: Junio C Hamano @ 2012-08-08 16:49 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pclouds, robin.rosenberg

Thomas Gummerer <t.gummerer@gmail.com> writes:

>> > +	name = (char *)mmap + *dir_offset;
>> > +	beginning = mmap + *dir_table_offset;
>> 
>> Notice how you computed name with pointer arithmetic by first
>> casting mmap (which is "void *") and when computing beginning, you
>> forgot to cast mmap and attempted pointer arithmetic with "void *".
>> The latter does not work and breaks compilation.
>> 
>> The pointer-arith with "void *" is not limited to this function.
> ...
> I've used the type of the respective assignment for now. e.g. i have
> struct cache_header *hdr, so I'm using
> hdr = (struct cache_header *)mmap + x;

You need to be careful when rewriting the above to choose the right
value for 'x' if you go that route (which I wouldn't recommend).

With

    hdr = ptr_add(mmap, x);

you are making "hdr" point at x BYTES beyond mmap, but

    hdr = (struct cache_header *)mmap + x;

means something entirely different, no?  "hdr" points at x entries
of "struct cache_header" beyond mmap (in other words, if mmap[] were
defined as "struct cache_header mmap[]", the above is saying the
same as "hdr = &mmap[x]").

I think the way you casted to compute the value for the "name"
pointer is the (second) right thing to do.  The cast (char *)
applied to "mmap" is about "mmap is a typeless blob of memory I want
to count bytes in.  Give me *dir_offset bytes into that blob".  It
is not tied to the type of LHS (i.e. "name") at all.  The result
then needs to be casted to the type of LHS (i.e. "name"), and in
this case the types happen to be the same, so you do not have to
cast the result of the addition but that is mere luck.

The next line is not so lucky and you would need to say something
like:

    beginning = (uint32_t *)((char *)mmap + *dir_table_offset);

Again, inner cast is about "mmap is a blob counted in bytes", the
outer cast is about type mismatch between a byte-address and LHS of
the assignment.

If mmap variable in this function were not "void *" but something
more sane like "const char *", you wouldn't have to have the inner
cast to begin with, and that is why I said the way you did "name" is
the second right thing.  Then you can write them like

    name = mmap + *dir_offset;
    beginning = (uint32_t *)(mmap + *dir_offset);

After thinking about this, the ptr_add() macro might be the best
solution, even though I originally called it as a band-aid.  We know
mmap is a blob of memory, byte-offset of each component of which we
know about, so we can say

    name = ptr_add(mmap, *dir_offset);
    beginning = ptr_add(mmap, *dir_offset);

Hmmm..

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 06/16] t3700: sleep for 1 second, to avoid interfering with the racy code
  2012-08-07 16:59     ` Thomas Gummerer
@ 2012-08-08 20:16       ` Junio C Hamano
  2012-08-08 20:57         ` Junio C Hamano
  0 siblings, 1 reply; 59+ messages in thread
From: Junio C Hamano @ 2012-08-08 20:16 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pcouds, robin.rosenberg

Thomas Gummerer <t.gummerer@gmail.com> writes:

> On 08/05, Junio C Hamano wrote:
>> Thomas Gummerer <t.gummerer@gmail.com> writes:
>> 
>> > The new git racy code uses the mtime of cache-entries to smudge
>> > a racy clean entry, and loads the work, of checking the file-system
>> 
>> -ECANTPARSE.
>
> The git racy code for index-v5 uses the mtime of the cache-entries as
> smudge markers. The work of checking the file-system is loaded of to
> the reader.

OK, now I can parse, perhaps with either s/is loaded of/&f/ or
s/is loaded of/is offloaded/.

Thanks for clarifying the grammar.

But doesn't the current code make it the responsibilty of the reader
to check the contents with ce_modified_check_fs() already?  You may
have switched st_size to st_mtime as the field to mark a racily
clean entry, but it is unclear how that change affects anything.

>> > if the entry has really changed, off to the reader. This interferes
>> > with this test, because the entry is racily smudged and thus has
>> > mtime 0. We wait 1 second to avoid smudging the entry and getting
>> > correct test results.
>> 
>> Mild NAK, especially it is totally unclear why you even need to muck
>> with racy-git check in the current format of the index in the first
>> place, and even if it were necessary, it is unclear why this cannot
>> be done with test-chmtime.
>
> The racy-git code needs to be changed, to avoid problems when implementing
> the partial writing for index-v5. Otherwise it could cause problems, when
> we have entries that should be smudged, but are not due to the different
> racy algorithms.

Hrmph.  But if racy detection and checking is now a responsibility
of the later reader, the overall end result should be the same, no?
Perhaps the existing test was checking a wrong thing?

We should not care if the index still has a racily clean entries, or
how that fact is marked in the index entry.  The primary thing we
care about is that we do not mistake an actual change as no change
due to raciness.

So whether done with "sleep" or "test-chmtime", avoiding a racily
clean situation sounds like sweeping a bug in the v5 code in racy
situation under the rug to me (unless I am misunderstanding what
you are doing with this change and in your explanation, or the test
was checking a wrong thing, that is).

Even more confused....

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 09/16] Read index-v5
  2012-08-08 16:49       ` Junio C Hamano
@ 2012-08-08 20:44         ` Thomas Gummerer
  2012-08-08 21:50           ` Junio C Hamano
  0 siblings, 1 reply; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-08 20:44 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, trast, mhagger, pclouds, robin.rosenberg



On 08/08, Junio C Hamano wrote:
> Thomas Gummerer <t.gummerer@gmail.com> writes:
> 
> >> > +	name = (char *)mmap + *dir_offset;
> >> > +	beginning = mmap + *dir_table_offset;
> >> 
> >> Notice how you computed name with pointer arithmetic by first
> >> casting mmap (which is "void *") and when computing beginning, you
> >> forgot to cast mmap and attempted pointer arithmetic with "void *".
> >> The latter does not work and breaks compilation.
> >> 
> >> The pointer-arith with "void *" is not limited to this function.
> > ...
> > I've used the type of the respective assignment for now. e.g. i have
> > struct cache_header *hdr, so I'm using
> > hdr = (struct cache_header *)mmap + x;
> 
> You need to be careful when rewriting the above to choose the right
> value for 'x' if you go that route (which I wouldn't recommend).
> 
> With
> 
>     hdr = ptr_add(mmap, x);
> 
> you are making "hdr" point at x BYTES beyond mmap, but
> 
>     hdr = (struct cache_header *)mmap + x;
> 
> means something entirely different, no?  "hdr" points at x entries
> of "struct cache_header" beyond mmap (in other words, if mmap[] were
> defined as "struct cache_header mmap[]", the above is saying the
> same as "hdr = &mmap[x]").
> 
> I think the way you casted to compute the value for the "name"
> pointer is the (second) right thing to do.  The cast (char *)
> applied to "mmap" is about "mmap is a typeless blob of memory I want
> to count bytes in.  Give me *dir_offset bytes into that blob".  It
> is not tied to the type of LHS (i.e. "name") at all.  The result
> then needs to be casted to the type of LHS (i.e. "name"), and in
> this case the types happen to be the same, so you do not have to
> cast the result of the addition but that is mere luck.
> 
> The next line is not so lucky and you would need to say something
> like:
> 
>     beginning = (uint32_t *)((char *)mmap + *dir_table_offset);
> 
> Again, inner cast is about "mmap is a blob counted in bytes", the
> outer cast is about type mismatch between a byte-address and LHS of
> the assignment.

This is what I tried in v3 of the series, but it didn't seem quiet
right.

> If mmap variable in this function were not "void *" but something
> more sane like "const char *", you wouldn't have to have the inner
> cast to begin with, and that is why I said the way you did "name" is
> the second right thing.  Then you can write them like
> 
>     name = mmap + *dir_offset;
>     beginning = (uint32_t *)(mmap + *dir_offset);
> 
> After thinking about this, the ptr_add() macro might be the best
> solution, even though I originally called it as a band-aid.  We know
> mmap is a blob of memory, byte-offset of each component of which we
> know about, so we can say
> 
>     name = ptr_add(mmap, *dir_offset);
>     beginning = ptr_add(mmap, *dir_offset);
> 
> Hmmm..

I start to think so too. Casting the mmap variable to "const char *"
in the method call doesn't feel right to me, even though it would work.
Unless there are any objections I'll use ptr_add in the next version.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 06/16] t3700: sleep for 1 second, to avoid interfering with the racy code
  2012-08-08 20:16       ` Junio C Hamano
@ 2012-08-08 20:57         ` Junio C Hamano
  2012-08-09 13:19           ` Thomas Gummerer
  0 siblings, 1 reply; 59+ messages in thread
From: Junio C Hamano @ 2012-08-08 20:57 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pcouds, robin.rosenberg

Junio C Hamano <gitster@pobox.com> writes:

> So whether done with "sleep" or "test-chmtime", avoiding a racily
> clean situation sounds like sweeping a bug in the v5 code in racy
> situation under the rug to me (unless I am misunderstanding what
> you are doing with this change and in your explanation, or the test
> was checking a wrong thing, that is).
>
> Even more confused....

OK, after staring this test for a long time, and going back to
3d1f148 (refresh_index: do not show unmerged path that is outside
pathspec, 2012-02-17), I give up.

Let me ask the same question in a more direct way.  Which part of
this test break with your series?

        test_expect_success 'git add --refresh with pathspec' '
                git reset --hard &&
                echo >foo && echo >bar && echo >baz &&
                git add foo bar baz && H=$(git rev-parse :foo) && git rm -f foo &&
                echo "100644 $H 3	foo" | git update-index --index-info &&
	# "sleep 1 &&" in the update here ...
                test-chmtime -60 bar baz &&
                >expect &&
                git add --refresh bar >actual &&
                test_cmp expect actual &&

                git diff-files --name-only >actual &&
                ! grep bar actual&&
                grep baz actual
        '

We prepare an index with bunch of paths, we make "foo" unmerged, we
smudge bar and baz stat-dirty, so that "diff-files" would report
them, even though their contents match what is recorded in the
index.

Then we say "git add --refresh bar".  As far as I know, the output
from "git add --refresh <pathspec>" is limited to "foo: needs merge"
if and only if "foo" is covered by <pathspec> and "foo" is unmerged.

	Side note: If "--verbose" is given to the same command, we
	also give "Unstaged changes after refreshing the index:"
	followed by "M foo" or "U foo" if "foo" does not match the
	index but not unmerged, or if "foo" is unmerged, again if
	and only if "foo" is covered by <pathspec>.  But that is not
	how we invoke "git add --refresh" in this test.

So if you are getting a test failure from the test_cmp, wouldn't it
mean that your series broke what 3d1f148 did (namely, make sure we
report only on paths that are covered by <pathspec>, in this case
"bar"), as the contents of "bar" in the working tree matches what is
recorded in the index?

If the failure you are seeing is that "bar" appears in the output of
"git diff-files --name-only", it means that "diff-files" noticed
that "bar" is stat-dirty after "git add --refresh bar".  Wouldn't it
mean that the series broke "git add --refresh bar" in such a way
that it does not to refresh what it was told to refresh?

Another test that could fail after the point you added "sleep 1" is
that the output from "git diff-files --name-only" fails to list
"baz" in its output, but with "test-chmtime -60 bar baz", we made
sure that "bar" and "baz" are stat-dirty, and we only refreshed
"bar" and not "baz".  If that is the case, then would it mean that
the series broke "git add --refresh bar" in such a way that it
refreshes something other than what it was told to refresh?

In any case, having to change this test in any way smells like there
is some breakage in the series; it is not immediately obvious to me
that the current test is checking anything wrong as I suspected in
the earlier message.

So,... I dunno.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 09/16] Read index-v5
  2012-08-08 20:44         ` Thomas Gummerer
@ 2012-08-08 21:50           ` Junio C Hamano
  0 siblings, 0 replies; 59+ messages in thread
From: Junio C Hamano @ 2012-08-08 21:50 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pclouds, robin.rosenberg

Thomas Gummerer <t.gummerer@gmail.com> writes:

> On 08/08, Junio C Hamano wrote:
>> Thomas Gummerer <t.gummerer@gmail.com> writes:
>> ... 
>> After thinking about this, the ptr_add() macro might be the best
>> solution, even though I originally called it as a band-aid.  We know
>> mmap is a blob of memory, byte-offset of each component of which we
>> know about, so we can say
>> 
>>     name = ptr_add(mmap, *dir_offset);
>>     beginning = ptr_add(mmap, *dir_offset);
>> 
>> Hmmm..
>
> I start to think so too. Casting the mmap variable to "const char *"
> in the method call doesn't feel right to me, even though it would work.
> Unless there are any objections I'll use ptr_add in the next version.

Thanks for sanity checking my thinking.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 0/16] Introduce index file format version 5
  2012-08-08  9:07         ` Thomas Rast
@ 2012-08-08 22:47           ` Junio C Hamano
  0 siblings, 0 replies; 59+ messages in thread
From: Junio C Hamano @ 2012-08-08 22:47 UTC (permalink / raw)
  To: Thomas Rast
  Cc: Nguyễn Thái Ngọc Duy, Thomas Gummerer, git,
	mhagger, robin.rosenberg

Thomas Rast <trast@inf.ethz.ch> writes:

> Junio C Hamano <gitster@pobox.com> writes:
>
>> Thomas Rast <trast@student.ethz.ch> writes:
>>
>>> I like the general idea, too, but I think there is a long way ahead, and
>>> we shouldn't hold up v5 on this.
>>
>> We shouldn't rush, only to keep some deadline, and regret it later
>> that we butchered the index format without thinking things through.
>> When this was added to the GSoC idea page, I already said upfront
>> that this was way too big a topic to be a GSoC project, didn't I?
>
> Let me spell out my concern.  There are two v5s here:
>
> * The extent of the GSoC task.
>
> * The eventual implementation of index-v5 that goes into Git mainline.
>
> IMHO this thread is mixing up the two.  There indeed must not be any
> rush in the final implementation of index-v5.  However, the GSoC ends in
> less than two weeks, and I have to evaluate Thomas on whatever is
> finished until then.

This is the primary reason why I have recused myself from the Mentor
pool.  My involvement in this thread is mostly about the latter.  It
is not like "I do not really care about GSoC", but the maintainer
works for what is best for the project, not for GSoC schedule.

> AFAIK Thomas is now cleaning up the existing code to be in readable
> shape, using your feedback, which is great.  However, the above
> suggestion is such a fuzzily-specified task that there is no way to even
> find out what needs to be done within the next two weeks.

Yes, it is the mentor's job to (1) keep an eye on the progress of
the student, (2) avoid giving a task that is too big to chew within
the given timeframe, and (3) help the student learn the skill to
break down large tasks to manageable pieces.

> Perhaps it
> makes sense, at this point, to wrap anything that ended up having _v[25]
> suffixes in an index_ops like Duy did.

Yes, I think that suggestion was a welcome input for the mentor and
the student (item (3) above).

> That's a long way from actually
> following through on the idea, though.

I think that is perfectly fine, both from the point of view of the
project maintainer (who officially does not give a whit about GSoC
schedule) and from the point of view of somebody who cares about the
health of the development community (and as one part of it, cares
about the GSoC student project).

If Git GSoC admins initially picked a project that is too large by
mistake, finishing a subpart of it that is of reasonable size and
polishing the result into a nice shape would be the best the student
can do, and the grading should be done on the quality of that
subtask alone.  It may not directly help the project without the
remainder, but that is not the student's fault.  But as I am not
part of the Mentor pool, what I wrote in this paragraph is just my
opinion.

> I think the part you snipped
>
>>> the loops that iterate over the index [...] either
>>> skip unmerged entries or specifically look for them.  There are subtle
>>> differences between the loops on many points: what do they do when they
>>> hit an unmerged entry?  Or a CE_REMOVED or CE_VALID one?
>
> is a symptom of the same general problem: the data structures are sound,
> but they are leaking all over the code and now we have lots of
> complexity to do even simple operations like "for each unmerged entry".

I do not think I was arguing against an updated cleaner API, so we
are in agreement.  In fact, I was saying that the calling code
should be ported to such a cleaner API and in-core data structure
first, and only then an optimal on-disk representation of the
in-core data structure can be designed.

The mistaken title of this GSoC topic was one of the root cause of
the issues, I think, you are seeing.  It said "faster file format",
but file format is a result of a design of the code that uses the
data, not the other way around.

That, and also the project scope is too large for a summer student
project as I said in the very beginning.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 0/16] Introduce index file format version 5
  2012-08-08 16:31           ` Junio C Hamano
@ 2012-08-09  2:28             ` Nguyen Thai Ngoc Duy
  0 siblings, 0 replies; 59+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-08-09  2:28 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Thomas Gummerer, git, trast, mhagger, robin.rosenberg

On Wed, Aug 8, 2012 at 11:31 PM, Junio C Hamano <gitster@pobox.com> wrote:
> The current code that access nth entry from the index->cache[nth]
> would need to be updated to use an accessor function, whether the
> "nth" comes from index_name_pos() or from the for-loop that iterates
> over the entire index.  For the latter, you would need to give the
> users a function that returns a cursor into the in-core index to
> allow iterating over it.
>
> When you use an in-core representation that is not a flat array, the
> type of "nth", which is essentially a cursor, may have to change to
> something that is richer than a simple integer, in order to give the
> implementation of the in-core index a more efficient way to access
> the entry than traversing the leaves of the tree depth first, and
> you would need to update index_name_pos() to return such a "cursor".
> That design and development cost is part of updating the in-core
> data structure. In the end result, the runtime cost to manipulate an
> index entry that the cursor refers to should be minimum, as that
> would be the cost paid by all the users of the API anyway, even if
> we _were_ starting from an ideal world where there weren't any flat
> in-core index in the first place.

Interesting. So you hide the entire tree walk behind the cursor
concept. And we can make pathspec filter as part of cursor
initialization. Index iteration code this way looks really neat
(compared to how we do traverse sha-1 trees nowadays). The hard part
is updating the index while iterating (or avodiing running into such a
situation). Maybe C++ STL has done it already with std::map::iterator.
I fear that by hiding the trees, we might miss some optimization
opportunities. But I haven't figured it all out yet so I may be wrong.
-- 
Duy

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 06/16] t3700: sleep for 1 second, to avoid interfering with the racy code
  2012-08-08 20:57         ` Junio C Hamano
@ 2012-08-09 13:19           ` Thomas Gummerer
  2012-08-09 16:51             ` Junio C Hamano
  0 siblings, 1 reply; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-09 13:19 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, trast, mhagger, pcouds, robin.rosenberg

On 08/08, Junio C Hamano wrote:
> Junio C Hamano <gitster@pobox.com> writes:
> 
> > So whether done with "sleep" or "test-chmtime", avoiding a racily
> > clean situation sounds like sweeping a bug in the v5 code in racy
> > situation under the rug to me (unless I am misunderstanding what
> > you are doing with this change and in your explanation, or the test
> > was checking a wrong thing, that is).
> >
> > Even more confused....
> 
> OK, after staring this test for a long time, and going back to
> 3d1f148 (refresh_index: do not show unmerged path that is outside
> pathspec, 2012-02-17), I give up.
> 
> Let me ask the same question in a more direct way.  Which part of
> this test break with your series?
> 
>         test_expect_success 'git add --refresh with pathspec' '
>                 git reset --hard &&
>                 echo >foo && echo >bar && echo >baz &&
>                 git add foo bar baz && H=$(git rev-parse :foo) && git rm -f foo &&
>                 echo "100644 $H 3	foo" | git update-index --index-info &&
> 	# "sleep 1 &&" in the update here ...
>                 test-chmtime -60 bar baz &&
>                 >expect &&
>                 git add --refresh bar >actual &&
>                 test_cmp expect actual &&
> 
>                 git diff-files --name-only >actual &&
>                 ! grep bar actual&&
>                 grep baz actual
>         '
> 
> We prepare an index with bunch of paths, we make "foo" unmerged, we
> smudge bar and baz stat-dirty, so that "diff-files" would report
> them, even though their contents match what is recorded in the
> index.

After getting confused a bit myself, I now think here is the problem.
The v5 code smudges baz when doing git add --refresh bar.  Therefore
baz isn't considered stat-dirty by the code, but a racily smudged entry
and therefore its content gets checked, thus not showing up in
git diff-files.  The mtime doesn't get checked anymore as it is used
as smudge marker and thus 0.  Adding sleep just avoids smudging the
entry.

The alternative would be to use the size or the crc as smudge marker
but I don't think they are good canidates, as they can still be used
by the reader to avoid checking the filesystem.

Another alternative would be to introduce a CE_SMUDGED flag as it was
suggested by Thomas on irc IIRC, but we chose to use the mtime as
smudge marker instead.

> Then we say "git add --refresh bar".  As far as I know, the output
> from "git add --refresh <pathspec>" is limited to "foo: needs merge"
> if and only if "foo" is covered by <pathspec> and "foo" is unmerged.
> 
> 	Side note: If "--verbose" is given to the same command, we
> 	also give "Unstaged changes after refreshing the index:"
> 	followed by "M foo" or "U foo" if "foo" does not match the
> 	index but not unmerged, or if "foo" is unmerged, again if
> 	and only if "foo" is covered by <pathspec>.  But that is not
> 	how we invoke "git add --refresh" in this test.
> 
> So if you are getting a test failure from the test_cmp, wouldn't it
> mean that your series broke what 3d1f148 did (namely, make sure we
> report only on paths that are covered by <pathspec>, in this case
> "bar"), as the contents of "bar" in the working tree matches what is
> recorded in the index?
> 
> If the failure you are seeing is that "bar" appears in the output of
> "git diff-files --name-only", it means that "diff-files" noticed
> that "bar" is stat-dirty after "git add --refresh bar".  Wouldn't it
> mean that the series broke "git add --refresh bar" in such a way
> that it does not to refresh what it was told to refresh?
> 
> Another test that could fail after the point you added "sleep 1" is
> that the output from "git diff-files --name-only" fails to list
> "baz" in its output, but with "test-chmtime -60 bar baz", we made
> sure that "bar" and "baz" are stat-dirty, and we only refreshed
> "bar" and not "baz".  If that is the case, then would it mean that
> the series broke "git add --refresh bar" in such a way that it
> refreshes something other than what it was told to refresh?
>
> In any case, having to change this test in any way smells like there
> is some breakage in the series; it is not immediately obvious to me
> that the current test is checking anything wrong as I suspected in
> the earlier message.
> 
> So,... I dunno.
> 

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 06/16] t3700: sleep for 1 second, to avoid interfering with the racy code
  2012-08-09 13:19           ` Thomas Gummerer
@ 2012-08-09 16:51             ` Junio C Hamano
  2012-08-09 22:51               ` Thomas Gummerer
  0 siblings, 1 reply; 59+ messages in thread
From: Junio C Hamano @ 2012-08-09 16:51 UTC (permalink / raw)
  To: Thomas Gummerer; +Cc: git, trast, mhagger, pcouds, robin.rosenberg

Thomas Gummerer <t.gummerer@gmail.com> writes:

> On 08/08, Junio C Hamano wrote:
>> ...
>> Let me ask the same question in a more direct way.  Which part of
>> this test break with your series?
>> 
>>         test_expect_success 'git add --refresh with pathspec' '
>>                 git reset --hard &&
>>                 echo >foo && echo >bar && echo >baz &&
>>                 git add foo bar baz && H=$(git rev-parse :foo) && git rm -f foo &&
>>                 echo "100644 $H 3	foo" | git update-index --index-info &&
>> 	# "sleep 1 &&" in the update here ...
>>                 test-chmtime -60 bar baz &&
>>                 >expect &&
>>                 git add --refresh bar >actual &&
>>                 test_cmp expect actual &&
>> 
>>                 git diff-files --name-only >actual &&
>>                 ! grep bar actual&&
>>                 grep baz actual
>>         '
>> 
>> We prepare an index with bunch of paths, we make "foo" unmerged, we
>> smudge bar and baz stat-dirty, so that "diff-files" would report
>> them, even though their contents match what is recorded in the
>> index.
>
> After getting confused a bit myself, I now think here is the problem.
> The v5 code smudges baz when doing git add --refresh bar.  Therefore
> baz isn't considered stat-dirty by the code, but a racily smudged entry
> and therefore its content gets checked, thus not showing up in
> git diff-files.

So in short, the breakage is the last one among the three choices I
gave you in my message you are responding to.  The user asked to
refresh "bar" so that later diff-files won't report a false change
on it, but "baz" effectively ends up getting refreshed at the same
time and a false change is not reported.

That "breakage" is, from the correctness point of view, not a
breakage.  As the primary purpose of "refreshing" is to support
commands that want to rely on a quick ce_modified() call to tell
files that are modified in the working tree since it was last added
to the index---you refresh once, and then you call such commands
many times without having to worry about having to compare the
contents between the indexed objects and the working tree files.

But from the performance point of view, which is the whole point of
"refresh", the behaviour of the new code is dubious.  If the user is
working in a large working tree (which automatically means large
index, the primary reason we are doing this v5 experiment), the user
often is working in a deep and narrow subdirectory of it, and a path
limited refresh (the test names a specific file "bar", but imagine
it were "." to limit it to the directory the user is working in) may
be a cheap way not to bother even checking outside the area the user
currently is working in.  Also, smudging more entries than necessary
to be checked by ce_modified_check_fs() later at runtime may mean
that it defeats the "refresh once and then compare cheaply many
times" pattern that is employed by existing scripts.

Is the root cause really where the "racily-clean so smudge to tell
later runtime to check contents" bit goes?  I am hoping that the
issue is not coming from the difference between the current code and
your code when they decide to "smudge", what entries they decide to
"smudge" and based on what condition.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH/RFC v2 06/16] t3700: sleep for 1 second, to avoid interfering with the racy code
  2012-08-09 16:51             ` Junio C Hamano
@ 2012-08-09 22:51               ` Thomas Gummerer
  0 siblings, 0 replies; 59+ messages in thread
From: Thomas Gummerer @ 2012-08-09 22:51 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, trast, mhagger, pcouds, robin.rosenberg

On 08/09, Junio C Hamano wrote:
> Thomas Gummerer <t.gummerer@gmail.com> writes:
> 
> > On 08/08, Junio C Hamano wrote:
> >> ...
> >> Let me ask the same question in a more direct way.  Which part of
> >> this test break with your series?
> >> 
> >>         test_expect_success 'git add --refresh with pathspec' '
> >>                 git reset --hard &&
> >>                 echo >foo && echo >bar && echo >baz &&
> >>                 git add foo bar baz && H=$(git rev-parse :foo) && git rm -f foo &&
> >>                 echo "100644 $H 3	foo" | git update-index --index-info &&
> >> 	# "sleep 1 &&" in the update here ...
> >>                 test-chmtime -60 bar baz &&
> >>                 >expect &&
> >>                 git add --refresh bar >actual &&
> >>                 test_cmp expect actual &&
> >> 
> >>                 git diff-files --name-only >actual &&
> >>                 ! grep bar actual&&
> >>                 grep baz actual
> >>         '
> >> 
> >> We prepare an index with bunch of paths, we make "foo" unmerged, we
> >> smudge bar and baz stat-dirty, so that "diff-files" would report
> >> them, even though their contents match what is recorded in the
> >> index.
> >
> > After getting confused a bit myself, I now think here is the problem.
> > The v5 code smudges baz when doing git add --refresh bar.  Therefore
> > baz isn't considered stat-dirty by the code, but a racily smudged entry
> > and therefore its content gets checked, thus not showing up in
> > git diff-files.
> 
> So in short, the breakage is the last one among the three choices I
> gave you in my message you are responding to.  The user asked to
> refresh "bar" so that later diff-files won't report a false change
> on it, but "baz" effectively ends up getting refreshed at the same
> time and a false change is not reported.

Exactly.

> That "breakage" is, from the correctness point of view, not a
> breakage.  As the primary purpose of "refreshing" is to support
> commands that want to rely on a quick ce_modified() call to tell
> files that are modified in the working tree since it was last added
> to the index---you refresh once, and then you call such commands
> many times without having to worry about having to compare the
> contents between the indexed objects and the working tree files.
> 
> But from the performance point of view, which is the whole point of
> "refresh", the behaviour of the new code is dubious.  If the user is
> working in a large working tree (which automatically means large
> index, the primary reason we are doing this v5 experiment), the user
> often is working in a deep and narrow subdirectory of it, and a path
> limited refresh (the test names a specific file "bar", but imagine
> it were "." to limit it to the directory the user is working in) may
> be a cheap way not to bother even checking outside the area the user
> currently is working in.

That's true, but once we have the partial reader/writer, we do not
bother checking outside the area the user is currently working in
anyway.

Also and probably more importantly, this will only affect a *very*
small number of entries, because timestamps outside of the directory
in which the user is working in are rarely updated recently and
thus racy.

> Also, smudging more entries than necessary
> to be checked by ce_modified_check_fs() later at runtime may mean
> that it defeats the "refresh once and then compare cheaply many
> times" pattern that is employed by existing scripts.

The new racy code also calls ce_modified_check_fs() only if the size
and the stat_crc are not changed.  It's true that ce_modified_check_fs()
can be called multiple times, when match_stat_crc() is called, but that
could be solved by adding an additional flag CE_IS_MODIFIED, which
indicates that ce_modified_check_fs() was already run.

> Is the root cause really where the "racily-clean so smudge to tell
> later runtime to check contents" bit goes?  I am hoping that the
> issue is not coming from the difference between the current code and
> your code when they decide to "smudge", what entries they decide to
> "smudge" and based on what condition.

I just gave it a try using a CE_SMUDGED flag, instead of the mtime
as smudge marker, which which this test works without any problems.
It doesn't work the other way round, the test as the test doesn't
break when using mtime as smudge marker in v2, because we do the
ce_modified_check_fs() test earlier.

^ permalink raw reply	[flat|nested] 59+ messages in thread

end of thread, other threads:[~2012-08-09 22:52 UTC | newest]

Thread overview: 59+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-08-05 21:48 [PATCH/RFC v2 0/16] Introduce index file format version 5 Thomas Gummerer
2012-08-05 21:48 ` [PATCH/RFC v2 01/16] Modify cache_header to prepare for other index formats Thomas Gummerer
2012-08-06  1:17   ` Junio C Hamano
2012-08-07 12:41     ` Thomas Gummerer
2012-08-07 15:45       ` Junio C Hamano
2012-08-05 21:48 ` [PATCH/RFC v2 02/16] Modify read functions " Thomas Gummerer
2012-08-05 21:49 ` [PATCH/RFC v2 03/16] Modify match_stat_basic " Thomas Gummerer
2012-08-05 21:49 ` [PATCH/RFC v2 04/16] Modify write functions " Thomas Gummerer
2012-08-06  1:34   ` Junio C Hamano
2012-08-07 12:50     ` Thomas Gummerer
2012-08-05 21:49 ` [PATCH/RFC v2 05/16] t2104: Don't fail for index versions other than [23] Thomas Gummerer
2012-08-06  1:36   ` Junio C Hamano
2012-08-05 21:49 ` [PATCH/RFC v2 06/16] t3700: sleep for 1 second, to avoid interfering with the racy code Thomas Gummerer
2012-08-06  1:43   ` Junio C Hamano
2012-08-07 16:59     ` Thomas Gummerer
2012-08-08 20:16       ` Junio C Hamano
2012-08-08 20:57         ` Junio C Hamano
2012-08-09 13:19           ` Thomas Gummerer
2012-08-09 16:51             ` Junio C Hamano
2012-08-09 22:51               ` Thomas Gummerer
2012-08-05 21:49 ` [PATCH/RFC v2 07/16] Add documentation of the index-v5 file format Thomas Gummerer
2012-08-05 21:49 ` [PATCH/RFC v2 08/16] Make in-memory format aware of stat_crc Thomas Gummerer
2012-08-06  1:46   ` Junio C Hamano
2012-08-07 19:02     ` Thomas Gummerer
2012-08-05 21:49 ` [PATCH/RFC v2 09/16] Read index-v5 Thomas Gummerer
2012-08-06  5:17   ` Junio C Hamano
2012-08-08  7:41     ` Thomas Gummerer
2012-08-08 16:49       ` Junio C Hamano
2012-08-08 20:44         ` Thomas Gummerer
2012-08-08 21:50           ` Junio C Hamano
2012-08-05 21:49 ` [PATCH/RFC v2 10/16] Read resolve-undo data Thomas Gummerer
2012-08-06  1:51   ` Junio C Hamano
2012-08-07 19:17     ` Thomas Gummerer
2012-08-05 21:49 ` [PATCH/RFC v2 11/16] Read cache-tree in index-v5 Thomas Gummerer
2012-08-05 21:49 ` [PATCH/RFC v2 12/16] Write index-v5 Thomas Gummerer
2012-08-05 21:49 ` [PATCH/RFC v2 13/16] Write index-v5 cache-tree data Thomas Gummerer
2012-08-05 21:49 ` [PATCH/RFC v2 14/16] Write resolve-undo data for index-v5 Thomas Gummerer
2012-08-05 21:49 ` [PATCH/RFC v2 15/16] update-index.c: add a force-rewrite option Thomas Gummerer
2012-08-06  1:58   ` Junio C Hamano
2012-08-08  7:31     ` Thomas Gummerer
2012-08-05 21:49 ` [PATCH/RFC v2 16/16] p0002-index.sh: add perf test for the index formats Thomas Gummerer
2012-08-06 14:35 ` [PATCH/RFC v2 0/16] Introduce index file format version 5 Nguyễn Thái Ngọc Duy
2012-08-06 14:35   ` [PATCH 1/2] Move index v2 specific code out of read-cache Nguyễn Thái Ngọc Duy
2012-08-06 14:36   ` [PATCH 2/2] Add index-v5 Nguyễn Thái Ngọc Duy
2012-08-07 21:52     ` Robin Rosenberg
2012-08-08 10:54       ` Thomas Gummerer
2012-08-06 15:51   ` [PATCH/RFC v2 0/16] Introduce index file format version 5 Junio C Hamano
2012-08-06 16:06     ` Thomas Gummerer
2012-08-06 17:46   ` Junio C Hamano
2012-08-07 12:16     ` Nguyen Thai Ngoc Duy
2012-08-08  1:38       ` Junio C Hamano
2012-08-08 13:54         ` Nguyen Thai Ngoc Duy
2012-08-08 16:31           ` Junio C Hamano
2012-08-09  2:28             ` Nguyen Thai Ngoc Duy
2012-08-07 22:31     ` Thomas Rast
2012-08-07 23:26       ` Junio C Hamano
2012-08-08  9:07         ` Thomas Rast
2012-08-08 22:47           ` Junio C Hamano
2012-08-08 10:30       ` Nguyen Thai Ngoc Duy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.