All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Verify index file before we opportunistically update it
@ 2014-04-09 22:06 Yiannis Marangos
  2014-04-09 22:06 ` Yiannis Marangos
                   ` (7 more replies)
  0 siblings, 8 replies; 31+ messages in thread
From: Yiannis Marangos @ 2014-04-09 22:06 UTC (permalink / raw)
  To: git; +Cc: Yiannis Marangos

This is a fix for the following bug:
http://thread.gmane.org/gmane.comp.version-control.git/245946/focus=245965

I added 2 functions: verify_index_from and verify_index. They return 1
if the sha1 is correct, otherwise 0. I choose to not die if any errors
are occurred because we just want to not proceed to "opportunistic
update".

Some questions:
1) Is it better to have these functions as static?
2) If the answer of (1) is no, should I define verify_cache*() also?
3) If something goes wrong in verify_hdr(), it will print an error
   message, should I make a "quietly" version of it?

Yiannis Marangos (1):
  Verify index file before we opportunistically update it

 cache.h      |  3 +++
 read-cache.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 71 insertions(+), 11 deletions(-)

-- 
1.9.1

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH] Verify index file before we opportunistically update it
  2014-04-09 22:06 [PATCH] Verify index file before we opportunistically update it Yiannis Marangos
@ 2014-04-09 22:06 ` Yiannis Marangos
  2014-04-09 22:34 ` [PATCH v2] " Yiannis Marangos
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 31+ messages in thread
From: Yiannis Marangos @ 2014-04-09 22:06 UTC (permalink / raw)
  To: git; +Cc: Yiannis Marangos

Before we proceed to "opportunistic update" we must verify that the
current index file is the same as the one that we read before. There
is a possible race if we don't do this:

  1. process A calls git-rebase

  2. process A applies 1st commit

  3. process B calls git-status

  4. process B reads index

  5. process A applies 2nd commit

  6. process B takes the lock, then overwrites process A's changes.

  7. process A applies 3rd commit

As an end result the 3rd commit will have a revert of the 2nd commit.

Signed-off-by: Yiannis Marangos <yiannis.marangos@gmail.com>
---
 cache.h      |  3 +++
 read-cache.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 71 insertions(+), 11 deletions(-)

diff --git a/cache.h b/cache.h
index 107ac61..b0eedad 100644
--- a/cache.h
+++ b/cache.h
@@ -279,6 +279,7 @@ struct index_state {
 		 initialized : 1;
 	struct hashmap name_hash;
 	struct hashmap dir_hash;
+	unsigned char sha1[20];
 };
 
 extern struct index_state the_index;
@@ -456,6 +457,8 @@ extern int daemonize(void);
 	} while (0)
 
 /* Initialize and use the cache information */
+extern int verify_index_from(const struct index_state *, const char *path);
+extern int verify_index(struct index_state *);
 extern int read_index(struct index_state *);
 extern int read_index_preload(struct index_state *, const struct pathspec *pathspec);
 extern int read_index_from(struct index_state *, const char *path);
diff --git a/read-cache.c b/read-cache.c
index ba13353..fac8655 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -14,6 +14,7 @@
 #include "resolve-undo.h"
 #include "strbuf.h"
 #include "varint.h"
+#include "git-compat-util.h"
 
 static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
 					       unsigned int options);
@@ -1321,6 +1322,59 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size)
 	return 0;
 }
 
+/* This function verifies if index_state has the correct sha1 of an index file.
+ * Don't die if we have any other failure, just return 0. */
+int verify_index_from(const struct index_state *istate, const char *path)
+{
+	int fd;
+	struct stat st;
+	struct cache_header *hdr;
+	void *mmap_addr;
+	size_t mmap_size;
+
+	if (!istate->initialized)
+		return 0;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return 0;
+
+	if (fstat(fd, &st))
+		return 0;
+
+	/* file is too big */
+	if (st.st_size > (size_t)st.st_size)
+		return 0;
+
+	mmap_size = (size_t)st.st_size;
+	if (mmap_size < sizeof(struct cache_header) + 20)
+		return 0;
+
+	mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	close(fd);
+	if (mmap_addr == MAP_FAILED)
+		return 0;
+
+	hdr = mmap_addr;
+	if (verify_hdr(hdr, mmap_size) < 0)
+		goto unmap;
+
+	if (hashcmp(istate->sha1, (unsigned char *)hdr + mmap_size - 20))
+		goto unmap;
+
+	munmap(mmap_addr, mmap_size);
+	return 1;
+
+unmap:
+	munmap(mmap_addr, mmap_size);
+	return 0;
+}
+
+int verify_index(struct index_state *istate)
+{
+	return verify_index_from(istate, get_index_file());
+}
+
 static int read_index_extension(struct index_state *istate,
 				const char *ext, void *data, unsigned long sz)
 {
@@ -1445,7 +1499,7 @@ int read_index_from(struct index_state *istate, const char *path)
 	struct stat st;
 	unsigned long src_offset;
 	struct cache_header *hdr;
-	void *mmap;
+	void *mmap_addr;
 	size_t mmap_size;
 	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
 
@@ -1468,15 +1522,16 @@ int read_index_from(struct index_state *istate, const char *path)
 	if (mmap_size < sizeof(struct cache_header) + 20)
 		die("index file smaller than expected");
 
-	mmap = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
-	if (mmap == MAP_FAILED)
+	mmap_addr = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	if (mmap_addr == MAP_FAILED)
 		die_errno("unable to map index file");
 	close(fd);
 
-	hdr = mmap;
+	hdr = mmap_addr;
 	if (verify_hdr(hdr, mmap_size) < 0)
 		goto unmap;
 
+	hashcpy(istate->sha1, (unsigned char *)hdr + mmap_size - 20);
 	istate->version = ntohl(hdr->hdr_version);
 	istate->cache_nr = ntohl(hdr->hdr_entries);
 	istate->cache_alloc = alloc_nr(istate->cache_nr);
@@ -1494,7 +1549,7 @@ int read_index_from(struct index_state *istate, const char *path)
 		struct cache_entry *ce;
 		unsigned long consumed;
 
-		disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
+		disk_ce = (struct ondisk_cache_entry *)((char *)mmap_addr + src_offset);
 		ce = create_from_disk(disk_ce, &consumed, previous_name);
 		set_index_entry(istate, i, ce);
 
@@ -1512,21 +1567,21 @@ int read_index_from(struct index_state *istate, const char *path)
 		 * in 4-byte network byte order.
 		 */
 		uint32_t extsize;
-		memcpy(&extsize, (char *)mmap + src_offset + 4, 4);
+		memcpy(&extsize, (char *)mmap_addr + src_offset + 4, 4);
 		extsize = ntohl(extsize);
 		if (read_index_extension(istate,
-					 (const char *) mmap + src_offset,
-					 (char *) mmap + src_offset + 8,
+					 (const char *) mmap_addr + src_offset,
+					 (char *) mmap_addr + src_offset + 8,
 					 extsize) < 0)
 			goto unmap;
 		src_offset += 8;
 		src_offset += extsize;
 	}
-	munmap(mmap, mmap_size);
+	munmap(mmap_addr, mmap_size);
 	return istate->cache_nr;
 
 unmap:
-	munmap(mmap, mmap_size);
+	munmap(mmap_addr, mmap_size);
 	die("index file corrupt");
 }
 
@@ -1778,8 +1833,10 @@ static int has_racy_timestamp(struct index_state *istate)
  */
 void update_index_if_able(struct index_state *istate, struct lock_file *lockfile)
 {
+	printf("cache_changed: %d  has_racy_timestamp: %d\n",
+	       istate->cache_changed, has_racy_timestamp(istate));
 	if ((istate->cache_changed || has_racy_timestamp(istate)) &&
-	    !write_index(istate, lockfile->fd))
+	    verify_index(istate) && !write_index(istate, lockfile->fd))
 		commit_locked_index(lockfile);
 	else
 		rollback_lock_file(lockfile);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2] Verify index file before we opportunistically update it
  2014-04-09 22:06 [PATCH] Verify index file before we opportunistically update it Yiannis Marangos
  2014-04-09 22:06 ` Yiannis Marangos
@ 2014-04-09 22:34 ` Yiannis Marangos
  2014-04-09 23:05   ` Junio C Hamano
  2014-04-09 22:52 ` [PATCH v3] " Yiannis Marangos
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 31+ messages in thread
From: Yiannis Marangos @ 2014-04-09 22:34 UTC (permalink / raw)
  To: git; +Cc: Yiannis Marangos

Before we proceed to "opportunistic update" we must verify that the
current index file is the same as the one that we read before. There
is a possible race if we don't do this:

  1. process A calls git-rebase

  2. process A applies 1st commit

  3. process B calls git-status

  4. process B reads index

  5. process A applies 2nd commit

  6. process B takes the lock, then overwrites process A's changes.

  7. process A applies 3rd commit

As an end result the 3rd commit will have a revert of the 2nd commit.

Signed-off-by: Yiannis Marangos <yiannis.marangos@gmail.com>
---
 cache.h      |  3 +++
 read-cache.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 69 insertions(+), 11 deletions(-)

diff --git a/cache.h b/cache.h
index 107ac61..b0eedad 100644
--- a/cache.h
+++ b/cache.h
@@ -279,6 +279,7 @@ struct index_state {
 		 initialized : 1;
 	struct hashmap name_hash;
 	struct hashmap dir_hash;
+	unsigned char sha1[20];
 };
 
 extern struct index_state the_index;
@@ -456,6 +457,8 @@ extern int daemonize(void);
 	} while (0)
 
 /* Initialize and use the cache information */
+extern int verify_index_from(const struct index_state *, const char *path);
+extern int verify_index(struct index_state *);
 extern int read_index(struct index_state *);
 extern int read_index_preload(struct index_state *, const struct pathspec *pathspec);
 extern int read_index_from(struct index_state *, const char *path);
diff --git a/read-cache.c b/read-cache.c
index ba13353..a49a441 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -14,6 +14,7 @@
 #include "resolve-undo.h"
 #include "strbuf.h"
 #include "varint.h"
+#include "git-compat-util.h"
 
 static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
 					       unsigned int options);
@@ -1321,6 +1322,59 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size)
 	return 0;
 }
 
+/* This function verifies if index_state has the correct sha1 of an index file.
+ * Don't die if we have any other failure, just return 0. */
+int verify_index_from(const struct index_state *istate, const char *path)
+{
+	int fd;
+	struct stat st;
+	struct cache_header *hdr;
+	void *mmap_addr;
+	size_t mmap_size;
+
+	if (!istate->initialized)
+		return 0;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return 0;
+
+	if (fstat(fd, &st))
+		return 0;
+
+	/* file is too big */
+	if (st.st_size > (size_t)st.st_size)
+		return 0;
+
+	mmap_size = (size_t)st.st_size;
+	if (mmap_size < sizeof(struct cache_header) + 20)
+		return 0;
+
+	mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	close(fd);
+	if (mmap_addr == MAP_FAILED)
+		return 0;
+
+	hdr = mmap_addr;
+	if (verify_hdr(hdr, mmap_size) < 0)
+		goto unmap;
+
+	if (hashcmp(istate->sha1, (unsigned char *)hdr + mmap_size - 20))
+		goto unmap;
+
+	munmap(mmap_addr, mmap_size);
+	return 1;
+
+unmap:
+	munmap(mmap_addr, mmap_size);
+	return 0;
+}
+
+int verify_index(struct index_state *istate)
+{
+	return verify_index_from(istate, get_index_file());
+}
+
 static int read_index_extension(struct index_state *istate,
 				const char *ext, void *data, unsigned long sz)
 {
@@ -1445,7 +1499,7 @@ int read_index_from(struct index_state *istate, const char *path)
 	struct stat st;
 	unsigned long src_offset;
 	struct cache_header *hdr;
-	void *mmap;
+	void *mmap_addr;
 	size_t mmap_size;
 	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
 
@@ -1468,15 +1522,16 @@ int read_index_from(struct index_state *istate, const char *path)
 	if (mmap_size < sizeof(struct cache_header) + 20)
 		die("index file smaller than expected");
 
-	mmap = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
-	if (mmap == MAP_FAILED)
+	mmap_addr = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	if (mmap_addr == MAP_FAILED)
 		die_errno("unable to map index file");
 	close(fd);
 
-	hdr = mmap;
+	hdr = mmap_addr;
 	if (verify_hdr(hdr, mmap_size) < 0)
 		goto unmap;
 
+	hashcpy(istate->sha1, (unsigned char *)hdr + mmap_size - 20);
 	istate->version = ntohl(hdr->hdr_version);
 	istate->cache_nr = ntohl(hdr->hdr_entries);
 	istate->cache_alloc = alloc_nr(istate->cache_nr);
@@ -1494,7 +1549,7 @@ int read_index_from(struct index_state *istate, const char *path)
 		struct cache_entry *ce;
 		unsigned long consumed;
 
-		disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
+		disk_ce = (struct ondisk_cache_entry *)((char *)mmap_addr + src_offset);
 		ce = create_from_disk(disk_ce, &consumed, previous_name);
 		set_index_entry(istate, i, ce);
 
@@ -1512,21 +1567,21 @@ int read_index_from(struct index_state *istate, const char *path)
 		 * in 4-byte network byte order.
 		 */
 		uint32_t extsize;
-		memcpy(&extsize, (char *)mmap + src_offset + 4, 4);
+		memcpy(&extsize, (char *)mmap_addr + src_offset + 4, 4);
 		extsize = ntohl(extsize);
 		if (read_index_extension(istate,
-					 (const char *) mmap + src_offset,
-					 (char *) mmap + src_offset + 8,
+					 (const char *) mmap_addr + src_offset,
+					 (char *) mmap_addr + src_offset + 8,
 					 extsize) < 0)
 			goto unmap;
 		src_offset += 8;
 		src_offset += extsize;
 	}
-	munmap(mmap, mmap_size);
+	munmap(mmap_addr, mmap_size);
 	return istate->cache_nr;
 
 unmap:
-	munmap(mmap, mmap_size);
+	munmap(mmap_addr, mmap_size);
 	die("index file corrupt");
 }
 
@@ -1779,7 +1834,7 @@ static int has_racy_timestamp(struct index_state *istate)
 void update_index_if_able(struct index_state *istate, struct lock_file *lockfile)
 {
 	if ((istate->cache_changed || has_racy_timestamp(istate)) &&
-	    !write_index(istate, lockfile->fd))
+	    verify_index(istate) && !write_index(istate, lockfile->fd))
 		commit_locked_index(lockfile);
 	else
 		rollback_lock_file(lockfile);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v3] Verify index file before we opportunistically update it
  2014-04-09 22:06 [PATCH] Verify index file before we opportunistically update it Yiannis Marangos
  2014-04-09 22:06 ` Yiannis Marangos
  2014-04-09 22:34 ` [PATCH v2] " Yiannis Marangos
@ 2014-04-09 22:52 ` Yiannis Marangos
  2014-04-10  5:22 ` [PATCH v4] " Yiannis Marangos
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 31+ messages in thread
From: Yiannis Marangos @ 2014-04-09 22:52 UTC (permalink / raw)
  To: git; +Cc: Yiannis Marangos

Before we proceed to "opportunistic update" we must verify that the
current index file is the same as the one that we read before. There
is a possible race if we don't do this:

  1. process A calls git-rebase

  2. process A applies 1st commit

  3. process B calls git-status

  4. process B reads index

  5. process A applies 2nd commit

  6. process B takes the lock, then overwrites process A's changes.

  7. process A applies 3rd commit

As an end result the 3rd commit will have a revert of the 2nd commit.

Signed-off-by: Yiannis Marangos <yiannis.marangos@gmail.com>
---
 cache.h      |  3 +++
 read-cache.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 69 insertions(+), 11 deletions(-)

diff --git a/cache.h b/cache.h
index 107ac61..76ce3d9 100644
--- a/cache.h
+++ b/cache.h
@@ -279,6 +279,7 @@ struct index_state {
 		 initialized : 1;
 	struct hashmap name_hash;
 	struct hashmap dir_hash;
+	unsigned char sha1[20];
 };
 
 extern struct index_state the_index;
@@ -456,6 +457,8 @@ extern int daemonize(void);
 	} while (0)
 
 /* Initialize and use the cache information */
+extern int verify_index_from(const struct index_state *, const char *path);
+extern int verify_index(const struct index_state *);
 extern int read_index(struct index_state *);
 extern int read_index_preload(struct index_state *, const struct pathspec *pathspec);
 extern int read_index_from(struct index_state *, const char *path);
diff --git a/read-cache.c b/read-cache.c
index ba13353..3604d8c 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -14,6 +14,7 @@
 #include "resolve-undo.h"
 #include "strbuf.h"
 #include "varint.h"
+#include "git-compat-util.h"
 
 static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
 					       unsigned int options);
@@ -1321,6 +1322,59 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size)
 	return 0;
 }
 
+/* This function verifies if index_state has the correct sha1 of an index file.
+ * Don't die if we have any other failure, just return 0. */
+int verify_index_from(const struct index_state *istate, const char *path)
+{
+	int fd;
+	struct stat st;
+	struct cache_header *hdr;
+	void *mmap_addr;
+	size_t mmap_size;
+
+	if (!istate->initialized)
+		return 0;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return 0;
+
+	if (fstat(fd, &st))
+		return 0;
+
+	/* file is too big */
+	if (st.st_size > (size_t)st.st_size)
+		return 0;
+
+	mmap_size = (size_t)st.st_size;
+	if (mmap_size < sizeof(struct cache_header) + 20)
+		return 0;
+
+	mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	close(fd);
+	if (mmap_addr == MAP_FAILED)
+		return 0;
+
+	hdr = mmap_addr;
+	if (verify_hdr(hdr, mmap_size) < 0)
+		goto unmap;
+
+	if (hashcmp(istate->sha1, (unsigned char *)hdr + mmap_size - 20))
+		goto unmap;
+
+	munmap(mmap_addr, mmap_size);
+	return 1;
+
+unmap:
+	munmap(mmap_addr, mmap_size);
+	return 0;
+}
+
+int verify_index(const struct index_state *istate)
+{
+	return verify_index_from(istate, get_index_file());
+}
+
 static int read_index_extension(struct index_state *istate,
 				const char *ext, void *data, unsigned long sz)
 {
@@ -1445,7 +1499,7 @@ int read_index_from(struct index_state *istate, const char *path)
 	struct stat st;
 	unsigned long src_offset;
 	struct cache_header *hdr;
-	void *mmap;
+	void *mmap_addr;
 	size_t mmap_size;
 	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
 
@@ -1468,15 +1522,16 @@ int read_index_from(struct index_state *istate, const char *path)
 	if (mmap_size < sizeof(struct cache_header) + 20)
 		die("index file smaller than expected");
 
-	mmap = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
-	if (mmap == MAP_FAILED)
+	mmap_addr = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	if (mmap_addr == MAP_FAILED)
 		die_errno("unable to map index file");
 	close(fd);
 
-	hdr = mmap;
+	hdr = mmap_addr;
 	if (verify_hdr(hdr, mmap_size) < 0)
 		goto unmap;
 
+	hashcpy(istate->sha1, (unsigned char *)hdr + mmap_size - 20);
 	istate->version = ntohl(hdr->hdr_version);
 	istate->cache_nr = ntohl(hdr->hdr_entries);
 	istate->cache_alloc = alloc_nr(istate->cache_nr);
@@ -1494,7 +1549,7 @@ int read_index_from(struct index_state *istate, const char *path)
 		struct cache_entry *ce;
 		unsigned long consumed;
 
-		disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
+		disk_ce = (struct ondisk_cache_entry *)((char *)mmap_addr + src_offset);
 		ce = create_from_disk(disk_ce, &consumed, previous_name);
 		set_index_entry(istate, i, ce);
 
@@ -1512,21 +1567,21 @@ int read_index_from(struct index_state *istate, const char *path)
 		 * in 4-byte network byte order.
 		 */
 		uint32_t extsize;
-		memcpy(&extsize, (char *)mmap + src_offset + 4, 4);
+		memcpy(&extsize, (char *)mmap_addr + src_offset + 4, 4);
 		extsize = ntohl(extsize);
 		if (read_index_extension(istate,
-					 (const char *) mmap + src_offset,
-					 (char *) mmap + src_offset + 8,
+					 (const char *) mmap_addr + src_offset,
+					 (char *) mmap_addr + src_offset + 8,
 					 extsize) < 0)
 			goto unmap;
 		src_offset += 8;
 		src_offset += extsize;
 	}
-	munmap(mmap, mmap_size);
+	munmap(mmap_addr, mmap_size);
 	return istate->cache_nr;
 
 unmap:
-	munmap(mmap, mmap_size);
+	munmap(mmap_addr, mmap_size);
 	die("index file corrupt");
 }
 
@@ -1779,7 +1834,7 @@ static int has_racy_timestamp(struct index_state *istate)
 void update_index_if_able(struct index_state *istate, struct lock_file *lockfile)
 {
 	if ((istate->cache_changed || has_racy_timestamp(istate)) &&
-	    !write_index(istate, lockfile->fd))
+	    verify_index(istate) && !write_index(istate, lockfile->fd))
 		commit_locked_index(lockfile);
 	else
 		rollback_lock_file(lockfile);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] Verify index file before we opportunistically update it
  2014-04-09 22:34 ` [PATCH v2] " Yiannis Marangos
@ 2014-04-09 23:05   ` Junio C Hamano
  0 siblings, 0 replies; 31+ messages in thread
From: Junio C Hamano @ 2014-04-09 23:05 UTC (permalink / raw)
  To: Yiannis Marangos; +Cc: git

Yiannis Marangos <yiannis.marangos@gmail.com> writes:

> Before we proceed to "opportunistic update" we must verify that the
> current index file is the same as the one that we read before. There
> is a possible race if we don't do this:

Please somehow make it clear that the race is in general, and use of
"git rebase" in this description is merely an example.

>   1. process A calls git-rebase

... or does anything that uses the index.

>   2. process A applies 1st commit
>
>   3. process B calls git-status
>
>   4. process B reads index
>
>   5. process A applies 2nd commit

... or does anything that updates the index.

>   6. process B takes the lock, then overwrites process A's changes.
>
>   7. process A applies 3rd commit
>
> As an end result the 3rd commit will have a revert of the 2nd commit.

And the missing piece is...?  "When process B takes the lock, it
needs to make sure that the index hasn't changed since it read at
step 4."

> Signed-off-by: Yiannis Marangos <yiannis.marangos@gmail.com>
> ---

This is a place for you to describe how this version is different
from the previous round.  I am guessing that the only change is that
you removed the unnecessary printf() from the top of if-able, but I
didn't compare (nor read either versions carefully).

>  cache.h      |  3 +++
>  read-cache.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++---------
>  2 files changed, 69 insertions(+), 11 deletions(-)
>
> diff --git a/cache.h b/cache.h
> index 107ac61..b0eedad 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -279,6 +279,7 @@ struct index_state {
>  		 initialized : 1;
>  	struct hashmap name_hash;
>  	struct hashmap dir_hash;
> +	unsigned char sha1[20];
>  };
>  
>  extern struct index_state the_index;
> @@ -456,6 +457,8 @@ extern int daemonize(void);
>  	} while (0)
>  
>  /* Initialize and use the cache information */
> +extern int verify_index_from(const struct index_state *, const char *path);
> +extern int verify_index(struct index_state *);

I do not think these should be extern; they are implementation
details of the opportunistic update, and update-if-able is the only
thing the outside world needs to know about, I think.

>  extern int read_index(struct index_state *);
>  extern int read_index_preload(struct index_state *, const struct pathspec *pathspec);
>  extern int read_index_from(struct index_state *, const char *path);
> diff --git a/read-cache.c b/read-cache.c
> index ba13353..a49a441 100644
> --- a/read-cache.c
> +++ b/read-cache.c
> @@ -14,6 +14,7 @@
>  #include "resolve-undo.h"
>  #include "strbuf.h"
>  #include "varint.h"
> +#include "git-compat-util.h"

An unnecessary change; we include "cache.h" as the very first thing
in this file.

> @@ -1321,6 +1322,59 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size)
>  	return 0;
>  }
>  
> +/* This function verifies if index_state has the correct sha1 of an index file.
> + * Don't die if we have any other failure, just return 0. */

	/*
         * Please write multi-line comment
         * like this.
         */

> +int verify_index_from(const struct index_state *istate, const char *path)
> +{

Also, this does not have to be extern.  At least not yet.

> +	int fd;
> +	struct stat st;
> +	struct cache_header *hdr;
> +	void *mmap_addr;
> +	size_t mmap_size;
> +
> +	if (!istate->initialized)
> +		return 0;
> +
> +	fd = open(path, O_RDONLY);
> +	if (fd < 0)
> +		return 0;
> +
> +	if (fstat(fd, &st))
> +		return 0;
> +
> +	/* file is too big */
> +	if (st.st_size > (size_t)st.st_size)
> +		return 0;
> +
> +	mmap_size = (size_t)st.st_size;
> +	if (mmap_size < sizeof(struct cache_header) + 20)
> +		return 0;
> +
> +	mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);

Why PROT_WRITE?

> +	close(fd);
> +	if (mmap_addr == MAP_FAILED)
> +		return 0;
> +
> +	hdr = mmap_addr;
> +	if (verify_hdr(hdr, mmap_size) < 0)
> +		goto unmap;
> +
> +	if (hashcmp(istate->sha1, (unsigned char *)hdr + mmap_size - 20))
> +		goto unmap;
> +
> +	munmap(mmap_addr, mmap_size);
> +	return 1;
> +
> +unmap:
> +	munmap(mmap_addr, mmap_size);
> +	return 0;
> +}
> +
> +int verify_index(struct index_state *istate)
> +{
> +	return verify_index_from(istate, get_index_file());
> +}
> +
>  static int read_index_extension(struct index_state *istate,
>  				const char *ext, void *data, unsigned long sz)
>  {
> @@ -1445,7 +1499,7 @@ int read_index_from(struct index_state *istate, const char *path)
>  	struct stat st;
>  	unsigned long src_offset;
>  	struct cache_header *hdr;
> -	void *mmap;
> +	void *mmap_addr;

This introduces noise to the patch to make it harder to review, and
does not look a necessary change.  Am I mistaken?

>  	size_t mmap_size;
>  	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
>  
> @@ -1468,15 +1522,16 @@ int read_index_from(struct index_state *istate, const char *path)
>  	if (mmap_size < sizeof(struct cache_header) + 20)
>  		die("index file smaller than expected");
>  
> -	mmap = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
> -	if (mmap == MAP_FAILED)
> +	mmap_addr = xmmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
> +	if (mmap_addr == MAP_FAILED)
>  		die_errno("unable to map index file");
>  	close(fd);
>  
> -	hdr = mmap;
> +	hdr = mmap_addr;
>  	if (verify_hdr(hdr, mmap_size) < 0)
>  		goto unmap;
>  
> +	hashcpy(istate->sha1, (unsigned char *)hdr + mmap_size - 20);
>  	istate->version = ntohl(hdr->hdr_version);
>  	istate->cache_nr = ntohl(hdr->hdr_entries);
>  	istate->cache_alloc = alloc_nr(istate->cache_nr);
> @@ -1494,7 +1549,7 @@ int read_index_from(struct index_state *istate, const char *path)
>  		struct cache_entry *ce;
>  		unsigned long consumed;
>  
> -		disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
> +		disk_ce = (struct ondisk_cache_entry *)((char *)mmap_addr + src_offset);
>  		ce = create_from_disk(disk_ce, &consumed, previous_name);
>  		set_index_entry(istate, i, ce);
>  
> @@ -1512,21 +1567,21 @@ int read_index_from(struct index_state *istate, const char *path)
>  		 * in 4-byte network byte order.
>  		 */
>  		uint32_t extsize;
> -		memcpy(&extsize, (char *)mmap + src_offset + 4, 4);
> +		memcpy(&extsize, (char *)mmap_addr + src_offset + 4, 4);
>  		extsize = ntohl(extsize);
>  		if (read_index_extension(istate,
> -					 (const char *) mmap + src_offset,
> -					 (char *) mmap + src_offset + 8,
> +					 (const char *) mmap_addr + src_offset,
> +					 (char *) mmap_addr + src_offset + 8,
>  					 extsize) < 0)
>  			goto unmap;
>  		src_offset += 8;
>  		src_offset += extsize;
>  	}
> -	munmap(mmap, mmap_size);
> +	munmap(mmap_addr, mmap_size);
>  	return istate->cache_nr;
>  
>  unmap:
> -	munmap(mmap, mmap_size);
> +	munmap(mmap_addr, mmap_size);
>  	die("index file corrupt");
>  }
>  
> @@ -1779,7 +1834,7 @@ static int has_racy_timestamp(struct index_state *istate)
>  void update_index_if_able(struct index_state *istate, struct lock_file *lockfile)
>  {
>  	if ((istate->cache_changed || has_racy_timestamp(istate)) &&
> -	    !write_index(istate, lockfile->fd))
> +	    verify_index(istate) && !write_index(istate, lockfile->fd))
>  		commit_locked_index(lockfile);
>  	else
>  		rollback_lock_file(lockfile);

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v4] Verify index file before we opportunistically update it
  2014-04-09 22:06 [PATCH] Verify index file before we opportunistically update it Yiannis Marangos
                   ` (2 preceding siblings ...)
  2014-04-09 22:52 ` [PATCH v3] " Yiannis Marangos
@ 2014-04-10  5:22 ` Yiannis Marangos
  2014-04-10  5:34 ` [PATCH v5] " Yiannis Marangos
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 31+ messages in thread
From: Yiannis Marangos @ 2014-04-10  5:22 UTC (permalink / raw)
  To: git; +Cc: Yiannis Marangos

Before we proceed to "opportunistic update" we must verify that the
current index file is the same as the one that we read before. There
is a possible race if we don't do this. In the example below git-show
does "opportunistic update" and git-rebase updates the index, but the
race can happen in general.

  1. process A calls git-rebase (or does anything that uses the index)

  2. process A applies 1st commit

  3. process B calls git-status (or does anything that updates the index)

  4. process B reads index

  5. process A applies 2nd commit

  6. process B takes the lock, then overwrites process A's changes.

  7. process A applies 3rd commit

As an end result the 3rd commit will have a revert of the 2nd commit.
When process B takes the lock, it needs to make sure that the index
hasn't changed since step 4.

Signed-off-by: Yiannis Marangos <yiannis.marangos@gmail.com>
---

Version 4 contains fixes based on Junio's comments.

 cache.h      |  1 +
 read-cache.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index 107ac61..0460f06 100644
--- a/cache.h
+++ b/cache.h
@@ -279,6 +279,7 @@ struct index_state {
 		 initialized : 1;
 	struct hashmap name_hash;
 	struct hashmap dir_hash;
+	unsigned char sha1[20];
 };
 
 extern struct index_state the_index;
diff --git a/read-cache.c b/read-cache.c
index ba13353..73a15b1 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1477,6 +1477,7 @@ int read_index_from(struct index_state *istate, const char *path)
 	if (verify_hdr(hdr, mmap_size) < 0)
 		goto unmap;
 
+	hashcpy(istate->sha1, (unsigned char *)hdr + mmap_size - 20);
 	istate->version = ntohl(hdr->hdr_version);
 	istate->cache_nr = ntohl(hdr->hdr_entries);
 	istate->cache_alloc = alloc_nr(istate->cache_nr);
@@ -1760,6 +1761,61 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce,
 	return result;
 }
 
+/*
+ * This function verifies if index_state has the correct sha1 of an index file.
+ * Don't die if we have any other failure, just return 0.
+ */
+static int verify_index_from(const struct index_state *istate, const char *path)
+{
+	int fd;
+	struct stat st;
+	struct cache_header *hdr;
+	void *mmap_addr;
+	size_t mmap_size;
+
+	if (!istate->initialized)
+		return 0;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return 0;
+
+	if (fstat(fd, &st))
+		return 0;
+
+	/* file is too big */
+	if (st.st_size > (size_t)st.st_size)
+		return 0;
+
+	mmap_size = (size_t)st.st_size;
+	if (mmap_size < sizeof(struct cache_header) + 20)
+		return 0;
+
+	mmap_addr = mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	close(fd);
+	if (mmap_addr == MAP_FAILED)
+		return 0;
+
+	hdr = mmap_addr;
+	if (verify_hdr(hdr, mmap_size) < 0)
+		goto unmap;
+
+	if (hashcmp(istate->sha1, (unsigned char *)hdr + mmap_size - 20))
+		goto unmap;
+
+	munmap(mmap_addr, mmap_size);
+	return 1;
+
+unmap:
+	munmap(mmap_addr, mmap_size);
+	return 0;
+}
+
+static int verify_index(const struct index_state *istate)
+{
+	return verify_index_from(istate, get_index_file());
+}
+
 static int has_racy_timestamp(struct index_state *istate)
 {
 	int entries = istate->cache_nr;
@@ -1779,7 +1835,7 @@ static int has_racy_timestamp(struct index_state *istate)
 void update_index_if_able(struct index_state *istate, struct lock_file *lockfile)
 {
 	if ((istate->cache_changed || has_racy_timestamp(istate)) &&
-	    !write_index(istate, lockfile->fd))
+	    verify_index(istate) && !write_index(istate, lockfile->fd))
 		commit_locked_index(lockfile);
 	else
 		rollback_lock_file(lockfile);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v5] Verify index file before we opportunistically update it
  2014-04-09 22:06 [PATCH] Verify index file before we opportunistically update it Yiannis Marangos
                   ` (3 preceding siblings ...)
  2014-04-10  5:22 ` [PATCH v4] " Yiannis Marangos
@ 2014-04-10  5:34 ` Yiannis Marangos
  2014-04-10 10:40   ` Duy Nguyen
  2014-04-10 13:11 ` [PATCH v6] " Yiannis Marangos
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 31+ messages in thread
From: Yiannis Marangos @ 2014-04-10  5:34 UTC (permalink / raw)
  To: git; +Cc: Yiannis Marangos

Before we proceed to "opportunistic update" we must verify that the
current index file is the same as the one that we read before. There
is a possible race if we don't do this. In the example below git-status
does "opportunistic update" and git-rebase updates the index, but the
race can happen in general.

  1. process A calls git-rebase (or does anything that uses the index)

  2. process A applies 1st commit

  3. process B calls git-status (or does anything that updates the index)

  4. process B reads index

  5. process A applies 2nd commit

  6. process B takes the lock, then overwrites process A's changes.

  7. process A applies 3rd commit

As an end result the 3rd commit will have a revert of the 2nd commit.
When process B takes the lock, it needs to make sure that the index
hasn't changed since step 4.

Signed-off-by: Yiannis Marangos <yiannis.marangos@gmail.com>
---

Version 4 contains fixes based on Junio's comments.
Version 5 fixs a typo in commit message (git-show -> git-status)

 cache.h      |  1 +
 read-cache.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index 107ac61..0460f06 100644
--- a/cache.h
+++ b/cache.h
@@ -279,6 +279,7 @@ struct index_state {
 		 initialized : 1;
 	struct hashmap name_hash;
 	struct hashmap dir_hash;
+	unsigned char sha1[20];
 };
 
 extern struct index_state the_index;
diff --git a/read-cache.c b/read-cache.c
index ba13353..73a15b1 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1477,6 +1477,7 @@ int read_index_from(struct index_state *istate, const char *path)
 	if (verify_hdr(hdr, mmap_size) < 0)
 		goto unmap;
 
+	hashcpy(istate->sha1, (unsigned char *)hdr + mmap_size - 20);
 	istate->version = ntohl(hdr->hdr_version);
 	istate->cache_nr = ntohl(hdr->hdr_entries);
 	istate->cache_alloc = alloc_nr(istate->cache_nr);
@@ -1760,6 +1761,61 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce,
 	return result;
 }
 
+/*
+ * This function verifies if index_state has the correct sha1 of an index file.
+ * Don't die if we have any other failure, just return 0.
+ */
+static int verify_index_from(const struct index_state *istate, const char *path)
+{
+	int fd;
+	struct stat st;
+	struct cache_header *hdr;
+	void *mmap_addr;
+	size_t mmap_size;
+
+	if (!istate->initialized)
+		return 0;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return 0;
+
+	if (fstat(fd, &st))
+		return 0;
+
+	/* file is too big */
+	if (st.st_size > (size_t)st.st_size)
+		return 0;
+
+	mmap_size = (size_t)st.st_size;
+	if (mmap_size < sizeof(struct cache_header) + 20)
+		return 0;
+
+	mmap_addr = mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	close(fd);
+	if (mmap_addr == MAP_FAILED)
+		return 0;
+
+	hdr = mmap_addr;
+	if (verify_hdr(hdr, mmap_size) < 0)
+		goto unmap;
+
+	if (hashcmp(istate->sha1, (unsigned char *)hdr + mmap_size - 20))
+		goto unmap;
+
+	munmap(mmap_addr, mmap_size);
+	return 1;
+
+unmap:
+	munmap(mmap_addr, mmap_size);
+	return 0;
+}
+
+static int verify_index(const struct index_state *istate)
+{
+	return verify_index_from(istate, get_index_file());
+}
+
 static int has_racy_timestamp(struct index_state *istate)
 {
 	int entries = istate->cache_nr;
@@ -1779,7 +1835,7 @@ static int has_racy_timestamp(struct index_state *istate)
 void update_index_if_able(struct index_state *istate, struct lock_file *lockfile)
 {
 	if ((istate->cache_changed || has_racy_timestamp(istate)) &&
-	    !write_index(istate, lockfile->fd))
+	    verify_index(istate) && !write_index(istate, lockfile->fd))
 		commit_locked_index(lockfile);
 	else
 		rollback_lock_file(lockfile);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v5] Verify index file before we opportunistically update it
  2014-04-10  5:34 ` [PATCH v5] " Yiannis Marangos
@ 2014-04-10 10:40   ` Duy Nguyen
  2014-04-10 11:57     ` Yiannis Marangos
  2014-04-10 16:57     ` Junio C Hamano
  0 siblings, 2 replies; 31+ messages in thread
From: Duy Nguyen @ 2014-04-10 10:40 UTC (permalink / raw)
  To: Yiannis Marangos; +Cc: Git Mailing List

On Thu, Apr 10, 2014 at 12:34 PM, Yiannis Marangos
<yiannis.marangos@gmail.com> wrote:
> +/*
> + * This function verifies if index_state has the correct sha1 of an index file.
> + * Don't die if we have any other failure, just return 0.
> + */
> +static int verify_index_from(const struct index_state *istate, const char *path)
> +{
> +       int fd;
> +       struct stat st;
> +       struct cache_header *hdr;
> +       void *mmap_addr;
> +       size_t mmap_size;
> +
> +       if (!istate->initialized)
> +               return 0;
> +
> +       fd = open(path, O_RDONLY);
> +       if (fd < 0)
> +               return 0;
> +
> +       if (fstat(fd, &st))
> +               return 0;
> +
> +       /* file is too big */
> +       if (st.st_size > (size_t)st.st_size)
> +               return 0;
> +
> +       mmap_size = (size_t)st.st_size;
> +       if (mmap_size < sizeof(struct cache_header) + 20)
> +               return 0;
> +
> +       mmap_addr = mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, fd, 0);
> +       close(fd);
> +       if (mmap_addr == MAP_FAILED)
> +               return 0;
> +
> +       hdr = mmap_addr;
> +       if (verify_hdr(hdr, mmap_size) < 0)
> +               goto unmap;

verify_hdr() is a bit expensive because you need to digest the whole
index file (could big as big as 14MB on webkit). Could we get away
without it? I mean, is it enough that we pick the last 20 bytes and
compare it with istate->sha1? If we only need 20 bytes, pread() may be
better than mmap().

The chance of SHA-1 collision is small enough for us to ignore, I
think. And if a client updates the index without updating the trailing
sha-1, the index is broken and we don't have to worry about
overwriting it.

> +
> +       if (hashcmp(istate->sha1, (unsigned char *)hdr + mmap_size - 20))
> +               goto unmap;
> +
> +       munmap(mmap_addr, mmap_size);
> +       return 1;
> +
> +unmap:
> +       munmap(mmap_addr, mmap_size);
> +       return 0;
> +}
-- 
Duy

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v5] Verify index file before we opportunistically update it
  2014-04-10 10:40   ` Duy Nguyen
@ 2014-04-10 11:57     ` Yiannis Marangos
  2014-04-10 16:57     ` Junio C Hamano
  1 sibling, 0 replies; 31+ messages in thread
From: Yiannis Marangos @ 2014-04-10 11:57 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Git Mailing List

On Thu, Apr 10, 2014 at 05:40:55PM +0700, Duy Nguyen wrote:
> verify_hdr() is a bit expensive because you need to digest the whole
> index file (could big as big as 14MB on webkit). Could we get away
> without it? I mean, is it enough that we pick the last 20 bytes and
> compare it with istate->sha1? If we only need 20 bytes, pread() may be
> better than mmap().
> 
> The chance of SHA-1 collision is small enough for us to ignore, I
> think. And if a client updates the index without updating the trailing
> sha-1, the index is broken and we don't have to worry about
> overwriting it.

That's true, I will commit version 6 in a bit.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v6] Verify index file before we opportunistically update it
  2014-04-09 22:06 [PATCH] Verify index file before we opportunistically update it Yiannis Marangos
                   ` (4 preceding siblings ...)
  2014-04-10  5:34 ` [PATCH v5] " Yiannis Marangos
@ 2014-04-10 13:11 ` Yiannis Marangos
  2014-04-10 18:31 ` [PATCH v7 1/2] Add xpread() and xpwrite() Yiannis Marangos
  2014-04-10 18:54 ` [PATCH v8 1/2] Add xpread() Yiannis Marangos
  7 siblings, 0 replies; 31+ messages in thread
From: Yiannis Marangos @ 2014-04-10 13:11 UTC (permalink / raw)
  To: git; +Cc: Yiannis Marangos

Before we proceed to "opportunistic update" we must verify that the
current index file is the same as the one that we read before. There
is a possible race if we don't do this. In the example below git-status
does "opportunistic update" and git-rebase updates the index, but the
race can happen in general.

  1. process A calls git-rebase (or does anything that uses the index)

  2. process A applies 1st commit

  3. process B calls git-status (or does anything that updates the index)

  4. process B reads index

  5. process A applies 2nd commit

  6. process B takes the lock, then overwrites process A's changes.

  7. process A applies 3rd commit

As an end result the 3rd commit will have a revert of the 2nd commit.
When process B takes the lock, it needs to make sure that the index
hasn't changed since step 4.

Signed-off-by: Yiannis Marangos <yiannis.marangos@gmail.com>
---

Version 4 contains fixes based on Junio's comments.
Version 5 fixs a typo in commit message (git-show -> git-status)
Version 6 removes verify_hdr() and use pread() instead of mmap().

 cache.h      |  1 +
 read-cache.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index 107ac61..0460f06 100644
--- a/cache.h
+++ b/cache.h
@@ -279,6 +279,7 @@ struct index_state {
 		 initialized : 1;
 	struct hashmap name_hash;
 	struct hashmap dir_hash;
+	unsigned char sha1[20];
 };
 
 extern struct index_state the_index;
diff --git a/read-cache.c b/read-cache.c
index ba13353..034865d 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1477,6 +1477,7 @@ int read_index_from(struct index_state *istate, const char *path)
 	if (verify_hdr(hdr, mmap_size) < 0)
 		goto unmap;
 
+	hashcpy(istate->sha1, (unsigned char *)hdr + mmap_size - 20);
 	istate->version = ntohl(hdr->hdr_version);
 	istate->cache_nr = ntohl(hdr->hdr_entries);
 	istate->cache_alloc = alloc_nr(istate->cache_nr);
@@ -1760,6 +1761,50 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce,
 	return result;
 }
 
+/*
+ * This function verifies if index_state has the correct sha1 of an index file.
+ * Don't die if we have any other failure, just return 0.
+ */
+static int verify_index_from(const struct index_state *istate, const char *path)
+{
+	int fd;
+	ssize_t n;
+	struct stat st;
+	unsigned char sha1[20];
+
+	if (!istate->initialized)
+		return 0;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return 0;
+
+	if (fstat(fd, &st))
+		goto out;
+
+	if (st.st_size < sizeof(struct cache_header) + 20)
+		goto out;
+
+	n = pread(fd, sha1, 20, st.st_size - 20);
+	if (n != 20)
+		goto out;
+
+	if (hashcmp(istate->sha1, sha1))
+		goto out;
+
+	close(fd);
+	return 1;
+
+out:
+	close(fd);
+	return 0;
+}
+
+static int verify_index(const struct index_state *istate)
+{
+	return verify_index_from(istate, get_index_file());
+}
+
 static int has_racy_timestamp(struct index_state *istate)
 {
 	int entries = istate->cache_nr;
@@ -1779,7 +1824,7 @@ static int has_racy_timestamp(struct index_state *istate)
 void update_index_if_able(struct index_state *istate, struct lock_file *lockfile)
 {
 	if ((istate->cache_changed || has_racy_timestamp(istate)) &&
-	    !write_index(istate, lockfile->fd))
+	    verify_index(istate) && !write_index(istate, lockfile->fd))
 		commit_locked_index(lockfile);
 	else
 		rollback_lock_file(lockfile);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v5] Verify index file before we opportunistically update it
  2014-04-10 10:40   ` Duy Nguyen
  2014-04-10 11:57     ` Yiannis Marangos
@ 2014-04-10 16:57     ` Junio C Hamano
  1 sibling, 0 replies; 31+ messages in thread
From: Junio C Hamano @ 2014-04-10 16:57 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Yiannis Marangos, Git Mailing List

Duy Nguyen <pclouds@gmail.com> writes:

> verify_hdr() is a bit expensive because you need to digest the whole
> index file (could big as big as 14MB on webkit). Could we get away
> without it? I mean, is it enough that we pick the last 20 bytes and
> compare it with istate->sha1? If we only need 20 bytes, pread() may be
> better than mmap().

True.

I do not think we offer xread() equivalent for pread() to help
coders to avoid the common mistake of failing to resume interrupted
system calls, though.

The only callsite of pread() we currently have is in index-pack.c,
which just does

		n = pread(pack_fd, inbuf, n, from);
		if (n < 0)
			die_errno(_("cannot pread pack file"));

without paying attention to EAGAIN/EINTR, that seems wrong and may
want to be fixed, before we introduce another call site of pread().

Thanks.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v7 1/2] Add xpread() and xpwrite()
  2014-04-09 22:06 [PATCH] Verify index file before we opportunistically update it Yiannis Marangos
                   ` (5 preceding siblings ...)
  2014-04-10 13:11 ` [PATCH v6] " Yiannis Marangos
@ 2014-04-10 18:31 ` Yiannis Marangos
  2014-04-10 18:31   ` [PATCH v7 2/2] Verify index file before we opportunistically update it Yiannis Marangos
  2014-04-10 18:35   ` [PATCH v7 1/2] Add xpread() and xpwrite() Junio C Hamano
  2014-04-10 18:54 ` [PATCH v8 1/2] Add xpread() Yiannis Marangos
  7 siblings, 2 replies; 31+ messages in thread
From: Yiannis Marangos @ 2014-04-10 18:31 UTC (permalink / raw)
  To: git; +Cc: Yiannis Marangos

xpread() and xpwrite() pay attention to EAGAIN/EINTR, so they will resume
automatically on interrupted call.

Signed-off-by: Yiannis Marangos <yiannis.marangos@gmail.com>
---
 builtin/index-pack.c |  2 +-
 compat/mmap.c        |  4 +---
 git-compat-util.h    |  2 ++
 wrapper.c            | 36 ++++++++++++++++++++++++++++++++++++
 4 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index b9f6e12..1bac0f5 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -542,7 +542,7 @@ static void *unpack_data(struct object_entry *obj,
 
 	do {
 		ssize_t n = (len < 64*1024) ? len : 64*1024;
-		n = pread(pack_fd, inbuf, n, from);
+		n = xpread(pack_fd, inbuf, n, from);
 		if (n < 0)
 			die_errno(_("cannot pread pack file"));
 		if (!n)
diff --git a/compat/mmap.c b/compat/mmap.c
index c9d46d1..7f662fe 100644
--- a/compat/mmap.c
+++ b/compat/mmap.c
@@ -14,7 +14,7 @@ void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t of
 	}
 
 	while (n < length) {
-		ssize_t count = pread(fd, (char *)start + n, length - n, offset + n);
+		ssize_t count = xpread(fd, (char *)start + n, length - n, offset + n);
 
 		if (count == 0) {
 			memset((char *)start+n, 0, length-n);
@@ -22,8 +22,6 @@ void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t of
 		}
 
 		if (count < 0) {
-			if (errno == EAGAIN || errno == EINTR)
-				continue;
 			free(start);
 			errno = EACCES;
 			return MAP_FAILED;
diff --git a/git-compat-util.h b/git-compat-util.h
index f6d3a46..4da04c6 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -531,6 +531,8 @@ extern void *xcalloc(size_t nmemb, size_t size);
 extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
 extern ssize_t xread(int fd, void *buf, size_t len);
 extern ssize_t xwrite(int fd, const void *buf, size_t len);
+extern ssize_t xpread(int fd, void *buf, size_t len, off_t offset);
+extern ssize_t xpwrite(int fd, const void *buf, size_t len, off_t offset);
 extern int xdup(int fd);
 extern FILE *xfdopen(int fd, const char *mode);
 extern int xmkstemp(char *template);
diff --git a/wrapper.c b/wrapper.c
index 0cc5636..25b7419 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -174,6 +174,42 @@ ssize_t xwrite(int fd, const void *buf, size_t len)
 	}
 }
 
+/*
+ * xpread() is the same as pread(), but it automatically restarts pread()
+ * operations with a recoverable error (EAGAIN and EINTR). xpread() DOES
+ * NOT GUARANTEE that "len" bytes is read even if the data is available.
+ */
+ssize_t xpread(int fd, void *buf, size_t len, off_t offset)
+{
+	ssize_t nr;
+	if (len > MAX_IO_SIZE)
+	    len = MAX_IO_SIZE;
+	while (1) {
+		nr = pread(fd, buf, len, offset);
+		if ((nr < 0) && (errno == EAGAIN || errno == EINTR))
+			continue;
+		return nr;
+	}
+}
+
+/*
+ * xpwrite() is the same as pwrite(), but it automatically restarts pwrite()
+ * operations with a recoverable error (EAGAIN and EINTR). xpwrite() DOES NOT
+ * GUARANTEE that "len" bytes is written even if the operation is successful.
+ */
+ssize_t xpwrite(int fd, const void *buf, size_t len, off_t offset)
+{
+	ssize_t nr;
+	if (len > MAX_IO_SIZE)
+	    len = MAX_IO_SIZE;
+	while (1) {
+		nr = pwrite(fd, buf, len, offset);
+		if ((nr < 0) && (errno == EAGAIN || errno == EINTR))
+			continue;
+		return nr;
+	}
+}
+
 ssize_t read_in_full(int fd, void *buf, size_t count)
 {
 	char *p = buf;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v7 2/2] Verify index file before we opportunistically update it
  2014-04-10 18:31 ` [PATCH v7 1/2] Add xpread() and xpwrite() Yiannis Marangos
@ 2014-04-10 18:31   ` Yiannis Marangos
  2014-04-10 19:28     ` Junio C Hamano
  2014-04-10 18:35   ` [PATCH v7 1/2] Add xpread() and xpwrite() Junio C Hamano
  1 sibling, 1 reply; 31+ messages in thread
From: Yiannis Marangos @ 2014-04-10 18:31 UTC (permalink / raw)
  To: git; +Cc: Yiannis Marangos

Before we proceed to "opportunistic update" we must verify that the
current index file is the same as the one that we read before. There
is a possible race if we don't do this. In the example below git-status
does "opportunistic update" and git-rebase updates the index, but the
race can happen in general.

  1. process A calls git-rebase (or does anything that uses the index)

  2. process A applies 1st commit

  3. process B calls git-status (or does anything that updates the index)

  4. process B reads index

  5. process A applies 2nd commit

  6. process B takes the lock, then overwrites process A's changes.

  7. process A applies 3rd commit

As an end result the 3rd commit will have a revert of the 2nd commit.
When process B takes the lock, it needs to make sure that the index
hasn't changed since step 4.

Signed-off-by: Yiannis Marangos <yiannis.marangos@gmail.com>
---

Version 4 contains fixes based on Junio's comments.
Version 5 fixs a typo in commit message (git-show -> git-status).
Version 6 removes verify_hdr() and use pread() instead of mmap().
Version 7 use xpread() (added with [PATCH v7 1/2]) instead of pread().

 cache.h      |  1 +
 read-cache.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index 107ac61..0460f06 100644
--- a/cache.h
+++ b/cache.h
@@ -279,6 +279,7 @@ struct index_state {
 		 initialized : 1;
 	struct hashmap name_hash;
 	struct hashmap dir_hash;
+	unsigned char sha1[20];
 };
 
 extern struct index_state the_index;
diff --git a/read-cache.c b/read-cache.c
index ba13353..28de1a6 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1477,6 +1477,7 @@ int read_index_from(struct index_state *istate, const char *path)
 	if (verify_hdr(hdr, mmap_size) < 0)
 		goto unmap;
 
+	hashcpy(istate->sha1, (unsigned char *)hdr + mmap_size - 20);
 	istate->version = ntohl(hdr->hdr_version);
 	istate->cache_nr = ntohl(hdr->hdr_entries);
 	istate->cache_alloc = alloc_nr(istate->cache_nr);
@@ -1760,6 +1761,50 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce,
 	return result;
 }
 
+/*
+ * This function verifies if index_state has the correct sha1 of an index file.
+ * Don't die if we have any other failure, just return 0.
+ */
+static int verify_index_from(const struct index_state *istate, const char *path)
+{
+	int fd;
+	ssize_t n;
+	struct stat st;
+	unsigned char sha1[20];
+
+	if (!istate->initialized)
+		return 0;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return 0;
+
+	if (fstat(fd, &st))
+		goto out;
+
+	if (st.st_size < sizeof(struct cache_header) + 20)
+		goto out;
+
+	n = xpread(fd, sha1, 20, st.st_size - 20);
+	if (n != 20)
+		goto out;
+
+	if (hashcmp(istate->sha1, sha1))
+		goto out;
+
+	close(fd);
+	return 1;
+
+out:
+	close(fd);
+	return 0;
+}
+
+static int verify_index(const struct index_state *istate)
+{
+	return verify_index_from(istate, get_index_file());
+}
+
 static int has_racy_timestamp(struct index_state *istate)
 {
 	int entries = istate->cache_nr;
@@ -1779,7 +1824,7 @@ static int has_racy_timestamp(struct index_state *istate)
 void update_index_if_able(struct index_state *istate, struct lock_file *lockfile)
 {
 	if ((istate->cache_changed || has_racy_timestamp(istate)) &&
-	    !write_index(istate, lockfile->fd))
+	    verify_index(istate) && !write_index(istate, lockfile->fd))
 		commit_locked_index(lockfile);
 	else
 		rollback_lock_file(lockfile);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v7 1/2] Add xpread() and xpwrite()
  2014-04-10 18:31 ` [PATCH v7 1/2] Add xpread() and xpwrite() Yiannis Marangos
  2014-04-10 18:31   ` [PATCH v7 2/2] Verify index file before we opportunistically update it Yiannis Marangos
@ 2014-04-10 18:35   ` Junio C Hamano
  2014-04-10 18:44     ` Yiannis Marangos
  1 sibling, 1 reply; 31+ messages in thread
From: Junio C Hamano @ 2014-04-10 18:35 UTC (permalink / raw)
  To: Yiannis Marangos; +Cc: git

Yiannis Marangos <yiannis.marangos@gmail.com> writes:

> xpread() and xpwrite() pay attention to EAGAIN/EINTR, so they will resume
> automatically on interrupted call.

We do not even use pwrite(); please don't add anything unnecessary
and unexercised, like xpwrite(), as potential bugs in it will go
unnoticed long after its introduction until it first gets used.

> diff --git a/builtin/index-pack.c b/builtin/index-pack.c
> index b9f6e12..1bac0f5 100644
> --- a/builtin/index-pack.c
> +++ b/builtin/index-pack.c
> @@ -542,7 +542,7 @@ static void *unpack_data(struct object_entry *obj,
>  
>  	do {
>  		ssize_t n = (len < 64*1024) ? len : 64*1024;
> -		n = pread(pack_fd, inbuf, n, from);
> +		n = xpread(pack_fd, inbuf, n, from);
>  		if (n < 0)
>  			die_errno(_("cannot pread pack file"));
>  		if (!n)

OK.

> diff --git a/compat/mmap.c b/compat/mmap.c
> index c9d46d1..7f662fe 100644
> --- a/compat/mmap.c
> +++ b/compat/mmap.c
> @@ -14,7 +14,7 @@ void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t of
>  	}
>  
>  	while (n < length) {
> -		ssize_t count = pread(fd, (char *)start + n, length - n, offset + n);
> +		ssize_t count = xpread(fd, (char *)start + n, length - n, offset + n);
>  
>  		if (count == 0) {
>  			memset((char *)start+n, 0, length-n);
> @@ -22,8 +22,6 @@ void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t of
>  		}
>  
>  		if (count < 0) {
> -			if (errno == EAGAIN || errno == EINTR)
> -				continue;
>  			free(start);
>  			errno = EACCES;
>  			return MAP_FAILED;

OK.

> diff --git a/wrapper.c b/wrapper.c
> index 0cc5636..25b7419 100644
> --- a/wrapper.c
> +++ b/wrapper.c
> @@ -174,6 +174,42 @@ ssize_t xwrite(int fd, const void *buf, size_t len)
>  	}
>  }
>  
> +/*
> + * xpread() is the same as pread(), but it automatically restarts pread()
> + * operations with a recoverable error (EAGAIN and EINTR). xpread() DOES
> + * NOT GUARANTEE that "len" bytes is read even if the data is available.
> + */
> +ssize_t xpread(int fd, void *buf, size_t len, off_t offset)
> +{
> +	ssize_t nr;
> +	if (len > MAX_IO_SIZE)
> +	    len = MAX_IO_SIZE;
> +	while (1) {
> +		nr = pread(fd, buf, len, offset);
> +		if ((nr < 0) && (errno == EAGAIN || errno == EINTR))
> +			continue;
> +		return nr;
> +	}
> +}

OK.

Thanks.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v7 1/2] Add xpread() and xpwrite()
  2014-04-10 18:35   ` [PATCH v7 1/2] Add xpread() and xpwrite() Junio C Hamano
@ 2014-04-10 18:44     ` Yiannis Marangos
  0 siblings, 0 replies; 31+ messages in thread
From: Yiannis Marangos @ 2014-04-10 18:44 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Thu, Apr 10, 2014 at 11:35:42AM -0700, Junio C Hamano wrote:
> We do not even use pwrite(); please don't add anything unnecessary
> and unexercised, like xpwrite(), as potential bugs in it will go
> unnoticed long after its introduction until it first gets used.

Correct, my mistake.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v8 1/2] Add xpread()
  2014-04-09 22:06 [PATCH] Verify index file before we opportunistically update it Yiannis Marangos
                   ` (6 preceding siblings ...)
  2014-04-10 18:31 ` [PATCH v7 1/2] Add xpread() and xpwrite() Yiannis Marangos
@ 2014-04-10 18:54 ` Yiannis Marangos
  2014-04-10 19:12   ` Johannes Sixt
  7 siblings, 1 reply; 31+ messages in thread
From: Yiannis Marangos @ 2014-04-10 18:54 UTC (permalink / raw)
  To: git; +Cc: Yiannis Marangos

xpread() pay attention to EAGAIN/EINTR, so it will resume
automatically on interrupted call.

Signed-off-by: Yiannis Marangos <yiannis.marangos@gmail.com>
---

This is actually the 2nd version of this commit but before I emailed
it as version 7 because it's a dependency of [PATCH v7 2/2]. Is this
the correct way to introduce a new descendant commit? Sorry, It's my
first time that I use mailing lists for contribution.

Version 8 removes xpwrite()

 builtin/index-pack.c |  2 +-
 compat/mmap.c        |  4 +---
 git-compat-util.h    |  1 +
 wrapper.c            | 18 ++++++++++++++++++
 4 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index b9f6e12..1bac0f5 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -542,7 +542,7 @@ static void *unpack_data(struct object_entry *obj,
 
 	do {
 		ssize_t n = (len < 64*1024) ? len : 64*1024;
-		n = pread(pack_fd, inbuf, n, from);
+		n = xpread(pack_fd, inbuf, n, from);
 		if (n < 0)
 			die_errno(_("cannot pread pack file"));
 		if (!n)
diff --git a/compat/mmap.c b/compat/mmap.c
index c9d46d1..7f662fe 100644
--- a/compat/mmap.c
+++ b/compat/mmap.c
@@ -14,7 +14,7 @@ void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t of
 	}
 
 	while (n < length) {
-		ssize_t count = pread(fd, (char *)start + n, length - n, offset + n);
+		ssize_t count = xpread(fd, (char *)start + n, length - n, offset + n);
 
 		if (count == 0) {
 			memset((char *)start+n, 0, length-n);
@@ -22,8 +22,6 @@ void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t of
 		}
 
 		if (count < 0) {
-			if (errno == EAGAIN || errno == EINTR)
-				continue;
 			free(start);
 			errno = EACCES;
 			return MAP_FAILED;
diff --git a/git-compat-util.h b/git-compat-util.h
index f6d3a46..e6a4159 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -531,6 +531,7 @@ extern void *xcalloc(size_t nmemb, size_t size);
 extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
 extern ssize_t xread(int fd, void *buf, size_t len);
 extern ssize_t xwrite(int fd, const void *buf, size_t len);
+extern ssize_t xpread(int fd, void *buf, size_t len, off_t offset);
 extern int xdup(int fd);
 extern FILE *xfdopen(int fd, const char *mode);
 extern int xmkstemp(char *template);
diff --git a/wrapper.c b/wrapper.c
index 0cc5636..cf71817 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -174,6 +174,24 @@ ssize_t xwrite(int fd, const void *buf, size_t len)
 	}
 }
 
+/*
+ * xpread() is the same as pread(), but it automatically restarts pread()
+ * operations with a recoverable error (EAGAIN and EINTR). xpread() DOES
+ * NOT GUARANTEE that "len" bytes is read even if the data is available.
+ */
+ssize_t xpread(int fd, void *buf, size_t len, off_t offset)
+{
+	ssize_t nr;
+	if (len > MAX_IO_SIZE)
+	    len = MAX_IO_SIZE;
+	while (1) {
+		nr = pread(fd, buf, len, offset);
+		if ((nr < 0) && (errno == EAGAIN || errno == EINTR))
+			continue;
+		return nr;
+	}
+}
+
 ssize_t read_in_full(int fd, void *buf, size_t count)
 {
 	char *p = buf;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v8 1/2] Add xpread()
  2014-04-10 18:54 ` [PATCH v8 1/2] Add xpread() Yiannis Marangos
@ 2014-04-10 19:12   ` Johannes Sixt
  2014-04-10 19:20     ` Junio C Hamano
  0 siblings, 1 reply; 31+ messages in thread
From: Johannes Sixt @ 2014-04-10 19:12 UTC (permalink / raw)
  To: Yiannis Marangos, git

Am 10.04.2014 20:54, schrieb Yiannis Marangos:
> +ssize_t xpread(int fd, void *buf, size_t len, off_t offset)
> +{
> +	ssize_t nr;
> +	if (len > MAX_IO_SIZE)
> +	    len = MAX_IO_SIZE;

Odd indentation here.

-- Hannes

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v8 1/2] Add xpread()
  2014-04-10 19:12   ` Johannes Sixt
@ 2014-04-10 19:20     ` Junio C Hamano
  0 siblings, 0 replies; 31+ messages in thread
From: Junio C Hamano @ 2014-04-10 19:20 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: Yiannis Marangos, git

Johannes Sixt <j6t@kdbg.org> writes:

> Am 10.04.2014 20:54, schrieb Yiannis Marangos:
>> +ssize_t xpread(int fd, void *buf, size_t len, off_t offset)
>> +{
>> +	ssize_t nr;
>> +	if (len > MAX_IO_SIZE)
>> +	    len = MAX_IO_SIZE;
>
> Odd indentation here.
>
> -- Hannes

Good eyes, even though this is copy&pasting an existing error from
surrounding code ;-)

I'll queue this instead (rebased on top of maint-1.8.5 even though I
doubt we would be issuing 1.8.5.6).

-- >8 --
From: Yiannis Marangos <yiannis.marangos@gmail.com>
Date: Thu, 10 Apr 2014 21:54:12 +0300
Subject: [PATCH] wrapper.c: add xpread() similar to xread()

It is a common mistake to call read(2)/pread(2) and forget to
anticipate that they may return error with EAGAIN/EINTR when the
system call is interrupted.

We have xread() helper to relieve callers of read(2) from having to
worry about it; add xpread() helper to do the same for pread(2).

Update the caller in the builtin/index-pack.c and the mmap emulation
in compat/.

Signed-off-by: Yiannis Marangos <yiannis.marangos@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/index-pack.c |  2 +-
 compat/mmap.c        |  4 +---
 git-compat-util.h    |  1 +
 wrapper.c            | 18 ++++++++++++++++++
 4 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 9e9eb4b..e7a6b53 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -542,7 +542,7 @@ static void *unpack_data(struct object_entry *obj,
 
 	do {
 		ssize_t n = (len < 64*1024) ? len : 64*1024;
-		n = pread(pack_fd, inbuf, n, from);
+		n = xpread(pack_fd, inbuf, n, from);
 		if (n < 0)
 			die_errno(_("cannot pread pack file"));
 		if (!n)
diff --git a/compat/mmap.c b/compat/mmap.c
index c9d46d1..7f662fe 100644
--- a/compat/mmap.c
+++ b/compat/mmap.c
@@ -14,7 +14,7 @@ void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t of
 	}
 
 	while (n < length) {
-		ssize_t count = pread(fd, (char *)start + n, length - n, offset + n);
+		ssize_t count = xpread(fd, (char *)start + n, length - n, offset + n);
 
 		if (count == 0) {
 			memset((char *)start+n, 0, length-n);
@@ -22,8 +22,6 @@ void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t of
 		}
 
 		if (count < 0) {
-			if (errno == EAGAIN || errno == EINTR)
-				continue;
 			free(start);
 			errno = EACCES;
 			return MAP_FAILED;
diff --git a/git-compat-util.h b/git-compat-util.h
index 7776f12..9eec5fb 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -534,6 +534,7 @@ extern void *xcalloc(size_t nmemb, size_t size);
 extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
 extern ssize_t xread(int fd, void *buf, size_t len);
 extern ssize_t xwrite(int fd, const void *buf, size_t len);
+extern ssize_t xpread(int fd, void *buf, size_t len, off_t offset);
 extern int xdup(int fd);
 extern FILE *xfdopen(int fd, const char *mode);
 extern int xmkstemp(char *template);
diff --git a/wrapper.c b/wrapper.c
index 0cc5636..5b3c7fc 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -174,6 +174,24 @@ ssize_t xwrite(int fd, const void *buf, size_t len)
 	}
 }
 
+/*
+ * xpread() is the same as pread(), but it automatically restarts pread()
+ * operations with a recoverable error (EAGAIN and EINTR). xpread() DOES
+ * NOT GUARANTEE that "len" bytes is read even if the data is available.
+ */
+ssize_t xpread(int fd, void *buf, size_t len, off_t offset)
+{
+	ssize_t nr;
+	if (len > MAX_IO_SIZE)
+		len = MAX_IO_SIZE;
+	while (1) {
+		nr = pread(fd, buf, len, offset);
+		if ((nr < 0) && (errno == EAGAIN || errno == EINTR))
+			continue;
+		return nr;
+	}
+}
+
 ssize_t read_in_full(int fd, void *buf, size_t count)
 {
 	char *p = buf;
-- 
1.9.2-590-g468068b

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v7 2/2] Verify index file before we opportunistically update it
  2014-04-10 18:31   ` [PATCH v7 2/2] Verify index file before we opportunistically update it Yiannis Marangos
@ 2014-04-10 19:28     ` Junio C Hamano
  2014-04-11  2:57       ` Duy Nguyen
                         ` (2 more replies)
  0 siblings, 3 replies; 31+ messages in thread
From: Junio C Hamano @ 2014-04-10 19:28 UTC (permalink / raw)
  To: Yiannis Marangos; +Cc: git

Yiannis Marangos <yiannis.marangos@gmail.com> writes:

> +	n = xpread(fd, sha1, 20, st.st_size - 20);
> +	if (n != 20)
> +		goto out;

I think it is possible for pread(2) to give you a short-read.

The existing callers of emulated mmap and index-pack are prepared to
handle a short-read correctly, but I do not think this code does.

I'll queue this instead in the meantime.

-- >8 --
From: Yiannis Marangos <yiannis.marangos@gmail.com>
Date: Thu, 10 Apr 2014 21:31:21 +0300
Subject: [PATCH] read-cache.c: verify index file before we opportunistically update it

Before we proceed to opportunistically update the index (often done
by an otherwise read-only operation like "git status" and "git diff"
that internally refreshes the index), we must verify that the
current index file is the same as the one that we read earlier
before we took the lock on it, in order to avoid a possible race.

In the example below git-status does "opportunistic update" and
git-rebase updates the index, but the race can happen in general.

  1. process A calls git-rebase (or does anything that uses the index)

  2. process A applies 1st commit

  3. process B calls git-status (or does anything that updates the index)

  4. process B reads index

  5. process A applies 2nd commit

  6. process B takes the lock, then overwrites process A's changes.

  7. process A applies 3rd commit

As an end result the 3rd commit will have a revert of the 2nd commit.
When process B takes the lock, it needs to make sure that the index
hasn't changed since step 4.

Signed-off-by: Yiannis Marangos <yiannis.marangos@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 cache.h      |  3 +++
 read-cache.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 wrapper.c    | 20 ++++++++++++++++++++
 3 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index ce377e1..9244c38 100644
--- a/cache.h
+++ b/cache.h
@@ -279,6 +279,7 @@ struct index_state {
 		 initialized : 1;
 	struct hash_table name_hash;
 	struct hash_table dir_hash;
+	unsigned char sha1[20];
 };
 
 extern struct index_state the_index;
@@ -1199,6 +1200,8 @@ extern void fsync_or_die(int fd, const char *);
 
 extern ssize_t read_in_full(int fd, void *buf, size_t count);
 extern ssize_t write_in_full(int fd, const void *buf, size_t count);
+extern ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset);
+
 static inline ssize_t write_str_in_full(int fd, const char *str)
 {
 	return write_in_full(fd, str, strlen(str));
diff --git a/read-cache.c b/read-cache.c
index 33dd676..f4a0d61 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1464,6 +1464,7 @@ int read_index_from(struct index_state *istate, const char *path)
 	if (verify_hdr(hdr, mmap_size) < 0)
 		goto unmap;
 
+	hashcpy(istate->sha1, (unsigned char *)hdr + mmap_size - 20);
 	istate->version = ntohl(hdr->hdr_version);
 	istate->cache_nr = ntohl(hdr->hdr_entries);
 	istate->cache_alloc = alloc_nr(istate->cache_nr);
@@ -1747,6 +1748,50 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce,
 	return result;
 }
 
+/*
+ * This function verifies if index_state has the correct sha1 of the
+ * index file.  Don't die if we have any other failure, just return 0.
+ */
+static int verify_index_from(const struct index_state *istate, const char *path)
+{
+	int fd;
+	ssize_t n;
+	struct stat st;
+	unsigned char sha1[20];
+
+	if (!istate->initialized)
+		return 0;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return 0;
+
+	if (fstat(fd, &st))
+		goto out;
+
+	if (st.st_size < sizeof(struct cache_header) + 20)
+		goto out;
+
+	n = pread_in_full(fd, sha1, 20, st.st_size - 20);
+	if (n != 20)
+		goto out;
+
+	if (hashcmp(istate->sha1, sha1))
+		goto out;
+
+	close(fd);
+	return 1;
+
+out:
+	close(fd);
+	return 0;
+}
+
+static int verify_index(const struct index_state *istate)
+{
+	return verify_index_from(istate, get_index_file());
+}
+
 static int has_racy_timestamp(struct index_state *istate)
 {
 	int entries = istate->cache_nr;
@@ -1766,7 +1811,7 @@ static int has_racy_timestamp(struct index_state *istate)
 void update_index_if_able(struct index_state *istate, struct lock_file *lockfile)
 {
 	if ((istate->cache_changed || has_racy_timestamp(istate)) &&
-	    !write_index(istate, lockfile->fd))
+	    verify_index(istate) && !write_index(istate, lockfile->fd))
 		commit_locked_index(lockfile);
 	else
 		rollback_lock_file(lockfile);
diff --git a/wrapper.c b/wrapper.c
index 5b3c7fc..bc1bfb8 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -232,6 +232,26 @@ ssize_t write_in_full(int fd, const void *buf, size_t count)
 	return total;
 }
 
+ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset)
+{
+	char *p = buf;
+	ssize_t total = 0;
+
+	while (count > 0) {
+		ssize_t loaded = xpread(fd, p, count, offset);
+		if (loaded < 0)
+			return -1;
+		if (loaded == 0)
+			return total;
+		count -= loaded;
+		p += loaded;
+		total += loaded;
+		offset += loaded;
+	}
+
+	return total;
+}
+
 int xdup(int fd)
 {
 	int ret = dup(fd);
-- 
1.9.2-590-g468068b

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v7 2/2] Verify index file before we opportunistically update it
  2014-04-10 19:28     ` Junio C Hamano
@ 2014-04-11  2:57       ` Duy Nguyen
  2014-04-11 19:24         ` Junio C Hamano
  2014-04-11  7:47       ` Torsten Bögershausen
  2014-04-11 10:36       ` Duy Nguyen
  2 siblings, 1 reply; 31+ messages in thread
From: Duy Nguyen @ 2014-04-11  2:57 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Yiannis Marangos, Git Mailing List

On Fri, Apr 11, 2014 at 2:28 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Yiannis Marangos <yiannis.marangos@gmail.com> writes:
>
>> +     n = xpread(fd, sha1, 20, st.st_size - 20);
>> +     if (n != 20)
>> +             goto out;
>
> I think it is possible for pread(2) to give you a short-read.
>
> The existing callers of emulated mmap and index-pack are prepared to
> handle a short-read correctly, but I do not think this code does.
>
> I'll queue this instead in the meantime.

There are two things to sort out (sorry I can't spend much time on it
right now): should the same sha1 test be done in write_index(), in
addition to update_index_if_able(). And what do we do about
istate->sha1[] in discard_index(), keep it or clear it.
-- 
Duy

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v7 2/2] Verify index file before we opportunistically update it
  2014-04-10 19:28     ` Junio C Hamano
  2014-04-11  2:57       ` Duy Nguyen
@ 2014-04-11  7:47       ` Torsten Bögershausen
  2014-04-11 15:58         ` Yiannis Marangos
  2014-04-11 10:36       ` Duy Nguyen
  2 siblings, 1 reply; 31+ messages in thread
From: Torsten Bögershausen @ 2014-04-11  7:47 UTC (permalink / raw)
  To: Junio C Hamano, Yiannis Marangos; +Cc: git, Torsten Bögershausen

On 2014-04-10 21.28, Junio C Hamano wrote:
> Yiannis Marangos <yiannis.marangos@gmail.com> writes:
> 
>> +	n = xpread(fd, sha1, 20, st.st_size - 20);
>> +	if (n != 20)
>> +		goto out;
> 
> I think it is possible for pread(2) to give you a short-read.
> 
> The existing callers of emulated mmap and index-pack are prepared to
> handle a short-read correctly, but I do not think this code does.
> 
> I'll queue this instead in the meantime.
> 
> -- >8 --
> From: Yiannis Marangos <yiannis.marangos@gmail.com>
> Date: Thu, 10 Apr 2014 21:31:21 +0300
> Subject: [PATCH] read-cache.c: verify index file before we opportunistically update it
> 
> Before we proceed to opportunistically update the index (often done
> by an otherwise read-only operation like "git status" and "git diff"
> that internally refreshes the index), we must verify that the
> current index file is the same as the one that we read earlier
> before we took the lock on it, in order to avoid a possible race.
> 
> In the example below git-status does "opportunistic update" and
> git-rebase updates the index, but the race can happen in general.
I'm not sure if we need the second or third commit of process A at all.
My understanding is that the simplified version will have problems as well:

  1. process A calls git-rebase (or does anything that updates the index)
  2. process change
  3. process B calls git-status (or does anything that updates the index)
  4. process B reads the index file into memory
  5. process change
  6. process A applies a commit:
      - read the index into memory
      - take the lock
      - update the index file on disc
      - release the lock
  7. process change
  8. process B applies a commit:
      - take the lock
      - update the index in memory and write the index file to disc
      - release the lock

   Now process B has overwritten the commit from process A, which is wrong.

The new code works like this:

  8. process B applies a commit:
      - take the lock
      - verifies tha the index file on disc has the same sha as the one read before
      # And if not: What do we do? die() or retry() ?
      - update the index file on disc
      - release the lock
[]
> 
> +static int verify_index_from(const struct index_state *istate, const char *path)
> +{
> +	int fd;
> +	ssize_t n;
> +	struct stat st;
> +	unsigned char sha1[20];
> +
> +	if (!istate->initialized)
> +		return 0;
> +
> +	fd = open(path, O_RDONLY);
> +	if (fd < 0)
> +		return 0;
> +
> +	if (fstat(fd, &st))
> +		goto out;
> +
> +	if (st.st_size < sizeof(struct cache_header) + 20)
> +		goto out;
> +
> +	n = pread_in_full(fd, sha1, 20, st.st_size - 20);
Minor :
What is the advantage of pread() against a lseek()/read_in_full() combo?
fd is opened and used only in one thread.
Introducing pread()/ pread_in_full() could be done in an other commit,
or do I miss something ?
> +	if (n != 20)
> +		goto out;

> +static int verify_index(const struct index_state *istate)
> +{
> +	return verify_index_from(istate, get_index_file());
> +}
> +
Minor:
Do we really need the wrapper function verify_index_from()?
It seems as if the whole code from verify_index_from() could go into
verify_index(), which will call get_index_file()
  
 
>  static int has_racy_timestamp(struct index_state *istate)
>  {
>  	int entries = istate->cache_nr;
> @@ -1766,7 +1811,7 @@ static int has_racy_timestamp(struct index_state *istate)
>  void update_index_if_able(struct index_state *istate, struct lock_file *lockfile)
>  {
>  	if ((istate->cache_changed || has_racy_timestamp(istate)) &&
> -	    !write_index(istate, lockfile->fd))
> +	    verify_index(istate) && !write_index(istate, lockfile->fd))
>  		commit_locked_index(lockfile);
>  	else
>  		rollback_lock_file(lockfile);
> diff --git a/wrapper.c b/wrapper.c
> index 5b3c7fc..bc1bfb8 100644
> --- a/wrapper.c
> +++ b/wrapper.c
> @@ -232,6 +232,26 @@ ssize_t write_in_full(int fd, const void *buf, size_t count)
>  	return total;
>  }
>  
> +ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset)
> +{
> +	char *p = buf;
> +	ssize_t total = 0;
> +
> +	while (count > 0) {
> +		ssize_t loaded = xpread(fd, p, count, offset);
> +		if (loaded < 0)
> +			return -1;
> +		if (loaded == 0)
> +			return total;
> +		count -= loaded;
> +		p += loaded;
> +		total += loaded;
> +		offset += loaded;
> +	}
> +
> +	return total;
> +}
> +
>  int xdup(int fd)
>  {
>  	int ret = dup(fd);
> 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v7 2/2] Verify index file before we opportunistically update it
  2014-04-10 19:28     ` Junio C Hamano
  2014-04-11  2:57       ` Duy Nguyen
  2014-04-11  7:47       ` Torsten Bögershausen
@ 2014-04-11 10:36       ` Duy Nguyen
  2 siblings, 0 replies; 31+ messages in thread
From: Duy Nguyen @ 2014-04-11 10:36 UTC (permalink / raw)
  To: Yiannis Marangos; +Cc: Git Mailing List, Junio C Hamano

On Fri, Apr 11, 2014 at 2:28 AM, Junio C Hamano <gitster@pobox.com> wrote:
> +static int verify_index_from(const struct index_state *istate, const char *path)
> +{
> +       int fd;
> +       ssize_t n;
> +       struct stat st;
> +       unsigned char sha1[20];
> +
> +       if (!istate->initialized)
> +               return 0;
> +
> +       fd = open(path, O_RDONLY);
> +       if (fd < 0)
> +               return 0;

Suppose another process "git rm --cached :/" is racing with us and
this imaginary git is so smart that it figures if nothing valueable is
left in the index, there's no need to write the index down at all. So
it removes $GIT_DIR/index, then $GIT_DIR/index.lock. When we come
here, we see ENOENT, but we should return 1 instead because the file
removal in this case is a form of change.

That opens a question about writing a new index. I think we could use
all-zero sha-1 as the indicator that this is a fresh new index. If
istate->sha1[] is all-zero and no index file exists, then we do not
need to verify (i.e. return 0). However if istate->sha1[] is all-zero,
but $GIT_DIR/index exists, then return 1.

I'm still not sure if elsewhere in the code base we read
$GIT_DIR/index to active_index, create a new index_state, copy entries
over (but not active_index.sha1[]), then write the _new_ index_state
down. That would hit the "however" statement above and incorrectly
return 1.

I suppose that all other errors except ENOENT could be safely ignored
(i.e. return 0 regardless of istate->sha1[]).

> +
> +       if (fstat(fd, &st))
> +               goto out;
> +
> +       if (st.st_size < sizeof(struct cache_header) + 20)
> +               goto out;
> +
> +       n = pread_in_full(fd, sha1, 20, st.st_size - 20);
> +       if (n != 20)
> +               goto out;
> +
> +       if (hashcmp(istate->sha1, sha1))
> +               goto out;
> +
> +       close(fd);
> +       return 1;
> +
> +out:
> +       close(fd);
> +       return 0;
> +}
> +
> +static int verify_index(const struct index_state *istate)
> +{
> +       return verify_index_from(istate, get_index_file());
> +}
> +
>  static int has_racy_timestamp(struct index_state *istate)
>  {
>         int entries = istate->cache_nr;
> @@ -1766,7 +1811,7 @@ static int has_racy_timestamp(struct index_state *istate)
>  void update_index_if_able(struct index_state *istate, struct lock_file *lockfile)
>  {
>         if ((istate->cache_changed || has_racy_timestamp(istate)) &&
> -           !write_index(istate, lockfile->fd))
> +           verify_index(istate) && !write_index(istate, lockfile->fd))
>                 commit_locked_index(lockfile);
>         else
>                 rollback_lock_file(lockfile);
> diff --git a/wrapper.c b/wrapper.c
> index 5b3c7fc..bc1bfb8 100644
> --- a/wrapper.c
> +++ b/wrapper.c
> @@ -232,6 +232,26 @@ ssize_t write_in_full(int fd, const void *buf, size_t count)
>         return total;
>  }
>
> +ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset)
> +{
> +       char *p = buf;
> +       ssize_t total = 0;
> +
> +       while (count > 0) {
> +               ssize_t loaded = xpread(fd, p, count, offset);
> +               if (loaded < 0)
> +                       return -1;
> +               if (loaded == 0)
> +                       return total;
> +               count -= loaded;
> +               p += loaded;
> +               total += loaded;
> +               offset += loaded;
> +       }
> +
> +       return total;
> +}
> +
>  int xdup(int fd)
>  {
>         int ret = dup(fd);
> --
> 1.9.2-590-g468068b
>
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe git" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Duy

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v7 2/2] Verify index file before we opportunistically update it
  2014-04-11  7:47       ` Torsten Bögershausen
@ 2014-04-11 15:58         ` Yiannis Marangos
  0 siblings, 0 replies; 31+ messages in thread
From: Yiannis Marangos @ 2014-04-11 15:58 UTC (permalink / raw)
  To: Torsten Bögershausen; +Cc: Junio C Hamano, git

On Fri, Apr 11, 2014 at 09:47:09AM +0200, Torsten Bögershausen wrote:
>   6. process A applies a commit:
>       - read the index into memory
>       - take the lock
>       - update the index file on disc
>       - release the lock

So here we can have race condition. Maybe we should implement Duy's
idea of verifying the sha1 in write_index()?


> The new code works like this:
>
>   8. process B applies a commit:
>       - take the lock
>       - verifies tha the index file on disc has the same sha as the one read before
>       # And if not: What do we do? die() or retry() ?
>       - update the index file on disc
>       - release the lock

IMO, we must never die() if we want to do opportunistic update and we
should never retry because it's a bit expensive if we do opportunistic update.


--
Yiannis

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v7 2/2] Verify index file before we opportunistically update it
  2014-04-11  2:57       ` Duy Nguyen
@ 2014-04-11 19:24         ` Junio C Hamano
  2014-04-11 20:43           ` Junio C Hamano
  0 siblings, 1 reply; 31+ messages in thread
From: Junio C Hamano @ 2014-04-11 19:24 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Yiannis Marangos, Git Mailing List

Duy Nguyen <pclouds@gmail.com> writes:

> On Fri, Apr 11, 2014 at 2:28 AM, Junio C Hamano <gitster@pobox.com> wrote:
>> Yiannis Marangos <yiannis.marangos@gmail.com> writes:
>>
>>> +     n = xpread(fd, sha1, 20, st.st_size - 20);
>>> +     if (n != 20)
>>> +             goto out;
>>
>> I think it is possible for pread(2) to give you a short-read.
>>
>> The existing callers of emulated mmap and index-pack are prepared to
>> handle a short-read correctly, but I do not think this code does.
>>
>> I'll queue this instead in the meantime.
>
> There are two things to sort out (sorry I can't spend much time on it
> right now): should the same sha1 test be done in write_index(), in
> addition to update_index_if_able(). And what do we do about
> istate->sha1[] in discard_index(), keep it or clear it.

Yeah, I was hoping that the real write codepath (as opposed to "this
is read only and we read the index without holding a lock---now we
noticed that the index needs refreshing, and we know how the
resulting refreshed index should look like, perhaps we can write it
to save cycles for other processes" codepath where we cannot and
should not take a lock early) would take the lock and then read, but
because that is not the way they work, the need the same protection,
I would think.  As discard_index() is not "we are removing all the
entries because the user told us to 'rm -r .'" but is "for the
purpose of our internal processing we do not need the old contents
of the index", my knee-jerk reaction is that we should not clear it.

Any operation that makes the resulting index empty (i.e. no path
being tracked) should still write an empty index (with the usual
header and the most importantly the trailing file checksum), but
that goes without saying.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v7 2/2] Verify index file before we opportunistically update it
  2014-04-11 19:24         ` Junio C Hamano
@ 2014-04-11 20:43           ` Junio C Hamano
  2014-04-11 23:30             ` Yiannis Marangos
  2014-04-12  0:10             ` Duy Nguyen
  0 siblings, 2 replies; 31+ messages in thread
From: Junio C Hamano @ 2014-04-11 20:43 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Yiannis Marangos, Git Mailing List

Junio C Hamano <gitster@pobox.com> writes:

> Yeah, I was hoping that the real write codepath (as opposed to "this
> is read only and we read the index without holding a lock---now we
> noticed that the index needs refreshing, and we know how the
> resulting refreshed index should look like, perhaps we can write it
> to save cycles for other processes" codepath where we cannot and
> should not take a lock early) would take the lock and then read, but
> because that is not the way they work, the need the same protection,
> I would think.

Having said that, nobody sane would be running two simultaneous
operations that are clearly write-oriented competing with each other
against the same index file.  So in that sense that can be done as a
less urgent follow-up for this topic.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v7 2/2] Verify index file before we opportunistically update it
  2014-04-11 20:43           ` Junio C Hamano
@ 2014-04-11 23:30             ` Yiannis Marangos
  2014-04-12  0:10             ` Duy Nguyen
  1 sibling, 0 replies; 31+ messages in thread
From: Yiannis Marangos @ 2014-04-11 23:30 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Duy Nguyen, Git Mailing List

On Fri, Apr 11, 2014 at 01:43:47PM -0700, Junio C Hamano wrote:
> Having said that, nobody sane would be running two simultaneous
> operations that are clearly write-oriented competing with each other
> against the same index file.  So in that sense that can be done as a
> less urgent follow-up for this topic.

I'm willing to take this task, I decide to spend some of my free time
for git development. But since I'm a newbie with it's architecture I
don't know when it will be ready because I want to explore the code a
bit.

--
Yiannis

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v7 2/2] Verify index file before we opportunistically update it
  2014-04-11 20:43           ` Junio C Hamano
  2014-04-11 23:30             ` Yiannis Marangos
@ 2014-04-12  0:10             ` Duy Nguyen
  2014-04-12  4:19               ` Junio C Hamano
  1 sibling, 1 reply; 31+ messages in thread
From: Duy Nguyen @ 2014-04-12  0:10 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Yiannis Marangos, Git Mailing List

On Sat, Apr 12, 2014 at 3:43 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Having said that, nobody sane would be running two simultaneous
> operations that are clearly write-oriented competing with each other
> against the same index file.

When it comes to racing, sanity does not matter much. People could
just do it without thinking what exactly is happening behind the
scene.

> So in that sense that can be done as a less urgent follow-up for this topic.

Yeah if racing at refresh time is a real problem, sure we should solve it first.
-- 
Duy

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v7 2/2] Verify index file before we opportunistically update it
  2014-04-12  0:10             ` Duy Nguyen
@ 2014-04-12  4:19               ` Junio C Hamano
  2014-04-12  7:05                 ` Junio C Hamano
  2014-04-12 10:13                 ` Duy Nguyen
  0 siblings, 2 replies; 31+ messages in thread
From: Junio C Hamano @ 2014-04-12  4:19 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Yiannis Marangos, Git Mailing List

Duy Nguyen <pclouds@gmail.com> writes:

> On Sat, Apr 12, 2014 at 3:43 AM, Junio C Hamano <gitster@pobox.com> wrote:
>> Having said that, nobody sane would be running two simultaneous
>> operations that are clearly write-oriented competing with each other
>> against the same index file.
>
> When it comes to racing, sanity does not matter much. People could
> just do it without thinking what exactly is happening behind the
> scene.

Well, "a race is a race is a race" would also be my knee-jerk
reaction when anybody utters the word "race", but you need to
realize that we are not talking about races like object creation
while "gc --auto" running in the background or two pushes trying to
update the same ref to different values, which are meaningful use
cases.

What is the race under discussion about?  It is about the index,
which corresponds one-to-one to the working tree, so in order for
the "race" to matter, you need to be racing against another process
that is not cooperating with you (e.g. a continuous and uncontrolled
"git add" updating the index while you are doing a real work),
mucking with the index in the same working tree.  How could such a
workflow any useful in the real life?

In the spectrum between useful and insane, there is a point where we
should just tell the insane: don't do it then.

Having said that...

>> So in that sense that can be done as a less urgent follow-up for this topic.
>
> Yeah if racing at refresh time is a real problem, sure we should solve it first.

... In order to solve the write-vs-write competition in a useful
way, you must serialize the competing accesses.  E.g. "git add"
would first take a write lock on the index before read_cache(), and
then do its operation and finally release the lock by the usual
write-to-lock-close-then-rename dance.

The lazy "read and refresh in-core first, hoping that we did not
compete with anybody, and then check just before writing out after
taking the lock" is a very good solution for the opportunistic
update codepath, because it is an option not to write the result out
when there actually was an update by somebody else.  But such an
opportunistic locking scheme does not work for write-vs-write
competition.  Upon a real conflict, you need to fail the entire
operation.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v7 2/2] Verify index file before we opportunistically update it
  2014-04-12  4:19               ` Junio C Hamano
@ 2014-04-12  7:05                 ` Junio C Hamano
  2014-04-12 10:13                 ` Duy Nguyen
  1 sibling, 0 replies; 31+ messages in thread
From: Junio C Hamano @ 2014-04-12  7:05 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Yiannis Marangos, Git Mailing List

Junio C Hamano <gitster@pobox.com> writes:

> What is the race under discussion about?  It is about the index,
> which corresponds one-to-one to the working tree, so in order for
> the "race" to matter, you need to be racing against another process
> that is not cooperating with you (e.g. a continuous and uncontrolled
> "git add" updating the index while you are doing a real work),
> mucking with the index in the same working tree.  How could such a
> workflow any useful in the real life?
>
> In the spectrum between useful and insane, there is a point where we
> should just tell the insane: don't do it then.
>
> Having said that...
>
>>> So in that sense that can be done as a less urgent follow-up for this topic.
>>
>> Yeah if racing at refresh time is a real problem, sure we should solve it first.
>
> ... In order to solve the write-vs-write competition in a useful
> way, you must serialize the competing accesses.  E.g. "git add"
> would first take a write lock on the index before read_cache(), and
> then do its operation and finally release the lock by the usual
> write-to-lock-close-then-rename dance.

Extending on this a bit.

Here I didn't mean the traditional ".lock" we create via the
lockfile() API.  Rather, we would want to use fcntl/flock style lock
that lets others _wait_, not _fail_.  Because...

> The lazy "read and refresh in-core first, hoping that we did not
> compete with anybody, and then check just before writing out after
> taking the lock" is a very good solution for the opportunistic
> update codepath, because it is an option not to write the result out
> when there actually was an update by somebody else.  But such an
> opportunistic locking scheme does not work for write-vs-write
> competition.  Upon a real conflict, you need to fail the entire
> operation.

...having multiple conflicting writers on the single index file is
what you thought about worth protecting against.  When somebody else
is pounding on the index file you are trying to prepare your next
commmit in, with his writing that can unexpectedly overwrite what
you prepared, you would at least want the accesses serialized,
without getting half of your attempt to "git add" fail (and having
to redo).  For that, you would want your "git add" to wait while the
other party is mucking with the index under lock, instead of
failing, which is what you would get from the traditional lockfile
API based locking.

But that still takes us back to the "don't do it then".  It is true
that, with serialization, you may be able to guarantee that one "git
add" you do would start from one state (which may record the state
of your previous "git add", or which may have further been modified
by the other process) and ends with whatever that "git add" added,
without any other change.  But even in that case, when you finally
do a "git commit", can you say you know what is in the index?  I do
not think so.  After all, the root cause of the "race" issue is that
the other process pounds at the same index while you are working on
it, without any coordination with you.

So...

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v7 2/2] Verify index file before we opportunistically update it
  2014-04-12  4:19               ` Junio C Hamano
  2014-04-12  7:05                 ` Junio C Hamano
@ 2014-04-12 10:13                 ` Duy Nguyen
  2014-04-14 18:50                   ` Junio C Hamano
  1 sibling, 1 reply; 31+ messages in thread
From: Duy Nguyen @ 2014-04-12 10:13 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Yiannis Marangos, Git Mailing List

On Sat, Apr 12, 2014 at 11:19 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Duy Nguyen <pclouds@gmail.com> writes:
>
>> On Sat, Apr 12, 2014 at 3:43 AM, Junio C Hamano <gitster@pobox.com> wrote:
>>> Having said that, nobody sane would be running two simultaneous
>>> operations that are clearly write-oriented competing with each other
>>> against the same index file.
>>
>> When it comes to racing, sanity does not matter much. People could
>> just do it without thinking what exactly is happening behind the
>> scene.
>
> Well, "a race is a race is a race" would also be my knee-jerk
> reaction when anybody utters the word "race", but you need to
> realize that we are not talking about races like object creation
> while "gc --auto" running in the background or two pushes trying to
> update the same ref to different values, which are meaningful use
> cases.
>
> What is the race under discussion about?  It is about the index,
> which corresponds one-to-one to the working tree, so in order for
> the "race" to matter, you need to be racing against another process
> that is not cooperating with you (e.g. a continuous and uncontrolled
> "git add" updating the index while you are doing a real work),
> mucking with the index in the same working tree.  How could such a
> workflow any useful in the real life?
>
> In the spectrum between useful and insane, there is a point where we
> should just tell the insane: don't do it then.

The thing is people do not usually have that level of knowledge of how
git works. They could write a cron job to do something in some repos,
not aware at all about these non-cooperations. Telling people not to
automatically touch a git directory at all is a bit extreme. I think
this patch is about guarding the user from shooting themselves. Either
a command works correctly, not not work at all. A process' dying is a
way of telling people "this is insane" without having to draw a line
between dos and dont's in documents.

Serializing write access to make both competing processes is nice, but
that's a separate step that may or may not be worth pursuing. And I'm
towards not worth pursuing. As you metioned in the next mail,
serializing helps two competing processes, but not two command
sequences.
-- 
Duy

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v7 2/2] Verify index file before we opportunistically update it
  2014-04-12 10:13                 ` Duy Nguyen
@ 2014-04-14 18:50                   ` Junio C Hamano
  0 siblings, 0 replies; 31+ messages in thread
From: Junio C Hamano @ 2014-04-14 18:50 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Yiannis Marangos, Git Mailing List

Duy Nguyen <pclouds@gmail.com> writes:

> On Sat, Apr 12, 2014 at 11:19 AM, Junio C Hamano <gitster@pobox.com> wrote:
>
>> In the spectrum between useful and insane, there is a point where we
>> should just tell the insane: don't do it then.
>
> ... A process' dying is a
> way of telling people "this is insane" without having to draw a line
> between dos and dont's in documents.

Fair enough.  Instead of "that is insane and you may end up
recording what you do not intend to", being able to say "that is
insane and you will see some commands failing when you may end up
being in such a situation" is a lot more explicit.

Thanks for injecting some sanity to me.

> Serializing write access to make both competing processes is nice, but
> that's a separate step that may or may not be worth pursuing. And I'm
> towards not worth pursuing.

I think we are in agreement on that point.

I would want to see the proposed "read without locking, compute
heavily and then take a lock and revalidate before writing it out,
or fail" done with a focus on not hurting the "sane" use pattern,
i.e. making the "revalidate" part as light as possible.  I however
cannot seem to find any optimization opportunities other than the
"open, read and compare the checksum".  Two I thought and discarded
are:

 - checking timestamp of the index file itself, and failing when it
   has changed (without opening or reading the checksum) is not
   good; it is optimizing for the wrong case, because we would need
   to check the checksum anyway when the timestamps match.

 - checking i-num of the index file itself, and failing when it has
   changed (without opening or reading the checksum) is not good,
   either.  Creating "index.lock", writing to it and renaming it to
   "index", keeping the "index" during the whole period, would
   ensure that the index file that results with this single cycle
   would get a different i-num from the original, but if that is
   done twice or more, the same i-num can be reused and defeat the
   check.

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2014-04-14 18:51 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-04-09 22:06 [PATCH] Verify index file before we opportunistically update it Yiannis Marangos
2014-04-09 22:06 ` Yiannis Marangos
2014-04-09 22:34 ` [PATCH v2] " Yiannis Marangos
2014-04-09 23:05   ` Junio C Hamano
2014-04-09 22:52 ` [PATCH v3] " Yiannis Marangos
2014-04-10  5:22 ` [PATCH v4] " Yiannis Marangos
2014-04-10  5:34 ` [PATCH v5] " Yiannis Marangos
2014-04-10 10:40   ` Duy Nguyen
2014-04-10 11:57     ` Yiannis Marangos
2014-04-10 16:57     ` Junio C Hamano
2014-04-10 13:11 ` [PATCH v6] " Yiannis Marangos
2014-04-10 18:31 ` [PATCH v7 1/2] Add xpread() and xpwrite() Yiannis Marangos
2014-04-10 18:31   ` [PATCH v7 2/2] Verify index file before we opportunistically update it Yiannis Marangos
2014-04-10 19:28     ` Junio C Hamano
2014-04-11  2:57       ` Duy Nguyen
2014-04-11 19:24         ` Junio C Hamano
2014-04-11 20:43           ` Junio C Hamano
2014-04-11 23:30             ` Yiannis Marangos
2014-04-12  0:10             ` Duy Nguyen
2014-04-12  4:19               ` Junio C Hamano
2014-04-12  7:05                 ` Junio C Hamano
2014-04-12 10:13                 ` Duy Nguyen
2014-04-14 18:50                   ` Junio C Hamano
2014-04-11  7:47       ` Torsten Bögershausen
2014-04-11 15:58         ` Yiannis Marangos
2014-04-11 10:36       ` Duy Nguyen
2014-04-10 18:35   ` [PATCH v7 1/2] Add xpread() and xpwrite() Junio C Hamano
2014-04-10 18:44     ` Yiannis Marangos
2014-04-10 18:54 ` [PATCH v8 1/2] Add xpread() Yiannis Marangos
2014-04-10 19:12   ` Johannes Sixt
2014-04-10 19:20     ` Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.