All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v0 0/3] git add a-Big-file
@ 2011-05-08  8:47 Junio C Hamano
  2011-05-08  8:47 ` [PATCH v0 1/3] index_fd(): turn write_object and format_check arguments into one flag Junio C Hamano
                   ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Junio C Hamano @ 2011-05-08  8:47 UTC (permalink / raw)
  To: git

This is merely a POC, hoping that interested parties might fill in the
blanks aka update the NEEDSWORK part of the patch.

Junio C Hamano (3):
  index_fd(): turn write_object and format_check arguments into one flag
  index_fd(): split into two helper functions
  Bigfile: teach "git add" to send a large file straight to a pack

 builtin/hash-object.c  |    5 +-
 builtin/update-index.c |    3 +-
 cache.h                |    7 ++-
 notes-merge.c          |    2 +-
 read-cache.c           |    4 +-
 sha1_file.c            |  165 +++++++++++++++++++++++++++++++++++++++++-------
 t/t1050-large.sh       |   22 +++++++
 7 files changed, 177 insertions(+), 31 deletions(-)
 create mode 100755 t/t1050-large.sh

-- 
1.7.5.1.268.gce5bd

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v0 1/3] index_fd(): turn write_object and format_check arguments into one flag
  2011-05-08  8:47 [PATCH v0 0/3] git add a-Big-file Junio C Hamano
@ 2011-05-08  8:47 ` Junio C Hamano
  2011-05-08  8:47 ` [PATCH v0 2/3] index_fd(): split into two helper functions Junio C Hamano
  2011-05-08  8:47 ` [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack Junio C Hamano
  2 siblings, 0 replies; 18+ messages in thread
From: Junio C Hamano @ 2011-05-08  8:47 UTC (permalink / raw)
  To: git

The "format_check" parameter tucked after the existing parameters is too
ugly an afterthought to live in any reasonable API.

Combine it with the other boolean parameter "write_object" into a single
"flags" parameter.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/hash-object.c  |    5 ++++-
 builtin/update-index.c |    3 ++-
 cache.h                |    7 +++++--
 notes-merge.c          |    2 +-
 read-cache.c           |    4 ++--
 sha1_file.c            |   29 +++++++++++++----------------
 6 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/builtin/hash-object.c b/builtin/hash-object.c
index b96f46a..33911fd 100644
--- a/builtin/hash-object.c
+++ b/builtin/hash-object.c
@@ -14,8 +14,11 @@ static void hash_fd(int fd, const char *type, int write_object, const char *path
 {
 	struct stat st;
 	unsigned char sha1[20];
+	unsigned flags = (HASH_FORMAT_CHECK |
+			  (write_object ? HASH_WRITE_OBJECT : 0));
+
 	if (fstat(fd, &st) < 0 ||
-	    index_fd(sha1, fd, &st, write_object, type_from_string(type), path, 1))
+	    index_fd(sha1, fd, &st, type_from_string(type), path, flags))
 		die(write_object
 		    ? "Unable to add %s to database"
 		    : "Unable to hash %s", path);
diff --git a/builtin/update-index.c b/builtin/update-index.c
index d7850c6..f14bc90 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -99,7 +99,8 @@ static int add_one_path(struct cache_entry *old, const char *path, int len, stru
 	fill_stat_cache_info(ce, st);
 	ce->ce_mode = ce_mode_from_stat(old, st->st_mode);
 
-	if (index_path(ce->sha1, path, st, !info_only))
+	if (index_path(ce->sha1, path, st,
+		       info_only ? 0 : HASH_WRITE_OBJECT))
 		return -1;
 	option = allow_add ? ADD_CACHE_OK_TO_ADD : 0;
 	option |= allow_replace ? ADD_CACHE_OK_TO_REPLACE : 0;
diff --git a/cache.h b/cache.h
index 2b34116..2be5333 100644
--- a/cache.h
+++ b/cache.h
@@ -518,8 +518,11 @@ struct pathspec {
 extern int init_pathspec(struct pathspec *, const char **);
 extern void free_pathspec(struct pathspec *);
 extern int ce_path_match(const struct cache_entry *ce, const struct pathspec *pathspec);
-extern int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, enum object_type type, const char *path, int format_check);
-extern int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object);
+
+#define HASH_WRITE_OBJECT 1
+#define HASH_FORMAT_CHECK 2
+extern int index_fd(unsigned char *sha1, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags);
+extern int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags);
 extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st);
 
 #define REFRESH_REALLY		0x0001	/* ignore_valid */
diff --git a/notes-merge.c b/notes-merge.c
index 28046a9..e1aaf43 100644
--- a/notes-merge.c
+++ b/notes-merge.c
@@ -707,7 +707,7 @@ int notes_merge_commit(struct notes_merge_options *o,
 		/* write file as blob, and add to partial_tree */
 		if (stat(ent->name, &st))
 			die_errno("Failed to stat '%s'", ent->name);
-		if (index_path(blob_sha1, ent->name, &st, 1))
+		if (index_path(blob_sha1, ent->name, &st, HASH_WRITE_OBJECT))
 			die("Failed to write blob object from '%s'", ent->name);
 		if (add_note(partial_tree, obj_sha1, blob_sha1, NULL))
 			die("Failed to add resolved note '%s' to notes tree",
diff --git a/read-cache.c b/read-cache.c
index f38471c..4ac9a03 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -92,7 +92,7 @@ static int ce_compare_data(struct cache_entry *ce, struct stat *st)
 
 	if (fd >= 0) {
 		unsigned char sha1[20];
-		if (!index_fd(sha1, fd, st, 0, OBJ_BLOB, ce->name, 0))
+		if (!index_fd(sha1, fd, st, OBJ_BLOB, ce->name, 0))
 			match = hashcmp(sha1, ce->sha1);
 		/* index_fd() closed the file descriptor already */
 	}
@@ -641,7 +641,7 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st,
 		return 0;
 	}
 	if (!intent_only) {
-		if (index_path(ce->sha1, path, st, 1))
+		if (index_path(ce->sha1, path, st, HASH_WRITE_OBJECT))
 			return error("unable to index file %s", path);
 	} else
 		record_intent_to_add(ce);
diff --git a/sha1_file.c b/sha1_file.c
index 889fe71..17c179c 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2581,10 +2581,11 @@ static void check_tag(const void *buf, size_t size)
 }
 
 static int index_mem(unsigned char *sha1, void *buf, size_t size,
-		     int write_object, enum object_type type,
-		     const char *path, int format_check)
+		     enum object_type type,
+		     const char *path, unsigned flags)
 {
 	int ret, re_allocated = 0;
+	int write_object = flags & HASH_WRITE_OBJECT;
 
 	if (!type)
 		type = OBJ_BLOB;
@@ -2600,7 +2601,7 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
 			re_allocated = 1;
 		}
 	}
-	if (format_check) {
+	if (flags & HASH_FORMAT_CHECK) {
 		if (type == OBJ_TREE)
 			check_tree(buf, size);
 		if (type == OBJ_COMMIT)
@@ -2620,8 +2621,8 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
 
 #define SMALL_FILE_SIZE (32*1024)
 
-int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object,
-	     enum object_type type, const char *path, int format_check)
+int index_fd(unsigned char *sha1, int fd, struct stat *st,
+	     enum object_type type, const char *path, unsigned flags)
 {
 	int ret;
 	size_t size = xsize_t(st->st_size);
@@ -2629,33 +2630,29 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object,
 	if (!S_ISREG(st->st_mode)) {
 		struct strbuf sbuf = STRBUF_INIT;
 		if (strbuf_read(&sbuf, fd, 4096) >= 0)
-			ret = index_mem(sha1, sbuf.buf, sbuf.len, write_object,
-					type, path, format_check);
+			ret = index_mem(sha1, sbuf.buf, sbuf.len, type,	path, flags);
 		else
 			ret = -1;
 		strbuf_release(&sbuf);
 	} else if (!size) {
-		ret = index_mem(sha1, NULL, size, write_object, type, path,
-				format_check);
+		ret = index_mem(sha1, NULL, size, type, path, flags);
 	} else if (size <= SMALL_FILE_SIZE) {
 		char *buf = xmalloc(size);
 		if (size == read_in_full(fd, buf, size))
-			ret = index_mem(sha1, buf, size, write_object, type,
-					path, format_check);
+			ret = index_mem(sha1, buf, size, type, path, flags);
 		else
 			ret = error("short read %s", strerror(errno));
 		free(buf);
 	} else {
 		void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
-		ret = index_mem(sha1, buf, size, write_object, type, path,
-				format_check);
+		ret = index_mem(sha1, buf, size, type, path, flags);
 		munmap(buf, size);
 	}
 	close(fd);
 	return ret;
 }
 
-int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object)
+int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags)
 {
 	int fd;
 	struct strbuf sb = STRBUF_INIT;
@@ -2666,7 +2663,7 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write
 		if (fd < 0)
 			return error("open(\"%s\"): %s", path,
 				     strerror(errno));
-		if (index_fd(sha1, fd, st, write_object, OBJ_BLOB, path, 0) < 0)
+		if (index_fd(sha1, fd, st, OBJ_BLOB, path, flags) < 0)
 			return error("%s: failed to insert into database",
 				     path);
 		break;
@@ -2676,7 +2673,7 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write
 			return error("readlink(\"%s\"): %s", path,
 			             errstr);
 		}
-		if (!write_object)
+		if (!(flags & HASH_WRITE_OBJECT))
 			hash_sha1_file(sb.buf, sb.len, blob_type, sha1);
 		else if (write_sha1_file(sb.buf, sb.len, blob_type, sha1))
 			return error("%s: failed to insert into database",
-- 
1.7.5.1.268.gce5bd

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v0 2/3] index_fd(): split into two helper functions
  2011-05-08  8:47 [PATCH v0 0/3] git add a-Big-file Junio C Hamano
  2011-05-08  8:47 ` [PATCH v0 1/3] index_fd(): turn write_object and format_check arguments into one flag Junio C Hamano
@ 2011-05-08  8:47 ` Junio C Hamano
  2011-05-08  8:47 ` [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack Junio C Hamano
  2 siblings, 0 replies; 18+ messages in thread
From: Junio C Hamano @ 2011-05-08  8:47 UTC (permalink / raw)
  To: git

Split out the case where we do not know the size of the input (hence we
read everything into a strbuf before doing anything) to index_pipe(), and
the other case where we mmap or read the whole data to index_bulk().

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 sha1_file.c |   42 +++++++++++++++++++++++++++++++-----------
 1 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/sha1_file.c b/sha1_file.c
index 17c179c..49416b0 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2619,22 +2619,29 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
 	return ret;
 }
 
+static int index_pipe(unsigned char *sha1, int fd, enum object_type type,
+		      const char *path, unsigned flags)
+{
+	struct strbuf sbuf = STRBUF_INIT;
+	int ret;
+
+	if (strbuf_read(&sbuf, fd, 4096) >= 0)
+		ret = index_mem(sha1, sbuf.buf, sbuf.len, type,	path, flags);
+	else
+		ret = -1;
+	strbuf_release(&sbuf);
+	return ret;
+}
+
 #define SMALL_FILE_SIZE (32*1024)
 
-int index_fd(unsigned char *sha1, int fd, struct stat *st,
-	     enum object_type type, const char *path, unsigned flags)
+static int index_core(unsigned char *sha1, int fd, size_t size,
+		      enum object_type type, const char *path,
+		      unsigned flags)
 {
 	int ret;
-	size_t size = xsize_t(st->st_size);
 
-	if (!S_ISREG(st->st_mode)) {
-		struct strbuf sbuf = STRBUF_INIT;
-		if (strbuf_read(&sbuf, fd, 4096) >= 0)
-			ret = index_mem(sha1, sbuf.buf, sbuf.len, type,	path, flags);
-		else
-			ret = -1;
-		strbuf_release(&sbuf);
-	} else if (!size) {
+	if (!size) {
 		ret = index_mem(sha1, NULL, size, type, path, flags);
 	} else if (size <= SMALL_FILE_SIZE) {
 		char *buf = xmalloc(size);
@@ -2648,6 +2655,19 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
 		ret = index_mem(sha1, buf, size, type, path, flags);
 		munmap(buf, size);
 	}
+	return ret;
+}
+
+int index_fd(unsigned char *sha1, int fd, struct stat *st,
+	     enum object_type type, const char *path, unsigned flags)
+{
+	int ret;
+	size_t size = xsize_t(st->st_size);
+
+	if (!S_ISREG(st->st_mode))
+		ret = index_pipe(sha1, fd, type, path, flags);
+	else
+		ret = index_core(sha1, fd, size, type, path, flags);
 	close(fd);
 	return ret;
 }
-- 
1.7.5.1.268.gce5bd

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack
  2011-05-08  8:47 [PATCH v0 0/3] git add a-Big-file Junio C Hamano
  2011-05-08  8:47 ` [PATCH v0 1/3] index_fd(): turn write_object and format_check arguments into one flag Junio C Hamano
  2011-05-08  8:47 ` [PATCH v0 2/3] index_fd(): split into two helper functions Junio C Hamano
@ 2011-05-08  8:47 ` Junio C Hamano
  2011-05-08 10:19   ` Nguyen Thai Ngoc Duy
                     ` (2 more replies)
  2 siblings, 3 replies; 18+ messages in thread
From: Junio C Hamano @ 2011-05-08  8:47 UTC (permalink / raw)
  To: git

When adding a new content to the repository, we have always slurped
the blob in its entirety in-core first, and computed the object name
and compressed it into a loose object file.  Handling large binary
files (e.g.  video and audio asset for games) has been problematic
because of this design.

At the middle level of "git add" callchain is an internal API
index_fd() that takes an open file descriptor to read from the
working tree file being added with its size. Teach it to call out to
fast-import when adding a large blob.

This is merely a POC that has two large "NEEDSWORK" items.

 * The code in this patch runs fast-import via start_command() API;
   because the caller needs the object name, we can only stuff one
   object per pack (see in-code comments for future directions for a
   possible solution).

 * The decision to stream in this patch is based on the size of the
   blob, but it should be tied to an attribute, "bigdata". The
   attribute should also mean the blob will not undergo any
   convert-to-git processing.

The write-out codepath in entry.c::write_entry() should be taught to
stream, instead of reading everything in core. This should not be so
hard to implement, especially if we limit ourselves only to loose
object files and non-delta representation in packfiles.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 sha1_file.c      |  102 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 t/t1050-large.sh |   22 ++++++++++++
 2 files changed, 123 insertions(+), 1 deletions(-)
 create mode 100755 t/t1050-large.sh

diff --git a/sha1_file.c b/sha1_file.c
index 49416b0..ef1a698 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -11,6 +11,7 @@
 #include "pack.h"
 #include "blob.h"
 #include "commit.h"
+#include "run-command.h"
 #include "tag.h"
 #include "tree.h"
 #include "tree-walk.h"
@@ -2658,6 +2659,103 @@ static int index_core(unsigned char *sha1, int fd, size_t size,
 	return ret;
 }
 
+/*
+ * NEEDSWORK: This creates one packfile per large blob, because the
+ * caller immediately wants the result sha1, and fast-import can
+ * report the object name via marks mechanism only by closing the
+ * created packfile. We should instead add an internal "stuff objects
+ * into a single pack, all in non-delta representation, keeping track
+ * of only <object-name, offset> tuples in core" API, that keeps one
+ * append-only packfile open at a time.  Have the first call to this
+ * function open a packfile on demand, and make sure the caller calls
+ * another function in the API to close the packfile at end, at which
+ * point the in-core tuples of <object-name, offset> should be written
+ * out as a corresponding pack .idx file and the tentative .pack file
+ * renamed to the final name.
+ *
+ * This also bypasses the usual "convert-to-git" dance, but that is on
+ * purpose. We could write a streaming version of the converting
+ * functions and insert that before feeding the data to fast-import
+ * (or equivalent in-core API described above), but the primary
+ * motivation for trying to stream from the working tree file and to
+ * avoid mmaping it in core is to deal with large binary blobs, and
+ * by definition they do _not_ want to get any conversion.
+ */
+static int index_stream(unsigned char *sha1, int fd, size_t size,
+			enum object_type type, const char *path,
+			unsigned flags)
+{
+	struct child_process fast_import;
+	char export_marks[512];
+	const char *argv[] = { "fast-import", "--quiet", export_marks, NULL };
+	char tmpfile[512];
+	char fast_import_cmd[512];
+	char buf[512];
+	int len, tmpfd;
+
+	strcpy(tmpfile, git_path("hashstream_XXXXXX"));
+	tmpfd = git_mkstemp_mode(tmpfile, 0600);
+	if (tmpfd < 0)
+		die_errno("cannot create tempfile: %s", tmpfile);
+	if (close(tmpfd))
+		die_errno("cannot close tempfile: %s", tmpfile);
+	sprintf(export_marks, "--export-marks=%s", tmpfile);
+
+	memset(&fast_import, 0, sizeof(fast_import));
+	fast_import.in = -1;
+	fast_import.argv = argv;
+	fast_import.git_cmd = 1;
+	if (start_command(&fast_import))
+		die_errno("index-stream: git fast-import failed");
+
+	len = sprintf(fast_import_cmd, "blob\nmark :1\ndata %lu\n",
+		      (unsigned long) size);
+	write_or_whine(fast_import.in, fast_import_cmd, len,
+		       "index-stream: feeding fast-import");
+	while (size) {
+		char buf[10240];
+		size_t sz = size < sizeof(buf) ? size : sizeof(buf);
+		size_t actual;
+
+		actual = read_in_full(fd, buf, sz);
+		if (actual < 0)
+			die_errno("index-stream: reading input");
+		if (write_in_full(fast_import.in, buf, actual) != actual)
+			die_errno("index-stream: feeding fast-import");
+		size -= actual;
+	}
+	if (close(fast_import.in))
+		die_errno("index-stream: closing fast-import");
+	if (finish_command(&fast_import))
+		die_errno("index-stream: finishing fast-import");
+
+	tmpfd = open(tmpfile, O_RDONLY);
+	if (tmpfd < 0)
+		die_errno("index-stream: cannot open fast-import mark");
+	len = read(tmpfd, buf, sizeof(buf));
+	if (len < 0)
+		die_errno("index-stream: reading fast-import mark");
+	if (close(tmpfd) < 0)
+		die_errno("index-stream: closing fast-import mark");
+	if (unlink(tmpfile))
+		die_errno("index-stream: unlinking fast-import mark");
+	if (len != 44 ||
+	    memcmp(":1 ", buf, 3) ||
+	    get_sha1_hex(buf + 3, sha1))
+		die_errno("index-stream: unexpected fast-import mark: <%s>", buf);
+	return 0;
+}
+
+/*
+ * NEEDSWORK: Currently, we choose blobs that are bigger than
+ * core.bigFileThreshold to send out to a pack, but we might want to
+ * control this by choosing any blob that has "bigdata" attribute on
+ * path, regardless of its size. By design, index_stream() ignores
+ * any "convert-to-git" conversions, so associating an attribute with
+ * the logic to choose which paths to give index_stream() makes it
+ * easier to explain---we can say "bigdata will trump all the crlf,
+ * clean, smudge, ident and any other conversion related crap.
+ */
 int index_fd(unsigned char *sha1, int fd, struct stat *st,
 	     enum object_type type, const char *path, unsigned flags)
 {
@@ -2666,8 +2764,10 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
 
 	if (!S_ISREG(st->st_mode))
 		ret = index_pipe(sha1, fd, type, path, flags);
-	else
+	else if (size <= big_file_threshold && type != OBJ_BLOB)
 		ret = index_core(sha1, fd, size, type, path, flags);
+	else
+		ret = index_stream(sha1, fd, size, type, path, flags);
 	close(fd);
 	return ret;
 }
diff --git a/t/t1050-large.sh b/t/t1050-large.sh
new file mode 100755
index 0000000..489c0f0
--- /dev/null
+++ b/t/t1050-large.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+# Copyright (c) 2011, Google Inc.
+
+test_description='adding and checking out large blobs'
+
+. ./test-lib.sh
+
+test_expect_success setup '
+	git config core.bigfilethreshold 200k &&
+	echo X | dd of=large bs=1k seek=2000
+'
+
+test_expect_success 'add a large file' '
+	git add large &&
+	git cat-file blob :large >actual &&
+	# make sure we got a packfile and no loose objects
+	test -f .git/objects/pack/pack-*.pack &&
+	test ! -f .git/objects/??/?????????????????????????????????????? &&
+	cmp -s large actual  # This should be "cmp", not "test_cmp"
+'
+
+test_done
-- 
1.7.5.1.268.gce5bd

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack
  2011-05-08  8:47 ` [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack Junio C Hamano
@ 2011-05-08 10:19   ` Nguyen Thai Ngoc Duy
  2011-05-08 17:37     ` Junio C Hamano
  2011-05-09 22:04     ` [PATCH 0/4] convert.c clean-up Junio C Hamano
  2011-05-09 14:05   ` [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack Shawn Pearce
  2011-05-29 18:20   ` Ævar Arnfjörð Bjarmason
  2 siblings, 2 replies; 18+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2011-05-08 10:19 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Sun, May 08, 2011 at 01:47:35AM -0700, Junio C Hamano wrote:
>  * The decision to stream in this patch is based on the size of the
>    blob, but it should be tied to an attribute, "bigdata". The
>    attribute should also mean the blob will not undergo any
>    convert-to-git processing.

Instead we could detect if convert-from/to-git is unnecessary and
stream big blobs based on core.bigFileThresold. The below patch may
help. I made it in order to checkout files without inflating in memory
but that part's still not ready.

By the way, is there any way we can mark an object in the pack as
"non-compressed" so we can read it without inflating? zlib compression
level == 0 still splits the object into multiple parts.

-- 8< --
Subject: [PATCH] convert: add functions to determine whether conversion is needed

Blob conversion from/to repository requires the entire blob in memory.
The conversion is rarely used most of the time and that requirement
could put more pressure on memory for large blobs.

Add two functions to determine early whether we can bypass conversion
without looking at the content.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 cache.h   |    3 ++
 convert.c |   79 +++++++++++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 65 insertions(+), 17 deletions(-)

diff --git a/cache.h b/cache.h
index 08a9022..be3845d 100644
--- a/cache.h
+++ b/cache.h
@@ -1091,7 +1091,10 @@ extern void trace_repo_setup(const char *prefix);
 /* returns 1 if *dst was used */
 extern int convert_to_git(const char *path, const char *src, size_t len,
                           struct strbuf *dst, enum safe_crlf checksafe);
+extern int convert_to_git_needed(const char *path, size_t len,
+				 enum safe_crlf checksafe);
 extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
+extern int convert_to_working_tree_needed(const char *path, size_t len);
 extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst);
 
 /* add */
diff --git a/convert.c b/convert.c
index d5aebed..39545ed 100644
--- a/convert.c
+++ b/convert.c
@@ -188,7 +188,8 @@ static int has_cr_in_index(const char *path)
 }
 
 static int crlf_to_git(const char *path, const char *src, size_t len,
-		       struct strbuf *buf, enum action action, enum safe_crlf checksafe)
+		       struct strbuf *buf, enum action action,
+		       enum safe_crlf checksafe, int dry_run)
 {
 	struct text_stat stats;
 	char *dst;
@@ -197,6 +198,9 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
 	    (action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE) || !len)
 		return 0;
 
+	if (dry_run)
+		return 1;
+
 	gather_stats(src, len, &stats);
 
 	if (action == CRLF_AUTO || action == CRLF_GUESS) {
@@ -257,7 +261,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
 }
 
 static int crlf_to_worktree(const char *path, const char *src, size_t len,
-			    struct strbuf *buf, enum action action)
+			    struct strbuf *buf, enum action action, int dry_run)
 {
 	char *to_free = NULL;
 	struct text_stat stats;
@@ -265,6 +269,9 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len,
 	if (!len || determine_output_conversion(action) != EOL_CRLF)
 		return 0;
 
+	if (dry_run)
+		return 1;
+
 	gather_stats(src, len, &stats);
 
 	/* No LF? Nothing to convert, regardless. */
@@ -374,7 +381,7 @@ static int filter_buffer(int in, int out, void *data)
 }
 
 static int apply_filter(const char *path, const char *src, size_t len,
-                        struct strbuf *dst, const char *cmd)
+			struct strbuf *dst, const char *cmd, int dry_run)
 {
 	/*
 	 * Create a pipeline to have the command filter the buffer's
@@ -390,6 +397,9 @@ static int apply_filter(const char *path, const char *src, size_t len,
 	if (!cmd)
 		return 0;
 
+	if (dry_run)
+		return 1;
+
 	memset(&async, 0, sizeof(async));
 	async.proc = filter_buffer;
 	async.data = &params;
@@ -541,11 +551,17 @@ static int count_ident(const char *cp, unsigned long size)
 }
 
 static int ident_to_git(const char *path, const char *src, size_t len,
-                        struct strbuf *buf, int ident)
+			struct strbuf *buf, int ident, int dry_run)
 {
 	char *dst, *dollar;
 
-	if (!ident || !count_ident(src, len))
+	if (!ident)
+		return 0;
+
+	if (dry_run)
+		return 1;
+
+	if(!count_ident(src, len))
 		return 0;
 
 	/* only grow if not in place */
@@ -582,7 +598,7 @@ static int ident_to_git(const char *path, const char *src, size_t len,
 }
 
 static int ident_to_worktree(const char *path, const char *src, size_t len,
-                             struct strbuf *buf, int ident)
+			     struct strbuf *buf, int ident, int dry_run)
 {
 	unsigned char sha1[20];
 	char *to_free = NULL, *dollar, *spc;
@@ -591,6 +607,9 @@ static int ident_to_worktree(const char *path, const char *src, size_t len,
 	if (!ident)
 		return 0;
 
+	if (dry_run)
+		return 1;
+
 	cnt = count_ident(src, len);
 	if (!cnt)
 		return 0;
@@ -726,8 +745,9 @@ static enum action determine_action(enum action text_attr, enum eol eol_attr)
 	return text_attr;
 }
 
-int convert_to_git(const char *path, const char *src, size_t len,
-                   struct strbuf *dst, enum safe_crlf checksafe)
+static int convert_to_git_1(const char *path, const char *src, size_t len,
+			    struct strbuf *dst, enum safe_crlf checksafe,
+			    int dry_run)
 {
 	struct git_attr_check check[5];
 	enum action action = CRLF_GUESS;
@@ -748,23 +768,39 @@ int convert_to_git(const char *path, const char *src, size_t len,
 			filter = drv->clean;
 	}
 
-	ret |= apply_filter(path, src, len, dst, filter);
+	ret |= apply_filter(path, src, len, dst, filter, dry_run);
 	if (ret) {
+		if (dry_run)
+			return 1;
 		src = dst->buf;
 		len = dst->len;
 	}
 	action = determine_action(action, eol_attr);
-	ret |= crlf_to_git(path, src, len, dst, action, checksafe);
+	ret |= crlf_to_git(path, src, len, dst, action, checksafe, dry_run);
 	if (ret) {
+		if (dry_run)
+			return 1;
 		src = dst->buf;
 		len = dst->len;
 	}
-	return ret | ident_to_git(path, src, len, dst, ident);
+	return ret | ident_to_git(path, src, len, dst, ident, dry_run);
+}
+
+int convert_to_git(const char *path, const char *src, size_t len,
+		   struct strbuf *dst, enum safe_crlf checksafe)
+{
+	return convert_to_git_1(path, src, len, dst, checksafe, 0);
+}
+
+int convert_to_git_needed(const char *path, size_t len,
+			  enum safe_crlf checksafe)
+{
+	return convert_to_git_1(path, NULL, len, NULL, checksafe, 1);
 }
 
 static int convert_to_working_tree_internal(const char *path, const char *src,
 					    size_t len, struct strbuf *dst,
-					    int normalizing)
+					    int normalizing, int dry_run)
 {
 	struct git_attr_check check[5];
 	enum action action = CRLF_GUESS;
@@ -785,8 +821,10 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
 			filter = drv->smudge;
 	}
 
-	ret |= ident_to_worktree(path, src, len, dst, ident);
+	ret |= ident_to_worktree(path, src, len, dst, ident, dry_run);
 	if (ret) {
+		if (dry_run)
+			return 1;
 		src = dst->buf;
 		len = dst->len;
 	}
@@ -796,23 +834,30 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
 	 */
 	if (filter || !normalizing) {
 		action = determine_action(action, eol_attr);
-		ret |= crlf_to_worktree(path, src, len, dst, action);
+		ret |= crlf_to_worktree(path, src, len, dst, action, dry_run);
 		if (ret) {
+			if (dry_run)
+				return 1;
 			src = dst->buf;
 			len = dst->len;
 		}
 	}
-	return ret | apply_filter(path, src, len, dst, filter);
+	return ret | apply_filter(path, src, len, dst, filter, dry_run);
 }
 
 int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst)
 {
-	return convert_to_working_tree_internal(path, src, len, dst, 0);
+	return convert_to_working_tree_internal(path, src, len, dst, 0, 0);
+}
+
+int convert_to_working_tree_needed(const char *path, size_t len)
+{
+	return convert_to_working_tree_internal(path, NULL, len, NULL, 0, 1);
 }
 
 int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst)
 {
-	int ret = convert_to_working_tree_internal(path, src, len, dst, 1);
+	int ret = convert_to_working_tree_internal(path, src, len, dst, 1, 0);
 	if (ret) {
 		src = dst->buf;
 		len = dst->len;
-- 
1.7.4.74.g639db


-- 8< --

-- 
Duy

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack
  2011-05-08 10:19   ` Nguyen Thai Ngoc Duy
@ 2011-05-08 17:37     ` Junio C Hamano
  2011-05-09 22:04     ` [PATCH 0/4] convert.c clean-up Junio C Hamano
  1 sibling, 0 replies; 18+ messages in thread
From: Junio C Hamano @ 2011-05-08 17:37 UTC (permalink / raw)
  To: Nguyen Thai Ngoc Duy; +Cc: git

Nguyen Thai Ngoc Duy <pclouds@gmail.com> writes:

> By the way, is there any way we can mark an object in the pack as
> "non-compressed" so we can read it without inflating?

No. Such a packfile will be unusable by existing git, so I would suggest
you not even think about going there.  It will not happen.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack
  2011-05-08  8:47 ` [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack Junio C Hamano
  2011-05-08 10:19   ` Nguyen Thai Ngoc Duy
@ 2011-05-09 14:05   ` Shawn Pearce
  2011-05-09 15:58     ` Junio C Hamano
  2011-05-29 18:20   ` Ævar Arnfjörð Bjarmason
  2 siblings, 1 reply; 18+ messages in thread
From: Shawn Pearce @ 2011-05-09 14:05 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Sun, May 8, 2011 at 01:47, Junio C Hamano <gitster@pobox.com> wrote:
> + * NEEDSWORK: This creates one packfile per large blob, because the
> + * caller immediately wants the result sha1, and fast-import can
> + * report the object name via marks mechanism only by closing the
> + * created packfile. We should instead add an internal "stuff objects
> + * into a single pack, all in non-delta representation, keeping track
> + * of only <object-name, offset> tuples in core" API, that keeps one
> + * append-only packfile open at a time.  Have the first call to this
> + * function open a packfile on demand, and make sure the caller calls
> + * another function in the API to close the packfile at end, at which
> + * point the in-core tuples of <object-name, offset> should be written
> + * out as a corresponding pack .idx file and the tentative .pack file
> + * renamed to the final name.

The other problem here is the caller cannot access the written objects
until the pack is closed. That is one of the things that has made
fast-import difficult for git-svn to use, because git-svn expects the
object to be available immediately. I assume that within a single git
add or git update-index process we don't need to worry about this, so
its probably a non-issue.

-- 
Shawn.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack
  2011-05-09 14:05   ` [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack Shawn Pearce
@ 2011-05-09 15:58     ` Junio C Hamano
  2011-05-09 16:14       ` Junio C Hamano
  0 siblings, 1 reply; 18+ messages in thread
From: Junio C Hamano @ 2011-05-09 15:58 UTC (permalink / raw)
  To: Shawn Pearce; +Cc: git

Shawn Pearce <spearce@spearce.org> writes:

> The other problem here is the caller cannot access the written objects
> until the pack is closed. That is one of the things that has made
> fast-import difficult for git-svn to use, because git-svn expects the
> object to be available immediately. I assume that within a single git
> add or git update-index process we don't need to worry about this, so
> its probably a non-issue.

Yes, it is part of a possible issue to be addressed in the plan.

I envisioned that the "API" I talked about in the NEEDSWORK you quoted
would keep an open file descriptor to the "currently being built" packfile
wrapped in a "struct packed_git", with an in-core index_data that is
adjusted every time you add a straight-to-pack kind of object. Upon a
"finalize" call, it would determines the final pack name, write the real
pack .idx file out, and rename the "being built" packfile to the final
name to make it available to the outside world.

Within a single git process that approach would give access to the set of
objects that are going straight to the pack.  When it needs to spawn a git
subprocess, it however would need to finalize the pack to give access to
the new object, just like when fast-import flushes when asked to expose
the marks.

After all, this topic is about handling large binary files that would not
fit in core at once (we do not support them now at all). It may not too
bad to say we stuff one object per packfile and immediately close the
packfile (which is what the use of fast-import by the POC patch
does). Once the packfile is closed, the object in it is automatically
available to the outside world, and it is just the matter of making a
reprepare_packed_git() call to make it available to ourselves. When there
are many such objects, as they would exceed bigfilethreashold, repacking
them would just amount to copying the already compressed data literally (I
haven't re-checked the code though) and the cost shouldn't be more than
proportional to the size of the data. Expecting any system to do better
than that is asking for moon and I am not willing to bend backwards to
cater to such demands before running out of other better things to do ;-).

So I am tempted to keep the "spawn an external fast-import" code at least
for now, and give it a higher priority to make the other side (writing out
the blob to a working tree) streamable.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack
  2011-05-09 15:58     ` Junio C Hamano
@ 2011-05-09 16:14       ` Junio C Hamano
  0 siblings, 0 replies; 18+ messages in thread
From: Junio C Hamano @ 2011-05-09 16:14 UTC (permalink / raw)
  To: Shawn Pearce; +Cc: git

Junio C Hamano <gitster@pobox.com> writes:

> I envisioned that the "API" I talked about in the NEEDSWORK you quoted
> would keep an open file descriptor to the "currently being built" packfile
> wrapped in a "struct packed_git", with an in-core index_data that is
> adjusted every time you add a straight-to-pack kind of object. Upon a
> "finalize" call, it would determines the final pack name, write the real
> pack .idx file out, and rename the "being built" packfile to the final
> name to make it available to the outside world.
>
> Within a single git process that approach would give access to the set of
> objects that are going straight to the pack.  When it needs to spawn a git
> subprocess, it however would need to finalize the pack to give access to
> the new object, just like when fast-import flushes when asked to expose
> the marks.
>
> After all, this topic is about handling large binary files that would not
> fit in core at once (we do not support them now at all). It may not too
> bad to say we stuff one object per packfile and immediately close the
> packfile (which is what the use of fast-import by the POC patch
> does).

A (tentatively final) side note.

The primary reason why I wanted to think about using a single packfile
that is kept open and add multiple objects to the pack was because we may
later want to use this kind of set-up for "initial import", regardless of
the size of the object being added.  But now I think about it I do not
think that use case matters a lot.  The resulting single pack would have
much worse object density, compared to the case where you add them
normally, initially creating loose object files and then repack/gc at
which time you are likely to have more than one rev to sanely deltify.

Using one pack per large object while creating is not too bad to begin
with.  If you had a large enough core to hold such a large binary file,
the current system would store it as a single loose object file, so it is
not like we are making things any worse.  In either form, these "single
object per a file" initial storage will find their more permanent home
upon the first repack/gc.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 0/4] convert.c clean-up
  2011-05-08 10:19   ` Nguyen Thai Ngoc Duy
  2011-05-08 17:37     ` Junio C Hamano
@ 2011-05-09 22:04     ` Junio C Hamano
  2011-05-09 22:04       ` [PATCH 1/4] convert: rename the "eol" global variable to "core_eol" Junio C Hamano
                         ` (4 more replies)
  1 sibling, 5 replies; 18+ messages in thread
From: Junio C Hamano @ 2011-05-09 22:04 UTC (permalink / raw)
  To: git; +Cc: pclouds

Nguyen Thai Ngoc Duy <pclouds@gmail.com> writes:

> Instead we could detect if convert-from/to-git is unnecessary and
> stream big blobs based on core.bigFileThresold. The below patch may
> help.

Yeah, I think that would probably make more sense. If a file is small
enough, it is more sensible to send it to a loose object just like any
other files. We do not want to invite users to make a mistake of marking
it as bigdata and send it straight to a packfile. Having one less knob to
tweak is always a good thing to do.

However, while reviewing your patch, I noticed that convert.c was littered
with misnamed types, variables and functions to the point to make it
almost unreadble as the result of its evolution.  I originally wrote this
series so that I can add "bigdata" sensibly, and it turns out that there
is no benefit to do so for now, but the clean-up by itself would be worth
it.

So there...

Junio C Hamano (4):
  convert: rename the "eol" global variable to "core_eol"
  convert: give saner names to crlf/eol variables, types and functions
  convert: make it safer to add conversion attributes
  convert: make it harder to screw up adding a conversion attribute

 cache.h       |    2 +-
 config.c      |   12 ++--
 convert.c     |  158 +++++++++++++++++++++++++++-----------------------------
 environment.c |    2 +-
 4 files changed, 84 insertions(+), 90 deletions(-)

-- 
1.7.5.1.288.g93ebc

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/4] convert: rename the "eol" global variable to "core_eol"
  2011-05-09 22:04     ` [PATCH 0/4] convert.c clean-up Junio C Hamano
@ 2011-05-09 22:04       ` Junio C Hamano
  2011-05-09 22:04       ` [PATCH 2/4] convert: give saner names to crlf/eol variables, types and functions Junio C Hamano
                         ` (3 subsequent siblings)
  4 siblings, 0 replies; 18+ messages in thread
From: Junio C Hamano @ 2011-05-09 22:04 UTC (permalink / raw)
  To: git; +Cc: pclouds

Yes, it is clear that "eol" wants to mean some sort of end-of-line thing,
but as the name of a global variable, it is way too short to describe what
kind of end-of-line thing it wants to represent. Besides, there are many
codepaths that want to use their own local "char *eol" variable to point
at the end of the current line they are processing.

This global variable holds what we read from core.eol configuration
variable. Name it as such.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 cache.h       |    2 +-
 config.c      |   12 ++++++------
 convert.c     |    4 ++--
 environment.c |    2 +-
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/cache.h b/cache.h
index 2b34116..4e9123b 100644
--- a/cache.h
+++ b/cache.h
@@ -606,7 +606,7 @@ enum eol {
 #endif
 };
 
-extern enum eol eol;
+extern enum eol core_eol;
 
 enum branch_track {
 	BRANCH_TRACK_UNSPECIFIED = -1,
diff --git a/config.c b/config.c
index 5f9ec28..671c8df 100644
--- a/config.c
+++ b/config.c
@@ -583,7 +583,7 @@ static int git_default_core_config(const char *var, const char *value)
 
 	if (!strcmp(var, "core.autocrlf")) {
 		if (value && !strcasecmp(value, "input")) {
-			if (eol == EOL_CRLF)
+			if (core_eol == EOL_CRLF)
 				return error("core.autocrlf=input conflicts with core.eol=crlf");
 			auto_crlf = AUTO_CRLF_INPUT;
 			return 0;
@@ -603,14 +603,14 @@ static int git_default_core_config(const char *var, const char *value)
 
 	if (!strcmp(var, "core.eol")) {
 		if (value && !strcasecmp(value, "lf"))
-			eol = EOL_LF;
+			core_eol = EOL_LF;
 		else if (value && !strcasecmp(value, "crlf"))
-			eol = EOL_CRLF;
+			core_eol = EOL_CRLF;
 		else if (value && !strcasecmp(value, "native"))
-			eol = EOL_NATIVE;
+			core_eol = EOL_NATIVE;
 		else
-			eol = EOL_UNSET;
-		if (eol == EOL_CRLF && auto_crlf == AUTO_CRLF_INPUT)
+			core_eol = EOL_UNSET;
+		if (core_eol == EOL_CRLF && auto_crlf == AUTO_CRLF_INPUT)
 			return error("core.autocrlf=input conflicts with core.eol=crlf");
 		return 0;
 	}
diff --git a/convert.c b/convert.c
index 7eb51b1..4dba329 100644
--- a/convert.c
+++ b/convert.c
@@ -113,10 +113,10 @@ static enum eol determine_output_conversion(enum action action)
 			return EOL_CRLF;
 		else if (auto_crlf == AUTO_CRLF_INPUT)
 			return EOL_LF;
-		else if (eol == EOL_UNSET)
+		else if (core_eol == EOL_UNSET)
 			return EOL_NATIVE;
 	}
-	return eol;
+	return core_eol;
 }
 
 static void check_safe_crlf(const char *path, enum action action,
diff --git a/environment.c b/environment.c
index 40185bc..7fe9f10 100644
--- a/environment.c
+++ b/environment.c
@@ -43,7 +43,7 @@ const char *askpass_program;
 const char *excludes_file;
 enum auto_crlf auto_crlf = AUTO_CRLF_FALSE;
 int read_replace_refs = 1;
-enum eol eol = EOL_UNSET;
+enum eol core_eol = EOL_UNSET;
 enum safe_crlf safe_crlf = SAFE_CRLF_WARN;
 unsigned whitespace_rule_cfg = WS_DEFAULT_RULE;
 enum branch_track git_branch_track = BRANCH_TRACK_REMOTE;
-- 
1.7.5.1.288.g93ebc

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/4] convert: give saner names to crlf/eol variables, types and functions
  2011-05-09 22:04     ` [PATCH 0/4] convert.c clean-up Junio C Hamano
  2011-05-09 22:04       ` [PATCH 1/4] convert: rename the "eol" global variable to "core_eol" Junio C Hamano
@ 2011-05-09 22:04       ` Junio C Hamano
  2011-05-09 22:05       ` [PATCH 3/4] convert: make it safer to add conversion attributes Junio C Hamano
                         ` (2 subsequent siblings)
  4 siblings, 0 replies; 18+ messages in thread
From: Junio C Hamano @ 2011-05-09 22:04 UTC (permalink / raw)
  To: git; +Cc: pclouds

Back when the conversion was only about the end-of-line convention, it
might have made sense to call what we do upon seeing CR/LF simply an
"action", but these days the conversion routines do a lot more than just
tweaking the line ending.  Raname "action" to "crlf_action".

The function that decides what end of line conversion to use on the output
codepath was called "determine_output_conversion", as if there is no other
kind of output conversion.  Rename it to "output_eol"; it is a function
that returns what EOL convention is to be used.

A function that decides what "crlf_action" needs to be used on the input
codepath, given what conversion attribute is set to the path and global
end-of-line convention, was called "determine_action".  Rename it to
"input_crlf_action".

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 convert.c |   61 +++++++++++++++++++++++++++++++------------------------------
 1 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/convert.c b/convert.c
index 4dba329..e0ee245 100644
--- a/convert.c
+++ b/convert.c
@@ -12,7 +12,7 @@
  * translation when the "text" attribute or "auto_crlf" option is set.
  */
 
-enum action {
+enum crlf_action {
 	CRLF_GUESS = -1,
 	CRLF_BINARY = 0,
 	CRLF_TEXT,
@@ -94,9 +94,9 @@ static int is_binary(unsigned long size, struct text_stat *stats)
 	return 0;
 }
 
-static enum eol determine_output_conversion(enum action action)
+static enum eol output_eol(enum crlf_action crlf_action)
 {
-	switch (action) {
+	switch (crlf_action) {
 	case CRLF_BINARY:
 		return EOL_UNSET;
 	case CRLF_CRLF:
@@ -119,13 +119,13 @@ static enum eol determine_output_conversion(enum action action)
 	return core_eol;
 }
 
-static void check_safe_crlf(const char *path, enum action action,
+static void check_safe_crlf(const char *path, enum crlf_action crlf_action,
                             struct text_stat *stats, enum safe_crlf checksafe)
 {
 	if (!checksafe)
 		return;
 
-	if (determine_output_conversion(action) == EOL_LF) {
+	if (output_eol(crlf_action) == EOL_LF) {
 		/*
 		 * CRLFs would not be restored by checkout:
 		 * check if we'd remove CRLFs
@@ -136,7 +136,7 @@ static void check_safe_crlf(const char *path, enum action action,
 			else /* i.e. SAFE_CRLF_FAIL */
 				die("CRLF would be replaced by LF in %s.", path);
 		}
-	} else if (determine_output_conversion(action) == EOL_CRLF) {
+	} else if (output_eol(crlf_action) == EOL_CRLF) {
 		/*
 		 * CRLFs would be added by checkout:
 		 * check if we have "naked" LFs
@@ -188,18 +188,19 @@ static int has_cr_in_index(const char *path)
 }
 
 static int crlf_to_git(const char *path, const char *src, size_t len,
-		       struct strbuf *buf, enum action action, enum safe_crlf checksafe)
+		       struct strbuf *buf,
+		       enum crlf_action crlf_action, enum safe_crlf checksafe)
 {
 	struct text_stat stats;
 	char *dst;
 
-	if (action == CRLF_BINARY ||
-	    (action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE) || !len)
+	if (crlf_action == CRLF_BINARY ||
+	    (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE) || !len)
 		return 0;
 
 	gather_stats(src, len, &stats);
 
-	if (action == CRLF_AUTO || action == CRLF_GUESS) {
+	if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) {
 		/*
 		 * We're currently not going to even try to convert stuff
 		 * that has bare CR characters. Does anybody do that crazy
@@ -214,7 +215,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
 		if (is_binary(len, &stats))
 			return 0;
 
-		if (action == CRLF_GUESS) {
+		if (crlf_action == CRLF_GUESS) {
 			/*
 			 * If the file in the index has any CR in it, do not convert.
 			 * This is the new safer autocrlf handling.
@@ -224,7 +225,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
 		}
 	}
 
-	check_safe_crlf(path, action, &stats, checksafe);
+	check_safe_crlf(path, crlf_action, &stats, checksafe);
 
 	/* Optimization: No CR? Nothing to convert, regardless. */
 	if (!stats.cr)
@@ -234,7 +235,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
 	if (strbuf_avail(buf) + buf->len < len)
 		strbuf_grow(buf, len - buf->len);
 	dst = buf->buf;
-	if (action == CRLF_AUTO || action == CRLF_GUESS) {
+	if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) {
 		/*
 		 * If we guessed, we already know we rejected a file with
 		 * lone CR, and we can strip a CR without looking at what
@@ -257,12 +258,12 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
 }
 
 static int crlf_to_worktree(const char *path, const char *src, size_t len,
-			    struct strbuf *buf, enum action action)
+			    struct strbuf *buf, enum crlf_action crlf_action)
 {
 	char *to_free = NULL;
 	struct text_stat stats;
 
-	if (!len || determine_output_conversion(action) != EOL_CRLF)
+	if (!len || output_eol(crlf_action) != EOL_CRLF)
 		return 0;
 
 	gather_stats(src, len, &stats);
@@ -275,8 +276,8 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len,
 	if (stats.lf == stats.crlf)
 		return 0;
 
-	if (action == CRLF_AUTO || action == CRLF_GUESS) {
-		if (action == CRLF_GUESS) {
+	if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) {
+		if (crlf_action == CRLF_GUESS) {
 			/* If we have any CR or CRLF line endings, we do not touch it */
 			/* This is the new safer autocrlf-handling */
 			if (stats.cr > 0 || stats.crlf > 0)
@@ -715,7 +716,7 @@ static int git_path_check_ident(const char *path, struct git_attr_check *check)
 	return !!ATTR_TRUE(value);
 }
 
-static enum action determine_action(enum action text_attr, enum eol eol_attr)
+static enum crlf_action input_crlf_action(enum crlf_action text_attr, enum eol eol_attr)
 {
 	if (text_attr == CRLF_BINARY)
 		return CRLF_BINARY;
@@ -730,7 +731,7 @@ int convert_to_git(const char *path, const char *src, size_t len,
                    struct strbuf *dst, enum safe_crlf checksafe)
 {
 	struct git_attr_check check[5];
-	enum action action = CRLF_GUESS;
+	enum crlf_action crlf_action = CRLF_GUESS;
 	enum eol eol_attr = EOL_UNSET;
 	int ident = 0, ret = 0;
 	const char *filter = NULL;
@@ -738,9 +739,9 @@ int convert_to_git(const char *path, const char *src, size_t len,
 	setup_convert_check(check);
 	if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
 		struct convert_driver *drv;
-		action = git_path_check_crlf(path, check + 4);
-		if (action == CRLF_GUESS)
-			action = git_path_check_crlf(path, check + 0);
+		crlf_action = git_path_check_crlf(path, check + 4);
+		if (crlf_action == CRLF_GUESS)
+			crlf_action = git_path_check_crlf(path, check + 0);
 		ident = git_path_check_ident(path, check + 1);
 		drv = git_path_check_convert(path, check + 2);
 		eol_attr = git_path_check_eol(path, check + 3);
@@ -753,8 +754,8 @@ int convert_to_git(const char *path, const char *src, size_t len,
 		src = dst->buf;
 		len = dst->len;
 	}
-	action = determine_action(action, eol_attr);
-	ret |= crlf_to_git(path, src, len, dst, action, checksafe);
+	crlf_action = input_crlf_action(crlf_action, eol_attr);
+	ret |= crlf_to_git(path, src, len, dst, crlf_action, checksafe);
 	if (ret) {
 		src = dst->buf;
 		len = dst->len;
@@ -767,7 +768,7 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
 					    int normalizing)
 {
 	struct git_attr_check check[5];
-	enum action action = CRLF_GUESS;
+	enum crlf_action crlf_action = CRLF_GUESS;
 	enum eol eol_attr = EOL_UNSET;
 	int ident = 0, ret = 0;
 	const char *filter = NULL;
@@ -775,9 +776,9 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
 	setup_convert_check(check);
 	if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
 		struct convert_driver *drv;
-		action = git_path_check_crlf(path, check + 4);
-		if (action == CRLF_GUESS)
-			action = git_path_check_crlf(path, check + 0);
+		crlf_action = git_path_check_crlf(path, check + 4);
+		if (crlf_action == CRLF_GUESS)
+			crlf_action = git_path_check_crlf(path, check + 0);
 		ident = git_path_check_ident(path, check + 1);
 		drv = git_path_check_convert(path, check + 2);
 		eol_attr = git_path_check_eol(path, check + 3);
@@ -795,8 +796,8 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
 	 * is a smudge filter.  The filter might expect CRLFs.
 	 */
 	if (filter || !normalizing) {
-		action = determine_action(action, eol_attr);
-		ret |= crlf_to_worktree(path, src, len, dst, action);
+		crlf_action = input_crlf_action(crlf_action, eol_attr);
+		ret |= crlf_to_worktree(path, src, len, dst, crlf_action);
 		if (ret) {
 			src = dst->buf;
 			len = dst->len;
-- 
1.7.5.1.288.g93ebc

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 3/4] convert: make it safer to add conversion attributes
  2011-05-09 22:04     ` [PATCH 0/4] convert.c clean-up Junio C Hamano
  2011-05-09 22:04       ` [PATCH 1/4] convert: rename the "eol" global variable to "core_eol" Junio C Hamano
  2011-05-09 22:04       ` [PATCH 2/4] convert: give saner names to crlf/eol variables, types and functions Junio C Hamano
@ 2011-05-09 22:05       ` Junio C Hamano
  2011-05-09 22:05       ` [PATCH 4/4] convert: make it harder to screw up adding a conversion attribute Junio C Hamano
  2011-05-10 12:59       ` [PATCH 0/4] convert.c clean-up Nguyen Thai Ngoc Duy
  4 siblings, 0 replies; 18+ messages in thread
From: Junio C Hamano @ 2011-05-09 22:05 UTC (permalink / raw)
  To: git; +Cc: pclouds

The places that need to pass an array of "struct git_attr_check" needed to
be careful to pass a large enough array and know what index each element
lied.  Make it safer and easier to code these.

Besides, the hard-coded sequence of initializing various attributes was
too ugly after we gained more than a few attributes.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 convert.c |   48 ++++++++++++++++++++++--------------------------
 1 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/convert.c b/convert.c
index e0ee245..a05820b 100644
--- a/convert.c
+++ b/convert.c
@@ -475,30 +475,6 @@ static int read_convert_config(const char *var, const char *value, void *cb)
 	return 0;
 }
 
-static void setup_convert_check(struct git_attr_check *check)
-{
-	static struct git_attr *attr_text;
-	static struct git_attr *attr_crlf;
-	static struct git_attr *attr_eol;
-	static struct git_attr *attr_ident;
-	static struct git_attr *attr_filter;
-
-	if (!attr_text) {
-		attr_text = git_attr("text");
-		attr_crlf = git_attr("crlf");
-		attr_eol = git_attr("eol");
-		attr_ident = git_attr("ident");
-		attr_filter = git_attr("filter");
-		user_convert_tail = &user_convert;
-		git_config(read_convert_config, NULL);
-	}
-	check[0].attr = attr_crlf;
-	check[1].attr = attr_ident;
-	check[2].attr = attr_filter;
-	check[3].attr = attr_eol;
-	check[4].attr = attr_text;
-}
-
 static int count_ident(const char *cp, unsigned long size)
 {
 	/*
@@ -727,10 +703,30 @@ static enum crlf_action input_crlf_action(enum crlf_action text_attr, enum eol e
 	return text_attr;
 }
 
+static const char *conv_attr_name[] = {
+	"crlf", "ident", "filter", "eol", "text",
+};
+#define NUM_CONV_ATTRS ARRAY_SIZE(conv_attr_name)
+
+static void setup_convert_check(struct git_attr_check *check)
+{
+	int i;
+	static struct git_attr_check ccheck[NUM_CONV_ATTRS];
+
+	if (!ccheck[0].attr) {
+		for (i = 0; i < NUM_CONV_ATTRS; i++)
+			ccheck[i].attr = git_attr(conv_attr_name[i]);
+		user_convert_tail = &user_convert;
+		git_config(read_convert_config, NULL);
+	}
+	for (i = 0; i < NUM_CONV_ATTRS; i++)
+		check[i].attr = ccheck[i].attr;
+}
+
 int convert_to_git(const char *path, const char *src, size_t len,
                    struct strbuf *dst, enum safe_crlf checksafe)
 {
-	struct git_attr_check check[5];
+	struct git_attr_check check[NUM_CONV_ATTRS];
 	enum crlf_action crlf_action = CRLF_GUESS;
 	enum eol eol_attr = EOL_UNSET;
 	int ident = 0, ret = 0;
@@ -767,7 +763,7 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
 					    size_t len, struct strbuf *dst,
 					    int normalizing)
 {
-	struct git_attr_check check[5];
+	struct git_attr_check check[NUM_CONV_ATTRS];
 	enum crlf_action crlf_action = CRLF_GUESS;
 	enum eol eol_attr = EOL_UNSET;
 	int ident = 0, ret = 0;
-- 
1.7.5.1.288.g93ebc

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 4/4] convert: make it harder to screw up adding a conversion attribute
  2011-05-09 22:04     ` [PATCH 0/4] convert.c clean-up Junio C Hamano
                         ` (2 preceding siblings ...)
  2011-05-09 22:05       ` [PATCH 3/4] convert: make it safer to add conversion attributes Junio C Hamano
@ 2011-05-09 22:05       ` Junio C Hamano
  2011-05-10 12:59       ` [PATCH 0/4] convert.c clean-up Nguyen Thai Ngoc Duy
  4 siblings, 0 replies; 18+ messages in thread
From: Junio C Hamano @ 2011-05-09 22:05 UTC (permalink / raw)
  To: git; +Cc: pclouds

The current internal API requires the callers of setup_convert_check() to
supply the git_attr_check structures (hence they need to know how many to
allocate), but they grab the same set of attributes for given path.

Define a new convert_attrs() API that fills a higher level information that
the callers (convert_to_git and convert_to_working_tree) really want, and
move the common code to interact with the attributes system to it.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 convert.c |   79 +++++++++++++++++++++++++++++-------------------------------
 1 files changed, 38 insertions(+), 41 deletions(-)

diff --git a/convert.c b/convert.c
index a05820b..efc7e07 100644
--- a/convert.c
+++ b/convert.c
@@ -703,12 +703,19 @@ static enum crlf_action input_crlf_action(enum crlf_action text_attr, enum eol e
 	return text_attr;
 }
 
+struct conv_attrs {
+	struct convert_driver *drv;
+	enum crlf_action crlf_action;
+	enum eol eol_attr;
+	int ident;
+};
+
 static const char *conv_attr_name[] = {
 	"crlf", "ident", "filter", "eol", "text",
 };
 #define NUM_CONV_ATTRS ARRAY_SIZE(conv_attr_name)
 
-static void setup_convert_check(struct git_attr_check *check)
+static void convert_attrs(struct conv_attrs *ca, const char *path)
 {
 	int i;
 	static struct git_attr_check ccheck[NUM_CONV_ATTRS];
@@ -719,70 +726,60 @@ static void setup_convert_check(struct git_attr_check *check)
 		user_convert_tail = &user_convert;
 		git_config(read_convert_config, NULL);
 	}
-	for (i = 0; i < NUM_CONV_ATTRS; i++)
-		check[i].attr = ccheck[i].attr;
+
+	if (!git_checkattr(path, NUM_CONV_ATTRS, ccheck)) {
+		ca->crlf_action = git_path_check_crlf(path, ccheck + 4);
+		if (ca->crlf_action == CRLF_GUESS)
+			ca->crlf_action = git_path_check_crlf(path, ccheck + 0);
+		ca->ident = git_path_check_ident(path, ccheck + 1);
+		ca->drv = git_path_check_convert(path, ccheck + 2);
+		ca->eol_attr = git_path_check_eol(path, ccheck + 3);
+	} else {
+		ca->drv = NULL;
+		ca->crlf_action = CRLF_GUESS;
+		ca->eol_attr = EOL_UNSET;
+		ca->ident = 0;
+	}
 }
 
 int convert_to_git(const char *path, const char *src, size_t len,
                    struct strbuf *dst, enum safe_crlf checksafe)
 {
-	struct git_attr_check check[NUM_CONV_ATTRS];
-	enum crlf_action crlf_action = CRLF_GUESS;
-	enum eol eol_attr = EOL_UNSET;
-	int ident = 0, ret = 0;
+	int ret = 0;
 	const char *filter = NULL;
+	struct conv_attrs ca;
 
-	setup_convert_check(check);
-	if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
-		struct convert_driver *drv;
-		crlf_action = git_path_check_crlf(path, check + 4);
-		if (crlf_action == CRLF_GUESS)
-			crlf_action = git_path_check_crlf(path, check + 0);
-		ident = git_path_check_ident(path, check + 1);
-		drv = git_path_check_convert(path, check + 2);
-		eol_attr = git_path_check_eol(path, check + 3);
-		if (drv && drv->clean)
-			filter = drv->clean;
-	}
+	convert_attrs(&ca, path);
+	if (ca.drv)
+		filter = ca.drv->clean;
 
 	ret |= apply_filter(path, src, len, dst, filter);
 	if (ret) {
 		src = dst->buf;
 		len = dst->len;
 	}
-	crlf_action = input_crlf_action(crlf_action, eol_attr);
-	ret |= crlf_to_git(path, src, len, dst, crlf_action, checksafe);
+	ca.crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
+	ret |= crlf_to_git(path, src, len, dst, ca.crlf_action, checksafe);
 	if (ret) {
 		src = dst->buf;
 		len = dst->len;
 	}
-	return ret | ident_to_git(path, src, len, dst, ident);
+	return ret | ident_to_git(path, src, len, dst, ca.ident);
 }
 
 static int convert_to_working_tree_internal(const char *path, const char *src,
 					    size_t len, struct strbuf *dst,
 					    int normalizing)
 {
-	struct git_attr_check check[NUM_CONV_ATTRS];
-	enum crlf_action crlf_action = CRLF_GUESS;
-	enum eol eol_attr = EOL_UNSET;
-	int ident = 0, ret = 0;
+	int ret = 0;
 	const char *filter = NULL;
+	struct conv_attrs ca;
 
-	setup_convert_check(check);
-	if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
-		struct convert_driver *drv;
-		crlf_action = git_path_check_crlf(path, check + 4);
-		if (crlf_action == CRLF_GUESS)
-			crlf_action = git_path_check_crlf(path, check + 0);
-		ident = git_path_check_ident(path, check + 1);
-		drv = git_path_check_convert(path, check + 2);
-		eol_attr = git_path_check_eol(path, check + 3);
-		if (drv && drv->smudge)
-			filter = drv->smudge;
-	}
+	convert_attrs(&ca, path);
+	if (ca.drv)
+		filter = ca.drv->smudge;
 
-	ret |= ident_to_worktree(path, src, len, dst, ident);
+	ret |= ident_to_worktree(path, src, len, dst, ca.ident);
 	if (ret) {
 		src = dst->buf;
 		len = dst->len;
@@ -792,8 +789,8 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
 	 * is a smudge filter.  The filter might expect CRLFs.
 	 */
 	if (filter || !normalizing) {
-		crlf_action = input_crlf_action(crlf_action, eol_attr);
-		ret |= crlf_to_worktree(path, src, len, dst, crlf_action);
+		ca.crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
+		ret |= crlf_to_worktree(path, src, len, dst, ca.crlf_action);
 		if (ret) {
 			src = dst->buf;
 			len = dst->len;
-- 
1.7.5.1.288.g93ebc

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/4] convert.c clean-up
  2011-05-09 22:04     ` [PATCH 0/4] convert.c clean-up Junio C Hamano
                         ` (3 preceding siblings ...)
  2011-05-09 22:05       ` [PATCH 4/4] convert: make it harder to screw up adding a conversion attribute Junio C Hamano
@ 2011-05-10 12:59       ` Nguyen Thai Ngoc Duy
  2011-05-10 14:09         ` Junio C Hamano
  4 siblings, 1 reply; 18+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2011-05-10 12:59 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Tue, May 10, 2011 at 5:04 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Yeah, I think that would probably make more sense. If a file is small
> enough, it is more sensible to send it to a loose object just like any
> other files. We do not want to invite users to make a mistake of marking
> it as bigdata and send it straight to a packfile. Having one less knob to
> tweak is always a good thing to do.
>
> However, while reviewing your patch, I noticed that convert.c was littered
> with misnamed types, variables and functions to the point to make it
> almost unreadble as the result of its evolution.  I originally wrote this
> series so that I can add "bigdata" sensibly, and it turns out that there
> is no benefit to do so for now, but the clean-up by itself would be worth
> it.

I still don't like "bigdata" attribute. It sounds overlapping with
bigFileThreshold we already have. Maybe "inPack", "packed" or
"noLoose" a better name? It makes it quite clear that this attribute
sends objects to a pack. If they want to process tiny files this way
by setting inPack/noLoose, I don't care. But files larger than
core.bigFileThreshold should be automatically marked "inPack/noLoose".

> So there...

Yeah, I wish you did this before I touched convert.c. Anyway it looks
better from now on.
-- 
Duy

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/4] convert.c clean-up
  2011-05-10 12:59       ` [PATCH 0/4] convert.c clean-up Nguyen Thai Ngoc Duy
@ 2011-05-10 14:09         ` Junio C Hamano
  0 siblings, 0 replies; 18+ messages in thread
From: Junio C Hamano @ 2011-05-10 14:09 UTC (permalink / raw)
  To: Nguyen Thai Ngoc Duy; +Cc: git

Nguyen Thai Ngoc Duy <pclouds@gmail.com> writes:

> On Tue, May 10, 2011 at 5:04 AM, Junio C Hamano <gitster@pobox.com> wrote:
>> Yeah, I think that would probably make more sense. If a file is small
>> enough, it is more sensible to send it to a loose object just like any
>> other files. We do not want to invite users to make a mistake of marking
>> it as bigdata and send it straight to a packfile. Having one less knob to
>> tweak is always a good thing to do.
>>
>> However, while reviewing your patch, I noticed that convert.c was littered
>> with misnamed types, variables and functions to the point to make it
>> almost unreadble as the result of its evolution. I originally wrote this
>> series so that I can add "bigdata" sensibly, and it turns out that there
>> is no benefit to do so for now, but the clean-up by itself would be worth
>> it.
>
> I still don't like "bigdata" attribute.

I think we are in agreement. I do not like it anymore and that was what I
tried to say in the first paragraph, and that was why I said "it turns out
that there is no benefit" in the second paragraph.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack
  2011-05-08  8:47 ` [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack Junio C Hamano
  2011-05-08 10:19   ` Nguyen Thai Ngoc Duy
  2011-05-09 14:05   ` [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack Shawn Pearce
@ 2011-05-29 18:20   ` Ævar Arnfjörð Bjarmason
  2011-05-29 20:29     ` Junio C Hamano
  2 siblings, 1 reply; 18+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2011-05-29 18:20 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Sun, May 8, 2011 at 10:47, Junio C Hamano <gitster@pobox.com> wrote:

> +               char buf[10240];
> +               size_t sz = size < sizeof(buf) ? size : sizeof(buf);
> +               size_t actual;
> +
> +               actual = read_in_full(fd, buf, sz);
> +               if (actual < 0)
> +                       die_errno("index-stream: reading input");

From clang:

    sha1_file.c:2710:14: warning: comparison of unsigned expression <
0 is always false [-Wtautological-compare]
                    if (actual < 0)
                        ~~~~~~ ^ ~

Looks like it's right. size_t is unsigned according to the standard,
so that die_errno() is never reached.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack
  2011-05-29 18:20   ` Ævar Arnfjörð Bjarmason
@ 2011-05-29 20:29     ` Junio C Hamano
  0 siblings, 0 replies; 18+ messages in thread
From: Junio C Hamano @ 2011-05-29 20:29 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason; +Cc: git

Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

> On Sun, May 8, 2011 at 10:47, Junio C Hamano <gitster@pobox.com> wrote:
>
>> +               char buf[10240];
>> +               size_t sz = size < sizeof(buf) ? size : sizeof(buf);
>> +               size_t actual;
>> +
>> +               actual = read_in_full(fd, buf, sz);
>> +               if (actual < 0)
>> +                       die_errno("index-stream: reading input");

It already has been fixed with 23c7df6 (sha1_file: use the correct type
(ssize_t, not size_t) for read-style function, 2011-05-26).

Scanning the list and updating your copy of 'pu' from k.org would often
save your time for topics that are relatively new.

Thanks.

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2011-05-29 20:29 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-08  8:47 [PATCH v0 0/3] git add a-Big-file Junio C Hamano
2011-05-08  8:47 ` [PATCH v0 1/3] index_fd(): turn write_object and format_check arguments into one flag Junio C Hamano
2011-05-08  8:47 ` [PATCH v0 2/3] index_fd(): split into two helper functions Junio C Hamano
2011-05-08  8:47 ` [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack Junio C Hamano
2011-05-08 10:19   ` Nguyen Thai Ngoc Duy
2011-05-08 17:37     ` Junio C Hamano
2011-05-09 22:04     ` [PATCH 0/4] convert.c clean-up Junio C Hamano
2011-05-09 22:04       ` [PATCH 1/4] convert: rename the "eol" global variable to "core_eol" Junio C Hamano
2011-05-09 22:04       ` [PATCH 2/4] convert: give saner names to crlf/eol variables, types and functions Junio C Hamano
2011-05-09 22:05       ` [PATCH 3/4] convert: make it safer to add conversion attributes Junio C Hamano
2011-05-09 22:05       ` [PATCH 4/4] convert: make it harder to screw up adding a conversion attribute Junio C Hamano
2011-05-10 12:59       ` [PATCH 0/4] convert.c clean-up Nguyen Thai Ngoc Duy
2011-05-10 14:09         ` Junio C Hamano
2011-05-09 14:05   ` [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack Shawn Pearce
2011-05-09 15:58     ` Junio C Hamano
2011-05-09 16:14       ` Junio C Hamano
2011-05-29 18:20   ` Ævar Arnfjörð Bjarmason
2011-05-29 20:29     ` Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.