All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] Bulk check-in
@ 2011-10-28 23:54 Junio C Hamano
  2011-10-28 23:54 ` [PATCH 1/4] write_pack_header(): a helper function Junio C Hamano
                   ` (4 more replies)
  0 siblings, 5 replies; 10+ messages in thread
From: Junio C Hamano @ 2011-10-28 23:54 UTC (permalink / raw)
  To: git

This miniseries is a continuation of the "large file" topic from 1.7.6
development cycle.

The first three are moving existing code around for better reuse.  The
last one serves two purposes: to lift the one-pack-per-one-large-blob
constraint by introducing the concept of "plugging/unplugging" (i.e. you
plug the drain and throw many large blobs at index_fd(), and they appear in
a single pack when you unplug it), and to stop using fast-import in this
codepath.

Only very lightly tested.

Junio C Hamano (4):
  write_pack_header(): a helper function
  create_tmp_packfile(): a helper function
  finish_tmp_packfile(): a helper function
  Bulk check-in

 Makefile               |    2 +
 builtin/add.c          |    5 ++
 builtin/pack-objects.c |   56 +++++------------
 bulk-checkin.c         |  159 ++++++++++++++++++++++++++++++++++++++++++++++++
 bulk-checkin.h         |   16 +++++
 pack-write.c           |   53 ++++++++++++++++
 pack.h                 |    6 ++
 sha1_file.c            |   67 +-------------------
 t/t1050-large.sh       |   26 ++++++--
 9 files changed, 282 insertions(+), 108 deletions(-)
 create mode 100644 bulk-checkin.c
 create mode 100644 bulk-checkin.h

-- 
1.7.7.1.573.ga40d2

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/4] write_pack_header(): a helper function
  2011-10-28 23:54 [PATCH 0/4] Bulk check-in Junio C Hamano
@ 2011-10-28 23:54 ` Junio C Hamano
  2011-10-28 23:54 ` [PATCH 2/4] create_tmp_packfile(): " Junio C Hamano
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 10+ messages in thread
From: Junio C Hamano @ 2011-10-28 23:54 UTC (permalink / raw)
  To: git

Factor a small piece of logic out of the private write_pack_file() function
in builtin/pack-objects.c

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-objects.c |    9 +++------
 pack-write.c           |   12 ++++++++++++
 pack.h                 |    2 ++
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index ba3705d..6643c16 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -571,7 +571,6 @@ static void write_pack_file(void)
 	uint32_t i = 0, j;
 	struct sha1file *f;
 	off_t offset;
-	struct pack_header hdr;
 	uint32_t nr_remaining = nr_result;
 	time_t last_mtime = 0;
 	struct object_entry **write_order;
@@ -596,11 +595,9 @@ static void write_pack_file(void)
 			f = sha1fd(fd, pack_tmp_name);
 		}
 
-		hdr.hdr_signature = htonl(PACK_SIGNATURE);
-		hdr.hdr_version = htonl(PACK_VERSION);
-		hdr.hdr_entries = htonl(nr_remaining);
-		sha1write(f, &hdr, sizeof(hdr));
-		offset = sizeof(hdr);
+		offset = write_pack_header(f, nr_remaining);
+		if (!offset)
+			die_errno("unable to write pack header");
 		nr_written = 0;
 		for (; i < nr_objects; i++) {
 			struct object_entry *e = write_order[i];
diff --git a/pack-write.c b/pack-write.c
index 9cd3bfb..46f3f84 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -178,6 +178,18 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec
 	return index_name;
 }
 
+off_t write_pack_header(struct sha1file *f, uint32_t nr_entries)
+{
+	struct pack_header hdr;
+
+	hdr.hdr_signature = htonl(PACK_SIGNATURE);
+	hdr.hdr_version = htonl(PACK_VERSION);
+	hdr.hdr_entries = htonl(nr_entries);
+	if (sha1write(f, &hdr, sizeof(hdr)))
+		return 0;
+	return sizeof(hdr);
+}
+
 /*
  * Update pack header with object_count and compute new SHA1 for pack data
  * associated to pack_fd, and write that SHA1 at the end.  That new SHA1
diff --git a/pack.h b/pack.h
index 722a54e..d429d8a 100644
--- a/pack.h
+++ b/pack.h
@@ -2,6 +2,7 @@
 #define PACK_H
 
 #include "object.h"
+#include "csum-file.h"
 
 /*
  * Packed object header
@@ -74,6 +75,7 @@ extern const char *write_idx_file(const char *index_name, struct pack_idx_entry
 extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, unsigned int nr);
 extern int verify_pack_index(struct packed_git *);
 extern int verify_pack(struct packed_git *);
+extern off_t write_pack_header(struct sha1file *f, uint32_t);
 extern void fixup_pack_header_footer(int, unsigned char *, const char *, uint32_t, unsigned char *, off_t);
 extern char *index_pack_lockfile(int fd);
 extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned char *);
-- 
1.7.7.1.573.ga40d2

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/4] create_tmp_packfile(): a helper function
  2011-10-28 23:54 [PATCH 0/4] Bulk check-in Junio C Hamano
  2011-10-28 23:54 ` [PATCH 1/4] write_pack_header(): a helper function Junio C Hamano
@ 2011-10-28 23:54 ` Junio C Hamano
  2011-10-28 23:54 ` [PATCH 3/4] finish_tmp_packfile(): " Junio C Hamano
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 10+ messages in thread
From: Junio C Hamano @ 2011-10-28 23:54 UTC (permalink / raw)
  To: git

Factor a small piece of logic out of the private write_pack_file() function
in builtin/pack-objects.c

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-objects.c |   12 +++---------
 pack-write.c           |   10 ++++++++++
 pack.h                 |    3 +++
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 6643c16..3258fa9 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -584,16 +584,10 @@ static void write_pack_file(void)
 		unsigned char sha1[20];
 		char *pack_tmp_name = NULL;
 
-		if (pack_to_stdout) {
+		if (pack_to_stdout)
 			f = sha1fd_throughput(1, "<stdout>", progress_state);
-		} else {
-			char tmpname[PATH_MAX];
-			int fd;
-			fd = odb_mkstemp(tmpname, sizeof(tmpname),
-					 "pack/tmp_pack_XXXXXX");
-			pack_tmp_name = xstrdup(tmpname);
-			f = sha1fd(fd, pack_tmp_name);
-		}
+		else
+			f = create_tmp_packfile(&pack_tmp_name);
 
 		offset = write_pack_header(f, nr_remaining);
 		if (!offset)
diff --git a/pack-write.c b/pack-write.c
index 46f3f84..863cce8 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -328,3 +328,13 @@ int encode_in_pack_object_header(enum object_type type, uintmax_t size, unsigned
 	*hdr = c;
 	return n;
 }
+
+struct sha1file *create_tmp_packfile(char **pack_tmp_name)
+{
+	char tmpname[PATH_MAX];
+	int fd;
+
+	fd = odb_mkstemp(tmpname, sizeof(tmpname), "pack/tmp_pack_XXXXXX");
+	*pack_tmp_name = xstrdup(tmpname);
+	return sha1fd(fd, *pack_tmp_name);
+}
diff --git a/pack.h b/pack.h
index d429d8a..0027ac6 100644
--- a/pack.h
+++ b/pack.h
@@ -84,4 +84,7 @@ extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned ch
 #define PH_ERROR_PACK_SIGNATURE	(-2)
 #define PH_ERROR_PROTOCOL	(-3)
 extern int read_pack_header(int fd, struct pack_header *);
+
+extern struct sha1file *create_tmp_packfile(char **pack_tmp_name);
+
 #endif
-- 
1.7.7.1.573.ga40d2

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/4] finish_tmp_packfile(): a helper function
  2011-10-28 23:54 [PATCH 0/4] Bulk check-in Junio C Hamano
  2011-10-28 23:54 ` [PATCH 1/4] write_pack_header(): a helper function Junio C Hamano
  2011-10-28 23:54 ` [PATCH 2/4] create_tmp_packfile(): " Junio C Hamano
@ 2011-10-28 23:54 ` Junio C Hamano
  2011-10-28 23:54 ` [PATCH 4/4] Bulk check-in Junio C Hamano
  2011-11-18  7:11 ` [PATCH 0/3] bulk-checkin continued Junio C Hamano
  4 siblings, 0 replies; 10+ messages in thread
From: Junio C Hamano @ 2011-10-28 23:54 UTC (permalink / raw)
  To: git

Factor a small piece of logic out of the private write_pack_file() function
in builtin/pack-objects.c.

This changes the order of finishing multi-pack generation slightly. The
code used to

 - adjust shared perm of temporary packfile
 - rename temporary packfile to the final name
 - update mtime of the packfile under the final name
 - adjust shared perm of temporary idxfile
 - rename temporary idxfile to the final name

but because the helper does not want to do the mtime thing, the updated
code does that step first and then all the rest.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-objects.c |   33 ++++++++++-----------------------
 pack-write.c           |   31 +++++++++++++++++++++++++++++++
 pack.h                 |    1 +
 3 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 3258fa9..b458b6d 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -617,20 +617,8 @@ static void write_pack_file(void)
 
 		if (!pack_to_stdout) {
 			struct stat st;
-			const char *idx_tmp_name;
 			char tmpname[PATH_MAX];
 
-			idx_tmp_name = write_idx_file(NULL, written_list, nr_written,
-						      &pack_idx_opts, sha1);
-
-			snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
-				 base_name, sha1_to_hex(sha1));
-			free_pack_by_name(tmpname);
-			if (adjust_shared_perm(pack_tmp_name))
-				die_errno("unable to make temporary pack file readable");
-			if (rename(pack_tmp_name, tmpname))
-				die_errno("unable to rename temporary pack file");
-
 			/*
 			 * Packs are runtime accessed in their mtime
 			 * order since newer packs are more likely to contain
@@ -638,28 +626,27 @@ static void write_pack_file(void)
 			 * packs then we should modify the mtime of later ones
 			 * to preserve this property.
 			 */
-			if (stat(tmpname, &st) < 0) {
+			if (stat(pack_tmp_name, &st) < 0) {
 				warning("failed to stat %s: %s",
-					tmpname, strerror(errno));
+					pack_tmp_name, strerror(errno));
 			} else if (!last_mtime) {
 				last_mtime = st.st_mtime;
 			} else {
 				struct utimbuf utb;
 				utb.actime = st.st_atime;
 				utb.modtime = --last_mtime;
-				if (utime(tmpname, &utb) < 0)
+				if (utime(pack_tmp_name, &utb) < 0)
 					warning("failed utime() on %s: %s",
 						tmpname, strerror(errno));
 			}
 
-			snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
-				 base_name, sha1_to_hex(sha1));
-			if (adjust_shared_perm(idx_tmp_name))
-				die_errno("unable to make temporary index file readable");
-			if (rename(idx_tmp_name, tmpname))
-				die_errno("unable to rename temporary index file");
-
-			free((void *) idx_tmp_name);
+			/* Enough space for "-<sha-1>.pack"? */
+			if (sizeof(tmpname) <= strlen(base_name) + 50)
+				die("pack base name '%s' too long", base_name);
+			snprintf(tmpname, sizeof(tmpname), "%s-", base_name);
+			finish_tmp_packfile(tmpname, pack_tmp_name,
+					    written_list, nr_written,
+					    &pack_idx_opts, sha1);
 			free(pack_tmp_name);
 			puts(sha1_to_hex(sha1));
 		}
diff --git a/pack-write.c b/pack-write.c
index 863cce8..cadc3e1 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -338,3 +338,34 @@ struct sha1file *create_tmp_packfile(char **pack_tmp_name)
 	*pack_tmp_name = xstrdup(tmpname);
 	return sha1fd(fd, *pack_tmp_name);
 }
+
+void finish_tmp_packfile(char *name_buffer,
+			 const char *pack_tmp_name,
+			 struct pack_idx_entry **written_list,
+			 uint32_t nr_written,
+			 struct pack_idx_option *pack_idx_opts,
+			 unsigned char sha1[])
+{
+	const char *idx_tmp_name;
+	char *end_of_name_prefix = strrchr(name_buffer, 0);
+
+	if (adjust_shared_perm(pack_tmp_name))
+		die_errno("unable to make temporary pack file readable");
+
+	idx_tmp_name = write_idx_file(NULL, written_list, nr_written,
+				      pack_idx_opts, sha1);
+	if (adjust_shared_perm(idx_tmp_name))
+		die_errno("unable to make temporary index file readable");
+
+	sprintf(end_of_name_prefix, "%s.pack", sha1_to_hex(sha1));
+	free_pack_by_name(name_buffer);
+
+	if (rename(pack_tmp_name, name_buffer))
+		die_errno("unable to rename temporary pack file");
+
+	sprintf(end_of_name_prefix, "%s.idx", sha1_to_hex(sha1));
+	if (rename(idx_tmp_name, name_buffer))
+		die_errno("unable to rename temporary index file");
+
+	free((void *)idx_tmp_name);
+}
diff --git a/pack.h b/pack.h
index 0027ac6..cfb0f69 100644
--- a/pack.h
+++ b/pack.h
@@ -86,5 +86,6 @@ extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned ch
 extern int read_pack_header(int fd, struct pack_header *);
 
 extern struct sha1file *create_tmp_packfile(char **pack_tmp_name);
+extern void finish_tmp_packfile(char *name_buffer, const char *pack_tmp_name, struct pack_idx_entry **written_list, uint32_t nr_written, struct pack_idx_option *pack_idx_opts, unsigned char sha1[]);
 
 #endif
-- 
1.7.7.1.573.ga40d2

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 4/4] Bulk check-in
  2011-10-28 23:54 [PATCH 0/4] Bulk check-in Junio C Hamano
                   ` (2 preceding siblings ...)
  2011-10-28 23:54 ` [PATCH 3/4] finish_tmp_packfile(): " Junio C Hamano
@ 2011-10-28 23:54 ` Junio C Hamano
  2011-11-18  7:11 ` [PATCH 0/3] bulk-checkin continued Junio C Hamano
  4 siblings, 0 replies; 10+ messages in thread
From: Junio C Hamano @ 2011-10-28 23:54 UTC (permalink / raw)
  To: git

This extends the earlier approach to stream a large file directly from the
filesystem to its own packfile, and allows "git add" to send large files
directly into a single pack. Older code used to spawn fast-import, but
the new bulk_checkin API replaces it.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Makefile         |    2 +
 builtin/add.c    |    5 ++
 bulk-checkin.c   |  159 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 bulk-checkin.h   |   16 ++++++
 sha1_file.c      |   67 ++---------------------
 t/t1050-large.sh |   26 +++++++--
 6 files changed, 206 insertions(+), 69 deletions(-)
 create mode 100644 bulk-checkin.c
 create mode 100644 bulk-checkin.h

diff --git a/Makefile b/Makefile
index 3139c19..418dd2e 100644
--- a/Makefile
+++ b/Makefile
@@ -505,6 +505,7 @@ LIB_H += argv-array.h
 LIB_H += attr.h
 LIB_H += blob.h
 LIB_H += builtin.h
+LIB_H += bulk-checkin.h
 LIB_H += cache.h
 LIB_H += cache-tree.h
 LIB_H += color.h
@@ -591,6 +592,7 @@ LIB_OBJS += base85.o
 LIB_OBJS += bisect.o
 LIB_OBJS += blob.o
 LIB_OBJS += branch.o
+LIB_OBJS += bulk-checkin.o
 LIB_OBJS += bundle.o
 LIB_OBJS += cache-tree.o
 LIB_OBJS += color.o
diff --git a/builtin/add.c b/builtin/add.c
index c59b0c9..1c42900 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -13,6 +13,7 @@
 #include "diff.h"
 #include "diffcore.h"
 #include "revision.h"
+#include "bulk-checkin.h"
 
 static const char * const builtin_add_usage[] = {
 	"git add [options] [--] <filepattern>...",
@@ -458,11 +459,15 @@ int cmd_add(int argc, const char **argv, const char *prefix)
 		free(seen);
 	}
 
+	plug_bulk_checkin();
+
 	exit_status |= add_files_to_cache(prefix, pathspec, flags);
 
 	if (add_new_files)
 		exit_status |= add_files(&dir, flags);
 
+	unplug_bulk_checkin();
+
  finish:
 	if (active_cache_changed) {
 		if (write_cache(newfd, active_cache, active_nr) ||
diff --git a/bulk-checkin.c b/bulk-checkin.c
new file mode 100644
index 0000000..cad7a0b
--- /dev/null
+++ b/bulk-checkin.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2011, Google Inc.
+ */
+#include "bulk-checkin.h"
+#include "csum-file.h"
+#include "pack.h"
+
+static int pack_compression_level = Z_DEFAULT_COMPRESSION;
+
+static struct bulk_checkin_state {
+	unsigned plugged:1;
+
+	char *pack_tmp_name;
+	struct sha1file *f;
+	off_t offset;
+	struct pack_idx_option pack_idx_opts;
+
+	struct pack_idx_entry **written;
+	uint32_t alloc_written;
+	uint32_t nr_written;
+} state;
+
+static void finish_bulk_checkin(struct bulk_checkin_state *state)
+{
+	unsigned char sha1[20];
+	char packname[PATH_MAX];
+	int i;
+
+	if (!state->f)
+		return;
+
+	if (state->nr_written == 1) {
+		sha1close(state->f, sha1, CSUM_FSYNC);
+	} else {
+		int fd = sha1close(state->f, sha1, 0);
+		fixup_pack_header_footer(fd, sha1, state->pack_tmp_name,
+					 state->nr_written, sha1,
+					 state->offset);
+		close(fd);
+	}
+
+	sprintf(packname, "%s/pack/pack-", get_object_directory());
+	finish_tmp_packfile(packname, state->pack_tmp_name,
+			    state->written, state->nr_written,
+			    &state->pack_idx_opts, sha1);
+	for (i = 0; i < state->nr_written; i++)
+		free(state->written[i]);
+	free(state->written);
+	memset(state, 0, sizeof(*state));
+
+	/* Make objects we just wrote available to ourselves */
+	reprepare_packed_git();
+}
+
+static void deflate_to_pack(struct bulk_checkin_state *state,
+			    unsigned char sha1[],
+			    int fd, size_t size, enum object_type type,
+			    const char *path, unsigned flags)
+{
+	unsigned char obuf[16384];
+	unsigned hdrlen;
+	git_zstream s;
+	git_SHA_CTX ctx;
+	int write_object = (flags & HASH_WRITE_OBJECT);
+	int status = Z_OK;
+	struct pack_idx_entry *idx = NULL;
+
+	hdrlen = sprintf((char *)obuf, "%s %" PRIuMAX, typename(type), size) + 1;
+	git_SHA1_Init(&ctx);
+	git_SHA1_Update(&ctx, obuf, hdrlen);
+
+	if (write_object) {
+		idx = xcalloc(1, sizeof(*idx));
+		idx->offset = state->offset;
+		crc32_begin(state->f);
+	}
+	memset(&s, 0, sizeof(s));
+	git_deflate_init(&s, pack_compression_level);
+
+	hdrlen = encode_in_pack_object_header(type, size, obuf);
+	s.next_out = obuf + hdrlen;
+	s.avail_out = sizeof(obuf) - hdrlen;
+
+	while (status != Z_STREAM_END) {
+		unsigned char ibuf[16384];
+
+		if (size && !s.avail_in) {
+			ssize_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
+			if (xread(fd, ibuf, rsize) != rsize)
+				die("failed to read %d bytes from '%s'",
+				    (int)rsize, path);
+			git_SHA1_Update(&ctx, ibuf, rsize);
+			s.next_in = ibuf;
+			s.avail_in = rsize;
+			size -= rsize;
+		}
+
+		status = git_deflate(&s, size ? 0 : Z_FINISH);
+
+		if (!s.avail_out || status == Z_STREAM_END) {
+			size_t written = s.next_out - obuf;
+			if (write_object) {
+				sha1write(state->f, obuf, written);
+				state->offset += written;
+			}
+			s.next_out = obuf;
+			s.avail_out = sizeof(obuf);
+		}
+
+		switch (status) {
+		case Z_OK:
+		case Z_BUF_ERROR:
+		case Z_STREAM_END:
+			continue;
+		default:
+			die("unexpected deflate failure: %d", status);
+		}
+	}
+	git_deflate_end(&s);
+	git_SHA1_Final(sha1, &ctx);
+	if (write_object) {
+		idx->crc32 = crc32_end(state->f);
+		hashcpy(idx->sha1, sha1);
+		ALLOC_GROW(state->written,
+			   state->nr_written + 1, state->alloc_written);
+		state->written[state->nr_written++] = idx;
+	}
+}
+
+int index_bulk_checkin(unsigned char *sha1,
+		       int fd, size_t size, enum object_type type,
+		       const char *path, unsigned flags)
+{
+	if (!state.f && (flags & HASH_WRITE_OBJECT)) {
+		state.f = create_tmp_packfile(&state.pack_tmp_name);
+		reset_pack_idx_option(&state.pack_idx_opts);
+		/* Pretend we are going to write only one object */
+		state.offset = write_pack_header(state.f, 1);
+		if (!state.offset)
+			die_errno("unable to write pack header");
+	}
+
+	deflate_to_pack(&state, sha1, fd, size, type, path, flags);
+	if (!state.plugged)
+		finish_bulk_checkin(&state);
+	return 0;
+}
+
+void plug_bulk_checkin(void)
+{
+	state.plugged = 1;
+}
+
+void unplug_bulk_checkin(void)
+{
+	state.plugged = 0;
+	if (state.f)
+		finish_bulk_checkin(&state);
+}
diff --git a/bulk-checkin.h b/bulk-checkin.h
new file mode 100644
index 0000000..4f599f8
--- /dev/null
+++ b/bulk-checkin.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2011, Google Inc.
+ */
+#ifndef BULK_CHECKIN_H
+#define BULK_CHECKIN_H
+
+#include "cache.h"
+
+extern int index_bulk_checkin(unsigned char sha1[],
+			      int fd, size_t size, enum object_type type,
+			      const char *path, unsigned flags);
+
+extern void plug_bulk_checkin(void);
+extern void unplug_bulk_checkin(void);
+
+#endif
diff --git a/sha1_file.c b/sha1_file.c
index 27f3b9b..c96e366 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -18,6 +18,7 @@
 #include "refs.h"
 #include "pack-revindex.h"
 #include "sha1-lookup.h"
+#include "bulk-checkin.h"
 
 #ifndef O_NOATIME
 #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -2679,10 +2680,8 @@ static int index_core(unsigned char *sha1, int fd, size_t size,
 }
 
 /*
- * This creates one packfile per large blob, because the caller
- * immediately wants the result sha1, and fast-import can report the
- * object name via marks mechanism only by closing the created
- * packfile.
+ * This creates one packfile per large blob unless bulk-checkin
+ * machinery is "plugged".
  *
  * This also bypasses the usual "convert-to-git" dance, and that is on
  * purpose. We could write a streaming version of the converting
@@ -2696,65 +2695,7 @@ static int index_stream(unsigned char *sha1, int fd, size_t size,
 			enum object_type type, const char *path,
 			unsigned flags)
 {
-	struct child_process fast_import;
-	char export_marks[512];
-	const char *argv[] = { "fast-import", "--quiet", export_marks, NULL };
-	char tmpfile[512];
-	char fast_import_cmd[512];
-	char buf[512];
-	int len, tmpfd;
-
-	strcpy(tmpfile, git_path("hashstream_XXXXXX"));
-	tmpfd = git_mkstemp_mode(tmpfile, 0600);
-	if (tmpfd < 0)
-		die_errno("cannot create tempfile: %s", tmpfile);
-	if (close(tmpfd))
-		die_errno("cannot close tempfile: %s", tmpfile);
-	sprintf(export_marks, "--export-marks=%s", tmpfile);
-
-	memset(&fast_import, 0, sizeof(fast_import));
-	fast_import.in = -1;
-	fast_import.argv = argv;
-	fast_import.git_cmd = 1;
-	if (start_command(&fast_import))
-		die_errno("index-stream: git fast-import failed");
-
-	len = sprintf(fast_import_cmd, "blob\nmark :1\ndata %lu\n",
-		      (unsigned long) size);
-	write_or_whine(fast_import.in, fast_import_cmd, len,
-		       "index-stream: feeding fast-import");
-	while (size) {
-		char buf[10240];
-		size_t sz = size < sizeof(buf) ? size : sizeof(buf);
-		ssize_t actual;
-
-		actual = read_in_full(fd, buf, sz);
-		if (actual < 0)
-			die_errno("index-stream: reading input");
-		if (write_in_full(fast_import.in, buf, actual) != actual)
-			die_errno("index-stream: feeding fast-import");
-		size -= actual;
-	}
-	if (close(fast_import.in))
-		die_errno("index-stream: closing fast-import");
-	if (finish_command(&fast_import))
-		die_errno("index-stream: finishing fast-import");
-
-	tmpfd = open(tmpfile, O_RDONLY);
-	if (tmpfd < 0)
-		die_errno("index-stream: cannot open fast-import mark");
-	len = read(tmpfd, buf, sizeof(buf));
-	if (len < 0)
-		die_errno("index-stream: reading fast-import mark");
-	if (close(tmpfd) < 0)
-		die_errno("index-stream: closing fast-import mark");
-	if (unlink(tmpfile))
-		die_errno("index-stream: unlinking fast-import mark");
-	if (len != 44 ||
-	    memcmp(":1 ", buf, 3) ||
-	    get_sha1_hex(buf + 3, sha1))
-		die_errno("index-stream: unexpected fast-import mark: <%s>", buf);
-	return 0;
+	return index_bulk_checkin(sha1, fd, size, type, path, flags);
 }
 
 int index_fd(unsigned char *sha1, int fd, struct stat *st,
diff --git a/t/t1050-large.sh b/t/t1050-large.sh
index deba111..36def25 100755
--- a/t/t1050-large.sh
+++ b/t/t1050-large.sh
@@ -7,14 +7,28 @@ test_description='adding and checking out large blobs'
 
 test_expect_success setup '
 	git config core.bigfilethreshold 200k &&
-	echo X | dd of=large bs=1k seek=2000
+	echo X | dd of=large bs=1k seek=2000 &&
+	echo Y | dd of=huge bs=1k seek=2500
 '
 
-test_expect_success 'add a large file' '
-	git add large &&
-	# make sure we got a packfile and no loose objects
-	test -f .git/objects/pack/pack-*.pack &&
-	test ! -f .git/objects/??/??????????????????????????????????????
+test_expect_success 'add a large file or two' '
+	git add large huge &&
+	# make sure we got a single packfile and no loose objects
+	bad= count=0 &&
+	for p in .git/objects/pack/pack-*.pack
+	do
+		count=$(( $count + 1 ))
+		test -f "$p" && continue
+		bad=t
+	done &&
+	test -z "$bad" &&
+	test $count = 1 &&
+	for l in .git/objects/??/??????????????????????????????????????
+	do
+		test -f "$l" || continue
+		bad=t
+	done &&
+	test -z "$bad"
 '
 
 test_expect_success 'checkout a large file' '
-- 
1.7.7.1.573.ga40d2

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 0/3] bulk-checkin continued
  2011-10-28 23:54 [PATCH 0/4] Bulk check-in Junio C Hamano
                   ` (3 preceding siblings ...)
  2011-10-28 23:54 ` [PATCH 4/4] Bulk check-in Junio C Hamano
@ 2011-11-18  7:11 ` Junio C Hamano
  2011-11-18  7:11   ` [PATCH 1/3] csum-file: introduce sha1file_checkpoint Junio C Hamano
                     ` (3 more replies)
  4 siblings, 4 replies; 10+ messages in thread
From: Junio C Hamano @ 2011-11-18  7:11 UTC (permalink / raw)
  To: git

This updates the earlier bulk-checkin series ($gmane/184440) to further
enhance the "large file" topic from 1.7.6 cycle.

The first one adds two API functions to allow truncating a checksummed
file that is being written. This is the same patch as the one I sent
earlier today.

The second one prevents the bulk-checkin code from writing the same object
twice to the stream, and the third one further makes it notice that the
object it has just written already exists in the repository. In either
case, the packfile is rewound using the new sha1file_checkpoint/truncate
API, and the packfile itself is removed if truncation results in an empty
output.

The next step is to add the "split-blob" entry in the packfile, but that
is a much larger task and will take longer.

Junio C Hamano (3):
  csum-file: introduce sha1file_checkpoint
  bulk-checkin: do not write the same object twice
  bulk-checkin: do not write an object that already exists

 bulk-checkin.c   |   40 +++++++++++++++++++++++++++++++++++-----
 csum-file.c      |   20 ++++++++++++++++++++
 csum-file.h      |    9 +++++++++
 fast-import.c    |   25 ++++++++-----------------
 t/t1050-large.sh |   38 ++++++++++++++++++++++++++++++--------
 5 files changed, 102 insertions(+), 30 deletions(-)

-- 
1.7.8.rc3.111.g7d421

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/3] csum-file: introduce sha1file_checkpoint
  2011-11-18  7:11 ` [PATCH 0/3] bulk-checkin continued Junio C Hamano
@ 2011-11-18  7:11   ` Junio C Hamano
  2011-11-18  7:11   ` [PATCH 2/3] bulk-checkin: do not write the same object twice Junio C Hamano
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Junio C Hamano @ 2011-11-18  7:11 UTC (permalink / raw)
  To: git

It is useful to be able to rewind a check-summed file to a certain
previous state after writing data into it using sha1write() API. The
fast-import command does this after streaming a blob data to the packfile
being generated and then noticing that the same blob has already been
written, and it does this with a private code truncate_pack() that is
commented as "Yes, this is a layering violation".

Introduce two API functions, sha1file_checkpoint() and sha1file_truncate(),
that allow the caller to save the state of a sha1file and then later revert
it to the saved state. Use them to reimplement truncate_pack().

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 csum-file.c   |   20 ++++++++++++++++++++
 csum-file.h   |    9 +++++++++
 fast-import.c |   25 ++++++++-----------------
 3 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/csum-file.c b/csum-file.c
index fc97d6e..53f5375 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -158,6 +158,26 @@ struct sha1file *sha1fd_throughput(int fd, const char *name, struct progress *tp
 	return f;
 }
 
+void sha1file_checkpoint(struct sha1file *f, struct sha1file_checkpoint *checkpoint)
+{
+	sha1flush(f);
+	checkpoint->offset = f->total;
+	checkpoint->ctx = f->ctx;
+}
+
+int sha1file_truncate(struct sha1file *f, struct sha1file_checkpoint *checkpoint)
+{
+	off_t offset = checkpoint->offset;
+
+	if (ftruncate(f->fd, offset) ||
+	    lseek(f->fd, offset, SEEK_SET) != offset)
+		return -1;
+	f->total = offset;
+	f->ctx = checkpoint->ctx;
+	f->offset = 0; /* sha1flush() was called in checkpoint */
+	return 0;
+}
+
 void crc32_begin(struct sha1file *f)
 {
 	f->crc32 = crc32(0, NULL, 0);
diff --git a/csum-file.h b/csum-file.h
index 6a7967c..3b540bd 100644
--- a/csum-file.h
+++ b/csum-file.h
@@ -17,6 +17,15 @@ struct sha1file {
 	unsigned char buffer[8192];
 };
 
+/* Checkpoint */
+struct sha1file_checkpoint {
+	off_t offset;
+	git_SHA_CTX ctx;
+};
+
+extern void sha1file_checkpoint(struct sha1file *, struct sha1file_checkpoint *);
+extern int sha1file_truncate(struct sha1file *, struct sha1file_checkpoint *);
+
 /* sha1close flags */
 #define CSUM_CLOSE	1
 #define CSUM_FSYNC	2
diff --git a/fast-import.c b/fast-import.c
index 8d8ea3c..a8db41b 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -1143,17 +1143,11 @@ static int store_object(
 	return 0;
 }
 
-static void truncate_pack(off_t to, git_SHA_CTX *ctx)
+static void truncate_pack(struct sha1file_checkpoint *checkpoint)
 {
-	if (ftruncate(pack_data->pack_fd, to)
-	 || lseek(pack_data->pack_fd, to, SEEK_SET) != to)
+	if (sha1file_truncate(pack_file, checkpoint))
 		die_errno("cannot truncate pack to skip duplicate");
-	pack_size = to;
-
-	/* yes this is a layering violation */
-	pack_file->total = to;
-	pack_file->offset = 0;
-	pack_file->ctx = *ctx;
+	pack_size = checkpoint->offset;
 }
 
 static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
@@ -1166,8 +1160,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
 	unsigned long hdrlen;
 	off_t offset;
 	git_SHA_CTX c;
-	git_SHA_CTX pack_file_ctx;
 	git_zstream s;
+	struct sha1file_checkpoint checkpoint;
 	int status = Z_OK;
 
 	/* Determine if we should auto-checkpoint. */
@@ -1175,11 +1169,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
 		|| (pack_size + 60 + len) < pack_size)
 		cycle_packfile();
 
-	offset = pack_size;
-
-	/* preserve the pack_file SHA1 ctx in case we have to truncate later */
-	sha1flush(pack_file);
-	pack_file_ctx = pack_file->ctx;
+	sha1file_checkpoint(pack_file, &checkpoint);
+	offset = checkpoint.offset;
 
 	hdrlen = snprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1;
 	if (out_sz <= hdrlen)
@@ -1245,14 +1236,14 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
 
 	if (e->idx.offset) {
 		duplicate_count_by_type[OBJ_BLOB]++;
-		truncate_pack(offset, &pack_file_ctx);
+		truncate_pack(&checkpoint);
 
 	} else if (find_sha1_pack(sha1, packed_git)) {
 		e->type = OBJ_BLOB;
 		e->pack_id = MAX_PACK_ID;
 		e->idx.offset = 1; /* just not zero! */
 		duplicate_count_by_type[OBJ_BLOB]++;
-		truncate_pack(offset, &pack_file_ctx);
+		truncate_pack(&checkpoint);
 
 	} else {
 		e->depth = 0;
-- 
1.7.8.rc3.111.g7d421

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/3] bulk-checkin: do not write the same object twice
  2011-11-18  7:11 ` [PATCH 0/3] bulk-checkin continued Junio C Hamano
  2011-11-18  7:11   ` [PATCH 1/3] csum-file: introduce sha1file_checkpoint Junio C Hamano
@ 2011-11-18  7:11   ` Junio C Hamano
  2011-11-18  7:11   ` [PATCH 3/3] bulk-checkin: do not write an object that already exists Junio C Hamano
  2011-11-24  5:18   ` [PATCH] bulk-checkin: honor pack.packsizelimit Junio C Hamano
  3 siblings, 0 replies; 10+ messages in thread
From: Junio C Hamano @ 2011-11-18  7:11 UTC (permalink / raw)
  To: git

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 bulk-checkin.c   |   28 ++++++++++++++++++++++++----
 t/t1050-large.sh |   20 +++++++++++++-------
 2 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/bulk-checkin.c b/bulk-checkin.c
index c7e693e..82166ba 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -52,6 +52,17 @@ static void finish_bulk_checkin(struct bulk_checkin_state *state)
 	reprepare_packed_git();
 }
 
+static int already_written(struct bulk_checkin_state *state, unsigned char sha1[])
+{
+	int i;
+
+	/* Might want to keep the list sorted */
+	for (i = 0; i < state->nr_written; i++)
+		if (!hashcmp(state->written[i]->sha1, sha1))
+			return 1;
+	return 0;
+}
+
 static void deflate_to_pack(struct bulk_checkin_state *state,
 			    unsigned char sha1[],
 			    int fd, size_t size, enum object_type type,
@@ -64,6 +75,7 @@ static void deflate_to_pack(struct bulk_checkin_state *state,
 	int write_object = (flags & HASH_WRITE_OBJECT);
 	int status = Z_OK;
 	struct pack_idx_entry *idx = NULL;
+	struct sha1file_checkpoint checkpoint;
 
 	hdrlen = sprintf((char *)obuf, "%s %" PRIuMAX,
 			 typename(type), (uintmax_t)size) + 1;
@@ -73,6 +85,7 @@ static void deflate_to_pack(struct bulk_checkin_state *state,
 	if (write_object) {
 		idx = xcalloc(1, sizeof(*idx));
 		idx->offset = state->offset;
+		sha1file_checkpoint(state->f, &checkpoint);
 		crc32_begin(state->f);
 	}
 	memset(&s, 0, sizeof(s));
@@ -121,10 +134,17 @@ static void deflate_to_pack(struct bulk_checkin_state *state,
 	git_SHA1_Final(sha1, &ctx);
 	if (write_object) {
 		idx->crc32 = crc32_end(state->f);
-		hashcpy(idx->sha1, sha1);
-		ALLOC_GROW(state->written,
-			   state->nr_written + 1, state->alloc_written);
-		state->written[state->nr_written++] = idx;
+
+		if (already_written(state, sha1)) {
+			sha1file_truncate(state->f, &checkpoint);
+			state->offset = checkpoint.offset;
+			free(idx);
+		} else {
+			hashcpy(idx->sha1, sha1);
+			ALLOC_GROW(state->written,
+				   state->nr_written + 1, state->alloc_written);
+			state->written[state->nr_written++] = idx;
+		}
 	}
 }
 
diff --git a/t/t1050-large.sh b/t/t1050-large.sh
index 36def25..fbd5ced 100755
--- a/t/t1050-large.sh
+++ b/t/t1050-large.sh
@@ -7,22 +7,28 @@ test_description='adding and checking out large blobs'
 
 test_expect_success setup '
 	git config core.bigfilethreshold 200k &&
-	echo X | dd of=large bs=1k seek=2000 &&
+	echo X | dd of=large1 bs=1k seek=2000 &&
+	echo X | dd of=large2 bs=1k seek=2000 &&
 	echo Y | dd of=huge bs=1k seek=2500
 '
 
 test_expect_success 'add a large file or two' '
-	git add large huge &&
+	git add large1 huge large2 &&
 	# make sure we got a single packfile and no loose objects
-	bad= count=0 &&
+	bad= count=0 idx= &&
 	for p in .git/objects/pack/pack-*.pack
 	do
 		count=$(( $count + 1 ))
-		test -f "$p" && continue
+		if test -f "$p" && idx=${p%.pack}.idx && test -f "$idx"
+		then
+			continue
+		fi
 		bad=t
 	done &&
 	test -z "$bad" &&
 	test $count = 1 &&
+	cnt=$(git show-index <"$idx" | wc -l) &&
+	test $cnt = 2 &&
 	for l in .git/objects/??/??????????????????????????????????????
 	do
 		test -f "$l" || continue
@@ -32,10 +38,10 @@ test_expect_success 'add a large file or two' '
 '
 
 test_expect_success 'checkout a large file' '
-	large=$(git rev-parse :large) &&
-	git update-index --add --cacheinfo 100644 $large another &&
+	large1=$(git rev-parse :large1) &&
+	git update-index --add --cacheinfo 100644 $large1 another &&
 	git checkout another &&
-	cmp large another ;# this must not be test_cmp
+	cmp large1 another ;# this must not be test_cmp
 '
 
 test_done
-- 
1.7.8.rc3.111.g7d421

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/3] bulk-checkin: do not write an object that already exists
  2011-11-18  7:11 ` [PATCH 0/3] bulk-checkin continued Junio C Hamano
  2011-11-18  7:11   ` [PATCH 1/3] csum-file: introduce sha1file_checkpoint Junio C Hamano
  2011-11-18  7:11   ` [PATCH 2/3] bulk-checkin: do not write the same object twice Junio C Hamano
@ 2011-11-18  7:11   ` Junio C Hamano
  2011-11-24  5:18   ` [PATCH] bulk-checkin: honor pack.packsizelimit Junio C Hamano
  3 siblings, 0 replies; 10+ messages in thread
From: Junio C Hamano @ 2011-11-18  7:11 UTC (permalink / raw)
  To: git

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 bulk-checkin.c   |   12 +++++++++++-
 t/t1050-large.sh |   18 +++++++++++++++++-
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/bulk-checkin.c b/bulk-checkin.c
index 82166ba..60178ef 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -29,7 +29,11 @@ static void finish_bulk_checkin(struct bulk_checkin_state *state)
 	if (!state->f)
 		return;
 
-	if (state->nr_written == 1) {
+	if (state->nr_written == 0) {
+		close(state->f->fd);
+		unlink(state->pack_tmp_name);
+		goto clear_exit;
+	} else if (state->nr_written == 1) {
 		sha1close(state->f, sha1, CSUM_FSYNC);
 	} else {
 		int fd = sha1close(state->f, sha1, 0);
@@ -45,6 +49,8 @@ static void finish_bulk_checkin(struct bulk_checkin_state *state)
 			    &state->pack_idx_opts, sha1);
 	for (i = 0; i < state->nr_written; i++)
 		free(state->written[i]);
+
+clear_exit:
 	free(state->written);
 	memset(state, 0, sizeof(*state));
 
@@ -56,6 +62,10 @@ static int already_written(struct bulk_checkin_state *state, unsigned char sha1[
 {
 	int i;
 
+	/* The object may already exist in the repository */
+	if (has_sha1_file(sha1))
+		return 1;
+
 	/* Might want to keep the list sorted */
 	for (i = 0; i < state->nr_written; i++)
 		if (!hashcmp(state->written[i]->sha1, sha1))
diff --git a/t/t1050-large.sh b/t/t1050-large.sh
index fbd5ced..0726472 100755
--- a/t/t1050-large.sh
+++ b/t/t1050-large.sh
@@ -9,6 +9,7 @@ test_expect_success setup '
 	git config core.bigfilethreshold 200k &&
 	echo X | dd of=large1 bs=1k seek=2000 &&
 	echo X | dd of=large2 bs=1k seek=2000 &&
+	echo X | dd of=large3 bs=1k seek=2000 &&
 	echo Y | dd of=huge bs=1k seek=2500
 '
 
@@ -34,7 +35,22 @@ test_expect_success 'add a large file or two' '
 		test -f "$l" || continue
 		bad=t
 	done &&
-	test -z "$bad"
+	test -z "$bad" &&
+
+	# attempt to add another copy of the same
+	git add large3 &&
+	bad= count=0 &&
+	for p in .git/objects/pack/pack-*.pack
+	do
+		count=$(( $count + 1 ))
+		if test -f "$p" && idx=${p%.pack}.idx && test -f "$idx"
+		then
+			continue
+		fi
+		bad=t
+	done &&
+	test -z "$bad" &&
+	test $count = 1
 '
 
 test_expect_success 'checkout a large file' '
-- 
1.7.8.rc3.111.g7d421

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH] bulk-checkin: honor pack.packsizelimit
  2011-11-18  7:11 ` [PATCH 0/3] bulk-checkin continued Junio C Hamano
                     ` (2 preceding siblings ...)
  2011-11-18  7:11   ` [PATCH 3/3] bulk-checkin: do not write an object that already exists Junio C Hamano
@ 2011-11-24  5:18   ` Junio C Hamano
  3 siblings, 0 replies; 10+ messages in thread
From: Junio C Hamano @ 2011-11-24  5:18 UTC (permalink / raw)
  To: git

The bulk-checkin interface is designed to throw multiple blobs into a
single output packfile during the lifetime of a single process by
"plugging" the output. The direct streaming of the data to a packfile
however is primarily meant as a way to deal with large blobs better, and
it is possible that we end up with a single humongous packfile that is
awkward to handle.

Pay attention to the pack.packsizelimit configuration the same way as
pack-objects does, and make sure we close a packfile and switch to a
new one before busting the size limit.

We allow the limit to be busted if a single object is too large to be
contained in a pack that is smaller than the limit on its own, as there is
no way to store such an object otherwise; the same is already done in
pack-objects.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-objects.c |    6 +-----
 bulk-checkin.c         |   35 ++++++++++++++++++++++++++++++-----
 cache.h                |    1 +
 config.c               |    4 ++++
 environment.c          |    1 +
 5 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index b458b6d..dde913e 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -76,7 +76,7 @@ static struct pack_idx_option pack_idx_opts;
 static const char *base_name;
 static int progress = 1;
 static int window = 10;
-static unsigned long pack_size_limit, pack_size_limit_cfg;
+static unsigned long pack_size_limit;
 static int depth = 50;
 static int delta_search_threads;
 static int pack_to_stdout;
@@ -2009,10 +2009,6 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 			    pack_idx_opts.version);
 		return 0;
 	}
-	if (!strcmp(k, "pack.packsizelimit")) {
-		pack_size_limit_cfg = git_config_ulong(k, v);
-		return 0;
-	}
 	return git_default_config(k, v, cb);
 }
 
diff --git a/bulk-checkin.c b/bulk-checkin.c
index 60178ef..2adc67b 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -73,10 +73,13 @@ static int already_written(struct bulk_checkin_state *state, unsigned char sha1[
 	return 0;
 }
 
-static void deflate_to_pack(struct bulk_checkin_state *state,
-			    unsigned char sha1[],
-			    int fd, size_t size, enum object_type type,
-			    const char *path, unsigned flags)
+#define DEFLATE_TO_PACK_OK 0
+#define DEFLATE_TO_PACK_TOOBIG 1
+
+static int deflate_to_pack(struct bulk_checkin_state *state,
+			   unsigned char sha1[],
+			   int fd, size_t size, enum object_type type,
+			   const char *path, unsigned flags)
 {
 	unsigned char obuf[16384];
 	unsigned hdrlen;
@@ -149,6 +152,13 @@ static void deflate_to_pack(struct bulk_checkin_state *state,
 			sha1file_truncate(state->f, &checkpoint);
 			state->offset = checkpoint.offset;
 			free(idx);
+		} else if (state->nr_written &&
+			   pack_size_limit_cfg &&
+			   pack_size_limit_cfg < state->offset) {
+			sha1file_truncate(state->f, &checkpoint);
+			state->offset = checkpoint.offset;
+			free(idx);
+			return DEFLATE_TO_PACK_TOOBIG;
 		} else {
 			hashcpy(idx->sha1, sha1);
 			ALLOC_GROW(state->written,
@@ -156,12 +166,17 @@ static void deflate_to_pack(struct bulk_checkin_state *state,
 			state->written[state->nr_written++] = idx;
 		}
 	}
+	return DEFLATE_TO_PACK_OK;
 }
 
 int index_bulk_checkin(unsigned char *sha1,
 		       int fd, size_t size, enum object_type type,
 		       const char *path, unsigned flags)
 {
+	off_t seekback;
+	int status;
+
+again:
 	if (!state.f && (flags & HASH_WRITE_OBJECT)) {
 		state.f = create_tmp_packfile(&state.pack_tmp_name);
 		reset_pack_idx_option(&state.pack_idx_opts);
@@ -171,7 +186,17 @@ int index_bulk_checkin(unsigned char *sha1,
 			die_errno("unable to write pack header");
 	}
 
-	deflate_to_pack(&state, sha1, fd, size, type, path, flags);
+	seekback = lseek(fd, 0, SEEK_CUR);
+	if (seekback == (off_t) -1)
+		return error("cannot seek");
+	status = deflate_to_pack(&state, sha1, fd, size, type, path, flags);
+	if (status == DEFLATE_TO_PACK_TOOBIG) {
+		finish_bulk_checkin(&state);
+		if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
+			return error("cannot seek back");
+		goto again;
+	}
+
 	if (!state.plugged)
 		finish_bulk_checkin(&state);
 	return 0;
diff --git a/cache.h b/cache.h
index 2e6ad36..b158d3e 100644
--- a/cache.h
+++ b/cache.h
@@ -598,6 +598,7 @@ extern size_t packed_git_window_size;
 extern size_t packed_git_limit;
 extern size_t delta_base_cache_limit;
 extern unsigned long big_file_threshold;
+extern unsigned long pack_size_limit_cfg;
 extern int read_replace_refs;
 extern int fsync_object_files;
 extern int core_preload_index;
diff --git a/config.c b/config.c
index edf9914..c736802 100644
--- a/config.c
+++ b/config.c
@@ -797,6 +797,10 @@ int git_default_config(const char *var, const char *value, void *dummy)
 		return 0;
 	}
 
+	if (!strcmp(var, "pack.packsizelimit")) {
+		pack_size_limit_cfg = git_config_ulong(var, value);
+		return 0;
+	}
 	/* Add other config variables here and to Documentation/config.txt. */
 	return 0;
 }
diff --git a/environment.c b/environment.c
index 0bee6a7..31e4284 100644
--- a/environment.c
+++ b/environment.c
@@ -60,6 +60,7 @@ char *notes_ref_name;
 int grafts_replace_parents = 1;
 int core_apply_sparse_checkout;
 struct startup_info *startup_info;
+unsigned long pack_size_limit_cfg;
 
 /* Parallel index stat data preload? */
 int core_preload_index = 0;
-- 
1.7.8.rc3.208.g1edbd

^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2011-11-24  5:19 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-10-28 23:54 [PATCH 0/4] Bulk check-in Junio C Hamano
2011-10-28 23:54 ` [PATCH 1/4] write_pack_header(): a helper function Junio C Hamano
2011-10-28 23:54 ` [PATCH 2/4] create_tmp_packfile(): " Junio C Hamano
2011-10-28 23:54 ` [PATCH 3/4] finish_tmp_packfile(): " Junio C Hamano
2011-10-28 23:54 ` [PATCH 4/4] Bulk check-in Junio C Hamano
2011-11-18  7:11 ` [PATCH 0/3] bulk-checkin continued Junio C Hamano
2011-11-18  7:11   ` [PATCH 1/3] csum-file: introduce sha1file_checkpoint Junio C Hamano
2011-11-18  7:11   ` [PATCH 2/3] bulk-checkin: do not write the same object twice Junio C Hamano
2011-11-18  7:11   ` [PATCH 3/3] bulk-checkin: do not write an object that already exists Junio C Hamano
2011-11-24  5:18   ` [PATCH] bulk-checkin: honor pack.packsizelimit Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.