All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/6] fast-import updates
@ 2010-02-17 19:05 Nicolas Pitre
  2010-02-17 19:05 ` [PATCH 1/6] fast-import: start using struct pack_idx_entry Nicolas Pitre
                   ` (6 more replies)
  0 siblings, 7 replies; 9+ messages in thread
From: Nicolas Pitre @ 2010-02-17 19:05 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Shawn O. Pearce

Will follow a couple of fast-import updates, with the most significant
change being the ability for fast-import to produce pack index v2 by
default.  Overall this should make fast-import produced data more immune
to silent corruptions, and also lift the limit on the maximum pack size
it could produce.

[PATCH 1/6] fast-import: start using struct pack_idx_entry
[PATCH 2/6] fast-import: use sha1write() for pack data
[PATCH 3/6] fast-import: use write_idx_file() instead of custom code
[PATCH 4/6] fast-import: make default pack size unlimited
[PATCH 5/6] fast-import: honor pack.indexversion and pack.packsizelimit config vars
[PATCH 6/6] fast-import: use the diff_delta() max_delta_size argument


Nicolas

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/6] fast-import: start using struct pack_idx_entry
  2010-02-17 19:05 [PATCH 0/6] fast-import updates Nicolas Pitre
@ 2010-02-17 19:05 ` Nicolas Pitre
  2010-02-17 19:05 ` [PATCH 2/6] fast-import: use sha1write() for pack data Nicolas Pitre
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Nicolas Pitre @ 2010-02-17 19:05 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Shawn O. Pearce

This is in preparation for using write_idx_file().

Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
---
 fast-import.c |   57 ++++++++++++++++++++++++++++-----------------------------
 1 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index b477dc6..c29737e 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -164,12 +164,11 @@ Format of STDIN stream:
 
 struct object_entry
 {
+	struct pack_idx_entry idx;
 	struct object_entry *next;
-	uint32_t offset;
 	uint32_t type : TYPE_BITS,
 		pack_id : PACK_ID_BITS,
 		depth : DEPTH_BITS;
-	unsigned char sha1[20];
 };
 
 struct object_entry_pool
@@ -521,7 +520,7 @@ static struct object_entry *new_object(unsigned char *sha1)
 		alloc_objects(object_entry_alloc);
 
 	e = blocks->next_free++;
-	hashcpy(e->sha1, sha1);
+	hashcpy(e->idx.sha1, sha1);
 	return e;
 }
 
@@ -530,7 +529,7 @@ static struct object_entry *find_object(unsigned char *sha1)
 	unsigned int h = sha1[0] << 8 | sha1[1];
 	struct object_entry *e;
 	for (e = object_table[h]; e; e = e->next)
-		if (!hashcmp(sha1, e->sha1))
+		if (!hashcmp(sha1, e->idx.sha1))
 			return e;
 	return NULL;
 }
@@ -542,7 +541,7 @@ static struct object_entry *insert_object(unsigned char *sha1)
 	struct object_entry *p = NULL;
 
 	while (e) {
-		if (!hashcmp(sha1, e->sha1))
+		if (!hashcmp(sha1, e->idx.sha1))
 			return e;
 		p = e;
 		e = e->next;
@@ -550,7 +549,7 @@ static struct object_entry *insert_object(unsigned char *sha1)
 
 	e = new_object(sha1);
 	e->next = NULL;
-	e->offset = 0;
+	e->idx.offset = 0;
 	if (p)
 		p->next = e;
 	else
@@ -857,7 +856,7 @@ static int oecmp (const void *a_, const void *b_)
 {
 	struct object_entry *a = *((struct object_entry**)a_);
 	struct object_entry *b = *((struct object_entry**)b_);
-	return hashcmp(a->sha1, b->sha1);
+	return hashcmp(a->idx.sha1, b->idx.sha1);
 }
 
 static char *create_index(void)
@@ -887,7 +886,7 @@ static char *create_index(void)
 	for (i = 0; i < 256; i++) {
 		struct object_entry **next = c;
 		while (next < last) {
-			if ((*next)->sha1[0] != i)
+			if ((*next)->idx.sha1[0] != i)
 				break;
 			next++;
 		}
@@ -901,10 +900,10 @@ static char *create_index(void)
 	sha1write(f, array, 256 * sizeof(int));
 	git_SHA1_Init(&ctx);
 	for (c = idx; c != last; c++) {
-		uint32_t offset = htonl((*c)->offset);
+		uint32_t offset = htonl((*c)->idx.offset);
 		sha1write(f, &offset, 4);
-		sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
-		git_SHA1_Update(&ctx, (*c)->sha1, 20);
+		sha1write(f, (*c)->idx.sha1, sizeof((*c)->idx.sha1));
+		git_SHA1_Update(&ctx, (*c)->idx.sha1, 20);
 	}
 	sha1write(f, pack_data->sha1, sizeof(pack_data->sha1));
 	sha1close(f, NULL, CSUM_FSYNC);
@@ -1063,13 +1062,13 @@ static int store_object(
 	e = insert_object(sha1);
 	if (mark)
 		insert_mark(mark, e);
-	if (e->offset) {
+	if (e->idx.offset) {
 		duplicate_count_by_type[type]++;
 		return 1;
 	} else if (find_sha1_pack(sha1, packed_git)) {
 		e->type = type;
 		e->pack_id = MAX_PACK_ID;
-		e->offset = 1; /* just not zero! */
+		e->idx.offset = 1; /* just not zero! */
 		duplicate_count_by_type[type]++;
 		return 1;
 	}
@@ -1127,12 +1126,12 @@ static int store_object(
 
 	e->type = type;
 	e->pack_id = pack_id;
-	e->offset = pack_size;
+	e->idx.offset = pack_size;
 	object_count++;
 	object_count_by_type[type]++;
 
 	if (delta) {
-		unsigned long ofs = e->offset - last->offset;
+		unsigned long ofs = e->idx.offset - last->offset;
 		unsigned pos = sizeof(hdr) - 1;
 
 		delta_count_by_type[type]++;
@@ -1165,7 +1164,7 @@ static int store_object(
 		} else {
 			strbuf_swap(&last->data, dat);
 		}
-		last->offset = e->offset;
+		last->offset = e->idx.offset;
 		last->depth = e->depth;
 	}
 	return 0;
@@ -1259,14 +1258,14 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
 	if (mark)
 		insert_mark(mark, e);
 
-	if (e->offset) {
+	if (e->idx.offset) {
 		duplicate_count_by_type[OBJ_BLOB]++;
 		truncate_pack(offset);
 
 	} else if (find_sha1_pack(sha1, packed_git)) {
 		e->type = OBJ_BLOB;
 		e->pack_id = MAX_PACK_ID;
-		e->offset = 1; /* just not zero! */
+		e->idx.offset = 1; /* just not zero! */
 		duplicate_count_by_type[OBJ_BLOB]++;
 		truncate_pack(offset);
 
@@ -1274,7 +1273,7 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
 		e->depth = 0;
 		e->type = OBJ_BLOB;
 		e->pack_id = pack_id;
-		e->offset = offset;
+		e->idx.offset = offset;
 		object_count++;
 		object_count_by_type[OBJ_BLOB]++;
 	}
@@ -1326,7 +1325,7 @@ static void *gfi_unpack_entry(
 		 */
 		p->pack_size = pack_size + 20;
 	}
-	return unpack_entry(p, oe->offset, &type, sizep);
+	return unpack_entry(p, oe->idx.offset, &type, sizep);
 }
 
 static const char *get_mode(const char *str, uint16_t *modep)
@@ -1457,7 +1456,7 @@ static void store_tree(struct tree_entry *root)
 	if (S_ISDIR(root->versions[0].mode) && le && le->pack_id == pack_id) {
 		mktree(t, 0, &old_tree);
 		lo.data = old_tree;
-		lo.offset = le->offset;
+		lo.offset = le->idx.offset;
 		lo.depth = t->delta_depth;
 	}
 
@@ -1715,7 +1714,7 @@ static void dump_marks_helper(FILE *f,
 		for (k = 0; k < 1024; k++) {
 			if (m->data.marked[k])
 				fprintf(f, ":%" PRIuMAX " %s\n", base + k,
-					sha1_to_hex(m->data.marked[k]->sha1));
+					sha1_to_hex(m->data.marked[k]->idx.sha1));
 		}
 	}
 }
@@ -1798,7 +1797,7 @@ static void read_marks(void)
 			e = insert_object(sha1);
 			e->type = type;
 			e->pack_id = MAX_PACK_ID;
-			e->offset = 1; /* just not zero! */
+			e->idx.offset = 1; /* just not zero! */
 		}
 		insert_mark(mark, e);
 	}
@@ -2183,7 +2182,7 @@ static void file_change_m(struct branch *b)
 	if (*p == ':') {
 		char *x;
 		oe = find_mark(strtoumax(p + 1, &x, 10));
-		hashcpy(sha1, oe->sha1);
+		hashcpy(sha1, oe->idx.sha1);
 		p = x;
 	} else if (!prefixcmp(p, "inline")) {
 		inline_data = 1;
@@ -2316,7 +2315,7 @@ static void note_change_n(struct branch *b, unsigned char old_fanout)
 	if (*p == ':') {
 		char *x;
 		oe = find_mark(strtoumax(p + 1, &x, 10));
-		hashcpy(sha1, oe->sha1);
+		hashcpy(sha1, oe->idx.sha1);
 		p = x;
 	} else if (!prefixcmp(p, "inline")) {
 		inline_data = 1;
@@ -2339,7 +2338,7 @@ static void note_change_n(struct branch *b, unsigned char old_fanout)
 		struct object_entry *commit_oe = find_mark(commit_mark);
 		if (commit_oe->type != OBJ_COMMIT)
 			die("Mark :%" PRIuMAX " not a commit", commit_mark);
-		hashcpy(commit_sha1, commit_oe->sha1);
+		hashcpy(commit_sha1, commit_oe->idx.sha1);
 	} else if (!get_sha1(p, commit_sha1)) {
 		unsigned long size;
 		char *buf = read_object_with_reference(commit_sha1,
@@ -2446,7 +2445,7 @@ static int parse_from(struct branch *b)
 		struct object_entry *oe = find_mark(idnum);
 		if (oe->type != OBJ_COMMIT)
 			die("Mark :%" PRIuMAX " not a commit", idnum);
-		hashcpy(b->sha1, oe->sha1);
+		hashcpy(b->sha1, oe->idx.sha1);
 		if (oe->pack_id != MAX_PACK_ID) {
 			unsigned long size;
 			char *buf = gfi_unpack_entry(oe, &size);
@@ -2481,7 +2480,7 @@ static struct hash_list *parse_merge(unsigned int *count)
 			struct object_entry *oe = find_mark(idnum);
 			if (oe->type != OBJ_COMMIT)
 				die("Mark :%" PRIuMAX " not a commit", idnum);
-			hashcpy(n->sha1, oe->sha1);
+			hashcpy(n->sha1, oe->idx.sha1);
 		} else if (!get_sha1(from, n->sha1)) {
 			unsigned long size;
 			char *buf = read_object_with_reference(n->sha1,
@@ -2639,7 +2638,7 @@ static void parse_new_tag(void)
 		from_mark = strtoumax(from + 1, NULL, 10);
 		oe = find_mark(from_mark);
 		type = oe->type;
-		hashcpy(sha1, oe->sha1);
+		hashcpy(sha1, oe->idx.sha1);
 	} else if (!get_sha1(from, sha1)) {
 		unsigned long size;
 		char *buf;
-- 
1.7.0.23.gf5ef4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/6] fast-import: use sha1write() for pack data
  2010-02-17 19:05 [PATCH 0/6] fast-import updates Nicolas Pitre
  2010-02-17 19:05 ` [PATCH 1/6] fast-import: start using struct pack_idx_entry Nicolas Pitre
@ 2010-02-17 19:05 ` Nicolas Pitre
  2010-02-17 19:05 ` [PATCH 3/6] fast-import: use write_idx_file() instead of custom code Nicolas Pitre
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Nicolas Pitre @ 2010-02-17 19:05 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Shawn O. Pearce

This is in preparation for using write_idx_file().  Also, by using
sha1write() we get some buffering to reduce the number of write
syscalls, and the written data is SHA1 summed which allows for the extra
data integrity validation check performed in fixup_pack_header_footer()
(details on this in commit abeb40e5aa).

Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
---
 fast-import.c |   35 +++++++++++++++++++++++++----------
 1 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index c29737e..7d737ba 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -312,6 +312,7 @@ static struct atom_str **atom_table;
 
 /* The .pack file being generated */
 static unsigned int pack_id;
+static struct sha1file *pack_file;
 static struct packed_git *pack_data;
 static struct packed_git **all_packs;
 static unsigned long pack_size;
@@ -838,11 +839,12 @@ static void start_packfile(void)
 	p = xcalloc(1, sizeof(*p) + strlen(tmpfile) + 2);
 	strcpy(p->pack_name, tmpfile);
 	p->pack_fd = pack_fd;
+	pack_file = sha1fd(pack_fd, p->pack_name);
 
 	hdr.hdr_signature = htonl(PACK_SIGNATURE);
 	hdr.hdr_version = htonl(2);
 	hdr.hdr_entries = 0;
-	write_or_die(p->pack_fd, &hdr, sizeof(hdr));
+	sha1write(pack_file, &hdr, sizeof(hdr));
 
 	pack_data = p;
 	pack_size = sizeof(hdr);
@@ -956,15 +958,17 @@ static void end_packfile(void)
 
 	clear_delta_base_cache();
 	if (object_count) {
+		unsigned char cur_pack_sha1[20];
 		char *idx_name;
 		int i;
 		struct branch *b;
 		struct tag *t;
 
 		close_pack_windows(pack_data);
+		sha1close(pack_file, cur_pack_sha1, 0);
 		fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1,
 				    pack_data->pack_name, object_count,
-				    NULL, 0);
+				    cur_pack_sha1, pack_size);
 		close(pack_data->pack_fd);
 		idx_name = keep_pack(create_index());
 
@@ -1138,22 +1142,22 @@ static int store_object(
 		e->depth = last->depth + 1;
 
 		hdrlen = encode_header(OBJ_OFS_DELTA, deltalen, hdr);
-		write_or_die(pack_data->pack_fd, hdr, hdrlen);
+		sha1write(pack_file, hdr, hdrlen);
 		pack_size += hdrlen;
 
 		hdr[pos] = ofs & 127;
 		while (ofs >>= 7)
 			hdr[--pos] = 128 | (--ofs & 127);
-		write_or_die(pack_data->pack_fd, hdr + pos, sizeof(hdr) - pos);
+		sha1write(pack_file, hdr + pos, sizeof(hdr) - pos);
 		pack_size += sizeof(hdr) - pos;
 	} else {
 		e->depth = 0;
 		hdrlen = encode_header(type, dat->len, hdr);
-		write_or_die(pack_data->pack_fd, hdr, hdrlen);
+		sha1write(pack_file, hdr, hdrlen);
 		pack_size += hdrlen;
 	}
 
-	write_or_die(pack_data->pack_fd, out, s.total_out);
+	sha1write(pack_file, out, s.total_out);
 	pack_size += s.total_out;
 
 	free(out);
@@ -1170,12 +1174,17 @@ static int store_object(
 	return 0;
 }
 
-static void truncate_pack(off_t to)
+static void truncate_pack(off_t to, git_SHA_CTX *ctx)
 {
 	if (ftruncate(pack_data->pack_fd, to)
 	 || lseek(pack_data->pack_fd, to, SEEK_SET) != to)
 		die_errno("cannot truncate pack to skip duplicate");
 	pack_size = to;
+
+	/* yes this is a layering violation */
+	pack_file->total = to;
+	pack_file->offset = 0;
+	pack_file->ctx = *ctx;
 }
 
 static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
@@ -1188,6 +1197,7 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
 	unsigned long hdrlen;
 	off_t offset;
 	git_SHA_CTX c;
+	git_SHA_CTX pack_file_ctx;
 	z_stream s;
 	int status = Z_OK;
 
@@ -1198,6 +1208,10 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
 
 	offset = pack_size;
 
+	/* preserve the pack_file SHA1 ctx in case we have to truncate later */
+	sha1flush(pack_file);
+	pack_file_ctx = pack_file->ctx;
+
 	hdrlen = snprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1;
 	if (out_sz <= hdrlen)
 		die("impossibly large object header");
@@ -1232,7 +1246,7 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
 
 		if (!s.avail_out || status == Z_STREAM_END) {
 			size_t n = s.next_out - out_buf;
-			write_or_die(pack_data->pack_fd, out_buf, n);
+			sha1write(pack_file, out_buf, n);
 			pack_size += n;
 			s.next_out = out_buf;
 			s.avail_out = out_sz;
@@ -1260,14 +1274,14 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
 
 	if (e->idx.offset) {
 		duplicate_count_by_type[OBJ_BLOB]++;
-		truncate_pack(offset);
+		truncate_pack(offset, &pack_file_ctx);
 
 	} else if (find_sha1_pack(sha1, packed_git)) {
 		e->type = OBJ_BLOB;
 		e->pack_id = MAX_PACK_ID;
 		e->idx.offset = 1; /* just not zero! */
 		duplicate_count_by_type[OBJ_BLOB]++;
-		truncate_pack(offset);
+		truncate_pack(offset, &pack_file_ctx);
 
 	} else {
 		e->depth = 0;
@@ -1316,6 +1330,7 @@ static void *gfi_unpack_entry(
 		 * the newly written data.
 		 */
 		close_pack_windows(p);
+		sha1flush(pack_file);
 
 		/* We have to offer 20 bytes additional on the end of
 		 * the packfile as the core unpacker code assumes the
-- 
1.7.0.23.gf5ef4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/6] fast-import: use write_idx_file() instead of custom code
  2010-02-17 19:05 [PATCH 0/6] fast-import updates Nicolas Pitre
  2010-02-17 19:05 ` [PATCH 1/6] fast-import: start using struct pack_idx_entry Nicolas Pitre
  2010-02-17 19:05 ` [PATCH 2/6] fast-import: use sha1write() for pack data Nicolas Pitre
@ 2010-02-17 19:05 ` Nicolas Pitre
  2010-02-17 19:05 ` [PATCH 4/6] fast-import: make default pack size unlimited Nicolas Pitre
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Nicolas Pitre @ 2010-02-17 19:05 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Shawn O. Pearce

This allows for the creation of pack index version 2 with its object
CRC and the possibility for a pack to be larger than 4 GB.

Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
---
 fast-import.c |   63 +++++++++++++++-----------------------------------------
 1 files changed, 17 insertions(+), 46 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index 7d737ba..9d7ab09 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -854,67 +854,30 @@ static void start_packfile(void)
 	all_packs[pack_id] = p;
 }
 
-static int oecmp (const void *a_, const void *b_)
+static const char *create_index(void)
 {
-	struct object_entry *a = *((struct object_entry**)a_);
-	struct object_entry *b = *((struct object_entry**)b_);
-	return hashcmp(a->idx.sha1, b->idx.sha1);
-}
-
-static char *create_index(void)
-{
-	static char tmpfile[PATH_MAX];
-	git_SHA_CTX ctx;
-	struct sha1file *f;
-	struct object_entry **idx, **c, **last, *e;
+	const char *tmpfile;
+	struct pack_idx_entry **idx, **c, **last;
+	struct object_entry *e;
 	struct object_entry_pool *o;
-	uint32_t array[256];
-	int i, idx_fd;
 
-	/* Build the sorted table of object IDs. */
-	idx = xmalloc(object_count * sizeof(struct object_entry*));
+	/* Build the table of object IDs. */
+	idx = xmalloc(object_count * sizeof(*idx));
 	c = idx;
 	for (o = blocks; o; o = o->next_pool)
 		for (e = o->next_free; e-- != o->entries;)
 			if (pack_id == e->pack_id)
-				*c++ = e;
+				*c++ = &e->idx;
 	last = idx + object_count;
 	if (c != last)
 		die("internal consistency error creating the index");
-	qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
 
-	/* Generate the fan-out array. */
-	c = idx;
-	for (i = 0; i < 256; i++) {
-		struct object_entry **next = c;
-		while (next < last) {
-			if ((*next)->idx.sha1[0] != i)
-				break;
-			next++;
-		}
-		array[i] = htonl(next - idx);
-		c = next;
-	}
-
-	idx_fd = odb_mkstemp(tmpfile, sizeof(tmpfile),
-			     "pack/tmp_idx_XXXXXX");
-	f = sha1fd(idx_fd, tmpfile);
-	sha1write(f, array, 256 * sizeof(int));
-	git_SHA1_Init(&ctx);
-	for (c = idx; c != last; c++) {
-		uint32_t offset = htonl((*c)->idx.offset);
-		sha1write(f, &offset, 4);
-		sha1write(f, (*c)->idx.sha1, sizeof((*c)->idx.sha1));
-		git_SHA1_Update(&ctx, (*c)->idx.sha1, 20);
-	}
-	sha1write(f, pack_data->sha1, sizeof(pack_data->sha1));
-	sha1close(f, NULL, CSUM_FSYNC);
+	tmpfile = write_idx_file(NULL, idx, object_count, pack_data->sha1);
 	free(idx);
-	git_SHA1_Final(pack_data->sha1, &ctx);
 	return tmpfile;
 }
 
-static char *keep_pack(char *curr_index_name)
+static char *keep_pack(const char *curr_index_name)
 {
 	static char name[PATH_MAX];
 	static const char *keep_msg = "fast-import";
@@ -936,6 +899,7 @@ static char *keep_pack(char *curr_index_name)
 		 get_object_directory(), sha1_to_hex(pack_data->sha1));
 	if (move_temp_to_file(curr_index_name, name))
 		die("cannot store index file");
+	free((void *)curr_index_name);
 	return name;
 }
 
@@ -1134,6 +1098,8 @@ static int store_object(
 	object_count++;
 	object_count_by_type[type]++;
 
+	crc32_begin(pack_file);
+
 	if (delta) {
 		unsigned long ofs = e->idx.offset - last->offset;
 		unsigned pos = sizeof(hdr) - 1;
@@ -1160,6 +1126,8 @@ static int store_object(
 	sha1write(pack_file, out, s.total_out);
 	pack_size += s.total_out;
 
+	e->idx.crc32 = crc32_end(pack_file);
+
 	free(out);
 	free(delta);
 	if (last) {
@@ -1219,6 +1187,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
 	git_SHA1_Init(&c);
 	git_SHA1_Update(&c, out_buf, hdrlen);
 
+	crc32_begin(pack_file);
+
 	memset(&s, 0, sizeof(s));
 	deflateInit(&s, pack_compression_level);
 
@@ -1288,6 +1258,7 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
 		e->type = OBJ_BLOB;
 		e->pack_id = pack_id;
 		e->idx.offset = offset;
+		e->idx.crc32 = crc32_end(pack_file);
 		object_count++;
 		object_count_by_type[OBJ_BLOB]++;
 	}
-- 
1.7.0.23.gf5ef4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 4/6] fast-import: make default pack size unlimited
  2010-02-17 19:05 [PATCH 0/6] fast-import updates Nicolas Pitre
                   ` (2 preceding siblings ...)
  2010-02-17 19:05 ` [PATCH 3/6] fast-import: use write_idx_file() instead of custom code Nicolas Pitre
@ 2010-02-17 19:05 ` Nicolas Pitre
  2010-02-17 19:05 ` [PATCH 5/6] fast-import: honor pack.indexversion and pack.packsizelimit config vars Nicolas Pitre
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Nicolas Pitre @ 2010-02-17 19:05 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Shawn O. Pearce

Now that fast-import is creating packs with index version 2, there is
no point limiting the pack size by default.  A pack split will still
happen if off_t is not sufficiently large to hold large offsets.

While updating the doc, let's remove the "packfiles fit on CDs"
suggestion.  Pack files created by fast-import are still suboptimal and
a 'git repack -a -f -d' or even 'git gc --aggressive' would be a pretty
good idea before considering storage on CDs.

Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
---
 Documentation/git-fast-import.txt |    5 +----
 fast-import.c                     |   12 ++++++------
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt
index 6764ff1..19082b0 100644
--- a/Documentation/git-fast-import.txt
+++ b/Documentation/git-fast-import.txt
@@ -45,10 +45,7 @@ OPTIONS
 
 --max-pack-size=<n>::
 	Maximum size of each output packfile.
-	The default is 4 GiB as that is the maximum allowed
-	packfile size (due to file format limitations). Some
-	importers may wish to lower this, such as to ensure the
-	resulting packfiles fit on CDs.
+	The default is unlimited.
 
 --big-file-threshold=<n>::
 	Maximum size of a blob that fast-import will attempt to
diff --git a/fast-import.c b/fast-import.c
index 9d7ab09..d2f45b1 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -191,7 +191,7 @@ struct mark_set
 struct last_object
 {
 	struct strbuf data;
-	uint32_t offset;
+	off_t offset;
 	unsigned int depth;
 	unsigned no_swap : 1;
 };
@@ -279,7 +279,7 @@ struct recent_command
 
 /* Configured limits on output */
 static unsigned long max_depth = 10;
-static off_t max_packsize = (1LL << 32) - 1;
+static off_t max_packsize;
 static uintmax_t big_file_threshold = 512 * 1024 * 1024;
 static int force_update;
 static int pack_compression_level = Z_DEFAULT_COMPRESSION;
@@ -315,7 +315,7 @@ static unsigned int pack_id;
 static struct sha1file *pack_file;
 static struct packed_git *pack_data;
 static struct packed_git **all_packs;
-static unsigned long pack_size;
+static off_t pack_size;
 
 /* Table of objects we've written. */
 static unsigned int object_entry_alloc = 5000;
@@ -1068,7 +1068,7 @@ static int store_object(
 	deflateEnd(&s);
 
 	/* Determine if we should auto-checkpoint. */
-	if ((pack_size + 60 + s.total_out) > max_packsize
+	if ((max_packsize && (pack_size + 60 + s.total_out) > max_packsize)
 		|| (pack_size + 60 + s.total_out) < pack_size) {
 
 		/* This new object needs to *not* have the current pack_id. */
@@ -1101,7 +1101,7 @@ static int store_object(
 	crc32_begin(pack_file);
 
 	if (delta) {
-		unsigned long ofs = e->idx.offset - last->offset;
+		off_t ofs = e->idx.offset - last->offset;
 		unsigned pos = sizeof(hdr) - 1;
 
 		delta_count_by_type[type]++;
@@ -1170,7 +1170,7 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
 	int status = Z_OK;
 
 	/* Determine if we should auto-checkpoint. */
-	if ((pack_size + 60 + len) > max_packsize
+	if ((max_packsize && (pack_size + 60 + len) > max_packsize)
 		|| (pack_size + 60 + len) < pack_size)
 		cycle_packfile();
 
-- 
1.7.0.23.gf5ef4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 5/6] fast-import: honor pack.indexversion and pack.packsizelimit config vars
  2010-02-17 19:05 [PATCH 0/6] fast-import updates Nicolas Pitre
                   ` (3 preceding siblings ...)
  2010-02-17 19:05 ` [PATCH 4/6] fast-import: make default pack size unlimited Nicolas Pitre
@ 2010-02-17 19:05 ` Nicolas Pitre
  2010-02-17 19:05 ` [PATCH 6/6] fast-import: use the diff_delta() max_delta_size argument Nicolas Pitre
  2010-02-17 23:23 ` [PATCH 0/6] fast-import updates Shawn O. Pearce
  6 siblings, 0 replies; 9+ messages in thread
From: Nicolas Pitre @ 2010-02-17 19:05 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Shawn O. Pearce

Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
---
 fast-import.c |   11 +++++++++++
 1 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index d2f45b1..7fc9862 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -2876,6 +2876,17 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 		pack_compression_seen = 1;
 		return 0;
 	}
+	if (!strcmp(k, "pack.indexversion")) {
+		pack_idx_default_version = git_config_int(k, v);
+		if (pack_idx_default_version > 2)
+			die("bad pack.indexversion=%"PRIu32,
+			    pack_idx_default_version);
+		return 0;
+	}
+	if (!strcmp(k, "pack.packsizelimit")) {
+		max_packsize = git_config_ulong(k, v);
+		return 0;
+	}
 	if (!strcmp(k, "core.bigfilethreshold")) {
 		long n = git_config_int(k, v);
 		big_file_threshold = 0 < n ? n : 0;
-- 
1.7.0.23.gf5ef4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 6/6] fast-import: use the diff_delta() max_delta_size argument
  2010-02-17 19:05 [PATCH 0/6] fast-import updates Nicolas Pitre
                   ` (4 preceding siblings ...)
  2010-02-17 19:05 ` [PATCH 5/6] fast-import: honor pack.indexversion and pack.packsizelimit config vars Nicolas Pitre
@ 2010-02-17 19:05 ` Nicolas Pitre
  2010-02-17 23:23 ` [PATCH 0/6] fast-import updates Shawn O. Pearce
  6 siblings, 0 replies; 9+ messages in thread
From: Nicolas Pitre @ 2010-02-17 19:05 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Shawn O. Pearce

This lets diff_delta() abort early if it is going to bust the given
size limit.  Also, only objects larger than 20 bytes are considered,
as objects smaller than that are most certainly going to produce
larger deltas than the original object due to the additional headers.

Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
---
 fast-import.c |    8 ++------
 1 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index 7fc9862..74f08bd 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -1041,14 +1041,10 @@ static int store_object(
 		return 1;
 	}
 
-	if (last && last->data.buf && last->depth < max_depth) {
+	if (last && last->data.buf && last->depth < max_depth && dat->len > 20) {
 		delta = diff_delta(last->data.buf, last->data.len,
 			dat->buf, dat->len,
-			&deltalen, 0);
-		if (delta && deltalen >= dat->len) {
-			free(delta);
-			delta = NULL;
-		}
+			&deltalen, dat->len - 20);
 	} else
 		delta = NULL;
 
-- 
1.7.0.23.gf5ef4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/6] fast-import updates
  2010-02-17 19:05 [PATCH 0/6] fast-import updates Nicolas Pitre
                   ` (5 preceding siblings ...)
  2010-02-17 19:05 ` [PATCH 6/6] fast-import: use the diff_delta() max_delta_size argument Nicolas Pitre
@ 2010-02-17 23:23 ` Shawn O. Pearce
  2010-02-17 23:29   ` Junio C Hamano
  6 siblings, 1 reply; 9+ messages in thread
From: Shawn O. Pearce @ 2010-02-17 23:23 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: Junio C Hamano, git

Nicolas Pitre <nico@fluxnic.net> wrote:
> Will follow a couple of fast-import updates, with the most significant
> change being the ability for fast-import to produce pack index v2 by
> default.  Overall this should make fast-import produced data more immune
> to silent corruptions, and also lift the limit on the maximum pack size
> it could produce.
> 
> [PATCH 1/6] fast-import: start using struct pack_idx_entry
> [PATCH 2/6] fast-import: use sha1write() for pack data
> [PATCH 3/6] fast-import: use write_idx_file() instead of custom code
> [PATCH 4/6] fast-import: make default pack size unlimited
> [PATCH 5/6] fast-import: honor pack.indexversion and pack.packsizelimit config vars
> [PATCH 6/6] fast-import: use the diff_delta() max_delta_size argument

Acked-by: Shawn O. Pearce <spearce@spearce.org>

Thanks Nico.  I wanted to do this myself, but couldn't find the time
since it was recently brought up that we still didn't use index v2
in fast-import.

-- 
Shawn.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/6] fast-import updates
  2010-02-17 23:23 ` [PATCH 0/6] fast-import updates Shawn O. Pearce
@ 2010-02-17 23:29   ` Junio C Hamano
  0 siblings, 0 replies; 9+ messages in thread
From: Junio C Hamano @ 2010-02-17 23:29 UTC (permalink / raw)
  To: Shawn O. Pearce; +Cc: Nicolas Pitre, git

Thanks, both.  Will merge to 'master' shortly.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2010-02-17 23:29 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-02-17 19:05 [PATCH 0/6] fast-import updates Nicolas Pitre
2010-02-17 19:05 ` [PATCH 1/6] fast-import: start using struct pack_idx_entry Nicolas Pitre
2010-02-17 19:05 ` [PATCH 2/6] fast-import: use sha1write() for pack data Nicolas Pitre
2010-02-17 19:05 ` [PATCH 3/6] fast-import: use write_idx_file() instead of custom code Nicolas Pitre
2010-02-17 19:05 ` [PATCH 4/6] fast-import: make default pack size unlimited Nicolas Pitre
2010-02-17 19:05 ` [PATCH 5/6] fast-import: honor pack.indexversion and pack.packsizelimit config vars Nicolas Pitre
2010-02-17 19:05 ` [PATCH 6/6] fast-import: use the diff_delta() max_delta_size argument Nicolas Pitre
2010-02-17 23:23 ` [PATCH 0/6] fast-import updates Shawn O. Pearce
2010-02-17 23:29   ` Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.