git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Add '--create-index' to git-unpack-objects
@ 2005-10-12 11:02 Johannes Schindelin
  2005-10-12 13:34 ` Sergey Vlasov
  0 siblings, 1 reply; 12+ messages in thread
From: Johannes Schindelin @ 2005-10-12 11:02 UTC (permalink / raw)
  To: git, junkio

Add the option '--create-index' to git-unpack-objects, which makes it 
create an index file instead of expanding its contents. While at it, 
document the dry-run option '-n', and optionally take a pack file instead 
of stdin.

Signed-off-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>

---

	Is anyone interested in writing the index to stdout? It is easy to
	add this feature to sha1create...

 Documentation/git-unpack-objects.txt |   13 +++-
 t/t5300-pack-object.sh               |    5 ++
 unpack-objects.c                     |  108 +++++++++++++++++++++++++++++++---
 3 files changed, 114 insertions(+), 12 deletions(-)

applies-to: 6dfbbc44a77ad3300caed532814e7feb02ff794b
1bff150c89d2636c54fa45bf62e7b248ba2a7b8d
diff --git a/Documentation/git-unpack-objects.txt b/Documentation/git-unpack-objects.txt
index b716ba1..58120e4 100644
--- a/Documentation/git-unpack-objects.txt
+++ b/Documentation/git-unpack-objects.txt
@@ -8,14 +8,15 @@ git-unpack-objects - Unpack objects from
 
 SYNOPSIS
 --------
-'git-unpack-objects' [-q] <pack-file
+'git-unpack-objects' [-q] [-n] [--create-index index-file] [pack-file]
 
 
 DESCRIPTION
 -----------
-Reads a packed archive (.pack) from the standard input, and
+Reads a packed archive (.pack) from a file or stdin, and
 expands the objects contained in the pack into "one-file
-one-object" format in $GIT_OBJECT_DIRECTORY.
+one-object" format in $GIT_OBJECT_DIRECTORY, or alternatively,
+create an index for it.
 
 OPTIONS
 -------
@@ -23,6 +24,12 @@ OPTIONS
 	The command usually shows percentage progress.  This
 	flag suppresses it.
 
+-n::
+	Perform a dry run, i.e. do not write any files.
+
+--create-index <filename>::
+	Instead of unpacking the files, create an index for this
+	pack.
 
 Author
 ------
diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh
index bb62336..593bfc2 100755
--- a/t/t5300-pack-object.sh
+++ b/t/t5300-pack-object.sh
@@ -79,6 +79,11 @@ test_expect_success \
      git-unpack-objects -n <test-2-${packname_2}.pack &&
      git-unpack-objects <test-2-${packname_2}.pack'
 
+test_expect_success \
+     'create-index from pack' \
+     'git-unpack-objects --create-index index <test-2-${packname_2}.pack &&
+     cmp test-2-${packname_2}.idx index'
+
 unset GIT_OBJECT_DIRECTORY
 cd $TRASH/.git2
 test_expect_success \
diff --git a/unpack-objects.c b/unpack-objects.c
index 8ae1a1c..b6dad74 100644
--- a/unpack-objects.c
+++ b/unpack-objects.c
@@ -2,17 +2,29 @@
 #include "object.h"
 #include "delta.h"
 #include "pack.h"
+#include "csum-file.h"
 
 #include <sys/time.h>
 
-static int dry_run, quiet;
-static const char unpack_usage[] = "git-unpack-objects [-q] < pack-file";
+static int dry_run, quiet, create_index;
+static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [--create-index filename] [pack-file]";
 
 /* We always read in 4kB chunks. */
+static int fd_in = 0;
 static unsigned char buffer[4096];
+static off_t buffer_offset;
 static unsigned long offset, len, eof;
 static SHA_CTX ctx;
 
+/* To recreate an index */
+static const char* index_filename;
+typedef struct {
+	unsigned int offset;
+	unsigned char sha1[20];
+} object_entry;
+static object_entry* objects;
+static unsigned char *current_sha1;
+
 /*
  * Make sure at least "min" bytes are available in the buffer, and
  * return the pointer to the buffer.
@@ -28,10 +40,11 @@ static void * fill(int min)
 	if (offset) {
 		SHA1_Update(&ctx, buffer, offset);
 		memcpy(buffer, buffer + offset, len);
+		buffer_offset += offset;
 		offset = 0;
 	}
 	do {
-		int ret = read(0, buffer + len, sizeof(buffer) - len);
+		int ret = read(fd_in, buffer + len, sizeof(buffer) - len);
 		if (ret <= 0) {
 			if (!ret)
 				die("early EOF");
@@ -83,6 +96,7 @@ struct delta_info {
 	unsigned char base_sha1[20];
 	unsigned long size;
 	void *delta;
+	unsigned char* sha1;
 	struct delta_info *next;
 };
 
@@ -95,6 +109,7 @@ static void add_delta_to_list(unsigned c
 	memcpy(info->base_sha1, base_sha1, 20);
 	info->size = size;
 	info->delta = delta;
+	info->sha1 = current_sha1;
 	info->next = delta_list;
 	delta_list = info;
 }
@@ -104,7 +119,15 @@ static void added_object(unsigned char *
 static void write_object(void *buf, unsigned long size, const char *type)
 {
 	unsigned char sha1[20];
-	if (write_sha1_file(buf, size, type, sha1) < 0)
+	if (create_index) {
+		char header[100];
+		SHA_CTX c;
+
+		SHA1_Init(&c);
+		SHA1_Update(&c, header, 1+sprintf(header, "%s %lu", type, size));
+		SHA1_Update(&c, buf, size);
+		SHA1_Final(current_sha1, &c);
+	} else if (write_sha1_file(buf, size, type, sha1) < 0)
 		die("failed to write object");
 	added_object(sha1, type, buf, size);
 }
@@ -136,6 +159,7 @@ static void added_object(unsigned char *
 		if (!memcmp(info->base_sha1, sha1, 20)) {
 			*p = info->next;
 			p = &delta_list;
+			current_sha1 = info->sha1;
 			resolve_delta(type, data, size, info->delta, info->size);
 			free(info);
 			continue;
@@ -156,8 +180,10 @@ static int unpack_non_delta_entry(enum o
 	case OBJ_TAG:    type = "tag"; break;
 	default: die("bad type %d", kind);
 	}
+
 	if (!dry_run)
 		write_object(buf, size, type);
+
 	free(buf);
 	return 0;
 }
@@ -174,7 +200,7 @@ static int unpack_delta_entry(unsigned l
 	use(20);
 
 	delta_data = get_data(delta_size);
-	if (dry_run) {
+	if (dry_run && !create_index) {
 		free(delta_data);
 		return 0;
 	}
@@ -239,6 +265,40 @@ static void unpack_one(unsigned nr, unsi
 	}
 }
 
+int compare_object_entries(const void* a, const void* b)
+{
+	const object_entry *first = a;
+	const object_entry *second = b;
+
+	return memcmp(first->sha1, second->sha1, 20);
+}
+
+void show_index(unsigned int nr_objects, char* pack_sha1)
+{
+	unsigned int n, i;
+	unsigned int top_index[256];
+	struct sha1file* index_file;
+	
+	/* sort by sha1 */
+	qsort(objects, nr_objects, sizeof(object_entry), compare_object_entries);
+
+	for (n = i = 0; i < 256; i++) {
+		while (n < nr_objects && objects[n].sha1[0] == i)
+			n++;
+		top_index[i] = htonl(n);
+	}
+	top_index[255] = nr_objects;
+
+	/* write to file */
+	index_file = sha1create(index_filename);
+	sha1write(index_file, top_index, sizeof(top_index));
+	sha1write(index_file, objects, sizeof(object_entry)*nr_objects);
+	sha1write(index_file, pack_sha1, 20);
+	sha1close(index_file, NULL, 1);
+
+	free(objects);
+}
+
 /*
  * We unpack from the end, older files first. Now, usually
  * there are deltas etc, so we'll not actually write the
@@ -251,17 +311,29 @@ static void unpack_all(void)
 	unsigned version = ntohl(hdr->hdr_version);
 	unsigned nr_objects = ntohl(hdr->hdr_entries);
 
+	if (create_index)
+		objects = xmalloc(sizeof(object_entry)*nr_objects);
+
 	if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
 		die("bad pack file");
 	if (version != PACK_VERSION)
 		die("unable to handle pack file version %d", version);
-	fprintf(stderr, "Unpacking %d objects\n", nr_objects);
+	if (!quiet)
+		fprintf(stderr, "Unpacking %d objects\n", nr_objects);
 
 	use(sizeof(struct pack_header));
-	for (i = 0; i < nr_objects; i++)
+	for (i = 0; i < nr_objects; i++) {
+		if (create_index) {
+			objects[i].offset = htonl(buffer_offset+offset);
+			current_sha1 = (unsigned char*)&objects[i].sha1;
+		}
 		unpack_one(i+1, nr_objects);
+	}
 	if (delta_list)
 		die("unresolved deltas left after unpacking");
+
+	if (create_index)
+		show_index(nr_objects, fill(20));
 }
 
 int main(int argc, char **argv)
@@ -281,11 +353,22 @@ int main(int argc, char **argv)
 				quiet = 1;
 				continue;
 			}
+			if (!strcmp(arg, "--create-index")) {
+				create_index = 1;
+				if (i >= argc-1)
+					usage(unpack_usage);
+				index_filename = argv[++i];
+				continue;
+			}
 			usage(unpack_usage);
 		}
 
-		/* We don't take any non-flag arguments now.. Maybe some day */
-		usage(unpack_usage);
+		if (i != argc-1)
+			usage(unpack_usage);
+
+		fd_in = open(argv[i], O_RDONLY);
+		if (fd_in < 0)
+			die("Could not open %s: %s\n", argv[i], strerror(errno));
 	}
 	SHA1_Init(&ctx);
 	unpack_all();
@@ -295,6 +378,9 @@ int main(int argc, char **argv)
 		die("final sha1 did not match");
 	use(20);
 
+	if (create_index)
+		return 0;
+
 	/* Write the last part of the buffer to stdout */
 	while (len) {
 		int ret = write(1, buffer + offset, len);
@@ -312,5 +398,9 @@ int main(int argc, char **argv)
 	/* All done */
 	if (!quiet)
 		fprintf(stderr, "\n");
+
+	if (fd_in)
+		close(fd_in);
+
 	return 0;
 }
---
0.99.8.GIT

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] Add '--create-index' to git-unpack-objects
  2005-10-12 11:02 [PATCH] Add '--create-index' to git-unpack-objects Johannes Schindelin
@ 2005-10-12 13:34 ` Sergey Vlasov
  2005-10-12 13:54   ` [PATCH] Add git-index-pack utility Sergey Vlasov
  2005-10-12 14:25   ` [PATCH] Add '--create-index' to git-unpack-objects Johannes Schindelin
  0 siblings, 2 replies; 12+ messages in thread
From: Sergey Vlasov @ 2005-10-12 13:34 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git, junkio

[-- Attachment #1: Type: text/plain, Size: 1472 bytes --]

On Wed, 12 Oct 2005 13:02:36 +0200 (CEST) Johannes Schindelin wrote:

> Add the option '--create-index' to git-unpack-objects, which makes it 
> create an index file instead of expanding its contents. While at it, 
> document the dry-run option '-n', and optionally take a pack file instead 
> of stdin.


> @@ -104,7 +119,15 @@ static void added_object(unsigned char *
>  static void write_object(void *buf, unsigned long size, const char *type)
>  {
>  	unsigned char sha1[20];
> -	if (write_sha1_file(buf, size, type, sha1) < 0)
> +	if (create_index) {
> +		char header[100];
> +		SHA_CTX c;
> +
> +		SHA1_Init(&c);
> +		SHA1_Update(&c, header, 1+sprintf(header, "%s %lu", type, size));
> +		SHA1_Update(&c, buf, size);
> +		SHA1_Final(current_sha1, &c);
> +	} else if (write_sha1_file(buf, size, type, sha1) < 0)
>  		die("failed to write object");

Sorry, but this cannot work.  git-unpack-objects does a streaming
unpack, and it needs to be able to read back the objects it has written
out previously (in case a delta later in the stream references some
older object).  Saving unpacked objects in memory would obviously be
unacceptable.

However, if you need to create a pack index, you obviously have a pack
file with random access ability, and in this case it is possible to
build the index efficiently (in two passes over the pack file) without
storing unpacked objects in the filesystem.  I made a separate utility
to do this; will send a patch in some minutes.

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH] Add git-index-pack utility
  2005-10-12 13:34 ` Sergey Vlasov
@ 2005-10-12 13:54   ` Sergey Vlasov
  2005-10-12 14:33     ` Johannes Schindelin
  2005-10-12 23:57     ` [PATCH] Fix packname hash generation Junio C Hamano
  2005-10-12 14:25   ` [PATCH] Add '--create-index' to git-unpack-objects Johannes Schindelin
  1 sibling, 2 replies; 12+ messages in thread
From: Sergey Vlasov @ 2005-10-12 13:54 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

git-index-pack builds a pack index file for an existing packed
archive.  With this utility a packed archive which was transferred
without the corresponding pack index can be added to objects/pack/
without repacking.

Signed-off-by: Sergey Vlasov <vsu@altlinux.ru>


---

 Documentation/git-index-pack.txt |   44 ++++
 Documentation/git.txt            |    3 
 Makefile                         |    2 
 index-pack.c                     |  451 ++++++++++++++++++++++++++++++++++++++
 t/t5300-pack-object.sh           |   18 ++
 5 files changed, 517 insertions(+), 1 deletions(-)
 create mode 100644 Documentation/git-index-pack.txt
 create mode 100644 index-pack.c

230c0068103bd1ed87788a6464dda2d7de58c2e2
diff --git a/Documentation/git-index-pack.txt b/Documentation/git-index-pack.txt
new file mode 100644
--- /dev/null
+++ b/Documentation/git-index-pack.txt
@@ -0,0 +1,44 @@
+git-index-pack(1)
+=================
+
+NAME
+----
+git-index-pack - Build pack index file for an existing packed archive
+
+
+SYNOPSIS
+--------
+'git-index-pack' [-o <index-file>] <pack-file>
+
+
+DESCRIPTION
+-----------
+Reads a packed archive (.pack) from the specified file, and
+builds a pack index file (.idx) for it.  The packed archive
+together with the pack index can then be placed in the
+objects/pack/ directory of a git repository.
+
+
+OPTIONS
+-------
+-o <index-file>::
+	Write the generated pack index into the specified
+	file.  Without this option the name of pack index
+	file is constructed from the name of packed archive
+	file by replacing .pack with .idx (and the program
+	fails if the name of packed archive does not end
+	with .pack).
+
+
+Author
+------
+Written by Sergey Vlasov <vsu@altlinux.ru>
+
+Documentation
+-------------
+Documentation by Sergey Vlasov
+
+GIT
+---
+Part of the gitlink:git[7] suite
+
diff --git a/Documentation/git.txt b/Documentation/git.txt
--- a/Documentation/git.txt
+++ b/Documentation/git.txt
@@ -68,6 +68,9 @@ gitlink:git-commit-tree[1]::
 gitlink:git-hash-object[1]::
 	Computes the object ID from a file.
 
+gitlink:git-index-pack.html[1]::
+	Build pack index file for an existing packed archive.
+
 gitlink:git-init-db[1]::
 	Creates an empty git object database
 
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -110,7 +110,7 @@ PROGRAMS = \
 	git-convert-objects$X git-diff-files$X \
 	git-diff-index$X git-diff-stages$X \
 	git-diff-tree$X git-fetch-pack$X git-fsck-objects$X \
-	git-hash-object$X git-init-db$X \
+	git-hash-object$X git-index-pack$X git-init-db$X \
 	git-local-fetch$X git-ls-files$X git-ls-tree$X git-merge-base$X \
 	git-merge-index$X git-mktag$X git-pack-objects$X git-patch-id$X \
 	git-peek-remote$X git-prune-packed$X git-read-tree$X \
diff --git a/index-pack.c b/index-pack.c
new file mode 100644
--- /dev/null
+++ b/index-pack.c
@@ -0,0 +1,451 @@
+#include "cache.h"
+#include "delta.h"
+#include "pack.h"
+#include "csum-file.h"
+
+static const char index_pack_usage[] =
+"git-index-pack [-o index-file] pack-file";
+
+struct object_entry
+{
+	unsigned long offset;
+	enum object_type type;
+	enum object_type real_type;
+	unsigned char sha1[20];
+};
+
+struct delta_entry
+{
+	struct object_entry *obj;
+	unsigned char base_sha1[20];
+};
+
+static const char *pack_name;
+static unsigned char *pack_base;
+static unsigned long pack_size;
+static struct object_entry *objects;
+static struct delta_entry *deltas;
+static int nr_objects;
+static int nr_deltas;
+
+static void open_pack_file(void)
+{
+	int fd;
+	struct stat st;
+
+	fd = open(pack_name, O_RDONLY);
+	if (fd < 0)
+		die("cannot open packfile '%s': %s", pack_name,
+		    strerror(errno));
+	if (fstat(fd, &st)) {
+		int err = errno;
+		close(fd);
+		die("cannot fstat packfile '%s': %s", pack_name,
+		    strerror(err));
+	}
+	pack_size = st.st_size;
+	pack_base = mmap(NULL, pack_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (pack_base == MAP_FAILED) {
+		int err = errno;
+		close(fd);
+		die("cannot mmap packfile '%s': %s", pack_name,
+		    strerror(err));
+	}
+	close(fd);
+}
+
+static void parse_pack_header(void)
+{
+	const struct pack_header *hdr;
+	unsigned char sha1[20];
+	SHA_CTX ctx;
+
+	/* Ensure there are enough bytes for the header and final SHA1 */
+	if (pack_size < sizeof(struct pack_header) + 20)
+		die("packfile '%s' is too small", pack_name);
+
+	/* Header consistency check */
+	hdr = (void *)pack_base;
+	if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
+		die("packfile '%s' signature mismatch", pack_name);
+	if (hdr->hdr_version != htonl(PACK_VERSION))
+		die("packfile '%s' version %d different from ours %d",
+		    pack_name, ntohl(hdr->hdr_version), PACK_VERSION);
+
+	nr_objects = ntohl(hdr->hdr_entries);
+
+	/* Check packfile integrity */
+	SHA1_Init(&ctx);
+	SHA1_Update(&ctx, pack_base, pack_size - 20);
+	SHA1_Final(sha1, &ctx);
+	if (memcmp(sha1, pack_base + pack_size - 20, 20))
+		die("packfile '%s' SHA1 mismatch", pack_name);
+}
+
+static void bad_object(unsigned long offset, const char *format,
+		       ...) NORETURN __attribute__((format (printf, 2, 3)));
+
+static void bad_object(unsigned long offset, const char *format, ...)
+{
+	va_list params;
+	char buf[1024];
+
+	va_start(params, format);
+	vsnprintf(buf, sizeof(buf), format, params);
+	va_end(params);
+	die("packfile '%s': bad object at offset %lu: %s",
+	    pack_name, offset, buf);
+}
+
+static void *unpack_entry_data(unsigned long offset,
+			       unsigned long *current_pos, unsigned long size)
+{
+	unsigned long pack_limit = pack_size - 20;
+	unsigned long pos = *current_pos;
+	z_stream stream;
+	void *buf = xmalloc(size);
+
+	memset(&stream, 0, sizeof(stream));
+	stream.next_out = buf;
+	stream.avail_out = size;
+	stream.next_in = pack_base + pos;
+	stream.avail_in = pack_limit - pos;
+	inflateInit(&stream);
+
+	for (;;) {
+		int ret = inflate(&stream, 0);
+		if (ret == Z_STREAM_END)
+			break;
+		if (ret != Z_OK)
+			bad_object(offset, "inflate returned %d", ret);
+	}
+	inflateEnd(&stream);
+	if (stream.total_out != size)
+		bad_object(offset, "size mismatch (expected %lu, got %lu)",
+			   size, stream.total_out);
+	*current_pos = pack_limit - stream.avail_in;
+	return buf;
+}
+
+static void *unpack_raw_entry(unsigned long offset,
+			      enum object_type *obj_type,
+			      unsigned long *obj_size,
+			      unsigned char *delta_base,
+			      unsigned long *next_obj_offset)
+{
+	unsigned long pack_limit = pack_size - 20;
+	unsigned long pos = offset;
+	unsigned char c;
+	unsigned long size;
+	unsigned shift;
+	enum object_type type;
+	void *data;
+
+	c = pack_base[pos++];
+	type = (c >> 4) & 7;
+	size = (c & 15);
+	shift = 4;
+	while (c & 0x80) {
+		if (pos >= pack_limit)
+			bad_object(offset, "object extends past end of pack");
+		c = pack_base[pos++];
+		size += (c & 0x7fUL) << shift;
+		shift += 7;
+	}
+
+	switch (type) {
+	case OBJ_DELTA:
+		if (pos + 20 >= pack_limit)
+			bad_object(offset, "object extends past end of pack");
+		memcpy(delta_base, pack_base + pos, 20);
+		pos += 20;
+		/* fallthru */
+	case OBJ_COMMIT:
+	case OBJ_TREE:
+	case OBJ_BLOB:
+	case OBJ_TAG:
+		data = unpack_entry_data(offset, &pos, size);
+		break;
+	default:
+		bad_object(offset, "bad object type %d", type);
+	}
+
+	*obj_type = type;
+	*obj_size = size;
+	*next_obj_offset = pos;
+	return data;
+}
+
+static int find_delta(const unsigned char *base_sha1)
+{
+	int first = 0, last = nr_deltas;
+
+        while (first < last) {
+                int next = (first + last) / 2;
+                struct delta_entry *delta = &deltas[next];
+                int cmp;
+
+                cmp = memcmp(base_sha1, delta->base_sha1, 20);
+                if (!cmp)
+                        return next;
+                if (cmp < 0) {
+                        last = next;
+                        continue;
+                }
+                first = next+1;
+        }
+        return -first-1;
+}
+
+static int find_deltas_based_on_sha1(const unsigned char *base_sha1,
+				     int *first_index, int *last_index)
+{
+	int first = find_delta(base_sha1);
+	int last = first;
+	int end = nr_deltas - 1;
+
+	if (first < 0)
+		return -1;
+	while (first > 0 && !memcmp(deltas[first-1].base_sha1, base_sha1, 20))
+		--first;
+	while (last < end && !memcmp(deltas[last+1].base_sha1, base_sha1, 20))
+		++last;
+	*first_index = first;
+	*last_index = last;
+	return 0;
+}
+
+static void sha1_object(const void *data, unsigned long size,
+			enum object_type type, unsigned char *sha1)
+{
+	SHA_CTX ctx;
+	char header[50];
+	int header_size;
+	const char *type_str;
+
+	switch (type) {
+	case OBJ_COMMIT: type_str = "commit"; break;
+	case OBJ_TREE:   type_str = "tree"; break;
+	case OBJ_BLOB:   type_str = "blob"; break;
+	case OBJ_TAG:    type_str = "tag"; break;
+	default:
+		die("bad type %d", type);
+	}
+
+	header_size = sprintf(header, "%s %lu", type_str, size) + 1;
+
+	SHA1_Init(&ctx);
+	SHA1_Update(&ctx, header, header_size);
+	SHA1_Update(&ctx, data, size);
+	SHA1_Final(sha1, &ctx);
+}
+
+static void resolve_delta(struct delta_entry *delta, void *base_data,
+			  unsigned long base_size, enum object_type type)
+{
+	struct object_entry *obj = delta->obj;
+	void *delta_data;
+	unsigned long delta_size;
+	void *result;
+	unsigned long result_size;
+	enum object_type delta_type;
+	unsigned char base_sha1[20];
+	unsigned long next_obj_offset;
+	int j, first, last;
+
+	obj->real_type = type;
+	delta_data = unpack_raw_entry(obj->offset, &delta_type,
+				      &delta_size, base_sha1,
+				      &next_obj_offset);
+	result = patch_delta(base_data, base_size, delta_data, delta_size,
+			     &result_size);
+	free(delta_data);
+	if (!result)
+		bad_object(obj->offset, "failed to apply delta");
+	sha1_object(result, result_size, type, obj->sha1);
+	if (!find_deltas_based_on_sha1(obj->sha1, &first, &last)) {
+		for (j = first; j <= last; j++)
+			resolve_delta(&deltas[j], result, result_size, type);
+	}
+	free(result);
+}
+
+static int compare_delta_entry(const void *a, const void *b)
+{
+	const struct delta_entry *delta_a = a;
+	const struct delta_entry *delta_b = b;
+	return memcmp(delta_a->base_sha1, delta_b->base_sha1, 20);
+}
+
+static void parse_pack_objects(void)
+{
+	int i;
+	unsigned long offset = sizeof(struct pack_header);
+	unsigned char base_sha1[20];
+	void *data;
+	unsigned long data_size;
+
+	/*
+	 * First pass:
+	 * - find locations of all objects;
+	 * - calculate SHA1 of all non-delta objects;
+	 * - remember base SHA1 for all deltas.
+	 */
+	for (i = 0; i < nr_objects; i++) {
+		struct object_entry *obj = &objects[i];
+		obj->offset = offset;
+		data = unpack_raw_entry(offset, &obj->type, &data_size,
+					base_sha1, &offset);
+		obj->real_type = obj->type;
+		if (obj->type == OBJ_DELTA) {
+			struct delta_entry *delta = &deltas[nr_deltas++];
+			delta->obj = obj;
+			memcpy(delta->base_sha1, base_sha1, 20);
+		} else
+			sha1_object(data, data_size, obj->type, obj->sha1);
+		free(data);
+	}
+	if (offset != pack_size - 20)
+		die("packfile '%s' has junk at the end", pack_name);
+
+	/* Sort deltas by base SHA1 for fast searching */
+	qsort(deltas, nr_deltas, sizeof(struct delta_entry),
+	      compare_delta_entry);
+
+	/*
+	 * Second pass:
+	 * - for all non-delta objects, look if it is used as a base for
+	 *   deltas;
+	 * - if used as a base, uncompress the object and apply all deltas,
+	 *   recursively checking if the resulting object is used as a base
+	 *   for some more deltas.
+	 */
+	for (i = 0; i < nr_objects; i++) {
+		struct object_entry *obj = &objects[i];
+		int j, first, last;
+
+		if (obj->type == OBJ_DELTA)
+			continue;
+		if (find_deltas_based_on_sha1(obj->sha1, &first, &last))
+			continue;
+		data = unpack_raw_entry(obj->offset, &obj->type, &data_size,
+					base_sha1, &offset);
+		for (j = first; j <= last; j++)
+			resolve_delta(&deltas[j], data, data_size, obj->type);
+		free(data);
+	}
+
+	/* Check for unresolved deltas */
+	for (i = 0; i < nr_deltas; i++) {
+		if (deltas[i].obj->real_type == OBJ_DELTA)
+			die("packfile '%s' has unresolved deltas",  pack_name);
+	}
+}
+
+static int sha1_compare(const void *_a, const void *_b)
+{
+	struct object_entry *a = *(struct object_entry **)_a;
+	struct object_entry *b = *(struct object_entry **)_b;
+	return memcmp(a->sha1, b->sha1, 20);
+}
+
+static void write_index_file(const char *index_name)
+{
+	struct sha1file *f;
+	struct object_entry **sorted_by_sha =
+		xcalloc(nr_objects, sizeof(struct object_entry *));
+	struct object_entry **list = sorted_by_sha;
+	struct object_entry **last = sorted_by_sha + nr_objects;
+	unsigned int array[256];
+	int i;
+
+	for (i = 0; i < nr_objects; ++i)
+		sorted_by_sha[i] = &objects[i];
+	qsort(sorted_by_sha, nr_objects, sizeof(sorted_by_sha[0]),
+	      sha1_compare);
+
+	unlink(index_name);
+	f = sha1create("%s", index_name);
+
+	/*
+	 * Write the first-level table (the list is sorted,
+	 * but we use a 256-entry lookup to be able to avoid
+	 * having to do eight extra binary search iterations).
+	 */
+	for (i = 0; i < 256; i++) {
+		struct object_entry **next = list;
+		while (next < last) {
+			struct object_entry *obj = *next;
+			if (obj->sha1[0] != i)
+				break;
+			next++;
+		}
+		array[i] = htonl(next - sorted_by_sha);
+		list = next;
+	}
+	sha1write(f, array, 256 * sizeof(int));
+
+	/*
+	 * Write the actual SHA1 entries..
+	 */
+	list = sorted_by_sha;
+	for (i = 0; i < nr_objects; i++) {
+		struct object_entry *obj = *list++;
+		unsigned int offset = htonl(obj->offset);
+		sha1write(f, &offset, 4);
+		sha1write(f, obj->sha1, 20);
+	}
+	sha1write(f, pack_base + pack_size - 20, 20);
+	sha1close(f, NULL, 1);
+	free(sorted_by_sha);
+}
+
+int main(int argc, char **argv)
+{
+	int i;
+	char *index_name = NULL;
+	char *index_name_buf = NULL;
+
+	for (i = 1; i < argc; i++) {
+		const char *arg = argv[i];
+
+		if (*arg == '-') {
+			if (!strcmp(arg, "-o")) {
+				if (index_name || (i+1) >= argc)
+					usage(index_pack_usage);
+				index_name = argv[++i];
+			} else
+				usage(index_pack_usage);
+			continue;
+		}
+
+		if (pack_name)
+			usage(index_pack_usage);
+		pack_name = arg;
+	}
+
+	if (!pack_name)
+		usage(index_pack_usage);
+	if (!index_name) {
+		int len = strlen(pack_name);
+		if (len < 5 || strcmp(pack_name + len - 5, ".pack"))
+			die("packfile name '%s' does not end with '.pack'",
+			    pack_name);
+		index_name_buf = xmalloc(len - 1);
+		memcpy(index_name_buf, pack_name, len - 5);
+		strcpy(index_name_buf + len - 5, ".idx");
+		index_name = index_name_buf;
+	}
+
+	open_pack_file();
+	parse_pack_header();
+	objects = xcalloc(nr_objects, sizeof(struct object_entry));
+	deltas = xcalloc(nr_objects, sizeof(struct delta_entry));
+	parse_pack_objects();
+	free(deltas);
+	write_index_file(index_name);
+	free(objects);
+	free(index_name_buf);
+
+	return 0;
+}
diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh
--- a/t/t5300-pack-object.sh
+++ b/t/t5300-pack-object.sh
@@ -165,4 +165,22 @@ test_expect_success \
 
      :'
 
+test_expect_success \
+    'build pack index for an existing pack' \
+    'cp test-1-${packname_1}.pack test-3.pack &&
+     git-index-pack -o tmp.idx test-3.pack &&
+     cmp tmp.idx test-1-${packname_1}.idx &&
+
+     git-index-pack test-3.pack &&
+     cmp test-3.idx test-1-${packname_1}.idx &&
+
+     cp test-2-${packname_2}.pack test-3.pack &&
+     git-index-pack -o tmp.idx test-2-${packname_2}.pack &&
+     cmp tmp.idx test-2-${packname_2}.idx &&
+
+     git-index-pack test-3.pack &&
+     cmp test-3.idx test-2-${packname_2}.idx &&
+
+     :'
+
 test_done

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] Add '--create-index' to git-unpack-objects
  2005-10-12 13:34 ` Sergey Vlasov
  2005-10-12 13:54   ` [PATCH] Add git-index-pack utility Sergey Vlasov
@ 2005-10-12 14:25   ` Johannes Schindelin
  2005-10-12 14:55     ` Sergey Vlasov
  1 sibling, 1 reply; 12+ messages in thread
From: Johannes Schindelin @ 2005-10-12 14:25 UTC (permalink / raw)
  To: Sergey Vlasov; +Cc: git, junkio

Hi,

On Wed, 12 Oct 2005, Sergey Vlasov wrote:

> On Wed, 12 Oct 2005 13:02:36 +0200 (CEST) Johannes Schindelin wrote:
> 
> >  static void write_object(void *buf, unsigned long size, const char *type)
> >  {
> >  	unsigned char sha1[20];
> > -	if (write_sha1_file(buf, size, type, sha1) < 0)
> > +	if (create_index) {
> > +		char header[100];
> > +		SHA_CTX c;
> > +
> > +		SHA1_Init(&c);
> > +		SHA1_Update(&c, header, 1+sprintf(header, "%s %lu", type, size));
> > +		SHA1_Update(&c, buf, size);
> > +		SHA1_Final(current_sha1, &c);
> > +	} else if (write_sha1_file(buf, size, type, sha1) < 0)
> >  		die("failed to write object");
> 
> Sorry, but this cannot work.  git-unpack-objects does a streaming
> unpack, and it needs to be able to read back the objects it has written
> out previously (in case a delta later in the stream references some
> older object).

Even worse, my code did not anticipate that the base objects could have 
been handled earlier (and thus the deltas would never be resolved).

> Saving unpacked objects in memory would obviously be unacceptable.

Actually, this is what git-unpack-objects does. All unresolved deltas are 
stored in a linked list, and handled later.

Of course, it would be nicer to use a seekable file if you have one. But 
then, I am not at all sure that base objects should be allowed to come 
later in the file: since the delta chains must not be cyclic, the objects
can be sorted. Thus, it could be guaranteed that the base objects are 
already unpacked when unpacking the derived object.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] Add git-index-pack utility
  2005-10-12 13:54   ` [PATCH] Add git-index-pack utility Sergey Vlasov
@ 2005-10-12 14:33     ` Johannes Schindelin
  2005-10-12 15:01       ` Sergey Vlasov
  2005-10-12 23:57     ` [PATCH] Fix packname hash generation Junio C Hamano
  1 sibling, 1 reply; 12+ messages in thread
From: Johannes Schindelin @ 2005-10-12 14:33 UTC (permalink / raw)
  To: Sergey Vlasov; +Cc: Junio C Hamano, git

Hi,

you cheated! You use mmap(), not lseek()! Note that mmap() is more 
efficient only if the platform provides mmap()...

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] Add '--create-index' to git-unpack-objects
  2005-10-12 14:25   ` [PATCH] Add '--create-index' to git-unpack-objects Johannes Schindelin
@ 2005-10-12 14:55     ` Sergey Vlasov
  2005-10-12 15:08       ` Johannes Schindelin
  2005-10-12 15:20       ` Linus Torvalds
  0 siblings, 2 replies; 12+ messages in thread
From: Sergey Vlasov @ 2005-10-12 14:55 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git, junkio

[-- Attachment #1: Type: text/plain, Size: 1242 bytes --]

On Wed, Oct 12, 2005 at 04:25:22PM +0200, Johannes Schindelin wrote:
> On Wed, 12 Oct 2005, Sergey Vlasov wrote:
> > Saving unpacked objects in memory would obviously be unacceptable.
> 
> Actually, this is what git-unpack-objects does. All unresolved deltas are 
> stored in a linked list, and handled later.

Yes, this may be a problem if the pack is large and contains many
deltas.  But these stored deltas are thrown away immediately when the
base object is found; if you want to implement a streaming reindex,
you will need to store them until you reach the end of pack (or write
those objects to some temporary files).

> Of course, it would be nicer to use a seekable file if you have one. But 
> then, I am not at all sure that base objects should be allowed to come 
> later in the file: since the delta chains must not be cyclic, the objects
> can be sorted. Thus, it could be guaranteed that the base objects are 
> already unpacked when unpacking the derived object.

Hmm, pack-objects.c:write_one() does exactly the opposite - it writes
the base object _after_ writing out the delta (but it does not ensure
that ordering completely, so references to base objects can be
pointing in both directions).  Why?

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] Add git-index-pack utility
  2005-10-12 14:33     ` Johannes Schindelin
@ 2005-10-12 15:01       ` Sergey Vlasov
  0 siblings, 0 replies; 12+ messages in thread
From: Sergey Vlasov @ 2005-10-12 15:01 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Junio C Hamano, git

[-- Attachment #1: Type: text/plain, Size: 288 bytes --]

On Wed, Oct 12, 2005 at 04:33:17PM +0200, Johannes Schindelin wrote:
> you cheated! You use mmap(), not lseek()! Note that mmap() is more 
> efficient only if the platform provides mmap()...

Yes, I need to steal some code for handling the deflated streams from
unpack-objects.c...

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] Add '--create-index' to git-unpack-objects
  2005-10-12 14:55     ` Sergey Vlasov
@ 2005-10-12 15:08       ` Johannes Schindelin
  2005-10-12 15:20       ` Linus Torvalds
  1 sibling, 0 replies; 12+ messages in thread
From: Johannes Schindelin @ 2005-10-12 15:08 UTC (permalink / raw)
  To: Sergey Vlasov; +Cc: git, junkio

Hi,

On Wed, 12 Oct 2005, Sergey Vlasov wrote:

> Hmm, pack-objects.c:write_one() does exactly the opposite - it writes
> the base object _after_ writing out the delta (but it does not ensure
> that ordering completely, so references to base objects can be
> pointing in both directions).  Why?

Okay, I did not read that far. However, having quite a few packs out there 
in this format, there is no option to change the format now.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] Add '--create-index' to git-unpack-objects
  2005-10-12 14:55     ` Sergey Vlasov
  2005-10-12 15:08       ` Johannes Schindelin
@ 2005-10-12 15:20       ` Linus Torvalds
  1 sibling, 0 replies; 12+ messages in thread
From: Linus Torvalds @ 2005-10-12 15:20 UTC (permalink / raw)
  To: Sergey Vlasov; +Cc: Johannes Schindelin, git, junkio



On Wed, 12 Oct 2005, Sergey Vlasov wrote:
> 
> Hmm, pack-objects.c:write_one() does exactly the opposite - it writes
> the base object _after_ writing out the delta (but it does not ensure
> that ordering completely, so references to base objects can be
> pointing in both directions).  Why?

pack-objects.c is actually going to some trouble to make sure that the 
resulting pack is "optimal" in layout for the most recent case.

Not that I have actually verified optimality, but it was _meant_ to be 
that way. And my limited tests seemed to agree.

So it writes out all objects in "recency order", which is the order it 
gets them from git-rev-list: it's the same order as the objects are 
discovered when we traverse the history in time (except all commits come 
first, since most operations will traverse the commit history more than 
they will traverse the rest of the object links).

So the objects that are reachable in the most recent tree are all supposed 
to be at the beginning of the pack-file, just after the commits.

Now, think about what happens if such an object is a delta against 
something else...

In other words, if the most recent tree contains a delta against a much 
older object, we want not only the _delta_ to be early in the pack-file, 
we want the object that it is a delta _against_ to be there too (just 
_after_ the delta, to be exact: we obviously read the delta first, so it 
should come first in the pack).

The point being, that if you unpack the latest tree (ie "git checkout" or 
any of the normal "git diff" behaviour), the pack-file will basically be 
walked in a dense manner, and linearly starting roughly from the 
beginning. Which is the optimal IO pattern. Dense and ascending reads.

Now, if the object is reachable through some recent branch, but the delta 
is not, then that is not true. In that case, you want to write the recent 
base object early in the pack-file, but you do _not_ want to write the 
delta together with it, because that would be the wrong thing for the 
"recent head" case: it would add stuff to the beginning of the pack-file 
that isn't needed for recent objects.

So that's why it's an assymmetric thing. The preferred ordering of time 
breaks the symmetry.

			Linus

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH] Fix packname hash generation.
  2005-10-12 13:54   ` [PATCH] Add git-index-pack utility Sergey Vlasov
  2005-10-12 14:33     ` Johannes Schindelin
@ 2005-10-12 23:57     ` Junio C Hamano
  2005-10-13  1:23       ` [PATCH] clone-pack: new option --keep to keep the pack unexploded Junio C Hamano
  2005-10-13  2:46       ` [PATCH] Fix packname hash generation Junio C Hamano
  1 sibling, 2 replies; 12+ messages in thread
From: Junio C Hamano @ 2005-10-12 23:57 UTC (permalink / raw)
  To: git

This changes the generation of hash packfiles have in their names, from
"hash of object names as fed to us" to "hash of object names in the
resulting pack, in the order they appear in the index file".  The new
"git-index-pack" command is taught to output the computed hash value
to its standard output.

With this, we can store downloaded pack in a temporary file without
knowing its final name, run git-index-pack to generate idx for it
while finding out its final name, and then rename the pack and idx to
their final names.

Signed-off-by: Junio C Hamano <junkio@cox.net>

---

 * Right now, the pack "hash" name only serves the collision
   avoidance purposes, but not true identification.  The same
   set of objects can be fed to pack-objects in different order,
   produce the same pack, and still end up with different
   names.

   This will be used in the next experiment, "git-clone not
   exploding the downloaded pack".

 index-pack.c   |   15 +++++++++++++--
 pack-objects.c |   14 ++++++++++----
 2 files changed, 23 insertions(+), 6 deletions(-)

applies-to: ea37b42d53264d65f746b3e42577349e8a44d5c4
3b97470e3711d7af3505baddc34428a0d9bd8214
diff --git a/index-pack.c b/index-pack.c
index badbeab..785fe71 100644
--- a/index-pack.c
+++ b/index-pack.c
@@ -349,7 +349,7 @@ static int sha1_compare(const void *_a, 
 	return memcmp(a->sha1, b->sha1, 20);
 }
 
-static void write_index_file(const char *index_name)
+static void write_index_file(const char *index_name, unsigned char *sha1)
 {
 	struct sha1file *f;
 	struct object_entry **sorted_by_sha =
@@ -358,6 +358,7 @@ static void write_index_file(const char 
 	struct object_entry **last = sorted_by_sha + nr_objects;
 	unsigned int array[256];
 	int i;
+	SHA_CTX ctx;
 
 	for (i = 0; i < nr_objects; ++i)
 		sorted_by_sha[i] = &objects[i];
@@ -385,6 +386,11 @@ static void write_index_file(const char 
 	}
 	sha1write(f, array, 256 * sizeof(int));
 
+	/* recompute the SHA1 hash of sorted object names.
+	 * currently pack-objects does not do this, but that
+	 * can be fixed.
+	 */
+	SHA1_Init(&ctx);
 	/*
 	 * Write the actual SHA1 entries..
 	 */
@@ -394,10 +400,12 @@ static void write_index_file(const char 
 		unsigned int offset = htonl(obj->offset);
 		sha1write(f, &offset, 4);
 		sha1write(f, obj->sha1, 20);
+		SHA1_Update(&ctx, obj->sha1, 20);
 	}
 	sha1write(f, pack_base + pack_size - 20, 20);
 	sha1close(f, NULL, 1);
 	free(sorted_by_sha);
+	SHA1_Final(sha1, &ctx);
 }
 
 int main(int argc, char **argv)
@@ -405,6 +413,7 @@ int main(int argc, char **argv)
 	int i;
 	char *index_name = NULL;
 	char *index_name_buf = NULL;
+	unsigned char sha1[20];
 
 	for (i = 1; i < argc; i++) {
 		const char *arg = argv[i];
@@ -443,9 +452,11 @@ int main(int argc, char **argv)
 	deltas = xcalloc(nr_objects, sizeof(struct delta_entry));
 	parse_pack_objects();
 	free(deltas);
-	write_index_file(index_name);
+	write_index_file(index_name, sha1);
 	free(objects);
 	free(index_name_buf);
 
+	printf("%s\n", sha1_to_hex(sha1));
+
 	return 0;
 }
diff --git a/pack-objects.c b/pack-objects.c
index 3d62278..ef55cab 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -393,6 +393,7 @@ int main(int argc, char **argv)
 	SHA_CTX ctx;
 	char line[PATH_MAX + 20];
 	int window = 10, depth = 10, pack_to_stdout = 0;
+	struct object_entry **list;
 	int i;
 
 	for (i = 1; i < argc; i++) {
@@ -435,7 +436,6 @@ int main(int argc, char **argv)
 	if (pack_to_stdout != !base_name)
 		usage(pack_usage);
 
-	SHA1_Init(&ctx);
 	while (fgets(line, sizeof(line), stdin) != NULL) {
 		unsigned int hash;
 		char *p;
@@ -451,10 +451,8 @@ int main(int argc, char **argv)
 				continue;
 			hash = hash * 11 + c;
 		}
-		if (add_object_entry(sha1, hash))
-			SHA1_Update(&ctx, sha1, 20);
+		add_object_entry(sha1, hash);
 	}
-	SHA1_Final(object_list_sha1, &ctx);
 	if (non_empty && !nr_objects)
 		return 0;
 	get_object_details();
@@ -462,6 +460,14 @@ int main(int argc, char **argv)
 	fprintf(stderr, "Packing %d objects\n", nr_objects);
 
 	sorted_by_sha = create_sorted_list(sha1_sort);
+	SHA1_Init(&ctx);
+	list = sorted_by_sha;
+	for (i = 0; i < nr_objects; i++) {
+		struct object_entry *entry = *list++;
+		SHA1_Update(&ctx, entry->sha1, 20);
+	}
+	SHA1_Final(object_list_sha1, &ctx);
+
 	sorted_by_type = create_sorted_list(type_size_sort);
 	if (window && depth)
 		find_deltas(sorted_by_type, window+1, depth);
---
0.99.8.GIT

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH] clone-pack: new option --keep to keep the pack unexploded.
  2005-10-12 23:57     ` [PATCH] Fix packname hash generation Junio C Hamano
@ 2005-10-13  1:23       ` Junio C Hamano
  2005-10-13  2:46       ` [PATCH] Fix packname hash generation Junio C Hamano
  1 sibling, 0 replies; 12+ messages in thread
From: Junio C Hamano @ 2005-10-13  1:23 UTC (permalink / raw)
  To: git

With new option --keep, or a configuration item clone.keeppack (we
need a better name, or start allowing dash,"clone.keep-pack"), the packed
data downloaded while cloning is saved as a pack in .git/objects/pack/
locally, with index generated for it with git-index-pack.

Signed-off-by: Junio C Hamano <junkio@cox.net>

---

 * Here is how to use it.

    $ mkdir test
    $ cd test
    $ git-init-db
    defaulting to local storage area
    $ git-clone-pack --keep ../other/repo/sito/ry/.git
    Packing 9044 objects
    $ git-checkout

   After cloning, you would notice there is no individual
   objects; instead you will find a single packfile in
   .git/objects/pack/ directory.

 Documentation/git-clone-pack.txt |    7 ++++++-
 clone-pack.c                     |    3 ++-
 2 files changed, 8 insertions(+), 2 deletions(-)

applies-to: fa5213875bff7fdb8c7d05f35a047eedf3cb3af2
b64a46c81d8347e59285584e830e1ad99ad387e0
diff --git a/Documentation/git-clone-pack.txt b/Documentation/git-clone-pack.txt
index 87c0e46..b58165a 100644
--- a/Documentation/git-clone-pack.txt
+++ b/Documentation/git-clone-pack.txt
@@ -8,7 +8,7 @@ git-clone-pack - Clones a repository by 
 
 SYNOPSIS
 --------
-'git-clone-pack' [-q] [--exec=<git-upload-pack>] [<host>:]<directory> [<head>...]
+'git-clone-pack' [-q] [--keep] [--exec=<git-upload-pack>] [<host>:]<directory> [<head>...]
 
 DESCRIPTION
 -----------
@@ -23,6 +23,11 @@ OPTIONS
 	Pass '-q' flag to 'git-unpack-objects'; this makes the
 	cloning process less verbose.
 
+--keep::
+	Do not invoke 'git-unpack-objects' on received data, but
+	create a single packfile out of it instead, and store it
+	in the object database.
+
 --exec=<git-upload-pack>::
 	Use this to specify the path to 'git-upload-pack' on the
 	remote side, if it is not found on your $PATH.
diff --git a/clone-pack.c b/clone-pack.c
index 9567900..2f09df0 100644
--- a/clone-pack.c
+++ b/clone-pack.c
@@ -5,7 +5,8 @@
 
 static int quiet;
 static int keep_pack;
-static const char clone_pack_usage[] = "git-clone-pack [-q] [--exec=<git-upload-pack>] [<host>:]<directory> [<heads>]*";
+static const char clone_pack_usage[] =
+"git-clone-pack [-q] [--keep] [--exec=<git-upload-pack>] [<host>:]<directory> [<heads>]*";
 static const char *exec = "git-upload-pack";
 
 static void clone_handshake(int fd[2], struct ref *ref)
---
0.99.8.GIT

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] Fix packname hash generation.
  2005-10-12 23:57     ` [PATCH] Fix packname hash generation Junio C Hamano
  2005-10-13  1:23       ` [PATCH] clone-pack: new option --keep to keep the pack unexploded Junio C Hamano
@ 2005-10-13  2:46       ` Junio C Hamano
  1 sibling, 0 replies; 12+ messages in thread
From: Junio C Hamano @ 2005-10-13  2:46 UTC (permalink / raw)
  To: git

Junio C Hamano <junkio@cox.net> writes:

> This changes the generation of hash packfiles have in their names, from
> "hash of object names as fed to us" to "hash of object names in the
> resulting pack, in the order they appear in the index file".  The new
> "git-index-pack" command is taught to output the computed hash value
> to its standard output.

In case it was not obvious, this is not a backward incompatible
change.  Your existing packs will be valid after this change.

What those 40-byte hashes were buying us was that we did not
have to worry about name clashes.  We could have said "these two
packs have the same name so they must have the same set of
objects", but there is no tool that relies on this fact.  We
could not even say "these two packs have different names so the
set of objects contained by them must be different" -- the
resulting pack name depended on the order of objects fed to
git-pack-objects, even if you fed the same set of objects.

The really core part never cared about how packfiles and their
indices are named.  The only restrictions were that they live
immediately under .git/objects/pack/, have .pack and .idx suffix
respectively, and their basename match with each other.

The commit walkers (anything that link with fetch.c) impose
another limitation that their basenames are "pack-" followed by
40-byte hexadecimal digits.  But they do not check if the name
is consistent with the set of objects in the pack (checking it
was computationally infeasible for huge packs in the previous
hashing mechanism -- you have to feed all permutations of
objects contained in the pack to SHA1 hash and see if any
produces the same hash as the pack name).  We _could_ now do
this additional check if we wanted to (the same goes to the
really core part in sha1_file.c::check_packed_git_idx()).

In short, it does not matter if your existing packs are named
using the old hashing mechanism.  They will continue to be
valid.

But if you really care about consistency, here is an easy way to
rename your existing packs to their new names the new hashing
scheme would produce.

#!/bin/sh

: ${GIT_DIR=.git}
: ${GIT_OBJECT_DIRECTORY="${GIT_DIR}/objects"}

O="$GIT_OBJECT_DIRECTORY"
P="$GIT_OBJECT_DIRECTORY/pack"
for existing in `cd "$GIT_OBJECT_DIRECTORY" &&
		 find pack -name '*.pack' -print`
do
    idx=`expr "$existing" : '\(.*\)\.pack$'`.idx &&
    test -f "$O/$idx" || {
        echo >&2 "Missing idx $idx?"
        continue
    }
    new=`git-index-pack -o tmp-idx "$O/$existing"` || {
        echo >&2 "Corrupt pack $existing?"
        continue
    }           
    # index generated for an existing pack should match.
    cmp "$O/$idx" tmp-idx || {
        echo >&2 "Corrupt idx $idx?"
        continue
    }
    if test "pack/pack-$new.pack" = "$existing"
    then
        echo >&2 "Already converted $existing."
        continue
    fi
    if test -f "$P/pack-$new.pack" || test -f "$P/pack-$new.idx"
    then
        echo >&2 "Name clash! $new"
        continue
    fi
    mv "$O/$existing" "$P/pack-$new.pack" &&
    mv "$O/$idx" "$P/pack-$new.idx" || {
        echo >&2 "Cannot move $existing to $new"
        continue
    }
    echo >&2 "Renamed $existing -> $new"
done

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2005-10-13  2:46 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-10-12 11:02 [PATCH] Add '--create-index' to git-unpack-objects Johannes Schindelin
2005-10-12 13:34 ` Sergey Vlasov
2005-10-12 13:54   ` [PATCH] Add git-index-pack utility Sergey Vlasov
2005-10-12 14:33     ` Johannes Schindelin
2005-10-12 15:01       ` Sergey Vlasov
2005-10-12 23:57     ` [PATCH] Fix packname hash generation Junio C Hamano
2005-10-13  1:23       ` [PATCH] clone-pack: new option --keep to keep the pack unexploded Junio C Hamano
2005-10-13  2:46       ` [PATCH] Fix packname hash generation Junio C Hamano
2005-10-12 14:25   ` [PATCH] Add '--create-index' to git-unpack-objects Johannes Schindelin
2005-10-12 14:55     ` Sergey Vlasov
2005-10-12 15:08       ` Johannes Schindelin
2005-10-12 15:20       ` Linus Torvalds

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).