All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 09/16] subtree: rewrite incoming commits
Date: Sat, 31 Jul 2010 23:18:18 +0700	[thread overview]
Message-ID: <1280593105-22015-10-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1280593105-22015-1-git-send-email-pclouds@gmail.com>

This adds the main function, subtree_import(), which is intended to be
used by "git clone".

Because subtree packs are not complete, they are barely usable. The Git
client will complain about missing objects here and there... Theoretically,
client code could be adapted to only look for objects within the
subtree. That was painful to try.

Alternatively, subtree_import() rewrites commits to have only the
specified subtree, sealing all broken paths. The Git client now happily
works with these new commits.

However, users might not, because these are different commits with
different SHA-1s. They can't use those SHA-1s to communicate within their
team. To work around this, all original commits are replaced by new
commits using git-replace.

Of course this is still not perfect. Users may be able to send SHA-1
around, which is consistent. They may not do the same with tree SHA-1.

Rewriting/replacing commits takes time and space. For replacing _all_
commits, the current replace mechanism is not suitable, which is why
subtree_lookup_object() was introduced in previous patches.

For rewriting, writing a huge number of objects is slow. So
subtree_import() builds a pack for all new objects. These packs are
not optimized. But it does reduce wait time for rewriting.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 subtree.c |  244 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 subtree.h |    1 +
 2 files changed, 245 insertions(+), 0 deletions(-)

diff --git a/subtree.c b/subtree.c
index 601d827..8c075be 100644
--- a/subtree.c
+++ b/subtree.c
@@ -115,3 +115,247 @@ const unsigned char *subtree_lookup_object(const unsigned char *sha1)
 		return subtree_commit[pos]->sha1[1];
 	return sha1;
 }
+
+/*
+ * Deflate size bytes at *pptr into a new xmalloc()ed buffer, returned via *pptr.
+ */
+static unsigned long do_compress(void **pptr, unsigned long size)
+{
+	z_stream stream;
+	unsigned long maxsize;
+	void *out;
+
+	memset(&stream, 0, sizeof(stream));
+	if (deflateInit(&stream, Z_DEFAULT_COMPRESSION) != Z_OK)
+		die("unable to initialize zlib deflate stream");
+	maxsize = deflateBound(&stream, size);
+	out = xmalloc(maxsize);
+	stream.next_in = *pptr;
+	stream.avail_in = size;
+	stream.next_out = out;
+	stream.avail_out = maxsize;
+	while (deflate(&stream, Z_FINISH) == Z_OK)
+		; /* nothing */
+	deflateEnd(&stream);
+	*pptr = out; /* the input buffer is not freed here */
+	return stream.total_out;
+}
+
+static int nr_written; /* objects appended to the pack so far */
+/* Deflate buf and append it to pack fd as one entry; non-zero on write failure. */
+static int add_sha1_to_pack(int fd, void *buf, unsigned long size, enum object_type type)
+{
+	unsigned char header[10];
+	unsigned long datalen = do_compress(&buf, size); /* buf is now the deflated copy */
+	unsigned hdrlen = encode_in_pack_object_header(type, size, header);
+	int failed;
+
+	failed = write_in_full(fd, header, hdrlen) != hdrlen ||
+		 write_in_full(fd, buf, datalen) != datalen;
+	nr_written += !failed;
+	free(buf);
+	return failed;
+}
+
+/*
+ * Take sha1 of a tree, rewrite it to keep only the directory named by
+ * the first component of prefix (recursing for the remaining
+ * components) and return the new tree id in newsha1.
+ *
+ * If fd is zero, write to object store. If fd is greater than zero,
+ * it's a pack file handle. If fd is negative, only compute newsha1.
+ * Returns 0 on success, 1 on failure.
+ */
+static int narrow_tree(const unsigned char *sha1, unsigned char *newsha1,
+		       const char *prefix, int fd)
+{
+	struct tree_desc desc;
+	struct name_entry entry;
+	struct strbuf buffer;
+	const char *slash;
+	int subtree_len;
+	enum object_type type;
+	unsigned long size;
+	char *tree;
+	struct object *obj;
+	int ret = 0;
+
+	slash = strchr(prefix, '/');
+	subtree_len = slash ? slash - prefix : strlen(prefix);
+
+	tree = read_sha1_file(sha1, &type, &size);
+	if (!tree || type != OBJ_TREE) /* type is only meaningful if the read worked */
+		die("%s is not a tree", sha1_to_hex(sha1));
+
+	init_tree_desc(&desc, tree, size);
+	strbuf_init(&buffer, 1024);
+	while (tree_entry(&desc, &entry)) {
+		unsigned char newtree_sha1[20];
+
+		if (!S_ISDIR(entry.mode) ||
+		    subtree_len != strlen(entry.path) ||
+		    strncmp(entry.path, prefix, subtree_len))
+			continue;
+
+		/* Recurse while prefix has more components; trailing slash does not count */
+		if (!slash || !slash[1]) {
+			memcpy(newtree_sha1, entry.sha1, 20);
+		} else if (narrow_tree(entry.sha1, newtree_sha1, prefix+subtree_len+1, fd)) {
+			/* the subtree could not be rewritten, so neither can this tree */
+			free(tree);
+			strbuf_release(&buffer);
+			return 1;
+		}
+
+		/* plain %s: entry.path is NUL-terminated (strlen() is used on it above) */
+		strbuf_addf(&buffer, "%o %s%c", entry.mode, entry.path, '\0');
+		strbuf_add(&buffer, newtree_sha1, 20);
+		break;
+	}
+	free(tree);
+
+	if (fd == 0) {
+		ret = !!write_sha1_file(buffer.buf, buffer.len, tree_type, newsha1);
+	} else {
+		hash_sha1_file(buffer.buf, buffer.len, tree_type, newsha1);
+		obj = (struct object *)lookup_tree(newsha1);
+		/* SEEN marks trees already handled, so each is packed at most once */
+		if (fd > 0 && !(obj->flags & SEEN))
+			ret = !!add_sha1_to_pack(fd, buffer.buf, buffer.len, OBJ_TREE);
+		if (!ret)
+			obj->flags |= SEEN;
+	}
+	if (ret)
+		error("Could not write replaced tree for %s", sha1_to_hex(sha1));
+	strbuf_release(&buffer);
+	return ret;
+}
+
+/*
+ * Take sha1 of a commit, rewrite its tree using narrow_tree(), then
+ * add a "<old hex> <new hex>" line to fp (which is $GIT_DIR/subtree).
+ * fd selects where the new objects go, exactly as in narrow_tree().
+ * Returns 0 on success and 1, after reporting an error, on failure.
+ */
+static int shadow_commit(const unsigned char *sha1, const char *prefix, int fd, FILE *fp)
+{
+	unsigned char newsha1[20], treesha1[20];
+	enum object_type type;
+	unsigned long size;
+	char *buffer;
+	struct object *obj;
+	int saved_read_replace_refs = read_replace_refs;
+	int ret = 1;
+
+	read_replace_refs = 0;
+	buffer = read_sha1_file(sha1, &type, &size);
+	read_replace_refs = saved_read_replace_refs;
+
+	/* Only parse the 40 hex digits at offset 5 once the buffer is known good */
+	if (!buffer || type != OBJ_COMMIT || size < 45 ||
+	    get_sha1_hex(buffer+5, treesha1) ||
+	    narrow_tree(treesha1, newsha1, prefix, fd)) {
+		error("Failed to narrow tree for commit %s", sha1_to_hex(sha1));
+		goto out;
+	}
+
+	/* replace new tree in */
+	memcpy(buffer+5, sha1_to_hex(newsha1), 40);
+
+	if (fd == 0) {
+		if (write_sha1_file(buffer, size, commit_type, newsha1)) {
+			error("Could not write replaced commit for %s", sha1_to_hex(sha1));
+			goto out;
+		}
+	} else {
+		hash_sha1_file(buffer, size, commit_type, newsha1);
+		obj = (struct object *)lookup_commit(newsha1);
+		if (fd > 0 && !(obj->flags & SEEN) &&
+		    add_sha1_to_pack(fd, buffer, size, OBJ_COMMIT)) {
+			error("Could not write replaced commit for %s", sha1_to_hex(sha1));
+			goto out;
+		}
+		obj->flags |= SEEN;
+	}
+
+	if (fp) {
+		char buf[82];
+		memcpy(buf, sha1_to_hex(sha1), 40);
+		buf[40] = ' ';
+		memcpy(buf+41, sha1_to_hex(newsha1), 40);
+		buf[81] = '\n';
+		if (fwrite(buf, 82, 1, fp) != 1) {
+			error("Could not record replacement for %s", sha1_to_hex(sha1));
+			goto out;
+		}
+	}
+	ret = 0;
+out:
+	free(buffer);
+	return ret;
+}
+
+/*
+ * Rewrite all reachable commits in repo using shadow_commit().
+ * Write out the pack that contains new tree/commit objects.
+ */
+void subtree_import(void)
+{
+	const char *args[] = {"rev-list", "--all", NULL};
+	struct pack_header hdr;
+	struct progress *ps;
+	struct rev_info revs;
+	struct commit *c;
+	unsigned char sha1[20];
+	unsigned commit_nr = 0;
+	char tmpname[PATH_MAX], cmd[PATH_MAX + 32]; /* cmd: command prefix + any tmpname */
+	int pack_fd, i;
+	FILE *fp;
+
+	/* Packing */
+	init_revisions(&revs, NULL);
+	setup_revisions(2, args, &revs, NULL);
+	if (prepare_revision_walk(&revs))
+		die("revision walk setup failed");
+	fp = fopen(git_path("subtree"), "w+");
+	if (!fp)
+		die("Could not open %s", git_path("subtree"));
+	pack_fd = odb_mkstemp(tmpname, sizeof(tmpname), "pack/tmp_pack_XXXXXX");
+
+	hdr.hdr_signature = htonl(PACK_SIGNATURE);
+	hdr.hdr_version = htonl(PACK_VERSION);
+	hdr.hdr_entries = htonl(0); /* nr_written is passed to the fixup call below */
+	if (write(pack_fd, &hdr, sizeof(hdr)) != sizeof(hdr))
+		die("Could not write pack header to %s", tmpname);
+
+	ps = start_progress("Preparing subtree commits", 0);
+	while ((c = get_revision(&revs)) != NULL) {
+		if (shadow_commit(c->object.sha1, core_subtree, pack_fd, fp))
+			die("Failed to shadow commit %s", sha1_to_hex(c->object.sha1));
+		display_progress(ps, ++commit_nr);
+	}
+	stop_progress(&ps);
+	if (fclose(fp))
+		die("Could not write %s", git_path("subtree"));
+	fixup_pack_header_footer(pack_fd, sha1, tmpname, nr_written, NULL, 0);
+	close(pack_fd);
+	snprintf(cmd, sizeof(cmd), "git index-pack --stdin < %s", tmpname);
+	if (system(cmd))
+		die("git index-pack failed on %s", tmpname);
+	unlink(tmpname);
+
+	reprepare_packed_git();
+	free(subtree_commit);
+	prepare_subtree_commit();
+
+	/* Invalidate all replaced commits */
+	for (i = 0; i < subtree_commit_nr; i++) {
+		/* lookup_commit() would create new objects, we don't want that */
+		c = (struct commit *)lookup_object(subtree_commit[i]->sha1[0]);
+		if (c)
+			invalidate_commit(c);
+	}
+
+	if (revs.pending.nr)
+		free(revs.pending.objects);
+}
diff --git a/subtree.h b/subtree.h
index 157153a..3512e2a 100644
--- a/subtree.h
+++ b/subtree.h
@@ -1,2 +1,3 @@
 void prepare_subtree_commit();
 const unsigned char *subtree_lookup_object(const unsigned char *sha1);
+void subtree_import();
-- 
1.7.1.rc1.69.g24c2f7

  parent reply	other threads:[~2010-08-01  2:39 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-07-31 16:18 [PATCH 00/16] Subtree clone proof of concept Nguyễn Thái Ngọc Duy
2010-07-31 16:18 ` [PATCH 01/16] Add core.subtree Nguyễn Thái Ngọc Duy
2010-07-31 16:18 ` [PATCH 02/16] list-objects: limit traversing within the given subtree if core.subtree is set Nguyễn Thái Ngọc Duy
2010-08-01 11:30   ` Ævar Arnfjörð Bjarmason
2010-08-01 23:11     ` Nguyen Thai Ngoc Duy
2010-08-02  4:21   ` Elijah Newren
2010-08-02  6:51     ` Nguyen Thai Ngoc Duy
2010-07-31 16:18 ` [PATCH 03/16] parse_object: keep sha1 even when parsing replaced one Nguyễn Thái Ngọc Duy
2010-07-31 16:18 ` [PATCH 04/16] Allow to invalidate a commit in in-memory object store Nguyễn Thái Ngọc Duy
2010-07-31 16:18 ` [PATCH 05/16] Hook up replace-object to allow bulk commit replacement Nguyễn Thái Ngọc Duy
2010-08-02 19:58   ` Junio C Hamano
2010-08-02 22:42     ` Nguyen Thai Ngoc Duy
2010-07-31 16:18 ` [PATCH 06/16] upload-pack: use a separate variable to control whether internal rev-list is used Nguyễn Thái Ngọc Duy
2010-08-02  4:25   ` Elijah Newren
2010-07-31 16:18 ` [PATCH 07/16] upload-pack: support subtree pack Nguyễn Thái Ngọc Duy
2010-08-02  4:27   ` Elijah Newren
2010-07-31 16:18 ` [PATCH 08/16] fetch-pack: support --subtree Nguyễn Thái Ngọc Duy
2010-07-31 16:18 ` Nguyễn Thái Ngọc Duy [this message]
2010-08-02  4:37   ` [PATCH 09/16] subtree: rewrite incoming commits Elijah Newren
2010-07-31 16:18 ` [PATCH 10/16] clone: support subtree clone with parameter --subtree Nguyễn Thái Ngọc Duy
2010-07-31 16:18 ` [PATCH 11/16] pack-objects: add --subtree (for pushing) Nguyễn Thái Ngọc Duy
2010-07-31 16:18 ` [PATCH 12/16] subtree: rewriting outgoing commits Nguyễn Thái Ngọc Duy
2010-08-02  4:40   ` Elijah Newren
2010-07-31 16:18 ` [PATCH 13/16] Update commit_tree() interface to take base tree too Nguyễn Thái Ngọc Duy
2010-07-31 16:18 ` [PATCH 14/16] commit_tree(): rewriting/replacing new commits Nguyễn Thái Ngọc Duy
2010-07-31 16:18 ` [PATCH 15/16] commit: rewrite outgoing commits Nguyễn Thái Ngọc Duy
2010-07-31 16:18 ` [PATCH 16/16] do not use thin packs and subtree together (just a bad feeling about this) Nguyễn Thái Ngọc Duy
2010-08-01  4:14 ` [PATCH 00/16] Subtree clone proof of concept Sverre Rabbelier
2010-08-01  6:58   ` Nguyen Thai Ngoc Duy
2010-08-01 20:05     ` Sverre Rabbelier
2010-08-02  5:18 ` Elijah Newren
2010-08-02  7:10   ` Nguyen Thai Ngoc Duy
2010-08-02 22:55   ` Nguyen Thai Ngoc Duy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1280593105-22015-10-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.