All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jeff King <peff@peff.net>
To: Junio C Hamano <gitster@pobox.com>
Cc: David Turner <novalis@novalis.org>,
	Duy Nguyen <pclouds@gmail.com>,
	Git Mailing List <git@vger.kernel.org>
Subject: Re: "disabling bitmap writing, as some objects are not being packed"?
Date: Wed, 8 Feb 2017 20:12:41 -0500	[thread overview]
Message-ID: <20170209011241.vfiup56gwrvlxm2k@sigill.intra.peff.net> (raw)
In-Reply-To: <xmqqbmuctdwu.fsf@gitster.mtv.corp.google.com>

On Wed, Feb 08, 2017 at 04:18:25PM -0800, Junio C Hamano wrote:

> > We wrote something similar at GitHub, too, but we never ended up using
> > it in production. We found that with a sane scheduler, it's not too big
> > a deal to just do maintenance once in a while.
> 
> Thanks again for this.  I've also been wondering about how effective
> a "concatenate packs without paying reachability penalty" would be.

For the sake of posterity, I'll include our patch at the end (sorry, not
chunked into nice readable commits; that never existed in the first
place).

> > I'm still not sure if it's worth making the fatal/non-fatal distinction.
> > Doing so is perhaps safer, but it does mean that somebody has to decide
> > which errors are important enough to block a retry totally, and which
> > are not. In theory, it would be safe to always _try_ and then the gc
> > process can decide when something is broken and abort. And all you've
> > wasted is some processing power each day.
> 
> Yup, and somebody or something need to monitor so that repeated
> failures can be dealt with.

Yes. I think that part is probably outside the scope of Git. But if
auto-gc leaves gc.log lying around, it would be easy to visit each repo
and collect the various failures.

-- >8 --
This is the "pack-fast" patch, for reference. It applies on v2.6.5,
though I had to do some wiggling due to a few of our other custom
patches, so it's possible I introduced new bugs. It compiles, but I
didn't actually re-test the result.  I _think_ the original at least
generated valid packs in all cases.

So I would certainly not recommend anybody run this. It's just a
possible base to work off of if anybody's interested in the topic. I
haven't looked at David's combine-packs at all to see if it is any less
gross. :)

---
 Makefile            |   1 +
 builtin.h           |   1 +
 builtin/pack-fast.c | 618 +++++++++++++++++++++++++++++++++++
 cache.h             |   5 +
 git.c               |   1 +
 pack-bitmap-write.c | 167 +++++++++-
 pack-bitmap.c       |   2 +-
 pack-bitmap.h       |   8 +
 sha1_file.c         |   4 +-
 9 files changed, 792 insertions(+), 15 deletions(-)

diff --git a/Makefile b/Makefile
index 37e2d9e18..524b185ec 100644
--- a/Makefile
+++ b/Makefile
@@ -887,6 +887,7 @@ BUILTIN_OBJS += builtin/mv.o
 BUILTIN_OBJS += builtin/name-rev.o
 BUILTIN_OBJS += builtin/notes.o
 BUILTIN_OBJS += builtin/pack-objects.o
+BUILTIN_OBJS += builtin/pack-fast.o
 BUILTIN_OBJS += builtin/pack-redundant.o
 BUILTIN_OBJS += builtin/pack-refs.o
 BUILTIN_OBJS += builtin/patch-id.o
diff --git a/builtin.h b/builtin.h
index 79aaf0afe..df4e4d668 100644
--- a/builtin.h
+++ b/builtin.h
@@ -95,6 +95,7 @@ extern int cmd_mv(int argc, const char **argv, const char *prefix);
 extern int cmd_name_rev(int argc, const char **argv, const char *prefix);
 extern int cmd_notes(int argc, const char **argv, const char *prefix);
 extern int cmd_pack_objects(int argc, const char **argv, const char *prefix);
+extern int cmd_pack_fast(int argc, const char **argv, const char *prefix);
 extern int cmd_pack_redundant(int argc, const char **argv, const char *prefix);
 extern int cmd_patch_id(int argc, const char **argv, const char *prefix);
 extern int cmd_prune(int argc, const char **argv, const char *prefix);
diff --git a/builtin/pack-fast.c b/builtin/pack-fast.c
new file mode 100644
index 000000000..ad9f5e5f1
--- /dev/null
+++ b/builtin/pack-fast.c
@@ -0,0 +1,618 @@
+#include "builtin.h"
+#include "cache.h"
+#include "pack.h"
+#include "progress.h"
+#include "csum-file.h"
+#include "sha1-lookup.h"
+#include "parse-options.h"
+#include "tempfile.h"
+#include "pack-bitmap.h"
+#include "pack-revindex.h"
+
+static const char *pack_usage[] = {
+	N_("git pack-fast --quiet [options...] [base-name]"),
+	NULL
+};
+
+struct packwriter {
+	struct tempfile *tmp;
+	off_t total;
+	int fd;
+	uint32_t crc32;
+	unsigned do_crc;
+};
+
+static void packwriter_crc32_start(struct packwriter *w)
+{
+	w->crc32 = crc32(0, NULL, 0);
+	w->do_crc = 1;
+}
+
+static uint32_t packwriter_crc32_end(struct packwriter *w)
+{
+	w->do_crc = 0;
+	return w->crc32;
+}
+
+static void packwriter_write(struct packwriter *w, const void *buf, unsigned int count)
+{
+	if (w->do_crc)
+		w->crc32 = crc32(w->crc32, buf, count);
+	write_or_die(w->fd, buf, count);
+	w->total += count;
+}
+
+static off_t packwriter_total(struct packwriter *w)
+{
+	return w->total;
+}
+
+static void packwriter_init(struct packwriter *w)
+{
+	char tmpname[PATH_MAX];
+
+	w->fd = odb_mkstemp(tmpname, sizeof(tmpname), "pack/tmp_pack_XXXXXX");
+	w->total = 0;
+	w->do_crc = 0;
+	w->tmp = xcalloc(1, sizeof(*w->tmp));
+
+	register_tempfile(w->tmp, tmpname);
+}
+
+
+static int progress = 1;
+static struct progress *progress_state;
+static struct pack_idx_option pack_idx_opts;
+static const char *base_name = "pack-fast";
+static int skip_largest;
+static int write_bitmap_index = 1;
+
+static struct packed_git **all_packfiles;
+static unsigned int all_packfiles_nr;
+
+static struct pack_idx_entry **written_list;
+static unsigned int written_nr;
+
+struct write_slab {
+	struct write_slab *next;
+	unsigned int nr;
+
+	struct write_slab_entry {
+		struct pack_idx_entry idx;
+		enum object_type real_type;
+	} entries[];
+};
+
+static struct write_slab *written_slab_root;
+static struct write_slab *written_slab_current;
+
+static void add_to_write_list(
+	const unsigned char *sha1, off_t offset, uint32_t crc32,
+	enum object_type real_type)
+{
+	struct write_slab *slab = written_slab_current;
+	struct write_slab_entry *entry = &(slab->entries[slab->nr++]);
+
+	entry->real_type = real_type;
+	entry->idx.offset = offset;
+	entry->idx.crc32 = crc32;
+	hashcpy(entry->idx.sha1, sha1);
+}
+
+static void preallocate_write_slab(unsigned int num_entries)
+{
+	struct write_slab *slab = xmalloc(
+		sizeof(struct write_slab) +
+		num_entries * sizeof(struct write_slab_entry));
+
+	slab->next = NULL;
+	slab->nr = 0;
+
+	if (!written_slab_current) {
+		written_slab_current = slab;
+		written_slab_root = slab;
+	} else {
+		written_slab_current->next = slab;
+		written_slab_current = slab;
+	}
+}
+
+static struct skipped_object {
+	off_t skipped_offset;
+	off_t real_offset;
+} *skipped_list;
+static unsigned int skipped_nr;
+static unsigned int skipped_alloc;
+
+static void add_to_skipped_list(off_t skipped_offset, off_t real_offset)
+{
+	if (skipped_nr >= skipped_alloc) {
+		skipped_alloc = (skipped_alloc + 32) * 2;
+		REALLOC_ARRAY(skipped_list, skipped_alloc);
+	}
+
+	skipped_list[skipped_nr].skipped_offset = skipped_offset;
+	skipped_list[skipped_nr].real_offset = real_offset;
+	skipped_nr++;
+}
+
+static off_t find_real_offset_for_base(off_t skipped_offset)
+{
+	int lo = 0, hi = skipped_nr;
+	while (lo < hi) {
+		int mi = lo + ((hi - lo) / 2);
+		if (skipped_offset == skipped_list[mi].skipped_offset)
+			return skipped_list[mi].real_offset;
+		if (skipped_offset < skipped_list[mi].skipped_offset)
+			hi = mi;
+		else
+			lo = mi + 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Record the offsets needed in our reused packfile chunks due to
+ * "gaps" where we omitted some objects.
+ */
+static struct reused_chunk {
+	off_t start;
+	off_t offset;
+} *reused_chunks;
+static int reused_chunks_nr;
+static int reused_chunks_alloc;
+
+static void record_reused_object(off_t where, off_t offset)
+{
+	if (reused_chunks_nr && reused_chunks[reused_chunks_nr-1].offset == offset)
+		return;
+
+	ALLOC_GROW(reused_chunks, reused_chunks_nr + 1,
+		   reused_chunks_alloc);
+	reused_chunks[reused_chunks_nr].start = where;
+	reused_chunks[reused_chunks_nr].offset = offset;
+	reused_chunks_nr++;
+}
+
+/*
+ * Binary search to find the chunk that "where" is in. Note
+ * that we're not looking for an exact match, just the first
+ * chunk that contains it (which implicitly ends at the start
+ * of the next chunk).
+ */
+static off_t find_reused_offset(off_t where)
+{
+	int lo = 0, hi = reused_chunks_nr;
+	while (lo < hi) {
+		int mi = lo + ((hi - lo) / 2);
+		if (where == reused_chunks[mi].start)
+			return reused_chunks[mi].offset;
+		if (where < reused_chunks[mi].start)
+			hi = mi;
+		else
+			lo = mi + 1;
+	}
+
+	/*
+	 * The first chunk starts at zero, so we can't have gone below
+	 * there.
+	 */
+	assert(lo);
+	return reused_chunks[lo-1].offset;
+}
+
+static uint32_t nth_packed_object_crc32(const struct packed_git *p, uint32_t nr)
+{
+	const uint32_t *index_crc = p->index_data;
+	index_crc += 2 + 256 + p->num_objects * (20/4) + nr;
+	return ntohl(*index_crc);
+}
+
+/* Open p's index; die unless it loads and is v2 (die() adds the newline). */
+static void load_index_or_die(struct packed_git *p)
+{
+	if (open_pack_index(p) < 0)
+		die("failed to open index for '%s'", p->pack_name);
+	if (p->index_version != 2)
+		die("unsupported index version %d (pack-fast requires index v2)",
+			p->index_version);
+}
+
+static int sort_pack(const void *a_, const void *b_)
+{
+	struct packed_git *a = *((struct packed_git **)a_);
+	struct packed_git *b = *((struct packed_git **)b_);
+
+	if (a->mtime > b->mtime)
+		return 1;
+	else if (a->mtime == b->mtime)
+		return 0;
+	return -1;
+}
+
+/* Fill all_packfiles[] with the local packs: largest first, rest by mtime. */
+static void find_packfiles(void)
+{
+	struct packed_git *p;
+	unsigned int n;
+
+	prepare_packed_git();
+	for (n = 0, p = packed_git; p; p = p->next) {
+		if (p->pack_local)
+			n++;
+	}
+	/* with n == 0 the unsigned "nr - 1" passed to qsort() would wrap */
+	if (!n)
+		die("no local packfiles found");
+	all_packfiles = xcalloc(n, sizeof(*all_packfiles));
+	all_packfiles_nr = n;
+	for (n = 0, p = packed_git; p; p = p->next) {
+		if (p->pack_local)
+			all_packfiles[n++] = p;
+	}
+	for (n = 1; n < all_packfiles_nr; ++n) {
+		if (all_packfiles[n]->pack_size > all_packfiles[0]->pack_size) {
+			struct packed_git *tmp = all_packfiles[0];
+			all_packfiles[0] = all_packfiles[n];
+			all_packfiles[n] = tmp;
+		}
+	}
+
+	qsort(all_packfiles + 1, all_packfiles_nr - 1, sizeof(*all_packfiles), sort_pack);
+}
+
+static int sha1_index__cmp(const void *a_, const void *b_)
+{
+	struct pack_idx_entry *a = *((struct pack_idx_entry **)a_);
+	struct pack_idx_entry *b = *((struct pack_idx_entry **)b_);
+	return hashcmp(a->sha1, b->sha1);
+}
+
+static const unsigned char *sha1_index__access(size_t pos, void *table)
+{
+	struct pack_idx_entry **index = table;
+	return index[pos]->sha1;
+}
+
+static void sha1_index_update(void)
+{
+	const unsigned int left_nr = written_nr;
+	const unsigned int right_nr = written_slab_current->nr;
+	const unsigned int total_nr = left_nr + right_nr;
+
+	struct pack_idx_entry **left = written_list;
+	struct pack_idx_entry **right = xmalloc(right_nr * sizeof(struct pack_idx_entry *));
+	struct pack_idx_entry **result = xmalloc(total_nr * sizeof(struct pack_idx_entry *));
+
+	unsigned int i, j, n;
+
+	for (j = 0; j < right_nr; ++j)
+		right[j] = (struct pack_idx_entry *)(&written_slab_current->entries[j]);
+
+	qsort(right, right_nr, sizeof(struct pack_idx_entry  *), sha1_index__cmp);
+
+	for (i = j = n = 0; i < left_nr && j < right_nr; ++n) {
+		struct pack_idx_entry *a = left[i];
+		struct pack_idx_entry *b = right[j];
+
+		if (hashcmp(a->sha1, b->sha1) <= 0) {
+			result[n] = a;
+			i++;
+		} else {
+			result[n] = b;
+			j++;
+		}
+	}
+
+	for (; i < left_nr; ++n, ++i)
+		result[n] = left[i];
+
+	for (; j < right_nr; ++n, ++j)
+		result[n] = right[j];
+
+	free(written_list);
+	free(right);
+
+	written_list = result;
+	written_nr = total_nr;
+}
+
+static off_t sha1_index_find_offset(const unsigned char *sha1)
+{
+	int pos = sha1_pos(sha1, written_list, written_nr, sha1_index__access);
+	return (pos < 0) ? 0 : written_list[pos]->offset;
+}
+
+static void copy_pack_data(
+		struct packwriter *w,
+		struct packed_git *p,
+		struct pack_window **w_curs,
+		off_t offset,
+		off_t len)
+{
+	unsigned char *in;
+	unsigned long avail;
+
+	while (len) {
+		in = use_pack(p, w_curs, offset, &avail);
+		if (avail > len)
+			avail = (unsigned long)len;
+		packwriter_write(w, in, avail);
+		offset += avail;
+		len -= avail;
+	}
+}
+
+extern enum object_type packed_to_object_type(
+	struct packed_git *p, off_t obj_offset, enum object_type type,
+	struct pack_window **w_curs, off_t curpos);
+
+static int append_object_1(
+	struct revindex_entry *reventry,
+	struct packwriter *w,
+	struct packed_git *pack,
+	struct pack_window **w_curs,
+	enum object_type *real_type)
+{
+	const off_t offset = reventry[0].offset;
+	const off_t next = reventry[1].offset;
+
+	off_t cur;
+	enum object_type type;
+	unsigned long size;
+
+	record_reused_object(offset, offset - packwriter_total(w));
+
+	cur = offset;
+	type = unpack_object_header(pack, w_curs, &cur, &size);
+	assert(type >= 0);
+
+	if (write_bitmap_index)
+		*real_type = packed_to_object_type(pack, offset, type, w_curs, cur);
+
+	if (type == OBJ_OFS_DELTA) {
+		const off_t base_offset = get_delta_base(pack, w_curs, &cur, type, offset);
+		const off_t real_base_offset = find_real_offset_for_base(base_offset);
+		off_t fixed_offset = 0;
+
+		assert(base_offset != 0);
+
+		if (real_base_offset) {
+			fixed_offset = packwriter_total(w) - real_base_offset;
+		} else {
+			off_t fixup = find_reused_offset(offset) - find_reused_offset(base_offset);
+			if (fixup)
+				fixed_offset = offset - base_offset - fixup;
+		}
+
+		if (fixed_offset) {
+			unsigned char header[10], ofs_header[10];
+			unsigned i, len, ofs_len;
+
+			assert(fixed_offset > 0);
+			len = encode_in_pack_object_header(OBJ_OFS_DELTA, size, header);
+
+			i = sizeof(ofs_header) - 1;
+			ofs_header[i] = fixed_offset & 127;
+			while (fixed_offset >>= 7)
+				ofs_header[--i] = 128 | (--fixed_offset & 127);
+
+			ofs_len = sizeof(ofs_header) - i;
+
+			packwriter_write(w, header, len);
+			packwriter_write(w, ofs_header + sizeof(ofs_header) - ofs_len, ofs_len);
+			copy_pack_data(w, pack, w_curs, cur, next - cur);
+			return 1;
+		}
+
+		/* ...otherwise we have no fixup, and can write it verbatim */
+	}
+
+	copy_pack_data(w, pack, w_curs, offset, next - offset);
+	return 0;
+}
+
+/*
+ * Copy all but the final 20 bytes of the pack open on "from" into w.
+ * Returns 0 on success, -1 on error (including a short or truncated source).
+ */
+static int copy_packfile(int from, struct packwriter *w)
+{
+	unsigned char buffer[8192];
+	struct stat st;
+	ssize_t to_read;
+
+	if (from < 0 || fstat(from, &st) || st.st_size < 20)
+		return -1;
+
+	posix_fadvise(from, 0, st.st_size, POSIX_FADV_SEQUENTIAL);
+	to_read = st.st_size - 20;
+	if (progress)
+		fprintf(stderr, "Copying main packfile...");
+
+	while (to_read) {
+		ssize_t r, cap = sizeof(buffer);
+		if (cap > to_read)
+			cap = to_read;
+		r = xread(from, buffer, cap);
+		/* r == 0 means EOF: bail out rather than spin forever */
+		if (r <= 0)
+			return -1;
+		packwriter_write(w, buffer, r);
+		to_read -= r;
+	}
+
+	if (progress)
+		fprintf(stderr, " done.\n");
+	return 0;
+}
+
+/* Copy pack p into w (minus its last 20 bytes) and seed written_list from p's index. */
+static void write_initial_packfile(struct packed_git *p, struct packwriter *w)
+{
+	unsigned int n;
+	int source_fd = git_open_noatime(p->pack_name);
+
+	if (copy_packfile(source_fd, w) < 0)
+		die_errno("failed to copy '%s'", p->pack_name);
+	close(source_fd);
+
+	load_index_or_die(p);
+	preallocate_write_slab(p->num_objects);
+
+	if (progress)
+		progress_state = start_progress("Indexing main packfile", p->num_objects);
+	for (n = 0; n < p->num_objects; ++n) {
+		const unsigned char *sha1 = nth_packed_object_sha1(p, n);
+		const off_t offset = nth_packed_object_offset(p, n);
+		const uint32_t crc32 = nth_packed_object_crc32(p, n);
+		add_to_write_list(sha1, offset, crc32, OBJ_BAD);
+		display_progress(progress_state, n + 1);
+	}
+	stop_progress(&progress_state);
+	close_pack_index(p);
+
+	/* written_list holds pack_idx_entry pointers, not packed_git ones */
+	written_list = xmalloc(p->num_objects * sizeof(*written_list));
+	written_nr = p->num_objects;
+	for (n = 0; n < written_nr; ++n)
+		written_list[n] = (struct pack_idx_entry *)(&written_slab_current->entries[n]);
+}
+
+static void append_packfile(struct packed_git *p, struct packwriter *w)
+{
+	struct pack_window *w_curs = NULL;
+	struct pack_revindex *revidx;
+
+	unsigned int n;
+
+	load_index_or_die(p);
+	preallocate_write_slab(p->num_objects);
+	revidx = revindex_for_pack(p);
+
+	if (progress)
+		progress_state = start_progress("Appending packfile", p->num_objects);
+
+	for (n = 0; n < p->num_objects; ++n) {
+		struct revindex_entry *reventry = &revidx->revindex[n];
+		const unsigned char *sha1 = nth_packed_object_sha1(p, reventry[0].nr);
+		const off_t offset_in_pack = sha1_index_find_offset(sha1);
+
+		if (!offset_in_pack) {
+			const off_t offset = packwriter_total(w);
+
+			enum object_type real_type = OBJ_BAD;
+			uint32_t crc32;
+			int rewrite_header;
+
+			packwriter_crc32_start(w);
+			rewrite_header = append_object_1(reventry, w, p, &w_curs, &real_type);
+			crc32 = packwriter_crc32_end(w);
+
+			if (!rewrite_header && crc32 != nth_packed_object_crc32(p, reventry[0].nr))
+				die("crc32 check failed for %s", sha1_to_hex(sha1));
+
+			add_to_write_list(sha1, offset, crc32, real_type);
+		} else {
+			add_to_skipped_list(reventry[0].offset, offset_in_pack);
+		}
+
+		display_progress(progress_state, n + 1);
+	}
+
+	stop_progress(&progress_state);
+	unuse_pack(&w_curs);
+	close_pack_windows(p);
+	close_pack_index(p);
+
+	sha1_index_update();
+	skipped_nr = 0;
+	reused_chunks_nr = 0;
+}
+
+static void write_packs(void)
+{
+	struct packwriter w;
+	unsigned int i;
+
+	packwriter_init(&w);
+	write_initial_packfile(all_packfiles[0], &w);
+
+	for (i = 1; i < all_packfiles_nr; ++i)
+		append_packfile(all_packfiles[i], &w);
+
+	/* finalize pack */
+	{
+		unsigned char sha1[20];
+		struct strbuf tmpname = STRBUF_INIT;
+
+		fixup_pack_header_footer(w.fd, sha1, w.tmp->filename.buf, written_nr, NULL, 0);
+		close(w.fd);
+
+		strbuf_addf(&tmpname, "%s-", base_name);
+
+		finish_tmp_packfile(&tmpname, w.tmp->filename.buf,
+				written_list, written_nr,
+				&pack_idx_opts, sha1);
+
+		if (write_bitmap_index) {
+			strbuf_addf(&tmpname, "%s.bitmap", sha1_to_hex(sha1));
+			bitmap_rewrite_existing(
+				all_packfiles[0],
+				written_list, written_nr,
+				packwriter_total(&w),
+				sha1, tmpname.buf);
+		}
+
+		strbuf_release(&tmpname);
+		puts(sha1_to_hex(sha1));
+	}
+}
+
+/* Set the type bit of every object appended after the initial pack p. */
+void pack_fast_grow_typemaps(struct packed_git *p, struct ewah_bitmap **typemaps)
+{
+	uint32_t n;
+	size_t pos = p->num_objects;
+	struct write_slab *slab = written_slab_root;
+
+	assert(slab->nr == p->num_objects);
+
+	/* a lone pack has no further slabs; the loop is then simply a no-op */
+	for (slab = slab->next; slab; slab = slab->next) {
+		for (n = 0; n < slab->nr; ++n) {
+			const enum object_type real_type = slab->entries[n].real_type;
+			if (real_type < OBJ_COMMIT || real_type > OBJ_TAG)
+				die("object with unknown type in appended pack");
+			ewah_set(typemaps[real_type - 1], pos++);
+		}
+	}
+}
+
+int cmd_pack_fast(int argc, const char **argv, const char *prefix)
+{
+	struct option pack_fast_options[] = {
+		OPT_SET_INT('q', "quiet", &progress,
+			    N_("do not show progress meter"), 0),
+		OPT_SET_INT(0, "progress", &progress,
+			    N_("show progress meter"), 1),
+		OPT_BOOL(0, "skip-largest", &skip_largest,
+			 N_("do not pack the largest packfile in the repository")),
+		OPT_END(),
+	};
+
+	reset_pack_idx_option(&pack_idx_opts);
+	progress = isatty(2);
+	argc = parse_options(argc, argv, prefix, pack_fast_options,
+			     pack_usage, 0);
+
+	if (argc) {
+		base_name = argv[0];
+		argc--;
+	}
+
+	find_packfiles();
+	write_packs();
+	return 0;
+}
diff --git a/cache.h b/cache.h
index 6f53962bf..1a13961bd 100644
--- a/cache.h
+++ b/cache.h
@@ -1336,6 +1336,11 @@ extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsign
 extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
 extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
 extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
+extern off_t get_delta_base(struct packed_git *p,
+			    struct pack_window **w_curs,
+			    off_t *curpos,
+			    enum object_type type,
+			    off_t delta_obj_offset);
 
 /*
  * Iterate over the files in the loose-object parts of the object
diff --git a/git.c b/git.c
index 40f9df089..d81bd4469 100644
--- a/git.c
+++ b/git.c
@@ -440,6 +440,7 @@ static struct cmd_struct commands[] = {
 	{ "name-rev", cmd_name_rev, RUN_SETUP },
 	{ "notes", cmd_notes, RUN_SETUP },
 	{ "pack-objects", cmd_pack_objects, RUN_SETUP },
+	{ "pack-fast", cmd_pack_fast, RUN_SETUP },
 	{ "pack-redundant", cmd_pack_redundant, RUN_SETUP },
 	{ "pack-refs", cmd_pack_refs, RUN_SETUP },
 	{ "patch-id", cmd_patch_id },
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index c05d1386a..449715f02 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -505,23 +505,39 @@ void bitmap_writer_set_checksum(unsigned char *sha1)
 	hashcpy(writer.pack_checksum, sha1);
 }
 
+static struct sha1file *bitmap_file_new(char *tmp_file, size_t len)
+{
+	int fd = odb_mkstemp(tmp_file, len, "pack/tmp_bitmap_XXXXXX");
+
+	if (fd < 0)
+		die_errno("unable to create '%s'", tmp_file);
+
+	return sha1fd(fd, tmp_file);
+}
+
+static void bitmap_file_close(struct sha1file *f, const char *tmp_file, const char *dest)
+{
+	sha1close(f, NULL, CSUM_FSYNC);
+
+	if (adjust_shared_perm(tmp_file))
+		die_errno("unable to make temporary bitmap file readable");
+
+	if (rename(tmp_file, dest))
+		die_errno("unable to rename temporary bitmap file to '%s'", dest);
+}
+
 void bitmap_writer_finish(struct pack_idx_entry **index,
 			  uint32_t index_nr,
 			  const char *filename,
 			  uint16_t options)
 {
-	static char tmp_file[PATH_MAX];
 	static uint16_t default_version = 1;
 	static uint16_t flags = BITMAP_OPT_FULL_DAG;
+	char tmp_file[PATH_MAX];
 	struct sha1file *f;
-
 	struct bitmap_disk_header header;
 
-	int fd = odb_mkstemp(tmp_file, sizeof(tmp_file), "pack/tmp_bitmap_XXXXXX");
-
-	if (fd < 0)
-		die_errno("unable to create '%s'", tmp_file);
-	f = sha1fd(fd, tmp_file);
+	f = bitmap_file_new(tmp_file, sizeof(tmp_file));
 
 	memcpy(header.magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE));
 	header.version = htons(default_version);
@@ -539,11 +555,138 @@ void bitmap_writer_finish(struct pack_idx_entry **index,
 	if (options & BITMAP_OPT_HASH_CACHE)
 		write_hash_cache(f, index, index_nr);
 
-	sha1close(f, NULL, CSUM_FSYNC);
+	bitmap_file_close(f, tmp_file, filename);
+}
 
-	if (adjust_shared_perm(tmp_file))
-		die_errno("unable to make temporary bitmap file readable");
+static void *try_load_bitmap(struct packed_git *p, size_t *_size_out)
+{
+	void *reused_bitmap;
+	size_t reused_bitmap_size;
+
+	int fd;
+	struct stat st;
+	char *idx_name;
+
+	idx_name = pack_bitmap_filename(p);
+	fd = git_open_noatime(idx_name);
+	free(idx_name);
+
+	if (fd < 0)
+		return NULL;
+
+	if (fstat(fd, &st)) {
+		close(fd);
+		return NULL;
+	}
+
+	reused_bitmap_size = xsize_t(st.st_size);
+	reused_bitmap = xmmap(NULL, reused_bitmap_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	close(fd);
+
+	*_size_out = reused_bitmap_size;
+	return reused_bitmap;
+}
+
+extern void pack_fast_grow_typemaps(struct packed_git *p, struct ewah_bitmap **typemaps);
+
+static size_t rewrite_type_maps(struct sha1file *f,
+	struct packed_git *p, unsigned char *original_map, size_t original_size, size_t pos)
+{
+	struct ewah_bitmap *typemaps[4];
+	int r, i;
+
+	for (i = 0; i < 4; ++i) {
+		typemaps[i] = ewah_pool_new();
+		r = ewah_read_mmap(typemaps[i], original_map + pos, original_size - pos);
+		if (r < 0)
+			die("failed to read bitmap index");
+		pos += r;
+	}
+
+	pack_fast_grow_typemaps(p, typemaps);
+
+	for (i = 0; i < 4; ++i) {
+		dump_bitmap(f, typemaps[i]);
+		ewah_pool_free(typemaps[i]);
+	}
+
+	return pos;
+}
+
+/* Copy entry_count bitmap entries to f, remapping positions via "index". */
+static size_t rewrite_bitmaps(struct sha1file *f,
+	struct packed_git *p, unsigned char *original_map, size_t original_size, size_t pos,
+	uint32_t entry_count, struct pack_idx_entry **index, uint32_t index_nr)
+{
+	uint32_t i;
+	for (i = 0; i < entry_count; ++i) {
+		const unsigned char *sha1;
+		uint32_t src_idx, src_buffer_len;
+		size_t total_len;
+		int new_idx;
+		/* position (4) plus the 2 + 4 + 4 bytes read to learn the length */
+		if (pos > original_size || original_size - pos < 4 + 2 + 4 + 4)
+			die("unexpected end of file");
+		src_idx = get_be32(original_map + pos);
+		pos += 4;
+		sha1 = nth_packed_object_sha1(p, src_idx);
+		new_idx = sha1 ? sha1_pos(sha1, index, index_nr, sha1_access) : -1;
+		if (new_idx < 0)
+			die("bitmap entry refers to an object missing from the new pack");
+		sha1write_be32(f, (uint32_t)new_idx);
+		src_buffer_len = get_be32(original_map + pos + 2 + 4);
+		total_len = (3 * 4) + ((size_t)src_buffer_len * 8);
+		if (original_size - pos < 2 + total_len)
+			die("unexpected end of file");
+		sha1write(f, original_map + pos, 2 + total_len);
+		pos += 2 + total_len;
+	}
+	return pos;
+}
+
+void bitmap_rewrite_existing(
+	struct packed_git *p,
+	struct pack_idx_entry **index,
+	uint32_t index_nr,
+	off_t pack_offset,
+	const unsigned char *pack_sha1,
+	const char *filename)
+{
+	char tmp_file[PATH_MAX];
+	struct sha1file *f;
+
+	unsigned char *original_map;
+	size_t original_size, pos = 0;
+	struct bitmap_disk_header header;
+
+	original_map = try_load_bitmap(p, &original_size);
+	if (!original_map || original_size < sizeof(header) + 20)
+		return;
+
+	memcpy(&header, original_map, sizeof(header));
+	hashcpy(header.checksum, pack_sha1);
+
+	if (memcmp(header.magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)) != 0)
+		die("existing bitmap for '%s' is corrupted", p->pack_name);
+
+	if (ntohs(header.version) != 1)
+		die("existing bitmap for '%s' has an unsupported version", p->pack_name);
+
+	f = bitmap_file_new(tmp_file, sizeof(tmp_file));
+
+	sha1write(f, &header, sizeof(header));
+	pos = sizeof(header);
+	pos = rewrite_type_maps(f, p, original_map, original_size, pos);
+	pos = rewrite_bitmaps(f, p, original_map, original_size, pos,
+			ntohl(header.entry_count), index, index_nr);
+
+	if (ntohs(header.options) & BITMAP_OPT_HASH_CACHE) {
+		uint32_t i, zero = 0;
+		sha1write(f, original_map + pos, p->num_objects * 4);
+		for (i = p->num_objects; i < index_nr; ++i)
+			sha1write(f, &zero, 4);
+		pos += (p->num_objects * 4);
+	}
 
-	if (rename(tmp_file, filename))
-		die_errno("unable to rename temporary bitmap file to '%s'", filename);
+	bitmap_file_close(f, tmp_file, filename);
 }
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 637770af8..ee361fa6a 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -250,7 +250,7 @@ static int load_bitmap_entries_v1(struct bitmap_index *index)
 	return 0;
 }
 
-static char *pack_bitmap_filename(struct packed_git *p)
+char *pack_bitmap_filename(struct packed_git *p)
 {
 	char *idx_name;
 	int len;
diff --git a/pack-bitmap.h b/pack-bitmap.h
index 0adcef77b..398523dbb 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -34,6 +34,7 @@ typedef int (*show_reachable_fn)(
 	struct packed_git *found_pack,
 	off_t found_offset);
 
+char *pack_bitmap_filename(struct packed_git *p);
 int prepare_bitmap_git(void);
 void count_bitmap_commit_list(uint32_t *commits, uint32_t *trees, uint32_t *blobs, uint32_t *tags);
 void traverse_bitmap_commit_list(show_reachable_fn show_reachable);
@@ -53,5 +54,12 @@ void bitmap_writer_finish(struct pack_idx_entry **index,
 			  uint32_t index_nr,
 			  const char *filename,
 			  uint16_t options);
+void bitmap_rewrite_existing(
+	struct packed_git *p,
+	struct pack_idx_entry **index,
+	uint32_t index_nr,
+	off_t pack_offset,
+	const unsigned char *pack_sha1,
+	const char *filename);
 
 #endif
diff --git a/sha1_file.c b/sha1_file.c
index 72289696d..bcd447f16 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1821,7 +1821,7 @@ unsigned long get_size_from_delta(struct packed_git *p,
 	return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
 }
 
-static off_t get_delta_base(struct packed_git *p,
+off_t get_delta_base(struct packed_git *p,
 				    struct pack_window **w_curs,
 				    off_t *curpos,
 				    enum object_type type,
@@ -1936,7 +1936,7 @@ static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset)
 
 #define POI_STACK_PREALLOC 64
 
-static enum object_type packed_to_object_type(struct packed_git *p,
+enum object_type packed_to_object_type(struct packed_git *p,
 					      off_t obj_offset,
 					      enum object_type type,
 					      struct pack_window **w_curs,

      reply	other threads:[~2017-02-09  1:20 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-12-16 21:05 "disabling bitmap writing, as some objects are not being packed"? David Turner
2016-12-16 21:27 ` Jeff King
2016-12-16 21:28 ` Junio C Hamano
2016-12-16 21:32   ` Jeff King
2016-12-16 21:40     ` David Turner
2016-12-16 21:49       ` Jeff King
2016-12-16 23:59         ` [PATCH] pack-objects: don't warn about bitmaps on incremental pack David Turner
2016-12-17  4:04           ` Jeff King
2016-12-19 16:03             ` David Turner
2016-12-17  7:50   ` "disabling bitmap writing, as some objects are not being packed"? Duy Nguyen
2017-02-08  1:03     ` David Turner
2017-02-08  6:45       ` Duy Nguyen
2017-02-08  8:24         ` David Turner
2017-02-08  8:37           ` Duy Nguyen
2017-02-08 17:44             ` Junio C Hamano
2017-02-08 19:05               ` David Turner
2017-02-08 19:08                 ` Jeff King
2017-02-08 22:14                   ` David Turner
2017-02-08 23:00                     ` Jeff King
2017-02-09  0:18                       ` Junio C Hamano
2017-02-09  1:12                         ` Jeff King [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170209011241.vfiup56gwrvlxm2k@sigill.intra.peff.net \
    --to=peff@peff.net \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=novalis@novalis.org \
    --cc=pclouds@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.