git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: Thomas Rast <trast@student.ethz.ch>
Cc: git@vger.kernel.org, "Stefan Zager" <szager@google.com>,
	"Jeff King" <peff@peff.net>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>,
	"Nicolas Pitre" <nico@fluxnic.net>
Subject: Re: [PATCH v3 3/3] sha1_file: remove recursion in unpack_entry
Date: Wed, 27 Mar 2013 13:29:23 -0700	[thread overview]
Message-ID: <7vli98lh7g.fsf@alter.siamese.dyndns.org> (raw)
In-Reply-To: <1b5875495c959ba96848fcf9d3067e25e3c1e75b.1364414442.git.trast@inf.ethz.ch> (Thomas Rast's message of "Wed, 27 Mar 2013 21:03:42 +0100")

Thomas Rast <trast@student.ethz.ch> writes:

> Similar to the recursion in packed_object_info(), this leads to
> problems on stack-space-constrained systems in the presence of long
> delta chains.
>
> We proceed in three phases:
>
> 1. Dig through the delta chain, saving each delta object's offsets and
>    size on an ad-hoc stack.
>
> 2. Unpack the base object at the bottom.
>
> 3. Unpack and apply the deltas from the stack.
>
> Signed-off-by: Thomas Rast <trast@student.ethz.ch>
> ---

Sounds sensible.

Do we keep the packfiles open that hold the deltas involved in the
chain so that they won't be removed by a concurrent repack?  I do
not think this rewrite changes anything with respect to that access
pattern, though.

Will replace and merge to 'next'.  Thanks.

>  sha1_file.c | 231 +++++++++++++++++++++++++++++++++++++++---------------------
>  1 file changed, 150 insertions(+), 81 deletions(-)
>
> diff --git a/sha1_file.c b/sha1_file.c
> index da41f51..f9191aa 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -1943,68 +1943,6 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
>  static void *read_object(const unsigned char *sha1, enum object_type *type,
>  			 unsigned long *size);
>  
> -static void *unpack_delta_entry(struct packed_git *p,
> -				struct pack_window **w_curs,
> -				off_t curpos,
> -				unsigned long delta_size,
> -				off_t obj_offset,
> -				enum object_type *type,
> -				unsigned long *sizep)
> -{
> -	void *delta_data, *result, *base;
> -	unsigned long base_size;
> -	off_t base_offset;
> -
> -	base_offset = get_delta_base(p, w_curs, &curpos, *type, obj_offset);
> -	if (!base_offset) {
> -		error("failed to validate delta base reference "
> -		      "at offset %"PRIuMAX" from %s",
> -		      (uintmax_t)curpos, p->pack_name);
> -		return NULL;
> -	}
> -	unuse_pack(w_curs);
> -	base = cache_or_unpack_entry(p, base_offset, &base_size, type, 0);
> -	if (!base) {
> -		/*
> -		 * We're probably in deep shit, but let's try to fetch
> -		 * the required base anyway from another pack or loose.
> -		 * This is costly but should happen only in the presence
> -		 * of a corrupted pack, and is better than failing outright.
> -		 */
> -		struct revindex_entry *revidx;
> -		const unsigned char *base_sha1;
> -		revidx = find_pack_revindex(p, base_offset);
> -		if (!revidx)
> -			return NULL;
> -		base_sha1 = nth_packed_object_sha1(p, revidx->nr);
> -		error("failed to read delta base object %s"
> -		      " at offset %"PRIuMAX" from %s",
> -		      sha1_to_hex(base_sha1), (uintmax_t)base_offset,
> -		      p->pack_name);
> -		mark_bad_packed_object(p, base_sha1);
> -		base = read_object(base_sha1, type, &base_size);
> -		if (!base)
> -			return NULL;
> -	}
> -
> -	delta_data = unpack_compressed_entry(p, w_curs, curpos, delta_size);
> -	if (!delta_data) {
> -		error("failed to unpack compressed delta "
> -		      "at offset %"PRIuMAX" from %s",
> -		      (uintmax_t)curpos, p->pack_name);
> -		free(base);
> -		return NULL;
> -	}
> -	result = patch_delta(base, base_size,
> -			     delta_data, delta_size,
> -			     sizep);
> -	if (!result)
> -		die("failed to apply delta");
> -	free(delta_data);
> -	add_delta_base_cache(p, base_offset, base, base_size, *type);
> -	return result;
> -}
> -
>  static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
>  {
>  	static FILE *log_file;
> @@ -2025,48 +1963,179 @@ static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
>  
>  int do_check_packed_object_crc;
>  
> +#define UNPACK_ENTRY_STACK_PREALLOC 64
> +struct unpack_entry_stack_ent {
> +	off_t obj_offset;
> +	off_t curpos;
> +	unsigned long size;
> +};
> +
>  void *unpack_entry(struct packed_git *p, off_t obj_offset,
> -		   enum object_type *type, unsigned long *sizep)
> +		   enum object_type *final_type, unsigned long *final_size)
>  {
>  	struct pack_window *w_curs = NULL;
>  	off_t curpos = obj_offset;
> -	void *data;
> +	void *data = NULL;
> +	unsigned long size;
> +	enum object_type type;
> +	struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];
> +	struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
> +	int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
> +	int base_from_cache = 0;
>  
>  	if (log_pack_access)
>  		write_pack_access_log(p, obj_offset);
>  
> -	if (do_check_packed_object_crc && p->index_version > 1) {
> -		struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
> -		unsigned long len = revidx[1].offset - obj_offset;
> -		if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
> -			const unsigned char *sha1 =
> -				nth_packed_object_sha1(p, revidx->nr);
> -			error("bad packed object CRC for %s",
> -			      sha1_to_hex(sha1));
> -			mark_bad_packed_object(p, sha1);
> -			unuse_pack(&w_curs);
> -			return NULL;
> +	/* PHASE 1: drill down to the innermost base object */
> +	for (;;) {
> +		off_t base_offset;
> +		int i;
> +		struct delta_base_cache_entry *ent;
> +
> +		if (do_check_packed_object_crc && p->index_version > 1) {
> +			struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
> +			unsigned long len = revidx[1].offset - obj_offset;
> +			if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
> +				const unsigned char *sha1 =
> +					nth_packed_object_sha1(p, revidx->nr);
> +				error("bad packed object CRC for %s",
> +				      sha1_to_hex(sha1));
> +				mark_bad_packed_object(p, sha1);
> +				unuse_pack(&w_curs);
> +				return NULL;
> +			}
> +		}
> +
> +		ent = get_delta_base_cache_entry(p, curpos);
> +		if (eq_delta_base_cache_entry(ent, p, curpos)) {
> +			type = ent->type;
> +			data = ent->data;
> +			size = ent->size;
> +			clear_delta_base_cache_entry(ent);
> +			base_from_cache = 1;
> +			break;
> +		}
> +
> +		type = unpack_object_header(p, &w_curs, &curpos, &size);
> +		if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)
> +			break;
> +
> +		base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
> +		if (!base_offset) {
> +			error("failed to validate delta base reference "
> +			      "at offset %"PRIuMAX" from %s",
> +			      (uintmax_t)curpos, p->pack_name);
> +			/* bail to phase 2, in hopes of recovery */
> +			data = NULL;
> +			break;
>  		}
> +
> +		/* push object, proceed to base */
> +		if (delta_stack_nr >= delta_stack_alloc
> +		    && delta_stack == small_delta_stack) {
> +			delta_stack_alloc = alloc_nr(delta_stack_nr);
> +			delta_stack = xmalloc(sizeof(*delta_stack)*delta_stack_alloc);
> +			memcpy(delta_stack, small_delta_stack,
> +			       sizeof(*delta_stack)*delta_stack_nr);
> +		} else {
> +			ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc);
> +		}
> +		i = delta_stack_nr++;
> +		delta_stack[i].obj_offset = obj_offset;
> +		delta_stack[i].curpos = curpos;
> +		delta_stack[i].size = size;
> +
> +		curpos = obj_offset = base_offset;
>  	}
>  
> -	*type = unpack_object_header(p, &w_curs, &curpos, sizep);
> -	switch (*type) {
> +	/* PHASE 2: handle the base */
> +	switch (type) {
>  	case OBJ_OFS_DELTA:
>  	case OBJ_REF_DELTA:
> -		data = unpack_delta_entry(p, &w_curs, curpos, *sizep,
> -					  obj_offset, type, sizep);
> +		if (data)
> +			die("BUG in unpack_entry: left loop at a valid delta");
>  		break;
>  	case OBJ_COMMIT:
>  	case OBJ_TREE:
>  	case OBJ_BLOB:
>  	case OBJ_TAG:
> -		data = unpack_compressed_entry(p, &w_curs, curpos, *sizep);
> +		if (!base_from_cache)
> +			data = unpack_compressed_entry(p, &w_curs, curpos, size);
>  		break;
>  	default:
>  		data = NULL;
>  		error("unknown object type %i at offset %"PRIuMAX" in %s",
> -		      *type, (uintmax_t)obj_offset, p->pack_name);
> +		      type, (uintmax_t)obj_offset, p->pack_name);
>  	}
> +
> +	/* PHASE 3: apply deltas in order */
> +
> +	/* invariants:
> +	 *   'data' holds the base data, or NULL if there was corruption
> +	 */
> +	while (delta_stack_nr) {
> +		void *delta_data;
> +		void *base = data;
> +		unsigned long delta_size, base_size = size;
> +		int i;
> +
> +		data = NULL;
> +
> +		if (base)
> +			add_delta_base_cache(p, obj_offset, base, base_size, type);
> +
> +		if (!base) {
> +			/*
> +			 * We're probably in deep shit, but let's try to fetch
> +			 * the required base anyway from another pack or loose.
> +			 * This is costly but should happen only in the presence
> +			 * of a corrupted pack, and is better than failing outright.
> +			 */
> +			struct revindex_entry *revidx;
> +			const unsigned char *base_sha1;
> +			revidx = find_pack_revindex(p, obj_offset);
> +			if (revidx) {
> +				base_sha1 = nth_packed_object_sha1(p, revidx->nr);
> +				error("failed to read delta base object %s"
> +				      " at offset %"PRIuMAX" from %s",
> +				      sha1_to_hex(base_sha1), (uintmax_t)obj_offset,
> +				      p->pack_name);
> +				mark_bad_packed_object(p, base_sha1);
> +				base = read_object(base_sha1, &type, &base_size);
> +			}
> +		}
> +
> +		i = --delta_stack_nr;
> +		obj_offset = delta_stack[i].obj_offset;
> +		curpos = delta_stack[i].curpos;
> +		delta_size = delta_stack[i].size;
> +
> +		if (!base)
> +			continue;
> +
> +		delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size);
> +
> +		if (!delta_data) {
> +			error("failed to unpack compressed delta "
> +			      "at offset %"PRIuMAX" from %s",
> +			      (uintmax_t)curpos, p->pack_name);
> +			free(base);
> +			data = NULL;
> +			continue;
> +		}
> +
> +		data = patch_delta(base, base_size,
> +				   delta_data, delta_size,
> +				   &size);
> +		if (!data)
> +			die("failed to apply delta");
> +
> +		free (delta_data);
> +	}
> +
> +	*final_type = type;
> +	*final_size = size;
> +
>  	unuse_pack(&w_curs);
>  	return data;
>  }

  reply	other threads:[~2013-03-27 20:30 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-03-15 22:42 regression in multi-threaded git-pack-index Stefan Zager
2013-03-16 11:41 ` Jeff King
2013-03-16 12:38   ` Duy Nguyen
2013-03-19  8:17   ` Thomas Rast
2013-03-19  9:30     ` Jeff King
2013-03-19  9:59       ` Jeff King
2013-03-19 10:08         ` Jeff King
2013-03-19 10:24           ` Jeff King
2013-03-19 10:29             ` Thomas Rast
2013-03-19 10:33               ` Jeff King
2013-03-19 10:45                 ` Thomas Rast
2013-03-19 10:47                   ` Jeff King
2013-03-19 10:58             ` [PATCH] index-pack: always zero-initialize object_entry list Jeff King
2013-03-19 15:33               ` Thomas Rast
2013-03-19 15:43                 ` Jeff King
2013-03-19 15:52                   ` Jeff King
2013-03-19 16:17                     ` [PATCH v2] " Jeff King
2013-03-19 16:27                       ` Thomas Rast
2013-03-19 17:13                         ` Junio C Hamano
2013-03-20 19:12                       ` Eric Sunshine
2013-03-20 19:13                         ` Jeff King
2013-03-20 19:14                           ` Eric Sunshine
2013-03-19 12:35     ` regression in multi-threaded git-pack-index Duy Nguyen
2013-03-19 13:01       ` [PATCH] index-pack: protect deepest_delta in multithread code Nguyễn Thái Ngọc Duy
2013-03-19 13:25         ` Jeff King
2013-03-19 13:50         ` Thomas Rast
2013-03-19 14:07           ` Duy Nguyen
2013-03-19 14:16             ` [PATCH v2] index-pack: guard nr_resolved_deltas reads by lock Thomas Rast
2013-03-19 15:53               ` Junio C Hamano
2013-03-19 15:41 ` regression in multi-threaded git-pack-index Thomas Rast
2013-03-19 15:45   ` Thomas Rast
2013-03-19 16:11     ` Thomas Rast
2013-03-19 17:58       ` Junio C Hamano
2013-03-19 22:08         ` [PATCH] sha1_file: remove recursion in packed_object_info Thomas Rast
2013-03-20 16:47           ` Junio C Hamano
2013-03-25  9:27             ` thomas
2013-03-25 18:07               ` [PATCH v2 0/3] Recursion-free unpack_entry and packed_object_info Thomas Rast
2013-03-25 18:07                 ` [PATCH v2 1/3] sha1_file: remove recursion in packed_object_info Thomas Rast
2013-03-25 18:07                 ` [PATCH v2 2/3] Refactor parts of in_delta_base_cache/cache_or_unpack_entry Thomas Rast
2013-03-25 23:15                   ` Junio C Hamano
2013-03-26 11:09                     ` thomas
2013-03-25 18:07                 ` [PATCH v2 3/3] sha1_file: remove recursion in unpack_entry Thomas Rast
2013-03-25 23:19                   ` Junio C Hamano
2013-03-26  3:37                 ` [PATCH v2 0/3] Recursion-free unpack_entry and packed_object_info Nicolas Pitre
2013-03-25 18:17               ` [PATCH] sha1_file: remove recursion in packed_object_info Junio C Hamano
2013-03-27 20:03               ` [PATCH v3 0/3] Recursion-free unpack_entry and packed_object_info Thomas Rast
2013-03-27 20:03                 ` [PATCH v3 1/3] sha1_file: remove recursion in packed_object_info Thomas Rast
2013-03-27 20:03                 ` [PATCH v3 2/3] Refactor parts of in_delta_base_cache/cache_or_unpack_entry Thomas Rast
2013-03-27 20:03                 ` [PATCH v3 3/3] sha1_file: remove recursion in unpack_entry Thomas Rast
2013-03-27 20:29                   ` Junio C Hamano [this message]
2013-03-20  1:17       ` regression in multi-threaded git-pack-index Duy Nguyen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7vli98lh7g.fsf@alter.siamese.dyndns.org \
    --to=gitster@pobox.com \
    --cc=git@vger.kernel.org \
    --cc=nico@fluxnic.net \
    --cc=pclouds@gmail.com \
    --cc=peff@peff.net \
    --cc=szager@google.com \
    --cc=trast@student.ethz.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).