All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nicolas Pitre <nico@fluxnic.net>
To: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Cc: git@vger.kernel.org, Junio C Hamano <gitster@pobox.com>
Subject: Re: [PATCH v2 3/4] pack-objects: refactor write_object()
Date: Fri, 18 May 2012 22:43:59 -0400 (EDT)	[thread overview]
Message-ID: <alpine.LFD.2.02.1205182243180.13185@xanadu.home> (raw)
In-Reply-To: <1337169731-23416-3-git-send-email-pclouds@gmail.com>

[-- Attachment #1: Type: TEXT/PLAIN, Size: 12290 bytes --]

On Wed, 16 May 2012, Nguyễn Thái Ngọc Duy wrote:

> Move !to_reuse and to_reuse write code out into two separate functions
> and remove "goto no_reuse;" hack
> 
> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>

I like this very much.

Acked-by: Nicolas Pitre <nico@fluxnic.net>


> ---
>  builtin/pack-objects.c |  322 ++++++++++++++++++++++++++----------------------
>  1 files changed, 172 insertions(+), 150 deletions(-)
> 
> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> index b2e0940..ccfcbad 100644
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
> @@ -200,22 +200,178 @@ static void copy_pack_data(struct sha1file *f,
>  }
>  
>  /* Return 0 if we will bust the pack-size limit */
> -static unsigned long write_object(struct sha1file *f,
> -				  struct object_entry *entry,
> -				  off_t write_offset)
> +static unsigned long write_no_reuse_object(struct sha1file *f, struct object_entry *entry,
> +					   unsigned long limit, int usable_delta)
>  {
> -	unsigned long size, limit, datalen;
> -	void *buf;
> +	unsigned long size, datalen;
>  	unsigned char header[10], dheader[10];
>  	unsigned hdrlen;
>  	enum object_type type;
> +	void *buf;
> +
> +	if (!usable_delta) {
> +		buf = read_sha1_file(entry->idx.sha1, &type, &size);
> +		if (!buf)
> +			die("unable to read %s", sha1_to_hex(entry->idx.sha1));
> +		/*
> +		 * make sure no cached delta data remains from a
> +		 * previous attempt before a pack split occurred.
> +		 */
> +		free(entry->delta_data);
> +		entry->delta_data = NULL;
> +		entry->z_delta_size = 0;
> +	} else if (entry->delta_data) {
> +		size = entry->delta_size;
> +		buf = entry->delta_data;
> +		entry->delta_data = NULL;
> +		type = (allow_ofs_delta && entry->delta->idx.offset) ?
> +			OBJ_OFS_DELTA : OBJ_REF_DELTA;
> +	} else {
> +		buf = get_delta(entry);
> +		size = entry->delta_size;
> +		type = (allow_ofs_delta && entry->delta->idx.offset) ?
> +			OBJ_OFS_DELTA : OBJ_REF_DELTA;
> +	}
> +
> +	if (entry->z_delta_size)
> +		datalen = entry->z_delta_size;
> +	else
> +		datalen = do_compress(&buf, size);
> +
> +	/*
> +	 * The object header is a byte of 'type' followed by zero or
> +	 * more bytes of length.
> +	 */
> +	hdrlen = encode_in_pack_object_header(type, size, header);
> +
> +	if (type == OBJ_OFS_DELTA) {
> +		/*
> +		 * Deltas with relative base contain an additional
> +		 * encoding of the relative offset for the delta
> +		 * base from this object's position in the pack.
> +		 */
> +		off_t ofs = entry->idx.offset - entry->delta->idx.offset;
> +		unsigned pos = sizeof(dheader) - 1;
> +		dheader[pos] = ofs & 127;
> +		while (ofs >>= 7)
> +			dheader[--pos] = 128 | (--ofs & 127);
> +		if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
> +			free(buf);
> +			return 0;
> +		}
> +		sha1write(f, header, hdrlen);
> +		sha1write(f, dheader + pos, sizeof(dheader) - pos);
> +		hdrlen += sizeof(dheader) - pos;
> +	} else if (type == OBJ_REF_DELTA) {
> +		/*
> +		 * Deltas with a base reference contain
> +		 * an additional 20 bytes for the base sha1.
> +		 */
> +		if (limit && hdrlen + 20 + datalen + 20 >= limit) {
> +			free(buf);
> +			return 0;
> +		}
> +		sha1write(f, header, hdrlen);
> +		sha1write(f, entry->delta->idx.sha1, 20);
> +		hdrlen += 20;
> +	} else {
> +		if (limit && hdrlen + datalen + 20 >= limit) {
> +			free(buf);
> +			return 0;
> +		}
> +		sha1write(f, header, hdrlen);
> +	}
> +	sha1write(f, buf, datalen);
> +	free(buf);
> +
> +	return hdrlen + datalen;
> +}
> +
> +/* Return 0 if we will bust the pack-size limit */
> +static unsigned long write_reuse_object(struct sha1file *f, struct object_entry *entry,
> +					unsigned long limit, int usable_delta)
> +{
> +	struct packed_git *p = entry->in_pack;
> +	struct pack_window *w_curs = NULL;
> +	struct revindex_entry *revidx;
> +	off_t offset;
> +	enum object_type type = entry->type;
> +	unsigned long datalen;
> +	unsigned char header[10], dheader[10];
> +	unsigned hdrlen;
> +
> +	if (entry->delta)
> +		type = (allow_ofs_delta && entry->delta->idx.offset) ?
> +			OBJ_OFS_DELTA : OBJ_REF_DELTA;
> +	hdrlen = encode_in_pack_object_header(type, entry->size, header);
> +
> +	offset = entry->in_pack_offset;
> +	revidx = find_pack_revindex(p, offset);
> +	datalen = revidx[1].offset - offset;
> +	if (!pack_to_stdout && p->index_version > 1 &&
> +	    check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) {
> +		error("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1));
> +		unuse_pack(&w_curs);
> +		return write_no_reuse_object(f, entry, limit, usable_delta);
> +	}
> +
> +	offset += entry->in_pack_header_size;
> +	datalen -= entry->in_pack_header_size;
> +
> +	if (!pack_to_stdout && p->index_version == 1 &&
> +	    check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) {
> +		error("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1));
> +		unuse_pack(&w_curs);
> +		return write_no_reuse_object(f, entry, limit, usable_delta);
> +	}
> +
> +	if (type == OBJ_OFS_DELTA) {
> +		off_t ofs = entry->idx.offset - entry->delta->idx.offset;
> +		unsigned pos = sizeof(dheader) - 1;
> +		dheader[pos] = ofs & 127;
> +		while (ofs >>= 7)
> +			dheader[--pos] = 128 | (--ofs & 127);
> +		if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
> +			unuse_pack(&w_curs);
> +			return 0;
> +		}
> +		sha1write(f, header, hdrlen);
> +		sha1write(f, dheader + pos, sizeof(dheader) - pos);
> +		hdrlen += sizeof(dheader) - pos;
> +		reused_delta++;
> +	} else if (type == OBJ_REF_DELTA) {
> +		if (limit && hdrlen + 20 + datalen + 20 >= limit) {
> +			unuse_pack(&w_curs);
> +			return 0;
> +		}
> +		sha1write(f, header, hdrlen);
> +		sha1write(f, entry->delta->idx.sha1, 20);
> +		hdrlen += 20;
> +		reused_delta++;
> +	} else {
> +		if (limit && hdrlen + datalen + 20 >= limit) {
> +			unuse_pack(&w_curs);
> +			return 0;
> +		}
> +		sha1write(f, header, hdrlen);
> +	}
> +	copy_pack_data(f, p, &w_curs, offset, datalen);
> +	unuse_pack(&w_curs);
> +	reused++;
> +	return hdrlen + datalen;
> +}
> +
> +/* Return 0 if we will bust the pack-size limit */
> +static unsigned long write_object(struct sha1file *f,
> +				  struct object_entry *entry,
> +				  off_t write_offset)
> +{
> +	unsigned long limit, len;
>  	int usable_delta, to_reuse;
>  
>  	if (!pack_to_stdout)
>  		crc32_begin(f);
>  
> -	type = entry->type;
> -
>  	/* apply size limit if limited packsize and not first object */
>  	if (!pack_size_limit || !nr_written)
>  		limit = 0;
> @@ -243,11 +399,11 @@ static unsigned long write_object(struct sha1file *f,
>  		to_reuse = 0;	/* explicit */
>  	else if (!entry->in_pack)
>  		to_reuse = 0;	/* can't reuse what we don't have */
> -	else if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA)
> +	else if (entry->type == OBJ_REF_DELTA || entry->type == OBJ_OFS_DELTA)
>  				/* check_object() decided it for us ... */
>  		to_reuse = usable_delta;
>  				/* ... but pack split may override that */
> -	else if (type != entry->in_pack_type)
> +	else if (entry->type != entry->in_pack_type)
>  		to_reuse = 0;	/* pack has delta which is unusable */
>  	else if (entry->delta)
>  		to_reuse = 0;	/* we want to pack afresh */
> @@ -256,153 +412,19 @@ static unsigned long write_object(struct sha1file *f,
>  				 * and we do not need to deltify it.
>  				 */
>  
> -	if (!to_reuse) {
> -		no_reuse:
> -		if (!usable_delta) {
> -			buf = read_sha1_file(entry->idx.sha1, &type, &size);
> -			if (!buf)
> -				die("unable to read %s", sha1_to_hex(entry->idx.sha1));
> -			/*
> -			 * make sure no cached delta data remains from a
> -			 * previous attempt before a pack split occurred.
> -			 */
> -			free(entry->delta_data);
> -			entry->delta_data = NULL;
> -			entry->z_delta_size = 0;
> -		} else if (entry->delta_data) {
> -			size = entry->delta_size;
> -			buf = entry->delta_data;
> -			entry->delta_data = NULL;
> -			type = (allow_ofs_delta && entry->delta->idx.offset) ?
> -				OBJ_OFS_DELTA : OBJ_REF_DELTA;
> -		} else {
> -			buf = get_delta(entry);
> -			size = entry->delta_size;
> -			type = (allow_ofs_delta && entry->delta->idx.offset) ?
> -				OBJ_OFS_DELTA : OBJ_REF_DELTA;
> -		}
> -
> -		if (entry->z_delta_size)
> -			datalen = entry->z_delta_size;
> -		else
> -			datalen = do_compress(&buf, size);
> -
> -		/*
> -		 * The object header is a byte of 'type' followed by zero or
> -		 * more bytes of length.
> -		 */
> -		hdrlen = encode_in_pack_object_header(type, size, header);
> -
> -		if (type == OBJ_OFS_DELTA) {
> -			/*
> -			 * Deltas with relative base contain an additional
> -			 * encoding of the relative offset for the delta
> -			 * base from this object's position in the pack.
> -			 */
> -			off_t ofs = entry->idx.offset - entry->delta->idx.offset;
> -			unsigned pos = sizeof(dheader) - 1;
> -			dheader[pos] = ofs & 127;
> -			while (ofs >>= 7)
> -				dheader[--pos] = 128 | (--ofs & 127);
> -			if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
> -				free(buf);
> -				return 0;
> -			}
> -			sha1write(f, header, hdrlen);
> -			sha1write(f, dheader + pos, sizeof(dheader) - pos);
> -			hdrlen += sizeof(dheader) - pos;
> -		} else if (type == OBJ_REF_DELTA) {
> -			/*
> -			 * Deltas with a base reference contain
> -			 * an additional 20 bytes for the base sha1.
> -			 */
> -			if (limit && hdrlen + 20 + datalen + 20 >= limit) {
> -				free(buf);
> -				return 0;
> -			}
> -			sha1write(f, header, hdrlen);
> -			sha1write(f, entry->delta->idx.sha1, 20);
> -			hdrlen += 20;
> -		} else {
> -			if (limit && hdrlen + datalen + 20 >= limit) {
> -				free(buf);
> -				return 0;
> -			}
> -			sha1write(f, header, hdrlen);
> -		}
> -		sha1write(f, buf, datalen);
> -		free(buf);
> -	}
> -	else {
> -		struct packed_git *p = entry->in_pack;
> -		struct pack_window *w_curs = NULL;
> -		struct revindex_entry *revidx;
> -		off_t offset;
> -
> -		if (entry->delta)
> -			type = (allow_ofs_delta && entry->delta->idx.offset) ?
> -				OBJ_OFS_DELTA : OBJ_REF_DELTA;
> -		hdrlen = encode_in_pack_object_header(type, entry->size, header);
> -
> -		offset = entry->in_pack_offset;
> -		revidx = find_pack_revindex(p, offset);
> -		datalen = revidx[1].offset - offset;
> -		if (!pack_to_stdout && p->index_version > 1 &&
> -		    check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) {
> -			error("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1));
> -			unuse_pack(&w_curs);
> -			goto no_reuse;
> -		}
> -
> -		offset += entry->in_pack_header_size;
> -		datalen -= entry->in_pack_header_size;
> -		if (!pack_to_stdout && p->index_version == 1 &&
> -		    check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) {
> -			error("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1));
> -			unuse_pack(&w_curs);
> -			goto no_reuse;
> -		}
> +	if (!to_reuse)
> +		len = write_no_reuse_object(f, entry, limit, usable_delta);
> +	else
> +		len = write_reuse_object(f, entry, limit, usable_delta);
> +	if (!len)
> +		return 0;
>  
> -		if (type == OBJ_OFS_DELTA) {
> -			off_t ofs = entry->idx.offset - entry->delta->idx.offset;
> -			unsigned pos = sizeof(dheader) - 1;
> -			dheader[pos] = ofs & 127;
> -			while (ofs >>= 7)
> -				dheader[--pos] = 128 | (--ofs & 127);
> -			if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
> -				unuse_pack(&w_curs);
> -				return 0;
> -			}
> -			sha1write(f, header, hdrlen);
> -			sha1write(f, dheader + pos, sizeof(dheader) - pos);
> -			hdrlen += sizeof(dheader) - pos;
> -			reused_delta++;
> -		} else if (type == OBJ_REF_DELTA) {
> -			if (limit && hdrlen + 20 + datalen + 20 >= limit) {
> -				unuse_pack(&w_curs);
> -				return 0;
> -			}
> -			sha1write(f, header, hdrlen);
> -			sha1write(f, entry->delta->idx.sha1, 20);
> -			hdrlen += 20;
> -			reused_delta++;
> -		} else {
> -			if (limit && hdrlen + datalen + 20 >= limit) {
> -				unuse_pack(&w_curs);
> -				return 0;
> -			}
> -			sha1write(f, header, hdrlen);
> -		}
> -		copy_pack_data(f, p, &w_curs, offset, datalen);
> -		unuse_pack(&w_curs);
> -		reused++;
> -	}
>  	if (usable_delta)
>  		written_delta++;
>  	written++;
>  	if (!pack_to_stdout)
>  		entry->idx.crc32 = crc32_end(f);
> -	return hdrlen + datalen;
> +	return len;
>  }
>  
>  enum write_one_status {
> -- 
> 1.7.8.36.g69ee2
> 

  parent reply	other threads:[~2012-05-19  2:44 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-05-12 10:26 [PATCH] pack-objects: use streaming interface for reading large loose blobs Nguyễn Thái Ngọc Duy
2012-05-12 16:51 ` Nicolas Pitre
2012-05-13  4:37   ` [PATCH v2] " Nguyễn Thái Ngọc Duy
2012-05-14 15:56     ` Junio C Hamano
2012-05-14 19:43     ` Junio C Hamano
2012-05-15 11:18       ` Nguyen Thai Ngoc Duy
2012-05-15 15:27         ` Junio C Hamano
2012-05-16  7:09           ` Nguyen Thai Ngoc Duy
2012-05-16 12:02 ` [PATCH v2 1/4] streaming: allow to call close_istream(NULL); Nguyễn Thái Ngọc Duy
2012-05-16 12:02   ` [PATCH v2 2/4] pack-objects, streaming: turn "xx >= big_file_threshold" to ".. > .." Nguyễn Thái Ngọc Duy
2012-05-18 21:05     ` Junio C Hamano
2012-05-16 12:02   ` [PATCH v2 3/4] pack-objects: refactor write_object() Nguyễn Thái Ngọc Duy
2012-05-18 21:16     ` Junio C Hamano
2012-05-19  2:43     ` Nicolas Pitre [this message]
2012-05-16 12:02   ` [PATCH v2 4/4] pack-objects: use streaming interface for reading large loose blobs Nguyễn Thái Ngọc Duy
2012-05-18 21:02   ` [PATCH v2 1/4] streaming: allow to call close_istream(NULL); Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=alpine.LFD.2.02.1205182243180.13185@xanadu.home \
    --to=nico@fluxnic.net \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=pclouds@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.