All of lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH v6 5/6] unpack-objects.c: add dry_run mode for get_data()
@ 2021-12-21 11:40 Han Xin
  2021-12-21 17:17 ` Junio C Hamano
  0 siblings, 1 reply; 4+ messages in thread
From: Han Xin @ 2021-12-21 11:40 UTC (permalink / raw)
  To: l.s.r
  Cc: avarab, chiyutianyi, git, gitster, hanxin.hx, peff, philipoakley,
	stolee, zhiyou.jx

> Clever.  Looks good to me.
>
> For some reason I was expecting this patch to have some connection to
> one of the earlier ones (perhaps because get_data() was mentioned),
> but it is technically independent.

I think I should adjust the order of this patch to move it forward.

Thanks
-Han Xin

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v6 5/6] unpack-objects.c: add dry_run mode for get_data()
  2021-12-21 11:40 [PATCH v6 5/6] unpack-objects.c: add dry_run mode for get_data() Han Xin
@ 2021-12-21 17:17 ` Junio C Hamano
  0 siblings, 0 replies; 4+ messages in thread
From: Junio C Hamano @ 2021-12-21 17:17 UTC (permalink / raw)
  To: Han Xin
  Cc: l.s.r, avarab, git, hanxin.hx, peff, philipoakley, stolee, zhiyou.jx

Han Xin <chiyutianyi@gmail.com> writes:

>> Clever.  Looks good to me.
>>
>> For some reason I was expecting this patch to have some connection to
>> one of the earlier ones (perhaps because get_data() was mentioned),
>> but it is technically independent.
>
> I think I should adjust the order of this patch to move it forward.

Or just eject it from the series and send it in as an independent
single patch?

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v6 5/6] unpack-objects.c: add dry_run mode for get_data()
  2021-12-17 11:26 ` [PATCH v6 5/6] unpack-objects.c: add dry_run mode for get_data() Han Xin
@ 2021-12-17 21:22   ` René Scharfe
  0 siblings, 0 replies; 4+ messages in thread
From: René Scharfe @ 2021-12-17 21:22 UTC (permalink / raw)
  To: Han Xin, Junio C Hamano, Git List, Jeff King, Jiang Xin,
	Philip Oakley, Ævar Arnfjörð Bjarmason,
	Derrick Stolee
  Cc: Han Xin

Am 17.12.21 um 12:26 schrieb Han Xin:
> From: Han Xin <hanxin.hx@alibaba-inc.com>
>
> In dry_run mode, "get_data()" is used to verify the inflation of data,
> and the returned buffer will not be used at all and will be freed
> immediately. Even in dry_run mode, it is dangerous to allocate a
> full-size buffer for a large blob object. Therefore, only allocate a
> low memory footprint when calling "get_data()" in dry_run mode.

Clever.  Looks good to me.

For some reason I was expecting this patch to have some connection to
one of the earlier ones (perhaps because get_data() was mentioned),
but it is technically independent.

>
> Suggested-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> Signed-off-by: Han Xin <hanxin.hx@alibaba-inc.com>
> ---
>  builtin/unpack-objects.c | 23 +++++++++++++++++------
>  1 file changed, 17 insertions(+), 6 deletions(-)
>
> diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
> index 4a9466295b..c4a17bdb44 100644
> --- a/builtin/unpack-objects.c
> +++ b/builtin/unpack-objects.c
> @@ -96,15 +96,21 @@ static void use(int bytes)
>  	display_throughput(progress, consumed_bytes);
>  }
>
> -static void *get_data(unsigned long size)
> +static void *get_data(unsigned long size, int dry_run)
>  {
>  	git_zstream stream;
> -	void *buf = xmallocz(size);
> +	unsigned long bufsize;
> +	void *buf;
>
>  	memset(&stream, 0, sizeof(stream));
> +	if (dry_run && size > 8192)
> +		bufsize = 8192;
> +	else
> +		bufsize = size;
> +	buf = xmallocz(bufsize);
>
>  	stream.next_out = buf;
> -	stream.avail_out = size;
> +	stream.avail_out = bufsize;
>  	stream.next_in = fill(1);
>  	stream.avail_in = len;
>  	git_inflate_init(&stream);
> @@ -124,6 +130,11 @@ static void *get_data(unsigned long size)
>  		}
>  		stream.next_in = fill(1);
>  		stream.avail_in = len;
> +		if (dry_run) {
> +			/* reuse the buffer in dry_run mode */
> +			stream.next_out = buf;
> +			stream.avail_out = bufsize;
> +		}
>  	}
>  	git_inflate_end(&stream);
>  	return buf;
> @@ -323,7 +334,7 @@ static void added_object(unsigned nr, enum object_type type,
>  static void unpack_non_delta_entry(enum object_type type, unsigned long size,
>  				   unsigned nr)
>  {
> -	void *buf = get_data(size);
> +	void *buf = get_data(size, dry_run);
>
>  	if (!dry_run && buf)
>  		write_object(nr, type, buf, size);
> @@ -357,7 +368,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
>  	if (type == OBJ_REF_DELTA) {
>  		oidread(&base_oid, fill(the_hash_algo->rawsz));
>  		use(the_hash_algo->rawsz);
> -		delta_data = get_data(delta_size);
> +		delta_data = get_data(delta_size, dry_run);
>  		if (dry_run || !delta_data) {
>  			free(delta_data);
>  			return;
> @@ -396,7 +407,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
>  		if (base_offset <= 0 || base_offset >= obj_list[nr].offset)
>  			die("offset value out of bound for delta base object");
>
> -		delta_data = get_data(delta_size);
> +		delta_data = get_data(delta_size, dry_run);
>  		if (dry_run || !delta_data) {
>  			free(delta_data);
>  			return;


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v6 5/6] unpack-objects.c: add dry_run mode for get_data()
  2021-12-10 10:34 [PATCH v5 0/6] unpack large blobs in stream Han Xin
@ 2021-12-17 11:26 ` Han Xin
  2021-12-17 21:22   ` René Scharfe
  0 siblings, 1 reply; 4+ messages in thread
From: Han Xin @ 2021-12-17 11:26 UTC (permalink / raw)
  To: Junio C Hamano, Git List, Jeff King, Jiang Xin, Philip Oakley,
	Ævar Arnfjörð Bjarmason, Derrick Stolee
  Cc: Han Xin

From: Han Xin <hanxin.hx@alibaba-inc.com>

In dry_run mode, "get_data()" is used to verify the inflation of data,
and the returned buffer will not be used at all and will be freed
immediately. Even in dry_run mode, it is dangerous to allocate a
full-size buffer for a large blob object. Therefore, only allocate a
low memory footprint when calling "get_data()" in dry_run mode.

Suggested-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Han Xin <hanxin.hx@alibaba-inc.com>
---
 builtin/unpack-objects.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 4a9466295b..c4a17bdb44 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -96,15 +96,21 @@ static void use(int bytes)
 	display_throughput(progress, consumed_bytes);
 }
 
-static void *get_data(unsigned long size)
+static void *get_data(unsigned long size, int dry_run)
 {
 	git_zstream stream;
-	void *buf = xmallocz(size);
+	unsigned long bufsize;
+	void *buf;
 
 	memset(&stream, 0, sizeof(stream));
+	if (dry_run && size > 8192)
+		bufsize = 8192;
+	else
+		bufsize = size;
+	buf = xmallocz(bufsize);
 
 	stream.next_out = buf;
-	stream.avail_out = size;
+	stream.avail_out = bufsize;
 	stream.next_in = fill(1);
 	stream.avail_in = len;
 	git_inflate_init(&stream);
@@ -124,6 +130,11 @@ static void *get_data(unsigned long size)
 		}
 		stream.next_in = fill(1);
 		stream.avail_in = len;
+		if (dry_run) {
+			/* reuse the buffer in dry_run mode */
+			stream.next_out = buf;
+			stream.avail_out = bufsize;
+		}
 	}
 	git_inflate_end(&stream);
 	return buf;
@@ -323,7 +334,7 @@ static void added_object(unsigned nr, enum object_type type,
 static void unpack_non_delta_entry(enum object_type type, unsigned long size,
 				   unsigned nr)
 {
-	void *buf = get_data(size);
+	void *buf = get_data(size, dry_run);
 
 	if (!dry_run && buf)
 		write_object(nr, type, buf, size);
@@ -357,7 +368,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 	if (type == OBJ_REF_DELTA) {
 		oidread(&base_oid, fill(the_hash_algo->rawsz));
 		use(the_hash_algo->rawsz);
-		delta_data = get_data(delta_size);
+		delta_data = get_data(delta_size, dry_run);
 		if (dry_run || !delta_data) {
 			free(delta_data);
 			return;
@@ -396,7 +407,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 		if (base_offset <= 0 || base_offset >= obj_list[nr].offset)
 			die("offset value out of bound for delta base object");
 
-		delta_data = get_data(delta_size);
+		delta_data = get_data(delta_size, dry_run);
 		if (dry_run || !delta_data) {
 			free(delta_data);
 			return;
-- 
2.34.1.52.gfcc2252aea.agit.6.5.6


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-12-21 17:18 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-21 11:40 [PATCH v6 5/6] unpack-objects.c: add dry_run mode for get_data() Han Xin
2021-12-21 17:17 ` Junio C Hamano
  -- strict thread matches above, loose matches on Subject: below --
2021-12-10 10:34 [PATCH v5 0/6] unpack large blobs in stream Han Xin
2021-12-17 11:26 ` [PATCH v6 5/6] unpack-objects.c: add dry_run mode for get_data() Han Xin
2021-12-17 21:22   ` René Scharfe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.